diff --git a/src/agents/cli-runner.reliability.test.ts b/src/agents/cli-runner.reliability.test.ts index 2f07090849d4..bbf1a56526e9 100644 --- a/src/agents/cli-runner.reliability.test.ts +++ b/src/agents/cli-runner.reliability.test.ts @@ -10,10 +10,7 @@ import { } from "../auto-reply/reply/reply-run-registry.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; -import { - buildPersistedUserTurnMessage, - createUserTurnTranscriptRecorder, -} from "../sessions/user-turn-transcript.js"; +import { createUserTurnTranscriptRecorder } from "../sessions/user-turn-transcript.js"; import { runPreparedCliAgent } from "./cli-runner.js"; import { createManagedRun, @@ -891,6 +888,7 @@ describe("runCliAgent reliability", () => { const recorder = createUserTurnTranscriptRecorder({ input: { text: "recorder display prompt", + media: [{ path: "/tmp/image.png", contentType: "image/png" }], timestamp: 123, idempotencyKey: "cli-recorder:user", }, @@ -935,6 +933,8 @@ describe("runCliAgent reliability", () => { expect.objectContaining({ role: "user", content: "recorder display prompt", + MediaPath: "/tmp/image.png", + MediaType: "image/png", timestamp: 123, idempotencyKey: "cli-recorder:user", }), @@ -1023,62 +1023,6 @@ describe("runCliAgent reliability", () => { } }); - it("persists approved CLI media user turns without caller-side transcript shaping", async () => { - supervisorSpawnMock.mockResolvedValueOnce( - createManagedRun({ - reason: "exit", - exitCode: 0, - exitSignal: null, - durationMs: 50, - stdout: "image handled", - stderr: "", - timedOut: false, - noOutputTimedOut: false, - }), - ); - const { dir, sessionFile } = createSessionFile(); - const onUserMessagePersisted = vi.fn(); - - try { - const context = buildPreparedContext({ - sessionKey: "agent:main:main", - runId: "run-persist-cli-media", - }); - await runPreparedCliAgent({ - ...context, - params: { - ...context.params, - agentId: "main", - sessionFile, - workspaceDir: dir, - prompt: "runtime image prompt", - userTurnTranscript: { - message: buildPersistedUserTurnMessage({ - text: "describe this", - media: [{ path: "/tmp/image.png", contentType: "image/png" }], - timestamp: 123, - }), - }, - onUserMessagePersisted, - }, - }); - - expect(onUserMessagePersisted).toHaveBeenCalledWith( - expect.objectContaining({ - role: "user", - content: "describe this", - MediaPath: "/tmp/image.png", - MediaType: "image/png", - }), - ); - const transcript = fs.readFileSync(sessionFile, "utf-8"); - expect(transcript).toContain('"MediaPath":"/tmp/image.png"'); - expect(transcript).toContain('"MediaType":"image/png"'); - } finally { - fs.rmSync(dir, { recursive: true, force: true }); - } - }); - it("blocks CLI runs before llm_input and model execution when before_agent_run blocks", async () => { supervisorSpawnMock.mockClear(); const onUserMessagePersisted = vi.fn(); diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts index 31935b23c9be..101cbfe8a957 100644 --- a/src/auto-reply/reply/get-reply-run.media-only.test.ts +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -1,4 +1,4 @@ -import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { importFreshModule } from "openclaw/plugin-sdk/test-fixtures"; @@ -971,164 +971,6 @@ describe("runPreparedReply media-only handling", () => { expect(call.followupRun.imageOrder).toEqual(["inline"]); }); - it("persists staged relative media paths as workspace-backed paths", async () => { - const tmpDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-followup-image-")); - cleanupPaths.push(tmpDir); - const relativeImagePath = "media/inbound/inbound.png"; - const imagePath = path.join(tmpDir, relativeImagePath); - await mkdir(path.dirname(imagePath), { recursive: true }); - await writeFile( - imagePath, - Buffer.from( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=", - "base64", - ), - ); - - const result = await runPreparedReply( - baseParams({ - ctx: { - Body: "describe this", - RawBody: "describe this", - CommandBody: "describe this", - MediaPaths: [relativeImagePath], - MediaTypes: ["image/png"], - MediaWorkspaceDir: tmpDir, - OriginatingChannel: "telegram", - OriginatingTo: "42", - ChatType: "direct", - }, - sessionCtx: { - Body: "describe this", - BodyStripped: "describe this", - Provider: "telegram", - OriginatingChannel: "telegram", - OriginatingTo: "42", - ChatType: "direct", - MediaPaths: [relativeImagePath], - MediaTypes: ["image/png"], - MediaWorkspaceDir: tmpDir, - }, - }), - ); - - expect(result).toEqual({ text: "ok" }); - const call = requireRunReplyAgentCall(); - expect(call.followupRun.userTurnTranscriptRecorder?.message).toMatchObject({ - role: "user", - content: "describe this", - MediaPath: imagePath, - MediaPaths: [imagePath], - MediaType: "image/png", - MediaTypes: ["image/png"], - }); - }); - - it("persists clean media captions instead of model-only media notes", async () => { - const tmpDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-followup-image-")); - cleanupPaths.push(tmpDir); - const imagePath = path.join(tmpDir, "inbound.png"); - await writeFile( - imagePath, - Buffer.from( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=", - "base64", - ), - ); - - await runPreparedReply( - baseParams({ - ctx: { - Body: "What is in this image?", - RawBody: "What is in this image?", - CommandBody: "What is in this image?", - MediaPaths: [imagePath], - MediaTypes: ["image/png"], - MediaWorkspaceDir: tmpDir, - OriginatingChannel: "telegram", - OriginatingTo: "42", - ChatType: "direct", - }, - sessionCtx: { - Body: "[media attached: media://inbound/a.png (image/png)]\nTo send an image back, prefer the message tool (media/path/filePath).\nWhat is in this image?", - BodyStripped: - "[media attached: media://inbound/a.png (image/png)]\nTo send an image back, prefer the message tool (media/path/filePath).\nWhat is in this image?", - Provider: "telegram", - OriginatingChannel: "telegram", - OriginatingTo: "42", - ChatType: "direct", - MediaPaths: [imagePath], - MediaTypes: ["image/png"], - MediaWorkspaceDir: tmpDir, - }, - }), - ); - - const call = requireRunReplyAgentCall(); - expect(call.followupRun.userTurnTranscriptRecorder?.message).toMatchObject({ - role: "user", - content: "What is in this image?", - MediaPath: imagePath, - MediaPaths: [imagePath], - MediaType: "image/png", - MediaTypes: ["image/png"], - }); - const persistedContent = call.followupRun.userTurnTranscriptRecorder?.message?.content; - expect(persistedContent).toBe("What is in this image?"); - expect(persistedContent).not.toContain("media attached"); - expect(persistedContent).not.toContain("message tool"); - }); - - it("uses a media-only transcript label for exact media placeholders", async () => { - const tmpDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-followup-image-")); - cleanupPaths.push(tmpDir); - const imagePath = path.join(tmpDir, "inbound.png"); - await writeFile( - imagePath, - Buffer.from( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=", - "base64", - ), - ); - - await runPreparedReply( - baseParams({ - ctx: { - Body: "", - RawBody: "", - CommandBody: "", - MediaPaths: [imagePath], - MediaTypes: ["image/png"], - MediaWorkspaceDir: tmpDir, - OriginatingChannel: "telegram", - OriginatingTo: "42", - ChatType: "direct", - }, - sessionCtx: { - Body: "", - BodyStripped: "", - Provider: "telegram", - OriginatingChannel: "telegram", - OriginatingTo: "42", - ChatType: "direct", - MediaPaths: [imagePath], - MediaTypes: ["image/png"], - MediaWorkspaceDir: tmpDir, - }, - }), - ); - - const call = requireRunReplyAgentCall(); - expect(call.followupRun.userTurnTranscriptRecorder?.message).toMatchObject({ - role: "user", - content: "[User sent media without caption]", - MediaPath: imagePath, - MediaPaths: [imagePath], - MediaType: "image/png", - MediaTypes: ["image/png"], - }); - }); - it("does not copy prior session media onto text-only followups", async () => { await runPreparedReply( baseParams({ diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index 143242bc3ff4..ef599d166c23 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -567,6 +567,17 @@ function userUpdateMessage( : undefined; } +function readPersistedUserMessages(): Array> { + return readTranscriptJsonLines(mockState.transcriptPath) + .map((entry) => entry.message) + .filter( + (candidate): candidate is Record => + typeof candidate === "object" && + candidate !== null && + (candidate as { role?: unknown }).role === "user", + ); +} + function expectDispatchContextFields(expected: { OriginatingChannel?: unknown; OriginatingTo?: unknown; @@ -3108,32 +3119,6 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(userUpdates).toHaveLength(0); }); - it("does not emit raw user transcript content when before_agent_run blocks without a persisted marker", async () => { - createTranscriptFixture("openclaw-chat-send-user-transcript-blocked-live-signal-"); - mockState.finalText = "The agent cannot read this message."; - mockState.triggerAgentRunStart = true; - mockState.hasBeforeAgentRunHooks = true; - mockState.dispatchBlockedByBeforeAgentRun = true; - const respond = vi.fn(); - const context = createChatContext(); - - await runNonStreamingChatSend({ - context, - respond, - idempotencyKey: "idem-user-transcript-blocked-live-signal", - message: "secret prompt blocked before persistence", - expectBroadcast: false, - }); - - const userUpdates = mockState.emittedTranscriptUpdates.filter( - (update) => - typeof update.message === "object" && - update.message !== null && - (update.message as { role?: unknown }).role === "user", - ); - expect(userUpdates).toHaveLength(0); - }); - it("does not persist raw user transcript content when a delivered before_agent_run block is followed by a dispatch error", async () => { createTranscriptFixture("openclaw-chat-send-user-transcript-blocked-delivery-error-"); mockState.triggerAgentRunStart = true; @@ -3166,15 +3151,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => ); }); expect(findUserUpdate()).toBeUndefined(); - const persistedUsers = readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .filter( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ); - expect(persistedUsers).toHaveLength(0); + expect(readPersistedUserMessages()).toHaveLength(0); }); it("emits a user transcript update when hooks pass and the started agent throws before runtime persistence", async () => { @@ -4525,14 +4502,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(message?.role).toBe("user"); expect(message?.content).toBe("quick command"); expect(typeof message?.timestamp).toBe("number"); - const persistedUser = readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .find( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ); + const persistedUser = readPersistedUserMessages()[0]; expect(persistedUser?.content).toBe("quick command"); }); @@ -4559,14 +4529,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(message?.role).toBe("user"); expect(message?.content).toBe("hello from failed dispatch"); expect(typeof message?.timestamp).toBe("number"); - const persistedUser = readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .find( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ); + const persistedUser = readPersistedUserMessages()[0]; expect(persistedUser?.content).toBe("hello from failed dispatch"); }); }); @@ -4591,16 +4554,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => }); await waitForAssertion(() => { - expect( - readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .filter( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ), - ).toEqual([ + expect(readPersistedUserMessages()).toEqual([ expect.objectContaining({ role: "user", content: "hello from replayed failed dispatch", @@ -4665,14 +4619,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(userUpdate?.sessionKey).toBe("main"); expect(message?.role).toBe("user"); expect(message?.content).toBe("hello before cli startup failure"); - const persistedUser = readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .find( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ); + const persistedUser = readPersistedUserMessages()[0]; expect(persistedUser?.content).toBe("hello before cli startup failure"); }); }); @@ -4698,14 +4645,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => const message = userUpdateMessage(userUpdate); expect(message?.content).toBe("[redacted by hook]"); expect(mockState.beforeMessageWriteCalls).toHaveLength(1); - const persistedUser = readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .find( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ); + const persistedUser = readPersistedUserMessages()[0]; expect(persistedUser?.content).toBe("[redacted by hook]"); expect(JSON.stringify(persistedUser)).not.toContain("raw sensitive prompt"); }); @@ -4734,16 +4674,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(mockState.beforeMessageWriteCalls).toHaveLength(1); }); expect(findUserUpdate()).toBeUndefined(); - expect( - readTranscriptJsonLines(mockState.transcriptPath) - .map((entry) => entry.message) - .filter( - (candidate): candidate is Record => - typeof candidate === "object" && - candidate !== null && - (candidate as { role?: unknown }).role === "user", - ), - ).toHaveLength(0); + expect(readPersistedUserMessages()).toHaveLength(0); }); it("emits a user transcript update when a started agent returns an error before runtime persistence", async () => { diff --git a/src/sessions/user-turn-transcript.test.ts b/src/sessions/user-turn-transcript.test.ts index 53e27cb61bb9..f65998823b74 100644 --- a/src/sessions/user-turn-transcript.test.ts +++ b/src/sessions/user-turn-transcript.test.ts @@ -11,7 +11,6 @@ import { afterEach, describe, expect, it } from "vitest"; import { appendUserTurnTranscriptMessage, buildPersistedUserTurnMediaInputsFromFields, - buildPersistedUserTurnMessage, createUserTurnTranscriptRecorder, mergePreparedUserTurnMessageForRuntime, persistUserTurnTranscript, @@ -135,88 +134,17 @@ describe("user turn transcript persistence", () => { }); }); - describe("buildPersistedUserTurnMessage", () => { - it("builds a plain user transcript message for text-only turns", () => { - expect( - buildPersistedUserTurnMessage({ - text: "hello", - timestamp: 123, - idempotencyKey: "turn-1", - }), - ).toEqual({ - role: "user", - content: "hello", - timestamp: 123, - idempotencyKey: "turn-1", - }); - }); - - it("adds structured media fields to the user transcript message", () => { - expect( - buildPersistedUserTurnMessage({ - text: "What is in this image?", - media: [{ path: "/tmp/a.png", contentType: "image/png" }], - timestamp: 123, - }), - ).toEqual({ - role: "user", - content: "What is in this image?", - timestamp: 123, - MediaPath: "/tmp/a.png", - MediaPaths: ["/tmp/a.png"], - MediaType: "image/png", - MediaTypes: ["image/png"], - }); - }); - - it("does not infer media from marker-like user text", () => { - expect( - buildPersistedUserTurnMessage({ - text: "[media attached: media://inbound/photo.png]\nWhat is this?", - timestamp: 123, - }), - ).toEqual({ - role: "user", - content: "[media attached: media://inbound/photo.png]\nWhat is this?", - timestamp: 123, - }); - }); - - it("uses an explicit media-only display text when provided", () => { - expect( - buildPersistedUserTurnMessage({ - text: "", - mediaOnlyText: "[User sent media]", - media: [{ path: "/tmp/a.png", contentType: "image/png" }], - }), - ).toEqual({ - role: "user", - content: "[User sent media]", - MediaPath: "/tmp/a.png", - MediaPaths: ["/tmp/a.png"], - MediaType: "image/png", - MediaTypes: ["image/png"], - }); - }); - - it("keeps media-only transcript content empty by default", () => { - expect( - buildPersistedUserTurnMessage({ - media: [{ path: "/tmp/a.png", contentType: "image/png" }], - }), - ).toEqual({ - role: "user", - content: "", - MediaPath: "/tmp/a.png", - MediaPaths: ["/tmp/a.png"], - MediaType: "image/png", - MediaTypes: ["image/png"], - }); - }); - }); - describe("mergePreparedUserTurnMessageForRuntime", () => { it("adds prepared transcript metadata to runtime user messages", () => { + const recorder = createUserTurnTranscriptRecorder({ + input: { + text: "display prompt", + media: [{ path: "/tmp/image.png", contentType: "image/png" }], + timestamp: 123, + }, + target: { transcriptPath: "/tmp/session.jsonl" }, + }); + expect( mergePreparedUserTurnMessageForRuntime({ runtimeMessage: castAgentMessage({ @@ -224,11 +152,7 @@ describe("user turn transcript persistence", () => { content: "runtime prompt", provenance: { sourceChannel: "telegram" }, }), - preparedMessage: buildPersistedUserTurnMessage({ - text: "display prompt", - media: [{ path: "/tmp/image.png", contentType: "image/png" }], - timestamp: 123, - }), + preparedMessage: recorder.message, }), ).toMatchObject({ role: "user", @@ -241,6 +165,10 @@ describe("user turn transcript persistence", () => { }); it("does not replace blocked before_agent_run user markers", () => { + const recorder = createUserTurnTranscriptRecorder({ + input: { text: "raw prompt" }, + target: { transcriptPath: "/tmp/session.jsonl" }, + }); const blocked = castAgentMessage({ role: "user", content: "[blocked]", @@ -250,22 +178,22 @@ describe("user turn transcript persistence", () => { expect( mergePreparedUserTurnMessageForRuntime({ runtimeMessage: blocked, - preparedMessage: buildPersistedUserTurnMessage({ - text: "raw prompt", - }), + preparedMessage: recorder.message, }), ).toBe(blocked); }); it("does not apply prepared user metadata to assistant messages", () => { + const recorder = createUserTurnTranscriptRecorder({ + input: { text: "display prompt" }, + target: { transcriptPath: "/tmp/session.jsonl" }, + }); const assistant = castAgentMessage({ role: "assistant", content: "hello" }); expect( mergePreparedUserTurnMessageForRuntime({ runtimeMessage: assistant, - preparedMessage: buildPersistedUserTurnMessage({ - text: "display prompt", - }), + preparedMessage: recorder.message, }), ).toBe(assistant); }); diff --git a/src/sessions/user-turn-transcript.ts b/src/sessions/user-turn-transcript.ts index eee36c1a37d7..952b3109eab3 100644 --- a/src/sessions/user-turn-transcript.ts +++ b/src/sessions/user-turn-transcript.ts @@ -296,7 +296,7 @@ function buildPersistedUserTurnMediaFields( }; } -export function buildPersistedUserTurnMessage(params: UserTurnInput): PersistedUserTurnMessage { +function buildPersistedUserTurnMessage(params: UserTurnInput): PersistedUserTurnMessage { const mediaFields = buildPersistedUserTurnMediaFields(params.media); const hasMedia = Boolean(mediaFields.MediaPath); const text = normalizeTranscriptText(params.text);