diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts
index 848ca99e27c..e11fa8a18b5 100644
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -1,4 +1,4 @@
-import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 import { importFreshModule } from "openclaw/plugin-sdk/test-fixtures";
@@ -971,6 +971,59 @@ describe("runPreparedReply media-only handling", () => {
     expect(call.followupRun.imageOrder).toEqual(["inline"]);
   });
 
+  it("persists staged relative media paths as workspace-backed paths", async () => {
+    const tmpDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-followup-image-"));
+    cleanupPaths.push(tmpDir);
+    const relativeImagePath = "media/inbound/inbound.png";
+    const imagePath = path.join(tmpDir, relativeImagePath);
+    await mkdir(path.dirname(imagePath), { recursive: true });
+    await writeFile(
+      imagePath,
+      Buffer.from(
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=",
+        "base64",
+      ),
+    );
+
+    const result = await runPreparedReply(
+      baseParams({
+        ctx: {
+          Body: "describe this",
+          RawBody: "describe this",
+          CommandBody: "describe this",
+          MediaPaths: [relativeImagePath],
+          MediaTypes: ["image/png"],
+          MediaWorkspaceDir: tmpDir,
+          OriginatingChannel: "telegram",
+          OriginatingTo: "42",
+          ChatType: "direct",
+        },
+        sessionCtx: {
+          Body: "describe this",
+          BodyStripped: "describe this",
+          Provider: "telegram",
+          OriginatingChannel: "telegram",
+          OriginatingTo: "42",
+          ChatType: "direct",
+          MediaPaths: [relativeImagePath],
+          MediaTypes: ["image/png"],
+          MediaWorkspaceDir: tmpDir,
+        },
+      }),
+    );
+
+    expect(result).toEqual({ text: "ok" });
+    const call = requireRunReplyAgentCall();
+    expect(call.followupRun.userMessageForPersistence).toMatchObject({
+      role: "user",
+      content: "describe this",
+      MediaPath: imagePath,
+      MediaPaths: [imagePath],
+      MediaType: "image/png",
+      MediaTypes: ["image/png"],
+    });
+  });
+
   it("persists clean media captions instead of model-only media notes", async () => {
     const tmpDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-followup-image-"));
     cleanupPaths.push(tmpDir);
diff --git a/src/sessions/user-turn-transcript.test.ts b/src/sessions/user-turn-transcript.test.ts
index c74f6b3d697..38ecc9f610e 100644
--- a/src/sessions/user-turn-transcript.test.ts
+++ b/src/sessions/user-turn-transcript.test.ts
@@ -65,6 +65,51 @@ describe("user turn transcript persistence", () => {
     ).toEqual([{ url: "media://inbound/a.png", contentType: "image/png" }]);
   });
 
+  it("infers transcript media type from media path when explicit type is absent", () => {
+    expect(
+      buildPersistedUserTurnMediaInputsFromFields({
+        MediaPaths: ["/tmp/a.png", "https://example.test/report.pdf"],
+      }),
+    ).toEqual([
+      { path: "/tmp/a.png", contentType: "image/png" },
+      { path: "https://example.test/report.pdf", contentType: "application/pdf" },
+    ]);
+  });
+
+  it("resolves staged relative media paths against the media workspace", () => {
+    const workspaceDir = createTempDir("openclaw-user-turn-media-");
+
+    expect(
+      buildPersistedUserTurnMediaInputsFromFields({
+        MediaPath: "media/inbound/a.png",
+        MediaPaths: ["media/inbound/a.png", "media/inbound/b.jpg"],
+        MediaType: "image/png",
+        MediaTypes: ["image/png", "image/jpeg"],
+        MediaWorkspaceDir: workspaceDir,
+      }),
+    ).toEqual([
+      { path: path.join(workspaceDir, "media/inbound/a.png"), contentType: "image/png" },
+      { path: path.join(workspaceDir, "media/inbound/b.jpg"), contentType: "image/jpeg" },
+    ]);
+  });
+
+  it("does not rewrite absolute or URL-like media paths", () => {
+    const workspaceDir = createTempDir("openclaw-user-turn-media-");
+    const absolutePath = path.join(workspaceDir, "media/inbound/a.png");
+
+    expect(
+      buildPersistedUserTurnMediaInputsFromFields({
+        MediaPaths: [absolutePath, "media://inbound/b.jpg", "https://example.test/c.png"],
+        MediaTypes: ["image/png", "image/jpeg", "image/png"],
+        MediaWorkspaceDir: workspaceDir,
+      }),
+    ).toEqual([
+      { path: absolutePath, contentType: "image/png" },
+      { path: "media://inbound/b.jpg", contentType: "image/jpeg" },
+      { path: "https://example.test/c.png", contentType: "image/png" },
+    ]);
+  });
+
   it("does not infer media from absent structured fields", () => {
     expect(buildPersistedUserTurnMediaInputsFromFields(undefined)).toEqual([]);
     expect(buildPersistedUserTurnMediaInputsFromFields({})).toEqual([]);
diff --git a/src/sessions/user-turn-transcript.ts b/src/sessions/user-turn-transcript.ts
index 349918c75d4..6210d1281bd 100644
--- a/src/sessions/user-turn-transcript.ts
+++ b/src/sessions/user-turn-transcript.ts
@@ -1,9 +1,11 @@
+import path from "node:path";
 import type { AgentMessage } from "@earendil-works/pi-agent-core";
 import { appendSessionTranscriptMessage } from "../config/sessions/transcript-append.js";
 import { resolveSessionTranscriptFile } from "../config/sessions/transcript.js";
 import type { SessionEntry } from "../config/sessions/types.js";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { logVerbose } from "../globals.js";
+import { mimeTypeFromFilePath } from "../media/mime.js";
 import { emitSessionTranscriptUpdate } from "./transcript-events.js";
 
 export type PersistedUserTurnMediaInput = {
@@ -123,6 +125,7 @@ export type PersistedUserTurnMediaFieldSource = {
   MediaUrls?: readonly (string | null | undefined)[] | null;
   MediaType?: string | null;
   MediaTypes?: readonly (string | null | undefined)[] | null;
+  MediaWorkspaceDir?: string | null;
 };
 
 function normalizeOptionalText(value: string | null | undefined): string | undefined {
@@ -205,6 +208,43 @@ function normalizeOptionalTextArray(
   );
 }
 
+/**
+ * Matches values that start with a URI scheme such as `https:` or `media:`.
+ * These are treated as already-addressable and are never joined onto the workspace.
+ */
+const URL_LIKE_MEDIA_PATH_PATTERN = /^[a-z][a-z0-9+.-]*:/i;
+
+/**
+ * Resolves a media path for transcript persistence.
+ *
+ * Relative paths are joined onto `workspaceDir`; absolute paths, URL-like values, and
+ * any path seen without a workspace directory are returned unchanged.
+ */
+function resolveTranscriptMediaPath(pathValue: string, workspaceDir: string | undefined): string {
+  if (!workspaceDir || path.isAbsolute(pathValue) || URL_LIKE_MEDIA_PATH_PATTERN.test(pathValue)) {
+    return pathValue;
+  }
+  return path.join(workspaceDir, pathValue);
+}
+
+/**
+ * Picks the content type recorded for a persisted media input.
+ *
+ * An explicit type always wins. Otherwise the type is inferred from the media path, and
+ * only when the path yields nothing (absent or no recognizable extension) from the URL.
+ */
+function resolveTranscriptMediaType(params: {
+  explicitType: string | undefined;
+  mediaPath: string | undefined;
+  mediaUrl: string | undefined;
+}): string | undefined {
+  return (
+    params.explicitType ??
+    mimeTypeFromFilePath(params.mediaPath) ??
+    mimeTypeFromFilePath(params.mediaUrl)
+  );
+}
+
 export function buildPersistedUserTurnMediaInputsFromFields(
   fields: PersistedUserTurnMediaFieldSource | null | undefined,
 ): PersistedUserTurnMediaInput[] {
@@ -218,19 +258,25 @@ export function buildPersistedUserTurnMediaInputsFromFields(
   const singlePath = normalizeOptionalText(fields.MediaPath);
   const singleUrl = normalizeOptionalText(fields.MediaUrl);
   const singleType = normalizeOptionalText(fields.MediaType);
+  const workspaceDir = normalizeOptionalText(fields.MediaWorkspaceDir);
   const mediaCount = Math.max(paths.length, urls.length, singlePath || singleUrl ? 1 : 0);
   const media: PersistedUserTurnMediaInput[] = [];
 
   for (let index = 0; index < mediaCount; index += 1) {
-    const path = paths[index] ?? (index === 0 ? singlePath : undefined);
+    const rawPath = paths[index] ?? (index === 0 ? singlePath : undefined);
+    const mediaPath = rawPath ? resolveTranscriptMediaPath(rawPath, workspaceDir) : undefined;
     const url = urls[index] ?? (index === 0 ? singleUrl : undefined);
-    if (!path && !url) {
+    if (!mediaPath && !url) {
       continue;
     }
     media.push({
-      ...(path ? { path } : {}),
+      ...(mediaPath ? { path: mediaPath } : {}),
       ...(url ? { url } : {}),
-      contentType: types[index] ?? singleType,
+      contentType: resolveTranscriptMediaType({
+        explicitType: types[index] ?? singleType,
+        mediaPath,
+        mediaUrl: url,
+      }),
     });
   }
 