From 54731492a285d8d665430ba1bdb561643519ee8c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 20:17:41 +0100 Subject: [PATCH] fix(gateway): persist webchat images as managed media (#70719) * fix(gateway): persist webchat images as managed media * fix(ui): keep managed image auth same-origin * docs: note managed webchat image fix --- CHANGELOG.md | 1 + docs/web/control-ui.md | 1 + src/gateway/managed-image-attachments.test.ts | 992 +++++++++++++++ src/gateway/managed-image-attachments.ts | 1097 +++++++++++++++++ src/gateway/server-http.ts | 23 + .../chat.directive-tags.test.ts | 7 +- src/gateway/server-methods/chat.ts | 376 +++++- .../server.chat.gateway-server-chat.test.ts | 94 ++ ui/src/ui/chat/grouped-render.test.ts | 90 ++ ui/src/ui/chat/grouped-render.ts | 157 ++- 10 files changed, 2792 insertions(+), 46 deletions(-) create mode 100644 src/gateway/managed-image-attachments.test.ts create mode 100644 src/gateway/managed-image-attachments.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a3460ef339..b6e9554665b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai - Approvals/security: require explicit chat exec-approval enablement instead of auto-enabling approval clients just because approvers resolve from config or owner allowlists. (#70715) Thanks @vincentkoc. - Discord/security: keep native slash-command channel policy from bypassing configured owner or member restrictions, while preserving channel-policy fallback when no stricter access rule exists. (#70711) Thanks @vincentkoc. - Android/security: stop `ASK_OPENCLAW` intents from auto-sending injected prompts, so external app actions only prefill the draft instead of dispatching it immediately. (#70714) Thanks @vincentkoc. +- Control UI/chat: persist assistant-generated images as authenticated managed media so webchat history reloads show the image instead of dropping it. 
(#70719) - Control UI/chat: queue Stop-button aborts across Gateway reconnects so a disconnected active run is canceled on reconnect instead of only clearing local UI state. (#70673) Thanks @chinar-amrutkar. - Secrets/Windows: strip UTF-8 BOMs from file-backed secrets and keep unavailable ACL checks fail-closed unless trusted file or exec providers explicitly opt into `allowInsecurePath`. (#70662) Thanks @zhanggpcsu. - Agents/image generation: escape ignored override values in tool warnings so parsed `MEDIA:` directives cannot be injected through unsupported model options. (#70710) Thanks @vincentkoc. diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md index 8a1b4e56b91..8c74bbd4ab2 100644 --- a/docs/web/control-ui.md +++ b/docs/web/control-ui.md @@ -149,6 +149,7 @@ Cron jobs panel notes: - `chat.send` is **non-blocking**: it acks immediately with `{ runId, status: "started" }` and the response streams via `chat` events. - Re-sending with the same `idempotencyKey` returns `{ status: "in_flight" }` while running, and `{ status: "ok" }` after completion. - `chat.history` responses are size-bounded for UI safety. When transcript entries are too large, Gateway may truncate long text fields, omit heavy metadata blocks, and replace oversized messages with a placeholder (`[chat.history omitted: message too large]`). +- Assistant/generated images are persisted as managed media references and served back through authenticated Gateway media URLs, so reloads do not depend on raw base64 image payloads staying in the chat history response. - `chat.history` also strips display-only inline directive tags from visible assistant text (for example `[[reply_to_*]]` and `[[audio_as_voice]]`), plain-text tool-call XML payloads (including `...`, `...`, `...`, `...`, and truncated tool-call blocks), and leaked ASCII/full-width model control tokens, and omits assistant entries whose whole visible text is only the exact silent token `NO_REPLY` / `no_reply`. 
- `chat.inject` appends an assistant note to the session transcript and broadcasts a `chat` event for UI-only updates (no agent run, no channel delivery). - The chat header model and thinking pickers patch the active session immediately through `sessions.patch`; they are persistent session overrides, not one-turn-only send options. diff --git a/src/gateway/managed-image-attachments.test.ts b/src/gateway/managed-image-attachments.test.ts new file mode 100644 index 00000000000..32892896588 --- /dev/null +++ b/src/gateway/managed-image-attachments.test.ts @@ -0,0 +1,992 @@ +import fs from "node:fs/promises"; +import http from "node:http"; +import type { AddressInfo } from "node:net"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { createPinnedLookup } from "../infra/net/ssrf.js"; +import { setMediaStoreNetworkDepsForTest } from "../media/store.js"; + +const authorizeGatewayHttpRequestOrReplyMock = vi.fn(); +const resolveOpenAiCompatibleHttpOperatorScopesMock = vi.fn(); +const getLatestSubagentRunByChildSessionKeyMock = vi.fn(); +const loadSessionEntryMock = vi.fn(); +const readSessionMessagesMock = vi.fn(); + +vi.mock("./http-utils.js", () => ({ + authorizeGatewayHttpRequestOrReply: authorizeGatewayHttpRequestOrReplyMock, + resolveOpenAiCompatibleHttpOperatorScopes: resolveOpenAiCompatibleHttpOperatorScopesMock, +})); + +vi.mock("./session-utils.js", () => ({ + loadSessionEntry: loadSessionEntryMock, + readSessionMessages: readSessionMessagesMock, +})); + +vi.mock("../agents/subagent-registry.js", () => ({ + getLatestSubagentRunByChildSessionKey: getLatestSubagentRunByChildSessionKeyMock, +})); + +const { + DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS, + attachManagedOutgoingImagesToMessage, + cleanupManagedOutgoingImageRecords, + createManagedOutgoingImageBlocks, + handleManagedOutgoingImageHttpRequest, + resolveManagedImageAttachmentLimits, +} = await 
import("./managed-image-attachments.js"); + +type RequestResult = { + statusCode: number; + headers: http.IncomingHttpHeaders; + body: Buffer; +}; + +const TINY_PNG_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAusB9WnXcZ0AAAAASUVORK5CYII="; + +async function createPngDataUrl(width: number, height: number): Promise { + const sharp = (await import("sharp")).default; + const buffer = await sharp({ + create: { + width, + height, + channels: 4, + background: { r: 24, g: 64, b: 128, alpha: 1 }, + }, + }) + .png() + .toBuffer(); + return `data:image/png;base64,${buffer.toString("base64")}`; +} + +async function createNoisyPngBuffer(width: number, height: number): Promise { + const sharp = (await import("sharp")).default; + const pixels = Buffer.alloc(width * height * 4); + for (let i = 0; i < pixels.length; i += 4) { + const seed = i / 4; + pixels[i] = seed % 251; + pixels[i + 1] = (seed * 17) % 253; + pixels[i + 2] = (seed * 29) % 255; + pixels[i + 3] = 255; + } + return sharp(pixels, { raw: { width, height, channels: 4 } }) + .png({ compressionLevel: 0 }) + .toBuffer(); +} + +async function createFixture( + stateDir: string, + options?: { sessionKey?: string; attachmentId?: string; filename?: string }, +) { + const attachmentId = options?.attachmentId ?? "11111111-1111-4111-8111-111111111111"; + const sessionKey = options?.sessionKey ?? "agent:main:main"; + const filename = options?.filename ?? 
`${attachmentId}-cat-full.png`; + const originalPath = path.join(stateDir, "files", filename); + await fs.mkdir(path.dirname(originalPath), { recursive: true }); + await fs.writeFile(originalPath, Buffer.from("original-image")); + const record: Record = { + attachmentId, + sessionKey, + messageId: "msg-1", + createdAt: new Date().toISOString(), + alt: "Cat", + original: { + path: originalPath, + contentType: "image/png", + width: 1024, + height: 768, + sizeBytes: 14, + filename: "cat.png", + }, + }; + const recordsDir = path.join(stateDir, "media", "outgoing", "records"); + await fs.mkdir(recordsDir, { recursive: true }); + await fs.writeFile( + path.join(recordsDir, `${attachmentId}.json`), + JSON.stringify(record, null, 2), + "utf-8", + ); + return { attachmentId, sessionKey, originalPath }; +} + +async function requestManagedImage(params: { + stateDir: string; + pathName: string; + method?: string; + scopes?: string[]; + denyAuth?: boolean; + authResponse?: Record; + headers?: Record; + transcriptMessages?: Record[]; + subagentRun?: Record | null; + sessionEntry?: { sessionId: string; sessionFile?: string }; +}) { + authorizeGatewayHttpRequestOrReplyMock.mockImplementation(async ({ res }) => { + if (params.denyAuth) { + res.statusCode = 401; + res.end(); + return null; + } + return { ok: true, ...params.authResponse }; + }); + resolveOpenAiCompatibleHttpOperatorScopesMock.mockReturnValue(params.scopes ?? ["operator.read"]); + getLatestSubagentRunByChildSessionKeyMock.mockReturnValue(params.subagentRun ?? null); + loadSessionEntryMock.mockReturnValue({ + storePath: path.join(params.stateDir, "gateway-sessions.json"), + entry: params.sessionEntry ?? { sessionId: "sess-1", sessionFile: "session.jsonl" }, + }); + readSessionMessagesMock.mockReturnValue( + params.transcriptMessages ?? 
[ + { + role: "assistant", + content: [ + { + type: "image", + url: params.pathName, + openUrl: params.pathName, + }, + ], + __openclaw: { id: "msg-1" }, + }, + ], + ); + + const auth = { mode: "test" } as never; + const server = http.createServer(async (req, res) => { + const handled = await handleManagedOutgoingImageHttpRequest(req, res, { + auth, + trustedProxies: ["127.0.0.1/32"], + allowRealIpFallback: false, + stateDir: params.stateDir, + }); + if (!handled) { + res.statusCode = 404; + res.end("unhandled"); + } + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address() as AddressInfo; + + try { + const result = await new Promise((resolve, reject) => { + const req = http.request( + { + host: "127.0.0.1", + port: address.port, + path: params.pathName, + method: params.method ?? "GET", + headers: params.headers, + }, + async (res) => { + const chunks: Buffer[] = []; + for await (const chunk of res) { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + resolve({ + statusCode: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks), + }); + }, + ); + req.on("error", reject); + req.end(); + }); + + return { result, auth }; + } finally { + await new Promise((resolve, reject) => + server.close((error) => (error ? 
reject(error) : resolve())), + ); + } +} + +describe("resolveManagedImageAttachmentLimits", () => { + it("keeps the existing public limit shape", () => { + expect(resolveManagedImageAttachmentLimits()).toEqual(DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS); + }); +}); + +describe("handleManagedOutgoingImageHttpRequest", () => { + let stateDir: string; + + beforeEach(async () => { + stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "managed-images-")); + vi.clearAllMocks(); + }); + + afterEach(async () => { + setMediaStoreNetworkDepsForTest(); + await fs.rm(stateDir, { recursive: true, force: true }); + }); + + it("serves full images for authorized chat-history readers", async () => { + const { attachmentId, sessionKey } = await createFixture(stateDir); + + const { result } = await requestManagedImage({ + stateDir, + pathName: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + headers: { "x-openclaw-requester-session-key": sessionKey }, + }); + + expect(result.statusCode).toBe(200); + expect(result.headers["content-type"]).toBe("image/png"); + expect(result.headers["content-disposition"]).toContain("inline"); + expect(result.body.toString("utf-8")).toBe("original-image"); + }); + + it("rejects unauthenticated requests before serving bytes", async () => { + const { attachmentId, sessionKey } = await createFixture(stateDir); + + const { result } = await requestManagedImage({ + stateDir, + pathName: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + denyAuth: true, + }); + + expect(result.statusCode).toBe(401); + expect(result.body.byteLength).toBe(0); + }); + + it("rejects requests from unrelated sessions", async () => { + const { attachmentId, sessionKey } = await createFixture(stateDir); + + const { result } = await requestManagedImage({ + stateDir, + pathName: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + headers: { "x-openclaw-requester-session-key": 
"agent:main:other" }, + }); + + expect(result.statusCode).toBe(403); + }); + + it("allows device-token access without requester session ownership", async () => { + const { attachmentId, sessionKey } = await createFixture(stateDir); + + const { result } = await requestManagedImage({ + stateDir, + pathName: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + authResponse: { authMethod: "device-token" }, + }); + + expect(result.statusCode).toBe(200); + expect(result.body.toString("utf-8")).toBe("original-image"); + }); + + it("rejects non-GET methods", async () => { + const { attachmentId, sessionKey } = await createFixture(stateDir); + + const { result } = await requestManagedImage({ + stateDir, + pathName: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + method: "POST", + headers: { "x-openclaw-requester-session-key": sessionKey }, + }); + + expect(result.statusCode).toBe(405); + }); + + it("rejects malformed encoded session keys", async () => { + const { attachmentId } = await createFixture(stateDir); + + const { result } = await requestManagedImage({ + stateDir, + pathName: `/api/chat/media/outgoing/%E0%A4%A/${attachmentId}/full`, + authResponse: { authMethod: "device-token" }, + }); + + expect(result.statusCode).toBe(404); + }); + + it("reuses the session attachment index across requests until the transcript changes", async () => { + const { attachmentId, sessionKey } = await createFixture(stateDir); + const sessionFile = path.join(stateDir, "sessions", "sess-main.jsonl"); + await fs.mkdir(path.dirname(sessionFile), { recursive: true }); + await fs.writeFile(sessionFile, '{"message":{}}\n', "utf-8"); + + const transcriptMessages = [ + { + __openclaw: { id: "msg-1" }, + content: [ + { + type: "image", + url: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + openUrl: `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`, + }, + ], + }, 
+ ]; + + const pathName = `/api/chat/media/outgoing/${encodeURIComponent(sessionKey)}/${attachmentId}/full`; + const first = await requestManagedImage({ + stateDir, + pathName, + headers: { "x-openclaw-requester-session-key": sessionKey }, + sessionEntry: { sessionId: "sess-main", sessionFile }, + transcriptMessages, + }); + const second = await requestManagedImage({ + stateDir, + pathName, + headers: { "x-openclaw-requester-session-key": sessionKey }, + sessionEntry: { sessionId: "sess-main", sessionFile }, + transcriptMessages, + }); + + expect(first.result.statusCode).toBe(200); + expect(second.result.statusCode).toBe(200); + expect(readSessionMessagesMock).toHaveBeenCalledTimes(1); + + await new Promise((resolve) => setTimeout(resolve, 5)); + await fs.writeFile(sessionFile, '{"message":{}}\n{"message":{"content":"updated"}}\n', "utf-8"); + + const third = await requestManagedImage({ + stateDir, + pathName, + headers: { "x-openclaw-requester-session-key": sessionKey }, + sessionEntry: { sessionId: "sess-main", sessionFile }, + transcriptMessages, + }); + + expect(third.result.statusCode).toBe(200); + expect(readSessionMessagesMock).toHaveBeenCalledTimes(2); + }); +}); + +describe("createManagedOutgoingImageBlocks", () => { + let stateDir: string; + + beforeEach(async () => { + stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "managed-image-blocks-")); + vi.clearAllMocks(); + }); + + afterEach(async () => { + setMediaStoreNetworkDepsForTest(); + await fs.rm(stateDir, { recursive: true, force: true }); + }); + + it("creates inline/open blocks that both point at the full image", async () => { + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [`data:image/png;base64,${TINY_PNG_BASE64}`], + stateDir, + messageId: "msg-1", + }); + + expect(blocks).toHaveLength(1); + expect(blocks[0]).toMatchObject({ + type: "image", + alt: "Generated image 1", + mimeType: "image/png", + }); + 
expect(blocks[0]?.url).toBe(blocks[0]?.openUrl); + expect(String(blocks[0]?.url)).toMatch(/\/full$/); + + const recordsDir = path.join(stateDir, "media", "outgoing", "records"); + const [recordName] = await fs.readdir(recordsDir); + const record = JSON.parse(await fs.readFile(path.join(recordsDir, recordName), "utf-8")) as { + original: { path: string }; + }; + expect(record.original.path).toContain( + `${path.sep}media${path.sep}outgoing${path.sep}originals${path.sep}`, + ); + }); + + it("rejects oversized image data urls before decoding the payload", async () => { + const oversizedDataUrl = "data:image/png;base64,AAAAAA=="; + + await expect( + createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [oversizedDataUrl], + stateDir, + limits: { + ...DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS, + maxBytes: 3, + }, + }), + ).rejects.toThrow(/Generated image 1.*byte limit/); + + await expect(fs.readdir(path.join(stateDir, "media", "outgoing", "records"))).rejects.toThrow(); + }); + + it("rewrites local image sources into managed display blocks without leaking the source path", async () => { + const previousStateDir = process.env.OPENCLAW_STATE_DIR; + process.env.OPENCLAW_STATE_DIR = stateDir; + const sourcePath = path.join(stateDir, "workspace", "fixtures", "dot.png"); + await fs.mkdir(path.dirname(sourcePath), { recursive: true }); + await fs.writeFile(sourcePath, Buffer.from(TINY_PNG_BASE64, "base64")); + + try { + const blocks = await createManagedOutgoingImageBlocks({ + stateDir, + sessionKey: "agent:main:main", + mediaUrls: [sourcePath], + localRoots: [path.join(stateDir, "workspace")], + }); + + expect(blocks).toHaveLength(1); + expect(blocks[0]).toMatchObject({ + type: "image", + url: expect.stringContaining("/api/chat/media/outgoing/agent%3Amain%3Amain/"), + openUrl: expect.stringContaining("/full"), + }); + expect(blocks[0]?.url).toBe(blocks[0]?.openUrl); + expect(JSON.stringify(blocks[0])).not.toContain(sourcePath); + + const attachmentId 
= String(blocks[0]?.url).split("/").at(-2); + expect(attachmentId).toBeTruthy(); + const record = JSON.parse( + await fs.readFile( + path.join(stateDir, "media", "outgoing", "records", `${attachmentId}.json`), + "utf-8", + ), + ) as { original: { filename: string; path: string } }; + expect(record.original.filename).toMatch(/\.png$/); + expect(record.original.path).not.toBe(sourcePath); + expect(record.original.path).toContain(path.join(stateDir, "media", "outgoing", "originals")); + } finally { + if (previousStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = previousStateDir; + } + } + }); + + it("ingests external image URLs into managed storage instead of hotlinking them", async () => { + const previousStateDir = process.env.OPENCLAW_STATE_DIR; + process.env.OPENCLAW_STATE_DIR = stateDir; + const imageBuffer = Buffer.from(TINY_PNG_BASE64, "base64"); + const upstream = http.createServer((req, res) => { + expect(req.url).toBe("/remote-cat.png?sig=secret"); + res.statusCode = 200; + res.setHeader("content-type", "image/png"); + res.end(imageBuffer); + }); + + await new Promise((resolve) => upstream.listen(0, "127.0.0.1", resolve)); + const address = upstream.address() as AddressInfo; + setMediaStoreNetworkDepsForTest({ + resolvePinnedHostname: async (hostname) => ({ + hostname, + addresses: ["127.0.0.1"], + lookup: createPinnedLookup({ hostname, addresses: ["127.0.0.1"] }), + }), + }); + + try { + const sourceUrl = `http://127.0.0.1:${address.port}/remote-cat.png?sig=secret`; + const blocks = await createManagedOutgoingImageBlocks({ + stateDir, + sessionKey: "agent:main:main", + mediaUrls: [sourceUrl], + }); + + expect(blocks).toHaveLength(1); + expect(blocks[0]?.alt).toBe("remote-cat.png"); + expect(blocks[0]).toMatchObject({ + type: "image", + url: expect.stringContaining("/api/chat/media/outgoing/agent%3Amain%3Amain/"), + openUrl: expect.stringContaining("/full"), + }); + 
expect(blocks[0]?.url).toBe(blocks[0]?.openUrl); + expect(JSON.stringify(blocks[0])).not.toContain("127.0.0.1"); + expect(JSON.stringify(blocks[0])).not.toContain("sig=secret"); + + const attachmentId = String(blocks[0]?.url).split("/").at(-2); + expect(attachmentId).toBeTruthy(); + const record = JSON.parse( + await fs.readFile( + path.join(stateDir, "media", "outgoing", "records", `${attachmentId}.json`), + "utf-8", + ), + ) as { original: { path: string } }; + expect(record.original.path).toContain(path.join(stateDir, "media", "outgoing", "originals")); + expect(JSON.stringify(record)).not.toContain("127.0.0.1"); + expect(JSON.stringify(record)).not.toContain("sig=secret"); + expect(await fs.readFile(record.original.path)).toEqual(imageBuffer); + } finally { + setMediaStoreNetworkDepsForTest(); + await new Promise((resolve, reject) => + upstream.close((error) => (error ? reject(error) : resolve())), + ); + if (previousStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = previousStateDir; + } + } + }); + + it("keeps managed originals under the state-dir media root when config path differs", async () => { + const previousStateDir = process.env.OPENCLAW_STATE_DIR; + const previousConfigPath = process.env.OPENCLAW_CONFIG_PATH; + const externalConfigDir = await fs.mkdtemp(path.join(os.tmpdir(), "managed-image-config-")); + process.env.OPENCLAW_STATE_DIR = stateDir; + process.env.OPENCLAW_CONFIG_PATH = path.join(externalConfigDir, "config.json"); + const sourcePath = path.join(stateDir, "workspace", "fixtures", "dot.png"); + await fs.mkdir(path.dirname(sourcePath), { recursive: true }); + await fs.writeFile(sourcePath, Buffer.from(TINY_PNG_BASE64, "base64")); + + try { + const blocks = await createManagedOutgoingImageBlocks({ + stateDir, + sessionKey: "agent:main:main", + mediaUrls: [sourcePath], + localRoots: [path.join(stateDir, "workspace")], + }); + + const attachmentId = 
String(blocks[0]?.url).split("/").at(-2); + expect(attachmentId).toBeTruthy(); + + const record = JSON.parse( + await fs.readFile( + path.join(stateDir, "media", "outgoing", "records", `${attachmentId}.json`), + "utf-8", + ), + ) as { original: { path: string } }; + + expect(record.original.path).toContain(path.join(stateDir, "media", "outgoing", "originals")); + expect(record.original.path).not.toContain(externalConfigDir); + await expect(fs.access(record.original.path)).resolves.toBeUndefined(); + } finally { + await fs.rm(externalConfigDir, { recursive: true, force: true }); + if (previousStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = previousStateDir; + } + if (previousConfigPath == null) { + delete process.env.OPENCLAW_CONFIG_PATH; + } else { + process.env.OPENCLAW_CONFIG_PATH = previousConfigPath; + } + } + }); + + it("merges configured managed image limits with defaults", () => { + expect(resolveManagedImageAttachmentLimits()).toEqual(DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS); + expect( + resolveManagedImageAttachmentLimits({ + maxWidth: 8192, + maxHeight: 2048, + }), + ).toEqual({ + ...DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS, + maxWidth: 8192, + maxHeight: 2048, + }); + }); + + it("rejects managed outgoing images that exceed configured byte limits", async () => { + await expect( + createManagedOutgoingImageBlocks({ + stateDir, + sessionKey: "agent:main:main", + mediaUrls: [`data:image/png;base64,${TINY_PNG_BASE64}`], + limits: { maxBytes: 32 }, + }), + ).rejects.toThrow(/0MB limit|32 bytes|byte limit/i); + }); + + it("adds a warning block when an image is resized to fit limits", async () => { + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [await createPngDataUrl(200, 120)], + stateDir, + limits: { maxWidth: 64, maxHeight: 64, maxPixels: 4096 }, + }); + + expect(blocks).toHaveLength(2); + expect(blocks[0]?.type).toBe("image"); + 
expect(blocks[1]).toMatchObject({ type: "text" }); + }); + + it("skips broken attachments when continueOnPrepareError is enabled", async () => { + const onPrepareError = vi.fn(); + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [await createPngDataUrl(32, 32), path.join(stateDir, "missing.png")], + stateDir, + localRoots: [stateDir], + continueOnPrepareError: true, + onPrepareError, + }); + + expect(blocks).toHaveLength(1); + expect(blocks[0]).toMatchObject({ type: "image" }); + expect(onPrepareError).toHaveBeenCalledTimes(1); + expect(onPrepareError.mock.calls[0]?.[0]).toBeInstanceOf(Error); + expect(onPrepareError.mock.calls[0]?.[0]?.message).toMatch( + /Managed image attachment .* could not be prepared/i, + ); + }); + + it("accepts URL images up to the configured managed-image byte limit", async () => { + const previousStateDir = process.env.OPENCLAW_STATE_DIR; + process.env.OPENCLAW_STATE_DIR = stateDir; + const imageBuffer = await createNoisyPngBuffer(1600, 1200); + expect(imageBuffer.byteLength).toBeGreaterThan(5 * 1024 * 1024); + expect(imageBuffer.byteLength).toBeLessThan(DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS.maxBytes); + + const server = http.createServer((_req, res) => { + res.statusCode = 200; + res.setHeader("content-type", "image/png"); + res.end(imageBuffer); + }); + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address() as AddressInfo; + setMediaStoreNetworkDepsForTest({ + resolvePinnedHostname: async (hostname) => ({ + hostname, + addresses: ["127.0.0.1"], + lookup: createPinnedLookup({ hostname, addresses: ["127.0.0.1"] }), + }), + }); + + try { + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [`http://127.0.0.1:${address.port}/large-image.png`], + stateDir, + }); + + expect(blocks).toHaveLength(1); + expect(blocks[0]).toMatchObject({ type: "image" }); + } finally { + 
setMediaStoreNetworkDepsForTest(); + await new Promise((resolve, reject) => + server.close((error) => (error ? reject(error) : resolve())), + ); + if (previousStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = previousStateDir; + } + } + }); + + it("rejects local image paths outside allowed roots", async () => { + const outsideDir = await fs.mkdtemp(path.join(os.tmpdir(), "managed-image-outside-")); + const outsidePath = path.join(outsideDir, "outside.png"); + await fs.writeFile(outsidePath, Buffer.from(TINY_PNG_BASE64, "base64")); + + try { + await expect( + createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [outsidePath], + stateDir, + localRoots: [path.join(stateDir, "workspace")], + }), + ).rejects.toThrow(/could not be prepared/i); + } finally { + await fs.rm(outsideDir, { recursive: true, force: true }); + } + }); + + it("accepts local image paths inside allowed roots", async () => { + const allowedDir = path.join(stateDir, "workspace", "uploads"); + const allowedPath = path.join(allowedDir, "inside.png"); + await fs.mkdir(allowedDir, { recursive: true }); + await fs.writeFile(allowedPath, Buffer.from(TINY_PNG_BASE64, "base64")); + + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [allowedPath], + stateDir, + localRoots: [path.join(stateDir, "workspace")], + }); + + expect(blocks).toHaveLength(1); + expect(blocks[0]).toMatchObject({ type: "image" }); + }); + + it("rejects relative local image paths that resolve outside allowed roots", async () => { + const allowedWorkspaceDir = path.join(stateDir, "workspace"); + const outsidePath = path.join(stateDir, "outside.png"); + await fs.mkdir(allowedWorkspaceDir, { recursive: true }); + await fs.writeFile(outsidePath, Buffer.from(TINY_PNG_BASE64, "base64")); + + const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(allowedWorkspaceDir); + try { + await expect( + 
createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: ["../outside.png"], + stateDir, + localRoots: [allowedWorkspaceDir], + }), + ).rejects.toThrow(/could not be prepared/i); + } finally { + cwdSpy.mockRestore(); + } + }); + + it("drops downloaded non-image sources without leaving orphaned originals", async () => { + const pdfPath = path.join(stateDir, "not-an-image.pdf"); + await fs.writeFile(pdfPath, Buffer.from("%PDF-1.4\n% test\n")); + + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [pdfPath], + stateDir, + localRoots: [stateDir], + }); + expect(blocks).toEqual([]); + const originalsDir = path.join(stateDir, "media", "outgoing", "originals"); + let originals: string[] | null = null; + try { + originals = await fs.readdir(originalsDir); + } catch (error) { + expect(error).toMatchObject({ code: "ENOENT" }); + } + expect(originals ?? []).toEqual([]); + }); + + it("skips oversized downloaded non-image sources instead of failing finalization", async () => { + const audioPath = path.join(stateDir, "large-audio.mp3"); + await fs.writeFile(audioPath, Buffer.alloc(2048, 1)); + + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [audioPath], + stateDir, + localRoots: [stateDir], + limits: { maxBytes: 1024 }, + }); + expect(blocks).toEqual([]); + const originalsDir = path.join(stateDir, "media", "outgoing", "originals"); + let originals: string[] | null = null; + try { + originals = await fs.readdir(originalsDir); + } catch (error) { + expect(error).toMatchObject({ code: "ENOENT" }); + } + expect(originals ?? 
[]).toEqual([]); + }); + + it("does not reap older transient records while creating a new managed image", async () => { + const staleOriginalPath = path.join(stateDir, "files", "stale-cat.png"); + const staleAttachmentId = "stale-att"; + const staleRecordPath = path.join( + stateDir, + "media", + "outgoing", + "records", + `${staleAttachmentId}.json`, + ); + await fs.mkdir(path.dirname(staleOriginalPath), { recursive: true }); + await fs.mkdir(path.dirname(staleRecordPath), { recursive: true }); + await fs.writeFile(staleOriginalPath, Buffer.from(TINY_PNG_BASE64, "base64")); + await fs.writeFile( + staleRecordPath, + JSON.stringify( + { + attachmentId: staleAttachmentId, + sessionKey: "agent:main:main", + messageId: null, + createdAt: new Date(0).toISOString(), + updatedAt: new Date(0).toISOString(), + retentionClass: "transient", + alt: "Stale cat", + original: { + path: staleOriginalPath, + contentType: "image/png", + width: 1, + height: 1, + sizeBytes: Buffer.from(TINY_PNG_BASE64, "base64").byteLength, + filename: "stale-cat.png", + }, + }, + null, + 2, + ), + "utf-8", + ); + + await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [`data:image/png;base64,${TINY_PNG_BASE64}`], + stateDir, + }); + + await expect(fs.access(staleRecordPath)).resolves.toBeUndefined(); + await expect(fs.access(staleOriginalPath)).resolves.toBeUndefined(); + }); +}); + +describe("attachManagedOutgoingImagesToMessage", () => { + let stateDir: string; + + beforeEach(async () => { + stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "managed-image-attach-")); + vi.clearAllMocks(); + }); + + afterEach(async () => { + await fs.rm(stateDir, { recursive: true, force: true }); + }); + + it("upgrades transient image records to history when the message is committed", async () => { + const blocks = await createManagedOutgoingImageBlocks({ + sessionKey: "agent:main:main", + mediaUrls: [`data:image/png;base64,${TINY_PNG_BASE64}`], + stateDir, + }); + + await 
attachManagedOutgoingImagesToMessage({ + messageId: "msg-committed", + blocks: blocks as Record[], + stateDir, + }); + + const recordsDir = path.join(stateDir, "media", "outgoing", "records"); + const [recordName] = await fs.readdir(recordsDir); + const record = JSON.parse(await fs.readFile(path.join(recordsDir, recordName), "utf-8")) as { + messageId: string | null; + retentionClass?: string; + updatedAt?: string; + }; + expect(record.messageId).toBe("msg-committed"); + expect(record.retentionClass).toBe("history"); + expect(typeof record.updatedAt).toBe("string"); + }); +}); + +describe("cleanupManagedOutgoingImageRecords", () => { + let stateDir: string; + + beforeEach(async () => { + stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "managed-image-cleanup-")); + vi.clearAllMocks(); + }); + + afterEach(async () => { + await fs.rm(stateDir, { recursive: true, force: true }); + }); + + it("cleans up dereferenced records and original files", async () => { + const fixture = await createFixture(stateDir); + loadSessionEntryMock.mockReturnValue({ + storePath: path.join(stateDir, "gateway-sessions.json"), + entry: { sessionId: "sess-main", sessionFile: "/tmp/sess-main.jsonl" }, + }); + readSessionMessagesMock.mockReturnValue([]); + + const result = await cleanupManagedOutgoingImageRecords({ stateDir }); + + expect(result).toMatchObject({ + deletedRecordCount: 1, + deletedFileCount: 1, + retainedCount: 0, + }); + await expect(fs.access(fixture.originalPath)).rejects.toThrow(); + }); + + it("retains committed records that are still referenced by a full-image block", async () => { + const fixture = await createFixture(stateDir); + loadSessionEntryMock.mockReturnValue({ + storePath: path.join(stateDir, "gateway-sessions.json"), + entry: { sessionId: "sess-main", sessionFile: "/tmp/sess-main.jsonl" }, + }); + readSessionMessagesMock.mockReturnValue([ + { + __openclaw: { id: "msg-1" }, + content: [ + { + type: "image", + url: 
`/api/chat/media/outgoing/${encodeURIComponent(fixture.sessionKey)}/${fixture.attachmentId}/full`, + openUrl: `/api/chat/media/outgoing/${encodeURIComponent(fixture.sessionKey)}/${fixture.attachmentId}/full`, + }, + ], + }, + ]); + + const result = await cleanupManagedOutgoingImageRecords({ stateDir }); + + expect(result).toMatchObject({ + deletedRecordCount: 0, + deletedFileCount: 0, + retainedCount: 1, + }); + await expect(fs.access(fixture.originalPath)).resolves.toBeUndefined(); + }); + + it("reads each session transcript once while evaluating committed records", async () => { + const firstFixture = await createFixture(stateDir, { + attachmentId: "11111111-1111-4111-8111-111111111111", + filename: "att-1.png", + }); + const secondFixture = await createFixture(stateDir, { + attachmentId: "22222222-2222-4222-8222-222222222222", + filename: "att-2.png", + }); + loadSessionEntryMock.mockReturnValue({ + storePath: path.join(stateDir, "gateway-sessions.json"), + entry: { sessionId: "sess-main", sessionFile: "/tmp/sess-main.jsonl" }, + }); + readSessionMessagesMock.mockReturnValue([ + { + __openclaw: { id: "msg-1" }, + content: [ + { + type: "image", + url: `/api/chat/media/outgoing/${encodeURIComponent(firstFixture.sessionKey)}/${firstFixture.attachmentId}/full`, + openUrl: `/api/chat/media/outgoing/${encodeURIComponent(firstFixture.sessionKey)}/${firstFixture.attachmentId}/full`, + }, + { + type: "image", + url: `/api/chat/media/outgoing/${encodeURIComponent(secondFixture.sessionKey)}/${secondFixture.attachmentId}/full`, + openUrl: `/api/chat/media/outgoing/${encodeURIComponent(secondFixture.sessionKey)}/${secondFixture.attachmentId}/full`, + }, + ], + }, + ]); + + const result = await cleanupManagedOutgoingImageRecords({ stateDir }); + + expect(result).toMatchObject({ + deletedRecordCount: 0, + deletedFileCount: 0, + retainedCount: 2, + }); + expect(readSessionMessagesMock).toHaveBeenCalledTimes(1); + }); + + it("does not delete files still referenced by other 
sessions during session-scoped cleanup", async () => { + const retainedFixture = await createFixture(stateDir, { + sessionKey: "agent:other:session", + attachmentId: "33333333-3333-4333-8333-333333333333", + }); + const deletedFixture = await createFixture(stateDir, { + sessionKey: "agent:main:main", + attachmentId: "44444444-4444-4444-8444-444444444444", + }); + + loadSessionEntryMock.mockImplementation((sessionKey: string) => ({ + storePath: path.join(stateDir, "gateway-sessions.json"), + entry: { + sessionId: sessionKey === retainedFixture.sessionKey ? "sess-other" : "sess-main", + sessionFile: "/tmp/session.jsonl", + }, + })); + readSessionMessagesMock.mockReturnValue([]); + + const result = await cleanupManagedOutgoingImageRecords({ + stateDir, + sessionKey: deletedFixture.sessionKey, + forceDeleteSessionRecords: true, + }); + + expect(result).toMatchObject({ + deletedRecordCount: 1, + retainedCount: 1, + }); + await expect(fs.access(retainedFixture.originalPath)).resolves.toBeUndefined(); + }); +}); diff --git a/src/gateway/managed-image-attachments.ts b/src/gateway/managed-image-attachments.ts new file mode 100644 index 00000000000..8747aefb0ee --- /dev/null +++ b/src/gateway/managed-image-attachments.ts @@ -0,0 +1,1097 @@ +import { randomUUID } from "node:crypto"; +import fs from "node:fs/promises"; +import type { IncomingMessage, ServerResponse } from "node:http"; +import path from "node:path"; +import { getLatestSubagentRunByChildSessionKey } from "../agents/subagent-registry.js"; +import { resolveStateDir } from "../config/paths.js"; +import { safeFileURLToPath } from "../infra/local-file-access.js"; +import { + getImageMetadata, + hasAlphaChannel, + resizeToJpeg, + resizeToPng, +} from "../media/image-ops.js"; +import { assertLocalMediaAllowed } from "../media/local-media-access.js"; +import { isPassThroughRemoteMediaSource } from "../media/media-source-url.js"; +import { MEDIA_MAX_BYTES, saveMediaBuffer, saveMediaSource } from "../media/store.js"; 
+import { resolveUserPath } from "../utils.js"; +import type { AuthRateLimiter } from "./auth-rate-limit.js"; +import type { ResolvedGatewayAuth } from "./auth.js"; +import { sendJson, sendMethodNotAllowed } from "./http-common.js"; +import { + authorizeGatewayHttpRequestOrReply, + resolveOpenAiCompatibleHttpOperatorScopes, +} from "./http-utils.js"; +import { authorizeOperatorScopesForMethod } from "./method-scopes.js"; +import { loadSessionEntry, readSessionMessages } from "./session-utils.js"; + +const OUTGOING_IMAGE_ROUTE_PREFIX = "/api/chat/media/outgoing"; +const DEFAULT_TRANSIENT_OUTGOING_IMAGE_TTL_MS = 15 * 60 * 1000; +const MANAGED_OUTGOING_ATTACHMENT_ID_RE = + /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; +const DATA_URL_RE = /^data:/i; +const WINDOWS_DRIVE_RE = /^[A-Za-z]:[\\/]/; + +export const DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS = { + maxBytes: 12 * 1024 * 1024, + maxWidth: 4096, + maxHeight: 4096, + maxPixels: 20_000_000, +} as const; + +export type ManagedImageAttachmentLimits = { + maxBytes: number; + maxWidth: number; + maxHeight: number; + maxPixels: number; +}; + +type ManagedImageAttachmentLimitsConfig = Partial< + Pick +>; + +type ManagedImageRecordVariant = { + path: string; + contentType: string; + width: number | null; + height: number | null; + sizeBytes: number | null; + filename: string | null; +}; + +type ManagedImageRetentionClass = "transient" | "history"; + +type ManagedImageRecord = { + attachmentId: string; + sessionKey: string; + messageId: string | null; + createdAt: string; + updatedAt?: string; + retentionClass?: ManagedImageRetentionClass; + alt: string; + original: ManagedImageRecordVariant; +}; + +type ParsedImageDataUrl = + | { kind: "not-data-url" } + | { kind: "non-image-data-url" } + | { kind: "image-data-url"; buffer: Buffer; contentType: string }; + +type ManagedImageBlock = Record; + +type CleanupManagedOutgoingImageRecordsResult = { + deletedRecordCount: number; + deletedFileCount: 
/**
 * Resolves the effective managed-image limits.
 *
 * Each field is taken from `config` when it is neither `null` nor `undefined`; otherwise the
 * matching value from `DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS` is used. Values are not
 * range-checked here.
 *
 * @param config Optional partial overrides; `null`/`undefined` means "all defaults".
 * @returns A complete limits object with all four fields populated.
 */
export function resolveManagedImageAttachmentLimits(
  config?: ManagedImageAttachmentLimitsConfig | null,
): ManagedImageAttachmentLimits {
  const defaults = DEFAULT_MANAGED_IMAGE_ATTACHMENT_LIMITS;
  const maxBytes = config?.maxBytes ?? defaults.maxBytes;
  const maxWidth = config?.maxWidth ?? defaults.maxWidth;
  const maxHeight = config?.maxHeight ?? defaults.maxHeight;
  const maxPixels = config?.maxPixels ?? defaults.maxPixels;
  return { maxBytes, maxWidth, maxHeight, maxPixels };
}
/**
 * Creates an `Error` whose `name` is `"ManagedImageAttachmentError"`, marking its message as
 * safe to pass through `getSanitizedManagedImageAttachmentError` unchanged.
 */
function createManagedImageAttachmentError(message: string) {
  const tagged = new Error(message);
  tagged.name = "ManagedImageAttachmentError";
  return tagged;
}

/**
 * Type guard: true when `error` is an `Error` that is either tagged as a
 * `ManagedImageAttachmentError` or whose message starts with one of the known safe prefixes.
 */
function isManagedImageAttachmentSafeError(error: unknown): error is Error {
  if (error instanceof Error) {
    const { name, message } = error;
    if (name === "ManagedImageAttachmentError") {
      return true;
    }
    const safePrefixes = ["Managed image attachment ", "Invalid image data URL"];
    return safePrefixes.some((prefix) => message.startsWith(prefix));
  }
  return false;
}

/**
 * Returns `error` itself when it is recognised as safe; otherwise returns a generic
 * "could not be prepared" error that mentions only the (JSON-quoted) alt text.
 *
 * @param error Whatever was thrown while preparing the attachment.
 * @param alt Alt text used to identify the attachment in the generic message.
 */
function getSanitizedManagedImageAttachmentError(error: unknown, alt: string): Error {
  return isManagedImageAttachmentSafeError(error)
    ? error
    : createManagedImageAttachmentError(
        `Managed image attachment ${JSON.stringify(alt)} could not be prepared`,
      );
}
/**
 * Computes the dimensions an image should be scaled down to so that it satisfies the width,
 * height and total-pixel limits.
 *
 * @param metadata Current pixel dimensions of the image.
 * @param limits Limits to satisfy (`maxWidth`, `maxHeight`, `maxPixels` are read).
 * @returns The target `{ width, height }`, or `null` when no downscale is needed or the scale
 *          factor is not a finite number.
 */
function computeManagedImageResizeTarget(
  metadata: { width: number; height: number },
  limits: ManagedImageAttachmentLimits,
): { width: number; height: number } | null {
  const { maxWidth, maxHeight, maxPixels } = limits;
  const widthRatio = maxWidth / metadata.width;
  const heightRatio = maxHeight / metadata.height;
  const pixelRatio = Math.sqrt(maxPixels / (metadata.width * metadata.height));
  // Never upscale: the factor is capped at 1.
  const scale = Math.min(1, widthRatio, heightRatio, pixelRatio);
  if (scale >= 1 || !Number.isFinite(scale)) {
    return null;
  }

  const exceedsLimits = (w: number, h: number) =>
    w > maxWidth || h > maxHeight || w * h > maxPixels;

  let width = Math.max(1, Math.floor(metadata.width * scale));
  let height = Math.max(1, Math.floor(metadata.height * scale));
  // Rounding can leave the result marginally over a limit; shave one pixel at a time off the
  // longer side (never going below 1) until everything fits.
  while (exceedsLimits(width, height)) {
    if (width >= height && width > 1) {
      width -= 1;
      continue;
    }
    if (height <= 1) {
      break;
    }
    height -= 1;
  }
  return { width, height };
}
computeManagedImageResizeTarget(params.metadata, params.limits); + if (!target) { + return { + buffer: params.buffer, + contentType: params.contentType, + width: params.metadata.width, + height: params.metadata.height, + }; + } + + const preserveAlpha = await hasAlphaChannel(params.buffer).catch(() => false); + const resizedBuffer = preserveAlpha + ? await resizeToPng({ + buffer: params.buffer, + maxSide: Math.max(target.width, target.height), + compressionLevel: 9, + withoutEnlargement: true, + }) + : await resizeToJpeg({ + buffer: params.buffer, + maxSide: Math.max(target.width, target.height), + quality: 92, + withoutEnlargement: true, + }); + + return { + buffer: resizedBuffer, + contentType: preserveAlpha ? "image/png" : "image/jpeg", + width: target.width, + height: target.height, + }; +} + +function resolveOutgoingRecordsDir(stateDir = resolveStateDir()) { + return path.join(stateDir, "media", "outgoing", "records"); +} + +function resolveOutgoingOriginalsDir(stateDir = resolveStateDir()) { + return path.join(stateDir, "media", "outgoing", "originals"); +} + +function resolveOutgoingRecordPath(attachmentId: string, stateDir = resolveStateDir()) { + return path.join(resolveOutgoingRecordsDir(stateDir), `${attachmentId}.json`); +} + +function buildOutgoingVariantUrl(sessionKey: string, attachmentId: string, variant: "full") { + return `${OUTGOING_IMAGE_ROUTE_PREFIX}/${encodeURIComponent(sessionKey)}/${attachmentId}/${variant}`; +} + +function resolveRequesterSessionKey(req: IncomingMessage) { + const raw = req.headers["x-openclaw-requester-session-key"]; + if (Array.isArray(raw)) { + return raw[0]?.trim() || null; + } + return typeof raw === "string" && raw.trim().length > 0 ? 
/**
 * Classifies a media source string and, for base64 image data URLs, decodes the payload.
 *
 * - Sources that do not start with the `data:` scheme yield `{ kind: "not-data-url" }`.
 * - Base64 data URLs whose media type is not `image/*` yield `{ kind: "non-image-data-url" }`.
 * - Base64 `image/*` data URLs yield `{ kind: "image-data-url", buffer, contentType }`, with
 *   `contentType` lower-cased.
 *
 * @param source Raw media source; surrounding whitespace is ignored.
 * @param alt Alt text used only to label the size-limit error message.
 * @param limits Effective limits; only `maxBytes` is consulted here.
 * @throws Error("Invalid image data URL") when the source is a `data:` URL that is not a
 *         well-formed `;base64,` data URL.
 * @throws ManagedImageAttachmentError when the estimated decoded size exceeds `limits.maxBytes`
 *         (checked before the payload is decoded).
 */
function parseImageDataUrl(
  source: string,
  alt: string,
  limits: ManagedImageAttachmentLimits,
): ParsedImageDataUrl {
  const trimmed = source.trim();
  // URL schemes are case-insensitive. Use the shared case-insensitive DATA_URL_RE (the same
  // test resolveLocalMediaPath applies) so `DATA:image/...` is not misclassified as a
  // non-data source; the parsing regex below already carries the `i` flag.
  if (!DATA_URL_RE.test(trimmed)) {
    return { kind: "not-data-url" };
  }
  const match = /^data:([^;,]+)(?:;[^,]*)*;base64,([A-Za-z0-9+/=\s]+)$/i.exec(trimmed);
  if (!match) {
    throw new Error("Invalid image data URL");
  }
  const contentType = match[1]?.trim().toLowerCase() ?? "";
  if (!contentType.startsWith("image/")) {
    return { kind: "non-image-data-url" };
  }
  // Reject oversized payloads from the base64 length alone, before allocating the buffer.
  if (estimateBase64DecodedByteLength(match[2]) > limits.maxBytes) {
    throw createManagedImageAttachmentError(
      `Managed image attachment ${JSON.stringify(alt)} exceeds the ${formatLimitMiB(limits.maxBytes)} byte limit`,
    );
  }
  return {
    kind: "image-data-url",
    buffer: Buffer.from(match[2].replace(/\s+/g, ""), "base64"),
    contentType,
  };
}
/**
 * Deletes regular files in the outgoing originals directory whose path is not present in
 * `referencedPaths`.
 *
 * Best-effort: an unreadable directory, an entry that cannot be stat'ed, a non-file entry and a
 * failed removal are all skipped silently.
 *
 * @param params.stateDir State directory whose originals folder is scanned.
 * @param params.referencedPaths Paths (compared as exact strings) that must be kept.
 * @returns Number of removal calls that completed without throwing.
 */
async function deleteOrphanManagedImageFiles(params: {
  stateDir: string;
  referencedPaths: ReadonlySet<string>;
}) {
  const { stateDir, referencedPaths } = params;
  const scanDirs = [resolveOutgoingOriginalsDir(stateDir)];
  let removedCount = 0;

  for (const scanDir of scanDirs) {
    let entryNames: string[];
    try {
      entryNames = await fs.readdir(scanDir);
    } catch {
      // Directory missing or unreadable: nothing to clean here.
      continue;
    }

    for (const entryName of entryNames) {
      const candidatePath = path.join(scanDir, entryName);
      if (referencedPaths.has(candidatePath)) {
        continue;
      }

      let isRegularFile = false;
      try {
        isRegularFile = (await fs.stat(candidatePath)).isFile();
      } catch {
        // Entry vanished or cannot be inspected; treat it as not deletable.
      }
      if (!isRegularFile) {
        continue;
      }

      try {
        await fs.rm(candidatePath, { force: true });
        removedCount += 1;
      } catch {
        // Ignore cleanup races or already-missing files.
      }
    }
  }
  return removedCount;
}
null; + const forceDeleteSessionRecords = params?.forceDeleteSessionRecords === true; + const recordsDir = resolveOutgoingRecordsDir(stateDir); + let names: string[] = []; + try { + names = await fs.readdir(recordsDir); + } catch { + names = []; + } + + let deletedRecordCount = 0; + let deletedFileCount = 0; + let retainedCount = 0; + const retainedReferencedPaths = new Set(); + const transcriptAttachmentIndexCache = new Map< + string, + SessionManagedOutgoingAttachmentIndex | null + >(); + for (const name of names) { + if (!name.endsWith(".json")) { + continue; + } + const recordPath = path.join(recordsDir, name); + let record: ManagedImageRecord; + try { + record = JSON.parse(await fs.readFile(recordPath, "utf-8")) as ManagedImageRecord; + } catch { + try { + await fs.rm(recordPath, { force: true }); + } catch { + // Ignore cleanup races or already-missing records. + } + deletedRecordCount += 1; + continue; + } + if (sessionKeyFilter && record.sessionKey !== sessionKeyFilter) { + if (record.original?.path) { + retainedReferencedPaths.add(record.original.path); + } + retainedCount += 1; + continue; + } + + let shouldDelete = false; + if ( + forceDeleteSessionRecords && + (!sessionKeyFilter || record.sessionKey === sessionKeyFilter) + ) { + shouldDelete = true; + } else if (record.messageId) { + shouldDelete = !(await recordMatchesTranscriptMessage( + record, + transcriptAttachmentIndexCache, + )); + } else { + const createdAtMs = Date.parse(record.createdAt); + shouldDelete = Number.isFinite(createdAtMs) && nowMs - createdAtMs >= transientMaxAgeMs; + } + + if (shouldDelete) { + deletedRecordCount += 1; + deletedFileCount += await deleteManagedImageRecordArtifacts(record, stateDir); + } else { + if (record.original?.path) { + retainedReferencedPaths.add(record.original.path); + } + retainedCount += 1; + } + } + + deletedFileCount += await deleteOrphanManagedImageFiles({ + stateDir, + referencedPaths: retainedReferencedPaths, + }); + + return { deletedRecordCount, 
/**
 * Extracts the filename stored in a record for a saved file.
 *
 * @param filePath Path of the saved file.
 * @returns The whitespace-trimmed final path segment, or `null` when that segment is empty.
 */
function toRecordFilename(filePath: string) {
  const trimmedBase = path.basename(filePath).trim();
  return trimmedBase.length > 0 ? trimmedBase : null;
}
/**
 * Parses a managed outgoing image URL (absolute, or relative to an arbitrary origin) into its
 * session key and attachment id.
 *
 * @param value URL or path such as `/api/chat/media/outgoing/<sessionKey>/<attachmentId>/full`.
 * @returns `{ sessionKey, attachmentId }` with the session key percent-decoded, or `null` when
 *          the value is not parseable as a URL, the path does not match the route, the
 *          attachment id fails `MANAGED_OUTGOING_ATTACHMENT_ID_RE`, or decoding throws.
 */
function parseManagedOutgoingRoute(value: string) {
  let pathname: string;
  try {
    pathname = new URL(value, "http://localhost").pathname;
  } catch {
    return null;
  }

  const segments = /^\/api\/chat\/media\/outgoing\/([^/]+)\/([^/]+)\/full$/.exec(pathname);
  if (!segments) {
    return null;
  }
  const encodedSessionKey = segments[1];
  const attachmentId = segments[2];
  if (!MANAGED_OUTGOING_ATTACHMENT_ID_RE.test(attachmentId)) {
    return null;
  }

  try {
    return {
      sessionKey: decodeURIComponent(encodedSessionKey),
      attachmentId,
    };
  } catch {
    // Malformed percent-encoding in the session key segment.
    return null;
  }
}
/**
 * Returns the set of `messageId::attachmentId` keys for managed outgoing images referenced by
 * the transcript of `sessionKey`, or `null` when the session has no session id.
 *
 * Two caches are consulted before the transcript is read:
 * 1. the optional per-call `cache` map (also memoizes `null` results), and
 * 2. the module-level cache, which is reused only when the transcript file's path, mtime and
 *    size still match the values it was stored with.
 *
 * @param sessionKey Session whose transcript is indexed.
 * @param cache Optional memo shared across calls within one operation.
 */
async function getSessionManagedOutgoingAttachmentIndex(
  sessionKey: string,
  cache?: Map<string, SessionManagedOutgoingAttachmentIndex | null>,
) {
  if (cache?.has(sessionKey)) {
    const memoized = cache.get(sessionKey);
    return memoized ?? null;
  }

  const { storePath, entry } = loadSessionEntry(sessionKey);
  const sessionId = entry?.sessionId;
  if (!sessionId) {
    cache?.set(sessionKey, null);
    return null;
  }

  const rawSessionFile = entry?.sessionFile;
  const transcriptPath = typeof rawSessionFile === "string" ? rawSessionFile.trim() : "";
  let transcriptStat: { transcriptPath: string; mtimeMs: number; size: number } | null = null;
  if (transcriptPath) {
    try {
      const { mtimeMs, size } = await fs.stat(transcriptPath);
      transcriptStat = { transcriptPath, mtimeMs, size };
      const reusableIndex = getCachedSessionManagedOutgoingAttachmentIndex(
        sessionKey,
        transcriptStat,
      );
      if (reusableIndex) {
        cache?.set(sessionKey, reusableIndex);
        return reusableIndex;
      }
    } catch {
      // The transcript could not be stat'ed: drop any module-level entry for this session and
      // fall through to rebuilding the index from the session messages.
      sessionManagedOutgoingAttachmentIndexCache.delete(sessionKey);
    }
  }

  const index: SessionManagedOutgoingAttachmentIndex = new Set();
  const messages = readSessionMessages(sessionId, storePath, entry.sessionFile);
  for (const message of messages) {
    const candidate = message as {
      __openclaw?: { id?: string };
      content?: unknown;
    } | null;
    const messageId = candidate?.__openclaw?.id;
    // Messages without a non-empty string id cannot be matched against records.
    if (typeof messageId !== "string" || !messageId) {
      continue;
    }
    const content = candidate?.content;
    const contentBlocks = Array.isArray(content) ? (content as Record<string, unknown>[]) : [];
    for (const { attachmentId } of collectManagedOutgoingAttachmentRefs(
      contentBlocks,
      sessionKey,
    )) {
      index.add(buildManagedOutgoingAttachmentRefKey(messageId, attachmentId));
    }
  }

  // Only store in the module-level cache when there is a stat to validate against later.
  if (transcriptStat) {
    setCachedSessionManagedOutgoingAttachmentIndex(sessionKey, transcriptStat, index);
  }
  cache?.set(sessionKey, index);
  return index;
}
/**
 * Binds every managed outgoing image referenced by `blocks` to a committed message.
 *
 * For each referenced attachment whose stored record exists and belongs to the session named
 * in the block URL, the record is rewritten with the given `messageId`,
 * `retentionClass: "history"` and a fresh `updatedAt`. Records already in that state are left
 * untouched. Does nothing when the trimmed `messageId` is empty or no managed image is
 * referenced.
 *
 * @param params.messageId Id of the committed message (trimmed before use).
 * @param params.blocks Message content blocks to scan for managed image URLs.
 * @param params.stateDir Optional state directory override passed to the record helpers.
 */
export async function attachManagedOutgoingImagesToMessage(params: {
  messageId: string;
  blocks?: readonly Record<string, unknown>[];
  stateDir?: string;
}) {
  const { stateDir } = params;
  const messageId = params.messageId.trim();
  if (!messageId) {
    return;
  }
  const refs = collectManagedOutgoingAttachmentRefs(params.blocks);
  if (refs.length === 0) {
    return;
  }

  const commitRecord = async (ref: { attachmentId: string; sessionKey: string }) => {
    const record = await readManagedImageRecord(ref.attachmentId, stateDir);
    // Skip unknown attachments and URLs that name a different session than the record.
    if (!record || record.sessionKey !== ref.sessionKey) {
      return;
    }
    const alreadyCommitted =
      record.messageId === messageId && record.retentionClass === "history";
    if (alreadyCommitted) {
      return;
    }
    await writeManagedImageRecord(
      {
        ...record,
        messageId,
        retentionClass: "history",
        updatedAt: new Date().toISOString(),
      },
      stateDir,
    );
  };

  await Promise.all(refs.map((ref) => commitRecord(ref)));
}
fallbackAlt : deriveAltText(mediaUrl, index); + if (parsedDataUrl.kind === "non-image-data-url") { + continue; + } + + let savedOriginalPath: string | null = null; + try { + let resizeWarning: ManagedImageBlock | null = null; + if (parsedDataUrl.kind === "image-data-url") { + validateManagedImageBuffer(parsedDataUrl.buffer, alt, limits); + } + let savedOriginal = + parsedDataUrl.kind === "image-data-url" + ? await saveMediaBuffer( + parsedDataUrl.buffer, + parsedDataUrl.contentType, + "outgoing/originals", + limits.maxBytes, + `generated-image-${index + 1}`, + ) + : await (async () => { + const localMediaPath = resolveLocalMediaPath(mediaUrl); + if (localMediaPath) { + await assertLocalMediaAllowed(localMediaPath, params.localRoots); + } + return await saveMediaSource( + mediaUrl, + undefined, + "outgoing/originals", + Math.max(limits.maxBytes, MEDIA_MAX_BYTES), + ); + })(); + savedOriginalPath = savedOriginal.path; + let savedOriginalContentType = savedOriginal.contentType; + if (!savedOriginalContentType?.startsWith("image/")) { + await fs.rm(savedOriginal.path, { force: true }).catch(() => {}); + savedOriginalPath = null; + continue; + } + if (savedOriginal.size > limits.maxBytes) { + throw createManagedImageAttachmentError( + `Managed image attachment ${JSON.stringify(alt)} exceeds the ${formatLimitMiB(limits.maxBytes)} byte limit`, + ); + } + + let originalBuffer = + parsedDataUrl.kind === "image-data-url" + ? parsedDataUrl.buffer + : await fs.readFile(savedOriginal.path); + validateManagedImageBuffer(originalBuffer, alt, limits); + + let originalStats = await getVariantStats(savedOriginal.path); + if (originalStats.sizeBytes != null && originalStats.sizeBytes > limits.maxBytes) { + throw createManagedImageAttachmentError( + `Managed image attachment ${JSON.stringify(alt)} exceeds the ${formatLimitMiB(limits.maxBytes)} byte limit`, + ); + } + + const originalMetadata = + originalStats.width != null && originalStats.height != null + ? 
{ width: originalStats.width, height: originalStats.height } + : await getImageMetadata(originalBuffer); + let effectiveMetadata = originalMetadata; + let metadataLimitError = getManagedImageMetadataLimitError(effectiveMetadata, alt, limits); + for (let resizeAttempt = 0; metadataLimitError; resizeAttempt += 1) { + if (!effectiveMetadata) { + throw createManagedImageAttachmentError(metadataLimitError); + } + if (resizeAttempt >= 3) { + throw createManagedImageAttachmentError(metadataLimitError); + } + const resized = await resizeManagedImageBufferToLimits({ + buffer: originalBuffer, + contentType: savedOriginalContentType, + metadata: effectiveMetadata, + limits, + }); + validateManagedImageBuffer(resized.buffer, alt, limits); + const replacement = await saveMediaBuffer( + resized.buffer, + resized.contentType, + "outgoing/originals", + limits.maxBytes, + toRecordFilename(savedOriginal.path) ?? `generated-image-${index + 1}`, + ); + await fs.rm(savedOriginal.path, { force: true }).catch(() => {}); + savedOriginal = replacement; + savedOriginalContentType = replacement.contentType ?? resized.contentType; + savedOriginalPath = savedOriginal.path; + originalBuffer = resized.buffer; + originalStats = await getVariantStats(savedOriginal.path); + effectiveMetadata = + originalStats.width != null && originalStats.height != null + ? { width: originalStats.width, height: originalStats.height } + : await getImageMetadata(originalBuffer); + metadataLimitError = getManagedImageMetadataLimitError(effectiveMetadata, alt, limits); + if (!metadataLimitError) { + resizeWarning = buildManagedImageResizeWarningBlock({ + alt, + originalWidth: originalMetadata?.width ?? effectiveMetadata?.width ?? resized.width, + originalHeight: originalMetadata?.height ?? effectiveMetadata?.height ?? resized.height, + resizedWidth: effectiveMetadata?.width ?? resized.width, + resizedHeight: effectiveMetadata?.height ?? 
/**
 * Produces a filename that can be embedded in a quoted `Content-Disposition` `filename="…"`
 * parameter and passed to `res.setHeader` without being rejected.
 *
 * Replaces with `_`:
 * - `"` and `\`, which would break out of or escape inside the quoted string;
 * - every character Node's header validation refuses: control characters other than TAB
 *   (including CR/LF and DEL) and any code point above U+00FF (e.g. CJK or emoji names).
 *
 * The result is trimmed; an empty result (or a `null` input) falls back to
 * `"generated-image"`.
 *
 * @param value Stored filename, or `null` when none was recorded.
 * @returns A non-empty, header-safe filename.
 */
function safeAttachmentFilename(value: string | null) {
  const fallback = "generated-image";
  const base = (value ?? fallback)
    // The negated class mirrors the set of characters Node accepts in header values; the `u`
    // flag makes an astral character (surrogate pair) collapse to a single underscore.
    .replace(/[^\t\x20-\x7e\x80-\xff]|["\\]/gu, "_")
    .trim();
  return base || fallback;
}
"/", "http://localhost"); + const match = requestUrl.pathname.match(/^\/api\/chat\/media\/outgoing\/([^/]+)\/([^/]+)\/full$/); + if (!match) { + return false; + } + + if (req.method !== "GET") { + sendMethodNotAllowed(res, "GET"); + return true; + } + + const requestAuth = await authorizeGatewayHttpRequestOrReply({ + req, + res, + auth: opts.auth, + trustedProxies: opts.trustedProxies, + allowRealIpFallback: opts.allowRealIpFallback, + rateLimiter: opts.rateLimiter, + }); + if (!requestAuth) { + return true; + } + + const privilegedAccess = + requestAuth.trustDeclaredOperatorScopes || requestAuth.authMethod === "device-token"; + + const requestedScopes = resolveOpenAiCompatibleHttpOperatorScopes(req, requestAuth); + const scopeAuth = authorizeOperatorScopesForMethod("chat.history", requestedScopes); + if (!scopeAuth.allowed) { + sendJson(res, 403, { + ok: false, + error: { + type: "forbidden", + message: `missing scope: ${scopeAuth.missingScope}`, + }, + }); + return true; + } + + const encodedSessionKey = match[1]; + const attachmentId = match[2]; + if (!encodedSessionKey || !attachmentId) { + return false; + } + if (!MANAGED_OUTGOING_ATTACHMENT_ID_RE.test(attachmentId)) { + sendStatus(res, 404, "not found"); + return true; + } + let sessionKey: string; + try { + sessionKey = decodeURIComponent(encodedSessionKey); + } catch { + sendStatus(res, 404, "not found"); + return true; + } + const record = await readManagedImageRecord(attachmentId, opts.stateDir); + if (!record || record.sessionKey !== sessionKey) { + sendStatus(res, 404, "not found"); + return true; + } + if (!privilegedAccess) { + const requesterSessionKey = resolveRequesterSessionKey(req); + if (!requesterSessionKey) { + sendJson(res, 403, { + ok: false, + error: { + type: "forbidden", + message: "requester session ownership required", + }, + }); + return true; + } + const ownsSession = await requesterOwnsManagedImageSession({ + requesterSessionKey, + targetSessionKey: record.sessionKey, + }); + if 
(!ownsSession) { + sendJson(res, 403, { + ok: false, + error: { + type: "forbidden", + message: "requester session does not own attachment session", + }, + }); + return true; + } + } + if (!(await recordMatchesTranscriptMessage(record))) { + sendStatus(res, 404, "not found"); + return true; + } + + let body: Buffer; + try { + body = await fs.readFile(record.original.path); + } catch { + sendStatus(res, 404, "not found"); + return true; + } + + res.statusCode = 200; + res.setHeader("content-type", record.original.contentType || "application/octet-stream"); + res.setHeader("content-length", String(body.byteLength)); + res.setHeader("cache-control", "private, max-age=31536000, immutable"); + res.setHeader( + "content-disposition", + `inline; filename="${safeAttachmentFilename(record.original.filename)}"`, + ); + res.end(body); + return true; +} diff --git a/src/gateway/server-http.ts b/src/gateway/server-http.ts index dbdaad3fbd1..627b8101e6b 100644 --- a/src/gateway/server-http.ts +++ b/src/gateway/server-http.ts @@ -81,6 +81,9 @@ const HOOK_AUTH_FAILURE_WINDOW_MS = 60_000; let identityAvatarModulePromise: Promise | undefined; let controlUiModulePromise: Promise | undefined; let embeddingsHttpModulePromise: Promise | undefined; +let managedImageAttachmentsModulePromise: + | Promise + | undefined; let modelsHttpModulePromise: Promise | undefined; let openAiHttpModulePromise: Promise | undefined; let openResponsesHttpModulePromise: Promise | undefined; @@ -105,6 +108,11 @@ function getEmbeddingsHttpModule() { return embeddingsHttpModulePromise; } +function getManagedImageAttachmentsModule() { + managedImageAttachmentsModulePromise ??= import("./managed-image-attachments.js"); + return managedImageAttachmentsModulePromise; +} + function getModelsHttpModule() { modelsHttpModulePromise ??= import("./models-http.js"); return modelsHttpModulePromise; @@ -1061,6 +1069,21 @@ export function createGatewayHttpServer(opts: { }), ); + requestStages.push({ + name: 
"chat-managed-image-media", + run: async () => + (await getManagedImageAttachmentsModule()).handleManagedOutgoingImageHttpRequest( + req, + res, + { + auth: resolvedAuth, + trustedProxies, + allowRealIpFallback, + rateLimiter, + }, + ), + }); + if (controlUiEnabled) { requestStages.push({ name: "control-ui-assistant-media", diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index 41a4097de3f..c80af46a318 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -2015,12 +2015,10 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(transcriptUpdate).toMatchObject({ message: { role: "assistant", - content: [ - { type: "text", text: "[[reply_to_current]]Image reply" }, - { type: "input_image", image_url: "data:image/png;base64,cG5n" }, - ], + content: [{ type: "text", text: "[[reply_to_current]]Image reply" }], }, }); + expect(JSON.stringify(transcriptUpdate)).not.toContain("data:image/png;base64,cG5n"); }); it("does not persist sensitive image media into transcript updates", async () => { @@ -2060,6 +2058,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () => }); expect(JSON.stringify(transcriptUpdate)).not.toContain("input_image"); expect(JSON.stringify(transcriptUpdate)).not.toContain("data:image/png;base64,cG5n"); + expect(JSON.stringify(payload?.message)).not.toContain("/api/chat/media/outgoing/"); }); it("sanitizes replyToId before emitting inline reply directives", async () => { diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index 95e4e5a3378..80e79a26ac6 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -13,9 +13,13 @@ import type { MsgContext } from "../../auto-reply/templating.js"; import { extractCanvasFromText } from "../../chat/canvas-render.js"; import { 
resolveSessionFilePath } from "../../config/sessions.js"; import { jsonUtf8Bytes } from "../../infra/json-utf8-bytes.js"; +import { normalizeReplyPayloadsForDelivery } from "../../infra/outbound/payloads.js"; import { getSessionBindingService } from "../../infra/outbound/session-binding-service.js"; import { logLargePayload } from "../../logging/diagnostic-payload.js"; -import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js"; +import { + appendLocalMediaParentRoots, + getAgentScopedMediaLocalRoots, +} from "../../media/local-roots.js"; import { isAudioFileName } from "../../media/mime.js"; import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js"; import { type SavedMedia, saveMediaBuffer } from "../../media/store.js"; @@ -55,6 +59,11 @@ import { MediaOffloadError } from "../chat-attachments.js"; import { stripEnvelopeFromMessage, stripEnvelopeFromMessages } from "../chat-sanitize.js"; import { augmentChatHistoryWithCliSessionImports } from "../cli-session-history.js"; import { isSuppressedControlReplyText } from "../control-reply-text.js"; +import { + attachManagedOutgoingImagesToMessage, + cleanupManagedOutgoingImageRecords, + createManagedOutgoingImageBlocks, +} from "../managed-image-attachments.js"; import { ADMIN_SCOPE } from "../method-scopes.js"; import { GATEWAY_CLIENT_CAPS, @@ -86,7 +95,10 @@ import { injectTimestamp, timestampOptsFromConfig } from "./agent-timestamp.js"; import { setGatewayDedupeEntry } from "./agent-wait-dedupe.js"; import { normalizeRpcAttachmentsToChatAttachments } from "./attachment-normalize.js"; import { appendInjectedAssistantMessageToTranscript } from "./chat-transcript-inject.js"; -import { buildWebchatAssistantMessageFromReplyPayloads } from "./chat-webchat-media.js"; +import { + buildWebchatAssistantMessageFromReplyPayloads, + buildWebchatAudioContentBlocksFromReplyPayloads, +} from "./chat-webchat-media.js"; import type { GatewayRequestContext, GatewayRequestHandlerOptions, @@ -144,7 
+156,9 @@ async function buildWebchatAssistantMediaMessage( export const DEFAULT_CHAT_HISTORY_TEXT_MAX_CHARS = 8_000; export const CHAT_HISTORY_MAX_SINGLE_MESSAGE_BYTES = 128 * 1024; const CHAT_HISTORY_OVERSIZED_PLACEHOLDER = "[chat.history omitted: message too large]"; +const MANAGED_OUTGOING_IMAGE_PATH_PREFIX = "/api/chat/media/outgoing/"; let chatHistoryPlaceholderEmitCount = 0; +const chatHistoryManagedImageCleanupState = new Map>(); const CHANNEL_AGNOSTIC_SESSION_SCOPES = new Set([ "main", "direct", @@ -255,6 +269,204 @@ function hasSensitiveMediaPayload(payloads: ReplyPayload[]): boolean { ); } +type AssistantDisplayContentBlock = Record; + +function sanitizeAssistantDisplayText(value?: string | null): string | undefined { + if (!value) { + return undefined; + } + const withoutEnvelope = stripEnvelopeFromMessage(value); + const normalized = typeof withoutEnvelope === "string" ? withoutEnvelope : value; + const stripped = stripInlineDirectiveTagsForDisplay(normalized).text.trim(); + return stripped || undefined; +} + +function extractAssistantDisplayTextFromContent( + content?: readonly AssistantDisplayContentBlock[] | null, +): string | undefined { + if (!Array.isArray(content) || content.length === 0) { + return undefined; + } + const parts = content + .map((block) => { + if (block?.type !== "text" || typeof block.text !== "string") { + return ""; + } + return block.text.trim(); + }) + .filter(Boolean); + return parts.length > 0 ? 
parts.join("\n\n") : undefined; +} + +async function buildAssistantDisplayContentFromReplyPayloads(params: { + sessionKey: string; + payloads: ReplyPayload[]; + managedImageLocalRoots?: Parameters[0]["localRoots"]; + includeSensitiveMedia?: boolean; + onLocalAudioAccessDenied?: (message: string) => void; + onManagedImagePrepareError?: (message: string) => void; +}): Promise { + const rawTextPayloadCount = params.payloads.filter( + (payload) => typeof payload.text === "string" && payload.text.trim().length > 0, + ).length; + const normalized = normalizeReplyPayloadsForDelivery(params.payloads); + if (normalized.length === 0) { + return rawTextPayloadCount > 0 ? [{ type: "text", text: "" }] : undefined; + } + + const content: AssistantDisplayContentBlock[] = []; + let strippedTextPayloadCount = 0; + for (const payload of normalized) { + const text = sanitizeAssistantDisplayText(payload.text); + if (text) { + content.push({ type: "text", text }); + } else if (typeof payload.text === "string" && payload.text.trim().length > 0) { + strippedTextPayloadCount += 1; + } + if (params.includeSensitiveMedia === false && payload.sensitiveMedia === true) { + continue; + } + const audioBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads([payload], { + localRoots: Array.isArray(params.managedImageLocalRoots) + ? params.managedImageLocalRoots + : undefined, + onLocalAudioAccessDenied: (err) => { + params.onLocalAudioAccessDenied?.(formatForLog(err)); + }, + }); + content.push(...audioBlocks); + + const mediaUrls = Array.from( + new Set([ + ...(Array.isArray(payload.mediaUrls) ? payload.mediaUrls : []), + ...(typeof payload.mediaUrl === "string" ? 
[payload.mediaUrl] : []), + ]), + ); + const imageBlocks = await createManagedOutgoingImageBlocks({ + sessionKey: params.sessionKey, + mediaUrls, + localRoots: params.managedImageLocalRoots, + continueOnPrepareError: true, + onPrepareError: (error) => { + params.onManagedImagePrepareError?.(error.message); + }, + }); + if (imageBlocks.length > 0) { + content.push(...imageBlocks); + } + } + + if (content.length > 0) { + return content; + } + return strippedTextPayloadCount > 0 ? [{ type: "text", text: "" }] : undefined; +} + +function replaceAssistantContentTextBlocks( + content: readonly AssistantDisplayContentBlock[] | undefined, + transcriptMediaMessage: { content: Array> } | null, +): AssistantDisplayContentBlock[] | undefined { + const transcriptTextBlocks = (transcriptMediaMessage?.content ?? []).filter( + (block): block is AssistantDisplayContentBlock => + Boolean(block) && + typeof block === "object" && + block.type === "text" && + typeof block.text === "string", + ); + if (transcriptTextBlocks.length === 0) { + return content ? 
[...content] : undefined; + } + if (!content || content.length === 0) { + return [...transcriptTextBlocks]; + } + const merged: AssistantDisplayContentBlock[] = []; + let transcriptTextIndex = 0; + for (const block of content) { + if ( + block?.type === "text" && + typeof block.text === "string" && + transcriptTextIndex < transcriptTextBlocks.length + ) { + merged.push(transcriptTextBlocks[transcriptTextIndex++]); + continue; + } + merged.push(block); + } + if (transcriptTextIndex < transcriptTextBlocks.length) { + merged.unshift(...transcriptTextBlocks.slice(transcriptTextIndex)); + } + return merged; +} + +function isManagedOutgoingImageUrl(value: unknown): boolean { + if (typeof value !== "string" || !value.trim()) { + return false; + } + try { + const parsed = new URL(value, "http://localhost"); + return parsed.pathname.startsWith(MANAGED_OUTGOING_IMAGE_PATH_PREFIX); + } catch { + return false; + } +} + +function stripManagedOutgoingAssistantContentBlocks( + content: readonly AssistantDisplayContentBlock[] | undefined, +): AssistantDisplayContentBlock[] | undefined { + if (!content || content.length === 0) { + return undefined; + } + const filtered = content.filter((block) => { + if (block?.type !== "image") { + return true; + } + return !(isManagedOutgoingImageUrl(block.url) || isManagedOutgoingImageUrl(block.openUrl)); + }); + return filtered.length > 0 ? filtered : undefined; +} + +function extractAssistantDisplayText( + content: readonly AssistantDisplayContentBlock[] | undefined, +): string | undefined { + if (!content || content.length === 0) { + return undefined; + } + const text = content + .map((block) => (block?.type === "text" && typeof block.text === "string" ? 
block.text : "")) + .filter(Boolean) + .join("\n\n") + .trim(); + return text || undefined; +} + +function hasAssistantDisplayMediaContent( + content: readonly AssistantDisplayContentBlock[] | undefined, +): boolean { + return Boolean(content?.some((block) => block?.type !== "text")); +} + +function scheduleChatHistoryManagedImageCleanup(params: { + sessionKey: string; + context: Pick; +}) { + if (chatHistoryManagedImageCleanupState.has(params.sessionKey)) { + return; + } + const pending = cleanupManagedOutgoingImageRecords({ sessionKey: params.sessionKey }) + .then(() => undefined) + .catch((error) => { + params.context.logGateway.debug( + `chat.history managed image cleanup skipped sessionKey=${JSON.stringify(params.sessionKey)} error=${formatForLog(error)}`, + ); + }) + .finally(() => { + if (chatHistoryManagedImageCleanupState.get(params.sessionKey) === pending) { + chatHistoryManagedImageCleanupState.delete(params.sessionKey); + } + }); + chatHistoryManagedImageCleanupState.set(params.sessionKey, pending); +} + function resolveChatSendOriginatingRoute(params: { client?: { mode?: string | null; id?: string | null } | null; deliver?: boolean; @@ -1686,6 +1898,7 @@ export const chatHandlers: GatewayRequestHandlers = { messages: normalized, maxSingleMessageBytes: perMessageHardCap, }); + scheduleChatHistoryManagedImageCleanup({ sessionKey, context }); const capped = capArrayByJsonBytes(replaced.messages, maxHistoryBytes).items; const bounded = enforceChatHistoryFinalBudget({ messages: capped, maxBytes: maxHistoryBytes }); const placeholderCount = replaced.replacedCount + bounded.placeholderCount; @@ -2149,20 +2362,53 @@ export const chatHandlers: GatewayRequestHandlers = { if (!agentRunStarted || appendedWebchatAgentMedia || !isMediaBearingPayload(payload)) { return; } + const { storePath: latestStorePath, entry: latestEntry } = loadSessionEntry(sessionKey); + const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? 
clientRunId; + const resolvedTranscriptPath = resolveTranscriptPath({ + sessionId, + storePath: latestStorePath, + sessionFile: latestEntry?.sessionFile ?? entry?.sessionFile, + agentId, + }); + const mediaLocalRoots = appendLocalMediaParentRoots( + getAgentScopedMediaLocalRoots(cfg, agentId), + resolvedTranscriptPath ? [resolvedTranscriptPath] : undefined, + ); + const assistantContent = await buildAssistantDisplayContentFromReplyPayloads({ + sessionKey, + payloads: [payload], + managedImageLocalRoots: mediaLocalRoots, + includeSensitiveMedia: payload.sensitiveMedia !== true, + onLocalAudioAccessDenied: (message) => { + context.logGateway.warn(`webchat audio embedding denied local path: ${message}`); + }, + onManagedImagePrepareError: (message) => { + context.logGateway.warn(`webchat image embedding skipped attachment: ${message}`); + }, + }); const mediaMessage = await buildWebchatAssistantMediaMessage([payload], { - localRoots: getAgentScopedMediaLocalRoots(cfg, agentId), + localRoots: mediaLocalRoots, onLocalAudioAccessDenied: (message) => { context.logGateway.warn(`webchat audio embedding denied local path: ${message}`); }, }); - if (!mediaMessage) { + const persistedAssistantContent = replaceAssistantContentTextBlocks( + assistantContent, + mediaMessage, + ); + const persistedContentForAppend = hasAssistantDisplayMediaContent(persistedAssistantContent) + ? persistedAssistantContent + : undefined; + const transcriptReply = + mediaMessage?.transcriptText ?? + extractAssistantDisplayTextFromContent(assistantContent) ?? + buildTranscriptReplyText([payload]); + if (!transcriptReply && !persistedAssistantContent?.length && !assistantContent?.length) { return; } - const { storePath: latestStorePath, entry: latestEntry } = loadSessionEntry(sessionKey); - const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId; const appended = appendAssistantTranscriptMessage({ - message: mediaMessage.transcriptText, - ...(payload.sensitiveMedia === true ? 
{} : { content: mediaMessage.content }), + message: transcriptReply, + ...(persistedContentForAppend?.length ? { content: persistedContentForAppend } : {}), sessionId, storePath: latestStorePath, sessionFile: latestEntry?.sessionFile, @@ -2171,6 +2417,12 @@ export const chatHandlers: GatewayRequestHandlers = { idempotencyKey: `${clientRunId}:assistant-media`, }); if (appended.ok) { + if (appended.messageId && assistantContent?.length) { + await attachManagedOutgoingImagesToMessage({ + messageId: appended.messageId, + blocks: assistantContent, + }); + } appendedWebchatAgentMedia = true; return; } @@ -2281,22 +2533,87 @@ export const chatHandlers: GatewayRequestHandlers = { const finalPayloads = deliveredReplies .filter((entry) => entry.kind === "final") .map((entry) => entry.payload); - const combinedReply = buildTranscriptReplyText(finalPayloads); + const { storePath: latestStorePath, entry: latestEntry } = + loadSessionEntry(sessionKey); + const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId; + const resolvedTranscriptPath = resolveTranscriptPath({ + sessionId, + storePath: latestStorePath, + sessionFile: latestEntry?.sessionFile ?? entry?.sessionFile, + agentId, + }); + const mediaLocalRoots = appendLocalMediaParentRoots( + getAgentScopedMediaLocalRoots(cfg, agentId), + resolvedTranscriptPath ? 
[resolvedTranscriptPath] : undefined, + ); + const assistantContent = await buildAssistantDisplayContentFromReplyPayloads({ + sessionKey, + payloads: finalPayloads, + managedImageLocalRoots: mediaLocalRoots, + includeSensitiveMedia: false, + onLocalAudioAccessDenied: (message) => { + context.logGateway.warn(`webchat audio embedding denied local path: ${message}`); + }, + onManagedImagePrepareError: (message) => { + context.logGateway.warn(`webchat image embedding skipped attachment: ${message}`); + }, + }); const mediaMessage = await buildWebchatAssistantMediaMessage(finalPayloads, { - localRoots: getAgentScopedMediaLocalRoots(cfg, agentId), + localRoots: mediaLocalRoots, onLocalAudioAccessDenied: (message) => { context.logGateway.warn(`webchat audio embedding denied local path: ${message}`); }, }); const hasSensitiveMedia = hasSensitiveMediaPayload(finalPayloads); + const persistedAssistantContent = replaceAssistantContentTextBlocks( + hasSensitiveMedia + ? await buildAssistantDisplayContentFromReplyPayloads({ + sessionKey, + payloads: finalPayloads, + managedImageLocalRoots: mediaLocalRoots, + includeSensitiveMedia: false, + onLocalAudioAccessDenied: (message) => { + context.logGateway.warn( + `webchat audio embedding denied local path: ${message}`, + ); + }, + onManagedImagePrepareError: (message) => { + context.logGateway.warn( + `webchat image embedding skipped attachment: ${message}`, + ); + }, + }) + : assistantContent, + mediaMessage, + ); + const persistedContentForAppend = hasAssistantDisplayMediaContent( + persistedAssistantContent, + ) + ? persistedAssistantContent + : undefined; + const broadcastAssistantContent = hasAssistantDisplayMediaContent(assistantContent) + ? assistantContent + : hasAssistantDisplayMediaContent(mediaMessage?.content) + ? mediaMessage?.content + : assistantContent; + const displayReply = + extractAssistantDisplayTextFromContent(assistantContent) ?? 
+ buildTranscriptReplyText(finalPayloads); + const transcriptReply = + mediaMessage?.transcriptText || + buildTranscriptReplyText(finalPayloads) || + displayReply; let message: Record | undefined; - if (mediaMessage || combinedReply) { - const { storePath: latestStorePath, entry: latestEntry } = - loadSessionEntry(sessionKey); - const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId; + if ( + transcriptReply || + persistedContentForAppend?.length || + assistantContent?.length + ) { const appended = appendAssistantTranscriptMessage({ - message: mediaMessage?.transcriptText ?? combinedReply, - ...(mediaMessage && !hasSensitiveMedia ? { content: mediaMessage.content } : {}), + message: transcriptReply, + ...(persistedContentForAppend?.length + ? { content: persistedContentForAppend } + : {}), sessionId, storePath: latestStorePath, sessionFile: latestEntry?.sessionFile, @@ -2304,22 +2621,33 @@ export const chatHandlers: GatewayRequestHandlers = { createIfMissing: true, }); if (appended.ok) { - if (hasSensitiveMedia && mediaMessage) { - message = { - ...appended.message, - content: mediaMessage.content, - }; - } else { - message = appended.message; + if (appended.messageId && assistantContent?.length) { + await attachManagedOutgoingImagesToMessage({ + messageId: appended.messageId, + blocks: assistantContent, + }); } + message = broadcastAssistantContent?.length + ? { ...appended.message, content: broadcastAssistantContent } + : appended.message; } else { context.logGateway.warn( `webchat transcript append failed: ${appended.error ?? "unknown error"}`, ); + const fallbackAssistantContent = + stripManagedOutgoingAssistantContentBlocks(persistedAssistantContent) ?? + stripManagedOutgoingAssistantContentBlocks(assistantContent); + const fallbackText = + extractAssistantDisplayText(fallbackAssistantContent) ?? displayReply; const now = Date.now(); message = { role: "assistant", - content: mediaMessage?.content ?? 
[{ type: "text", text: combinedReply }], + ...(fallbackAssistantContent?.length + ? { content: fallbackAssistantContent } + : fallbackText + ? { content: [{ type: "text", text: fallbackText }] } + : {}), + ...(fallbackText ? { text: fallbackText } : {}), timestamp: now, // Keep this compatible with Pi stopReason enums even though this message isn't // persisted to the transcript due to the append failure. diff --git a/src/gateway/server.chat.gateway-server-chat.test.ts b/src/gateway/server.chat.gateway-server-chat.test.ts index 4b35cfa7c0f..c83cde8c7ca 100644 --- a/src/gateway/server.chat.gateway-server-chat.test.ts +++ b/src/gateway/server.chat.gateway-server-chat.test.ts @@ -790,6 +790,100 @@ describe("gateway server chat", () => { }); }); + test("chat.history persists assistant image data URLs as managed image blocks", async () => { + await withMainSessionStore(async (dir) => { + const previousStateDir = process.env.OPENCLAW_STATE_DIR; + process.env.OPENCLAW_STATE_DIR = dir; + const pngB64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="; + dispatchInboundMessageMock.mockImplementationOnce(async (...args: unknown[]) => { + const [params] = args as [ + { + dispatcher: { + sendFinalReply: (payload: { text?: string; mediaUrls?: string[] }) => boolean; + markComplete: () => void; + waitForIdle: () => Promise; + getQueuedCounts: () => { final: number; block: number; tool: number }; + }; + }, + ]; + params.dispatcher.sendFinalReply({ + mediaUrls: [`data:image/png;base64,${pngB64}`], + }); + params.dispatcher.markComplete(); + await params.dispatcher.waitForIdle(); + return { + queuedFinal: true, + counts: params.dispatcher.getQueuedCounts(), + }; + }); + + try { + const finalPromise = onceMessage( + ws, + (o) => + o.type === "event" && + o.event === "chat" && + o.payload?.state === "final" && + o.payload?.runId === "idem-managed-image-history", + 8000, + ); + const res = await rpcReq(ws, "chat.send", { + sessionKey: 
"main", + message: "show me an image", + idempotencyKey: "idem-managed-image-history", + }); + + expect(res.ok).toBe(true); + expect(res.payload?.runId).toBe("idem-managed-image-history"); + await finalPromise; + + let assistantMessage: Record | undefined; + for (let attempt = 0; attempt < 50; attempt += 1) { + const historyRes = await rpcReq<{ messages?: unknown[] }>(ws, "chat.history", { + sessionKey: "main", + }); + expect(historyRes.ok).toBe(true); + const messages = historyRes.payload?.messages ?? []; + assistantMessage = messages.find( + (message): message is Record => + typeof message === "object" && + message !== null && + (message as { role?: unknown }).role === "assistant", + ); + if (assistantMessage) { + break; + } + await new Promise((resolve) => setTimeout(resolve, 100)); + } + + expect(assistantMessage).toBeTruthy(); + const assistantContent = (assistantMessage as { content?: unknown[] }).content ?? []; + expect(assistantContent).toEqual([ + { type: "text", text: "Image reply" }, + expect.objectContaining({ + type: "image", + url: expect.stringContaining("/api/chat/media/outgoing/"), + openUrl: expect.stringContaining("/full"), + alt: "Generated image 1", + mimeType: "image/png", + width: 1, + height: 1, + }), + ]); + const serializedAssistant = JSON.stringify(assistantMessage); + expect(serializedAssistant).not.toContain("data:image/png;base64"); + expect(serializedAssistant).not.toContain(pngB64); + } finally { + if (previousStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = previousStateDir; + } + } + }); + }); + test("chat.history hides assistant NO_REPLY-only entries and keeps mixed-content assistant entries", async () => { const historyMessages = await loadChatHistoryWithMessages(buildNoReplyHistoryFixture(true)); const roleAndText = historyMessages diff --git a/ui/src/ui/chat/grouped-render.test.ts b/ui/src/ui/chat/grouped-render.test.ts index 72115dbb524..0c4ea05696e 100644 --- 
a/ui/src/ui/chat/grouped-render.test.ts +++ b/ui/src/ui/chat/grouped-render.test.ts @@ -682,6 +682,96 @@ describe("grouped chat rendering", () => { expect(image?.getAttribute("src")).toBe("data:image/png;base64,cG5n"); }); + it("fetches managed chat images with auth and renders blob previews", async () => { + resetAssistantAttachmentAvailabilityCacheForTest(); + const objectUrl = "blob:managed-image"; + vi.stubGlobal( + "URL", + Object.assign(URL, { + createObjectURL: vi.fn(() => objectUrl), + revokeObjectURL: vi.fn(), + }), + ); + const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => { + const headers = init?.headers as Headers; + expect(headers.get("Authorization")).toBe("Bearer session-token"); + expect(headers.get("x-openclaw-requester-session-key")).toBe("agent:main:main"); + return { + ok: true, + blob: async () => new Blob(["png"], { type: "image/png" }), + }; + }); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const container = document.createElement("div"); + renderAssistantMessage( + container, + { + role: "assistant", + content: [ + { + type: "image", + url: "/api/chat/media/outgoing/agent%3Amain%3Amain/00000000-0000-4000-8000-000000000000/full", + alt: "Generated image 1", + width: 1, + height: 1, + }, + ], + timestamp: Date.now(), + }, + { + showToolCalls: false, + assistantAttachmentAuthToken: "session-token", + }, + ); + + await vi.waitFor(() => { + const image = container.querySelector(".chat-message-image"); + expect(image?.getAttribute("src")).toBe(objectUrl); + expect(image?.getAttribute("alt")).toBe("Generated image 1"); + }); + expect(fetchMock).toHaveBeenCalledWith( + "/api/chat/media/outgoing/agent%3Amain%3Amain/00000000-0000-4000-8000-000000000000/full", + expect.objectContaining({ + method: "GET", + credentials: "same-origin", + }), + ); + }); + + it("does not send auth to cross-origin managed-image-looking URLs", async () => { + const fetchMock = vi.fn(async () => { + throw new Error("cross-origin image 
URL should not be fetched with Control UI auth"); + }); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const container = document.createElement("div"); + renderAssistantMessage( + container, + { + role: "assistant", + content: [ + { + type: "image", + url: "https://evil.example/api/chat/media/outgoing/agent%3Amain%3Amain/00000000-0000-4000-8000-000000000000/full", + alt: "Untrusted image", + }, + ], + timestamp: Date.now(), + }, + { + showToolCalls: false, + assistantAttachmentAuthToken: "session-token", + }, + ); + + const image = container.querySelector(".chat-message-image"); + expect(image?.getAttribute("src")).toBe( + "https://evil.example/api/chat/media/outgoing/agent%3Amain%3Amain/00000000-0000-4000-8000-000000000000/full", + ); + expect(fetchMock).not.toHaveBeenCalled(); + }); + it("renders canvas-only [embed] shortcodes inside the assistant bubble", () => { const container = document.createElement("div"); renderAssistantMessage( diff --git a/ui/src/ui/chat/grouped-render.ts b/ui/src/ui/chat/grouped-render.ts index 6ad69a52313..6a04e3f5ec2 100644 --- a/ui/src/ui/chat/grouped-render.ts +++ b/ui/src/ui/chat/grouped-render.ts @@ -1,5 +1,6 @@ import { html, nothing } from "lit"; import { unsafeHTML } from "lit/directives/unsafe-html.js"; +import { until } from "lit/directives/until.js"; import { getSafeLocalStorage } from "../../local-storage.ts"; import type { AssistantIdentity } from "../assistant-identity.ts"; import type { EmbedSandboxMode } from "../embed-sandbox.ts"; @@ -50,11 +51,20 @@ const ASSISTANT_ATTACHMENT_UNAVAILABLE_RETRY_MS = 5_000; export function resetAssistantAttachmentAvailabilityCacheForTest() { assistantAttachmentAvailabilityCache.clear(); + for (const blobUrl of managedImageBlobUrlResolvedCache.values()) { + URL.revokeObjectURL(blobUrl); + } + managedImageBlobUrlCache.clear(); + managedImageBlobUrlResolvedCache.clear(); + managedImageBlobUrlMissCache.clear(); } type ImageBlock = { url: string; + openUrl?: string; 
alt?: string; + width?: number; + height?: number; }; type ImageRenderOptions = { @@ -67,6 +77,11 @@ type RenderableImageBlock = ImageBlock & { displayUrl: string; }; +const managedImageBlobUrlCache = new Map>(); +const managedImageBlobUrlResolvedCache = new Map(); +const managedImageBlobUrlMissCache = new Map(); +const MANAGED_IMAGE_BLOB_URL_MISS_RETRY_MS = 5_000; + function appendImageBlock(images: ImageBlock[], block: ImageBlock) { if (!images.some((entry) => entry.url === block.url && entry.alt === block.alt)) { images.push(block); @@ -128,15 +143,22 @@ function extractImages(message: unknown): ImageBlock[] { if (b.type === "image") { // Handle source object format (from sendChatMessage) const source = b.source as Record | undefined; + const imageMeta = { + alt: typeof b.alt === "string" ? b.alt : undefined, + openUrl: typeof b.openUrl === "string" ? b.openUrl : undefined, + width: typeof b.width === "number" ? b.width : undefined, + height: typeof b.height === "number" ? b.height : undefined, + }; if (source?.type === "base64" && typeof source.data === "string") { appendImageBlock(images, { url: buildBase64ImageUrl({ data: source.data, mediaType: typeof source.media_type === "string" ? source.media_type : undefined, }), + ...imageMeta, }); } else if (typeof b.url === "string") { - appendImageBlock(images, { url: b.url }); + appendImageBlock(images, { url: b.url, ...imageMeta }); } } else if (b.type === "image_url") { // OpenAI format @@ -732,7 +754,7 @@ function resolveRenderableMessageImages( }); } -function renderMessageImages(images: RenderableImageBlock[]) { +function renderMessageImages(images: RenderableImageBlock[], opts?: ImageRenderOptions) { if (images.length === 0) { return nothing; } @@ -741,20 +763,31 @@ function renderMessageImages(images: RenderableImageBlock[]) { openExternalUrlSafe(url, { allowDataImage: true }); }; - return html` -
- ${images.map( - (img) => html` - ${img.alt openImage(img.displayUrl)} - /> - `, - )} -
+ const renderImageElement = (img: RenderableImageBlock, previewUrl: string) => html` + ${img.alt openImage(previewUrl)} + /> `; + + const renderImage = (img: RenderableImageBlock) => { + if (!isManagedOutgoingImageSource(img.displayUrl)) { + return renderImageElement(img, img.displayUrl); + } + const preview = resolveManagedOutgoingImageBlobUrl(img.displayUrl, opts).then((previewUrl) => { + if (!previewUrl) { + return nothing; + } + return renderImageElement(img, previewUrl); + }); + return until(preview, nothing); + }; + + return html`
${images.map((img) => renderImage(img))}
`; } function renderReplyPill(replyTarget: NormalizedMessage["replyTarget"]) { @@ -775,7 +808,7 @@ function renderReplyPill(replyTarget: NormalizedMessage["replyTarget"]) { function isLocalAssistantAttachmentSource(source: string): boolean { const trimmed = source.trim(); - if (/^\/(?:__openclaw__|media)\//.test(trimmed)) { + if (/^\/(?:__openclaw__|media|api\/chat\/media\/outgoing)\//.test(trimmed)) { return false; } return ( @@ -882,6 +915,94 @@ function buildAssistantAttachmentUrl( return `${normalizedBasePath}/__openclaw__/assistant-media?${params.toString()}`; } +function isManagedOutgoingImageSource(source: string): boolean { + const trimmed = source.trim(); + if (trimmed.startsWith("/api/chat/media/outgoing/")) { + return true; + } + try { + const parsed = new URL(trimmed, window.location.origin); + return ( + parsed.origin === window.location.origin && + parsed.pathname.startsWith("/api/chat/media/outgoing/") + ); + } catch { + return false; + } +} + +function resolveManagedOutgoingImageRequesterSessionKey(source: string): string | null { + try { + const parsed = new URL(source, window.location.origin); + const parts = parsed.pathname.split("/"); + const encodedSessionKey = parts[5]; + return encodedSessionKey ? decodeURIComponent(encodedSessionKey) : null; + } catch { + return null; + } +} + +function buildManagedOutgoingImageFetchUrl(source: string, basePath?: string): string { + if (!source.startsWith("/")) { + return source; + } + const normalizedBasePath = + basePath && basePath !== "/" ? (basePath.endsWith("/") ? basePath.slice(0, -1) : basePath) : ""; + return `${normalizedBasePath}${source}`; +} + +async function resolveManagedOutgoingImageBlobUrl( + source: string, + opts?: ImageRenderOptions, +): Promise { + const authToken = opts?.authToken?.trim() ?? 
""; + const fetchUrl = buildManagedOutgoingImageFetchUrl(source, opts?.basePath); + const cacheKey = `${fetchUrl}::${authToken}`; + const cached = managedImageBlobUrlResolvedCache.get(cacheKey); + if (cached) { + return cached; + } + const missAt = managedImageBlobUrlMissCache.get(cacheKey); + if (missAt && Date.now() - missAt < MANAGED_IMAGE_BLOB_URL_MISS_RETRY_MS) { + return null; + } + let pending = managedImageBlobUrlCache.get(cacheKey); + if (!pending) { + pending = (async () => { + const requesterSessionKey = resolveManagedOutgoingImageRequesterSessionKey(source); + const headers = new Headers({ Accept: "image/*" }); + if (authToken) { + headers.set("Authorization", `Bearer ${authToken}`); + } + if (requesterSessionKey) { + headers.set("x-openclaw-requester-session-key", requesterSessionKey); + } + const res = await fetch(fetchUrl, { + method: "GET", + headers, + credentials: "same-origin", + }); + if (!res.ok) { + managedImageBlobUrlMissCache.set(cacheKey, Date.now()); + return null; + } + const blob = await res.blob(); + if (!blob.type.startsWith("image/")) { + managedImageBlobUrlMissCache.set(cacheKey, Date.now()); + return null; + } + const blobUrl = URL.createObjectURL(blob); + managedImageBlobUrlResolvedCache.set(cacheKey, blobUrl); + managedImageBlobUrlMissCache.delete(cacheKey); + return blobUrl; + })().finally(() => { + managedImageBlobUrlCache.delete(cacheKey); + }); + managedImageBlobUrlCache.set(cacheKey, pending); + } + return pending; +} + function buildAssistantAttachmentMetaUrl( source: string, basePath?: string, @@ -1325,7 +1446,7 @@ function renderGroupedMessage( ${toolMessageExpanded ? html`
- ${renderMessageImages(images)} + ${renderMessageImages(images, imageRenderOptions)} ${renderAssistantAttachments( assistantAttachments, opts.localMediaPreviewRoots ?? [], @@ -1381,7 +1502,7 @@ function renderGroupedMessage(
` : html` - ${renderMessageImages(images)} + ${renderMessageImages(images, imageRenderOptions)} ${renderAssistantAttachments( assistantAttachments, opts.localMediaPreviewRoots ?? [],