diff --git a/CHANGELOG.md b/CHANGELOG.md index 8da443ef7eb..d9d413029a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,6 +112,7 @@ Docs: https://docs.openclaw.ai - Gateway/HTTP: avoid loading managed outgoing-image media handlers for unrelated requests, so disabled OpenAI-compatible routes return 404 without waiting on lazy media sidecars. Thanks @vincentkoc. - Gateway/OpenAI-compatible: send the assistant role SSE chunk as soon as streaming chat-completion headers are accepted, so cold agent setup cannot leave `/v1/chat/completions` clients with a bodyless 200 response until their idle timeout fires. - Agents/media: avoid direct generated-media completion fallback while the announce-agent run is still pending, so async video and music completions do not duplicate raw media messages. (#77754) +- WebChat/Codex media: stage Codex app-server generated local images into managed media before Gateway display, so Codex-home image paths no longer hit `LocalMediaAccessError` while keeping Codex home out of the display allowlist. Thanks @frankekn. - TUI/sessions: bound the session picker to recent rows and use exact lookup-style refreshes for the active session, so dusty stores no longer make TUI hydrate weeks-old transcripts before becoming responsive. Thanks @vincentkoc. - Doctor/gateway: report recent supervisor restart handoffs in `openclaw doctor --deep`, using the installed service environment when available so service-managed clean exits are visible in guided diagnostics. Thanks @shakkernerd. - Gateway/status: show recent supervisor restart handoffs in `openclaw gateway status --deep`, including JSON details, so clean service-managed restarts are reported as restart handoffs instead of opaque stopped-service diagnostics. Thanks @shakkernerd. 
diff --git a/src/gateway/server-methods/chat-reply-media.test.ts b/src/gateway/server-methods/chat-reply-media.test.ts
new file mode 100644
index 00000000000..90f61e7768c
--- /dev/null
+++ b/src/gateway/server-methods/chat-reply-media.test.ts
@@ -0,0 +1,212 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { OpenClawConfig } from "../../config/types.openclaw.js";
+import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js";
+import { createManagedOutgoingImageBlocks } from "../managed-image-attachments.js";
+import { normalizeWebchatReplyMediaPathsForDisplay } from "./chat-reply-media.js";
+
+const PNG_BYTES = Buffer.from(
+  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=",
+  "base64",
+);
+
+describe("normalizeWebchatReplyMediaPathsForDisplay", () => {
+  let rootDir = "";
+
+  beforeEach(async () => {
+    rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-webchat-reply-media-"));
+    vi.stubEnv("OPENCLAW_STATE_DIR", path.join(rootDir, "state"));
+  });
+
+  afterEach(async () => {
+    vi.unstubAllEnvs();
+    await fs.rm(rootDir, { recursive: true, force: true });
+    rootDir = "";
+  });
+
+  function createConfig(params: {
+    agentDir: string;
+    workspaceDir: string;
+    allowRead: boolean;
+  }): OpenClawConfig {
+    return {
+      tools: params.allowRead ? { allow: ["read"] } : { fs: { workspaceOnly: true } },
+      agents: {
+        list: [
+          {
+            id: "main",
+            agentDir: params.agentDir,
+            workspace: params.workspaceDir,
+          },
+        ],
+      },
+    };
+  }
+
+  async function createCodexHomeImage(params: { agentDir: string }): Promise<string> {
+    const imagePath = path.join(params.agentDir, "codex-home", "outputs", "chart.png");
+    await fs.mkdir(path.dirname(imagePath), { recursive: true });
+    await fs.writeFile(imagePath, PNG_BYTES);
+    return imagePath;
+  }
+
+  it("stages Codex-home image paths before Gateway managed-image display", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const sourcePath = await createCodexHomeImage({ agentDir });
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [sourcePath] }],
+    });
+
+    const normalizedPath = payload?.mediaUrls?.[0];
+    expect(normalizedPath).toBeTruthy();
+    expect(normalizedPath).not.toBe(sourcePath);
+    expect(normalizedPath?.startsWith(path.join(stateDir, "media"))).toBe(true);
+    const blocks = await createManagedOutgoingImageBlocks({
+      sessionKey: "agent:main:webchat:direct:user",
+      mediaUrls: payload?.mediaUrls ?? [],
+      localRoots: getAgentScopedMediaLocalRoots(cfg, "main"),
+    });
+
+    expect(blocks).toHaveLength(1);
+    expect((blocks[0] as { type?: string }).type).toBe("image");
+  });
+
+  it("does not expose Codex-home media when host read policy is not enabled", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const sourcePath = await createCodexHomeImage({ agentDir });
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: false });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [sourcePath] }],
+    });
+
+    expect(payload?.mediaUrl).toBeUndefined();
+    expect(payload?.mediaUrls).toBeUndefined();
+    expect(payload?.text).toBeTruthy();
+  });
+
+  it("does not stage sensitive media before display suppression", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const sourcePath = await createCodexHomeImage({ agentDir });
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [sourcePath], sensitiveMedia: true }],
+    });
+
+    expect(payload?.mediaUrl).toBeUndefined();
+    expect(payload?.mediaUrls).toEqual([sourcePath]);
+    await expect(fs.stat(path.join(stateDir, "media", "outbound"))).rejects.toThrow();
+  });
+
+  it("preserves inline data image replies for WebChat rendering", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const dataUrl = `data:image/png;base64,${PNG_BYTES.toString("base64")}`;
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [dataUrl] }],
+    });
+
+    expect(payload?.mediaUrl).toBeUndefined();
+    expect(payload?.mediaUrls).toEqual([dataUrl]);
+    await expect(fs.stat(path.join(stateDir, "media", "outbound"))).rejects.toThrow();
+  });
+
+  it("preserves local audio paths for WebChat audio embedding", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const audioPath = path.join(workspaceDir, "voice.mp3");
+    await fs.mkdir(path.dirname(audioPath), { recursive: true });
+    await fs.writeFile(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: false });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [audioPath], trustedLocalMedia: true, audioAsVoice: true }],
+    });
+
+    expect(payload?.mediaUrl).toBeUndefined();
+    expect(payload?.mediaUrls).toEqual([audioPath]);
+    expect(payload?.trustedLocalMedia).toBe(true);
+    expect(payload?.audioAsVoice).toBe(true);
+    await expect(fs.stat(path.join(stateDir, "media", "outbound"))).rejects.toThrow();
+  });
+
+  it("preserves data images while staging mixed local image replies", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const sourcePath = await createCodexHomeImage({ agentDir });
+    const dataUrl = `data:image/png;base64,${PNG_BYTES.toString("base64")}`;
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [dataUrl, sourcePath] }],
+    });
+
+    const normalizedLocalPath = payload?.mediaUrls?.[1];
+    expect(payload?.mediaUrls?.[0]).toBe(dataUrl);
+    expect(normalizedLocalPath).toBeTruthy();
+    expect(normalizedLocalPath).not.toBe(sourcePath);
+    expect(normalizedLocalPath?.startsWith(path.join(stateDir, "media"))).toBe(true);
+    const blocks = await createManagedOutgoingImageBlocks({
+      sessionKey: "agent:main:webchat:direct:user",
+      mediaUrls: payload?.mediaUrls ?? [],
+      localRoots: getAgentScopedMediaLocalRoots(cfg, "main"),
+    });
+
+    expect(blocks).toHaveLength(2);
+  });
+
+  it("does not add a failure warning when a mixed inline image survives", async () => {
+    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
+    const agentDir = path.join(stateDir, "agents", "main", "agent");
+    const workspaceDir = path.join(stateDir, "workspace");
+    const sourcePath = await createCodexHomeImage({ agentDir });
+    const dataUrl = `data:image/png;base64,${PNG_BYTES.toString("base64")}`;
+    const cfg = createConfig({ agentDir, workspaceDir, allowRead: false });
+
+    const [payload] = await normalizeWebchatReplyMediaPathsForDisplay({
+      cfg,
+      sessionKey: "agent:main:webchat:direct:user",
+      agentId: "main",
+      payloads: [{ mediaUrls: [sourcePath, dataUrl] }],
+    });
+
+    expect(payload?.text).toBeUndefined();
+    expect(payload?.mediaUrl).toBe(dataUrl);
+    expect(payload?.mediaUrls).toEqual([dataUrl]);
+    await expect(fs.stat(path.join(stateDir, "media", "outbound"))).rejects.toThrow();
+  });
+});
diff --git a/src/gateway/server-methods/chat-reply-media.ts b/src/gateway/server-methods/chat-reply-media.ts
new file mode 100644
index 00000000000..bd16088b73a
--- /dev/null
+++ b/src/gateway/server-methods/chat-reply-media.ts
@@ -0,0 +1,103 @@
+import { resolveAgentWorkspaceDir } from "../../agents/agent-scope.js";
+import type { ReplyPayload } from "../../auto-reply/reply-payload.js";
+import { createReplyMediaPathNormalizer } from "../../auto-reply/reply/reply-media-paths.runtime.js";
+import type { OpenClawConfig } from "../../config/types.openclaw.js";
+import { isAudioFileName } from "../../media/mime.js";
+import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js";
+
+/** Returns true when `mediaUrl` is an inline `data:` URL (trimmed, case-insensitive). */
+function isDataUrlMedia(mediaUrl: string): boolean {
+  return mediaUrl.trim().toLowerCase().startsWith("data:");
+}
+
+/**
+ * Returns true for media references that are passed to WebChat unchanged:
+ * inline `data:` URLs and audio file names. These skip path normalization.
+ */
+function shouldPreserveDisplayMediaUrl(mediaUrl: string): boolean {
+  return isDataUrlMedia(mediaUrl) || isAudioFileName(mediaUrl);
+}
+
+/**
+ * Normalizes reply media references before WebChat display.
+ *
+ * Payloads flagged `sensitiveMedia`, and payloads whose media are all preserved
+ * (see `shouldPreserveDisplayMediaUrl`), are returned as-is. Payloads with no
+ * preserved media are handed to the reply media path normalizer as a whole.
+ * Mixed payloads are normalized one URL at a time so preserved entries keep
+ * their value and relative order.
+ *
+ * @param params.workspaceDir - Optional override; defaults to the workspace
+ *   resolved for `agentId`. If none resolves, payloads are returned unchanged.
+ * @returns One payload per input payload, in input order.
+ */
+export async function normalizeWebchatReplyMediaPathsForDisplay(params: {
+  cfg: OpenClawConfig;
+  sessionKey: string;
+  agentId: string;
+  workspaceDir?: string;
+  accountId?: string;
+  payloads: ReplyPayload[];
+}): Promise<ReplyPayload[]> {
+  if (params.payloads.length === 0) {
+    return params.payloads;
+  }
+  const workspaceDir = params.workspaceDir ?? resolveAgentWorkspaceDir(params.cfg, params.agentId);
+  if (!workspaceDir) {
+    return params.payloads;
+  }
+  const normalizeMediaPaths = createReplyMediaPathNormalizer({
+    cfg: params.cfg,
+    sessionKey: params.sessionKey,
+    agentId: params.agentId,
+    workspaceDir,
+    accountId: params.accountId,
+  });
+  const normalized: ReplyPayload[] = [];
+  for (const payload of params.payloads) {
+    // Sensitive media is passed through untouched; nothing is normalized for it here.
+    if (payload.sensitiveMedia === true) {
+      normalized.push(payload);
+      continue;
+    }
+    const mediaUrls = resolveSendableOutboundReplyParts(payload).mediaUrls;
+    if (!mediaUrls.some(shouldPreserveDisplayMediaUrl)) {
+      normalized.push(await normalizeMediaPaths(payload));
+      continue;
+    }
+    if (!mediaUrls.some((mediaUrl) => !shouldPreserveDisplayMediaUrl(mediaUrl))) {
+      normalized.push(payload);
+      continue;
+    }
+    // Mixed payload: normalize each non-preserved URL on its own and rebuild the
+    // list so preserved entries survive in place.
+    const mergedMediaUrls: string[] = [];
+    // The original text is kept; text returned by the per-URL normalizer calls
+    // is not merged into the result.
+    const text = payload.text;
+    for (const mediaUrl of mediaUrls) {
+      if (shouldPreserveDisplayMediaUrl(mediaUrl)) {
+        mergedMediaUrls.push(mediaUrl);
+        continue;
+      }
+      const normalizedPayload = await normalizeMediaPaths({
+        ...payload,
+        mediaUrl,
+        mediaUrls: [mediaUrl],
+      });
+      const normalizedMediaUrls = resolveSendableOutboundReplyParts(normalizedPayload).mediaUrls;
+      if (normalizedMediaUrls.length === 0) {
+        // The normalizer left no media for this URL; leave it out of the merged list.
+        continue;
+      }
+      mergedMediaUrls.push(...normalizedMediaUrls);
+    }
+    normalized.push({
+      ...payload,
+      text,
+      mediaUrl: mergedMediaUrls[0],
+      mediaUrls: mergedMediaUrls,
+    });
+  }
+  return normalized;
+}
diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts
index b72c19838fd..8f2575532fb 100644
--- a/src/gateway/server-methods/chat.ts
+++ b/src/gateway/server-methods/chat.ts
@@ -110,6 +110,7 @@ import { formatForLog } from "../ws-log.js";
 import { injectTimestamp, timestampOptsFromConfig } from "./agent-timestamp.js";
 import { setGatewayDedupeEntry } from "./agent-wait-dedupe.js";
 import { normalizeRpcAttachmentsToChatAttachments } from "./attachment-normalize.js";
+import { normalizeWebchatReplyMediaPathsForDisplay } from "./chat-reply-media.js";
 import { appendInjectedAssistantMessageToTranscript } from "./chat-transcript-inject.js";
 import {
   buildWebchatAssistantMessageFromReplyPayloads,
@@ -2320,7 +2321,16 @@ export const chatHandlers: GatewayRequestHandlers = {
           if (!agentRunStarted || appendedWebchatAgentMedia || !isMediaBearingPayload(payload)) {
             return;
           }
-          const transcriptPayload = stripVisibleTextFromTtsSupplement(payload);
+          const [transcriptPayload] = await normalizeWebchatReplyMediaPathsForDisplay({
+            cfg,
+            sessionKey,
+            agentId,
+            accountId,
+            payloads: [stripVisibleTextFromTtsSupplement(payload)],
+          });
+          if (!transcriptPayload) {
+            return;
+          }
           const { storePath: latestStorePath, entry: latestEntry } = loadSessionEntry(sessionKey);
           const sessionId = latestEntry?.sessionId ?? backingSessionId ?? clientRunId;
           const resolvedTranscriptPath = resolveTranscriptPath({
@@ -2499,11 +2509,18 @@ export const chatHandlers: GatewayRequestHandlers = {
                 sessionKey,
               });
             } else {
-              const finalPayloads = appendedWebchatAgentMedia
+              const rawFinalPayloads = appendedWebchatAgentMedia
                 ? []
                 : deliveredReplies
                     .filter((entry) => entry.kind === "final")
                     .map((entry) => entry.payload);
+              const finalPayloads = await normalizeWebchatReplyMediaPathsForDisplay({
+                cfg,
+                sessionKey,
+                agentId,
+                accountId,
+                payloads: rawFinalPayloads,
+              });
               const { storePath: latestStorePath, entry: latestEntry } =
                 loadSessionEntry(sessionKey);
               const sessionId = latestEntry?.sessionId ?? backingSessionId ?? clientRunId;