Fix WebChat display for Codex-generated local media (#77889)

* fix: stage webchat codex media before display

* fix: avoid staging sensitive webchat media

* fix: preserve webchat inline media replies

* fix: normalize mixed webchat media replies

* fix: suppress mixed media false warnings

* fix(gateway): preserve webchat audio media display
This commit is contained in:
Frank Yang
2026-05-06 08:48:34 +08:00
committed by GitHub
parent ceca7fdfda
commit 1ddc2650c6
4 changed files with 311 additions and 2 deletions

View File

@@ -112,6 +112,7 @@ Docs: https://docs.openclaw.ai
- Gateway/HTTP: avoid loading managed outgoing-image media handlers for unrelated requests, so disabled OpenAI-compatible routes return 404 without waiting on lazy media sidecars. Thanks @vincentkoc.
- Gateway/OpenAI-compatible: send the assistant role SSE chunk as soon as streaming chat-completion headers are accepted, so cold agent setup cannot leave `/v1/chat/completions` clients with a bodyless 200 response until their idle timeout fires.
- Agents/media: avoid direct generated-media completion fallback while the announce-agent run is still pending, so async video and music completions do not duplicate raw media messages. (#77754)
- WebChat/Codex media: stage Codex app-server generated local images into managed media before Gateway display, so Codex-home image paths no longer hit `LocalMediaAccessError` while keeping Codex home out of the display allowlist. Thanks @frankekn.
- TUI/sessions: bound the session picker to recent rows and use exact lookup-style refreshes for the active session, so dusty stores no longer make TUI hydrate weeks-old transcripts before becoming responsive. Thanks @vincentkoc.
- Doctor/gateway: report recent supervisor restart handoffs in `openclaw doctor --deep`, using the installed service environment when available so service-managed clean exits are visible in guided diagnostics. Thanks @shakkernerd.
- Gateway/status: show recent supervisor restart handoffs in `openclaw gateway status --deep`, including JSON details, so clean service-managed restarts are reported as restart handoffs instead of opaque stopped-service diagnostics. Thanks @shakkernerd.

View File

@@ -0,0 +1,212 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js";
import { createManagedOutgoingImageBlocks } from "../managed-image-attachments.js";
import { normalizeWebchatReplyMediaPathsForDisplay } from "./chat-reply-media.js";
// Base64 text of a minimal 1x1 PNG image used as the on-disk and inline fixture.
const PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=";
// Decoded bytes of the fixture image; written to disk and re-encoded into data URLs by the tests.
const PNG_BYTES = Buffer.from(PNG_BASE64, "base64");
describe("normalizeWebchatReplyMediaPathsForDisplay", () => {
  // Session and agent identity shared by every case in this suite.
  const SESSION_KEY = "agent:main:webchat:direct:user";
  const AGENT_ID = "main";
  // Inline image reply built from the same bytes as the on-disk fixture.
  const PNG_DATA_URL = `data:image/png;base64,${PNG_BYTES.toString("base64")}`;

  // Payload element type accepted by the function under test, derived to avoid an extra import.
  type ReplyPayloadInput = Parameters<
    typeof normalizeWebchatReplyMediaPathsForDisplay
  >[0]["payloads"][number];

  let rootDir = "";

  beforeEach(async () => {
    // Each test gets a fresh temp root with OPENCLAW_STATE_DIR pointed inside it.
    rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-webchat-reply-media-"));
    vi.stubEnv("OPENCLAW_STATE_DIR", path.join(rootDir, "state"));
  });

  afterEach(async () => {
    vi.unstubAllEnvs();
    await fs.rm(rootDir, { recursive: true, force: true });
    rootDir = "";
  });

  /** Derives the state, agent, and workspace directories from the stubbed state dir. */
  function resolveFixtureDirs(): { stateDir: string; agentDir: string; workspaceDir: string } {
    const stateDir = process.env.OPENCLAW_STATE_DIR ?? "";
    return {
      stateDir,
      agentDir: path.join(stateDir, "agents", "main", "agent"),
      workspaceDir: path.join(stateDir, "workspace"),
    };
  }

  /**
   * Builds a single-agent config. `allowRead` selects between a tools allowlist
   * containing "read" and a workspace-only filesystem policy.
   */
  function createConfig(params: {
    agentDir: string;
    workspaceDir: string;
    allowRead: boolean;
  }): OpenClawConfig {
    return {
      tools: params.allowRead ? { allow: ["read"] } : { fs: { workspaceOnly: true } },
      agents: {
        list: [
          {
            id: "main",
            agentDir: params.agentDir,
            workspace: params.workspaceDir,
          },
        ],
      },
    };
  }

  /** Writes the PNG fixture under `<agentDir>/codex-home/outputs` and returns its path. */
  async function createCodexHomeImage(params: { agentDir: string }): Promise<string> {
    const imagePath = path.join(params.agentDir, "codex-home", "outputs", "chart.png");
    await fs.mkdir(path.dirname(imagePath), { recursive: true });
    await fs.writeFile(imagePath, PNG_BYTES);
    return imagePath;
  }

  /** Runs the function under test on one payload and returns the first result. */
  async function normalizeSinglePayload(
    cfg: OpenClawConfig,
    payload: ReplyPayloadInput,
  ): Promise<ReplyPayloadInput | undefined> {
    const [normalized] = await normalizeWebchatReplyMediaPathsForDisplay({
      cfg,
      sessionKey: SESSION_KEY,
      agentId: AGENT_ID,
      payloads: [payload],
    });
    return normalized;
  }

  /** Builds managed outgoing image blocks for the payload's media using the agent-scoped roots. */
  function createDisplayBlocks(cfg: OpenClawConfig, payload: ReplyPayloadInput | undefined) {
    return createManagedOutgoingImageBlocks({
      sessionKey: SESSION_KEY,
      mediaUrls: payload?.mediaUrls ?? [],
      localRoots: getAgentScopedMediaLocalRoots(cfg, AGENT_ID),
    });
  }

  /** Asserts that the outbound media directory under the state dir does not exist. */
  async function expectNoOutboundMedia(stateDir: string): Promise<void> {
    await expect(fs.stat(path.join(stateDir, "media", "outbound"))).rejects.toThrow();
  }

  it("stages Codex-home image paths before Gateway managed-image display", async () => {
    const { stateDir, agentDir, workspaceDir } = resolveFixtureDirs();
    const sourcePath = await createCodexHomeImage({ agentDir });
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });

    const payload = await normalizeSinglePayload(cfg, { mediaUrls: [sourcePath] });

    const normalizedPath = payload?.mediaUrls?.[0];
    expect(normalizedPath).toBeTruthy();
    expect(normalizedPath).not.toBe(sourcePath);
    expect(normalizedPath?.startsWith(path.join(stateDir, "media"))).toBe(true);
    const blocks = await createDisplayBlocks(cfg, payload);
    expect(blocks).toHaveLength(1);
    expect((blocks[0] as { type?: string }).type).toBe("image");
  });

  it("does not expose Codex-home media when host read policy is not enabled", async () => {
    const { agentDir, workspaceDir } = resolveFixtureDirs();
    const sourcePath = await createCodexHomeImage({ agentDir });
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: false });

    const payload = await normalizeSinglePayload(cfg, { mediaUrls: [sourcePath] });

    expect(payload?.mediaUrl).toBeUndefined();
    expect(payload?.mediaUrls).toBeUndefined();
    expect(payload?.text).toBeTruthy();
  });

  it("does not stage sensitive media before display suppression", async () => {
    const { stateDir, agentDir, workspaceDir } = resolveFixtureDirs();
    const sourcePath = await createCodexHomeImage({ agentDir });
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });

    const payload = await normalizeSinglePayload(cfg, {
      mediaUrls: [sourcePath],
      sensitiveMedia: true,
    });

    expect(payload?.mediaUrl).toBeUndefined();
    expect(payload?.mediaUrls).toEqual([sourcePath]);
    await expectNoOutboundMedia(stateDir);
  });

  it("preserves inline data image replies for WebChat rendering", async () => {
    const { stateDir, agentDir, workspaceDir } = resolveFixtureDirs();
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });

    const payload = await normalizeSinglePayload(cfg, { mediaUrls: [PNG_DATA_URL] });

    expect(payload?.mediaUrl).toBeUndefined();
    expect(payload?.mediaUrls).toEqual([PNG_DATA_URL]);
    await expectNoOutboundMedia(stateDir);
  });

  it("preserves local audio paths for WebChat audio embedding", async () => {
    const { stateDir, agentDir, workspaceDir } = resolveFixtureDirs();
    const audioPath = path.join(workspaceDir, "voice.mp3");
    await fs.mkdir(path.dirname(audioPath), { recursive: true });
    await fs.writeFile(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: false });

    const payload = await normalizeSinglePayload(cfg, {
      mediaUrls: [audioPath],
      trustedLocalMedia: true,
      audioAsVoice: true,
    });

    expect(payload?.mediaUrl).toBeUndefined();
    expect(payload?.mediaUrls).toEqual([audioPath]);
    expect(payload?.trustedLocalMedia).toBe(true);
    expect(payload?.audioAsVoice).toBe(true);
    await expectNoOutboundMedia(stateDir);
  });

  it("preserves data images while staging mixed local image replies", async () => {
    const { stateDir, agentDir, workspaceDir } = resolveFixtureDirs();
    const sourcePath = await createCodexHomeImage({ agentDir });
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: true });

    const payload = await normalizeSinglePayload(cfg, {
      mediaUrls: [PNG_DATA_URL, sourcePath],
    });

    const normalizedLocalPath = payload?.mediaUrls?.[1];
    expect(payload?.mediaUrls?.[0]).toBe(PNG_DATA_URL);
    expect(normalizedLocalPath).toBeTruthy();
    expect(normalizedLocalPath).not.toBe(sourcePath);
    expect(normalizedLocalPath?.startsWith(path.join(stateDir, "media"))).toBe(true);
    const blocks = await createDisplayBlocks(cfg, payload);
    expect(blocks).toHaveLength(2);
  });

  it("does not add a failure warning when a mixed inline image survives", async () => {
    const { stateDir, agentDir, workspaceDir } = resolveFixtureDirs();
    const sourcePath = await createCodexHomeImage({ agentDir });
    const cfg = createConfig({ agentDir, workspaceDir, allowRead: false });

    const payload = await normalizeSinglePayload(cfg, {
      mediaUrls: [sourcePath, PNG_DATA_URL],
    });

    expect(payload?.text).toBeUndefined();
    expect(payload?.mediaUrl).toBe(PNG_DATA_URL);
    expect(payload?.mediaUrls).toEqual([PNG_DATA_URL]);
    await expectNoOutboundMedia(stateDir);
  });
});

View File

@@ -0,0 +1,79 @@
import { resolveAgentWorkspaceDir } from "../../agents/agent-scope.js";
import type { ReplyPayload } from "../../auto-reply/reply-payload.js";
import { createReplyMediaPathNormalizer } from "../../auto-reply/reply/reply-media-paths.runtime.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { isAudioFileName } from "../../media/mime.js";
import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js";
/** Matches a `data:` scheme at the start of a string, ignoring leading whitespace and letter case. */
const DATA_URL_PREFIX_PATTERN = /^\s*data:/i;

/**
 * Reports whether a media reference is an inline `data:` URL.
 *
 * Only the prefix is inspected. The previous `trim().toLowerCase()` form copied
 * the whole string twice, which is wasteful for inline images whose base64
 * payload can be very large; the anchored pattern yields the same answer
 * without touching the payload.
 *
 * @param mediaUrl - Media URL or path taken from a reply payload.
 * @returns `true` when the value starts with `data:` after optional leading whitespace.
 */
function isDataUrlMedia(mediaUrl: string): boolean {
  return DATA_URL_PREFIX_PATTERN.test(mediaUrl);
}
/**
 * Decides whether a media URL should be passed through to WebChat as-is
 * instead of being handed to the reply media path normalizer.
 *
 * @param mediaUrl - Media URL or path taken from a reply payload.
 * @returns `true` for inline `data:` URLs and for values that `isAudioFileName` accepts.
 */
function shouldPreserveDisplayMediaUrl(mediaUrl: string): boolean {
  if (isDataUrlMedia(mediaUrl)) {
    return true;
  }
  return isAudioFileName(mediaUrl);
}
/**
 * Prepares WebChat reply payloads so their media references can be displayed.
 *
 * Per payload, in input order:
 * - payloads flagged `sensitiveMedia === true` are returned untouched;
 * - payloads with no preserved media (no `data:` URL or audio file name) are
 *   passed whole to the reply media path normalizer;
 * - payloads whose media are all preserved are returned untouched;
 * - mixed payloads keep preserved URLs verbatim and normalize every other URL
 *   on its own. URLs for which the normalizer yields no sendable media are
 *   dropped, and the original payload text is kept (text returned by the
 *   per-URL normalization is not used).
 *
 * The input array is returned unchanged when it is empty or when no workspace
 * directory can be resolved.
 *
 * @param params.cfg - Configuration forwarded to workspace resolution and the normalizer.
 * @param params.sessionKey - Session key forwarded to the normalizer.
 * @param params.agentId - Agent id used for workspace resolution and the normalizer.
 * @param params.workspaceDir - Optional workspace override; otherwise resolved from `cfg` and `agentId`.
 * @param params.accountId - Optional account id forwarded to the normalizer.
 * @param params.payloads - Reply payloads to prepare.
 * @returns One payload per input payload, in the same order.
 */
export async function normalizeWebchatReplyMediaPathsForDisplay(params: {
  cfg: OpenClawConfig;
  sessionKey: string;
  agentId: string;
  workspaceDir?: string;
  accountId?: string;
  payloads: ReplyPayload[];
}): Promise<ReplyPayload[]> {
  const { cfg, sessionKey, agentId, accountId, payloads } = params;
  if (payloads.length === 0) {
    return payloads;
  }

  const workspaceDir = params.workspaceDir ?? resolveAgentWorkspaceDir(cfg, agentId);
  if (!workspaceDir) {
    return payloads;
  }

  const stageMediaPaths = createReplyMediaPathNormalizer({
    cfg,
    sessionKey,
    agentId,
    workspaceDir,
    accountId,
  });

  const results: ReplyPayload[] = [];
  for (const payload of payloads) {
    // Sensitive media is never handed to the normalizer.
    if (payload.sensitiveMedia === true) {
      results.push(payload);
      continue;
    }

    const mediaUrls = resolveSendableOutboundReplyParts(payload).mediaUrls;

    const nothingToPreserve = mediaUrls.every((url) => !shouldPreserveDisplayMediaUrl(url));
    if (nothingToPreserve) {
      results.push(await stageMediaPaths(payload));
      continue;
    }

    const everythingPreserved = mediaUrls.every((url) => shouldPreserveDisplayMediaUrl(url));
    if (everythingPreserved) {
      results.push(payload);
      continue;
    }

    // Mixed payload: rebuild the media list URL by URL, keeping the original order.
    const displayUrls: string[] = [];
    for (const url of mediaUrls) {
      if (shouldPreserveDisplayMediaUrl(url)) {
        displayUrls.push(url);
      } else {
        const stagedPayload = await stageMediaPaths({
          ...payload,
          mediaUrl: url,
          mediaUrls: [url],
        });
        // An empty result means this URL produced no sendable media and is skipped.
        displayUrls.push(...resolveSendableOutboundReplyParts(stagedPayload).mediaUrls);
      }
    }

    results.push({
      ...payload,
      text: payload.text,
      mediaUrl: displayUrls[0],
      mediaUrls: displayUrls,
    });
  }
  return results;
}

View File

@@ -110,6 +110,7 @@ import { formatForLog } from "../ws-log.js";
import { injectTimestamp, timestampOptsFromConfig } from "./agent-timestamp.js";
import { setGatewayDedupeEntry } from "./agent-wait-dedupe.js";
import { normalizeRpcAttachmentsToChatAttachments } from "./attachment-normalize.js";
import { normalizeWebchatReplyMediaPathsForDisplay } from "./chat-reply-media.js";
import { appendInjectedAssistantMessageToTranscript } from "./chat-transcript-inject.js";
import {
buildWebchatAssistantMessageFromReplyPayloads,
@@ -2320,7 +2321,16 @@ export const chatHandlers: GatewayRequestHandlers = {
if (!agentRunStarted || appendedWebchatAgentMedia || !isMediaBearingPayload(payload)) {
return;
}
const transcriptPayload = stripVisibleTextFromTtsSupplement(payload);
const [transcriptPayload] = await normalizeWebchatReplyMediaPathsForDisplay({
cfg,
sessionKey,
agentId,
accountId,
payloads: [stripVisibleTextFromTtsSupplement(payload)],
});
if (!transcriptPayload) {
return;
}
const { storePath: latestStorePath, entry: latestEntry } = loadSessionEntry(sessionKey);
const sessionId = latestEntry?.sessionId ?? backingSessionId ?? clientRunId;
const resolvedTranscriptPath = resolveTranscriptPath({
@@ -2499,11 +2509,18 @@ export const chatHandlers: GatewayRequestHandlers = {
sessionKey,
});
} else {
const finalPayloads = appendedWebchatAgentMedia
const rawFinalPayloads = appendedWebchatAgentMedia
? []
: deliveredReplies
.filter((entry) => entry.kind === "final")
.map((entry) => entry.payload);
const finalPayloads = await normalizeWebchatReplyMediaPathsForDisplay({
cfg,
sessionKey,
agentId,
accountId,
payloads: rawFinalPayloads,
});
const { storePath: latestStorePath, entry: latestEntry } =
loadSessionEntry(sessionKey);
const sessionId = latestEntry?.sessionId ?? backingSessionId ?? clientRunId;