fix(pair): render /pair qr as media (#70047)

* fix(pair): render pair qr as media

* fix(gateway): preserve media reply threading

* fix(gateway): harden webchat media replies

* fix(plugin-sdk): keep trustedLocalMedia internal

* docs(changelog): note pair qr media fix

* Update CHANGELOG with recent fixes and enhancements

Updated changelog to include recent fixes and enhancements.
This commit is contained in:
Val Alexander
2026-04-22 03:31:09 -05:00
committed by GitHub
parent 81ca7bc40b
commit 43a941b51c
43 changed files with 678 additions and 87 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/pairing webchat: render `/pair qr` replies as structured media instead of raw markdown text, preserve inline reply threading and silent-control handling on media replies, avoid persisting sensitive QR images into transcript history, and keep local webchat media embedding behind internal-only trust markers. (#70047) Thanks @BunsDev.
- OpenAI/Responses: keep embedded OpenAI Responses runs on HTTP when `models.providers.openai.baseUrl` points at a local mock or other non-public endpoint, so mocked/custom endpoints no longer drift onto the hardcoded public websocket transport. (#69815) Thanks @vincentkoc.
- Channels/config: require resolved runtime config on channel send/action/client helpers and block runtime helper `loadConfig()` calls, so SecretRefs are resolved at startup/boundaries instead of being re-read during sends.
- CLI/channels: preserve bundled setup promotion metadata when a loaded partial channel plugin omits it, so adding a non-default account still moves legacy single-account fields such as Telegram `streaming` into `accounts.default`.

View File

@@ -1,2 +1,2 @@
55b39075f07def786f5056b029921db64fcbdc5e2cab3d645215eccc857ba9a4 plugin-sdk-api-baseline.json
4a6b8f4afc9e6aa7c56b0cbab0886dacc4ead534c47761ab30eb76480d8fd673 plugin-sdk-api-baseline.jsonl
ba9b9d9b321b405fef89d4e95c1a3d629d1b956398a5b2a7f25b2a7654879783 plugin-sdk-api-baseline.json
8bbbee0ea2326148d4fd49f61fe74f83c5bb24c0742cfbf3609f43939fcd4c34 plugin-sdk-api-baseline.jsonl

View File

@@ -251,6 +251,7 @@ describe("device-pair /pair qr", () => {
gatewayClientScopes: ["operator.write", "operator.pairing"],
}),
);
const payload = result as { text?: string; mediaUrl?: string; sensitiveMedia?: boolean };
const text = requireText(result);
expect(pluginApiMocks.renderQrPngBase64).toHaveBeenCalledTimes(1);
@@ -261,11 +262,12 @@ describe("device-pair /pair qr", () => {
},
});
expect(text).toContain("Scan this QR code with the OpenClaw iOS app:");
expect(text).toContain("![OpenClaw pairing QR](data:image/png;base64,ZmFrZXBuZw==)");
expect(payload.mediaUrl).toBe("data:image/png;base64,ZmFrZXBuZw==");
expect(payload.sensitiveMedia).toBe(true);
expect(text).toContain("- Security: single-use bootstrap token");
expect(text).toContain("**Important:** Run `/pair cleanup` after pairing finishes.");
expect(text).toContain("If this QR code leaks, run `/pair cleanup` immediately.");
expect(text).not.toContain("```");
expect(text).not.toContain("![OpenClaw pairing QR]");
});
it("rejects qr setup for internal gateway callers without operator.pairing", async () => {

View File

@@ -732,9 +732,9 @@ export default definePluginEntry({
autoNotifyArmed,
expiresAtMs: payload.expiresAtMs,
}),
"",
`![OpenClaw pairing QR](${qrDataUrl})`,
].join("\n"),
mediaUrl: qrDataUrl,
sensitiveMedia: true,
};
}

View File

@@ -2,6 +2,8 @@ export type BlockReplyPayload = {
text?: string;
mediaUrls?: string[];
audioAsVoice?: boolean;
trustedLocalMedia?: boolean;
sensitiveMedia?: boolean;
isReasoning?: boolean;
replyToId?: string;
replyToTag?: boolean;

View File

@@ -251,6 +251,7 @@ describe("consumePendingToolMediaIntoReply", () => {
const state = {
pendingToolMediaUrls: ["/tmp/a.png", "/tmp/b.png"],
pendingToolAudioAsVoice: false,
pendingToolTrustedLocalMedia: false,
};
expect(
@@ -269,6 +270,7 @@ describe("consumePendingToolMediaIntoReply", () => {
const state = {
pendingToolMediaUrls: ["/tmp/a.png"],
pendingToolAudioAsVoice: true,
pendingToolTrustedLocalMedia: false,
};
expect(
@@ -290,6 +292,7 @@ describe("consumePendingToolMediaReply", () => {
const state = {
pendingToolMediaUrls: ["/tmp/reply.opus"],
pendingToolAudioAsVoice: true,
pendingToolTrustedLocalMedia: false,
};
expect(consumePendingToolMediaReply(state)).toEqual({

View File

@@ -178,20 +178,31 @@ export function resolveSilentReplyFallbackText(params: {
}
function clearPendingToolMedia(
state: Pick<EmbeddedPiSubscribeState, "pendingToolMediaUrls" | "pendingToolAudioAsVoice">,
state: Pick<
EmbeddedPiSubscribeState,
"pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
>,
) {
state.pendingToolMediaUrls = [];
state.pendingToolAudioAsVoice = false;
state.pendingToolTrustedLocalMedia = false;
}
export function consumePendingToolMediaIntoReply(
state: Pick<EmbeddedPiSubscribeState, "pendingToolMediaUrls" | "pendingToolAudioAsVoice">,
state: Pick<
EmbeddedPiSubscribeState,
"pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
>,
payload: BlockReplyPayload,
): BlockReplyPayload {
if (payload.isReasoning) {
return payload;
}
if (state.pendingToolMediaUrls.length === 0 && !state.pendingToolAudioAsVoice) {
if (
state.pendingToolMediaUrls.length === 0 &&
!state.pendingToolAudioAsVoice &&
!state.pendingToolTrustedLocalMedia
) {
return payload;
}
const mergedMediaUrls = Array.from(
@@ -201,15 +212,24 @@ export function consumePendingToolMediaIntoReply(
...payload,
mediaUrls: mergedMediaUrls.length ? mergedMediaUrls : undefined,
audioAsVoice: payload.audioAsVoice || state.pendingToolAudioAsVoice || undefined,
trustedLocalMedia:
payload.trustedLocalMedia || state.pendingToolTrustedLocalMedia || undefined,
};
clearPendingToolMedia(state);
return mergedPayload;
}
export function consumePendingToolMediaReply(
state: Pick<EmbeddedPiSubscribeState, "pendingToolMediaUrls" | "pendingToolAudioAsVoice">,
state: Pick<
EmbeddedPiSubscribeState,
"pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
>,
): BlockReplyPayload | null {
if (state.pendingToolMediaUrls.length === 0 && !state.pendingToolAudioAsVoice) {
if (
state.pendingToolMediaUrls.length === 0 &&
!state.pendingToolAudioAsVoice &&
!state.pendingToolTrustedLocalMedia
) {
return null;
}
const payload: BlockReplyPayload = {
@@ -217,6 +237,7 @@ export function consumePendingToolMediaReply(
? Array.from(new Set(state.pendingToolMediaUrls))
: undefined,
audioAsVoice: state.pendingToolAudioAsVoice || undefined,
trustedLocalMedia: state.pendingToolTrustedLocalMedia || undefined,
};
clearPendingToolMedia(state);
return payload;

View File

@@ -47,6 +47,7 @@ function createTestContext(): {
pendingMessagingMediaUrls: new Map<string, string[]>(),
pendingToolMediaUrls: [],
pendingToolAudioAsVoice: false,
pendingToolTrustedLocalMedia: false,
deterministicApprovalPromptPending: false,
replayState: { replayInvalid: false, hadPotentialSideEffects: false },
messagingToolSentTexts: [],

View File

@@ -293,7 +293,7 @@ function collectMessagingMediaUrlsFromToolResult(result: unknown): string[] {
function queuePendingToolMedia(
ctx: ToolHandlerContext,
mediaReply: { mediaUrls: string[]; audioAsVoice?: boolean },
mediaReply: { mediaUrls: string[]; audioAsVoice?: boolean; trustedLocalMedia?: boolean },
) {
const seen = new Set(ctx.state.pendingToolMediaUrls);
for (const mediaUrl of mediaReply.mediaUrls) {
@@ -306,6 +306,9 @@ function queuePendingToolMedia(
if (mediaReply.audioAsVoice) {
ctx.state.pendingToolAudioAsVoice = true;
}
if (mediaReply.trustedLocalMedia) {
ctx.state.pendingToolTrustedLocalMedia = true;
}
}
async function collectEmittedToolOutputMediaUrls(

View File

@@ -81,6 +81,7 @@ export type EmbeddedPiSubscribeState = {
pendingMessagingMediaUrls: Map<string, string[]>;
pendingToolMediaUrls: string[];
pendingToolAudioAsVoice: boolean;
pendingToolTrustedLocalMedia: boolean;
deterministicApprovalPromptPending: boolean;
deterministicApprovalPromptSent: boolean;
lastAssistant?: AgentMessage;
@@ -165,6 +166,7 @@ export type ToolHandlerState = Pick<
| "pendingMessagingMediaUrls"
| "pendingToolMediaUrls"
| "pendingToolAudioAsVoice"
| "pendingToolTrustedLocalMedia"
| "deterministicApprovalPromptPending"
| "replayState"
| "messagingToolSentTexts"

View File

@@ -51,6 +51,22 @@ describe("extractToolResultMediaPaths", () => {
});
});
it("extracts structured media trust markers", () => {
expect(
extractToolResultMediaArtifact({
details: {
media: {
mediaUrl: "/tmp/reply.opus",
trustedLocalMedia: true,
},
},
}),
).toEqual({
mediaUrls: ["/tmp/reply.opus"],
trustedLocalMedia: true,
});
});
it("extracts MEDIA: path from text content block", () => {
const result = {
content: [

View File

@@ -249,6 +249,7 @@ export function filterToolResultMediaUrls(
export type ToolResultMediaArtifact = {
mediaUrls: string[];
audioAsVoice?: boolean;
trustedLocalMedia?: boolean;
};
function readToolResultDetailsMedia(
@@ -292,6 +293,7 @@ export function extractToolResultMediaArtifact(
return {
mediaUrls,
...(detailsMedia.audioAsVoice === true ? { audioAsVoice: true } : {}),
...(detailsMedia.trustedLocalMedia === true ? { trustedLocalMedia: true } : {}),
};
}
}

View File

@@ -123,6 +123,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
pendingMessagingMediaUrls: new Map(),
pendingToolMediaUrls: initialPendingToolMediaUrls,
pendingToolAudioAsVoice: false,
pendingToolTrustedLocalMedia: false,
deterministicApprovalPromptPending: false,
deterministicApprovalPromptSent: false,
};

View File

@@ -15,6 +15,7 @@ export function createBaseToolHandlerState() {
pendingMessagingMediaUrls: new Map<string, string[]>(),
pendingToolMediaUrls: [] as string[],
pendingToolAudioAsVoice: false,
pendingToolTrustedLocalMedia: false,
deterministicApprovalPromptPending: false,
messagingToolSentTexts: [] as string[],
messagingToolSentTextsNormalized: [] as string[],

View File

@@ -35,6 +35,7 @@ describe("createTtsTool", () => {
provider: "test",
media: {
mediaUrl: "/tmp/reply.opus",
trustedLocalMedia: true,
audioAsVoice: true,
},
},

View File

@@ -43,6 +43,7 @@ export function createTtsTool(opts?: {
provider: result.provider,
media: {
mediaUrl: result.audioPath,
trustedLocalMedia: true,
...(result.voiceCompatible ? { audioAsVoice: true } : {}),
},
},

View File

@@ -8,6 +8,10 @@ export type ReplyPayload = {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
/** Internal-only trust signal for gateway webchat local media embedding. */
trustedLocalMedia?: boolean;
/** Treat media as live-only content and avoid persisting the underlying media reference. */
sensitiveMedia?: boolean;
/** Channel-agnostic rich presentation. Core degrades or asks the channel renderer to map it. */
presentation?: MessagePresentation;
/** Channel-agnostic delivery preferences, e.g. pin the sent message when supported. */

View File

@@ -167,6 +167,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
const payload: ReplyPayload = {
mediaUrl: result.audioPath,
audioAsVoice: result.voiceCompatible === true,
trustedLocalMedia: true,
};
return { shouldContinue: false, reply: payload };
}

View File

@@ -4,7 +4,10 @@ import path from "node:path";
import { pathToFileURL } from "node:url";
import { afterEach, describe, expect, it, vi } from "vitest";
import { getDefaultLocalRoots } from "../../media/local-media-access.js";
import { buildWebchatAudioContentBlocksFromReplyPayloads } from "./chat-webchat-media.js";
import {
buildWebchatAssistantMessageFromReplyPayloads,
buildWebchatAudioContentBlocksFromReplyPayloads,
} from "./chat-webchat-media.js";
describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
let tmpDir: string | undefined;
@@ -22,7 +25,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
[{ mediaUrl: audioPath, trustedLocalMedia: true }],
{ localRoots: [tmpDir] },
);
@@ -42,7 +45,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
it("skips remote URLs", async () => {
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([
{ mediaUrl: "https://example.com/a.mp3" },
{ mediaUrl: "https://example.com/a.mp3", trustedLocalMedia: true },
]);
expect(blocks).toHaveLength(0);
});
@@ -53,7 +56,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
fs.writeFileSync(imagePath, Buffer.from([0x89, 0x50, 0x4e, 0x47]));
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: imagePath }],
[{ mediaUrl: imagePath, trustedLocalMedia: true }],
{ localRoots: [tmpDir] },
);
@@ -66,7 +69,10 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
fs.writeFileSync(audioPath, Buffer.from([0x00]));
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }, { mediaUrl: audioPath }],
[
{ mediaUrl: audioPath, trustedLocalMedia: true },
{ mediaUrl: audioPath, trustedLocalMedia: true },
],
{ localRoots: [tmpDir] },
);
expect(blocks).toHaveLength(1);
@@ -78,9 +84,12 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
fs.writeFileSync(audioPath, Buffer.from([0x01]));
const fileUrl = pathToFileURL(audioPath).href;
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: fileUrl }], {
localRoots: [tmpDir],
});
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: fileUrl, trustedLocalMedia: true }],
{
localRoots: [tmpDir],
},
);
expect(blocks).toHaveLength(1);
expect((blocks[0] as { type?: string }).type).toBe("audio");
@@ -94,6 +103,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
{
text: "MEDIA:file://attacker/share/probe.mp3",
mediaUrl: "file://attacker/share/probe.mp3",
trustedLocalMedia: true,
},
]);
@@ -116,7 +126,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
const onLocalAudioAccessDenied = vi.fn();
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
[{ mediaUrl: audioPath, trustedLocalMedia: true }],
{
localRoots: [allowedRoot],
onLocalAudioAccessDenied,
@@ -136,7 +146,9 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
const audioPath = path.join(tmpDir, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0x04]));
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]);
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([
{ mediaUrl: audioPath, trustedLocalMedia: true },
]);
expect(blocks).toHaveLength(1);
expect((blocks[0] as { type?: string }).type).toBe("audio");
@@ -157,7 +169,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
const readSpy = vi.spyOn(fs, "readFileSync");
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
[{ mediaUrl: audioPath, trustedLocalMedia: true }],
{ localRoots: [tmpDir] },
);
@@ -167,4 +179,121 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
statSpy.mockRestore();
readSpy.mockRestore();
});
it("rejects untrusted local audio paths", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const audioPath = path.join(tmpDir, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
{ localRoots: [tmpDir] },
);
expect(blocks).toHaveLength(0);
});
});
// Suite for the new assistant-message builder: verifies that image data URLs become
// structured webchat blocks, that control tokens / reply directives are handled, and
// that unsafe or oversized media is rejected (builder returns null in those cases).
describe("buildWebchatAssistantMessageFromReplyPayloads", () => {
  it("converts image data URLs into webchat image blocks", async () => {
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        text: "Scan this QR code with the OpenClaw iOS app:",
        mediaUrl: "data:image/png;base64,cG5n",
      },
    ]);

    // Text block first, then the embedded image; transcript mirrors the visible text.
    expect(message).toEqual({
      transcriptText: "Scan this QR code with the OpenClaw iOS app:",
      content: [
        { type: "text", text: "Scan this QR code with the OpenClaw iOS app:" },
        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
      ],
    });
  });

  it("suppresses control tokens and falls back to synthetic image text", async () => {
    // NO_REPLY is a silent-control token; the builder must not surface it as text.
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        text: "NO_REPLY",
        mediaUrl: "data:image/png;base64,cG5n",
      },
    ]);

    expect(message).toEqual({
      transcriptText: "Image reply",
      content: [
        { type: "text", text: "Image reply" },
        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
      ],
    });
  });

  it("preserves reply directives in transcript text for media replies", async () => {
    // Media-only payload with a reply directive: the directive must survive into the
    // transcript so threading is preserved.
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        replyToCurrent: true,
        mediaUrl: "data:image/png;base64,cG5n",
      },
    ]);

    expect(message).toEqual({
      transcriptText: "[[reply_to_current]]Image reply",
      content: [
        { type: "text", text: "[[reply_to_current]]Image reply" },
        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
      ],
    });
  });

  it("drops oversized data image URLs", async () => {
    // 2.1M chars exceeds the data-URL character cap, so no media survives and the
    // builder returns null (no embeddable content).
    const hugeBase64 = "A".repeat(2_100_000);
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        text: "too large",
        mediaUrl: `data:image/png;base64,${hugeBase64}`,
      },
    ]);

    expect(message).toBeNull();
  });

  it("rejects remote image URLs", async () => {
    // Only data: URLs are embeddable; remote http(s) URLs must be refused.
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        text: "remote",
        mediaUrl: "https://example.com/final.png",
      },
    ]);

    expect(message).toBeNull();
  });

  it("rejects svg data URLs", async () => {
    // SVG can carry scripts, so image/svg+xml is excluded from the allow-list.
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        text: "svg",
        mediaUrl: "data:image/svg+xml;base64,PHN2Zy8+",
      },
    ]);

    expect(message).toBeNull();
  });

  it("sanitizes reply ids before embedding directive prefixes", async () => {
    // A hostile replyToId trying to smuggle "]]" / extra directives must be stripped
    // before it is interpolated into the [[reply_to:…]] prefix.
    const message = await buildWebchatAssistantMessageFromReplyPayloads([
      {
        replyToId: "abc]]\n[[audio_as_voice]]",
        mediaUrl: "data:image/png;base64,cG5n",
      },
    ]);

    expect(message).toEqual({
      transcriptText: "[[reply_to:abcaudio_as_voice]]Image reply",
      content: [
        { type: "text", text: "[[reply_to:abcaudio_as_voice]]Image reply" },
        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
      ],
    });
  });
});

View File

@@ -6,9 +6,22 @@ import { assertLocalMediaAllowed, LocalMediaAccessError } from "../../media/loca
import { isAudioFileName } from "../../media/mime.js";
import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js";
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
import { sanitizeReplyDirectiveId } from "../../utils/directive-tags.js";
import { isSuppressedControlReplyText } from "../control-reply-text.js";
/** Cap embedded audio size to avoid multiMB payloads on the chat WebSocket. */
const MAX_WEBCHAT_AUDIO_BYTES = 15 * 1024 * 1024;
const MAX_WEBCHAT_IMAGE_DATA_URL_CHARS = 2_000_000;
const MAX_WEBCHAT_IMAGE_DATA_BYTES = 1_500_000;
const ALLOWED_WEBCHAT_DATA_IMAGE_MEDIA_TYPES = new Set([
"image/apng",
"image/avif",
"image/bmp",
"image/gif",
"image/jpeg",
"image/png",
"image/webp",
]);
const MIME_BY_EXT: Record<string, string> = {
".aac": "audio/aac",
@@ -26,6 +39,8 @@ type WebchatAudioEmbeddingOptions = {
onLocalAudioAccessDenied?: (err: LocalMediaAccessError) => void;
};
type WebchatAssistantMediaOptions = WebchatAudioEmbeddingOptions;
/** Map `mediaUrl` strings to an absolute filesystem path for local embedding (plain paths or `file:` URLs). */
function resolveLocalMediaPathForEmbedding(raw: string): string | null {
const trimmed = raw.trim();
@@ -62,9 +77,13 @@ function resolveLocalMediaPathForEmbedding(raw: string): string | null {
/** Returns a readable local file path when it is a regular file and within the size cap (single stat before read). */
async function resolveLocalAudioFileForEmbedding(
payload: ReplyPayload,
raw: string,
options: WebchatAudioEmbeddingOptions | undefined,
): Promise<string | null> {
if (payload.trustedLocalMedia !== true) {
return null;
}
const resolved = resolveLocalMediaPathForEmbedding(raw);
if (!resolved) {
return null;
@@ -92,6 +111,47 @@ function mimeTypeForPath(filePath: string): string {
return MIME_BY_EXT[ext] ?? "audio/mpeg";
}
/**
 * Estimate the decoded byte length of a base64 string without decoding it.
 * Whitespace is ignored (the embeddable-image regex permits it inside the
 * payload) and trailing `=` padding is subtracted from the 4→3 expansion.
 */
function estimateBase64DecodedBytes(base64: string): number {
  const compact = base64.replace(/\s+/g, "");
  let padding = 0;
  if (compact.endsWith("==")) {
    padding = 2;
  } else if (compact.endsWith("=")) {
    padding = 1;
  }
  return Math.floor((compact.length * 3) / 4) - padding;
}
/**
 * Validate a candidate media URL for inline webchat embedding.
 * Accepts only base64 `data:image/*` URLs whose media type is on the
 * allow-list and whose size fits both the character and decoded-byte caps;
 * returns the trimmed URL on success, otherwise null.
 */
function resolveEmbeddableImageUrl(url: string): string | null {
  const candidate = url.trim();
  if (!candidate) {
    return null;
  }
  // Cheap length gate before running the regex over a potentially huge string.
  if (candidate.length > MAX_WEBCHAT_IMAGE_DATA_URL_CHARS) {
    return null;
  }
  const parsed = /^data:(image\/[a-z0-9.+-]+);base64,([a-z0-9+/=\s]+)$/i.exec(candidate);
  if (!parsed) {
    return null;
  }
  const mediaType = normalizeLowercaseStringOrEmpty(parsed[1]);
  const base64Payload = parsed[2];
  // Allow-list excludes scriptable formats such as SVG.
  if (!ALLOWED_WEBCHAT_DATA_IMAGE_MEDIA_TYPES.has(mediaType)) {
    return null;
  }
  if (estimateBase64DecodedBytes(base64Payload) > MAX_WEBCHAT_IMAGE_DATA_BYTES) {
    return null;
  }
  return candidate;
}
/**
 * Build the inline reply-threading directive for a payload.
 * A sanitized explicit reply id wins over the "reply to current" flag;
 * returns the empty string when the payload carries neither.
 */
function resolveReplyDirectivePrefix(payload: ReplyPayload): string {
  // Sanitization strips characters that could break out of the directive markup.
  const sanitizedId = sanitizeReplyDirectiveId(payload.replyToId);
  if (sanitizedId) {
    return `[[reply_to:${sanitizedId}]]`;
  }
  return payload.replyToCurrent ? "[[reply_to_current]]" : "";
}
/**
* Build Control UI / transcript `content` blocks for local TTS (or other) audio files
* referenced by slash-command / agent replies when the webchat path only had text aggregation.
@@ -109,7 +169,7 @@ export async function buildWebchatAudioContentBlocksFromReplyPayloads(
if (!url) {
continue;
}
const resolved = await resolveLocalAudioFileForEmbedding(url, options);
const resolved = await resolveLocalAudioFileForEmbedding(payload, url, options);
if (!resolved || seen.has(resolved)) {
continue;
}
@@ -123,6 +183,87 @@ export async function buildWebchatAudioContentBlocksFromReplyPayloads(
return blocks;
}
/**
 * Aggregate reply payloads into a single webchat assistant message of
 * structured content blocks (text + embedded audio/image) plus a transcript
 * string, instead of the legacy MEDIA:-prefixed markdown text.
 *
 * Per payload: suppressed control text (e.g. NO_REPLY) is dropped, local audio
 * is embedded only for trusted payloads, and images are accepted only as
 * allow-listed base64 data URLs. Duplicate media across payloads is emitted
 * once. Returns null when no payload yields any embeddable media, so callers
 * can fall back to the plain-text path.
 *
 * @param payloads reply payloads produced for one assistant turn.
 * @param options  local-media embedding options (roots, denial callback).
 * @returns content blocks + transcript text, or null when nothing embeddable.
 */
export async function buildWebchatAssistantMessageFromReplyPayloads(
  payloads: ReplyPayload[],
  options?: WebchatAssistantMediaOptions,
): Promise<{ content: Array<Record<string, unknown>>; transcriptText: string } | null> {
  const content: Array<Record<string, unknown>> = [];
  const transcriptTextParts: string[] = [];
  // Dedupe sets span ALL payloads so the same file/data URL embeds only once.
  const seenAudio = new Set<string>();
  const seenImages = new Set<string>();
  let hasAudio = false;
  let hasImage = false;
  for (const payload of payloads) {
    const visibleText = payload.text?.trim();
    // Control tokens (silent replies) must never surface as visible text.
    const text =
      visibleText && !isSuppressedControlReplyText(visibleText) ? visibleText : undefined;
    const replyDirectivePrefix = resolveReplyDirectivePrefix(payload);
    let payloadHasAudio = false;
    let payloadHasImage = false;
    const payloadMediaBlocks: Array<Record<string, unknown>> = [];
    const parts = resolveSendableOutboundReplyParts(payload);
    for (const raw of parts.mediaUrls) {
      const url = raw.trim();
      if (!url) {
        continue;
      }
      // Audio path first: only trusted local payloads resolve here (the helper
      // enforces trustedLocalMedia + local-root checks).
      const resolvedAudioPath = await resolveLocalAudioFileForEmbedding(payload, url, options);
      if (resolvedAudioPath) {
        if (seenAudio.has(resolvedAudioPath)) {
          continue;
        }
        seenAudio.add(resolvedAudioPath);
        const block = tryReadLocalAudioContentBlock(resolvedAudioPath);
        if (block) {
          payloadMediaBlocks.push(block);
          hasAudio = true;
          payloadHasAudio = true;
        }
        // An audio-shaped URL never falls through to the image path.
        continue;
      }
      // Image path: only allow-listed, size-capped data URLs are embeddable.
      const imageUrl = resolveEmbeddableImageUrl(url);
      if (!imageUrl || seenImages.has(imageUrl)) {
        continue;
      }
      seenImages.add(imageUrl);
      payloadMediaBlocks.push({ type: "input_image", image_url: imageUrl });
      hasImage = true;
      payloadHasImage = true;
    }
    // Synthesize placeholder text only for the first text-bearing entry, when the
    // payload has media but either no visible text or only a reply directive.
    const needsSyntheticText =
      payloadMediaBlocks.length > 0 && (!text || replyDirectivePrefix) && transcriptTextParts.length === 0;
    const syntheticText = needsSyntheticText
      ? payloadHasAudio && payloadHasImage
        ? "Media reply"
        : payloadHasAudio
          ? "Audio reply"
          : "Image reply"
      : undefined;
    // Real text wins over the synthetic placeholder.
    const blockText = text ?? syntheticText;
    if (blockText) {
      const fullText = replyDirectivePrefix ? `${replyDirectivePrefix}${blockText}` : blockText;
      transcriptTextParts.push(fullText);
      content.push({ type: "text", text: fullText });
    } else if (replyDirectivePrefix) {
      // Directive-only payload: keep the threading marker even without text.
      transcriptTextParts.push(replyDirectivePrefix);
      content.push({ type: "text", text: replyDirectivePrefix });
    }
    // Media blocks always follow the payload's text block.
    content.push(...payloadMediaBlocks);
  }
  if (!hasAudio && !hasImage) {
    // Nothing embeddable: signal the caller to use the plain-text path.
    return null;
  }
  const transcriptText =
    transcriptTextParts.join("\n\n").trim() ||
    (hasAudio && hasImage ? "Media reply" : hasAudio ? "Audio reply" : "Image reply");
  if (transcriptTextParts.length === 0) {
    // Media-only message: prepend the fallback label so content has a text block.
    content.unshift({ type: "text", text: transcriptText });
  }
  return { content, transcriptText };
}
function tryReadLocalAudioContentBlock(filePath: string): Record<string, unknown> | null {
try {
const buf = fs.readFileSync(filePath);

View File

@@ -20,10 +20,23 @@ const mockState = vi.hoisted(() => ({
sessionId: "sess-1",
mainSessionKey: "main",
finalText: "[[reply_to_current]]",
finalPayload: null as { text?: string; mediaUrl?: string } | null,
finalPayload: null as {
text?: string;
mediaUrl?: string;
sensitiveMedia?: boolean;
replyToId?: string;
replyToCurrent?: boolean;
} | null,
dispatchedReplies: [] as Array<{
kind: "tool" | "block" | "final";
payload: { text?: string; mediaUrl?: string; mediaUrls?: string[] };
payload: {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
trustedLocalMedia?: boolean;
replyToId?: string;
replyToCurrent?: boolean;
};
}>,
dispatchError: null as Error | null,
triggerAgentRunStart: false,
@@ -91,16 +104,28 @@ vi.mock("../../auto-reply/dispatch.js", () => ({
async (params: {
ctx: MsgContext;
dispatcher: {
sendFinalReply: (payload: { text?: string; mediaUrl?: string }) => boolean;
sendFinalReply: (payload: {
text?: string;
mediaUrl?: string;
sensitiveMedia?: boolean;
replyToId?: string;
replyToCurrent?: boolean;
}) => boolean;
sendBlockReply: (payload: {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
trustedLocalMedia?: boolean;
replyToId?: string;
replyToCurrent?: boolean;
}) => boolean;
sendToolResult: (payload: {
text?: string;
mediaUrl?: string;
mediaUrls?: string[];
trustedLocalMedia?: boolean;
replyToId?: string;
replyToCurrent?: boolean;
}) => boolean;
markComplete: () => void;
waitForIdle: () => Promise<void>;
@@ -130,9 +155,7 @@ vi.mock("../../auto-reply/dispatch.js", () => ({
params.dispatcher.sendBlockReply(reply.payload);
continue;
}
params.dispatcher.sendFinalReply({
text: reply.payload.text ?? "",
});
params.dispatcher.sendFinalReply(reply.payload);
}
} else {
params.dispatcher.sendFinalReply(mockState.finalPayload ?? { text: mockState.finalText });
@@ -500,6 +523,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
payload: {
mediaUrl: audioPath,
mediaUrls: [audioPath],
trustedLocalMedia: true,
},
},
];
@@ -528,7 +552,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
expect(assistantUpdate).toMatchObject({
message: {
role: "assistant",
idempotencyKey: "idem-agent-audio:assistant-audio",
idempotencyKey: "idem-agent-audio:assistant-media",
content: [
{ type: "text", text: "Audio reply" },
{
@@ -544,6 +568,31 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
});
});
it("renders image reply payloads as assistant image content instead of MEDIA text", async () => {
createTranscriptFixture("openclaw-chat-send-agent-image-");
mockState.finalPayload = {
text: "Scan this QR code with the OpenClaw iOS app:",
mediaUrl: "data:image/png;base64,cG5n",
};
const respond = vi.fn();
const context = createChatContext();
const payload = await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-agent-image",
});
expect(payload?.message).toMatchObject({
role: "assistant",
content: [
{ type: "text", text: "Scan this QR code with the OpenClaw iOS app:" },
{ type: "input_image", image_url: "data:image/png;base64,cG5n" },
],
});
expect(JSON.stringify(payload?.message)).not.toContain("MEDIA:data:image/png;base64,cG5n");
});
it("chat.inject keeps message defined when directive tag is the only content", async () => {
createTranscriptFixture("openclaw-chat-inject-directive-only-");
const respond = vi.fn();
@@ -693,7 +742,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
respond,
idempotencyKey: "idem-untrusted-context",
});
expect(extractFirstTextBlock(payload)).toBe("hello");
expect(extractFirstTextBlock(payload)?.trim()).toBe("hello");
});
it("chat.send non-streaming final broadcasts and routes on the canonical session key", async () => {
@@ -1867,7 +1916,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
it("preserves media-only final replies in the final broadcast message", async () => {
createTranscriptFixture("openclaw-chat-send-media-only-final-");
mockState.finalPayload = { mediaUrl: "https://example.com/final.png" };
mockState.finalPayload = { mediaUrl: "data:image/png;base64,cG5n" };
const respond = vi.fn();
const context = createChatContext();
@@ -1877,14 +1926,20 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
idempotencyKey: "idem-media-only-final",
});
expect(extractFirstTextBlock(payload)).toBe("MEDIA:https://example.com/final.png");
expect(payload?.message).toMatchObject({
role: "assistant",
content: [
{ type: "text", text: "Image reply" },
{ type: "input_image", image_url: "data:image/png;base64,cG5n" },
],
});
});
it("strips NO_REPLY from transcript text when final replies only carry media", async () => {
createTranscriptFixture("openclaw-chat-send-media-only-silent-final-");
mockState.finalPayload = {
text: "NO_REPLY",
mediaUrl: "https://example.com/final.png",
mediaUrl: "data:image/png;base64,cG5n",
};
const respond = vi.fn();
const context = createChatContext();
@@ -1895,7 +1950,122 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
idempotencyKey: "idem-media-only-silent-final",
});
expect(extractFirstTextBlock(payload)).toBe("MEDIA:https://example.com/final.png");
expect(payload?.message).toMatchObject({
role: "assistant",
content: [
{ type: "text", text: "Image reply" },
{ type: "input_image", image_url: "data:image/png;base64,cG5n" },
],
});
});
it("preserves reply tags in transcript updates for media replies while stripping them from the broadcast", async () => {
createTranscriptFixture("openclaw-chat-send-media-reply-tags-");
mockState.finalPayload = {
replyToCurrent: true,
mediaUrl: "data:image/png;base64,cG5n",
};
const respond = vi.fn();
const context = createChatContext();
const payload = await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-media-reply-tags",
});
expect(payload?.message).toMatchObject({
role: "assistant",
content: [
{ type: "text", text: "Image reply" },
{ type: "input_image", image_url: "data:image/png;base64,cG5n" },
],
});
const transcriptUpdate = mockState.emittedTranscriptUpdates.find(
(update) =>
typeof update.message === "object" &&
update.message !== null &&
(update.message as { role?: unknown }).role === "assistant" &&
Array.isArray((update.message as { content?: unknown }).content) &&
((update.message as { content: Array<{ type?: string; text?: string }> }).content.some(
(block) => block?.type === "text" && block?.text?.includes("[[reply_to_current]]"),
) ??
false),
);
expect(transcriptUpdate).toMatchObject({
message: {
role: "assistant",
content: [
{ type: "text", text: "[[reply_to_current]]Image reply" },
{ type: "input_image", image_url: "data:image/png;base64,cG5n" },
],
},
});
});
it("does not persist sensitive image media into transcript updates", async () => {
createTranscriptFixture("openclaw-chat-send-sensitive-media-final-");
mockState.finalPayload = {
text: "Scan this QR code with the OpenClaw iOS app:",
mediaUrl: "data:image/png;base64,cG5n",
sensitiveMedia: true,
};
const respond = vi.fn();
const context = createChatContext();
const payload = await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-sensitive-media-final",
});
expect(payload?.message).toMatchObject({
role: "assistant",
content: [
{ type: "text", text: "Scan this QR code with the OpenClaw iOS app:" },
{ type: "input_image", image_url: "data:image/png;base64,cG5n" },
],
});
const transcriptUpdate = mockState.emittedTranscriptUpdates.find(
(update) =>
typeof update.message === "object" &&
update.message !== null &&
(update.message as { role?: unknown }).role === "assistant",
);
expect(transcriptUpdate).toMatchObject({
message: {
role: "assistant",
content: [{ type: "text", text: "Scan this QR code with the OpenClaw iOS app:" }],
},
});
expect(JSON.stringify(transcriptUpdate)).not.toContain("input_image");
expect(JSON.stringify(transcriptUpdate)).not.toContain("data:image/png;base64,cG5n");
});
it("sanitizes replyToId before emitting inline reply directives", async () => {
createTranscriptFixture("openclaw-chat-send-sanitized-reply-id-");
mockState.finalPayload = {
text: "hello",
replyToId: "abc]]\n[[audio_as_voice]]",
};
const respond = vi.fn();
const context = createChatContext();
const payload = await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-sanitized-reply-id",
});
expect(extractFirstTextBlock(payload)?.trim()).toBe("hello");
const transcriptUpdate = mockState.emittedTranscriptUpdates.find(
(update) =>
typeof update.message === "object" &&
update.message !== null &&
(update.message as { role?: unknown }).role === "assistant",
);
expect(JSON.stringify(transcriptUpdate)).toContain("[[reply_to:abcaudio_as_voice]]");
expect(JSON.stringify(transcriptUpdate)).not.toContain("[[audio_as_voice]]");
});
it("drops image attachments for text-only session models", async () => {

View File

@@ -29,6 +29,7 @@ import {
import {
stripInlineDirectiveTagsForDisplay,
stripInlineDirectiveTagsFromMessageForDisplay,
sanitizeReplyDirectiveId,
} from "../../utils/directive-tags.js";
import {
INTERNAL_MESSAGE_CHANNEL,
@@ -83,7 +84,7 @@ import { injectTimestamp, timestampOptsFromConfig } from "./agent-timestamp.js";
import { setGatewayDedupeEntry } from "./agent-wait-dedupe.js";
import { normalizeRpcAttachmentsToChatAttachments } from "./attachment-normalize.js";
import { appendInjectedAssistantMessageToTranscript } from "./chat-transcript-inject.js";
import { buildWebchatAudioContentBlocksFromReplyPayloads } from "./chat-webchat-media.js";
import { buildWebchatAssistantMessageFromReplyPayloads } from "./chat-webchat-media.js";
import type {
GatewayRequestContext,
GatewayRequestHandlerOptions,
@@ -123,26 +124,19 @@ function isMediaBearingPayload(payload: ReplyPayload): boolean {
return false;
}
async function buildWebchatAudioOnlyAssistantMessage(
async function buildWebchatAssistantMediaMessage(
payloads: ReplyPayload[],
options?: {
localRoots?: readonly string[];
onLocalAudioAccessDenied?: (message: string) => void;
},
): Promise<{ content: Array<Record<string, unknown>>; transcriptText: string } | null> {
const audioBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads(payloads, {
return buildWebchatAssistantMessageFromReplyPayloads(payloads, {
localRoots: options?.localRoots,
onLocalAudioAccessDenied: (err) => {
options?.onLocalAudioAccessDenied?.(formatForLog(err));
},
});
if (audioBlocks.length === 0) {
return null;
}
return {
transcriptText: "Audio reply",
content: [{ type: "text", text: "Audio reply" }, ...audioBlocks],
};
}
export const DEFAULT_CHAT_HISTORY_TEXT_MAX_CHARS = 8_000;
@@ -225,8 +219,9 @@ function buildTranscriptReplyText(payloads: ReplyPayload[]): string {
.map((payload) => {
const parts = resolveSendableOutboundReplyParts(payload);
const lines: string[] = [];
if (typeof payload.replyToId === "string" && payload.replyToId.trim()) {
lines.push(`[[reply_to:${payload.replyToId.trim()}]]`);
const replyToId = sanitizeReplyDirectiveId(payload.replyToId);
if (replyToId) {
lines.push(`[[reply_to:${replyToId}]]`);
} else if (payload.replyToCurrent) {
lines.push("[[reply_to_current]]");
}
@@ -235,6 +230,9 @@ function buildTranscriptReplyText(payloads: ReplyPayload[]): string {
lines.push(text);
}
for (const mediaUrl of parts.mediaUrls) {
if (payload.sensitiveMedia === true) {
continue;
}
const trimmed = mediaUrl.trim();
if (trimmed) {
lines.push(`MEDIA:${trimmed}`);
@@ -249,6 +247,10 @@ function buildTranscriptReplyText(payloads: ReplyPayload[]): string {
return chunks.join("\n\n").trim();
}
function hasSensitiveMediaPayload(payloads: ReplyPayload[]): boolean {
return payloads.some((payload) => payload.sensitiveMedia === true && isMediaBearingPayload(payload));
}
function resolveChatSendOriginatingRoute(params: {
client?: { mode?: string | null; id?: string | null } | null;
deliver?: boolean;
@@ -2036,7 +2038,7 @@ export const chatHandlers: GatewayRequestHandlers = {
channel: INTERNAL_MESSAGE_CHANNEL,
});
const deliveredReplies: Array<{ payload: ReplyPayload; kind: "block" | "final" }> = [];
let appendedWebchatAgentAudio = false;
let appendedWebchatAgentMedia = false;
let userTranscriptUpdatePromise: Promise<void> | null = null;
const emitUserTranscriptUpdate = async () => {
if (userTranscriptUpdatePromise) {
@@ -2098,37 +2100,37 @@ export const chatHandlers: GatewayRequestHandlers = {
savedImages: await persistedImagesPromise,
});
};
const appendWebchatAgentAudioTranscriptIfNeeded = async (payload: ReplyPayload) => {
if (!agentRunStarted || appendedWebchatAgentAudio || !isMediaBearingPayload(payload)) {
const appendWebchatAgentMediaTranscriptIfNeeded = async (payload: ReplyPayload) => {
if (!agentRunStarted || appendedWebchatAgentMedia || !isMediaBearingPayload(payload)) {
return;
}
const audioMessage = await buildWebchatAudioOnlyAssistantMessage([payload], {
const mediaMessage = await buildWebchatAssistantMediaMessage([payload], {
localRoots: getAgentScopedMediaLocalRoots(cfg, agentId),
onLocalAudioAccessDenied: (message) => {
context.logGateway.warn(`webchat audio embedding denied local path: ${message}`);
},
});
if (!audioMessage) {
if (!mediaMessage) {
return;
}
const { storePath: latestStorePath, entry: latestEntry } = loadSessionEntry(sessionKey);
const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId;
const appended = appendAssistantTranscriptMessage({
message: audioMessage.transcriptText,
content: audioMessage.content,
message: mediaMessage.transcriptText,
...(payload.sensitiveMedia === true ? {} : { content: mediaMessage.content }),
sessionId,
storePath: latestStorePath,
sessionFile: latestEntry?.sessionFile,
agentId,
createIfMissing: true,
idempotencyKey: `${clientRunId}:assistant-audio`,
idempotencyKey: `${clientRunId}:assistant-media`,
});
if (appended.ok) {
appendedWebchatAgentAudio = true;
appendedWebchatAgentMedia = true;
return;
}
context.logGateway.warn(
`webchat transcript append failed for audio reply: ${appended.error ?? "unknown error"}`,
`webchat transcript append failed for media reply: ${appended.error ?? "unknown error"}`,
);
};
const dispatcher = createReplyDispatcher({
@@ -2141,7 +2143,7 @@ export const chatHandlers: GatewayRequestHandlers = {
case "block":
case "final":
deliveredReplies.push({ payload, kind: info.kind });
await appendWebchatAgentAudioTranscriptIfNeeded(payload);
await appendWebchatAgentMediaTranscriptIfNeeded(payload);
break;
case "tool":
// Tool results that carry audio (e.g. the TTS tool) must be promoted
@@ -2231,18 +2233,25 @@ export const chatHandlers: GatewayRequestHandlers = {
sessionKey,
});
} else {
const combinedReply = buildTranscriptReplyText(
deliveredReplies
.filter((entry) => entry.kind === "final")
.map((entry) => entry.payload),
);
const finalPayloads = deliveredReplies
.filter((entry) => entry.kind === "final")
.map((entry) => entry.payload);
const combinedReply = buildTranscriptReplyText(finalPayloads);
const mediaMessage = await buildWebchatAssistantMediaMessage(finalPayloads, {
localRoots: getAgentScopedMediaLocalRoots(cfg, agentId),
onLocalAudioAccessDenied: (message) => {
context.logGateway.warn(`webchat audio embedding denied local path: ${message}`);
},
});
const hasSensitiveMedia = hasSensitiveMediaPayload(finalPayloads);
let message: Record<string, unknown> | undefined;
if (combinedReply) {
if (mediaMessage || combinedReply) {
const { storePath: latestStorePath, entry: latestEntry } =
loadSessionEntry(sessionKey);
const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId;
const appended = appendAssistantTranscriptMessage({
message: combinedReply,
message: mediaMessage?.transcriptText ?? combinedReply,
...(mediaMessage && !hasSensitiveMedia ? { content: mediaMessage.content } : {}),
sessionId,
storePath: latestStorePath,
sessionFile: latestEntry?.sessionFile,
@@ -2250,7 +2259,14 @@ export const chatHandlers: GatewayRequestHandlers = {
createIfMissing: true,
});
if (appended.ok) {
message = appended.message;
if (hasSensitiveMedia && mediaMessage) {
message = {
...appended.message,
content: mediaMessage.content,
};
} else {
message = appended.message;
}
} else {
context.logGateway.warn(
`webchat transcript append failed: ${appended.error ?? "unknown error"}`,
@@ -2258,7 +2274,7 @@ export const chatHandlers: GatewayRequestHandlers = {
const now = Date.now();
message = {
role: "assistant",
content: [{ type: "text", text: combinedReply }],
content: mediaMessage?.content ?? [{ type: "text", text: combinedReply }],
timestamp: now,
// Keep this compatible with Pi stopReason enums even though this message isn't
// persisted to the transcript due to the append failure.

View File

@@ -1,4 +1,3 @@
import type { ReplyPayload } from "../auto-reply/reply-payload.js";
import type { ExecApprovalForwardTarget } from "../config/types.approvals.js";
import { matchesApprovalRequestFilters } from "../infra/approval-request-filters.js";
import { getExecApprovalReplyMetadata } from "../infra/exec-approval-reply.js";
@@ -9,6 +8,7 @@ import {
normalizeOptionalString,
} from "../shared/string-coerce.js";
import type { OpenClawConfig } from "./config-runtime.js";
import type { ReplyPayload } from "./reply-payload.js";
import { normalizeAccountId } from "./routing.js";
type ApprovalRequest = ExecApprovalRequest | PluginApprovalRequest;

View File

@@ -1,4 +1,3 @@
import type { ReplyPayload } from "../auto-reply/reply-payload.js";
import {
buildApprovalInteractiveReply,
type ExecApprovalReplyDecision,
@@ -10,6 +9,7 @@ import {
type PluginApprovalResolved,
} from "../infra/plugin-approvals.js";
import { normalizeOptionalString } from "../shared/string-coerce.js";
import type { ReplyPayload } from "./reply-payload.js";
const DEFAULT_ALLOWED_DECISIONS = ["allow-once", "allow-always", "deny"] as const;

View File

@@ -1,4 +1,3 @@
import type { ReplyPayload } from "../auto-reply/reply-payload.js";
import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
import {
createReplyPrefixContext,
@@ -11,6 +10,7 @@ import {
type CreateTypingCallbacksParams,
type TypingCallbacks,
} from "../channels/typing.js";
import type { ReplyPayload } from "./reply-payload.js";
export type ReplyPrefixContext = ReplyPrefixContextBundle["prefixContext"];
export type { ReplyPrefixContextBundle, ReplyPrefixOptions };

View File

@@ -109,7 +109,7 @@ export type {
export type { OpenClawConfig } from "../config/config.js";
export type { OutboundIdentity } from "../infra/outbound/identity.js";
export type { HistoryEntry } from "../auto-reply/reply/history.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export type { AllowlistMatch } from "../channels/allowlist-match.js";
export type {
BaseProbeResult,

View File

@@ -8,7 +8,7 @@ export {
DEFAULT_GROUP_HISTORY_LIMIT,
recordPendingHistoryEntryIfEnabled,
} from "../auto-reply/reply/history.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export { logTypingFailure } from "../channels/logging.js";
export type { AllowlistMatch } from "../channels/plugins/allowlist-match.js";
export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js";

View File

@@ -91,7 +91,7 @@ export * from "./music-generation.js";
export type { SecretInput, SecretRef } from "../config/types.secrets.js";
export type { RuntimeEnv } from "../runtime.js";
export type { HookEntry } from "../hooks/types.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export type { WizardPrompter } from "../wizard/prompts.js";
export type { ContextEngineFactory } from "../context-engine/registry.js";
export type { DiagnosticEventPayload } from "../infra/diagnostic-events.js";

View File

@@ -5,7 +5,7 @@ export type {
} from "../channels/plugins/types.public.js";
export type { ChannelPlugin } from "../channels/plugins/types.plugin.js";
export type { OpenClawConfig } from "../config/config.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export type { ChannelSetupAdapter } from "../channels/plugins/types.adapters.js";
export type { OpenClawPluginApi, PluginRuntime } from "./channel-plugin-common.js";

View File

@@ -29,7 +29,7 @@ export {
readStringParam,
} from "../agents/tools/common.js";
export type { BlockReplyContext } from "../auto-reply/get-reply-options.types.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export { resolveAckReaction } from "../agents/identity.js";
export {
compileAllowlist,

View File

@@ -10,7 +10,7 @@ export {
recordPendingHistoryEntryIfEnabled,
} from "../auto-reply/reply/history.js";
export { listSkillCommandsForAgents } from "../auto-reply/skill-commands.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export type { ChatType } from "../channels/chat-type.js";
export { resolveControlCommandGate } from "../channels/command-gating.js";
export { logInboundDrop, logTypingFailure } from "../channels/logging.js";

View File

@@ -12,7 +12,7 @@ export {
recordPendingHistoryEntryIfEnabled,
} from "../auto-reply/reply/history.js";
export { isSilentReplyText, SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export { mergeAllowlist, summarizeMapping } from "../channels/allowlists/resolve-utils.js";
export {
resolveControlCommandGate,

View File

@@ -7,4 +7,4 @@ export {
} from "../auto-reply/chunk.js";
export type { ChunkMode } from "../auto-reply/chunk.js";
export { isSilentReplyText } from "../auto-reply/tokens.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";

View File

@@ -4,4 +4,4 @@ export {
dispatchReplyWithBufferedBlockDispatcher,
dispatchReplyWithDispatcher,
} from "../auto-reply/reply/provider-dispatcher.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";

View File

@@ -1,6 +1,7 @@
import { describe, expect, it, vi } from "vitest";
import {
countOutboundMedia,
createNormalizedOutboundDeliverer,
deliverFormattedTextWithAttachments,
deliverTextOrMediaReply,
hasOutboundMedia,
@@ -8,6 +9,7 @@ import {
hasOutboundText,
isReasoningReplyPayload,
isNumericTargetId,
normalizeOutboundReplyPayload,
resolveOutboundMediaUrls,
resolveSendableOutboundReplyParts,
resolveTextChunksWithFallback,
@@ -87,6 +89,45 @@ describe("sendPayloadWithChunkedTextAndMedia", () => {
});
});
describe("normalizeOutboundReplyPayload", () => {
it("strips internal-only local media trust flags from loose payload objects", () => {
expect(
normalizeOutboundReplyPayload({
text: "hello",
mediaUrl: "/tmp/reply.opus",
trustedLocalMedia: true,
sensitiveMedia: true,
replyToId: "abc123",
}),
).toEqual({
text: "hello",
mediaUrl: "/tmp/reply.opus",
sensitiveMedia: true,
replyToId: "abc123",
});
});
it("keeps the normalized deliverer from forwarding trustedLocalMedia", async () => {
const handler = vi.fn(async () => {});
const deliver = createNormalizedOutboundDeliverer(handler);
await deliver({
text: "hello",
mediaUrl: "/tmp/reply.opus",
trustedLocalMedia: true,
sensitiveMedia: true,
});
expect(handler).toHaveBeenCalledWith({
text: "hello",
mediaUrl: "/tmp/reply.opus",
sensitiveMedia: true,
replyToId: undefined,
mediaUrls: undefined,
});
});
});
describe("resolveOutboundMediaUrls", () => {
it.each([
{

View File

@@ -1,14 +1,16 @@
import type { ReplyPayload as InternalReplyPayload } from "../auto-reply/reply-payload.js";
import type { ChannelOutboundAdapter } from "../channels/plugins/outbound.types.js";
import { normalizeLowercaseStringOrEmpty, readStringValue } from "../shared/string-coerce.js";
export type { MediaPayload, MediaPayloadInput } from "../channels/plugins/media-payload.js";
export { buildMediaPayload } from "../channels/plugins/media-payload.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type ReplyPayload = Omit<InternalReplyPayload, "trustedLocalMedia">;
export type OutboundReplyPayload = {
text?: string;
mediaUrls?: string[];
mediaUrl?: string;
sensitiveMedia?: boolean;
replyToId?: string;
};
@@ -72,11 +74,13 @@ export function normalizeOutboundReplyPayload(
)
: undefined;
const mediaUrl = readStringValue(payload.mediaUrl);
const sensitiveMedia = payload.sensitiveMedia === true ? true : undefined;
const replyToId = readStringValue(payload.replyToId);
return {
text,
mediaUrls,
mediaUrl,
sensitiveMedia,
replyToId,
};
}

View File

@@ -54,7 +54,7 @@ export type {
} from "../auto-reply/reply/reply-dispatcher.js";
export { createReplyReferencePlanner } from "../auto-reply/reply/reply-reference.js";
export type { GetReplyOptions, BlockReplyContext } from "../auto-reply/get-reply-options.types.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export type { FinalizedMsgContext, MsgContext } from "../auto-reply/templating.js";
export { generateConversationLabel } from "../auto-reply/reply/conversation-label-generator.js";
export type { ConversationLabelParams } from "../auto-reply/reply/conversation-label-generator.js";

View File

@@ -3,7 +3,7 @@
import { createOptionalChannelSetupSurface } from "./channel-setup.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js";
export {
applyAccountNameToChannelSection,

View File

@@ -1,4 +1,3 @@
import type { ReplyPayload } from "../auto-reply/reply-payload.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
import type {
@@ -8,6 +7,7 @@ import type {
TtsDirectiveParseResult,
} from "../tts/provider-types.js";
import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
import type { ReplyPayload } from "./reply-payload.js";
export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
export type { TtsDirectiveOverrides, TtsDirectiveParseResult };

View File

@@ -3,7 +3,7 @@
import { createOptionalChannelSetupSurface } from "./channel-setup.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js";
export type {
ChannelGatewayContext,

View File

@@ -2,7 +2,7 @@
// Keep this list additive and scoped to the bundled Zalo surface.
export { jsonResult, readStringParam } from "../agents/tools/common.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export {
deleteAccountFromConfigSection,
setAccountEnabledInConfigSection,

View File

@@ -3,7 +3,7 @@
import { createOptionalChannelSetupSurface } from "./channel-setup.js";
export type { ReplyPayload } from "../auto-reply/reply-payload.js";
export type { ReplyPayload } from "./reply-payload.js";
export { mergeAllowlist, summarizeMapping } from "../channels/allowlists/resolve-utils.js";
export {
resolveMentionGating,

View File

@@ -20,6 +20,7 @@ const AUDIO_TAG_RE = /\[\[\s*audio_as_voice\s*\]\]/gi;
const REPLY_TAG_RE = /\[\[\s*(?:reply_to_current|reply_to\s*:\s*([^\]\n]+))\s*\]\]/gi;
const INLINE_DIRECTIVE_TAG_WITH_PADDING_RE =
/\s*(?:\[\[\s*audio_as_voice\s*\]\]|\[\[\s*(?:reply_to_current|reply_to\s*:\s*[^\]\n]+)\s*\]\])\s*/gi;
const MAX_REPLY_DIRECTIVE_ID_LENGTH = 256;
function replacementPreservesWordBoundary(source: string, offset: number, length: number): string {
const before = source[offset - 1];
@@ -92,6 +93,33 @@ export function stripInlineDirectiveTagsForDisplay(text: string): StripInlineDir
};
}
function stripUnsafeReplyDirectiveChars(value: string): string {
let next = "";
for (const ch of value) {
const code = ch.charCodeAt(0);
if ((code >= 0 && code <= 31) || code === 127 || ch === "[" || ch === "]") {
continue;
}
next += ch;
}
return next;
}
export function sanitizeReplyDirectiveId(rawReplyToId?: string): string | undefined {
const trimmed = rawReplyToId?.trim();
if (!trimmed) {
return undefined;
}
const sanitized = stripUnsafeReplyDirectiveChars(trimmed).trim();
if (!sanitized) {
return undefined;
}
if (sanitized.length > MAX_REPLY_DIRECTIVE_ID_LENGTH) {
return sanitized.slice(0, MAX_REPLY_DIRECTIVE_ID_LENGTH);
}
return sanitized;
}
export function stripInlineDirectiveTagsForDelivery(text: string): StripInlineDirectiveTagsResult {
if (!text) {
return { text, changed: false };