diff --git a/CHANGELOG.md b/CHANGELOG.md
index e761d22cb69..b8f008b1bdc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Gateway/pairing webchat: render `/pair qr` replies as structured media instead of raw markdown text, preserve inline reply threading and silent-control handling on media replies, avoid persisting sensitive QR images into transcript history, and keep local webchat media embedding behind internal-only trust markers. (#70047) Thanks @BunsDev.
 - OpenAI/Responses: keep embedded OpenAI Responses runs on HTTP when `models.providers.openai.baseUrl` points at a local mock or other non-public endpoint, so mocked/custom endpoints no longer drift onto the hardcoded public websocket transport. (#69815) Thanks @vincentkoc.
 - Channels/config: require resolved runtime config on channel send/action/client helpers and block runtime helper `loadConfig()` calls, so SecretRefs are resolved at startup/boundaries instead of being re-read during sends.
 - CLI/channels: preserve bundled setup promotion metadata when a loaded partial channel plugin omits it, so adding a non-default account still moves legacy single-account fields such as Telegram `streaming` into `accounts.default`.
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
index 581bafd9b80..9d756bbdd91 100644
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-55b39075f07def786f5056b029921db64fcbdc5e2cab3d645215eccc857ba9a4  plugin-sdk-api-baseline.json
-4a6b8f4afc9e6aa7c56b0cbab0886dacc4ead534c47761ab30eb76480d8fd673  plugin-sdk-api-baseline.jsonl
+ba9b9d9b321b405fef89d4e95c1a3d629d1b956398a5b2a7f25b2a7654879783  plugin-sdk-api-baseline.json
+8bbbee0ea2326148d4fd49f61fe74f83c5bb24c0742cfbf3609f43939fcd4c34  plugin-sdk-api-baseline.jsonl
diff --git a/extensions/device-pair/index.test.ts b/extensions/device-pair/index.test.ts
index d34447691d9..66d6a897984 100644
--- a/extensions/device-pair/index.test.ts
+++ b/extensions/device-pair/index.test.ts
@@ -251,6 +251,7 @@ describe("device-pair /pair qr", () => {
         gatewayClientScopes: ["operator.write", "operator.pairing"],
       }),
     );
+    const payload = result as { text?: string; mediaUrl?: string; sensitiveMedia?: boolean };
     const text = requireText(result);
 
     expect(pluginApiMocks.renderQrPngBase64).toHaveBeenCalledTimes(1);
@@ -261,11 +262,12 @@ describe("device-pair /pair qr", () => {
       },
     });
     expect(text).toContain("Scan this QR code with the OpenClaw iOS app:");
-    expect(text).toContain("![OpenClaw pairing QR](data:image/png;base64,ZmFrZXBuZw==)");
+    expect(payload.mediaUrl).toBe("data:image/png;base64,ZmFrZXBuZw==");
+    expect(payload.sensitiveMedia).toBe(true);
     expect(text).toContain("- Security: single-use bootstrap token");
     expect(text).toContain("**Important:** Run `/pair cleanup` after pairing finishes.");
     expect(text).toContain("If this QR code leaks, run `/pair cleanup` immediately.");
-    expect(text).not.toContain("```");
+    expect(text).not.toContain("![OpenClaw pairing QR]");
   });
 
   it("rejects qr setup for internal gateway callers without operator.pairing", async () => {
diff --git a/extensions/device-pair/index.ts b/extensions/device-pair/index.ts
index 4d60be47d62..8e928223af3 100644
--- a/extensions/device-pair/index.ts
+++ b/extensions/device-pair/index.ts
@@ -732,9 +732,9 @@ export default definePluginEntry({
                   autoNotifyArmed,
                   expiresAtMs: payload.expiresAtMs,
                 }),
-                "",
-                `![OpenClaw pairing QR](${qrDataUrl})`,
               ].join("\n"),
+              mediaUrl: qrDataUrl,
+              sensitiveMedia: true,
             };
           }
 
diff --git a/src/agents/pi-embedded-payloads.ts b/src/agents/pi-embedded-payloads.ts
index 1186111db10..fe89378965a 100644
--- a/src/agents/pi-embedded-payloads.ts
+++ b/src/agents/pi-embedded-payloads.ts
@@ -2,6 +2,8 @@ export type BlockReplyPayload = {
   text?: string;
   mediaUrls?: string[];
   audioAsVoice?: boolean;
+  trustedLocalMedia?: boolean;
+  sensitiveMedia?: boolean;
   isReasoning?: boolean;
   replyToId?: string;
   replyToTag?: boolean;
diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts
index 1b19786a14a..db315eca0d7 100644
--- a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts
@@ -251,6 +251,7 @@ describe("consumePendingToolMediaIntoReply", () => {
     const state = {
       pendingToolMediaUrls: ["/tmp/a.png", "/tmp/b.png"],
       pendingToolAudioAsVoice: false,
+      pendingToolTrustedLocalMedia: false,
     };
 
     expect(
@@ -269,6 +270,7 @@ describe("consumePendingToolMediaIntoReply", () => {
     const state = {
       pendingToolMediaUrls: ["/tmp/a.png"],
       pendingToolAudioAsVoice: true,
+      pendingToolTrustedLocalMedia: false,
     };
 
     expect(
@@ -290,6 +292,7 @@ describe("consumePendingToolMediaReply", () => {
     const state = {
       pendingToolMediaUrls: ["/tmp/reply.opus"],
       pendingToolAudioAsVoice: true,
+      pendingToolTrustedLocalMedia: false,
     };
 
     expect(consumePendingToolMediaReply(state)).toEqual({
diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts
index a6847b47d34..27db0016e0e 100644
--- a/src/agents/pi-embedded-subscribe.handlers.messages.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts
@@ -178,20 +178,31 @@ export function resolveSilentReplyFallbackText(params: {
 }
 
 function clearPendingToolMedia(
-  state: Pick<EmbeddedPiSubscribeState, "pendingToolMediaUrls" | "pendingToolAudioAsVoice">,
+  state: Pick<
+    EmbeddedPiSubscribeState,
+    "pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
+  >,
 ) {
   state.pendingToolMediaUrls = [];
   state.pendingToolAudioAsVoice = false;
+  state.pendingToolTrustedLocalMedia = false;
 }
 
 export function consumePendingToolMediaIntoReply(
-  state: Pick<EmbeddedPiSubscribeState, "pendingToolMediaUrls" | "pendingToolAudioAsVoice">,
+  state: Pick<
+    EmbeddedPiSubscribeState,
+    "pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
+  >,
   payload: BlockReplyPayload,
 ): BlockReplyPayload {
   if (payload.isReasoning) {
     return payload;
   }
-  if (state.pendingToolMediaUrls.length === 0 && !state.pendingToolAudioAsVoice) {
+  if (
+    state.pendingToolMediaUrls.length === 0 &&
+    !state.pendingToolAudioAsVoice &&
+    !state.pendingToolTrustedLocalMedia
+  ) {
     return payload;
   }
   const mergedMediaUrls = Array.from(
@@ -201,15 +212,24 @@ export function consumePendingToolMediaIntoReply(
     ...payload,
     mediaUrls: mergedMediaUrls.length ? mergedMediaUrls : undefined,
     audioAsVoice: payload.audioAsVoice || state.pendingToolAudioAsVoice || undefined,
+    trustedLocalMedia:
+      payload.trustedLocalMedia || state.pendingToolTrustedLocalMedia || undefined,
   };
   clearPendingToolMedia(state);
   return mergedPayload;
 }
 
 export function consumePendingToolMediaReply(
-  state: Pick<EmbeddedPiSubscribeState, "pendingToolMediaUrls" | "pendingToolAudioAsVoice">,
+  state: Pick<
+    EmbeddedPiSubscribeState,
+    "pendingToolMediaUrls" | "pendingToolAudioAsVoice" | "pendingToolTrustedLocalMedia"
+  >,
 ): BlockReplyPayload | null {
-  if (state.pendingToolMediaUrls.length === 0 && !state.pendingToolAudioAsVoice) {
+  if (
+    state.pendingToolMediaUrls.length === 0 &&
+    !state.pendingToolAudioAsVoice &&
+    !state.pendingToolTrustedLocalMedia
+  ) {
     return null;
   }
   const payload: BlockReplyPayload = {
@@ -217,6 +237,7 @@ export function consumePendingToolMediaReply(
       ? Array.from(new Set(state.pendingToolMediaUrls))
       : undefined,
     audioAsVoice: state.pendingToolAudioAsVoice || undefined,
+    trustedLocalMedia: state.pendingToolTrustedLocalMedia || undefined,
   };
   clearPendingToolMedia(state);
   return payload;
diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
index 661f0d4ebe1..9d216daef4e 100644
--- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
@@ -47,6 +47,7 @@ function createTestContext(): {
       pendingMessagingMediaUrls: new Map<string, string[]>(),
       pendingToolMediaUrls: [],
       pendingToolAudioAsVoice: false,
+      pendingToolTrustedLocalMedia: false,
       deterministicApprovalPromptPending: false,
       replayState: { replayInvalid: false, hadPotentialSideEffects: false },
       messagingToolSentTexts: [],
diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.ts b/src/agents/pi-embedded-subscribe.handlers.tools.ts
index 7f765fb5f07..8c67904288a 100644
--- a/src/agents/pi-embedded-subscribe.handlers.tools.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts
@@ -293,7 +293,7 @@ function collectMessagingMediaUrlsFromToolResult(result: unknown): string[] {
 
 function queuePendingToolMedia(
   ctx: ToolHandlerContext,
-  mediaReply: { mediaUrls: string[]; audioAsVoice?: boolean },
+  mediaReply: { mediaUrls: string[]; audioAsVoice?: boolean; trustedLocalMedia?: boolean },
 ) {
   const seen = new Set(ctx.state.pendingToolMediaUrls);
   for (const mediaUrl of mediaReply.mediaUrls) {
@@ -306,6 +306,9 @@ function queuePendingToolMedia(
   if (mediaReply.audioAsVoice) {
     ctx.state.pendingToolAudioAsVoice = true;
   }
+  if (mediaReply.trustedLocalMedia) {
+    ctx.state.pendingToolTrustedLocalMedia = true;
+  }
 }
 
 async function collectEmittedToolOutputMediaUrls(
diff --git a/src/agents/pi-embedded-subscribe.handlers.types.ts b/src/agents/pi-embedded-subscribe.handlers.types.ts
index 542e59b524a..e0e544e4b7b 100644
--- a/src/agents/pi-embedded-subscribe.handlers.types.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.types.ts
@@ -81,6 +81,7 @@ export type EmbeddedPiSubscribeState = {
   pendingMessagingMediaUrls: Map<string, string[]>;
   pendingToolMediaUrls: string[];
   pendingToolAudioAsVoice: boolean;
+  pendingToolTrustedLocalMedia: boolean;
   deterministicApprovalPromptPending: boolean;
   deterministicApprovalPromptSent: boolean;
   lastAssistant?: AgentMessage;
@@ -165,6 +166,7 @@ export type ToolHandlerState = Pick<
   | "pendingMessagingMediaUrls"
   | "pendingToolMediaUrls"
   | "pendingToolAudioAsVoice"
+  | "pendingToolTrustedLocalMedia"
   | "deterministicApprovalPromptPending"
   | "replayState"
   | "messagingToolSentTexts"
diff --git a/src/agents/pi-embedded-subscribe.tools.media.test.ts b/src/agents/pi-embedded-subscribe.tools.media.test.ts
index 443d07c2a34..8ffab15b574 100644
--- a/src/agents/pi-embedded-subscribe.tools.media.test.ts
+++ b/src/agents/pi-embedded-subscribe.tools.media.test.ts
@@ -51,6 +51,22 @@ describe("extractToolResultMediaPaths", () => {
     });
   });
 
+  it("extracts structured media trust markers", () => {
+    // Structured `details.media` carrying the trust marker alongside the media URL.
+    const toolResult = {
+      details: {
+        media: { mediaUrl: "/tmp/reply.opus", trustedLocalMedia: true },
+      },
+    };
+
+    const artifact = extractToolResultMediaArtifact(toolResult);
+
+    expect(artifact).toEqual({
+      mediaUrls: ["/tmp/reply.opus"],
+      trustedLocalMedia: true,
+    });
+  });
+
   it("extracts MEDIA: path from text content block", () => {
     const result = {
       content: [
diff --git a/src/agents/pi-embedded-subscribe.tools.ts b/src/agents/pi-embedded-subscribe.tools.ts
index 7021bcb3b72..14c2be9119f 100644
--- a/src/agents/pi-embedded-subscribe.tools.ts
+++ b/src/agents/pi-embedded-subscribe.tools.ts
@@ -249,6 +249,7 @@ export function filterToolResultMediaUrls(
 export type ToolResultMediaArtifact = {
   mediaUrls: string[];
   audioAsVoice?: boolean;
+  trustedLocalMedia?: boolean;
 };
 
 function readToolResultDetailsMedia(
@@ -292,6 +293,7 @@ export function extractToolResultMediaArtifact(
       return {
         mediaUrls,
         ...(detailsMedia.audioAsVoice === true ? { audioAsVoice: true } : {}),
+        ...(detailsMedia.trustedLocalMedia === true ? { trustedLocalMedia: true } : {}),
       };
     }
   }
diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts
index 1fddc10b216..3197f575f2c 100644
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -123,6 +123,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
     pendingMessagingMediaUrls: new Map(),
     pendingToolMediaUrls: initialPendingToolMediaUrls,
     pendingToolAudioAsVoice: false,
+    pendingToolTrustedLocalMedia: false,
     deterministicApprovalPromptPending: false,
     deterministicApprovalPromptSent: false,
   };
diff --git a/src/agents/pi-tool-handler-state.test-helpers.ts b/src/agents/pi-tool-handler-state.test-helpers.ts
index 600395e8987..aa8d6186f07 100644
--- a/src/agents/pi-tool-handler-state.test-helpers.ts
+++ b/src/agents/pi-tool-handler-state.test-helpers.ts
@@ -15,6 +15,7 @@ export function createBaseToolHandlerState() {
     pendingMessagingMediaUrls: new Map<string, string[]>(),
     pendingToolMediaUrls: [] as string[],
     pendingToolAudioAsVoice: false,
+    pendingToolTrustedLocalMedia: false,
     deterministicApprovalPromptPending: false,
     messagingToolSentTexts: [] as string[],
     messagingToolSentTextsNormalized: [] as string[],
diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts
index 7faa0790950..41cd1f6da58 100644
--- a/src/agents/tools/tts-tool.test.ts
+++ b/src/agents/tools/tts-tool.test.ts
@@ -35,6 +35,7 @@ describe("createTtsTool", () => {
         provider: "test",
         media: {
           mediaUrl: "/tmp/reply.opus",
+          trustedLocalMedia: true,
           audioAsVoice: true,
         },
       },
diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts
index ff789f2787b..b192321cbbd 100644
--- a/src/agents/tools/tts-tool.ts
+++ b/src/agents/tools/tts-tool.ts
@@ -43,6 +43,7 @@ export function createTtsTool(opts?: {
             provider: result.provider,
             media: {
               mediaUrl: result.audioPath,
+              trustedLocalMedia: true,
               ...(result.voiceCompatible ? { audioAsVoice: true } : {}),
             },
           },
diff --git a/src/auto-reply/reply-payload.ts b/src/auto-reply/reply-payload.ts
index da257eca2ad..0aacb840371 100644
--- a/src/auto-reply/reply-payload.ts
+++ b/src/auto-reply/reply-payload.ts
@@ -8,6 +8,10 @@ export type ReplyPayload = {
   text?: string;
   mediaUrl?: string;
   mediaUrls?: string[];
+  /** Internal-only trust signal for gateway webchat local media embedding. */
+  trustedLocalMedia?: boolean;
+  /** Treat media as live-only content and avoid persisting the underlying media reference. */
+  sensitiveMedia?: boolean;
   /** Channel-agnostic rich presentation. Core degrades or asks the channel renderer to map it. */
   presentation?: MessagePresentation;
   /** Channel-agnostic delivery preferences, e.g. pin the sent message when supported. */
diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts
index 46a054d8280..b6567857bd1 100644
--- a/src/auto-reply/reply/commands-tts.ts
+++ b/src/auto-reply/reply/commands-tts.ts
@@ -167,6 +167,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
       const payload: ReplyPayload = {
         mediaUrl: result.audioPath,
         audioAsVoice: result.voiceCompatible === true,
+        trustedLocalMedia: true,
       };
       return { shouldContinue: false, reply: payload };
     }
diff --git a/src/gateway/server-methods/chat-webchat-media.test.ts b/src/gateway/server-methods/chat-webchat-media.test.ts
index 33cc2662eb7..67de5784ede 100644
--- a/src/gateway/server-methods/chat-webchat-media.test.ts
+++ b/src/gateway/server-methods/chat-webchat-media.test.ts
@@ -4,7 +4,10 @@ import path from "node:path";
 import { pathToFileURL } from "node:url";
 import { afterEach, describe, expect, it, vi } from "vitest";
 import { getDefaultLocalRoots } from "../../media/local-media-access.js";
-import { buildWebchatAudioContentBlocksFromReplyPayloads } from "./chat-webchat-media.js";
+import {
+  buildWebchatAssistantMessageFromReplyPayloads,
+  buildWebchatAudioContentBlocksFromReplyPayloads,
+} from "./chat-webchat-media.js";
 
 describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
   let tmpDir: string | undefined;
@@ -22,7 +25,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
 
     const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
-      [{ mediaUrl: audioPath }],
+      [{ mediaUrl: audioPath, trustedLocalMedia: true }],
       { localRoots: [tmpDir] },
     );
 
@@ -42,7 +45,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
 
   it("skips remote URLs", async () => {
     const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([
-      { mediaUrl: "https://example.com/a.mp3" },
+      { mediaUrl: "https://example.com/a.mp3", trustedLocalMedia: true },
     ]);
     expect(blocks).toHaveLength(0);
   });
@@ -53,7 +56,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     fs.writeFileSync(imagePath, Buffer.from([0x89, 0x50, 0x4e, 0x47]));
 
     const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
-      [{ mediaUrl: imagePath }],
+      [{ mediaUrl: imagePath, trustedLocalMedia: true }],
       { localRoots: [tmpDir] },
     );
 
@@ -66,7 +69,10 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     fs.writeFileSync(audioPath, Buffer.from([0x00]));
 
     const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
-      [{ mediaUrl: audioPath }, { mediaUrl: audioPath }],
+      [
+        { mediaUrl: audioPath, trustedLocalMedia: true },
+        { mediaUrl: audioPath, trustedLocalMedia: true },
+      ],
       { localRoots: [tmpDir] },
     );
     expect(blocks).toHaveLength(1);
@@ -78,9 +84,12 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     fs.writeFileSync(audioPath, Buffer.from([0x01]));
 
     const fileUrl = pathToFileURL(audioPath).href;
-    const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: fileUrl }], {
-      localRoots: [tmpDir],
-    });
+    const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
+      [{ mediaUrl: fileUrl, trustedLocalMedia: true }],
+      {
+        localRoots: [tmpDir],
+      },
+    );
 
     expect(blocks).toHaveLength(1);
     expect((blocks[0] as { type?: string }).type).toBe("audio");
@@ -94,6 +103,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
       {
         text: "MEDIA:file://attacker/share/probe.mp3",
         mediaUrl: "file://attacker/share/probe.mp3",
+        trustedLocalMedia: true,
       },
     ]);
 
@@ -116,7 +126,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
 
     const onLocalAudioAccessDenied = vi.fn();
     const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
-      [{ mediaUrl: audioPath }],
+      [{ mediaUrl: audioPath, trustedLocalMedia: true }],
       {
         localRoots: [allowedRoot],
         onLocalAudioAccessDenied,
@@ -136,7 +146,9 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     const audioPath = path.join(tmpDir, "clip.mp3");
     fs.writeFileSync(audioPath, Buffer.from([0x04]));
 
-    const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]);
+    const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([
+      { mediaUrl: audioPath, trustedLocalMedia: true },
+    ]);
 
     expect(blocks).toHaveLength(1);
     expect((blocks[0] as { type?: string }).type).toBe("audio");
@@ -157,7 +169,7 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     const readSpy = vi.spyOn(fs, "readFileSync");
 
     const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
-      [{ mediaUrl: audioPath }],
+      [{ mediaUrl: audioPath, trustedLocalMedia: true }],
       { localRoots: [tmpDir] },
     );
 
@@ -167,4 +179,121 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
     statSpy.mockRestore();
     readSpy.mockRestore();
   });
+
+  it("rejects untrusted local audio paths", async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
+    const audioPath = path.join(tmpDir, "clip.mp3");
+    fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
+
+    const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
+      [{ mediaUrl: audioPath }], // deliberately omits trustedLocalMedia
+      { localRoots: [tmpDir] },
+    );
+
+    expect(blocks).toHaveLength(0);
+  });
+});
+
+describe("buildWebchatAssistantMessageFromReplyPayloads", () => {
+  it("converts image data URLs into webchat image blocks", async () => {
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        text: "Scan this QR code with the OpenClaw iOS app:",
+        mediaUrl: "data:image/png;base64,cG5n",
+      },
+    ]);
+
+    expect(message).toEqual({
+      transcriptText: "Scan this QR code with the OpenClaw iOS app:",
+      content: [
+        { type: "text", text: "Scan this QR code with the OpenClaw iOS app:" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+  });
+
+  it("suppresses control tokens and falls back to synthetic image text", async () => {
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        text: "NO_REPLY",
+        mediaUrl: "data:image/png;base64,cG5n",
+      },
+    ]);
+
+    expect(message).toEqual({
+      transcriptText: "Image reply",
+      content: [
+        { type: "text", text: "Image reply" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+  });
+
+  it("preserves reply directives in transcript text for media replies", async () => {
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        replyToCurrent: true,
+        mediaUrl: "data:image/png;base64,cG5n",
+      },
+    ]);
+
+    expect(message).toEqual({
+      transcriptText: "[[reply_to_current]]Image reply",
+      content: [
+        { type: "text", text: "[[reply_to_current]]Image reply" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+  });
+
+  it("drops oversized data image URLs", async () => {
+    const hugeBase64 = "A".repeat(2_100_000); // longer than the 2,000,000-char data URL cap
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        text: "too large",
+        mediaUrl: `data:image/png;base64,${hugeBase64}`,
+      },
+    ]);
+
+    expect(message).toBeNull();
+  });
+
+  it("rejects remote image URLs", async () => {
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        text: "remote",
+        mediaUrl: "https://example.com/final.png",
+      },
+    ]);
+
+    expect(message).toBeNull();
+  });
+
+  it("rejects svg data URLs", async () => {
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        text: "svg",
+        mediaUrl: "data:image/svg+xml;base64,PHN2Zy8+",
+      },
+    ]);
+
+    expect(message).toBeNull();
+  });
+
+  it("sanitizes reply ids before embedding directive prefixes", async () => {
+    const message = await buildWebchatAssistantMessageFromReplyPayloads([
+      {
+        replyToId: "abc]]\n[[audio_as_voice]]", // tries to smuggle in a second directive tag
+        mediaUrl: "data:image/png;base64,cG5n",
+      },
+    ]);
+
+    expect(message).toEqual({
+      transcriptText: "[[reply_to:abcaudio_as_voice]]Image reply",
+      content: [
+        { type: "text", text: "[[reply_to:abcaudio_as_voice]]Image reply" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+  });
 });
diff --git a/src/gateway/server-methods/chat-webchat-media.ts b/src/gateway/server-methods/chat-webchat-media.ts
index d13113f3f71..c39e0077dd4 100644
--- a/src/gateway/server-methods/chat-webchat-media.ts
+++ b/src/gateway/server-methods/chat-webchat-media.ts
@@ -6,9 +6,22 @@ import { assertLocalMediaAllowed, LocalMediaAccessError } from "../../media/loca
 import { isAudioFileName } from "../../media/mime.js";
 import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js";
 import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
+import { sanitizeReplyDirectiveId } from "../../utils/directive-tags.js";
+import { isSuppressedControlReplyText } from "../control-reply-text.js";
 
 /** Cap embedded audio size to avoid multi‑MB payloads on the chat WebSocket. */
 const MAX_WEBCHAT_AUDIO_BYTES = 15 * 1024 * 1024;
+const MAX_WEBCHAT_IMAGE_DATA_URL_CHARS = 2_000_000; // max length of a whole image data URL
+const MAX_WEBCHAT_IMAGE_DATA_BYTES = 1_500_000; // max estimated decoded image size
+const ALLOWED_WEBCHAT_DATA_IMAGE_MEDIA_TYPES = new Set([
+  "image/apng",
+  "image/avif",
+  "image/bmp",
+  "image/gif",
+  "image/jpeg",
+  "image/png",
+  "image/webp", // image/svg+xml is not on the list, so SVG data URLs are rejected
+]);
 
 const MIME_BY_EXT: Record<string, string> = {
   ".aac": "audio/aac",
@@ -26,6 +39,8 @@ type WebchatAudioEmbeddingOptions = {
   onLocalAudioAccessDenied?: (err: LocalMediaAccessError) => void;
 };
 
+type WebchatAssistantMediaOptions = WebchatAudioEmbeddingOptions;
+
 /** Map `mediaUrl` strings to an absolute filesystem path for local embedding (plain paths or `file:` URLs). */
 function resolveLocalMediaPathForEmbedding(raw: string): string | null {
   const trimmed = raw.trim();
@@ -62,9 +77,13 @@ function resolveLocalMediaPathForEmbedding(raw: string): string | null {
 
 /** Returns a readable local file path when it is a regular file and within the size cap (single stat before read). */
 async function resolveLocalAudioFileForEmbedding(
+  payload: ReplyPayload,
   raw: string,
   options: WebchatAudioEmbeddingOptions | undefined,
 ): Promise<string | null> {
+  if (payload.trustedLocalMedia !== true) {
+    return null;
+  }
   const resolved = resolveLocalMediaPathForEmbedding(raw);
   if (!resolved) {
     return null;
@@ -92,6 +111,47 @@ function mimeTypeForPath(filePath: string): string {
   return MIME_BY_EXT[ext] ?? "audio/mpeg";
 }
 
+/** Estimate the decoded size of base64 text (whitespace ignored); never negative. */
+function estimateBase64DecodedBytes(base64: string): number {
+  const sanitized = base64.replace(/\s+/g, "");
+  const padding = sanitized.endsWith("==") ? 2 : sanitized.endsWith("=") ? 1 : 0;
+  return Math.max(0, Math.floor((sanitized.length * 3) / 4) - padding);
+}
+
+/**
+ * Validate an image reference for inline webchat embedding. Only base64 `data:` URLs with an
+ * allowlisted image media type are accepted, and both the URL length and the estimated
+ * decoded size must stay within their caps.
+ * Returns the trimmed URL, or `null` when it must not be embedded.
+ */
+function resolveEmbeddableImageUrl(url: string): string | null {
+  const candidate = url.trim();
+  if (!candidate || candidate.length > MAX_WEBCHAT_IMAGE_DATA_URL_CHARS) {
+    return null;
+  }
+  const parsed = /^data:(image\/[a-z0-9.+-]+);base64,([a-z0-9+/=\s]+)$/i.exec(candidate);
+  if (!parsed) {
+    return null;
+  }
+  if (!ALLOWED_WEBCHAT_DATA_IMAGE_MEDIA_TYPES.has(normalizeLowercaseStringOrEmpty(parsed[1]))) {
+    return null;
+  }
+  // Size is estimated from the base64 text, so the payload is never decoded here.
+  const fitsByteCap = estimateBase64DecodedBytes(parsed[2]) <= MAX_WEBCHAT_IMAGE_DATA_BYTES;
+  return fitsByteCap ? candidate : null;
+}
+
+/**
+ * Reply directive tag for a payload: a sanitized explicit id wins over `replyToCurrent`.
+ */
+function resolveReplyDirectivePrefix(payload: ReplyPayload): string {
+  const targetId = sanitizeReplyDirectiveId(payload.replyToId);
+  if (targetId) {
+    return `[[reply_to:${targetId}]]`;
+  }
+  return payload.replyToCurrent ? "[[reply_to_current]]" : "";
+}
+
 /**
  * Build Control UI / transcript `content` blocks for local TTS (or other) audio files
  * referenced by slash-command / agent replies when the webchat path only had text aggregation.
@@ -109,7 +169,7 @@ export async function buildWebchatAudioContentBlocksFromReplyPayloads(
       if (!url) {
         continue;
       }
-      const resolved = await resolveLocalAudioFileForEmbedding(url, options);
+      const resolved = await resolveLocalAudioFileForEmbedding(payload, url, options);
       if (!resolved || seen.has(resolved)) {
         continue;
       }
@@ -123,6 +183,87 @@ export async function buildWebchatAudioContentBlocksFromReplyPayloads(
   return blocks;
 }
 
+/** Placeholder text for a media reply that has no visible text of its own. */
+function syntheticMediaReplyText(hasAudio: boolean, hasImage: boolean): string {
+  return hasAudio && hasImage ? "Media reply" : hasAudio ? "Audio reply" : "Image reply";
+}
+
+/**
+ * Build the webchat assistant message for replies carrying embeddable media (trusted local
+ * audio, allowlisted image data URLs). Per payload, visible text (with its reply directive
+ * prefix) precedes the media blocks. Returns `null` when no payload yields embeddable media.
+ */
+export async function buildWebchatAssistantMessageFromReplyPayloads(
+  payloads: ReplyPayload[],
+  options?: WebchatAssistantMediaOptions,
+): Promise<{ content: Array<Record<string, unknown>>; transcriptText: string } | null> {
+  const content: Array<Record<string, unknown>> = [];
+  const transcriptTextParts: string[] = [];
+  const seenAudio = new Set<string>();
+  const seenImages = new Set<string>();
+  let hasAudio = false;
+  let hasImage = false;
+
+  for (const payload of payloads) {
+    const visibleText = payload.text?.trim();
+    const text =
+      visibleText && !isSuppressedControlReplyText(visibleText) ? visibleText : undefined;
+    const replyDirectivePrefix = resolveReplyDirectivePrefix(payload);
+    let payloadHasAudio = false;
+    let payloadHasImage = false;
+    const payloadMediaBlocks: Array<Record<string, unknown>> = [];
+    for (const raw of resolveSendableOutboundReplyParts(payload).mediaUrls) {
+      const url = raw.trim();
+      if (!url) {
+        continue;
+      }
+      const resolvedAudioPath = await resolveLocalAudioFileForEmbedding(payload, url, options);
+      if (resolvedAudioPath) {
+        if (seenAudio.has(resolvedAudioPath)) {
+          continue;
+        }
+        seenAudio.add(resolvedAudioPath);
+        const block = tryReadLocalAudioContentBlock(resolvedAudioPath);
+        if (block) {
+          payloadMediaBlocks.push(block);
+          hasAudio = true;
+          payloadHasAudio = true;
+        }
+        continue;
+      }
+      const imageUrl = resolveEmbeddableImageUrl(url);
+      if (!imageUrl || seenImages.has(imageUrl)) {
+        continue;
+      }
+      seenImages.add(imageUrl);
+      payloadMediaBlocks.push({ type: "input_image", image_url: imageUrl });
+      hasImage = true;
+      payloadHasImage = true;
+    }
+    // Placeholder only while the transcript is empty; a bare directive tag is still emitted.
+    const syntheticText =
+      payloadMediaBlocks.length > 0 && !text && transcriptTextParts.length === 0
+        ? syntheticMediaReplyText(payloadHasAudio, payloadHasImage)
+        : "";
+    const fullText = `${replyDirectivePrefix}${text ?? syntheticText}`;
+    if (fullText) {
+      transcriptTextParts.push(fullText);
+      content.push({ type: "text", text: fullText });
+    }
+    content.push(...payloadMediaBlocks);
+  }
+
+  if (!hasAudio && !hasImage) {
+    return null;
+  }
+  const transcriptText =
+    transcriptTextParts.join("\n\n").trim() || syntheticMediaReplyText(hasAudio, hasImage);
+  if (transcriptTextParts.length === 0) {
+    content.unshift({ type: "text", text: transcriptText });
+  }
+  return { content, transcriptText };
+}
+
 function tryReadLocalAudioContentBlock(filePath: string): Record<string, unknown> | null {
   try {
     const buf = fs.readFileSync(filePath);
diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts
index e871929da69..4cc6ea70e77 100644
--- a/src/gateway/server-methods/chat.directive-tags.test.ts
+++ b/src/gateway/server-methods/chat.directive-tags.test.ts
@@ -20,10 +20,23 @@ const mockState = vi.hoisted(() => ({
   sessionId: "sess-1",
   mainSessionKey: "main",
   finalText: "[[reply_to_current]]",
-  finalPayload: null as { text?: string; mediaUrl?: string } | null,
+  finalPayload: null as {
+    text?: string;
+    mediaUrl?: string;
+    sensitiveMedia?: boolean;
+    replyToId?: string;
+    replyToCurrent?: boolean;
+  } | null,
   dispatchedReplies: [] as Array<{
     kind: "tool" | "block" | "final";
-    payload: { text?: string; mediaUrl?: string; mediaUrls?: string[] };
+    payload: {
+      text?: string;
+      mediaUrl?: string;
+      mediaUrls?: string[];
+      trustedLocalMedia?: boolean;
+      replyToId?: string;
+      replyToCurrent?: boolean;
+    };
   }>,
   dispatchError: null as Error | null,
   triggerAgentRunStart: false,
@@ -91,16 +104,28 @@ vi.mock("../../auto-reply/dispatch.js", () => ({
     async (params: {
       ctx: MsgContext;
       dispatcher: {
-        sendFinalReply: (payload: { text?: string; mediaUrl?: string }) => boolean;
+        sendFinalReply: (payload: {
+          text?: string;
+          mediaUrl?: string;
+          sensitiveMedia?: boolean;
+          replyToId?: string;
+          replyToCurrent?: boolean;
+        }) => boolean;
         sendBlockReply: (payload: {
           text?: string;
           mediaUrl?: string;
           mediaUrls?: string[];
+          trustedLocalMedia?: boolean;
+          replyToId?: string;
+          replyToCurrent?: boolean;
         }) => boolean;
         sendToolResult: (payload: {
           text?: string;
           mediaUrl?: string;
           mediaUrls?: string[];
+          trustedLocalMedia?: boolean;
+          replyToId?: string;
+          replyToCurrent?: boolean;
         }) => boolean;
         markComplete: () => void;
         waitForIdle: () => Promise<void>;
@@ -130,9 +155,7 @@ vi.mock("../../auto-reply/dispatch.js", () => ({
             params.dispatcher.sendBlockReply(reply.payload);
             continue;
           }
-          params.dispatcher.sendFinalReply({
-            text: reply.payload.text ?? "",
-          });
+          params.dispatcher.sendFinalReply(reply.payload);
         }
       } else {
         params.dispatcher.sendFinalReply(mockState.finalPayload ?? { text: mockState.finalText });
@@ -500,6 +523,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
         payload: {
           mediaUrl: audioPath,
           mediaUrls: [audioPath],
+          trustedLocalMedia: true,
         },
       },
     ];
@@ -528,7 +552,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
       expect(assistantUpdate).toMatchObject({
         message: {
           role: "assistant",
-          idempotencyKey: "idem-agent-audio:assistant-audio",
+          idempotencyKey: "idem-agent-audio:assistant-media",
           content: [
             { type: "text", text: "Audio reply" },
             {
@@ -544,6 +568,31 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
     });
   });
 
+  it("renders image reply payloads as assistant image content instead of MEDIA text", async () => {
+    createTranscriptFixture("openclaw-chat-send-agent-image-");
+    mockState.finalPayload = {
+      text: "Scan this QR code with the OpenClaw iOS app:",
+      mediaUrl: "data:image/png;base64,cG5n",
+    };
+    const respond = vi.fn();
+    const context = createChatContext();
+
+    const payload = await runNonStreamingChatSend({
+      context,
+      respond,
+      idempotencyKey: "idem-agent-image",
+    });
+
+    expect(payload?.message).toMatchObject({
+      role: "assistant",
+      content: [
+        { type: "text", text: "Scan this QR code with the OpenClaw iOS app:" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+    expect(JSON.stringify(payload?.message)).not.toContain("MEDIA:data:image/png;base64,cG5n");
+  });
+
   it("chat.inject keeps message defined when directive tag is the only content", async () => {
     createTranscriptFixture("openclaw-chat-inject-directive-only-");
     const respond = vi.fn();
@@ -693,7 +742,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
       respond,
       idempotencyKey: "idem-untrusted-context",
     });
-    expect(extractFirstTextBlock(payload)).toBe("hello");
+    expect(extractFirstTextBlock(payload)?.trim()).toBe("hello");
   });
 
   it("chat.send non-streaming final broadcasts and routes on the canonical session key", async () => {
@@ -1867,7 +1916,7 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
 
   it("preserves media-only final replies in the final broadcast message", async () => {
     createTranscriptFixture("openclaw-chat-send-media-only-final-");
-    mockState.finalPayload = { mediaUrl: "https://example.com/final.png" };
+    mockState.finalPayload = { mediaUrl: "data:image/png;base64,cG5n" };
     const respond = vi.fn();
     const context = createChatContext();
 
@@ -1877,14 +1926,20 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
       idempotencyKey: "idem-media-only-final",
     });
 
-    expect(extractFirstTextBlock(payload)).toBe("MEDIA:https://example.com/final.png");
+    expect(payload?.message).toMatchObject({
+      role: "assistant",
+      content: [
+        { type: "text", text: "Image reply" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
   });
 
   it("strips NO_REPLY from transcript text when final replies only carry media", async () => {
     createTranscriptFixture("openclaw-chat-send-media-only-silent-final-");
     mockState.finalPayload = {
       text: "NO_REPLY",
-      mediaUrl: "https://example.com/final.png",
+      mediaUrl: "data:image/png;base64,cG5n",
     };
     const respond = vi.fn();
     const context = createChatContext();
@@ -1895,7 +1950,122 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
       idempotencyKey: "idem-media-only-silent-final",
     });
 
-    expect(extractFirstTextBlock(payload)).toBe("MEDIA:https://example.com/final.png");
+    expect(payload?.message).toMatchObject({
+      role: "assistant",
+      content: [
+        { type: "text", text: "Image reply" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+  });
+
+  it("preserves reply tags in transcript updates for media replies while stripping them from the broadcast", async () => {
+    createTranscriptFixture("openclaw-chat-send-media-reply-tags-");
+    mockState.finalPayload = {
+      replyToCurrent: true,
+      mediaUrl: "data:image/png;base64,cG5n",
+    };
+    const respond = vi.fn();
+    const context = createChatContext();
+
+    const payload = await runNonStreamingChatSend({
+      context,
+      respond,
+      idempotencyKey: "idem-media-reply-tags",
+    });
+
+    expect(payload?.message).toMatchObject({
+      role: "assistant",
+      content: [
+        { type: "text", text: "Image reply" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+    const transcriptUpdate = mockState.emittedTranscriptUpdates.find(
+      (update) =>
+        typeof update.message === "object" &&
+        update.message !== null &&
+        (update.message as { role?: unknown }).role === "assistant" &&
+        Array.isArray((update.message as { content?: unknown }).content) &&
+        ((update.message as { content: Array<{ type?: string; text?: string }> }).content.some(
+          (block) => block?.type === "text" && block?.text?.includes("[[reply_to_current]]"),
+        ) ??
+          false),
+    );
+    expect(transcriptUpdate).toMatchObject({
+      message: {
+        role: "assistant",
+        content: [
+          { type: "text", text: "[[reply_to_current]]Image reply" },
+          { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+        ],
+      },
+    });
+  });
+
+  it("does not persist sensitive image media into transcript updates", async () => {
+    createTranscriptFixture("openclaw-chat-send-sensitive-media-final-");
+    mockState.finalPayload = {
+      text: "Scan this QR code with the OpenClaw iOS app:",
+      mediaUrl: "data:image/png;base64,cG5n",
+      sensitiveMedia: true,
+    };
+    const respond = vi.fn();
+    const context = createChatContext();
+
+    const payload = await runNonStreamingChatSend({
+      context,
+      respond,
+      idempotencyKey: "idem-sensitive-media-final",
+    });
+
+    expect(payload?.message).toMatchObject({
+      role: "assistant",
+      content: [
+        { type: "text", text: "Scan this QR code with the OpenClaw iOS app:" },
+        { type: "input_image", image_url: "data:image/png;base64,cG5n" },
+      ],
+    });
+    const transcriptUpdate = mockState.emittedTranscriptUpdates.find(
+      (update) =>
+        typeof update.message === "object" &&
+        update.message !== null &&
+        (update.message as { role?: unknown }).role === "assistant",
+    );
+    expect(transcriptUpdate).toMatchObject({
+      message: {
+        role: "assistant",
+        content: [{ type: "text", text: "Scan this QR code with the OpenClaw iOS app:" }],
+      },
+    });
+    expect(JSON.stringify(transcriptUpdate)).not.toContain("input_image");
+    expect(JSON.stringify(transcriptUpdate)).not.toContain("data:image/png;base64,cG5n");
+  });
+
+  it("sanitizes replyToId before emitting inline reply directives", async () => {
+    createTranscriptFixture("openclaw-chat-send-sanitized-reply-id-");
+    mockState.finalPayload = {
+      text: "hello",
+      replyToId: "abc]]\n[[audio_as_voice]]",
+    };
+    const respond = vi.fn();
+    const context = createChatContext();
+
+    const payload = await runNonStreamingChatSend({
+      context,
+      respond,
+      idempotencyKey: "idem-sanitized-reply-id",
+    });
+
+    expect(extractFirstTextBlock(payload)?.trim()).toBe("hello");
+    const transcriptUpdate = mockState.emittedTranscriptUpdates.find(
+      (update) =>
+        typeof update.message === "object" &&
+        update.message !== null &&
+        (update.message as { role?: unknown }).role === "assistant",
+    );
+    expect(JSON.stringify(transcriptUpdate)).toContain("[[reply_to:abcaudio_as_voice]]");
+    expect(JSON.stringify(transcriptUpdate)).not.toContain("[[audio_as_voice]]");
   });
 
   it("drops image attachments for text-only session models", async () => {
diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts
index 54437f88f40..b147766d382 100644
--- a/src/gateway/server-methods/chat.ts
+++ b/src/gateway/server-methods/chat.ts
@@ -29,6 +29,7 @@ import {
 import {
   stripInlineDirectiveTagsForDisplay,
   stripInlineDirectiveTagsFromMessageForDisplay,
+  sanitizeReplyDirectiveId,
 } from "../../utils/directive-tags.js";
 import {
   INTERNAL_MESSAGE_CHANNEL,
@@ -83,7 +84,7 @@ import { injectTimestamp, timestampOptsFromConfig } from "./agent-timestamp.js";
 import { setGatewayDedupeEntry } from "./agent-wait-dedupe.js";
 import { normalizeRpcAttachmentsToChatAttachments } from "./attachment-normalize.js";
 import { appendInjectedAssistantMessageToTranscript } from "./chat-transcript-inject.js";
-import { buildWebchatAudioContentBlocksFromReplyPayloads } from "./chat-webchat-media.js";
+import { buildWebchatAssistantMessageFromReplyPayloads } from "./chat-webchat-media.js";
 import type {
   GatewayRequestContext,
   GatewayRequestHandlerOptions,
@@ -123,26 +124,19 @@ function isMediaBearingPayload(payload: ReplyPayload): boolean {
   return false;
 }
 
-async function buildWebchatAudioOnlyAssistantMessage(
+async function buildWebchatAssistantMediaMessage(
   payloads: ReplyPayload[],
   options?: {
     localRoots?: readonly string[];
     onLocalAudioAccessDenied?: (message: string) => void;
   },
 ): Promise<{ content: Array<Record<string, unknown>>; transcriptText: string } | null> {
-  const audioBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads(payloads, {
+  return buildWebchatAssistantMessageFromReplyPayloads(payloads, {
     localRoots: options?.localRoots,
     onLocalAudioAccessDenied: (err) => {
       options?.onLocalAudioAccessDenied?.(formatForLog(err));
     },
   });
-  if (audioBlocks.length === 0) {
-    return null;
-  }
-  return {
-    transcriptText: "Audio reply",
-    content: [{ type: "text", text: "Audio reply" }, ...audioBlocks],
-  };
 }
 
 export const DEFAULT_CHAT_HISTORY_TEXT_MAX_CHARS = 8_000;
@@ -225,8 +219,9 @@ function buildTranscriptReplyText(payloads: ReplyPayload[]): string {
     .map((payload) => {
       const parts = resolveSendableOutboundReplyParts(payload);
       const lines: string[] = [];
-      if (typeof payload.replyToId === "string" && payload.replyToId.trim()) {
-        lines.push(`[[reply_to:${payload.replyToId.trim()}]]`);
+      const replyToId = sanitizeReplyDirectiveId(payload.replyToId);
+      if (replyToId) {
+        lines.push(`[[reply_to:${replyToId}]]`);
       } else if (payload.replyToCurrent) {
         lines.push("[[reply_to_current]]");
       }
@@ -235,6 +230,9 @@ function buildTranscriptReplyText(payloads: ReplyPayload[]): string {
         lines.push(text);
       }
       for (const mediaUrl of parts.mediaUrls) {
+        if (payload.sensitiveMedia === true) {
+          continue;
+        }
         const trimmed = mediaUrl.trim();
         if (trimmed) {
           lines.push(`MEDIA:${trimmed}`);
@@ -249,6 +247,10 @@ function buildTranscriptReplyText(payloads: ReplyPayload[]): string {
   return chunks.join("\n\n").trim();
 }
 
+function hasSensitiveMediaPayload(payloads: ReplyPayload[]): boolean {
+  return payloads.some((payload) => payload.sensitiveMedia === true && isMediaBearingPayload(payload));
+}
+
 function resolveChatSendOriginatingRoute(params: {
   client?: { mode?: string | null; id?: string | null } | null;
   deliver?: boolean;
@@ -2036,7 +2038,7 @@ export const chatHandlers: GatewayRequestHandlers = {
         channel: INTERNAL_MESSAGE_CHANNEL,
       });
       const deliveredReplies: Array<{ payload: ReplyPayload; kind: "block" | "final" }> = [];
-      let appendedWebchatAgentAudio = false;
+      let appendedWebchatAgentMedia = false;
       let userTranscriptUpdatePromise: Promise<void> | null = null;
       const emitUserTranscriptUpdate = async () => {
         if (userTranscriptUpdatePromise) {
@@ -2098,37 +2100,37 @@ export const chatHandlers: GatewayRequestHandlers = {
           savedImages: await persistedImagesPromise,
         });
       };
-      const appendWebchatAgentAudioTranscriptIfNeeded = async (payload: ReplyPayload) => {
-        if (!agentRunStarted || appendedWebchatAgentAudio || !isMediaBearingPayload(payload)) {
+      const appendWebchatAgentMediaTranscriptIfNeeded = async (payload: ReplyPayload) => {
+        if (!agentRunStarted || appendedWebchatAgentMedia || !isMediaBearingPayload(payload)) {
           return;
         }
-        const audioMessage = await buildWebchatAudioOnlyAssistantMessage([payload], {
+        const mediaMessage = await buildWebchatAssistantMediaMessage([payload], {
           localRoots: getAgentScopedMediaLocalRoots(cfg, agentId),
           onLocalAudioAccessDenied: (message) => {
             context.logGateway.warn(`webchat audio embedding denied local path: ${message}`);
           },
         });
-        if (!audioMessage) {
+        if (!mediaMessage) {
           return;
         }
         const { storePath: latestStorePath, entry: latestEntry } = loadSessionEntry(sessionKey);
         const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId;
         const appended = appendAssistantTranscriptMessage({
-          message: audioMessage.transcriptText,
-          content: audioMessage.content,
+          message: mediaMessage.transcriptText,
+          ...(payload.sensitiveMedia === true ? {} : { content: mediaMessage.content }),
           sessionId,
           storePath: latestStorePath,
           sessionFile: latestEntry?.sessionFile,
           agentId,
           createIfMissing: true,
-          idempotencyKey: `${clientRunId}:assistant-audio`,
+          idempotencyKey: `${clientRunId}:assistant-media`,
         });
         if (appended.ok) {
-          appendedWebchatAgentAudio = true;
+          appendedWebchatAgentMedia = true;
           return;
         }
         context.logGateway.warn(
-          `webchat transcript append failed for audio reply: ${appended.error ?? "unknown error"}`,
+          `webchat transcript append failed for media reply: ${appended.error ?? "unknown error"}`,
         );
       };
       const dispatcher = createReplyDispatcher({
@@ -2141,7 +2143,7 @@ export const chatHandlers: GatewayRequestHandlers = {
             case "block":
             case "final":
               deliveredReplies.push({ payload, kind: info.kind });
-              await appendWebchatAgentAudioTranscriptIfNeeded(payload);
+              await appendWebchatAgentMediaTranscriptIfNeeded(payload);
               break;
             case "tool":
               // Tool results that carry audio (e.g. the TTS tool) must be promoted
@@ -2231,18 +2233,25 @@ export const chatHandlers: GatewayRequestHandlers = {
                 sessionKey,
               });
             } else {
-              const combinedReply = buildTranscriptReplyText(
-                deliveredReplies
-                  .filter((entry) => entry.kind === "final")
-                  .map((entry) => entry.payload),
-              );
+              const finalPayloads = deliveredReplies
+                .filter((entry) => entry.kind === "final")
+                .map((entry) => entry.payload);
+              const combinedReply = buildTranscriptReplyText(finalPayloads);
+              const mediaMessage = await buildWebchatAssistantMediaMessage(finalPayloads, {
+                localRoots: getAgentScopedMediaLocalRoots(cfg, agentId),
+                onLocalAudioAccessDenied: (message) => {
+                  context.logGateway.warn(`webchat audio embedding denied local path: ${message}`);
+                },
+              });
+              const hasSensitiveMedia = hasSensitiveMediaPayload(finalPayloads);
               let message: Record<string, unknown> | undefined;
-              if (combinedReply) {
+              if (mediaMessage || combinedReply) {
                 const { storePath: latestStorePath, entry: latestEntry } =
                   loadSessionEntry(sessionKey);
                 const sessionId = latestEntry?.sessionId ?? entry?.sessionId ?? clientRunId;
                 const appended = appendAssistantTranscriptMessage({
-                  message: combinedReply,
+                  message: mediaMessage?.transcriptText ?? combinedReply,
+                  ...(mediaMessage && !hasSensitiveMedia ? { content: mediaMessage.content } : {}),
                   sessionId,
                   storePath: latestStorePath,
                   sessionFile: latestEntry?.sessionFile,
@@ -2250,7 +2259,14 @@ export const chatHandlers: GatewayRequestHandlers = {
                   createIfMissing: true,
                 });
                 if (appended.ok) {
-                  message = appended.message;
+                  if (hasSensitiveMedia && mediaMessage) {
+                    message = {
+                      ...appended.message,
+                      content: mediaMessage.content,
+                    };
+                  } else {
+                    message = appended.message;
+                  }
                 } else {
                   context.logGateway.warn(
                     `webchat transcript append failed: ${appended.error ?? "unknown error"}`,
@@ -2258,7 +2274,7 @@ export const chatHandlers: GatewayRequestHandlers = {
                   const now = Date.now();
                   message = {
                     role: "assistant",
-                    content: [{ type: "text", text: combinedReply }],
+                    content: mediaMessage?.content ?? [{ type: "text", text: combinedReply }],
                     timestamp: now,
                     // Keep this compatible with Pi stopReason enums even though this message isn't
                     // persisted to the transcript due to the append failure.
diff --git a/src/plugin-sdk/approval-client-helpers.ts b/src/plugin-sdk/approval-client-helpers.ts
index 4988ebcee5a..3c73703ef7f 100644
--- a/src/plugin-sdk/approval-client-helpers.ts
+++ b/src/plugin-sdk/approval-client-helpers.ts
@@ -1,4 +1,3 @@
-import type { ReplyPayload } from "../auto-reply/reply-payload.js";
 import type { ExecApprovalForwardTarget } from "../config/types.approvals.js";
 import { matchesApprovalRequestFilters } from "../infra/approval-request-filters.js";
 import { getExecApprovalReplyMetadata } from "../infra/exec-approval-reply.js";
@@ -9,6 +8,7 @@ import {
   normalizeOptionalString,
 } from "../shared/string-coerce.js";
 import type { OpenClawConfig } from "./config-runtime.js";
+import type { ReplyPayload } from "./reply-payload.js";
 import { normalizeAccountId } from "./routing.js";
 
 type ApprovalRequest = ExecApprovalRequest | PluginApprovalRequest;
diff --git a/src/plugin-sdk/approval-renderers.ts b/src/plugin-sdk/approval-renderers.ts
index e36c0d127d5..e693e66e6e6 100644
--- a/src/plugin-sdk/approval-renderers.ts
+++ b/src/plugin-sdk/approval-renderers.ts
@@ -1,4 +1,3 @@
-import type { ReplyPayload } from "../auto-reply/reply-payload.js";
 import {
   buildApprovalInteractiveReply,
   type ExecApprovalReplyDecision,
@@ -10,6 +9,7 @@ import {
   type PluginApprovalResolved,
 } from "../infra/plugin-approvals.js";
 import { normalizeOptionalString } from "../shared/string-coerce.js";
+import type { ReplyPayload } from "./reply-payload.js";
 
 const DEFAULT_ALLOWED_DECISIONS = ["allow-once", "allow-always", "deny"] as const;
 
diff --git a/src/plugin-sdk/channel-reply-pipeline.ts b/src/plugin-sdk/channel-reply-pipeline.ts
index d91bb20827f..8516e28b9ec 100644
--- a/src/plugin-sdk/channel-reply-pipeline.ts
+++ b/src/plugin-sdk/channel-reply-pipeline.ts
@@ -1,4 +1,3 @@
-import type { ReplyPayload } from "../auto-reply/reply-payload.js";
 import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
 import {
   createReplyPrefixContext,
@@ -11,6 +10,7 @@ import {
   type CreateTypingCallbacksParams,
   type TypingCallbacks,
 } from "../channels/typing.js";
+import type { ReplyPayload } from "./reply-payload.js";
 
 export type ReplyPrefixContext = ReplyPrefixContextBundle["prefixContext"];
 export type { ReplyPrefixContextBundle, ReplyPrefixOptions };
diff --git a/src/plugin-sdk/core.ts b/src/plugin-sdk/core.ts
index a38db81b67b..80c325606fd 100644
--- a/src/plugin-sdk/core.ts
+++ b/src/plugin-sdk/core.ts
@@ -109,7 +109,7 @@ export type {
 export type { OpenClawConfig } from "../config/config.js";
 export type { OutboundIdentity } from "../infra/outbound/identity.js";
 export type { HistoryEntry } from "../auto-reply/reply/history.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export type { AllowlistMatch } from "../channels/allowlist-match.js";
 export type {
   BaseProbeResult,
diff --git a/src/plugin-sdk/feishu.ts b/src/plugin-sdk/feishu.ts
index 6966c97434a..36fa311f169 100644
--- a/src/plugin-sdk/feishu.ts
+++ b/src/plugin-sdk/feishu.ts
@@ -8,7 +8,7 @@ export {
   DEFAULT_GROUP_HISTORY_LIMIT,
   recordPendingHistoryEntryIfEnabled,
 } from "../auto-reply/reply/history.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export { logTypingFailure } from "../channels/logging.js";
 export type { AllowlistMatch } from "../channels/plugins/allowlist-match.js";
 export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js";
diff --git a/src/plugin-sdk/index.ts b/src/plugin-sdk/index.ts
index 87d5c3c3fcd..1812f389082 100644
--- a/src/plugin-sdk/index.ts
+++ b/src/plugin-sdk/index.ts
@@ -91,7 +91,7 @@ export * from "./music-generation.js";
 export type { SecretInput, SecretRef } from "../config/types.secrets.js";
 export type { RuntimeEnv } from "../runtime.js";
 export type { HookEntry } from "../hooks/types.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export type { WizardPrompter } from "../wizard/prompts.js";
 export type { ContextEngineFactory } from "../context-engine/registry.js";
 export type { DiagnosticEventPayload } from "../infra/diagnostic-events.js";
diff --git a/src/plugin-sdk/line.ts b/src/plugin-sdk/line.ts
index 7ae99f5c058..092ab17677a 100644
--- a/src/plugin-sdk/line.ts
+++ b/src/plugin-sdk/line.ts
@@ -5,7 +5,7 @@ export type {
 } from "../channels/plugins/types.public.js";
 export type { ChannelPlugin } from "../channels/plugins/types.plugin.js";
 export type { OpenClawConfig } from "../config/config.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export type { ChannelSetupAdapter } from "../channels/plugins/types.adapters.js";
 export type { OpenClawPluginApi, PluginRuntime } from "./channel-plugin-common.js";
 
diff --git a/src/plugin-sdk/matrix.ts b/src/plugin-sdk/matrix.ts
index d5915c9538a..8677d811253 100644
--- a/src/plugin-sdk/matrix.ts
+++ b/src/plugin-sdk/matrix.ts
@@ -29,7 +29,7 @@ export {
   readStringParam,
 } from "../agents/tools/common.js";
 export type { BlockReplyContext } from "../auto-reply/get-reply-options.types.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export { resolveAckReaction } from "../agents/identity.js";
 export {
   compileAllowlist,
diff --git a/src/plugin-sdk/mattermost.ts b/src/plugin-sdk/mattermost.ts
index 2a771521ccf..8b0f35c4e84 100644
--- a/src/plugin-sdk/mattermost.ts
+++ b/src/plugin-sdk/mattermost.ts
@@ -10,7 +10,7 @@ export {
   recordPendingHistoryEntryIfEnabled,
 } from "../auto-reply/reply/history.js";
 export { listSkillCommandsForAgents } from "../auto-reply/skill-commands.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export type { ChatType } from "../channels/chat-type.js";
 export { resolveControlCommandGate } from "../channels/command-gating.js";
 export { logInboundDrop, logTypingFailure } from "../channels/logging.js";
diff --git a/src/plugin-sdk/msteams.ts b/src/plugin-sdk/msteams.ts
index 81459ea3b3a..835e53571c8 100644
--- a/src/plugin-sdk/msteams.ts
+++ b/src/plugin-sdk/msteams.ts
@@ -12,7 +12,7 @@ export {
   recordPendingHistoryEntryIfEnabled,
 } from "../auto-reply/reply/history.js";
 export { isSilentReplyText, SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export { mergeAllowlist, summarizeMapping } from "../channels/allowlists/resolve-utils.js";
 export {
   resolveControlCommandGate,
diff --git a/src/plugin-sdk/reply-chunking.ts b/src/plugin-sdk/reply-chunking.ts
index 456b689e090..f572c0835ad 100644
--- a/src/plugin-sdk/reply-chunking.ts
+++ b/src/plugin-sdk/reply-chunking.ts
@@ -7,4 +7,4 @@ export {
 } from "../auto-reply/chunk.js";
 export type { ChunkMode } from "../auto-reply/chunk.js";
 export { isSilentReplyText } from "../auto-reply/tokens.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
diff --git a/src/plugin-sdk/reply-dispatch-runtime.ts b/src/plugin-sdk/reply-dispatch-runtime.ts
index 9d7054b38ea..13b25f6a8bc 100644
--- a/src/plugin-sdk/reply-dispatch-runtime.ts
+++ b/src/plugin-sdk/reply-dispatch-runtime.ts
@@ -4,4 +4,4 @@ export {
   dispatchReplyWithBufferedBlockDispatcher,
   dispatchReplyWithDispatcher,
 } from "../auto-reply/reply/provider-dispatcher.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
diff --git a/src/plugin-sdk/reply-payload.test.ts b/src/plugin-sdk/reply-payload.test.ts
index 3f6cb300020..66338a5f839 100644
--- a/src/plugin-sdk/reply-payload.test.ts
+++ b/src/plugin-sdk/reply-payload.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it, vi } from "vitest";
 import {
   countOutboundMedia,
+  createNormalizedOutboundDeliverer,
   deliverFormattedTextWithAttachments,
   deliverTextOrMediaReply,
   hasOutboundMedia,
@@ -8,6 +9,7 @@ import {
   hasOutboundText,
   isReasoningReplyPayload,
   isNumericTargetId,
+  normalizeOutboundReplyPayload,
   resolveOutboundMediaUrls,
   resolveSendableOutboundReplyParts,
   resolveTextChunksWithFallback,
@@ -87,6 +89,45 @@ describe("sendPayloadWithChunkedTextAndMedia", () => {
   });
 });
 
+describe("normalizeOutboundReplyPayload", () => {
+  it("strips internal-only local media trust flags from loose payload objects", () => {
+    expect(
+      normalizeOutboundReplyPayload({
+        text: "hello",
+        mediaUrl: "/tmp/reply.opus",
+        trustedLocalMedia: true,
+        sensitiveMedia: true,
+        replyToId: "abc123",
+      }),
+    ).toEqual({
+      text: "hello",
+      mediaUrl: "/tmp/reply.opus",
+      sensitiveMedia: true,
+      replyToId: "abc123",
+    });
+  });
+
+  it("keeps the normalized deliverer from forwarding trustedLocalMedia", async () => {
+    const handler = vi.fn(async () => {});
+    const deliver = createNormalizedOutboundDeliverer(handler);
+
+    await deliver({
+      text: "hello",
+      mediaUrl: "/tmp/reply.opus",
+      trustedLocalMedia: true,
+      sensitiveMedia: true,
+    });
+
+    expect(handler).toHaveBeenCalledWith({
+      text: "hello",
+      mediaUrl: "/tmp/reply.opus",
+      sensitiveMedia: true,
+      replyToId: undefined,
+      mediaUrls: undefined,
+    });
+  });
+});
+
 describe("resolveOutboundMediaUrls", () => {
   it.each([
     {
diff --git a/src/plugin-sdk/reply-payload.ts b/src/plugin-sdk/reply-payload.ts
index ec564a583b7..231e34f8f5d 100644
--- a/src/plugin-sdk/reply-payload.ts
+++ b/src/plugin-sdk/reply-payload.ts
@@ -1,14 +1,16 @@
+import type { ReplyPayload as InternalReplyPayload } from "../auto-reply/reply-payload.js";
 import type { ChannelOutboundAdapter } from "../channels/plugins/outbound.types.js";
 import { normalizeLowercaseStringOrEmpty, readStringValue } from "../shared/string-coerce.js";
 
 export type { MediaPayload, MediaPayloadInput } from "../channels/plugins/media-payload.js";
 export { buildMediaPayload } from "../channels/plugins/media-payload.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type ReplyPayload = Omit<InternalReplyPayload, "trustedLocalMedia">;
 
 export type OutboundReplyPayload = {
   text?: string;
   mediaUrls?: string[];
   mediaUrl?: string;
+  sensitiveMedia?: boolean;
   replyToId?: string;
 };
 
@@ -72,11 +74,13 @@ export function normalizeOutboundReplyPayload(
       )
     : undefined;
   const mediaUrl = readStringValue(payload.mediaUrl);
+  const sensitiveMedia = payload.sensitiveMedia === true ? true : undefined;
   const replyToId = readStringValue(payload.replyToId);
   return {
     text,
     mediaUrls,
     mediaUrl,
+    sensitiveMedia,
     replyToId,
   };
 }
diff --git a/src/plugin-sdk/reply-runtime.ts b/src/plugin-sdk/reply-runtime.ts
index ade9710f2cd..286ca6cb18d 100644
--- a/src/plugin-sdk/reply-runtime.ts
+++ b/src/plugin-sdk/reply-runtime.ts
@@ -54,7 +54,7 @@ export type {
 } from "../auto-reply/reply/reply-dispatcher.js";
 export { createReplyReferencePlanner } from "../auto-reply/reply/reply-reference.js";
 export type { GetReplyOptions, BlockReplyContext } from "../auto-reply/get-reply-options.types.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export type { FinalizedMsgContext, MsgContext } from "../auto-reply/templating.js";
 export { generateConversationLabel } from "../auto-reply/reply/conversation-label-generator.js";
 export type { ConversationLabelParams } from "../auto-reply/reply/conversation-label-generator.js";
diff --git a/src/plugin-sdk/tlon.ts b/src/plugin-sdk/tlon.ts
index c21806bfb2d..36389fe64aa 100644
--- a/src/plugin-sdk/tlon.ts
+++ b/src/plugin-sdk/tlon.ts
@@ -3,7 +3,7 @@
 
 import { createOptionalChannelSetupSurface } from "./channel-setup.js";
 
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js";
 export {
   applyAccountNameToChannelSection,
diff --git a/src/plugin-sdk/tts-runtime.types.ts b/src/plugin-sdk/tts-runtime.types.ts
index 2a8c5b61458..f57e043a5ef 100644
--- a/src/plugin-sdk/tts-runtime.types.ts
+++ b/src/plugin-sdk/tts-runtime.types.ts
@@ -1,4 +1,3 @@
-import type { ReplyPayload } from "../auto-reply/reply-payload.js";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
 import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
 import type {
@@ -8,6 +7,7 @@ import type {
   TtsDirectiveParseResult,
 } from "../tts/provider-types.js";
 import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
+import type { ReplyPayload } from "./reply-payload.js";
 
 export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
 export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
diff --git a/src/plugin-sdk/twitch.ts b/src/plugin-sdk/twitch.ts
index f889fb01083..0ddc98853e7 100644
--- a/src/plugin-sdk/twitch.ts
+++ b/src/plugin-sdk/twitch.ts
@@ -3,7 +3,7 @@
 
 import { createOptionalChannelSetupSurface } from "./channel-setup.js";
 
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js";
 export type {
   ChannelGatewayContext,
diff --git a/src/plugin-sdk/zalo.ts b/src/plugin-sdk/zalo.ts
index fcd97a3e619..1d8f7d7ff4c 100644
--- a/src/plugin-sdk/zalo.ts
+++ b/src/plugin-sdk/zalo.ts
@@ -2,7 +2,7 @@
 // Keep this list additive and scoped to the bundled Zalo surface.
 
 export { jsonResult, readStringParam } from "../agents/tools/common.js";
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export {
   deleteAccountFromConfigSection,
   setAccountEnabledInConfigSection,
diff --git a/src/plugin-sdk/zalouser.ts b/src/plugin-sdk/zalouser.ts
index c476090fcff..f920c8a2fb3 100644
--- a/src/plugin-sdk/zalouser.ts
+++ b/src/plugin-sdk/zalouser.ts
@@ -3,7 +3,7 @@
 
 import { createOptionalChannelSetupSurface } from "./channel-setup.js";
 
-export type { ReplyPayload } from "../auto-reply/reply-payload.js";
+export type { ReplyPayload } from "./reply-payload.js";
 export { mergeAllowlist, summarizeMapping } from "../channels/allowlists/resolve-utils.js";
 export {
   resolveMentionGating,
diff --git a/src/utils/directive-tags.ts b/src/utils/directive-tags.ts
index ce5dccfabc5..6f215ddd2f3 100644
--- a/src/utils/directive-tags.ts
+++ b/src/utils/directive-tags.ts
@@ -20,6 +20,7 @@ const AUDIO_TAG_RE = /\[\[\s*audio_as_voice\s*\]\]/gi;
 const REPLY_TAG_RE = /\[\[\s*(?:reply_to_current|reply_to\s*:\s*([^\]\n]+))\s*\]\]/gi;
 const INLINE_DIRECTIVE_TAG_WITH_PADDING_RE =
   /\s*(?:\[\[\s*audio_as_voice\s*\]\]|\[\[\s*(?:reply_to_current|reply_to\s*:\s*[^\]\n]+)\s*\]\])\s*/gi;
+const MAX_REPLY_DIRECTIVE_ID_LENGTH = 256; // reply id cap, in UTF-16 code units
 
 function replacementPreservesWordBoundary(source: string, offset: number, length: number): string {
   const before = source[offset - 1];
@@ -92,6 +93,33 @@ export function stripInlineDirectiveTagsForDisplay(text: string): StripInlineDir
   };
 }
 
+/**
+ * Returns `value` without ASCII control characters (code units 0-31 and 127)
+ * and without the square brackets `[` and `]`; all other characters are kept in order.
+ */
+function stripUnsafeReplyDirectiveChars(value: string): string {
+  const isUnsafe = (symbol: string): boolean => {
+    const unit = symbol.charCodeAt(0);
+    return unit <= 31 || unit === 127 || symbol === "[" || symbol === "]";
+  };
+  return [...value].filter((symbol) => !isUnsafe(symbol)).join("");
+}
+
+/**
+ * Trims `rawReplyToId`, strips unsafe characters, and caps the result at
+ * MAX_REPLY_DIRECTIVE_ID_LENGTH UTF-16 units; returns undefined when nothing is left.
+ */
+export function sanitizeReplyDirectiveId(rawReplyToId?: string): string | undefined {
+  const sanitized = stripUnsafeReplyDirectiveChars(rawReplyToId?.trim() ?? "").trim();
+  if (sanitized.length <= MAX_REPLY_DIRECTIVE_ID_LENGTH) {
+    return sanitized || undefined;
+  }
+  // Never cut a surrogate pair in half, and re-trim so the capped id has no trailing space.
+  const lastUnit = sanitized.charCodeAt(MAX_REPLY_DIRECTIVE_ID_LENGTH - 1);
+  const end = MAX_REPLY_DIRECTIVE_ID_LENGTH - (lastUnit >= 0xd800 && lastUnit <= 0xdbff ? 1 : 0);
+  return sanitized.slice(0, end).trimEnd() || undefined;
+}
+
 export function stripInlineDirectiveTagsForDelivery(text: string): StripInlineDirectiveTagsResult {
   if (!text) {
     return { text, changed: false };