From eb1080369165408eb4cf5b4eedbed2ffc07994fa Mon Sep 17 00:00:00 2001
From: Ted Li <tl2493@columbia.edu>
Date: Sat, 11 Apr 2026 17:46:03 -0700
Subject: [PATCH] fix(prompt): keep inbound chat ids out of system prefix

---
 .../reply/get-reply-run.media-only.test.ts    | 36 +++++++++++++++++++
 src/auto-reply/reply/get-reply-run.ts         | 13 +++----
 src/auto-reply/reply/inbound-meta.test.ts     | 28 +++++++++++++--
 src/auto-reply/reply/inbound-meta.ts          | 10 +++---
 4 files changed, 75 insertions(+), 12 deletions(-)

diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts
index ef5f86c4940..3670bfbf964 100644
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -112,6 +112,7 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent;
 let routeReply: typeof import("./route-reply.runtime.js").routeReply;
 let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents;
 let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode;
+let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix;
 let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount;
 let replyRunTesting: typeof import("./reply-run-registry.js").__testing;
 let loadScopeCounter = 0;
@@ -212,6 +213,7 @@ describe("runPreparedReply media-only handling", () => {
     ({ routeReply } = await import("./route-reply.runtime.js"));
     ({ drainFormattedSystemEvents } = await import("./session-system-events.js"));
     ({ resolveTypingMode } = await import("./typing-mode.js"));
+    ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js"));
     ({ __testing: replyRunTesting, getActiveReplyRunCount } =
       await import("./reply-run-registry.js"));
   });
@@ -301,6 +303,40 @@ describe("runPreparedReply media-only handling", () => {
     expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
   });
 
+  it("still skips metadata-only turns when inbound context adds chat_id", async () => {
+    // Stub the user-context prefix so it carries nothing but conversation metadata.
+    const metadataOnlyPrefix = [
+      "Conversation info (untrusted metadata):",
+      "```json",
+      JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2),
+      "```",
+    ].join("\n");
+    vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce(metadataOnlyPrefix);
+
+    const emptyTurnParams = baseParams({
+      ctx: {
+        Body: "",
+        RawBody: "",
+        CommandBody: "",
+      },
+      sessionCtx: {
+        Body: "",
+        BodyStripped: "",
+        Provider: "paperclip",
+        OriginatingChannel: "paperclip",
+        OriginatingTo: "paperclip:issue:abc",
+        ChatType: "direct",
+      },
+    });
+    const result = await runPreparedReply(emptyTurnParams);
+
+    // An empty body is expected to yield the canned notice without starting an agent run.
+    expect(result).toEqual({
+      text: "I didn't receive any text in your message. Please resend or add a caption.",
+    });
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+  });
+
   it("does not send a standalone reset notice for reply-producing /new turns", async () => {
     await runPreparedReply(
       baseParams({
diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts
index 3a0b04456c5..01406e5970e 100644
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -345,11 +345,11 @@ export async function runPreparedReply(
   const baseBodyForPrompt = isBareSessionReset
     ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n")
     : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n");
-  const baseBodyTrimmed = baseBodyForPrompt.trim();
+  const hasUserBody = baseBodyFinal.trim().length > 0;
   const hasMediaAttachment = Boolean(
     sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0),
   );
-  if (!baseBodyTrimmed && !hasMediaAttachment) {
+  if (!hasUserBody && !hasMediaAttachment) {
     // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) —
     // otherwise channels that wire onReplyStart to typing indicators leak
     // visible signals even though outbound delivery is suppressed.
@@ -362,11 +362,12 @@ export async function runPreparedReply(
       text: "I didn't receive any text in your message. Please resend or add a caption.",
     };
   }
-  // When the user sends media without text, provide a minimal body so the agent
-  // run proceeds and the image/document is injected by the embedded runner.
-  const effectiveBaseBody = baseBodyTrimmed
+  // Prefix-only inbound metadata should not force a run on empty turns. When media
+  // arrives without text, keep the contextual prefix but append a minimal placeholder
+  // so the embedded runner can inject the attachment.
+  const effectiveBaseBody = hasUserBody
     ? baseBodyForPrompt
-    : "[User sent media without caption]";
+    : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n");
   let prefixedBodyBase = await applySessionHints({
     baseBody: effectiveBaseBody,
     abortedLastRun,
diff --git a/src/auto-reply/reply/inbound-meta.test.ts b/src/auto-reply/reply/inbound-meta.test.ts
index 72ef3d0079d..dc5525c97fd 100644
--- a/src/auto-reply/reply/inbound-meta.test.ts
+++ b/src/auto-reply/reply/inbound-meta.test.ts
@@ -65,7 +65,7 @@ function parseHistoryPayload(text: string): Array<Record<string, unknown>> {
 }
 
 describe("buildInboundMetaSystemPrompt", () => {
-  it("includes session-stable routing fields", () => {
+  it("includes stable routing fields and omits chat ids", () => {
     const prompt = buildInboundMetaSystemPrompt({
       MessageSid: "123",
       MessageSidFull: "123",
@@ -80,11 +80,33 @@ describe("buildInboundMetaSystemPrompt", () => {
 
     const payload = parseInboundMetaPayload(prompt);
     expect(payload["schema"]).toBe("openclaw.inbound_meta.v2");
-    expect(payload["chat_id"]).toBe("telegram:5494292670");
+    expect(payload["chat_id"]).toBeUndefined();
     expect(payload["account_id"]).toBe("work");
     expect(payload["channel"]).toBe("telegram");
   });
 
+  it("keeps task-scoped chat ids out of the system prompt for cache stability", () => {
+    // Render the system prompt for a paperclip direct chat whose only varying input is the
+    // task-scoped conversation id.
+    const renderPrompt = (originatingTo: string) =>
+      buildInboundMetaSystemPrompt({
+        OriginatingTo: originatingTo,
+        OriginatingChannel: "paperclip",
+        Provider: "paperclip",
+        Surface: "paperclip",
+        ChatType: "direct",
+        AccountId: "default",
+      } as TemplateContext);
+
+    const first = renderPrompt("paperclip:issue:c585d0cc");
+    const second = renderPrompt("paperclip:issue:ca527062");
+
+    // With chat_id omitted from the payload, the two prompts should match byte for byte,
+    // so a change of task does not alter the system prefix.
+    expect(parseInboundMetaPayload(first)["chat_id"]).toBeUndefined();
+    expect(first).toBe(second);
+  });
+
   it("does not include per-turn message identifiers (cache stability)", () => {
     const prompt = buildInboundMetaSystemPrompt({
       MessageSid: "123",
@@ -233,12 +255,14 @@ describe("buildInboundUserContextPrefix", () => {
     const text = buildInboundUserContextPrefix({
       ChatType: "direct",
       OriginatingChannel: "whatsapp",
+      OriginatingTo: "whatsapp:+15551230000",
       MessageSid: "short-id",
       MessageSidFull: "provider-full-id",
       SenderE164: " +15551234567 ",
     } as TemplateContext);
 
     const conversationInfo = parseConversationInfoPayload(text);
+    expect(conversationInfo["chat_id"]).toBe("whatsapp:+15551230000");
     expect(conversationInfo["message_id"]).toBe("short-id");
     expect(conversationInfo["message_id_full"]).toBeUndefined();
     expect(conversationInfo["sender"]).toBe("+15551234567");
diff --git a/src/auto-reply/reply/inbound-meta.ts b/src/auto-reply/reply/inbound-meta.ts
index d2a641da874..e94d4fac3b0 100644
--- a/src/auto-reply/reply/inbound-meta.ts
+++ b/src/auto-reply/reply/inbound-meta.ts
@@ -117,9 +117,9 @@ export function buildInboundMetaSystemPrompt(
 
   // Keep system metadata strictly free of attacker-controlled strings (sender names, group subjects, etc.).
   // Those belong in the user-role "untrusted context" blocks.
-  // Per-message identifiers and dynamic flags are also excluded here: they change on turns/replies
-  // and would bust prefix-based prompt caches on providers that use stable system prefixes.
-  // They are included in the user-role conversation info block instead.
+  // Conversation ids, per-message identifiers, and dynamic flags are also excluded here: they
+  // vary across conversations, turns, or replies and would bust prefix-based prompt caches on
+  // providers with stable system prefixes. They live in the user-role conversation info block.
 
   // Resolve channel identity: prefer explicit channel, then surface, then provider.
   // For webchat/Hub Chat sessions (when Surface is 'webchat' or undefined with no real channel),
@@ -128,7 +128,6 @@ export function buildInboundMetaSystemPrompt(
 
   const payload = {
     schema: "openclaw.inbound_meta.v2",
-    chat_id: normalizePromptMetadataString(ctx.OriginatingTo),
     account_id: normalizePromptMetadataString(ctx.AccountId),
     channel: channelValue,
     provider: normalizePromptMetadataString(ctx.Provider),
@@ -172,7 +171,10 @@ export function buildInboundUserContextPrefix(
   const inboundHistory = Array.isArray(ctx.InboundHistory) ? ctx.InboundHistory : [];
   const boundedHistory = inboundHistory.slice(-MAX_UNTRUSTED_HISTORY_ENTRIES);
 
+  // Keep volatile conversation/message identifiers in the user-role block so the system
+  // prompt stays byte-stable across task-scoped sessions and reply turns.
   const conversationInfo = {
+    chat_id: shouldIncludeConversationInfo ? normalizeOptionalString(ctx.OriginatingTo) : undefined,
     message_id: shouldIncludeConversationInfo ? resolvedMessageId : undefined,
     reply_to_id: shouldIncludeConversationInfo
       ? normalizePromptMetadataString(ctx.ReplyToId)