fix(prompt): keep inbound chat ids out of system prefix

2026-05-06 05:50:43 +00:00 · 2026-04-11 17:46:03 -07:00
parent 1183832d4f
commit eb10803691
4 changed files with 75 additions and 12 deletions
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -112,6 +112,7 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent;
 let routeReply: typeof import("./route-reply.runtime.js").routeReply;
 let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents;
 let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode;
+let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix;
 let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount;
 let replyRunTesting: typeof import("./reply-run-registry.js").__testing;
 let loadScopeCounter = 0;
@@ -212,6 +213,7 @@ describe("runPreparedReply media-only handling", () => {
    ({ routeReply } = await import("./route-reply.runtime.js"));
    ({ drainFormattedSystemEvents } = await import("./session-system-events.js"));
    ({ resolveTypingMode } = await import("./typing-mode.js"));
+    ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js"));
    ({ __testing: replyRunTesting, getActiveReplyRunCount } =
      await import("./reply-run-registry.js"));
  });
@@ -301,6 +303,40 @@ describe("runPreparedReply media-only handling", () => {
    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
  });

+  it("still skips metadata-only turns when inbound context adds chat_id", async () => {
+    vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce(
+      [
+        "Conversation info (untrusted metadata):",
+        "```json",
+        JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2),
+        "```",
+      ].join("\n"),
+    );
+
+    const result = await runPreparedReply(
+      baseParams({
+        ctx: {
+          Body: "",
+          RawBody: "",
+          CommandBody: "",
+        },
+        sessionCtx: {
+          Body: "",
+          BodyStripped: "",
+          Provider: "paperclip",
+          OriginatingChannel: "paperclip",
+          OriginatingTo: "paperclip:issue:abc",
+          ChatType: "direct",
+        },
+      }),
+    );
+
+    expect(result).toEqual({
+      text: "I didn't receive any text in your message. Please resend or add a caption.",
+    });
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+  });
+
  it("does not send a standalone reset notice for reply-producing /new turns", async () => {
    await runPreparedReply(
      baseParams({
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -345,11 +345,11 @@ export async function runPreparedReply(
  const baseBodyForPrompt = isBareSessionReset
    ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n")
    : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n");
-  const baseBodyTrimmed = baseBodyForPrompt.trim();
+  const hasUserBody = baseBodyFinal.trim().length > 0;
  const hasMediaAttachment = Boolean(
    sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0),
  );
-  if (!baseBodyTrimmed && !hasMediaAttachment) {
+  if (!hasUserBody && !hasMediaAttachment) {
    // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) —
    // otherwise channels that wire onReplyStart to typing indicators leak
    // visible signals even though outbound delivery is suppressed.
@@ -362,11 +362,12 @@ export async function runPreparedReply(
      text: "I didn't receive any text in your message. Please resend or add a caption.",
    };
  }
-  // When the user sends media without text, provide a minimal body so the agent
-  // run proceeds and the image/document is injected by the embedded runner.
-  const effectiveBaseBody = baseBodyTrimmed
+  // Prefix-only inbound metadata should not force a run on empty turns. When media
+  // arrives without text, keep the contextual prefix but append a minimal placeholder
+  // so the embedded runner can inject the attachment.
+  const effectiveBaseBody = hasUserBody
    ? baseBodyForPrompt
-    : "[User sent media without caption]";
+    : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n");
  let prefixedBodyBase = await applySessionHints({
    baseBody: effectiveBaseBody,
    abortedLastRun,
--- a/src/auto-reply/reply/inbound-meta.test.ts
+++ b/src/auto-reply/reply/inbound-meta.test.ts
@@ -65,7 +65,7 @@ function parseHistoryPayload(text: string): Array<Record<string, unknown>> {
 }

 describe("buildInboundMetaSystemPrompt", () => {
-  it("includes session-stable routing fields", () => {
+  it("includes stable routing fields and omits chat ids", () => {
    const prompt = buildInboundMetaSystemPrompt({
      MessageSid: "123",
      MessageSidFull: "123",
@@ -80,11 +80,33 @@ describe("buildInboundMetaSystemPrompt", () => {

    const payload = parseInboundMetaPayload(prompt);
    expect(payload["schema"]).toBe("openclaw.inbound_meta.v2");
-    expect(payload["chat_id"]).toBe("telegram:5494292670");
+    expect(payload["chat_id"]).toBeUndefined();
    expect(payload["account_id"]).toBe("work");
    expect(payload["channel"]).toBe("telegram");
  });

+  it("keeps task-scoped chat ids out of the system prompt for cache stability", () => {
+    const first = buildInboundMetaSystemPrompt({
+      OriginatingTo: "paperclip:issue:c585d0cc",
+      OriginatingChannel: "paperclip",
+      Provider: "paperclip",
+      Surface: "paperclip",
+      ChatType: "direct",
+      AccountId: "default",
+    } as TemplateContext);
+    const second = buildInboundMetaSystemPrompt({
+      OriginatingTo: "paperclip:issue:ca527062",
+      OriginatingChannel: "paperclip",
+      Provider: "paperclip",
+      Surface: "paperclip",
+      ChatType: "direct",
+      AccountId: "default",
+    } as TemplateContext);
+
+    expect(parseInboundMetaPayload(first)["chat_id"]).toBeUndefined();
+    expect(first).toBe(second);
+  });
+
  it("does not include per-turn message identifiers (cache stability)", () => {
    const prompt = buildInboundMetaSystemPrompt({
      MessageSid: "123",
@@ -233,12 +255,14 @@ describe("buildInboundUserContextPrefix", () => {
    const text = buildInboundUserContextPrefix({
      ChatType: "direct",
      OriginatingChannel: "whatsapp",
+      OriginatingTo: "whatsapp:+15551230000",
      MessageSid: "short-id",
      MessageSidFull: "provider-full-id",
      SenderE164: " +15551234567 ",
    } as TemplateContext);

    const conversationInfo = parseConversationInfoPayload(text);
+    expect(conversationInfo["chat_id"]).toBe("whatsapp:+15551230000");
    expect(conversationInfo["message_id"]).toBe("short-id");
    expect(conversationInfo["message_id_full"]).toBeUndefined();
    expect(conversationInfo["sender"]).toBe("+15551234567");
--- a/src/auto-reply/reply/inbound-meta.ts
+++ b/src/auto-reply/reply/inbound-meta.ts
@@ -117,9 +117,9 @@ export function buildInboundMetaSystemPrompt(

  // Keep system metadata strictly free of attacker-controlled strings (sender names, group subjects, etc.).
  // Those belong in the user-role "untrusted context" blocks.
-  // Per-message identifiers and dynamic flags are also excluded here: they change on turns/replies
-  // and would bust prefix-based prompt caches on providers that use stable system prefixes.
-  // They are included in the user-role conversation info block instead.
+  // Conversation ids, per-message identifiers, and dynamic flags are also excluded here: they vary
+  // across task-scoped sessions, turns, and replies, and would bust prefix-based prompt caches on
+  // providers with stable system prefixes. They go in the user-role conversation info block instead.

  // Resolve channel identity: prefer explicit channel, then surface, then provider.
  // For webchat/Hub Chat sessions (when Surface is 'webchat' or undefined with no real channel),
@@ -128,7 +128,6 @@ export function buildInboundMetaSystemPrompt(

  const payload = {
    schema: "openclaw.inbound_meta.v2",
-    chat_id: normalizePromptMetadataString(ctx.OriginatingTo),
    account_id: normalizePromptMetadataString(ctx.AccountId),
    channel: channelValue,
    provider: normalizePromptMetadataString(ctx.Provider),
@@ -172,7 +171,10 @@ export function buildInboundUserContextPrefix(
  const inboundHistory = Array.isArray(ctx.InboundHistory) ? ctx.InboundHistory : [];
  const boundedHistory = inboundHistory.slice(-MAX_UNTRUSTED_HISTORY_ENTRIES);

+  // Keep volatile conversation/message identifiers in the user-role block so the system
+  // prompt stays byte-stable across task-scoped sessions and reply turns.
  const conversationInfo = {
+    chat_id: shouldIncludeConversationInfo ? normalizeOptionalString(ctx.OriginatingTo) : undefined,
    message_id: shouldIncludeConversationInfo ? resolvedMessageId : undefined,
    reply_to_id: shouldIncludeConversationInfo
      ? normalizePromptMetadataString(ctx.ReplyToId)