From eb1080369165408eb4cf5b4eedbed2ffc07994fa Mon Sep 17 00:00:00 2001 From: Ted Li Date: Sat, 11 Apr 2026 17:46:03 -0700 Subject: [PATCH] fix(prompt): keep inbound chat ids out of system prefix --- .../reply/get-reply-run.media-only.test.ts | 36 +++++++++++++++++++ src/auto-reply/reply/get-reply-run.ts | 13 +++---- src/auto-reply/reply/inbound-meta.test.ts | 28 +++++++++++++-- src/auto-reply/reply/inbound-meta.ts | 10 +++--- 4 files changed, 75 insertions(+), 12 deletions(-) diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts index ef5f86c4940..3670bfbf964 100644 --- a/src/auto-reply/reply/get-reply-run.media-only.test.ts +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -112,6 +112,7 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent; let routeReply: typeof import("./route-reply.runtime.js").routeReply; let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents; let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode; +let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix; let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount; let replyRunTesting: typeof import("./reply-run-registry.js").__testing; let loadScopeCounter = 0; @@ -212,6 +213,7 @@ describe("runPreparedReply media-only handling", () => { ({ routeReply } = await import("./route-reply.runtime.js")); ({ drainFormattedSystemEvents } = await import("./session-system-events.js")); ({ resolveTypingMode } = await import("./typing-mode.js")); + ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js")); ({ __testing: replyRunTesting, getActiveReplyRunCount } = await import("./reply-run-registry.js")); }); @@ -301,6 +303,40 @@ describe("runPreparedReply media-only handling", () => { expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); }); + 
it("still skips metadata-only turns when inbound context adds chat_id", async () => { + vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce( + [ + "Conversation info (untrusted metadata):", + "```json", + JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2), + "```", + ].join("\n"), + ); + + const result = await runPreparedReply( + baseParams({ + ctx: { + Body: "", + RawBody: "", + CommandBody: "", + }, + sessionCtx: { + Body: "", + BodyStripped: "", + Provider: "paperclip", + OriginatingChannel: "paperclip", + OriginatingTo: "paperclip:issue:abc", + ChatType: "direct", + }, + }), + ); + + expect(result).toEqual({ + text: "I didn't receive any text in your message. Please resend or add a caption.", + }); + expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); + }); + it("does not send a standalone reset notice for reply-producing /new turns", async () => { await runPreparedReply( baseParams({ diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 3a0b04456c5..01406e5970e 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -345,11 +345,11 @@ export async function runPreparedReply( const baseBodyForPrompt = isBareSessionReset ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n") : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n"); - const baseBodyTrimmed = baseBodyForPrompt.trim(); + const hasUserBody = baseBodyFinal.trim().length > 0; const hasMediaAttachment = Boolean( sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0), ); - if (!baseBodyTrimmed && !hasMediaAttachment) { + if (!hasUserBody && !hasMediaAttachment) { // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) — // otherwise channels that wire onReplyStart to typing indicators leak // visible signals even though outbound delivery is suppressed. 
@@ -362,11 +362,12 @@ export async function runPreparedReply( text: "I didn't receive any text in your message. Please resend or add a caption.", }; } - // When the user sends media without text, provide a minimal body so the agent - // run proceeds and the image/document is injected by the embedded runner. - const effectiveBaseBody = baseBodyTrimmed + // Prefix-only inbound metadata should not force a run on empty turns. When media + // arrives without text, keep the contextual prefix but append a minimal placeholder + // so the embedded runner can inject the attachment. + const effectiveBaseBody = hasUserBody ? baseBodyForPrompt - : "[User sent media without caption]"; + : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n"); let prefixedBodyBase = await applySessionHints({ baseBody: effectiveBaseBody, abortedLastRun, diff --git a/src/auto-reply/reply/inbound-meta.test.ts b/src/auto-reply/reply/inbound-meta.test.ts index 72ef3d0079d..dc5525c97fd 100644 --- a/src/auto-reply/reply/inbound-meta.test.ts +++ b/src/auto-reply/reply/inbound-meta.test.ts @@ -65,7 +65,7 @@ function parseHistoryPayload(text: string): Array> { } describe("buildInboundMetaSystemPrompt", () => { - it("includes session-stable routing fields", () => { + it("includes stable routing fields and omits chat ids", () => { const prompt = buildInboundMetaSystemPrompt({ MessageSid: "123", MessageSidFull: "123", @@ -80,11 +80,33 @@ describe("buildInboundMetaSystemPrompt", () => { const payload = parseInboundMetaPayload(prompt); expect(payload["schema"]).toBe("openclaw.inbound_meta.v2"); - expect(payload["chat_id"]).toBe("telegram:5494292670"); + expect(payload["chat_id"]).toBeUndefined(); expect(payload["account_id"]).toBe("work"); expect(payload["channel"]).toBe("telegram"); }); + it("keeps task-scoped chat ids out of the system prompt for cache stability", () => { + const first = buildInboundMetaSystemPrompt({ + OriginatingTo: "paperclip:issue:c585d0cc", + 
OriginatingChannel: "paperclip", + Provider: "paperclip", + Surface: "paperclip", + ChatType: "direct", + AccountId: "default", + } as TemplateContext); + const second = buildInboundMetaSystemPrompt({ + OriginatingTo: "paperclip:issue:ca527062", + OriginatingChannel: "paperclip", + Provider: "paperclip", + Surface: "paperclip", + ChatType: "direct", + AccountId: "default", + } as TemplateContext); + + expect(parseInboundMetaPayload(first)["chat_id"]).toBeUndefined(); + expect(first).toBe(second); + }); + it("does not include per-turn message identifiers (cache stability)", () => { const prompt = buildInboundMetaSystemPrompt({ MessageSid: "123", @@ -233,12 +255,14 @@ describe("buildInboundUserContextPrefix", () => { const text = buildInboundUserContextPrefix({ ChatType: "direct", OriginatingChannel: "whatsapp", + OriginatingTo: "whatsapp:+15551230000", MessageSid: "short-id", MessageSidFull: "provider-full-id", SenderE164: " +15551234567 ", } as TemplateContext); const conversationInfo = parseConversationInfoPayload(text); + expect(conversationInfo["chat_id"]).toBe("whatsapp:+15551230000"); expect(conversationInfo["message_id"]).toBe("short-id"); expect(conversationInfo["message_id_full"]).toBeUndefined(); expect(conversationInfo["sender"]).toBe("+15551234567"); diff --git a/src/auto-reply/reply/inbound-meta.ts b/src/auto-reply/reply/inbound-meta.ts index d2a641da874..e94d4fac3b0 100644 --- a/src/auto-reply/reply/inbound-meta.ts +++ b/src/auto-reply/reply/inbound-meta.ts @@ -117,9 +117,9 @@ export function buildInboundMetaSystemPrompt( // Keep system metadata strictly free of attacker-controlled strings (sender names, group subjects, etc.). // Those belong in the user-role "untrusted context" blocks. - // Per-message identifiers and dynamic flags are also excluded here: they change on turns/replies - // and would bust prefix-based prompt caches on providers that use stable system prefixes. - // They are included in the user-role conversation info block instead. 
+ // Conversation ids, per-message identifiers, and dynamic flags are also excluded here: they change + // across task-scoped sessions or on turns/replies and would bust prefix-based prompt caches on + // providers that use stable system prefixes. They go in the user-role conversation info block instead. // Resolve channel identity: prefer explicit channel, then surface, then provider. // For webchat/Hub Chat sessions (when Surface is 'webchat' or undefined with no real channel), @@ -128,7 +128,6 @@ export function buildInboundMetaSystemPrompt( const payload = { schema: "openclaw.inbound_meta.v2", - chat_id: normalizePromptMetadataString(ctx.OriginatingTo), account_id: normalizePromptMetadataString(ctx.AccountId), channel: channelValue, provider: normalizePromptMetadataString(ctx.Provider), @@ -172,7 +171,10 @@ export function buildInboundUserContextPrefix( const inboundHistory = Array.isArray(ctx.InboundHistory) ? ctx.InboundHistory : []; const boundedHistory = inboundHistory.slice(-MAX_UNTRUSTED_HISTORY_ENTRIES); + // Keep volatile conversation/message identifiers in the user-role block so the system + // prompt stays byte-stable across task-scoped sessions and reply turns. const conversationInfo = { + chat_id: shouldIncludeConversationInfo ? normalizeOptionalString(ctx.OriginatingTo) : undefined, message_id: shouldIncludeConversationInfo ? resolvedMessageId : undefined, reply_to_id: shouldIncludeConversationInfo ? normalizePromptMetadataString(ctx.ReplyToId)