From ca16413f3f524b58ceabfca6f5a40ea1d7020d81 Mon Sep 17 00:00:00 2001
From: Jaswir Raghoe
Date: Mon, 20 Apr 2026 23:36:58 +0200
Subject: [PATCH] fix(gateway): restore webchat pure-image turn handling (#69358)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

eb10803691 tightened the reply-run empty-turn gate to only count
baseBodyFinal (strict user body) and to always append the
'[User sent media without caption]' placeholder to any prefix. That
broke the Control UI webchat path: images arrive via opts.images and do
not stamp sessionCtx.MediaPath (by design — see
chat.directive-tags.test.ts assertion that ctx.MediaPath stays undefined
on dispatch). For pure-image webchat turns the gate therefore returned
'I didn't receive any text in your message', and when a caption was
present the placeholder text leaked into the Control UI user bubble on
top of the inbound-context prefix.

Revert the three get-reply-run.ts hunks from eb10803691 back to the
stable 2026.4.5 behavior: check baseBodyForPrompt.trim() (which includes
the inbound-context prefix) for the empty-turn gate, and fall back to
the plain '[User sent media without caption]' placeholder only when the
whole prompt body is empty.

Drop the media-only test the same commit added for metadata-only-prefix
bail-out; it encoded the exact behavior this reverts.

Fixes #69358.
Refs #69427.
--- .../reply/get-reply-run.media-only.test.ts | 36 ------------------- src/auto-reply/reply/get-reply-run.ts | 13 ++++--- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts index 098686f5f44..c1bf3873d72 100644 --- a/src/auto-reply/reply/get-reply-run.media-only.test.ts +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -112,7 +112,6 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent; let routeReply: typeof import("./route-reply.runtime.js").routeReply; let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents; let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode; -let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix; let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount; let replyRunTesting: typeof import("./reply-run-registry.js").__testing; let loadScopeCounter = 0; @@ -222,7 +221,6 @@ describe("runPreparedReply media-only handling", () => { ({ routeReply } = await import("./route-reply.runtime.js")); ({ drainFormattedSystemEvents } = await import("./session-system-events.js")); ({ resolveTypingMode } = await import("./typing-mode.js")); - ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js")); ({ __testing: replyRunTesting, getActiveReplyRunCount } = await import("./reply-run-registry.js")); }); @@ -312,40 +310,6 @@ describe("runPreparedReply media-only handling", () => { expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); }); - it("still skips metadata-only turns when inbound context adds chat_id", async () => { - vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce( - [ - "Conversation info (untrusted metadata):", - "```json", - JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2), - "```", - ].join("\n"), - ); - 
- const result = await runPreparedReply( - baseParams({ - ctx: { - Body: "", - RawBody: "", - CommandBody: "", - }, - sessionCtx: { - Body: "", - BodyStripped: "", - Provider: "paperclip", - OriginatingChannel: "paperclip", - OriginatingTo: "paperclip:issue:abc", - ChatType: "direct", - }, - }), - ); - - expect(result).toEqual({ - text: "I didn't receive any text in your message. Please resend or add a caption.", - }); - expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); - }); - it("does not send a standalone reset notice for reply-producing /new turns", async () => { await runPreparedReply( baseParams({ diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index a0c26d83c93..7393adcd844 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -375,11 +375,11 @@ export async function runPreparedReply( const baseBodyForPrompt = isBareSessionReset ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n") : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n"); - const hasUserBody = baseBodyFinal.trim().length > 0; + const baseBodyTrimmed = baseBodyForPrompt.trim(); const hasMediaAttachment = Boolean( sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0), ); - if (!hasUserBody && !hasMediaAttachment) { + if (!baseBodyTrimmed && !hasMediaAttachment) { // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) — // otherwise channels that wire onReplyStart to typing indicators leak // visible signals even though outbound delivery is suppressed. @@ -392,12 +392,11 @@ export async function runPreparedReply( text: "I didn't receive any text in your message. Please resend or add a caption.", }; } - // Prefix-only inbound metadata should not force a run on empty turns. When media - // arrives without text, keep the contextual prefix but append a minimal placeholder - // so the embedded runner can inject the attachment. 
- const effectiveBaseBody = hasUserBody + // When the user sends media without text, provide a minimal body so the agent + // run proceeds and the image/document is injected by the embedded runner. + const effectiveBaseBody = baseBodyTrimmed ? baseBodyForPrompt - : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n"); + : "[User sent media without caption]"; let prefixedBodyBase = await applySessionHints({ baseBody: effectiveBaseBody, abortedLastRun,