From ca16413f3f524b58ceabfca6f5a40ea1d7020d81 Mon Sep 17 00:00:00 2001
From: Jaswir Raghoe <jaswirraghoe@gmail.com>
Date: Mon, 20 Apr 2026 23:36:58 +0200
Subject: [PATCH] fix(gateway): restore webchat pure-image turn handling
 (#69358)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

eb10803691 tightened the reply-run empty-turn gate to only count
baseBodyFinal (strict user body) and to always append the '[User sent
media without caption]' placeholder to any prefix. That broke the Control
UI webchat path: images arrive via opts.images and do not stamp
sessionCtx.MediaPath (by design — see chat.directive-tags.test.ts
assertion that ctx.MediaPath stays undefined on dispatch). For pure-image
webchat turns the gate therefore returned 'I didn't receive any text in
your message', and when a caption was present the placeholder text leaked
into the Control UI user bubble on top of the inbound-context prefix.

Revert the three get-reply-run.ts hunks from eb10803691 back to the stable
2026.4.5 behavior: check baseBodyForPrompt.trim() (which includes the
inbound-context prefix) for the empty-turn gate, and fall back to the
plain '[User sent media without caption]' placeholder only when the whole
prompt body is empty.

Drop the media-only test the same commit added for metadata-only-prefix
bail-out; it encoded the exact behavior this reverts.

Fixes #69358.
Refs #69427.
---
 .../reply/get-reply-run.media-only.test.ts    | 36 -------------------
 src/auto-reply/reply/get-reply-run.ts         | 13 ++++---
 2 files changed, 6 insertions(+), 43 deletions(-)

diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts
index 098686f5f44..c1bf3873d72 100644
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -112,7 +112,6 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent;
 let routeReply: typeof import("./route-reply.runtime.js").routeReply;
 let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents;
 let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode;
-let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix;
 let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount;
 let replyRunTesting: typeof import("./reply-run-registry.js").__testing;
 let loadScopeCounter = 0;
@@ -222,7 +221,6 @@ describe("runPreparedReply media-only handling", () => {
     ({ routeReply } = await import("./route-reply.runtime.js"));
     ({ drainFormattedSystemEvents } = await import("./session-system-events.js"));
     ({ resolveTypingMode } = await import("./typing-mode.js"));
-    ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js"));
     ({ __testing: replyRunTesting, getActiveReplyRunCount } =
       await import("./reply-run-registry.js"));
   });
@@ -312,40 +310,6 @@ describe("runPreparedReply media-only handling", () => {
     expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
   });
 
-  it("still skips metadata-only turns when inbound context adds chat_id", async () => {
-    vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce(
-      [
-        "Conversation info (untrusted metadata):",
-        "```json",
-        JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2),
-        "```",
-      ].join("\n"),
-    );
-
-    const result = await runPreparedReply(
-      baseParams({
-        ctx: {
-          Body: "",
-          RawBody: "",
-          CommandBody: "",
-        },
-        sessionCtx: {
-          Body: "",
-          BodyStripped: "",
-          Provider: "paperclip",
-          OriginatingChannel: "paperclip",
-          OriginatingTo: "paperclip:issue:abc",
-          ChatType: "direct",
-        },
-      }),
-    );
-
-    expect(result).toEqual({
-      text: "I didn't receive any text in your message. Please resend or add a caption.",
-    });
-    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
-  });
-
   it("does not send a standalone reset notice for reply-producing /new turns", async () => {
     await runPreparedReply(
       baseParams({
diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts
index a0c26d83c93..7393adcd844 100644
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -375,11 +375,11 @@ export async function runPreparedReply(
   const baseBodyForPrompt = isBareSessionReset
     ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n")
     : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n");
-  const hasUserBody = baseBodyFinal.trim().length > 0;
+  const baseBodyTrimmed = baseBodyForPrompt.trim();
   const hasMediaAttachment = Boolean(
     sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0),
   );
-  if (!hasUserBody && !hasMediaAttachment) {
+  if (!baseBodyTrimmed && !hasMediaAttachment) {
     // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) —
     // otherwise channels that wire onReplyStart to typing indicators leak
     // visible signals even though outbound delivery is suppressed.
@@ -392,12 +392,11 @@ export async function runPreparedReply(
       text: "I didn't receive any text in your message. Please resend or add a caption.",
     };
   }
-  // Prefix-only inbound metadata should not force a run on empty turns. When media
-  // arrives without text, keep the contextual prefix but append a minimal placeholder
-  // so the embedded runner can inject the attachment.
-  const effectiveBaseBody = hasUserBody
+  // When the user sends media without text, provide a minimal body so the agent
+  // run proceeds and the image/document is injected by the embedded runner.
+  const effectiveBaseBody = baseBodyTrimmed
     ? baseBodyForPrompt
-    : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n");
+    : "[User sent media without caption]";
   let prefixedBodyBase = await applySessionHints({
     baseBody: effectiveBaseBody,
     abortedLastRun,