fix(gateway): restore webchat pure-image turn handling (#69358)

eb10803691 tightened the reply-run empty-turn gate to count only baseBodyFinal (the strict user body) and to always append the '[User sent media without caption]' placeholder to any prefix. That broke the Control UI webchat path: images arrive via opts.images and do not stamp sessionCtx.MediaPath (by design — see the chat.directive-tags.test.ts assertion that ctx.MediaPath stays undefined on dispatch). For pure-image webchat turns the gate therefore returned 'I didn't receive any text in your message', and when a caption was present the placeholder text leaked into the Control UI user bubble on top of the inbound-context prefix. Revert the three get-reply-run.ts hunks from eb10803691 back to the stable 2026.4.5 behavior: check baseBodyForPrompt.trim() (which includes the inbound-context prefix) for the empty-turn gate, and fall back to the plain '[User sent media without caption]' placeholder only when the whole prompt body is empty. Drop the media-only test that the same commit added for the metadata-only-prefix bail-out; it encoded exactly the behavior that this commit reverts. Fixes #69358. Refs #69427.
2026-05-06 12:20:44 +00:00 · 2026-04-20 23:36:58 +02:00
parent 5275d008ed
commit ca16413f3f
2 changed files with 6 additions and 43 deletions
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -112,7 +112,6 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent;
 let routeReply: typeof import("./route-reply.runtime.js").routeReply;
 let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents;
 let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode;
-let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix;
 let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount;
 let replyRunTesting: typeof import("./reply-run-registry.js").__testing;
 let loadScopeCounter = 0;
@@ -222,7 +221,6 @@ describe("runPreparedReply media-only handling", () => {
    ({ routeReply } = await import("./route-reply.runtime.js"));
    ({ drainFormattedSystemEvents } = await import("./session-system-events.js"));
    ({ resolveTypingMode } = await import("./typing-mode.js"));
-    ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js"));
    ({ __testing: replyRunTesting, getActiveReplyRunCount } =
      await import("./reply-run-registry.js"));
  });
@@ -312,40 +310,6 @@ describe("runPreparedReply media-only handling", () => {
    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
  });

-  it("still skips metadata-only turns when inbound context adds chat_id", async () => {
-    vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce(
-      [
-        "Conversation info (untrusted metadata):",
-        "```json",
-        JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2),
-        "```",
-      ].join("\n"),
-    );
-
-    const result = await runPreparedReply(
-      baseParams({
-        ctx: {
-          Body: "",
-          RawBody: "",
-          CommandBody: "",
-        },
-        sessionCtx: {
-          Body: "",
-          BodyStripped: "",
-          Provider: "paperclip",
-          OriginatingChannel: "paperclip",
-          OriginatingTo: "paperclip:issue:abc",
-          ChatType: "direct",
-        },
-      }),
-    );
-
-    expect(result).toEqual({
-      text: "I didn't receive any text in your message. Please resend or add a caption.",
-    });
-    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
-  });
-
  it("does not send a standalone reset notice for reply-producing /new turns", async () => {
    await runPreparedReply(
      baseParams({
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -375,11 +375,11 @@ export async function runPreparedReply(
  const baseBodyForPrompt = isBareSessionReset
    ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n")
    : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n");
-  const hasUserBody = baseBodyFinal.trim().length > 0;
+  const baseBodyTrimmed = baseBodyForPrompt.trim();
  const hasMediaAttachment = Boolean(
    sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0),
  );
-  if (!hasUserBody && !hasMediaAttachment) {
+  if (!baseBodyTrimmed && !hasMediaAttachment) {
    // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) —
    // otherwise channels that wire onReplyStart to typing indicators leak
    // visible signals even though outbound delivery is suppressed.
@@ -392,12 +392,11 @@ export async function runPreparedReply(
      text: "I didn't receive any text in your message. Please resend or add a caption.",
    };
  }
-  // Prefix-only inbound metadata should not force a run on empty turns. When media
-  // arrives without text, keep the contextual prefix but append a minimal placeholder
-  // so the embedded runner can inject the attachment.
-  const effectiveBaseBody = hasUserBody
+  // When the user sends media without text, provide a minimal body so the agent
+  // run proceeds and the image/document is injected by the embedded runner.
+  const effectiveBaseBody = baseBodyTrimmed
    ? baseBodyForPrompt
-    : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n");
+    : "[User sent media without caption]";
  let prefixedBodyBase = await applySessionHints({
    baseBody: effectiveBaseBody,
    abortedLastRun,