diff --git a/CHANGELOG.md b/CHANGELOG.md index a9794a1e8da..9bea4d7c192 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai ### Fixes - fix(gateway): enforce allowRequestSessionKey gate on template-rendered mapping sessionKeys. (#69381) Thanks @pgondhi987. +- Webchat/images: treat inline image attachments as media for empty-turn gating while still ignoring metadata-only blank turns. (#69474) Thanks @Jaswir. - OpenAI/Responses: resolve `/think` levels against each GPT model's supported reasoning efforts so `/think off` no longer becomes high reasoning or sends unsupported `reasoning.effort: "none"` payloads. - Lobster/TaskFlow: allow managed approval resumes to use `approvalId` without a resume token, and persist that id in approval wait state. (#69559) Thanks @kirkluokun. - Plugins/startup: install bundled runtime dependencies into each plugin's own runtime directory, reuse source-checkout repair caches after rebuilds, and log only packages that were actually installed so repeated Gateway starts stay quiet once deps are present. diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts index c1bf3873d72..2f3d61865e9 100644 --- a/src/auto-reply/reply/get-reply-run.media-only.test.ts +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -112,6 +112,7 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent; let routeReply: typeof import("./route-reply.runtime.js").routeReply; let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents; let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode; +let buildInboundUserContextPrefix: typeof import("./inbound-meta.js").buildInboundUserContextPrefix; let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount; let replyRunTesting: typeof import("./reply-run-registry.js").__testing; let loadScopeCounter = 0; @@ -221,6 +222,7 @@ describe("runPreparedReply media-only handling", () => { ({ routeReply } = await import("./route-reply.runtime.js")); ({ drainFormattedSystemEvents } = await import("./session-system-events.js")); ({ resolveTypingMode } = await import("./typing-mode.js")); + ({ buildInboundUserContextPrefix } = await import("./inbound-meta.js")); ({ __testing: replyRunTesting, getActiveReplyRunCount } = await import("./reply-run-registry.js")); }); @@ -310,6 +312,83 @@ describe("runPreparedReply media-only handling", () => { expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); }); + it("still skips metadata-only turns when inbound context adds chat_id", async () => { + vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce( + [ + "Conversation info (untrusted metadata):", + "```json", + JSON.stringify({ chat_id: "paperclip:issue:abc" }, null, 2), + "```", + ].join("\n"), + ); + + const result = await runPreparedReply( + baseParams({ + ctx: { + Body: "", + RawBody: "", + CommandBody: "", + }, + sessionCtx: { + Body: "", + BodyStripped: "", + Provider: "paperclip", + OriginatingChannel: "paperclip", + OriginatingTo: "paperclip:issue:abc", + ChatType: "direct", + }, + }), + ); + + expect(result).toEqual({ + text: "I didn't receive any text in your message. Please resend or add a caption.", + }); + expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); + }); + + it("allows webchat pure-image turns when image content is carried outside MediaPath", async () => { + vi.mocked(buildInboundUserContextPrefix).mockReturnValueOnce( + [ + "Conversation info (untrusted metadata):", + "```json", + JSON.stringify({ provider: "webchat", chat_id: "webchat:local" }, null, 2), + "```", + ].join("\n"), + ); + + const result = await runPreparedReply( + baseParams({ + ctx: { + Body: "", + RawBody: "", + CommandBody: "", + }, + sessionCtx: { + Body: "", + BodyStripped: "", + Provider: "webchat", + OriginatingChannel: "webchat", + OriginatingTo: "webchat:local", + ChatType: "direct", + }, + opts: { + images: [ + { + type: "input_image", + image_url: "data:image/png;base64,AAAA", + }, + ] as never, + }, + }), + ); + + expect(result).toEqual({ text: "ok" }); + expect(vi.mocked(runReplyAgent)).toHaveBeenCalledOnce(); + const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0]; + expect(call?.followupRun.prompt).toContain("webchat:local"); + expect(call?.followupRun.prompt).toContain("[User sent media without caption]"); + }); + it("does not send a standalone reset notice for reply-producing /new turns", async () => { await runPreparedReply( baseParams({ diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 7393adcd844..a58d1a5b988 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -42,6 +42,7 @@ import type { InlineDirectives } from "./directive-handling.js"; import { shouldUseReplyFastTestRuntime } from "./get-reply-fast-path.js"; import { resolvePreparedReplyQueueState } from "./get-reply-run-queue.js"; import { buildGroupChatContext, buildGroupIntro } from "./groups.js"; +import { hasInboundMedia } from "./inbound-media.js"; import { buildInboundMetaSystemPrompt, buildInboundUserContextPrefix } from "./inbound-meta.js"; import type { createModelSelectionState } from "./model-selection.js"; import { resolveOriginMessageProvider } from "./origin-routing.js"; @@ -375,11 +376,9 @@ export async function runPreparedReply( const baseBodyForPrompt = isBareSessionReset ? [startupContextPrelude, baseBodyFinal].filter(Boolean).join("\n\n") : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n"); - const baseBodyTrimmed = baseBodyForPrompt.trim(); - const hasMediaAttachment = Boolean( - sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0), - ); - if (!baseBodyTrimmed && !hasMediaAttachment) { + const hasUserBody = baseBodyFinal.trim().length > 0; + const hasMediaAttachment = hasInboundMedia(sessionCtx) || (opts?.images?.length ?? 0) > 0; + if (!hasUserBody && !hasMediaAttachment) { // Skip onReplyStart when typing is suppressed (e.g. sendPolicy deny) — // otherwise channels that wire onReplyStart to typing indicators leak // visible signals even though outbound delivery is suppressed. @@ -394,9 +393,9 @@ export async function runPreparedReply( } // When the user sends media without text, provide a minimal body so the agent // run proceeds and the image/document is injected by the embedded runner. - const effectiveBaseBody = baseBodyTrimmed + const effectiveBaseBody = hasUserBody ? baseBodyForPrompt - : "[User sent media without caption]"; + : [inboundUserContext, "[User sent media without caption]"].filter(Boolean).join("\n\n"); let prefixedBodyBase = await applySessionHints({ baseBody: effectiveBaseBody, abortedLastRun,