From 26704020a4140064f314f3507cc590ecbc3d49dc Mon Sep 17 00:00:00 2001 From: Marcus Castro Date: Fri, 24 Apr 2026 22:49:37 -0300 Subject: [PATCH] fix: narrow WhatsApp tool media delivery --- CHANGELOG.md | 1 + .../monitor/inbound-dispatch.test.ts | 86 ++++++++++++++++++- .../auto-reply/monitor/inbound-dispatch.ts | 33 ++++--- 3 files changed, 102 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8d5d9e667b..05b2340b51c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -193,6 +193,7 @@ Docs: https://docs.openclaw.ai - Slack: route native stream fallback replies through the normal chunked sender so long buffered Slack Connect responses are not dropped or duplicated. (#71124) Thanks @martingarramon. - WhatsApp: transcribe accepted voice notes before agent dispatch while keeping spoken transcripts out of command authorization. (#64120) Thanks @rogerdigital. - Plugins/CLI: expose channel plugin CLI descriptors during discovery-mode plugin loads so snapshot registries keep channel commands visible without activating full runtimes. (#71309) Thanks @gumadeiras. +- WhatsApp: deliver media generated by tool-result replies while still suppressing text-only tool chatter. (#60968) Thanks @adaclaw. ## 2026.4.23 diff --git a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts index 083a529e8be..b698cf72b4b 100644 --- a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts +++ b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts @@ -2,6 +2,14 @@ import { describe, expect, it, vi, beforeEach } from "vitest"; let capturedDispatchParams: unknown; +type CapturedReplyPayload = { + text?: string; + isReasoning?: boolean; + isCompactionNotice?: boolean; + mediaUrl?: string; + mediaUrls?: string[]; +}; + const { dispatchReplyWithBufferedBlockDispatcherMock } = vi.hoisted(() => ({ dispatchReplyWithBufferedBlockDispatcherMock: vi.fn(async (params: { ctx: unknown }) => { capturedDispatchParams = params; @@ -101,7 +109,7 @@ function getCapturedDeliver() { capturedDispatchParams as { dispatcherOptions?: { deliver?: ( - payload: { text?: string; isReasoning?: boolean; isCompactionNotice?: boolean }, + payload: CapturedReplyPayload, info: { kind: "tool" | "block" | "final" }, ) => Promise; }; @@ -386,11 +394,22 @@ describe("whatsapp inbound dispatch", () => { expect(deliverReply).not.toHaveBeenCalled(); expect(rememberSentText).not.toHaveBeenCalled(); - await deliver?.({ text: "tool image", mediaUrls: ["/tmp/generated.jpg"] } as never, { - kind: "tool", - }); + await deliver?.( + { text: "tool image", mediaUrls: ["/tmp/generated.jpg"] }, + { + kind: "tool", + }, + ); expect(deliverReply).toHaveBeenCalledTimes(1); expect(rememberSentText).toHaveBeenCalledTimes(1); + expect(deliverReply).toHaveBeenLastCalledWith( + expect.objectContaining({ + replyResult: expect.objectContaining({ + mediaUrls: ["/tmp/generated.jpg"], + text: undefined, + }), + }), + ); await deliver?.({ text: "block payload" }, { kind: "block" }); await deliver?.({ text: "final payload" }, { kind: "final" }); @@ -489,6 +508,65 @@ describe("whatsapp inbound dispatch", () => { expect(rememberSentText).toHaveBeenCalledTimes(1); }); + it("returns true for tool-only media turns after delivering media", async () => { + const deliverReply = vi.fn(async () => undefined); + const rememberSentText = vi.fn(); + dispatchReplyWithBufferedBlockDispatcherMock.mockImplementationOnce( + async (params: { + ctx: unknown; + dispatcherOptions?: { + deliver?: ( + payload: CapturedReplyPayload, + info: { kind: "tool" | "block" | "final" }, + ) => Promise; + }; + }) => { + capturedDispatchParams = params; + await params.dispatcherOptions?.deliver?.( + { text: "tool image", mediaUrls: ["/tmp/generated.jpg"] }, + { kind: "tool" }, + ); + return { queuedFinal: false, counts: { tool: 1, block: 0, final: 0 } }; + }, + ); + + await expect( + dispatchWhatsAppBufferedReply({ + cfg: { channels: { whatsapp: { blockStreaming: true } } } as never, + connectionId: "conn", + context: { Body: "hi" }, + conversationId: "+1000", + deliverReply, + groupHistories: new Map(), + groupHistoryKey: "+1000", + maxMediaBytes: 1, + msg: makeMsg(), + rememberSentText, + replyLogger: { + info: () => {}, + warn: () => {}, + error: () => {}, + debug: () => {}, + } as never, + replyPipeline: {}, + replyResolver: (async () => undefined) as never, + route: makeRoute(), + shouldClearGroupHistory: false, + }), + ).resolves.toBe(true); + + expect(deliverReply).toHaveBeenCalledTimes(1); + expect(deliverReply).toHaveBeenCalledWith( + expect.objectContaining({ + replyResult: expect.objectContaining({ + mediaUrls: ["/tmp/generated.jpg"], + text: undefined, + }), + }), + ); + expect(rememberSentText).toHaveBeenCalledWith(undefined, expect.any(Object)); + }); + it("passes sendComposing through as the reply typing callback", async () => { const sendComposing = vi.fn(async () => undefined); diff --git a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts index 6fd291e89b2..0705ac0fbd8 100644 --- a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts +++ b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts @@ -57,17 +57,20 @@ function resolveWhatsAppDisableBlockStreaming(cfg: ReturnType): bo return !cfg.channels.whatsapp.blockStreaming; } -function shouldSuppressWhatsAppPayload( +function resolveWhatsAppDeliverablePayload( payload: ReplyPayload, info: { kind: ReplyLifecycleKind }, -): boolean { - if (info.kind === "tool") { - return !resolveSendableOutboundReplyParts(payload).hasMedia; - } +): ReplyPayload | null { if (payload.isReasoning === true || payload.isCompactionNotice === true) { - return true; + return null; } - return false; + if (info.kind === "tool") { + if (!resolveSendableOutboundReplyParts(payload).hasMedia) { + return null; + } + return { ...payload, text: undefined }; + } + return payload; } export function resolveWhatsAppResponsePrefix(params: { @@ -291,11 +294,12 @@ export async function dispatchWhatsAppBufferedReply(params: { } }, deliver: async (payload: ReplyPayload, info: { kind: ReplyLifecycleKind }) => { - if (shouldSuppressWhatsAppPayload(payload, info)) { + const deliveryPayload = resolveWhatsAppDeliverablePayload(payload, info); + if (!deliveryPayload) { return; } await params.deliverReply({ - replyResult: payload, + replyResult: deliveryPayload, msg: params.msg, mediaLocalRoots, maxMediaBytes: params.maxMediaBytes, @@ -307,17 +311,17 @@ export async function dispatchWhatsAppBufferedReply(params: { tableMode, }); didSendReply = true; - const shouldLog = payload.text ? true : undefined; - params.rememberSentText(payload.text, { + const shouldLog = deliveryPayload.text ? true : undefined; + params.rememberSentText(deliveryPayload.text, { combinedBody: params.context.Body as string | undefined, combinedBodySessionKey: params.route.sessionKey, logVerboseMessage: shouldLog, }); const fromDisplay = params.msg.chatType === "group" ? params.conversationId : (params.msg.from ?? "unknown"); - const reply = resolveSendableOutboundReplyParts(payload); + const reply = resolveSendableOutboundReplyParts(deliveryPayload); if (shouldLogVerbose()) { - const preview = payload.text != null ? reply.text : ""; + const preview = deliveryPayload.text != null ? reply.text : ""; logVerbose(`Reply body: ${preview}${reply.hasMedia ? " (media)" : ""} -> ${fromDisplay}`); } }, @@ -329,7 +333,8 @@ export async function dispatchWhatsAppBufferedReply(params: { }, }); - const didQueueVisibleReply = queuedFinal || counts.block > 0 || counts.final > 0; + const didQueueVisibleReply = + queuedFinal || counts.tool > 0 || counts.block > 0 || counts.final > 0; if (!didQueueVisibleReply) { if (params.shouldClearGroupHistory) { params.groupHistories.set(params.groupHistoryKey, []);