From a2efabf4c93baf23b9be0d161d8672f71cccdb21 Mon Sep 17 00:00:00 2001 From: NVIDIAN Date: Wed, 6 May 2026 23:15:19 -0700 Subject: [PATCH] fix(whatsapp): dedupe captioned MEDIA auto-replies (#78770) * fix(whatsapp): dedupe captioned MEDIA auto-replies * docs: note whatsapp media directive dedupe --------- Co-authored-by: Marcus Castro --- CHANGELOG.md | 1 + .../monitor/inbound-dispatch.test.ts | 22 +- .../auto-reply/monitor/inbound-dispatch.ts | 205 ++++++++++++++---- 3 files changed, 173 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15306230d11..892fe70b237 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -506,6 +506,7 @@ Docs: https://docs.openclaw.ai - Agents/subagents: have completed session-mode subagent registry rows honor `agents.defaults.subagents.archiveAfterMinutes` (default 60 minutes; same knob run-mode already uses for `archiveAtMs`) instead of a hardcoded 5-minute TTL, so `subagents list` and other registry-backed surfaces still show recently-completed runs and operators have one consistent retention knob across spawn modes. (#78263) Thanks @arniesaha. - Plugins/channel setup: fix `setChannelRuntime` being silently dropped from non-bundled external plugin setup entries — external channel plugins that export `{ plugin, setChannelRuntime }` from their setup entry now have the runtime setter invoked, so the runtime initializer the provider polls for is set before the channel starts, preventing a poll timeout and gateway crash loop when the plugin opts into deferred startup loading. Fixes #77779. (#77799) Thanks @openperf. - WhatsApp: route proactive phone-number sends through Baileys LID forward mappings when available, so LID-addressed contacts receive agent messages instead of creating sender-only ghost chats. Fixes #67378. (#74925) Thanks @edenfunf. +- WhatsApp: send captioned `MEDIA:` directive auto-replies once instead of emitting an empty media message before the captioned media reply. (#78770) Thanks @ai-hpc. ## 2026.5.3-1 diff --git a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts index fa32513fae8..0a26be224b5 100644 --- a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts +++ b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.test.ts @@ -487,7 +487,7 @@ describe("whatsapp inbound dispatch", () => { expect(groupHistories.get("whatsapp:default:group:123@g.us") ?? []).toHaveLength(0); }); - it("delivers block and final WhatsApp payloads; suppresses text-only tool payloads but delivers media", async () => { + it("replaces duplicate media-only interim payloads with the final captioned WhatsApp media", async () => { const deliverReply = vi.fn(async () => acceptedDeliveryResult()); const rememberSentText = vi.fn(); @@ -509,16 +509,8 @@ describe("whatsapp inbound dispatch", () => { kind: "tool", }, ); - expect(deliverReply).toHaveBeenCalledTimes(1); - expect(rememberSentText).toHaveBeenCalledTimes(1); - expect(deliverReply).toHaveBeenLastCalledWith( - expect.objectContaining({ - replyResult: expect.objectContaining({ - mediaUrls: ["/tmp/generated.jpg"], - text: undefined, - }), - }), - ); + expect(deliverReply).not.toHaveBeenCalled(); + expect(rememberSentText).not.toHaveBeenCalled(); await deliver?.( { text: "generated image", mediaUrls: ["/tmp/generated.jpg"] }, @@ -526,8 +518,8 @@ describe("whatsapp inbound dispatch", () => { kind: "block", }, ); - expect(deliverReply).toHaveBeenCalledTimes(2); - expect(rememberSentText).toHaveBeenCalledTimes(2); + expect(deliverReply).toHaveBeenCalledTimes(1); + expect(rememberSentText).toHaveBeenCalledTimes(1); expect(deliverReply).toHaveBeenLastCalledWith( expect.objectContaining({ replyResult: expect.objectContaining({ @@ -539,8 +531,8 @@ describe("whatsapp inbound dispatch", () => { await deliver?.({ text: "block payload" }, { kind: "block" }); await deliver?.({ text: "final payload" }, { kind: "final" }); - expect(deliverReply).toHaveBeenCalledTimes(4); - expect(rememberSentText).toHaveBeenCalledTimes(4); + expect(deliverReply).toHaveBeenCalledTimes(3); + expect(rememberSentText).toHaveBeenCalledTimes(3); }); it("queues final WhatsApp payloads through durable outbound delivery", async () => { diff --git a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts index 7c74baca19d..9118d8414c7 100644 --- a/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts +++ b/extensions/whatsapp/src/auto-reply/monitor/inbound-dispatch.ts @@ -60,9 +60,22 @@ type SenderContext = { e164?: string; }; +type ReplyDeliveryInfo = { kind: ReplyLifecycleKind }; + +type PendingWhatsAppMediaOnlyPayload = { + info: ReplyDeliveryInfo; + mediaUrls: Set; + payload: DeliverableWhatsAppOutboundPayload; +}; + +type WhatsAppMediaOnlyFlushResult = { + delivered: number; + droppedDuplicateMedia: number; +}; + function logWhatsAppReplyDeliveryError(params: { err: unknown; - info: { kind: ReplyLifecycleKind }; + info: ReplyDeliveryInfo; connectionId: string; conversationId: string; msg: WebInboundMsg; @@ -109,6 +122,85 @@ function resolveWhatsAppDeliverablePayload( return payload; } +function getWhatsAppPayloadMediaUrls(payload: ReplyPayload): Set { + return new Set( + [ + ...(Array.isArray(payload.mediaUrls) ? payload.mediaUrls : []), + ...(typeof payload.mediaUrl === "string" ? [payload.mediaUrl] : []), + ] + .map((url) => url.trim()) + .filter(Boolean), + ); +} + +function hasWhatsAppMediaUrlOverlap(left: Set, right: Set): boolean { + for (const url of left) { + if (right.has(url)) { + return true; + } + } + return false; +} + +function shouldDeferWhatsAppMediaOnlyPayload(params: { + info: ReplyDeliveryInfo; + mediaUrls: Set; + reply: ReturnType; +}): boolean { + return ( + params.info.kind !== "final" && + params.reply.hasMedia && + !params.reply.text.trim() && + params.mediaUrls.size > 0 + ); +} + +function createWhatsAppMediaOnlyReplyCoalescer(params: { + deliver: (pending: PendingWhatsAppMediaOnlyPayload) => Promise; +}) { + const pendingMediaOnlyPayloads: PendingWhatsAppMediaOnlyPayload[] = []; + const flushExceptDuplicateMedia = async ( + mediaUrls?: Set, + ): Promise => { + const flushResult: WhatsAppMediaOnlyFlushResult = { + delivered: 0, + droppedDuplicateMedia: 0, + }; + const pending = pendingMediaOnlyPayloads.splice(0); + for (const candidate of pending) { + if (mediaUrls && hasWhatsAppMediaUrlOverlap(candidate.mediaUrls, mediaUrls)) { + flushResult.droppedDuplicateMedia += 1; + continue; + } + await params.deliver(candidate); + flushResult.delivered += 1; + } + return flushResult; + }; + + return { + defer(pending: PendingWhatsAppMediaOnlyPayload) { + pendingMediaOnlyPayloads.push(pending); + }, + flushExceptDuplicateMedia, + flushAll: () => flushExceptDuplicateMedia(), + }; +} + +function logWhatsAppMediaOnlyFlushResult(result: WhatsAppMediaOnlyFlushResult) { + if (!shouldLogVerbose()) { + return; + } + if (result.droppedDuplicateMedia > 0) { + logVerbose( + `Dropped ${result.droppedDuplicateMedia} deferred media-only WhatsApp reply payload(s) superseded by captioned media`, + ); + } + if (result.delivered > 0) { + logVerbose(`Flushed ${result.delivered} deferred media-only WhatsApp reply payload(s)`); + } +} + export function resolveWhatsAppResponsePrefix(params: { cfg: ReturnType; agentId: string; @@ -335,6 +427,63 @@ export async function dispatchWhatsAppBufferedReply(params: { let didSendReply = false; let didLogHeartbeatStrip = false; + const deliverNormalizedPayload = async ( + normalizedDeliveryPayload: DeliverableWhatsAppOutboundPayload, + info: ReplyDeliveryInfo, + ) => { + const reply = resolveSendableOutboundReplyParts(normalizedDeliveryPayload); + if (!reply.hasMedia && !reply.text.trim()) { + return; + } + const delivery = await params.deliverReply({ + replyResult: normalizedDeliveryPayload, + normalizedReplyResult: normalizedDeliveryPayload, + msg: params.msg, + mediaLocalRoots, + maxMediaBytes: params.maxMediaBytes, + textLimit, + chunkMode, + replyLogger: params.replyLogger, + connectionId: params.connectionId, + skipLog: false, + tableMode, + }); + if (!delivery.providerAccepted) { + params.replyLogger.warn( + { + correlationId: params.msg.id ?? null, + connectionId: params.connectionId, + conversationId: params.conversationId, + chatId: params.msg.chatId, + to: params.msg.from, + from: params.msg.to, + replyKind: info.kind, + }, + "auto-reply was not accepted by WhatsApp provider", + ); + return; + } + didSendReply = true; + const shouldLog = normalizedDeliveryPayload.text ? true : undefined; + params.rememberSentText(normalizedDeliveryPayload.text, { + combinedBody: params.context.Body as string | undefined, + combinedBodySessionKey: params.route.sessionKey, + logVerboseMessage: shouldLog, + }); + const fromDisplay = + params.msg.chatType === "group" ? params.conversationId : (params.msg.from ?? "unknown"); + if (shouldLogVerbose()) { + const preview = normalizedDeliveryPayload.text != null ? reply.text : ""; + logVerbose(`Reply body: ${preview}${reply.hasMedia ? " (media)" : ""} -> ${fromDisplay}`); + } + }; + + const mediaOnlyCoalescer = createWhatsAppMediaOnlyReplyCoalescer({ + deliver: async (pending) => { + await deliverNormalizedPayload(pending.payload, pending.info); + }, + }); + const { queuedFinal, counts } = await dispatchReplyWithBufferedBlockDispatcher({ ctx: params.context, cfg: params.cfg, @@ -364,6 +513,7 @@ export async function dispatchWhatsAppBufferedReply(params: { return; } if (!reply.hasMedia) { + logWhatsAppMediaOnlyFlushResult(await mediaOnlyCoalescer.flushAll()); const durable = await deliverInboundReplyWithMessageSendContext({ cfg: params.cfg, channel: "whatsapp", @@ -395,48 +545,22 @@ export async function dispatchWhatsAppBufferedReply(params: { if (durable.status === "handled_no_send") { return; } - } - const delivery = await params.deliverReply({ - replyResult: normalizedDeliveryPayload, - normalizedReplyResult: normalizedDeliveryPayload, - msg: params.msg, - mediaLocalRoots, - maxMediaBytes: params.maxMediaBytes, - textLimit, - chunkMode, - replyLogger: params.replyLogger, - connectionId: params.connectionId, - skipLog: false, - tableMode, - }); - if (!delivery.providerAccepted) { - params.replyLogger.warn( - { - correlationId: params.msg.id ?? null, - connectionId: params.connectionId, - conversationId: params.conversationId, - chatId: params.msg.chatId, - to: params.msg.from, - from: params.msg.to, - replyKind: info.kind, - }, - "auto-reply was not accepted by WhatsApp provider", - ); + await deliverNormalizedPayload(normalizedDeliveryPayload, info); return; } - didSendReply = true; - const shouldLog = normalizedDeliveryPayload.text ? true : undefined; - params.rememberSentText(normalizedDeliveryPayload.text, { - combinedBody: params.context.Body as string | undefined, - combinedBodySessionKey: params.route.sessionKey, - logVerboseMessage: shouldLog, - }); - const fromDisplay = - params.msg.chatType === "group" ? params.conversationId : (params.msg.from ?? "unknown"); - if (shouldLogVerbose()) { - const preview = normalizedDeliveryPayload.text != null ? reply.text : ""; - logVerbose(`Reply body: ${preview}${reply.hasMedia ? " (media)" : ""} -> ${fromDisplay}`); + const mediaUrls = getWhatsAppPayloadMediaUrls(normalizedDeliveryPayload); + if (shouldDeferWhatsAppMediaOnlyPayload({ info, mediaUrls, reply })) { + mediaOnlyCoalescer.defer({ + info, + mediaUrls, + payload: normalizedDeliveryPayload, + }); + return; } + logWhatsAppMediaOnlyFlushResult( + await mediaOnlyCoalescer.flushExceptDuplicateMedia(mediaUrls), + ); + await deliverNormalizedPayload(normalizedDeliveryPayload, info); }, onReplyStart: params.msg.sendComposing, onError: (err, info) => { @@ -456,6 +580,7 @@ export async function dispatchWhatsAppBufferedReply(params: { onModelSelected: params.onModelSelected, }, }); + logWhatsAppMediaOnlyFlushResult(await mediaOnlyCoalescer.flushAll()); const didQueueVisibleReply = hasVisibleInboundReplyDispatch({ queuedFinal, counts }); if (!didQueueVisibleReply) {