fix(whatsapp): dedupe captioned MEDIA auto-replies (#78770)

* fix(whatsapp): dedupe captioned MEDIA auto-replies

* docs: note whatsapp media directive dedupe

---------

Co-authored-by: Marcus Castro <mcaxtr@openclaw.ai>
This commit is contained in:
NVIDIAN
2026-05-06 23:15:19 -07:00
committed by GitHub
parent 7dc597b921
commit a2efabf4c9
3 changed files with 173 additions and 55 deletions

View File

@@ -506,6 +506,7 @@ Docs: https://docs.openclaw.ai
- Agents/subagents: have completed session-mode subagent registry rows honor `agents.defaults.subagents.archiveAfterMinutes` (default 60 minutes; same knob run-mode already uses for `archiveAtMs`) instead of a hardcoded 5-minute TTL, so `subagents list` and other registry-backed surfaces still show recently-completed runs and operators have one consistent retention knob across spawn modes. (#78263) Thanks @arniesaha.
- Plugins/channel setup: fix `setChannelRuntime` being silently dropped from non-bundled external plugin setup entries — external channel plugins that export `{ plugin, setChannelRuntime }` from their setup entry now have the runtime setter invoked, so the runtime initializer the provider polls for is set before the channel starts, preventing a poll timeout and gateway crash loop when the plugin opts into deferred startup loading. Fixes #77779. (#77799) Thanks @openperf.
- WhatsApp: route proactive phone-number sends through Baileys LID forward mappings when available, so LID-addressed contacts receive agent messages instead of creating sender-only ghost chats. Fixes #67378. (#74925) Thanks @edenfunf.
- WhatsApp: send captioned `MEDIA:` directive auto-replies once instead of emitting an empty media message before the captioned media reply. (#78770) Thanks @ai-hpc.
## 2026.5.3-1

View File

@@ -487,7 +487,7 @@ describe("whatsapp inbound dispatch", () => {
expect(groupHistories.get("whatsapp:default:group:123@g.us") ?? []).toHaveLength(0);
});
it("delivers block and final WhatsApp payloads; suppresses text-only tool payloads but delivers media", async () => {
it("replaces duplicate media-only interim payloads with the final captioned WhatsApp media", async () => {
const deliverReply = vi.fn(async () => acceptedDeliveryResult());
const rememberSentText = vi.fn();
@@ -509,16 +509,8 @@ describe("whatsapp inbound dispatch", () => {
kind: "tool",
},
);
expect(deliverReply).toHaveBeenCalledTimes(1);
expect(rememberSentText).toHaveBeenCalledTimes(1);
expect(deliverReply).toHaveBeenLastCalledWith(
expect.objectContaining({
replyResult: expect.objectContaining({
mediaUrls: ["/tmp/generated.jpg"],
text: undefined,
}),
}),
);
expect(deliverReply).not.toHaveBeenCalled();
expect(rememberSentText).not.toHaveBeenCalled();
await deliver?.(
{ text: "generated image", mediaUrls: ["/tmp/generated.jpg"] },
@@ -526,8 +518,8 @@ describe("whatsapp inbound dispatch", () => {
kind: "block",
},
);
expect(deliverReply).toHaveBeenCalledTimes(2);
expect(rememberSentText).toHaveBeenCalledTimes(2);
expect(deliverReply).toHaveBeenCalledTimes(1);
expect(rememberSentText).toHaveBeenCalledTimes(1);
expect(deliverReply).toHaveBeenLastCalledWith(
expect.objectContaining({
replyResult: expect.objectContaining({
@@ -539,8 +531,8 @@ describe("whatsapp inbound dispatch", () => {
await deliver?.({ text: "block payload" }, { kind: "block" });
await deliver?.({ text: "final payload" }, { kind: "final" });
expect(deliverReply).toHaveBeenCalledTimes(4);
expect(rememberSentText).toHaveBeenCalledTimes(4);
expect(deliverReply).toHaveBeenCalledTimes(3);
expect(rememberSentText).toHaveBeenCalledTimes(3);
});
it("queues final WhatsApp payloads through durable outbound delivery", async () => {

View File

@@ -60,9 +60,22 @@ type SenderContext = {
e164?: string;
};
// Delivery metadata passed alongside a reply payload; carries only the reply lifecycle kind.
type ReplyDeliveryInfo = { kind: ReplyLifecycleKind };
// A media-only reply payload whose delivery has been deferred, kept together
// with the delivery info it arrived with and the set of media URLs it references.
type PendingWhatsAppMediaOnlyPayload = {
info: ReplyDeliveryInfo;
mediaUrls: Set<string>;
payload: DeliverableWhatsAppOutboundPayload<ReplyPayload>;
};
// Counters reported by a flush of deferred media-only payloads:
// `delivered` counts payloads handed to the deliver callback, and
// `droppedDuplicateMedia` counts payloads skipped because their media URLs
// overlapped with the URLs supplied to the flush.
type WhatsAppMediaOnlyFlushResult = {
delivered: number;
droppedDuplicateMedia: number;
};
function logWhatsAppReplyDeliveryError(params: {
err: unknown;
info: { kind: ReplyLifecycleKind };
info: ReplyDeliveryInfo;
connectionId: string;
conversationId: string;
msg: WebInboundMsg;
@@ -109,6 +122,85 @@ function resolveWhatsAppDeliverablePayload(
return payload;
}
/**
 * Collects every media URL referenced by a reply payload.
 *
 * Entries from `mediaUrls` (when it is an array) come first, followed by the
 * single `mediaUrl` (when it is a string). Each URL is trimmed, and entries
 * that are empty after trimming are discarded.
 *
 * @param payload - Reply payload to inspect.
 * @returns The distinct, trimmed, non-empty media URLs.
 */
function getWhatsAppPayloadMediaUrls(payload: ReplyPayload): Set<string> {
  const candidates: string[] = Array.isArray(payload.mediaUrls) ? [...payload.mediaUrls] : [];
  if (typeof payload.mediaUrl === "string") {
    candidates.push(payload.mediaUrl);
  }

  const collected = new Set<string>();
  for (const candidate of candidates) {
    const trimmed = candidate.trim();
    if (trimmed) {
      collected.add(trimmed);
    }
  }
  return collected;
}
/**
 * Reports whether two media URL sets share at least one entry.
 *
 * @param left - First set of URLs; its entries are the ones iterated.
 * @param right - Second set of URLs; used for membership lookups.
 * @returns `true` when any URL in `left` is also present in `right`.
 */
function hasWhatsAppMediaUrlOverlap(left: Set<string>, right: Set<string>): boolean {
  return Array.from(left).some((candidate) => right.has(candidate));
}
/**
 * Decides whether a reply payload should be deferred rather than sent now.
 *
 * A payload is deferred only when all of the following hold: it is not a
 * `"final"` reply, it carries media, its text is empty after trimming, and at
 * least one media URL was extracted from it.
 *
 * @param params.info - Delivery info for the payload (its lifecycle kind).
 * @param params.mediaUrls - Media URLs extracted from the payload.
 * @param params.reply - Resolved sendable reply parts for the payload.
 * @returns `true` when the payload should be deferred.
 */
function shouldDeferWhatsAppMediaOnlyPayload(params: {
  info: ReplyDeliveryInfo;
  mediaUrls: Set<string>;
  reply: ReturnType<typeof resolveSendableOutboundReplyParts>;
}): boolean {
  const { info, mediaUrls, reply } = params;
  if (info.kind === "final") {
    return false;
  }
  if (!reply.hasMedia) {
    return false;
  }
  if (reply.text.trim()) {
    return false;
  }
  return mediaUrls.size > 0;
}
/**
 * Builds a small queue that holds media-only reply payloads until they are flushed.
 *
 * The returned object exposes:
 * - `defer(pending)`: appends a payload to the queue.
 * - `flushExceptDuplicateMedia(mediaUrls?)`: drains the queue in insertion
 *   order. When `mediaUrls` is given, entries whose URLs overlap with it are
 *   counted as dropped and not delivered; every other entry is passed to
 *   `params.deliver`, one at a time.
 * - `flushAll()`: drains the queue and delivers every entry.
 *
 * @param params.deliver - Callback that actually sends a deferred payload.
 */
function createWhatsAppMediaOnlyReplyCoalescer(params: {
  deliver: (pending: PendingWhatsAppMediaOnlyPayload) => Promise<void>;
}) {
  const queue: PendingWhatsAppMediaOnlyPayload[] = [];

  async function flushExceptDuplicateMedia(
    mediaUrls?: Set<string>,
  ): Promise<WhatsAppMediaOnlyFlushResult> {
    let delivered = 0;
    let droppedDuplicateMedia = 0;
    // Empty the queue up front so payloads deferred while this flush is
    // awaiting a delivery are left for the next flush.
    const drained = queue.splice(0);
    for (const entry of drained) {
      const isSuperseded = mediaUrls
        ? hasWhatsAppMediaUrlOverlap(entry.mediaUrls, mediaUrls)
        : false;
      if (isSuperseded) {
        droppedDuplicateMedia += 1;
      } else {
        await params.deliver(entry);
        delivered += 1;
      }
    }
    return { delivered, droppedDuplicateMedia };
  }

  return {
    defer(pending: PendingWhatsAppMediaOnlyPayload) {
      queue.push(pending);
    },
    flushExceptDuplicateMedia,
    flushAll: () => flushExceptDuplicateMedia(),
  };
}
/**
 * Writes verbose log lines summarising a flush of deferred media-only payloads.
 *
 * Does nothing unless verbose logging is enabled. One line is logged for
 * dropped payloads and one for delivered payloads, each only when its count
 * is above zero.
 *
 * @param result - Counters produced by a coalescer flush.
 */
function logWhatsAppMediaOnlyFlushResult(result: WhatsAppMediaOnlyFlushResult) {
  if (shouldLogVerbose()) {
    const { delivered, droppedDuplicateMedia } = result;
    if (droppedDuplicateMedia > 0) {
      logVerbose(
        `Dropped ${droppedDuplicateMedia} deferred media-only WhatsApp reply payload(s) superseded by captioned media`,
      );
    }
    if (delivered > 0) {
      logVerbose(`Flushed ${delivered} deferred media-only WhatsApp reply payload(s)`);
    }
  }
}
export function resolveWhatsAppResponsePrefix(params: {
cfg: ReturnType<LoadConfigFn>;
agentId: string;
@@ -335,6 +427,63 @@ export async function dispatchWhatsAppBufferedReply(params: {
let didSendReply = false;
let didLogHeartbeatStrip = false;
// Sends one normalized reply payload through `params.deliverReply` and records
// the outcome. If the provider does not accept the delivery, a warning is
// logged and nothing else happens. Otherwise the dispatch is marked as having
// sent a reply, the sent text is remembered, and (in verbose mode) a preview
// of the reply is logged.
const deliverNormalizedPayload = async (
normalizedDeliveryPayload: DeliverableWhatsAppOutboundPayload<ReplyPayload>,
info: ReplyDeliveryInfo,
) => {
const reply = resolveSendableOutboundReplyParts(normalizedDeliveryPayload);
// Nothing to send: no media and the text is empty after trimming.
if (!reply.hasMedia && !reply.text.trim()) {
return;
}
const delivery = await params.deliverReply({
replyResult: normalizedDeliveryPayload,
normalizedReplyResult: normalizedDeliveryPayload,
msg: params.msg,
mediaLocalRoots,
maxMediaBytes: params.maxMediaBytes,
textLimit,
chunkMode,
replyLogger: params.replyLogger,
connectionId: params.connectionId,
skipLog: false,
tableMode,
});
// Provider rejected the delivery: warn with correlation details and stop
// before any "sent" bookkeeping below runs.
if (!delivery.providerAccepted) {
params.replyLogger.warn(
{
correlationId: params.msg.id ?? null,
connectionId: params.connectionId,
conversationId: params.conversationId,
chatId: params.msg.chatId,
to: params.msg.from,
from: params.msg.to,
replyKind: info.kind,
},
"auto-reply was not accepted by WhatsApp provider",
);
return;
}
// Captured flag of the enclosing dispatch function; set only after acceptance.
didSendReply = true;
// `true` when the payload has truthy text, otherwise `undefined`.
const shouldLog = normalizedDeliveryPayload.text ? true : undefined;
params.rememberSentText(normalizedDeliveryPayload.text, {
combinedBody: params.context.Body as string | undefined,
combinedBodySessionKey: params.route.sessionKey,
logVerboseMessage: shouldLog,
});
// Group chats are labelled by conversation id; other chats by sender, falling back to "unknown".
const fromDisplay =
params.msg.chatType === "group" ? params.conversationId : (params.msg.from ?? "unknown");
if (shouldLogVerbose()) {
// Payloads with null/undefined text are shown as "<media>" in the preview.
const preview = normalizedDeliveryPayload.text != null ? reply.text : "<media>";
logVerbose(`Reply body: ${preview}${reply.hasMedia ? " (media)" : ""} -> ${fromDisplay}`);
}
};
// Coalescer for deferred media-only payloads; flushed entries are sent through
// `deliverNormalizedPayload` with the delivery info they were deferred with.
const mediaOnlyCoalescer = createWhatsAppMediaOnlyReplyCoalescer({
  deliver: async ({ payload, info }) => {
    await deliverNormalizedPayload(payload, info);
  },
});
const { queuedFinal, counts } = await dispatchReplyWithBufferedBlockDispatcher({
ctx: params.context,
cfg: params.cfg,
@@ -364,6 +513,7 @@ export async function dispatchWhatsAppBufferedReply(params: {
return;
}
if (!reply.hasMedia) {
logWhatsAppMediaOnlyFlushResult(await mediaOnlyCoalescer.flushAll());
const durable = await deliverInboundReplyWithMessageSendContext({
cfg: params.cfg,
channel: "whatsapp",
@@ -395,48 +545,22 @@ export async function dispatchWhatsAppBufferedReply(params: {
if (durable.status === "handled_no_send") {
return;
}
}
const delivery = await params.deliverReply({
replyResult: normalizedDeliveryPayload,
normalizedReplyResult: normalizedDeliveryPayload,
msg: params.msg,
mediaLocalRoots,
maxMediaBytes: params.maxMediaBytes,
textLimit,
chunkMode,
replyLogger: params.replyLogger,
connectionId: params.connectionId,
skipLog: false,
tableMode,
});
if (!delivery.providerAccepted) {
params.replyLogger.warn(
{
correlationId: params.msg.id ?? null,
connectionId: params.connectionId,
conversationId: params.conversationId,
chatId: params.msg.chatId,
to: params.msg.from,
from: params.msg.to,
replyKind: info.kind,
},
"auto-reply was not accepted by WhatsApp provider",
);
await deliverNormalizedPayload(normalizedDeliveryPayload, info);
return;
}
didSendReply = true;
const shouldLog = normalizedDeliveryPayload.text ? true : undefined;
params.rememberSentText(normalizedDeliveryPayload.text, {
combinedBody: params.context.Body as string | undefined,
combinedBodySessionKey: params.route.sessionKey,
logVerboseMessage: shouldLog,
});
const fromDisplay =
params.msg.chatType === "group" ? params.conversationId : (params.msg.from ?? "unknown");
if (shouldLogVerbose()) {
const preview = normalizedDeliveryPayload.text != null ? reply.text : "<media>";
logVerbose(`Reply body: ${preview}${reply.hasMedia ? " (media)" : ""} -> ${fromDisplay}`);
const mediaUrls = getWhatsAppPayloadMediaUrls(normalizedDeliveryPayload);
if (shouldDeferWhatsAppMediaOnlyPayload({ info, mediaUrls, reply })) {
mediaOnlyCoalescer.defer({
info,
mediaUrls,
payload: normalizedDeliveryPayload,
});
return;
}
logWhatsAppMediaOnlyFlushResult(
await mediaOnlyCoalescer.flushExceptDuplicateMedia(mediaUrls),
);
await deliverNormalizedPayload(normalizedDeliveryPayload, info);
},
onReplyStart: params.msg.sendComposing,
onError: (err, info) => {
@@ -456,6 +580,7 @@ export async function dispatchWhatsAppBufferedReply(params: {
onModelSelected: params.onModelSelected,
},
});
logWhatsAppMediaOnlyFlushResult(await mediaOnlyCoalescer.flushAll());
const didQueueVisibleReply = hasVisibleInboundReplyDispatch({ queuedFinal, counts });
if (!didQueueVisibleReply) {