fix: include quoted WhatsApp media in inbound context

This commit is contained in:
Peter Steinberger
2026-05-02 02:19:16 +01:00
parent 1844c1fb38
commit 06be5eee6a
6 changed files with 107 additions and 17 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
- Gateway/sessions: move hot transcript reads and mirror appends onto async bounded IO with serialized parent-linked writes, keeping large session histories from stalling Gateway requests and channel replies. Fixes #75656. Thanks @DerFlash.
- Cron/TTS: run cron announce payloads through the normal TTS directive transform before outbound delivery, so scheduled `[[tts]]` replies generate voice payloads instead of leaking raw tags. Fixes #52125. Thanks @kenchen3000.
- WhatsApp: save downloadable quoted image media from reply context as inbound media, so agents can inspect an image that a user replied to instead of only seeing `<media:image>`. Fixes #59174. Thanks @gaffner.
- Doctor/WhatsApp: warn when Linux crontabs still run the legacy `ensure-whatsapp.sh` health check, which can misreport `Gateway inactive` when cron lacks the systemd user-bus environment. Fixes #60204. Thanks @mySebbe.
- Slack/setup: print the generated app manifest as plain JSON instead of embedding it inside the framed setup note, so it can be copied into Slack without deleting border characters. Fixes #65751. Thanks @theDanielJLewis.
- Channels/WhatsApp: route CLI logout through the live Gateway and stop runtime-backed listeners before channel removal, so removing a WhatsApp account does not leave the old socket replying until restart. Fixes #67746. Thanks @123Mismail.

View File

@@ -293,6 +293,10 @@ When the linked self number is also present in `allowFrom`, WhatsApp self-chat s
```
Reply metadata fields are also populated when available (`ReplyToId`, `ReplyToBody`, `ReplyToSender`, sender JID/E.164).
When the reply has no media of its own and the quoted reply target is
downloadable media, OpenClaw saves the quoted media through the normal inbound
media store and exposes it as `MediaPath`/`MediaType`, so the agent can inspect
the referenced media (for example an image) instead of only seeing a
placeholder such as `<media:image>`.
</Accordion>

View File

@@ -234,6 +234,54 @@ describe("web inbound media saves with extension", () => {
await listener.close();
});
// Verifies that a text reply quoting an image message ends up with the quoted
// image saved as inbound media (mediaPath set, saved with the image mimetype).
it("stores quoted image media from reply context", async () => {
const onMessage = vi.fn();
const listener = await monitorWebInbox({
cfg: {
channels: { whatsapp: { allowFrom: ["*"] } },
messages: { messagePrefix: undefined, responsePrefix: undefined },
} as never,
verbose: false,
onMessage,
accountId: "default",
authDir: path.join(HOME, "wa-auth"),
});
const realSock = await getMockSocket();
// Emit a reply whose own content is text only; the image lives solely in
// contextInfo.quotedMessage.
realSock.ev.emit("messages.upsert", {
type: "notify",
messages: [
{
key: { id: "quote-img-reply", fromMe: false, remoteJid: "111@g.us" },
message: {
extendedTextMessage: {
text: "@bot what is this?",
contextInfo: {
stanzaId: "quoted-image",
participant: "222@s.whatsapp.net",
mentionedJid: ["me@s.whatsapp.net"],
quotedMessage: {
imageMessage: { mimetype: "image/jpeg" },
},
},
},
},
messageTimestamp: 1_700_000_005,
},
],
});
const inbound = await waitForMessage(onMessage);
// The textual reply body is still the media placeholder...
expect(inbound.replyToBody).toBe("<media:image>");
// ...but the quoted image must also have been stored with a .jpg extension.
expect(inbound.mediaPath).toBeDefined();
expect(path.extname(inbound.mediaPath as string)).toBe(".jpg");
expect(saveMediaBufferSpy).toHaveBeenCalled();
// Second argument of the most recent saveMediaBuffer call is the mimetype.
const lastCall = saveMediaBufferSpy.mock.calls.at(-1);
expect(lastCall?.[1]).toBe("image/jpeg");
await listener.close();
});
it("passes mediaMaxMb to saveMediaBuffer", async () => {
const onMessage = vi.fn();
const listener = await monitorWebInbox({

View File

@@ -197,7 +197,9 @@ function extractContextInfoFromMessage(message: proto.IMessage): proto.IContextI
return undefined;
}
function extractContextInfo(message: proto.IMessage | undefined): proto.IContextInfo | undefined {
export function extractContextInfo(
message: proto.IMessage | undefined,
): proto.IContextInfo | undefined {
for (const candidate of buildMessageChain(message)) {
const contextInfo = extractContextInfoFromMessage(candidate);
if (contextInfo) {

View File

@@ -1,6 +1,7 @@
import type { proto, WAMessage } from "@whiskeysockets/baileys";
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
import type { createWaSocket } from "../session.js";
import { extractContextInfo } from "./extract.js";
import { downloadMediaMessage, normalizeMessageContent } from "./runtime-api.js";
function unwrapMessage(message: proto.IMessage | undefined): proto.IMessage | undefined {
@@ -74,3 +75,28 @@ export async function downloadInboundMedia(
return undefined;
}
}
/**
 * Downloads the media carried by the message that `msg` replies to.
 *
 * The quoted message is read from the reply's context info and wrapped in a
 * synthetic message envelope so it can be handed to `downloadInboundMedia`.
 *
 * @param msg - The inbound reply message whose quoted target should be fetched.
 * @param sock - The active WhatsApp socket, forwarded to `downloadInboundMedia`.
 * @returns Whatever `downloadInboundMedia` yields for the quoted message, or
 *   `undefined` when `msg` has no quoted message.
 */
export async function downloadQuotedInboundMedia(
  msg: proto.IWebMessageInfo,
  sock: Awaited<ReturnType<typeof createWaSocket>>,
): Promise<{ buffer: Buffer; mimetype?: string; fileName?: string } | undefined> {
  const contextInfo = extractContextInfo(unwrapMessage(msg.message as proto.IMessage | undefined));
  const quotedMessage = contextInfo?.quotedMessage;
  if (!contextInfo || !quotedMessage) {
    return undefined;
  }

  // Rebuild a key for the quoted message: its stanza id and participant come
  // from the context info; the chat falls back to the reply's own remoteJid.
  const quotedKey: proto.IMessageKey = {
    id: contextInfo.stanzaId || undefined,
    remoteJid: contextInfo.remoteJid ?? msg.key?.remoteJid ?? undefined,
    participant: contextInfo.participant ?? undefined,
    fromMe: false,
  };

  return downloadInboundMedia(
    {
      key: quotedKey,
      message: quotedMessage,
      // The quoted message has no timestamp of its own here; reuse the reply's.
      messageTimestamp: msg.messageTimestamp,
    },
    sock,
  );
}

View File

@@ -39,7 +39,7 @@ import {
hasInboundUserContent,
} from "./extract.js";
import { attachEmitterListener, closeInboundMonitorSocket } from "./lifecycle.js";
import { downloadInboundMedia } from "./media.js";
import { downloadInboundMedia, downloadQuotedInboundMedia } from "./media.js";
import { DisconnectReason, isJidGroup, saveMediaBuffer } from "./runtime-api.js";
import { createWebSendApi } from "./send-api.js";
import { normalizeWhatsAppSendResult } from "./send-result.js";
@@ -571,24 +571,33 @@ export async function attachWebInboxToSocket(
let mediaPath: string | undefined;
let mediaType: string | undefined;
let mediaFileName: string | undefined;
// Persists downloaded media via saveMediaBuffer and records its path, mimetype
// and file name in the enclosing mediaPath/mediaType/mediaFileName variables.
// Does nothing when no media was downloaded. The size cap is
// options.mediaMaxMb when that is a positive number, otherwise 50 MB.
const saveInboundMedia = async (
  inboundMedia: Awaited<ReturnType<typeof downloadInboundMedia>>,
) => {
  if (inboundMedia) {
    const { buffer, mimetype, fileName } = inboundMedia;
    const configuredMb = options.mediaMaxMb;
    const limitMb = typeof configuredMb === "number" && configuredMb > 0 ? configuredMb : 50;
    const saved = await saveMediaBuffer(
      buffer,
      mimetype,
      "inbound",
      limitMb * 1024 * 1024,
      fileName,
    );
    mediaPath = saved.path;
    mediaType = mimetype;
    mediaFileName = fileName;
  }
};
try {
const inboundMedia = await downloadInboundMedia(msg as proto.IWebMessageInfo, sock);
if (inboundMedia) {
const maxMb =
typeof options.mediaMaxMb === "number" && options.mediaMaxMb > 0
? options.mediaMaxMb
: 50;
const maxBytes = maxMb * 1024 * 1024;
const saved = await saveMediaBuffer(
inboundMedia.buffer,
inboundMedia.mimetype,
"inbound",
maxBytes,
inboundMedia.fileName,
await saveInboundMedia(inboundMedia);
if (!mediaPath && replyContext) {
await saveInboundMedia(
await downloadQuotedInboundMedia(msg as proto.IWebMessageInfo, sock),
);
mediaPath = saved.path;
mediaType = inboundMedia.mimetype;
mediaFileName = inboundMedia.fileName;
}
} catch (err) {
logWhatsAppVerbose(options.verbose, `Inbound media download failed: ${String(err)}`);