From 38703ed9a1b5f128b6c79041c1043bea5ea9bf3d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 11:05:33 +0100 Subject: [PATCH] fix(discord): identify voice attachment metadata --- .../discord/src/monitor/message-utils.test.ts | 26 ++++++++++++++++ .../discord/src/monitor/message-utils.ts | 7 +++++ .../src/monitor/preflight-audio.test.ts | 30 +++++++++++++++++++ .../discord/src/monitor/preflight-audio.ts | 16 ++++++++-- 4 files changed, 76 insertions(+), 3 deletions(-) diff --git a/extensions/discord/src/monitor/message-utils.test.ts b/extensions/discord/src/monitor/message-utils.test.ts index 6d33b0b385d..c81b0f0ce94 100644 --- a/extensions/discord/src/monitor/message-utils.test.ts +++ b/extensions/discord/src/monitor/message-utils.test.ts @@ -560,6 +560,32 @@ describe("resolveMediaList", () => { ]); }); + it("classifies Discord voice attachments by waveform metadata", async () => { + const attachment = { + id: "att-voice-metadata", + url: "https://cdn.discordapp.com/attachments/1/voice", + filename: "voice", + duration_secs: 1.5, + waveform: "AAAA", + }; + fetchRemoteMedia.mockRejectedValueOnce(new Error("blocked by ssrf guard")); + + const result = await resolveMediaList( + asMessage({ + attachments: [attachment], + }), + 512, + ); + + expect(result).toEqual([ + { + path: attachment.url, + contentType: undefined, + placeholder: "", + }, + ]); + }); + it("falls back to URL when saveMediaBuffer fails", async () => { const attachment = { id: "att-save-fail", diff --git a/extensions/discord/src/monitor/message-utils.ts b/extensions/discord/src/monitor/message-utils.ts index 4ac0c8587a8..c047720e2a9 100644 --- a/extensions/discord/src/monitor/message-utils.ts +++ b/extensions/discord/src/monitor/message-utils.ts @@ -44,6 +44,10 @@ function isDiscordAudioAttachmentFileName(fileName?: string | null): boolean { return Boolean(ext && AUDIO_ATTACHMENT_EXTENSIONS.has(ext)); } +function hasDiscordVoiceAttachmentFields(attachment: APIAttachment): boolean { + return typeof attachment.duration_secs === "number" || typeof attachment.waveform === "string"; +} + function mergeHostnameList(...lists: Array): string[] | undefined { const merged = lists .flatMap((list) => list ?? []) @@ -578,6 +582,9 @@ function inferPlaceholder(attachment: APIAttachment): string { if (mime.startsWith("audio/")) { return ""; } + if (hasDiscordVoiceAttachmentFields(attachment)) { + return ""; + } if (isDiscordAudioAttachmentFileName(attachment.filename ?? attachment.url)) { return ""; } diff --git a/extensions/discord/src/monitor/preflight-audio.test.ts b/extensions/discord/src/monitor/preflight-audio.test.ts index be42324d893..9d492f2724e 100644 --- a/extensions/discord/src/monitor/preflight-audio.test.ts +++ b/extensions/discord/src/monitor/preflight-audio.test.ts @@ -77,6 +77,36 @@ describe("resolveDiscordPreflightAudioMentionContext", () => { ); }); + it("preflights Discord voice attachments by waveform metadata", async () => { + transcribeFirstAudioMock.mockResolvedValue("metadata transcript"); + + await resolveDiscordPreflightAudioMentionContext({ + message: { + attachments: [ + { + url: " https://cdn.discordapp.com/attachments/voice ", + filename: "voice", + duration_secs: 1.5, + waveform: "AAAA", + }, + ], + }, + isDirectMessage: true, + shouldRequireMention: false, + mentionRegexes: [], + cfg, + }); + + expect(transcribeFirstAudioMock).toHaveBeenCalledWith( + expect.objectContaining({ + ctx: expect.objectContaining({ + MediaUrls: ["https://cdn.discordapp.com/attachments/voice"], + MediaTypes: ["audio/ogg"], + }), + }), + ); + }); + it("does not preflight typed direct-message audio", async () => { const result = await resolveDiscordPreflightAudioMentionContext({ message: { diff --git a/extensions/discord/src/monitor/preflight-audio.ts b/extensions/discord/src/monitor/preflight-audio.ts index d1d6acb3624..98706f47b83 100644 --- a/extensions/discord/src/monitor/preflight-audio.ts +++ b/extensions/discord/src/monitor/preflight-audio.ts @@ -1,6 +1,7 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { getFileExtension } from "openclaw/plugin-sdk/media-mime"; import { logVerbose } from "openclaw/plugin-sdk/runtime-env"; +import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime"; type DiscordPreflightAudioRuntime = typeof import("./preflight-audio.runtime.js"); @@ -13,8 +14,10 @@ function loadDiscordPreflightAudioRuntime(): Promise typeof att.url === "string" && att.url.length > 0 && inferAudioAttachmentMime(att), + (att) => normalizeOptionalString(att.url) && inferAudioAttachmentMime(att), ); } @@ -91,7 +100,8 @@ export async function resolveDiscordPreflightAudioMentionContext(params: { } const audioUrls = audioAttachments .map((att) => att.url) - .filter((url): url is string => typeof url === "string" && url.length > 0); + .map((url) => normalizeOptionalString(url)) + .filter((url): url is string => Boolean(url)); if (audioUrls.length > 0) { transcript = await transcribeFirstAudio({ ctx: {