fix(discord): identify voice attachment metadata

This commit is contained in:
Peter Steinberger
2026-04-25 11:05:33 +01:00
parent 5985e1d8b9
commit 38703ed9a1
4 changed files with 76 additions and 3 deletions

View File

@@ -560,6 +560,32 @@ describe("resolveMediaList", () => {
]);
});
// The fixture has no content type and an extension-less filename, so only the
// voice-message fields (duration_secs / waveform) can mark it as audio.
it("classifies Discord voice attachments by waveform metadata", async () => {
  const voiceAttachment = {
    id: "att-voice-metadata",
    url: "https://cdn.discordapp.com/attachments/1/voice",
    filename: "voice",
    duration_secs: 1.5,
    waveform: "AAAA",
  };
  // Make the remote download reject so the expectation below covers the
  // entry that is produced without a fetched file.
  fetchRemoteMedia.mockRejectedValueOnce(new Error("blocked by ssrf guard"));

  const message = asMessage({ attachments: [voiceAttachment] });
  const resolved = await resolveMediaList(message, 512);

  const expectedEntry = {
    path: voiceAttachment.url,
    contentType: undefined,
    placeholder: "<media:audio>",
  };
  expect(resolved).toEqual([expectedEntry]);
});
it("falls back to URL when saveMediaBuffer fails", async () => {
const attachment = {
id: "att-save-fail",

View File

@@ -44,6 +44,10 @@ function isDiscordAudioAttachmentFileName(fileName?: string | null): boolean {
return Boolean(ext && AUDIO_ATTACHMENT_EXTENSIONS.has(ext));
}
/**
 * Reports whether an attachment carries Discord voice-message metadata.
 *
 * An attachment qualifies when `duration_secs` is a number or when `waveform`
 * is a string with at least one non-whitespace character. A blank waveform is
 * treated as absent so an empty placeholder value cannot, on its own, turn an
 * arbitrary attachment into audio.
 *
 * @param attachment - Discord attachment payload to inspect.
 * @returns `true` when either voice-message field is present and usable.
 */
function hasDiscordVoiceAttachmentFields(attachment: APIAttachment): boolean {
  if (typeof attachment.duration_secs === "number") {
    return true;
  }
  const waveform = attachment.waveform;
  // NOTE(review): the audio preflight normalizes the waveform before testing
  // it; this check presumably matches that behavior — confirm against
  // normalizeOptionalString.
  return typeof waveform === "string" && waveform.trim().length > 0;
}
function mergeHostnameList(...lists: Array<string[] | undefined>): string[] | undefined {
const merged = lists
.flatMap((list) => list ?? [])
@@ -578,6 +582,9 @@ function inferPlaceholder(attachment: APIAttachment): string {
if (mime.startsWith("audio/")) {
return "<media:audio>";
}
if (hasDiscordVoiceAttachmentFields(attachment)) {
return "<media:audio>";
}
if (isDiscordAudioAttachmentFileName(attachment.filename ?? attachment.url)) {
return "<media:audio>";
}

View File

@@ -77,6 +77,36 @@ describe("resolveDiscordPreflightAudioMentionContext", () => {
);
});
// Voice-message metadata alone (no content type, extension-less filename)
// must be enough for the attachment to be handed to transcription.
it("preflights Discord voice attachments by waveform metadata", async () => {
  transcribeFirstAudioMock.mockResolvedValue("metadata transcript");

  // The URL is padded with spaces on purpose: the expectation below requires
  // the trimmed form to reach the transcriber.
  const voiceAttachment = {
    url: " https://cdn.discordapp.com/attachments/voice ",
    filename: "voice",
    duration_secs: 1.5,
    waveform: "AAAA",
  };

  await resolveDiscordPreflightAudioMentionContext({
    message: { attachments: [voiceAttachment] },
    isDirectMessage: true,
    shouldRequireMention: false,
    mentionRegexes: [],
    cfg,
  });

  const expectedCtx = expect.objectContaining({
    MediaUrls: ["https://cdn.discordapp.com/attachments/voice"],
    MediaTypes: ["audio/ogg"],
  });
  expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
    expect.objectContaining({ ctx: expectedCtx }),
  );
});
it("does not preflight typed direct-message audio", async () => {
const result = await resolveDiscordPreflightAudioMentionContext({
message: {

View File

@@ -1,6 +1,7 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { getFileExtension } from "openclaw/plugin-sdk/media-mime";
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
type DiscordPreflightAudioRuntime = typeof import("./preflight-audio.runtime.js");
@@ -13,8 +14,10 @@ function loadDiscordPreflightAudioRuntime(): Promise<DiscordPreflightAudioRuntim
/**
 * Attachment fields consulted when deciding whether a Discord attachment is
 * audio and which MIME type to report for it. Every field is optional.
 */
type DiscordAudioAttachment = {
// Declared MIME type; used as-is when it starts with "audio/".
content_type?: string;
// Voice-message metadata: a numeric value marks the attachment as audio.
duration_secs?: number;
// Preferred source for the file extension; `url` is used when this is absent.
filename?: string;
// Attachment location; also the extension fallback when `filename` is absent.
url?: string;
// Voice-message metadata: a string value (after normalization) marks the attachment as audio.
waveform?: string;
};
const AUDIO_ATTACHMENT_MIME_BY_EXT = new Map([
@@ -30,10 +33,16 @@ const AUDIO_ATTACHMENT_MIME_BY_EXT = new Map([
]);
/**
 * Infers the audio MIME type of a Discord attachment.
 *
 * Checks, in order:
 *   1. the declared content type, returned as-is when it starts with "audio/";
 *   2. voice-message metadata (`duration_secs` or `waveform`), reported as
 *      "audio/ogg";
 *   3. the file extension of `filename` (or `url` when there is no filename),
 *      looked up in AUDIO_ATTACHMENT_MIME_BY_EXT.
 *
 * @param attachment - Attachment fields to inspect.
 * @returns The inferred audio MIME type, or `undefined` when no check matches.
 */
function inferAudioAttachmentMime(attachment: DiscordAudioAttachment): string | undefined {
  // Single declaration: the earlier `?.trim()` variant was a leftover duplicate
  // of this binding and made the function fail to compile.
  const contentType = normalizeOptionalString(attachment.content_type);
  if (contentType?.startsWith("audio/")) {
    return contentType;
  }
  // Voice messages may arrive without an audio content type or a file
  // extension, so their metadata fields are checked before the extension.
  // NOTE(review): "audio/ogg" presumably reflects how Discord encodes voice
  // messages — confirm.
  if (
    typeof attachment.duration_secs === "number" ||
    typeof normalizeOptionalString(attachment.waveform) === "string"
  ) {
    return "audio/ogg";
  }
  const ext = getFileExtension(attachment.filename ?? attachment.url);
  return ext ? AUDIO_ATTACHMENT_MIME_BY_EXT.get(ext) : undefined;
}
@@ -45,7 +54,7 @@ function collectAudioAttachments(
return [];
}
return attachments.filter(
(att) => typeof att.url === "string" && att.url.length > 0 && inferAudioAttachmentMime(att),
(att) => normalizeOptionalString(att.url) && inferAudioAttachmentMime(att),
);
}
@@ -91,7 +100,8 @@ export async function resolveDiscordPreflightAudioMentionContext(params: {
}
const audioUrls = audioAttachments
.map((att) => att.url)
.filter((url): url is string => typeof url === "string" && url.length > 0);
.map((url) => normalizeOptionalString(url))
.filter((url): url is string => Boolean(url));
if (audioUrls.length > 0) {
transcript = await transcribeFirstAudio({
ctx: {