fix(discord): restore voice note audio preflight

This commit is contained in:
Peter Steinberger
2026-04-25 10:56:54 +01:00
parent 936f27dcab
commit 9fe35a0c62
6 changed files with 272 additions and 9 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
- MiniMax music generation: switch the bundled default model from the unsupported `music-2.5+` id to the current `music-2.6` API model. Fixes #64870 and addresses the music default from #62315. Thanks @noahclanman and @edwardzheng1.
- Google media generation: strip a configured trailing `/v1beta` from Google music/video provider base URLs before calling the Google GenAI SDK, preventing doubled `/v1beta/v1beta` paths. Fixes #63240. (#63258) Thanks @Hybirdss.
- Discord: restore direct-message voice-note preflight transcription, classify Ogg/Opus voice attachments that arrive with a URL but no content type as audio, and skip partial attachments that have no usable URL. Fixes #61314 and #64803.
- Google Chat: preserve reply text when a typing indicator message is deleted or can no longer be updated, so media captions and first text chunks are resent instead of silently disappearing. (#71498) Thanks @colin-lgtm.
- Cron: tolerate malformed legacy job rows in startup, main-session system-event payloads, and human-readable `cron list` output so missing `state`, `payload.text`, or display fields no longer crash the scheduler or CLI. Fixes #66016, #65916, #64137, #57872, #59968, #63813, #52804, and #43163. (#71509) Thanks @vincentkoc.
- CLI/models: make `openclaw models scan` fall back to public OpenRouter free-model metadata when no `OPENROUTER_API_KEY` is configured, avoid config secret resolution for explicit `--no-probe` scans, and apply the scan timeout to the OpenRouter catalog request.

View File

@@ -366,6 +366,47 @@ describe("preflightDiscordMessage", () => {
});
});
it("preflights direct-message voice notes without mention gating", async () => {
  const dmChannelId = "dm-channel-audio-1";
  const voiceNoteUrl = "https://cdn.discordapp.com/attachments/voice.ogg";
  const voiceNoteMime = "audio/ogg";
  transcribeFirstAudioMock.mockResolvedValue("hello openclaw from dm audio");

  // Voice note only: empty typed content, one Ogg attachment, non-bot author.
  const result = await runDmPreflight({
    channelId: dmChannelId,
    message: createDiscordMessage({
      id: "m-dm-audio-1",
      channelId: dmChannelId,
      content: "",
      attachments: [
        {
          id: "att-dm-audio-1",
          url: voiceNoteUrl,
          content_type: voiceNoteMime,
          filename: "voice.ogg",
        },
      ],
      author: { id: "user-1", bot: false, username: "alice" },
    }),
    discordConfig: { dmPolicy: "open" } as DiscordConfig,
  });

  // The transcriber must be invoked exactly once with the attachment's URL and MIME type.
  const expectedCtx = expect.objectContaining({
    MediaUrls: [voiceNoteUrl],
    MediaTypes: [voiceNoteMime],
  });
  expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1);
  expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
    expect.objectContaining({ ctx: expectedCtx }),
  );

  // The message is accepted and flagged as a direct message.
  expect(result).not.toBeNull();
  expect(result?.isDirectMessage).toBe(true);
});
it("falls back to the default discord account for omitted-account dm authorization", async () => {
const message = createDiscordMessage({
id: "m-dm-default-account",

View File

@@ -517,6 +517,49 @@ describe("resolveMediaList", () => {
expectAttachmentImageFallback({ result, attachment });
});
it("skips attachments without a usable URL", async () => {
  // Partial attachment: metadata is present but there is no `url` to fetch.
  const message = asMessage({
    attachments: [{ id: "att-missing-url", filename: "voice.ogg", content_type: "audio/ogg" }],
  });

  const resolved = await resolveMediaList(message, 512);

  // Nothing is downloaded or stored, and no media entry is emitted.
  expect(fetchRemoteMedia).not.toHaveBeenCalled();
  expect(saveMediaBuffer).not.toHaveBeenCalled();
  expect(resolved).toEqual([]);
});
it("classifies audio attachments by filename when content type is missing", async () => {
  // No content_type on purpose: only the ".ogg" file name hints at audio.
  const voiceAttachment = {
    id: "att-audio-fallback",
    url: "https://cdn.discordapp.com/attachments/1/voice.ogg",
    filename: "voice.ogg",
  };
  // Make the download fail so the result falls back to the raw attachment URL.
  fetchRemoteMedia.mockRejectedValueOnce(new Error("blocked by ssrf guard"));

  const message = asMessage({ attachments: [voiceAttachment] });
  const resolved = await resolveMediaList(message, 512);

  const expectedEntry = {
    path: voiceAttachment.url,
    contentType: undefined,
    placeholder: "<media:audio>",
  };
  expect(resolved).toEqual([expectedEntry]);
});
it("falls back to URL when saveMediaBuffer fails", async () => {
const attachment = {
id: "att-save-fail",

View File

@@ -1,5 +1,6 @@
import type { ChannelType, Client, Message } from "@buape/carbon";
import { StickerFormatType, type APIAttachment, type APIStickerItem } from "discord-api-types/v10";
import { getFileExtension } from "openclaw/plugin-sdk/media-mime";
import { fetchRemoteMedia, type FetchLike } from "openclaw/plugin-sdk/media-runtime";
import { saveMediaBuffer } from "openclaw/plugin-sdk/media-runtime";
import { buildMediaPayload } from "openclaw/plugin-sdk/reply-payload";
@@ -26,6 +27,23 @@ const DISCORD_MEDIA_SSRF_POLICY: SsrFPolicy = {
allowRfc2544BenchmarkRange: true,
};
/** File extensions (leading dot included) that are treated as audio attachments. */
const AUDIO_ATTACHMENT_EXTENSIONS = new Set<string>([
  ".aac",
  ".caf",
  ".flac",
  ".m4a",
  ".mp3",
  ".oga",
  ".ogg",
  ".opus",
  ".wav",
]);

/**
 * Reports whether a file name (or URL) ends in one of the known audio extensions.
 *
 * @param fileName - Attachment file name or URL; may be missing.
 * @returns `true` only when an extension is found and it is in
 *   `AUDIO_ATTACHMENT_EXTENSIONS`.
 */
function isDiscordAudioAttachmentFileName(fileName?: string | null): boolean {
  // NOTE(review): presumably getFileExtension returns a lowercased, dot-prefixed
  // extension — confirm against openclaw/plugin-sdk/media-mime.
  const extension = getFileExtension(fileName);
  return extension ? AUDIO_ATTACHMENT_EXTENSIONS.has(extension) : false;
}
function mergeHostnameList(...lists: Array<string[] | undefined>): string[] | undefined {
const merged = lists
.flatMap((list) => list ?? [])
@@ -381,10 +399,17 @@ async function appendResolvedMediaFromAttachments(params: {
return;
}
for (const attachment of attachments) {
const attachmentUrl = normalizeOptionalString(attachment.url);
if (!attachmentUrl) {
logVerbose(
`${params.errorPrefix} ${attachment.id ?? attachment.filename ?? "attachment"}: missing url`,
);
continue;
}
try {
const fetched = await fetchDiscordMedia({
url: attachment.url,
filePathHint: attachment.filename ?? attachment.url,
url: attachmentUrl,
filePathHint: attachment.filename ?? attachmentUrl,
maxBytes: params.maxBytes,
fetchImpl: params.fetchImpl,
ssrfPolicy: params.ssrfPolicy,
@@ -404,11 +429,11 @@ async function appendResolvedMediaFromAttachments(params: {
placeholder: inferPlaceholder(attachment),
});
} catch (err) {
const id = attachment.id ?? attachment.url;
const id = attachment.id ?? attachmentUrl;
logVerbose(`${params.errorPrefix} ${id}: ${String(err)}`);
// Preserve attachment context even when remote fetch is blocked/fails.
params.out.push({
path: attachment.url,
path: attachmentUrl,
contentType: attachment.content_type,
placeholder: inferPlaceholder(attachment),
});
@@ -553,6 +578,9 @@ function inferPlaceholder(attachment: APIAttachment): string {
if (mime.startsWith("audio/")) {
return "<media:audio>";
}
if (isDiscordAudioAttachmentFileName(attachment.filename ?? attachment.url)) {
return "<media:audio>";
}
return "<media:document>";
}

View File

@@ -0,0 +1,127 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Create the mock through vi.hoisted so it already exists when the vi.mock
// factory below is evaluated (vitest hoists vi.mock calls above the imports).
const transcribeFirstAudioMock = vi.hoisted(() => vi.fn());
// Swap the runtime module's transcribeFirstAudio export for the mock.
vi.mock("./preflight-audio.runtime.js", () => ({
transcribeFirstAudio: transcribeFirstAudioMock,
}));
import { resolveDiscordPreflightAudioMentionContext } from "./preflight-audio.js";
// Empty config stand-in; every test below hands it to the resolver as `cfg`.
const cfg = {} as import("openclaw/plugin-sdk/config-runtime").OpenClawConfig;
describe("resolveDiscordPreflightAudioMentionContext", () => {
  const OGG_URL = "https://cdn.discordapp.com/attachments/voice.ogg";
  const OPUS_URL = "https://cdn.discordapp.com/attachments/voice.opus";

  // Every case runs as a direct message with mention gating switched off.
  const directMessageParams = {
    isDirectMessage: true,
    shouldRequireMention: false,
    mentionRegexes: [],
    cfg,
  };

  /** Asserts that the transcriber received exactly these media URLs and MIME types in `ctx`. */
  const expectTranscriptionRequest = (mediaUrls: string[], mediaTypes: string[]): void => {
    expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
      expect.objectContaining({
        ctx: expect.objectContaining({ MediaUrls: mediaUrls, MediaTypes: mediaTypes }),
      }),
    );
  };

  beforeEach(() => {
    transcribeFirstAudioMock.mockReset();
  });

  it("preflights direct-message audio without requiring a mention", async () => {
    transcribeFirstAudioMock.mockResolvedValue("hello from dm");

    const result = await resolveDiscordPreflightAudioMentionContext({
      message: {
        attachments: [{ url: OGG_URL, content_type: "audio/ogg", filename: "voice.ogg" }],
      },
      ...directMessageParams,
    });

    expectTranscriptionRequest([OGG_URL], ["audio/ogg"]);
    expect(result).toEqual({
      hasAudioAttachment: true,
      hasTypedText: false,
      transcript: "hello from dm",
    });
  });

  it("preflights audio by filename when Discord omits content type", async () => {
    transcribeFirstAudioMock.mockResolvedValue("filename transcript");

    // No content_type: the MIME type has to come from the ".opus" extension.
    await resolveDiscordPreflightAudioMentionContext({
      message: {
        attachments: [{ url: OPUS_URL, filename: "voice.opus" }],
      },
      ...directMessageParams,
    });

    expectTranscriptionRequest([OPUS_URL], ["audio/opus"]);
  });

  it("does not preflight typed direct-message audio", async () => {
    // Typed text accompanies the audio, so no transcription is expected.
    const result = await resolveDiscordPreflightAudioMentionContext({
      message: {
        content: "typed caption",
        attachments: [{ url: OGG_URL, content_type: "audio/ogg", filename: "voice.ogg" }],
      },
      ...directMessageParams,
    });

    expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
    expect(result).toEqual({
      hasAudioAttachment: true,
      hasTypedText: true,
    });
  });

  it("ignores URL-less audio attachments", async () => {
    // An audio content type without a URL must not count as an audio attachment.
    const result = await resolveDiscordPreflightAudioMentionContext({
      message: {
        attachments: [{ content_type: "audio/ogg", filename: "voice.ogg" }],
      },
      ...directMessageParams,
    });

    expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
    expect(result).toEqual({
      hasAudioAttachment: false,
      hasTypedText: false,
    });
  });
});

View File

@@ -1,4 +1,5 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { getFileExtension } from "openclaw/plugin-sdk/media-mime";
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
type DiscordPreflightAudioRuntime = typeof import("./preflight-audio.runtime.js");
@@ -12,16 +13,40 @@ function loadDiscordPreflightAudioRuntime(): Promise<DiscordPreflightAudioRuntim
type DiscordAudioAttachment = {
content_type?: string;
filename?: string;
url?: string;
};
/** Audio MIME type to assume for each known file extension (leading dot included). */
const AUDIO_ATTACHMENT_MIME_BY_EXT = new Map<string, string>([
  [".aac", "audio/aac"],
  [".caf", "audio/x-caf"],
  [".flac", "audio/flac"],
  [".m4a", "audio/mp4"],
  [".mp3", "audio/mpeg"],
  [".oga", "audio/ogg"],
  [".ogg", "audio/ogg"],
  [".opus", "audio/opus"],
  [".wav", "audio/wav"],
]);

/**
 * Resolves the audio MIME type of an attachment.
 *
 * A trimmed `content_type` that starts with `audio/` is returned as-is.
 * Otherwise the extension of `filename` (or of `url` when there is no file
 * name) is looked up in `AUDIO_ATTACHMENT_MIME_BY_EXT`.
 *
 * @param attachment - Attachment metadata; every field is optional.
 * @returns The audio MIME type, or `undefined` when the attachment does not
 *   look like audio.
 */
function inferAudioAttachmentMime(attachment: DiscordAudioAttachment): string | undefined {
  const declaredMime = attachment.content_type?.trim();
  if (declaredMime !== undefined && declaredMime.startsWith("audio/")) {
    return declaredMime;
  }

  // No usable declared type: fall back to the file extension.
  const extension = getFileExtension(attachment.filename ?? attachment.url);
  if (!extension) {
    return undefined;
  }
  return AUDIO_ATTACHMENT_MIME_BY_EXT.get(extension);
}
/**
 * Selects the attachments that are eligible for audio preflight transcription.
 *
 * An attachment qualifies only when it has a non-empty `url` and
 * `inferAudioAttachmentMime` resolves an audio MIME type for it, either from
 * its declared content type or from its file extension.
 *
 * @param attachments - Attachment list from the message; may be missing or not an array.
 * @returns The qualifying attachments in their original order, or `[]` when
 *   there is nothing to inspect.
 */
function collectAudioAttachments(
  attachments: DiscordAudioAttachment[] | undefined,
): DiscordAudioAttachment[] {
  if (!Array.isArray(attachments)) {
    return [];
  }
  // A single filter: checking `content_type` alone would keep attachments that
  // have no URL to transcribe and would drop audio identified only by file name.
  return attachments.filter(
    (att) =>
      typeof att.url === "string" && att.url.length > 0 && Boolean(inferAudioAttachmentMime(att)),
  );
}
export async function resolveDiscordPreflightAudioMentionContext(params: {
@@ -43,12 +68,10 @@ export async function resolveDiscordPreflightAudioMentionContext(params: {
const hasAudioAttachment = audioAttachments.length > 0;
const hasTypedText = Boolean(params.message.content?.trim());
const needsPreflightTranscription =
!params.isDirectMessage &&
params.shouldRequireMention &&
hasAudioAttachment &&
// `baseText` includes media placeholders; gate on typed text only.
!hasTypedText &&
params.mentionRegexes.length > 0;
(params.isDirectMessage || (params.shouldRequireMention && params.mentionRegexes.length > 0));
let transcript: string | undefined;
if (needsPreflightTranscription) {
@@ -74,7 +97,7 @@ export async function resolveDiscordPreflightAudioMentionContext(params: {
ctx: {
MediaUrls: audioUrls,
MediaTypes: audioAttachments
.map((att) => att.content_type)
.map((att) => inferAudioAttachmentMime(att))
.filter((contentType): contentType is string => Boolean(contentType)),
},
cfg: params.cfg,