mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:10:44 +00:00
fix(discord): restore voice note audio preflight
This commit is contained in:
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- MiniMax music generation: switch the bundled default model from the unsupported `music-2.5+` id to the current `music-2.6` API model. Fixes #64870 and addresses the music default from #62315. Thanks @noahclanman and @edwardzheng1.
|
||||
- Google media generation: strip a configured trailing `/v1beta` from Google music/video provider base URLs before calling the Google GenAI SDK, preventing doubled `/v1beta/v1beta` paths. Fixes #63240. (#63258) Thanks @Hybirdss.
|
||||
- Discord: restore direct-message voice-note preflight transcription and classify URL-only Ogg/Opus voice attachments as audio while skipping partial attachments without usable URLs. Fixes #61314 and #64803.
|
||||
- Google Chat: preserve reply text when a typing indicator message is deleted or can no longer be updated, so media captions and first text chunks are resent instead of silently disappearing. (#71498) Thanks @colin-lgtm.
|
||||
- Cron: tolerate malformed legacy job rows in startup, main-session system-event payloads, and human-readable `cron list` output so missing `state`, `payload.text`, or display fields no longer crash the scheduler or CLI. Fixes #66016, #65916, #64137, #57872, #59968, #63813, #52804, and #43163. (#71509) Thanks @vincentkoc.
|
||||
- CLI/models: make `openclaw models scan` fall back to public OpenRouter free-model metadata when no `OPENROUTER_API_KEY` is configured, avoid config secret resolution for explicit `--no-probe` scans, and apply the scan timeout to the OpenRouter catalog request.
|
||||
|
||||
@@ -366,6 +366,47 @@ describe("preflightDiscordMessage", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// A DM voice note has empty typed content and no mention; this test pins that
// the preflight still calls the transcriber with the attachment URL and MIME.
it("preflights direct-message voice notes without mention gating", async () => {
  transcribeFirstAudioMock.mockResolvedValue("hello openclaw from dm audio");

  const result = await runDmPreflight({
    channelId: "dm-channel-audio-1",
    message: createDiscordMessage({
      id: "m-dm-audio-1",
      channelId: "dm-channel-audio-1",
      // Empty typed text: only the audio attachment carries content.
      content: "",
      attachments: [
        {
          id: "att-dm-audio-1",
          url: "https://cdn.discordapp.com/attachments/voice.ogg",
          content_type: "audio/ogg",
          filename: "voice.ogg",
        },
      ],
      author: {
        id: "user-1",
        bot: false,
        username: "alice",
      },
    }),
    discordConfig: {
      dmPolicy: "open",
    } as DiscordConfig,
  });

  expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1);
  expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
    expect.objectContaining({
      ctx: expect.objectContaining({
        MediaUrls: ["https://cdn.discordapp.com/attachments/voice.ogg"],
        MediaTypes: ["audio/ogg"],
      }),
    }),
  );
  expect(result).not.toBeNull();
  expect(result?.isDirectMessage).toBe(true);
});
|
||||
|
||||
it("falls back to the default discord account for omitted-account dm authorization", async () => {
|
||||
const message = createDiscordMessage({
|
||||
id: "m-dm-default-account",
|
||||
|
||||
@@ -517,6 +517,49 @@ describe("resolveMediaList", () => {
|
||||
expectAttachmentImageFallback({ result, attachment });
|
||||
});
|
||||
|
||||
// An attachment with no `url` must be dropped: nothing is fetched, nothing is
// saved, and no entry is emitted for it.
it("skips attachments without a usable URL", async () => {
  const result = await resolveMediaList(
    asMessage({
      attachments: [
        {
          id: "att-missing-url",
          filename: "voice.ogg",
          content_type: "audio/ogg",
        },
      ],
    }),
    512,
  );

  expect(fetchRemoteMedia).not.toHaveBeenCalled();
  expect(saveMediaBuffer).not.toHaveBeenCalled();
  expect(result).toEqual([]);
});
|
||||
|
||||
// With no `content_type` on the attachment and the remote fetch rejected, the
// fallback entry must still be labelled as audio based on the `.ogg` filename.
it("classifies audio attachments by filename when content type is missing", async () => {
  const attachment = {
    id: "att-audio-fallback",
    url: "https://cdn.discordapp.com/attachments/1/voice.ogg",
    filename: "voice.ogg",
  };
  // Force the fetch-failure path so the URL-only fallback entry is produced.
  fetchRemoteMedia.mockRejectedValueOnce(new Error("blocked by ssrf guard"));

  const result = await resolveMediaList(
    asMessage({
      attachments: [attachment],
    }),
    512,
  );

  expect(result).toEqual([
    {
      path: attachment.url,
      contentType: undefined,
      placeholder: "<media:audio>",
    },
  ]);
});
|
||||
|
||||
it("falls back to URL when saveMediaBuffer fails", async () => {
|
||||
const attachment = {
|
||||
id: "att-save-fail",
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { ChannelType, Client, Message } from "@buape/carbon";
|
||||
import { StickerFormatType, type APIAttachment, type APIStickerItem } from "discord-api-types/v10";
|
||||
import { getFileExtension } from "openclaw/plugin-sdk/media-mime";
|
||||
import { fetchRemoteMedia, type FetchLike } from "openclaw/plugin-sdk/media-runtime";
|
||||
import { saveMediaBuffer } from "openclaw/plugin-sdk/media-runtime";
|
||||
import { buildMediaPayload } from "openclaw/plugin-sdk/reply-payload";
|
||||
@@ -26,6 +27,23 @@ const DISCORD_MEDIA_SSRF_POLICY: SsrFPolicy = {
|
||||
allowRfc2544BenchmarkRange: true,
|
||||
};
|
||||
|
||||
/**
 * File extensions (leading dot, lowercase) that mark an attachment as audio
 * when no usable `audio/*` content type is available.
 */
const AUDIO_ATTACHMENT_EXTENSIONS = new Set([
  ".aac",
  ".caf",
  ".flac",
  ".m4a",
  ".mp3",
  ".oga",
  ".ogg",
  ".opus",
  ".wav",
]);
|
||||
|
||||
/**
 * Report whether a file name (or URL used as a name) has a known audio extension.
 *
 * @param fileName - Attachment file name or URL; may be absent.
 * @returns `true` when `getFileExtension` yields an extension listed in
 *   `AUDIO_ATTACHMENT_EXTENSIONS`; `false` when it yields nothing or an
 *   unlisted extension.
 */
function isDiscordAudioAttachmentFileName(fileName?: string | null): boolean {
  // NOTE(review): relies on getFileExtension returning a dotted extension in
  // the same casing as the set entries — confirm against the SDK helper.
  const ext = getFileExtension(fileName);
  return Boolean(ext && AUDIO_ATTACHMENT_EXTENSIONS.has(ext));
}
|
||||
|
||||
function mergeHostnameList(...lists: Array<string[] | undefined>): string[] | undefined {
|
||||
const merged = lists
|
||||
.flatMap((list) => list ?? [])
|
||||
@@ -381,10 +399,17 @@ async function appendResolvedMediaFromAttachments(params: {
|
||||
return;
|
||||
}
|
||||
for (const attachment of attachments) {
|
||||
const attachmentUrl = normalizeOptionalString(attachment.url);
|
||||
if (!attachmentUrl) {
|
||||
logVerbose(
|
||||
`${params.errorPrefix} ${attachment.id ?? attachment.filename ?? "attachment"}: missing url`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
const fetched = await fetchDiscordMedia({
|
||||
url: attachment.url,
|
||||
filePathHint: attachment.filename ?? attachment.url,
|
||||
url: attachmentUrl,
|
||||
filePathHint: attachment.filename ?? attachmentUrl,
|
||||
maxBytes: params.maxBytes,
|
||||
fetchImpl: params.fetchImpl,
|
||||
ssrfPolicy: params.ssrfPolicy,
|
||||
@@ -404,11 +429,11 @@ async function appendResolvedMediaFromAttachments(params: {
|
||||
placeholder: inferPlaceholder(attachment),
|
||||
});
|
||||
} catch (err) {
|
||||
const id = attachment.id ?? attachment.url;
|
||||
const id = attachment.id ?? attachmentUrl;
|
||||
logVerbose(`${params.errorPrefix} ${id}: ${String(err)}`);
|
||||
// Preserve attachment context even when remote fetch is blocked/fails.
|
||||
params.out.push({
|
||||
path: attachment.url,
|
||||
path: attachmentUrl,
|
||||
contentType: attachment.content_type,
|
||||
placeholder: inferPlaceholder(attachment),
|
||||
});
|
||||
@@ -553,6 +578,9 @@ function inferPlaceholder(attachment: APIAttachment): string {
|
||||
if (mime.startsWith("audio/")) {
|
||||
return "<media:audio>";
|
||||
}
|
||||
if (isDiscordAudioAttachmentFileName(attachment.filename ?? attachment.url)) {
|
||||
return "<media:audio>";
|
||||
}
|
||||
return "<media:document>";
|
||||
}
|
||||
|
||||
|
||||
127
extensions/discord/src/monitor/preflight-audio.test.ts
Normal file
127
extensions/discord/src/monitor/preflight-audio.test.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const transcribeFirstAudioMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("./preflight-audio.runtime.js", () => ({
|
||||
transcribeFirstAudio: transcribeFirstAudioMock,
|
||||
}));
|
||||
|
||||
import { resolveDiscordPreflightAudioMentionContext } from "./preflight-audio.js";
|
||||
|
||||
const cfg = {} as import("openclaw/plugin-sdk/config-runtime").OpenClawConfig;
|
||||
|
||||
// Unit tests for the audio preflight decision. The transcription runtime is
// replaced by `transcribeFirstAudioMock`, so each case checks only whether the
// transcriber is invoked and with which media context.
describe("resolveDiscordPreflightAudioMentionContext", () => {
  beforeEach(() => {
    transcribeFirstAudioMock.mockReset();
  });

  // DM + audio + no typed text: transcription runs even though no mention is required.
  it("preflights direct-message audio without requiring a mention", async () => {
    transcribeFirstAudioMock.mockResolvedValue("hello from dm");

    const result = await resolveDiscordPreflightAudioMentionContext({
      message: {
        attachments: [
          {
            url: "https://cdn.discordapp.com/attachments/voice.ogg",
            content_type: "audio/ogg",
            filename: "voice.ogg",
          },
        ],
      },
      isDirectMessage: true,
      shouldRequireMention: false,
      mentionRegexes: [],
      cfg,
    });

    expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
      expect.objectContaining({
        ctx: expect.objectContaining({
          MediaUrls: ["https://cdn.discordapp.com/attachments/voice.ogg"],
          MediaTypes: ["audio/ogg"],
        }),
      }),
    );
    expect(result).toEqual({
      hasAudioAttachment: true,
      hasTypedText: false,
      transcript: "hello from dm",
    });
  });

  // No content_type on the attachment: the MIME is expected to come from the `.opus` extension.
  it("preflights audio by filename when Discord omits content type", async () => {
    transcribeFirstAudioMock.mockResolvedValue("filename transcript");

    await resolveDiscordPreflightAudioMentionContext({
      message: {
        attachments: [
          {
            url: "https://cdn.discordapp.com/attachments/voice.opus",
            filename: "voice.opus",
          },
        ],
      },
      isDirectMessage: true,
      shouldRequireMention: false,
      mentionRegexes: [],
      cfg,
    });

    expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
      expect.objectContaining({
        ctx: expect.objectContaining({
          MediaUrls: ["https://cdn.discordapp.com/attachments/voice.opus"],
          MediaTypes: ["audio/opus"],
        }),
      }),
    );
  });

  // Typed text alongside the audio: the audio is detected but not transcribed.
  it("does not preflight typed direct-message audio", async () => {
    const result = await resolveDiscordPreflightAudioMentionContext({
      message: {
        content: "typed caption",
        attachments: [
          {
            url: "https://cdn.discordapp.com/attachments/voice.ogg",
            content_type: "audio/ogg",
            filename: "voice.ogg",
          },
        ],
      },
      isDirectMessage: true,
      shouldRequireMention: false,
      mentionRegexes: [],
      cfg,
    });

    expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
    expect(result).toEqual({
      hasAudioAttachment: true,
      hasTypedText: true,
    });
  });

  // An audio attachment without a URL is not counted as an audio attachment at all.
  it("ignores URL-less audio attachments", async () => {
    const result = await resolveDiscordPreflightAudioMentionContext({
      message: {
        attachments: [
          {
            content_type: "audio/ogg",
            filename: "voice.ogg",
          },
        ],
      },
      isDirectMessage: true,
      shouldRequireMention: false,
      mentionRegexes: [],
      cfg,
    });

    expect(transcribeFirstAudioMock).not.toHaveBeenCalled();
    expect(result).toEqual({
      hasAudioAttachment: false,
      hasTypedText: false,
    });
  });
});
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import { getFileExtension } from "openclaw/plugin-sdk/media-mime";
|
||||
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
|
||||
|
||||
type DiscordPreflightAudioRuntime = typeof import("./preflight-audio.runtime.js");
|
||||
@@ -12,16 +13,40 @@ function loadDiscordPreflightAudioRuntime(): Promise<DiscordPreflightAudioRuntim
|
||||
|
||||
/**
 * Minimal attachment shape read by the audio preflight. Every field is
 * optional because callers may pass partial attachment objects.
 */
type DiscordAudioAttachment = {
  /** MIME type reported for the attachment, when present. */
  content_type?: string;
  /** Original file name; used to infer an audio MIME when `content_type` is not `audio/*`. */
  filename?: string;
  /** Download URL; attachments without one cannot be transcribed. */
  url?: string;
};
|
||||
|
||||
/**
 * Audio MIME type to assume for an attachment, keyed by its file extension
 * (leading dot, lowercase). Used when the attachment carries no `audio/*`
 * content type of its own.
 */
const AUDIO_ATTACHMENT_MIME_BY_EXT = new Map([
  [".aac", "audio/aac"],
  [".caf", "audio/x-caf"],
  [".flac", "audio/flac"],
  [".m4a", "audio/mp4"],
  [".mp3", "audio/mpeg"],
  [".oga", "audio/ogg"],
  [".ogg", "audio/ogg"],
  [".opus", "audio/opus"],
  [".wav", "audio/wav"],
]);
|
||||
|
||||
/**
 * Determine the audio MIME type of an attachment, if it is audio.
 *
 * @param attachment - Attachment whose `content_type`, `filename` and `url` are inspected.
 * @returns The trimmed `content_type` when it starts with `audio/`; otherwise
 *   the MIME mapped from the extension of `filename` (or of `url` when there
 *   is no filename); `undefined` when neither identifies audio.
 */
function inferAudioAttachmentMime(attachment: DiscordAudioAttachment): string | undefined {
  // A declared audio content type wins over anything inferred from the name.
  const contentType = attachment.content_type?.trim();
  if (contentType?.startsWith("audio/")) {
    return contentType;
  }
  // Fall back to the file extension for attachments with a missing or
  // non-audio content type.
  const ext = getFileExtension(attachment.filename ?? attachment.url);
  return ext ? AUDIO_ATTACHMENT_MIME_BY_EXT.get(ext) : undefined;
}
|
||||
|
||||
/**
 * Select the attachments that can be handed to audio transcription.
 *
 * An attachment qualifies only when it has a non-empty string `url` and
 * `inferAudioAttachmentMime` resolves an audio MIME type for it.
 *
 * @param attachments - Attachment list from the message; may be missing or not an array.
 * @returns The qualifying attachments in their original order; an empty array
 *   when `attachments` is not an array.
 */
function collectAudioAttachments(
  attachments: DiscordAudioAttachment[] | undefined,
): DiscordAudioAttachment[] {
  if (!Array.isArray(attachments)) {
    return [];
  }
  // The URL check runs first so URL-less attachments are rejected without
  // consulting the MIME inference.
  return attachments.filter(
    (att) =>
      typeof att.url === "string" &&
      att.url.length > 0 &&
      Boolean(inferAudioAttachmentMime(att)),
  );
}
|
||||
|
||||
export async function resolveDiscordPreflightAudioMentionContext(params: {
|
||||
@@ -43,12 +68,10 @@ export async function resolveDiscordPreflightAudioMentionContext(params: {
|
||||
const hasAudioAttachment = audioAttachments.length > 0;
|
||||
const hasTypedText = Boolean(params.message.content?.trim());
|
||||
const needsPreflightTranscription =
|
||||
!params.isDirectMessage &&
|
||||
params.shouldRequireMention &&
|
||||
hasAudioAttachment &&
|
||||
// `baseText` includes media placeholders; gate on typed text only.
|
||||
!hasTypedText &&
|
||||
params.mentionRegexes.length > 0;
|
||||
(params.isDirectMessage || (params.shouldRequireMention && params.mentionRegexes.length > 0));
|
||||
|
||||
let transcript: string | undefined;
|
||||
if (needsPreflightTranscription) {
|
||||
@@ -74,7 +97,7 @@ export async function resolveDiscordPreflightAudioMentionContext(params: {
|
||||
ctx: {
|
||||
MediaUrls: audioUrls,
|
||||
MediaTypes: audioAttachments
|
||||
.map((att) => att.content_type)
|
||||
.map((att) => inferAudioAttachmentMime(att))
|
||||
.filter((contentType): contentType is string => Boolean(contentType)),
|
||||
},
|
||||
cfg: params.cfg,
|
||||
|
||||
Reference in New Issue
Block a user