fix(discord): harden voice ffmpeg path and opus fast-path

This commit is contained in:
Peter Steinberger
2026-03-03 01:13:44 +00:00
parent 687ef2e00f
commit 346d3590fb
3 changed files with 173 additions and 31 deletions

View File

@@ -12,6 +12,7 @@ import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import { convertMarkdownTables } from "../markdown/tables.js";
import { maxBytesForKind } from "../media/constants.js";
import { extensionForMime } from "../media/mime.js";
import { unlinkIfExists } from "../media/temp-files.js";
import type { PollInput } from "../polls.js";
import { loadWebMediaRaw } from "../web/media.js";
import { resolveDiscordAccount } from "./accounts.js";
@@ -543,18 +544,7 @@ export async function sendVoiceMessageDiscord(
}
throw err;
} finally {
// Clean up temporary OGG file if we created one
if (oggCleanup && oggPath) {
try {
await fs.unlink(oggPath);
} catch {
// Ignore cleanup errors
}
}
try {
await fs.unlink(localInputPath);
} catch {
// Ignore cleanup errors
}
await unlinkIfExists(oggCleanup ? oggPath : null);
await unlinkIfExists(localInputPath);
}
}

View File

@@ -0,0 +1,146 @@
import type { ChildProcess, ExecFileOptions } from "node:child_process";
import { promisify } from "node:util";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
/**
 * Node-style completion callback accepted by the mocked `execFile` below:
 * an error (or `null` on success) followed by the stdout and stderr values.
 */
type ExecCallback = (
error: NodeJS.ErrnoException | null,
stdout: string | Buffer,
stderr: string | Buffer,
) => void;
/**
 * Record of a single invocation of the mocked `execFile`: the executable name,
 * a copy of its argument list, and the options object when one was supplied.
 */
type ExecCall = {
command: string;
args: string[];
options?: ExecFileOptions;
};
/**
 * Scripted outcome for one mocked `execFile` call. Omitted `stdout`/`stderr`
 * are reported to the callback as empty strings; when `error` is set it is
 * passed as the callback's first argument.
 */
type MockExecResult = {
stdout?: string;
stderr?: string;
error?: NodeJS.ErrnoException;
};
// Every call made to the mocked `execFile`, in invocation order.
const execCalls: ExecCall[] = [];
// FIFO queue of scripted results; the mock consumes one entry per call via shift().
const mockExecResults: MockExecResult[] = [];
// Swap `execFile` in node:child_process for a stub that records every call in
// `execCalls` and answers from the `mockExecResults` queue. All other exports
// of the real module are passed through untouched.
vi.mock("node:child_process", async (importOriginal) => {
  type ExecOutput = { stdout: string | Buffer; stderr: string | Buffer };
  type PromisifiedExecFile = (
    file: string,
    args?: readonly string[] | null,
    options?: ExecFileOptions | null,
  ) => Promise<ExecOutput>;

  const realChildProcess = await importOriginal<typeof import("node:child_process")>();

  // Callback-style stub; supports both (file, args, cb) and (file, args, options, cb).
  function fakeExecFile(
    file: string,
    args?: readonly string[] | null,
    optionsOrCallback?: ExecFileOptions | ExecCallback | null,
    callbackMaybe?: ExecCallback,
  ): ChildProcess {
    let done: ExecCallback | undefined;
    let options: ExecFileOptions | undefined;
    if (typeof optionsOrCallback === "function") {
      // Three-argument form: the third argument is the callback, no options given.
      done = optionsOrCallback;
    } else {
      done = callbackMaybe ?? undefined;
      options = optionsOrCallback ?? undefined;
    }

    const normalizedArgs = Array.isArray(args) ? [...args] : [];
    execCalls.push({ command: file, args: normalizedArgs, options });

    // An exhausted queue behaves like a successful run with empty output.
    const scripted: MockExecResult = mockExecResults.shift() ?? { stdout: "", stderr: "" };
    // Complete asynchronously so callers never observe a synchronous callback.
    queueMicrotask(() => {
      done?.(scripted.error ?? null, scripted.stdout ?? "", scripted.stderr ?? "");
    });
    return {} as ChildProcess;
  }

  // Promise flavour used by `util.promisify(execFile)`: resolves with
  // `{ stdout, stderr }` and rejects with the scripted error.
  const promisifiedExecFile: PromisifiedExecFile = (file, args, options) =>
    new Promise<ExecOutput>((resolve, reject) => {
      fakeExecFile(file, args, options, (error, stdout, stderr) => {
        if (error) {
          reject(error);
        } else {
          resolve({ stdout, stderr });
        }
      });
    });

  const execFile = fakeExecFile as unknown as typeof realChildProcess.execFile & {
    [promisify.custom]?: PromisifiedExecFile;
  };
  execFile[promisify.custom] = promisifiedExecFile;

  return { ...realChildProcess, execFile };
});
// Pin the temp directory to "/tmp" so generated output paths are predictable.
vi.mock("../infra/tmp-openclaw-dir.js", () => {
  return {
    resolvePreferredOpenClawTmpDir() {
      return "/tmp";
    },
  };
});
const { ensureOggOpus } = await import("./voice-message.js");
// Exercises ensureOggOpus against the mocked execFile: input validation, the
// ffprobe fast-path for 48 kHz Opus, and the bounded ffmpeg re-encode path.
describe("ensureOggOpus", () => {
  // Drop recorded calls and any unconsumed scripted results.
  const resetExecMock = (): void => {
    execCalls.length = 0;
    mockExecResults.length = 0;
  };

  // All recorded invocations of the given executable.
  const callsTo = (command: string): ExecCall[] =>
    execCalls.filter((entry) => entry.command === command);

  beforeEach(resetExecMock);
  afterEach(resetExecMock);

  it("rejects URL/protocol input paths", async () => {
    const attempt = ensureOggOpus("https://example.com/audio.ogg");

    await expect(attempt).rejects.toThrow(/local file path/i);
    // Nothing may be spawned for a rejected input.
    expect(execCalls).toHaveLength(0);
  });

  it("keeps .ogg only when codec is opus and sample rate is 48kHz", async () => {
    mockExecResults.push({ stdout: "opus,48000\n" });

    const outcome = await ensureOggOpus("/tmp/input.ogg");

    expect(outcome).toEqual({ path: "/tmp/input.ogg", cleanup: false });
    expect(execCalls).toHaveLength(1);
    const probeCall = execCalls[0];
    expect(probeCall.command).toBe("ffprobe");
    expect(probeCall.args).toContain("stream=codec_name,sample_rate");
    expect(probeCall.options?.timeout).toBe(10_000);
  });

  it("re-encodes .ogg opus when sample rate is not 48kHz", async () => {
    // First scripted result answers ffprobe, the second answers ffmpeg.
    mockExecResults.push({ stdout: "opus,24000\n" }, { stdout: "" });

    const outcome = await ensureOggOpus("/tmp/input.ogg");
    const encodeCall = execCalls.find((entry) => entry.command === "ffmpeg");

    expect(outcome.cleanup).toBe(true);
    expect(outcome.path).toMatch(/^\/tmp\/voice-.*\.ogg$/);
    expect(encodeCall).toBeDefined();
    for (const expectedArg of ["-t", "1200", "-ar", "48000"]) {
      expect(encodeCall?.args).toContain(expectedArg);
    }
    expect(encodeCall?.options?.timeout).toBe(45_000);
  });

  it("re-encodes non-ogg input with bounded ffmpeg execution", async () => {
    mockExecResults.push({ stdout: "" });

    const outcome = await ensureOggOpus("/tmp/input.mp3");
    const probeCalls = callsTo("ffprobe");
    const encodeCalls = callsTo("ffmpeg");

    expect(outcome.cleanup).toBe(true);
    // Non-.ogg input skips the probe and goes straight to ffmpeg.
    expect(probeCalls).toHaveLength(0);
    expect(encodeCalls).toHaveLength(1);
    expect(encodeCalls[0].options?.timeout).toBe(45_000);
    expect(encodeCalls[0].args).toEqual(expect.arrayContaining(["-vn", "-sn", "-dn"]));
  });
});

View File

@@ -10,20 +10,20 @@
* - No other content (text, embeds, etc.)
*/
import { execFile } from "node:child_process";
import crypto from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
import { promisify } from "node:util";
import type { RequestClient } from "@buape/carbon";
import type { RetryRunner } from "../infra/retry-policy.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
const execFileAsync = promisify(execFile);
import { parseFfprobeCodecAndSampleRate, runFfmpeg, runFfprobe } from "../media/ffmpeg-exec.js";
import { MEDIA_FFMPEG_MAX_AUDIO_DURATION_SECS } from "../media/ffmpeg-limits.js";
import { unlinkIfExists } from "../media/temp-files.js";
const DISCORD_VOICE_MESSAGE_FLAG = 1 << 13;
const SUPPRESS_NOTIFICATIONS_FLAG = 1 << 12;
const WAVEFORM_SAMPLES = 256;
const DISCORD_OPUS_SAMPLE_RATE_HZ = 48_000;
export type VoiceMessageMetadata = {
durationSecs: number;
@@ -35,7 +35,7 @@ export type VoiceMessageMetadata = {
*/
export async function getAudioDuration(filePath: string): Promise<number> {
try {
const { stdout } = await execFileAsync("ffprobe", [
const stdout = await runFfprobe([
"-v",
"error",
"-show_entries",
@@ -78,10 +78,15 @@ async function generateWaveformFromPcm(filePath: string): Promise<string> {
try {
// Convert to raw 16-bit signed PCM, mono, 8kHz
await execFileAsync("ffmpeg", [
await runFfmpeg([
"-y",
"-i",
filePath,
"-vn",
"-sn",
"-dn",
"-t",
String(MEDIA_FFMPEG_MAX_AUDIO_DURATION_SECS),
"-f",
"s16le",
"-acodec",
@@ -121,12 +126,7 @@ async function generateWaveformFromPcm(filePath: string): Promise<string> {
return Buffer.from(waveform).toString("base64");
} finally {
// Clean up temp file
try {
await fs.unlink(tempPcm);
} catch {
// Ignore cleanup errors
}
await unlinkIfExists(tempPcm);
}
}
@@ -160,20 +160,21 @@ export async function ensureOggOpus(filePath: string): Promise<{ path: string; c
// Check if already OGG
if (ext === ".ogg") {
// Verify it's Opus codec, not Vorbis (Vorbis won't play on mobile)
// Fast-path only when the file is Opus at Discord's expected 48kHz.
try {
const { stdout } = await execFileAsync("ffprobe", [
const stdout = await runFfprobe([
"-v",
"error",
"-select_streams",
"a:0",
"-show_entries",
"stream=codec_name",
"stream=codec_name,sample_rate",
"-of",
"csv=p=0",
filePath,
]);
if (stdout.trim().toLowerCase() === "opus") {
const { codec, sampleRateHz } = parseFfprobeCodecAndSampleRate(stdout);
if (codec === "opus" && sampleRateHz === DISCORD_OPUS_SAMPLE_RATE_HZ) {
return { path: filePath, cleanup: false };
}
} catch {
@@ -187,12 +188,17 @@ export async function ensureOggOpus(filePath: string): Promise<{ path: string; c
const tempDir = resolvePreferredOpenClawTmpDir();
const outputPath = path.join(tempDir, `voice-${crypto.randomUUID()}.ogg`);
await execFileAsync("ffmpeg", [
await runFfmpeg([
"-y",
"-i",
filePath,
"-vn",
"-sn",
"-dn",
"-t",
String(MEDIA_FFMPEG_MAX_AUDIO_DURATION_SECS),
"-ar",
"48000",
String(DISCORD_OPUS_SAMPLE_RATE_HZ),
"-c:a",
"libopus",
"-b:a",