diff --git a/src/discord/send.outbound.ts b/src/discord/send.outbound.ts index 70d5088d46e..ce13321ba00 100644 --- a/src/discord/send.outbound.ts +++ b/src/discord/send.outbound.ts @@ -12,6 +12,7 @@ import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { convertMarkdownTables } from "../markdown/tables.js"; import { maxBytesForKind } from "../media/constants.js"; import { extensionForMime } from "../media/mime.js"; +import { unlinkIfExists } from "../media/temp-files.js"; import type { PollInput } from "../polls.js"; import { loadWebMediaRaw } from "../web/media.js"; import { resolveDiscordAccount } from "./accounts.js"; @@ -543,18 +544,7 @@ export async function sendVoiceMessageDiscord( } throw err; } finally { - // Clean up temporary OGG file if we created one - if (oggCleanup && oggPath) { - try { - await fs.unlink(oggPath); - } catch { - // Ignore cleanup errors - } - } - try { - await fs.unlink(localInputPath); - } catch { - // Ignore cleanup errors - } + await unlinkIfExists(oggCleanup ? oggPath : null); + await unlinkIfExists(localInputPath); } } diff --git a/src/discord/voice-message.test.ts b/src/discord/voice-message.test.ts new file mode 100644 index 00000000000..51a177f059f --- /dev/null +++ b/src/discord/voice-message.test.ts @@ -0,0 +1,146 @@ +import type { ChildProcess, ExecFileOptions } from "node:child_process"; +import { promisify } from "node:util"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +type ExecCallback = ( + error: NodeJS.ErrnoException | null, + stdout: string | Buffer, + stderr: string | Buffer, +) => void; + +type ExecCall = { + command: string; + args: string[]; + options?: ExecFileOptions; +}; + +type MockExecResult = { + stdout?: string; + stderr?: string; + error?: NodeJS.ErrnoException; +}; + +const execCalls: ExecCall[] = []; +const mockExecResults: MockExecResult[] = []; + +vi.mock("node:child_process", async (importOriginal) => { + const actual = await importOriginal(); + const execFileImpl = ( + file: string, + args?: readonly string[] | null, + optionsOrCallback?: ExecFileOptions | ExecCallback | null, + callbackMaybe?: ExecCallback, + ) => { + const normalizedArgs = Array.isArray(args) ? [...args] : []; + const callback = + typeof optionsOrCallback === "function" ? optionsOrCallback : (callbackMaybe ?? undefined); + const options = + typeof optionsOrCallback === "function" ? undefined : (optionsOrCallback ?? undefined); + + execCalls.push({ + command: file, + args: normalizedArgs, + options, + }); + + const next = mockExecResults.shift() ?? { stdout: "", stderr: "" }; + queueMicrotask(() => { + callback?.(next.error ?? null, next.stdout ?? "", next.stderr ?? ""); + }); + return {} as ChildProcess; + }; + const execFileWithCustomPromisify = execFileImpl as unknown as typeof actual.execFile & { + [promisify.custom]?: ( + file: string, + args?: readonly string[] | null, + options?: ExecFileOptions | null, + ) => Promise<{ stdout: string | Buffer; stderr: string | Buffer }>; + }; + execFileWithCustomPromisify[promisify.custom] = ( + file: string, + args?: readonly string[] | null, + options?: ExecFileOptions | null, + ) => + new Promise<{ stdout: string | Buffer; stderr: string | Buffer }>((resolve, reject) => { + execFileImpl(file, args, options, (error, stdout, stderr) => { + if (error) { + reject(error); + return; + } + resolve({ stdout, stderr }); + }); + }); + + return { + ...actual, + execFile: execFileWithCustomPromisify, + }; +}); + +vi.mock("../infra/tmp-openclaw-dir.js", () => ({ + resolvePreferredOpenClawTmpDir: () => "/tmp", +})); + +const { ensureOggOpus } = await import("./voice-message.js"); + +describe("ensureOggOpus", () => { + beforeEach(() => { + execCalls.length = 0; + mockExecResults.length = 0; + }); + + afterEach(() => { + execCalls.length = 0; + mockExecResults.length = 0; + }); + + it("rejects URL/protocol input paths", async () => { + await expect(ensureOggOpus("https://example.com/audio.ogg")).rejects.toThrow( + /local file path/i, + ); + expect(execCalls).toHaveLength(0); + }); + + it("keeps .ogg only when codec is opus and sample rate is 48kHz", async () => { + mockExecResults.push({ stdout: "opus,48000\n" }); + + const result = await ensureOggOpus("/tmp/input.ogg"); + + expect(result).toEqual({ path: "/tmp/input.ogg", cleanup: false }); + expect(execCalls).toHaveLength(1); + expect(execCalls[0].command).toBe("ffprobe"); + expect(execCalls[0].args).toContain("stream=codec_name,sample_rate"); + expect(execCalls[0].options?.timeout).toBe(10_000); + }); + + it("re-encodes .ogg opus when sample rate is not 48kHz", async () => { + mockExecResults.push({ stdout: "opus,24000\n" }); + mockExecResults.push({ stdout: "" }); + + const result = await ensureOggOpus("/tmp/input.ogg"); + const ffmpegCall = execCalls.find((call) => call.command === "ffmpeg"); + + expect(result.cleanup).toBe(true); + expect(result.path).toMatch(/^\/tmp\/voice-.*\.ogg$/); + expect(ffmpegCall).toBeDefined(); + expect(ffmpegCall?.args).toContain("-t"); + expect(ffmpegCall?.args).toContain("1200"); + expect(ffmpegCall?.args).toContain("-ar"); + expect(ffmpegCall?.args).toContain("48000"); + expect(ffmpegCall?.options?.timeout).toBe(45_000); + }); + + it("re-encodes non-ogg input with bounded ffmpeg execution", async () => { + mockExecResults.push({ stdout: "" }); + + const result = await ensureOggOpus("/tmp/input.mp3"); + const ffprobeCalls = execCalls.filter((call) => call.command === "ffprobe"); + const ffmpegCalls = execCalls.filter((call) => call.command === "ffmpeg"); + + expect(result.cleanup).toBe(true); + expect(ffprobeCalls).toHaveLength(0); + expect(ffmpegCalls).toHaveLength(1); + expect(ffmpegCalls[0].options?.timeout).toBe(45_000); + expect(ffmpegCalls[0].args).toEqual(expect.arrayContaining(["-vn", "-sn", "-dn"])); + }); +}); diff --git a/src/discord/voice-message.ts b/src/discord/voice-message.ts index 23dffd3833f..3891babfff3 100644 --- a/src/discord/voice-message.ts +++ b/src/discord/voice-message.ts @@ -10,20 +10,20 @@ * - No other content (text, embeds, etc.) */ -import { execFile } from "node:child_process"; import crypto from "node:crypto"; import fs from "node:fs/promises"; import path from "node:path"; -import { promisify } from "node:util"; import type { RequestClient } from "@buape/carbon"; import type { RetryRunner } from "../infra/retry-policy.js"; import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; - -const execFileAsync = promisify(execFile); +import { parseFfprobeCodecAndSampleRate, runFfmpeg, runFfprobe } from "../media/ffmpeg-exec.js"; +import { MEDIA_FFMPEG_MAX_AUDIO_DURATION_SECS } from "../media/ffmpeg-limits.js"; +import { unlinkIfExists } from "../media/temp-files.js"; const DISCORD_VOICE_MESSAGE_FLAG = 1 << 13; const SUPPRESS_NOTIFICATIONS_FLAG = 1 << 12; const WAVEFORM_SAMPLES = 256; +const DISCORD_OPUS_SAMPLE_RATE_HZ = 48_000; export type VoiceMessageMetadata = { durationSecs: number; @@ -35,7 +35,7 @@ export type VoiceMessageMetadata = { */ export async function getAudioDuration(filePath: string): Promise { try { - const { stdout } = await execFileAsync("ffprobe", [ + const stdout = await runFfprobe([ "-v", "error", "-show_entries", @@ -78,10 +78,15 @@ async function generateWaveformFromPcm(filePath: string): Promise { try { // Convert to raw 16-bit signed PCM, mono, 8kHz - await execFileAsync("ffmpeg", [ + await runFfmpeg([ "-y", "-i", filePath, + "-vn", + "-sn", + "-dn", + "-t", + String(MEDIA_FFMPEG_MAX_AUDIO_DURATION_SECS), "-f", "s16le", "-acodec", @@ -121,12 +126,7 @@ async function generateWaveformFromPcm(filePath: string): Promise { return Buffer.from(waveform).toString("base64"); } finally { - // Clean up temp file - try { - await fs.unlink(tempPcm); - } catch { - // Ignore cleanup errors - } + await unlinkIfExists(tempPcm); } } @@ -160,20 +160,21 @@ export async function ensureOggOpus(filePath: string): Promise<{ path: string; c // Check if already OGG if (ext === ".ogg") { - // Verify it's Opus codec, not Vorbis (Vorbis won't play on mobile) + // Fast-path only when the file is Opus at Discord's expected 48kHz. try { - const { stdout } = await execFileAsync("ffprobe", [ + const stdout = await runFfprobe([ "-v", "error", "-select_streams", "a:0", "-show_entries", - "stream=codec_name", + "stream=codec_name,sample_rate", "-of", "csv=p=0", filePath, ]); - if (stdout.trim().toLowerCase() === "opus") { + const { codec, sampleRateHz } = parseFfprobeCodecAndSampleRate(stdout); + if (codec === "opus" && sampleRateHz === DISCORD_OPUS_SAMPLE_RATE_HZ) { return { path: filePath, cleanup: false }; } } catch { @@ -187,12 +188,17 @@ export async function ensureOggOpus(filePath: string): Promise<{ path: string; c const tempDir = resolvePreferredOpenClawTmpDir(); const outputPath = path.join(tempDir, `voice-${crypto.randomUUID()}.ogg`); - await execFileAsync("ffmpeg", [ + await runFfmpeg([ "-y", "-i", filePath, + "-vn", + "-sn", + "-dn", + "-t", + String(MEDIA_FFMPEG_MAX_AUDIO_DURATION_SECS), "-ar", - "48000", + String(DISCORD_OPUS_SAMPLE_RATE_HZ), "-c:a", "libopus", "-b:a",