diff --git a/extensions/tts-local-cli/speech-provider.test.ts b/extensions/tts-local-cli/speech-provider.test.ts index 75b09ee8f1c..b0d9ca23f26 100644 --- a/extensions/tts-local-cli/speech-provider.test.ts +++ b/extensions/tts-local-cli/speech-provider.test.ts @@ -80,7 +80,18 @@ describe("buildCliSpeechProvider", () => { if (typeof outputPath !== "string") { throw new Error("missing ffmpeg output path"); } - writeFileSync(outputPath, Buffer.from(`converted:${path.extname(outputPath)}`)); + const forcedFormatIndex = args.lastIndexOf("-f"); + const forcedFormat = + forcedFormatIndex >= 0 && typeof args[forcedFormatIndex + 1] === "string" + ? args[forcedFormatIndex + 1] + : undefined; + const extension = + forcedFormat === "s16le" + ? ".pcm" + : forcedFormat + ? `.${forcedFormat}` + : path.extname(outputPath); + writeFileSync(outputPath, Buffer.from(`converted:${extension}`)); }); }); diff --git a/extensions/tts-local-cli/speech-provider.ts b/extensions/tts-local-cli/speech-provider.ts index aece764806c..283057ce748 100644 --- a/extensions/tts-local-cli/speech-provider.ts +++ b/extensions/tts-local-cli/speech-provider.ts @@ -275,11 +275,11 @@ async function convertAudio( const outputPath = path.join(outputDir, outputFileName); const args = ["-y", "-i", inputPath]; if (target === "opus") { - args.push("-c:a", "libopus", "-b:a", "64k"); + args.push("-c:a", "libopus", "-b:a", "64k", "-f", "opus"); } else if (target === "wav") { - args.push("-c:a", "pcm_s16le"); + args.push("-c:a", "pcm_s16le", "-f", "wav"); } else { - args.push("-c:a", "libmp3lame", "-b:a", "128k"); + args.push("-c:a", "libmp3lame", "-b:a", "128k", "-f", "mp3"); } await writeExternalFileWithinRoot({ rootDir: outputDir,