mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 15:20:44 +00:00
fix(google): emit opus voice-note tts
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const runFfmpegMock = vi.hoisted(() => vi.fn());
|
||||
const transcodeAudioBufferToOpusMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/media-runtime", () => ({
|
||||
runFfmpeg: runFfmpegMock,
|
||||
transcodeAudioBufferToOpus: transcodeAudioBufferToOpusMock,
|
||||
}));
|
||||
|
||||
import { buildXiaomiSpeechProvider } from "./speech-provider.js";
|
||||
@@ -123,7 +123,7 @@ describe("buildXiaomiSpeechProvider", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
vi.stubGlobal("fetch", vi.fn());
|
||||
runFfmpegMock.mockReset();
|
||||
transcodeAudioBufferToOpusMock.mockReset();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -170,7 +170,7 @@ describe("buildXiaomiSpeechProvider", () => {
|
||||
{ role: "assistant", content: "Hello from OpenClaw." },
|
||||
]);
|
||||
expect(body.audio).toEqual({ format: "mp3", voice: "default_en" });
|
||||
expect(runFfmpegMock).not.toHaveBeenCalled();
|
||||
expect(transcodeAudioBufferToOpusMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("transcodes Xiaomi output to Opus for voice-note targets", async () => {
|
||||
@@ -181,15 +181,7 @@ describe("buildXiaomiSpeechProvider", () => {
|
||||
headers: { "Content-Type": "application/json" },
|
||||
}),
|
||||
);
|
||||
runFfmpegMock.mockImplementationOnce(async (args: string[]) => {
|
||||
const outputPath = args.at(-1);
|
||||
if (typeof outputPath !== "string") {
|
||||
throw new Error("missing ffmpeg output path");
|
||||
}
|
||||
await import("node:fs/promises").then((fs) =>
|
||||
fs.writeFile(outputPath, Buffer.from("fake-opus-audio")),
|
||||
);
|
||||
});
|
||||
transcodeAudioBufferToOpusMock.mockResolvedValueOnce(Buffer.from("fake-opus-audio"));
|
||||
|
||||
const result = await provider.synthesize({
|
||||
text: "Hello from OpenClaw.",
|
||||
@@ -203,10 +195,12 @@ describe("buildXiaomiSpeechProvider", () => {
|
||||
expect(result.fileExtension).toBe(".opus");
|
||||
expect(result.voiceCompatible).toBe(true);
|
||||
expect(result.audioBuffer.toString()).toBe("fake-opus-audio");
|
||||
expect(runFfmpegMock).toHaveBeenCalledWith(
|
||||
expect.arrayContaining(["-c:a", "libopus", "-ar", "48000"]),
|
||||
{ timeoutMs: 30000 },
|
||||
);
|
||||
expect(transcodeAudioBufferToOpusMock).toHaveBeenCalledWith({
|
||||
audioBuffer: Buffer.from("fake-mp3-audio"),
|
||||
inputExtension: "mp3",
|
||||
tempPrefix: "tts-xiaomi-",
|
||||
timeoutMs: 30000,
|
||||
});
|
||||
});
|
||||
|
||||
it("throws when API key is missing", async () => {
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { runFfmpeg } from "openclaw/plugin-sdk/media-runtime";
|
||||
import { transcodeAudioBufferToOpus } from "openclaw/plugin-sdk/media-runtime";
|
||||
import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
|
||||
import type {
|
||||
@@ -14,7 +12,6 @@ import {
|
||||
fetchWithSsrFGuard,
|
||||
ssrfPolicyFromHttpBaseUrlAllowedHostname,
|
||||
} from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
|
||||
|
||||
export const DEFAULT_XIAOMI_TTS_BASE_URL = "https://api.xiaomimimo.com/v1";
|
||||
export const DEFAULT_XIAOMI_TTS_MODEL = "mimo-v2.5-tts";
|
||||
@@ -242,45 +239,6 @@ export async function xiaomiTTS(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Re-encodes an in-memory audio clip as Opus by passing it through ffmpeg.
 *
 * The clip is written to a freshly created scratch directory (prefix
 * `tts-xiaomi-`) under the preferred OpenClaw temp root, ffmpeg is asked to
 * produce `voice.opus` next to it, and the encoded bytes are read back. The
 * scratch directory is removed whether or not the conversion succeeds.
 *
 * @param params.audioBuffer - Encoded audio bytes to convert.
 * @param params.inputExtension - Extension (without a leading dot) used to name
 *   the temporary input file, e.g. `mp3` yields `input.mp3`.
 * @param params.timeoutMs - Forwarded unchanged to `runFfmpeg` as its
 *   `timeoutMs` option; may be `undefined`.
 * @returns The contents of the generated `voice.opus` file.
 */
async function transcodeAudioToOpus(params: {
  audioBuffer: Buffer;
  inputExtension: string;
  timeoutMs: number | undefined;
}) {
  // Ensure the shared temp root exists with owner-only permissions before
  // carving out a unique working directory inside it.
  const scratchRoot = resolvePreferredOpenClawTmpDir();
  await mkdir(scratchRoot, { recursive: true, mode: 0o700 });
  const workDir = await mkdtemp(path.join(scratchRoot, "tts-xiaomi-"));

  try {
    const sourceFile = path.join(workDir, `input.${params.inputExtension}`);
    const opusFile = path.join(workDir, "voice.opus");

    await writeFile(sourceFile, params.audioBuffer, { mode: 0o600 });

    // Quiet ffmpeg, overwrite any existing output, drop video streams, and
    // encode mono 48 kHz Opus at 64 kbit/s.
    const ffmpegArgs = [
      "-hide_banner",
      "-loglevel",
      "error",
      "-y",
      "-i",
      sourceFile,
      "-vn",
      "-c:a",
      "libopus",
      "-b:a",
      "64k",
      "-ar",
      "48000",
      "-ac",
      "1",
      opusFile,
    ];
    await runFfmpeg(ffmpegArgs, { timeoutMs: params.timeoutMs });

    return await readFile(opusFile);
  } finally {
    // Always clean up the scratch directory, even when ffmpeg or a file
    // operation throws.
    await rm(workDir, { recursive: true, force: true });
  }
}
|
||||
|
||||
export function buildXiaomiSpeechProvider(): SpeechProviderPlugin {
|
||||
return {
|
||||
id: "xiaomi",
|
||||
@@ -313,9 +271,10 @@ export function buildXiaomiSpeechProvider(): SpeechProviderPlugin {
|
||||
timeoutMs: req.timeoutMs,
|
||||
});
|
||||
if (req.target === "voice-note") {
|
||||
const opusBuffer = await transcodeAudioToOpus({
|
||||
const opusBuffer = await transcodeAudioBufferToOpus({
|
||||
audioBuffer,
|
||||
inputExtension: outputFormat,
|
||||
tempPrefix: "tts-xiaomi-",
|
||||
timeoutMs: req.timeoutMs,
|
||||
});
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user