mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 13:50:49 +00:00
fix(google): emit opus voice-note tts
This commit is contained in:
@@ -38,6 +38,24 @@ describeLive("google plugin live", () => {
|
||||
expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512);
|
||||
}, 120_000);
|
||||
|
||||
// Live test: asks the registered "google" speech provider to synthesize a short
// phrase with target "voice-note" and checks that the result is reported as Opus.
it("transcodes speech to Opus for voice-note targets", async () => {
|
||||
const { speechProviders } = await registerGooglePlugin();
|
||||
const provider = requireRegisteredProvider(speechProviders, "google");
|
||||
|
||||
const audioFile = await provider.synthesize({
|
||||
text: "OpenClaw Google voice note integration test OK.",
|
||||
// Only `plugins.enabled` is supplied; the `as never` cast bypasses the config type.
cfg: { plugins: { enabled: true } } as never,
|
||||
providerConfig: { apiKey: GOOGLE_API_KEY },
|
||||
// The target under test: the assertions below expect Opus metadata for it.
target: "voice-note",
|
||||
timeoutMs: 90_000,
|
||||
});
|
||||
|
||||
// Format metadata expected for a voice-note result.
expect(audioFile.outputFormat).toBe("opus");
|
||||
expect(audioFile.fileExtension).toBe(".opus");
|
||||
expect(audioFile.voiceCompatible).toBe(true);
|
||||
// Loose lower bound on the returned audio size; the bytes themselves are not inspected.
expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(128);
|
||||
// 120_000 is passed as the third argument to it() (presumably the per-test timeout in ms).
}, 120_000);
|
||||
|
||||
it("transcribes synthesized speech through the media provider", async () => {
|
||||
const { mediaProviders, speechProviders } = await registerGooglePlugin();
|
||||
const speechProvider = requireRegisteredProvider(speechProviders, "google");
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import * as providerHttp from "openclaw/plugin-sdk/provider-http";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Mock function created through vi.hoisted so that it can be referenced from the
// vi.mock factory below.
const transcodeAudioBufferToOpusMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
// Replace the media-runtime module with a stub whose only export is the mocked
// transcoder; tests program and inspect it through transcodeAudioBufferToOpusMock.
vi.mock("openclaw/plugin-sdk/media-runtime", () => ({
|
||||
transcodeAudioBufferToOpus: transcodeAudioBufferToOpusMock,
|
||||
}));
|
||||
|
||||
import { buildGoogleSpeechProvider, __testing } from "./speech-provider.js";
|
||||
|
||||
function installGoogleTtsFetchMock(pcm = Buffer.from([1, 0, 2, 0])) {
|
||||
@@ -31,6 +38,7 @@ describe("Google speech provider", () => {
|
||||
vi.restoreAllMocks();
|
||||
vi.unstubAllGlobals();
|
||||
vi.unstubAllEnvs();
|
||||
transcodeAudioBufferToOpusMock.mockReset();
|
||||
});
|
||||
|
||||
it("synthesizes Gemini PCM as WAV and preserves audio tags in the request text", async () => {
|
||||
@@ -82,6 +90,39 @@ describe("Google speech provider", () => {
|
||||
expect(result.audioBuffer.subarray(8, 12).toString("ascii")).toBe("WAVE");
|
||||
expect(result.audioBuffer.readUInt32LE(24)).toBe(__testing.GOOGLE_TTS_SAMPLE_RATE);
|
||||
expect(result.audioBuffer.subarray(44)).toEqual(Buffer.from([1, 0, 2, 0]));
|
||||
expect(transcodeAudioBufferToOpusMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Unit test: with the fetch mock and the transcoder mock in place, a "voice-note"
// synthesis must return the transcoder's output labelled as Opus, and the
// transcoder must have been handed a WAV-wrapped buffer.
it("transcodes Gemini PCM to Opus for voice-note targets", async () => {
|
||||
// Install the Google TTS fetch mock with a 4-byte PCM payload.
installGoogleTtsFetchMock(Buffer.from([5, 0, 6, 0]));
|
||||
// The mocked transcoder resolves once with a sentinel buffer to compare against.
transcodeAudioBufferToOpusMock.mockResolvedValueOnce(Buffer.from("google-opus"));
|
||||
const provider = buildGoogleSpeechProvider();
|
||||
|
||||
const result = await provider.synthesize({
|
||||
text: "Send this as a voice note.",
|
||||
cfg: {},
|
||||
providerConfig: {
|
||||
apiKey: "google-test-key",
|
||||
},
|
||||
|
||||
target: "voice-note",
|
||||
timeoutMs: 12_000,
|
||||
});
|
||||
|
||||
// The result must be exactly the transcoder's output plus the Opus metadata.
expect(result).toEqual({
|
||||
audioBuffer: Buffer.from("google-opus"),
|
||||
outputFormat: "opus",
|
||||
fileExtension: ".opus",
|
||||
voiceCompatible: true,
|
||||
});
|
||||
// The transcoder must receive a Buffer flagged as "wav", the "tts-google-" temp
// prefix, and the same timeout that was passed to synthesize().
expect(transcodeAudioBufferToOpusMock).toHaveBeenCalledWith({
|
||||
audioBuffer: expect.any(Buffer),
|
||||
inputExtension: "wav",
|
||||
tempPrefix: "tts-google-",
|
||||
timeoutMs: 12_000,
|
||||
});
|
||||
// Inspect the buffer from the first recorded call: bytes 0-3 must read "RIFF"
// and bytes 8-11 must read "WAVE".
const [{ audioBuffer }] = transcodeAudioBufferToOpusMock.mock.calls[0];
|
||||
expect(audioBuffer.subarray(0, 4).toString("ascii")).toBe("RIFF");
|
||||
expect(audioBuffer.subarray(8, 12).toString("ascii")).toBe("WAVE");
|
||||
});
|
||||
|
||||
it("falls back to GEMINI_API_KEY and configured Google API base URL", async () => {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { transcodeAudioBufferToOpus } from "openclaw/plugin-sdk/media-runtime";
|
||||
import {
|
||||
assertOkOrThrowProviderError,
|
||||
postJsonRequest,
|
||||
@@ -394,6 +395,19 @@ export function buildGoogleSpeechProvider(): SpeechProviderPlugin {
|
||||
speakerName: overrides.speakerName ?? config.speakerName,
|
||||
timeoutMs: req.timeoutMs,
|
||||
});
|
||||
if (req.target === "voice-note") {
|
||||
return {
|
||||
audioBuffer: await transcodeAudioBufferToOpus({
|
||||
audioBuffer: wrapPcm16MonoToWav(pcm),
|
||||
inputExtension: "wav",
|
||||
tempPrefix: "tts-google-",
|
||||
timeoutMs: req.timeoutMs,
|
||||
}),
|
||||
outputFormat: "opus",
|
||||
fileExtension: ".opus",
|
||||
voiceCompatible: true,
|
||||
};
|
||||
}
|
||||
return {
|
||||
audioBuffer: wrapPcm16MonoToWav(pcm),
|
||||
outputFormat: "wav",
|
||||
|
||||
Reference in New Issue
Block a user