Files
openclaw/src/gateway/server.talk-runtime.test.ts
2026-05-12 10:39:47 +01:00

306 lines
9.1 KiB
TypeScript

import { beforeEach, describe, expect, it, vi } from "vitest";
import {
invokeTalkSpeakDirect,
type TalkSpeakTestPayload,
withSpeechProviders,
} from "./talk.test-helpers.js";
// Mock for synthesizeSpeech from ../tts/tts.js. It is created inside vi.hoisted
// so that it already exists when the vi.mock factory for that module runs
// (Vitest hoists both calls above the static imports).
// Default behavior: resolve to a successful "acme" mp3 result with bytes [7, 8, 9].
const synthesizeSpeechMock = vi.hoisted(() =>
vi.fn<typeof import("../tts/tts.js").synthesizeSpeech>(async () => ({
success: true,
audioBuffer: Buffer.from([7, 8, 9]),
provider: "acme",
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
})),
);
// Replace the ../tts/tts.js module with a stub whose only export is the hoisted
// synthesizeSpeech mock, so tests can script and inspect synthesis calls.
vi.mock("../tts/tts.js", () => ({
synthesizeSpeech: synthesizeSpeechMock,
}));
// Type of the `provider` field on each entry accepted by withSpeechProviders.
type SpeechProvider = Parameters<typeof withSpeechProviders>[0][number]["provider"];
// Voice id that the "Clawd" alias maps to in the ElevenLabs talk config used by these tests.
const ALIAS_STUB_VOICE_ID = "VoiceAlias1234567890";
/**
 * Installs a runtime config snapshot whose `talk` section is the given object.
 *
 * The snapshot also carries a fixed `commands.ownerDisplaySecret` test value.
 * The same object is passed for both arguments of `setRuntimeConfigSnapshot`.
 *
 * @param talk - Value to place under the `talk` key of the snapshot.
 */
async function setTalkConfig(talk: Record<string, unknown>) {
  // Loaded lazily, at call time, rather than through a static import.
  const { setRuntimeConfigSnapshot: applySnapshot } = await import("../config/config.js");
  const commands = { ownerDisplaySecret: "openclaw-test-owner-display-secret" };
  const snapshot = { commands, talk };
  applySnapshot(snapshot, snapshot);
}
/**
 * Configures talk to use the "acme" provider with voice id "plugin-voice".
 */
async function setAcmeTalkConfig() {
  const acme = { voiceId: "plugin-voice" };
  await setTalkConfig({ provider: "acme", providers: { acme } });
}
/**
 * Configures talk to use the "elevenlabs" provider with a default voice id and
 * a single "Clawd" voice alias that maps to ALIAS_STUB_VOICE_ID.
 */
async function setElevenLabsTalkConfig() {
  const voiceAliases = { Clawd: ALIAS_STUB_VOICE_ID };
  const elevenlabs = { voiceId: "stub-default-voice", voiceAliases };
  await setTalkConfig({ provider: "elevenlabs", providers: { elevenlabs } });
}
/**
 * Installs a talk config with no provider and no provider settings.
 */
async function setEmptyTalkConfig() {
  const emptyTalk: Record<string, unknown> = {};
  await setTalkConfig(emptyTalk);
}
/**
 * Runs `run` while a single "acme" speech provider is registered through
 * withSpeechProviders.
 *
 * @param synthesize - Implementation installed as the provider's `synthesize`.
 * @param run - Callback handed to withSpeechProviders alongside the registration.
 */
async function withAcmeSpeechProvider(
  synthesize: SpeechProvider["synthesize"],
  run: () => Promise<void>,
) {
  // The provider always reports itself as configured.
  const acmeProvider: SpeechProvider = {
    id: "acme",
    label: "Acme Speech",
    isConfigured: () => true,
    synthesize,
  };
  await withSpeechProviders(
    [{ pluginId: "acme-plugin", source: "test", provider: acmeProvider }],
    run,
  );
}
/**
 * Asserts that synthesizeSpeechMock was called exactly once and returns the
 * first argument of that call.
 *
 * @returns The first argument passed to the recorded call.
 * @throws Error when that argument is undefined.
 */
function expectSingleSynthesizeSpeechCall() {
  expect(synthesizeSpeechMock).toHaveBeenCalledTimes(1);
  const firstCall = synthesizeSpeechMock.mock.calls.at(0);
  const params = firstCall?.[0];
  if (params !== undefined) {
    return params;
  }
  throw new Error("expected synthesizeSpeech call params");
}
// Drives talk speak requests through invokeTalkSpeakDirect while synthesizeSpeech
// from ../tts/tts.js is replaced by synthesizeSpeechMock, and checks both the
// arguments handed to synthesis and the response returned to the caller.
describe("gateway talk runtime", () => {
// Start each test from a clean mock. mockReset clears recorded calls and the
// implementation, so the default successful "acme" mp3 result is reinstalled.
beforeEach(() => {
synthesizeSpeechMock.mockReset();
synthesizeSpeechMock.mockResolvedValue({
success: true,
audioBuffer: Buffer.from([7, 8, 9]),
provider: "acme",
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
});
});
// A provider's resolveTalkConfig output should reach synthesizeSpeech: the test
// tags the config with `resolvedBy` and expects to find it (plus the configured
// voiceId) under cfg.messages.tts.providers.acme.
it("allows extension speech providers through the talk setup", async () => {
await setAcmeTalkConfig();
await withSpeechProviders(
[
{
pluginId: "acme-plugin",
source: "test",
provider: {
id: "acme",
label: "Acme Speech",
isConfigured: () => true,
resolveTalkConfig: ({ talkProviderConfig }) => ({
...talkProviderConfig,
resolvedBy: "acme-test-provider",
}),
// Fails loudly if the real provider path is reached instead of the mock.
synthesize: async () => {
throw new Error("synthesize should be mocked at the handler boundary");
},
},
},
],
async () => {
const res = await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
});
// The serialized error is the assertion message, so a failure shows why.
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
const synthesizeParams = expectSingleSynthesizeSpeechCall();
expect(synthesizeParams.text).toBe("Hello from talk mode.");
expect(synthesizeParams.overrides).toEqual({ provider: "acme" });
expect(synthesizeParams.disableFallback).toBe(true);
// Narrow cfg to just the fields inspected below.
const ttsConfig = (
synthesizeParams.cfg as {
messages?: {
tts?: {
provider?: string;
providers?: Record<string, { resolvedBy?: string; voiceId?: string }>;
};
};
}
).messages?.tts;
expect(ttsConfig?.provider).toBe("acme");
expect(ttsConfig?.providers?.acme?.resolvedBy).toBe("acme-test-provider");
expect(ttsConfig?.providers?.acme?.voiceId).toBe("plugin-voice");
},
);
});
// Happy path: the response payload reports the provider and carries the
// synthesized audio bytes as base64.
it("allows extension speech providers through talk.speak", async () => {
await setAcmeTalkConfig();
await withAcmeSpeechProvider(
async () => ({
audioBuffer: Buffer.from([7, 8, 9]),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
}),
async () => {
const res = await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
});
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakTestPayload | undefined)?.provider).toBe("acme");
expect((res?.payload as TalkSpeakTestPayload | undefined)?.audioBase64).toBe(
Buffer.from([7, 8, 9]).toString("base64"),
);
},
);
});
// The request uses voiceId "clawd" while the config alias is "Clawd"; the
// overrides passed to synthesizeSpeech must contain the aliased voice id
// together with the outputFormat and latencyTier from the request.
it("resolves talk voice aliases case-insensitively and forwards provider overrides", async () => {
await setElevenLabsTalkConfig();
await withSpeechProviders(
[
{
pluginId: "elevenlabs-test",
source: "test",
provider: {
id: "elevenlabs",
label: "ElevenLabs",
isConfigured: () => true,
// Copies only usable values: trimmed non-empty strings for voiceId and
// outputFormat, and latencyTier when it is a number.
resolveTalkOverrides: ({ params }) => ({
...(typeof params.voiceId === "string" && params.voiceId.trim().length > 0
? { voiceId: params.voiceId.trim() }
: {}),
...(typeof params.outputFormat === "string" && params.outputFormat.trim().length > 0
? { outputFormat: params.outputFormat.trim() }
: {}),
...(typeof params.latencyTier === "number"
? { latencyTier: params.latencyTier }
: {}),
}),
// Fails loudly if the real provider path is reached instead of the mock.
synthesize: async () => {
throw new Error("synthesize should be mocked at the handler boundary");
},
},
},
],
async () => {
// Override the default mock result with an ElevenLabs PCM result.
synthesizeSpeechMock.mockResolvedValue({
success: true,
audioBuffer: Buffer.from([4, 5, 6]),
provider: "elevenlabs",
outputFormat: "pcm_44100",
fileExtension: ".pcm",
voiceCompatible: false,
});
const res = await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
voiceId: "clawd",
outputFormat: "pcm_44100",
latencyTier: 3,
});
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakTestPayload | undefined)?.provider).toBe("elevenlabs");
expect((res?.payload as TalkSpeakTestPayload | undefined)?.outputFormat).toBe("pcm_44100");
expect((res?.payload as TalkSpeakTestPayload | undefined)?.audioBase64).toBe(
Buffer.from([4, 5, 6]).toString("base64"),
);
const synthesizeParams = expectSingleSynthesizeSpeechCall();
expect(synthesizeParams.text).toBe("Hello from talk mode.");
expect(synthesizeParams.overrides).toEqual({
provider: "elevenlabs",
providerOverrides: {
elevenlabs: {
voiceId: ALIAS_STUB_VOICE_ID,
outputFormat: "pcm_44100",
latencyTier: 3,
},
},
});
expect(synthesizeParams.disableFallback).toBe(true);
},
);
});
// With an empty talk config the request must fail with reason
// "talk_unconfigured" and be marked as eligible for fallback.
it("returns fallback-eligible details when talk provider is not configured", async () => {
await setEmptyTalkConfig();
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.message).toContain("talk provider not configured");
expect((res?.error as { details?: unknown } | undefined)?.details).toEqual({
reason: "talk_unconfigured",
fallbackEligible: true,
});
});
// An unsuccessful synthesizeSpeech result must surface as "synthesis_failed"
// and must not be marked as eligible for fallback.
it("returns synthesis_failed details when the provider rejects synthesis", async () => {
await setAcmeTalkConfig();
await withAcmeSpeechProvider(
// Placeholder synthesize; presumably never invoked because synthesizeSpeech is mocked.
async () => ({}) as never,
async () => {
synthesizeSpeechMock.mockResolvedValue({
success: false,
error: "provider failed",
});
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
reason: "synthesis_failed",
fallbackEligible: false,
});
},
);
});
// A successful result with a zero-length audio buffer must be rejected as
// "invalid_audio_result" and must not be marked as eligible for fallback.
it("rejects empty audio results as invalid_audio_result", async () => {
await setAcmeTalkConfig();
await withAcmeSpeechProvider(
// Placeholder synthesize; presumably never invoked because synthesizeSpeech is mocked.
async () => ({}) as never,
async () => {
synthesizeSpeechMock.mockResolvedValue({
success: true,
audioBuffer: Buffer.alloc(0),
provider: "acme",
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
});
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
reason: "invalid_audio_result",
fallbackEligible: false,
});
},
);
});
});