test: mock talk synthesis at gateway boundary

This commit is contained in:
Peter Steinberger
2026-04-08 06:07:05 +01:00
parent a53c13fc06
commit b0c5d2baf6
2 changed files with 172 additions and 107 deletions

View File

@@ -2,9 +2,23 @@ import { describe, expect, it } from "vitest";
import { vi } from "vitest";
import { createEmptyPluginRegistry } from "../plugins/registry-empty.js";
import { getActivePluginRegistry, setActivePluginRegistry } from "../plugins/runtime.js";
import { withFetchPreconnect } from "../test-utils/fetch-mock.js";
import { talkHandlers } from "./server-methods/talk.js";
// Stub for the TTS synthesis boundary. Declared via `vi.hoisted` so the fn
// already exists when the hoisted `vi.mock` factory below runs (vitest lifts
// `vi.mock` calls to the top of the file before other statements execute).
// Default resolution mimics a successful ElevenLabs PCM synthesis result,
// matching the payload assertions later in this file (audio bytes [4,5,6],
// pcm_44100 output).
const synthesizeSpeechMock = vi.hoisted(() =>
vi.fn(async () => ({
success: true,
audioBuffer: Buffer.from([4, 5, 6]),
provider: "elevenlabs",
outputFormat: "pcm_44100",
fileExtension: ".pcm",
voiceCompatible: false,
})),
);
// Replace the real TTS module so gateway talk-handler tests never invoke an
// actual speech provider; handlers see only the mock above.
vi.mock("../tts/tts.js", () => ({
synthesizeSpeech: synthesizeSpeechMock,
}));
type TalkSpeakPayload = {
audioBase64?: string;
provider?: string;
@@ -71,104 +85,62 @@ describe("gateway talk provider contracts", () => {
},
});
const originalFetch = globalThis.fetch;
let fetchUrl: string | undefined;
const requestInits: RequestInit[] = [];
const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
fetchUrl = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
if (init) {
requestInits.push(init);
}
return new Response(new Uint8Array([4, 5, 6]), { status: 200 });
});
globalThis.fetch = withFetchPreconnect(fetchMock);
try {
const res = await withSpeechProviders(
[
{
pluginId: "elevenlabs-test",
source: "test",
provider: {
id: "elevenlabs",
label: "ElevenLabs",
isConfigured: () => true,
resolveTalkOverrides: ({ params }) => ({
...(typeof params.voiceId === "string" && params.voiceId.trim().length > 0
? { voiceId: params.voiceId.trim() }
: {}),
...(typeof params.modelId === "string" && params.modelId.trim().length > 0
? { modelId: params.modelId.trim() }
: {}),
...(typeof params.outputFormat === "string" && params.outputFormat.trim().length > 0
? { outputFormat: params.outputFormat.trim() }
: {}),
...(typeof params.latencyTier === "number"
? { latencyTier: params.latencyTier }
: {}),
}),
synthesize: async (req) => {
const config = req.providerConfig as Record<string, unknown>;
const overrides = (req.providerOverrides ?? {}) as Record<string, unknown>;
const voiceId =
(typeof overrides.voiceId === "string" && overrides.voiceId.trim().length > 0
? overrides.voiceId.trim()
: undefined) ??
(typeof config.voiceId === "string" && config.voiceId.trim().length > 0
? config.voiceId.trim()
: undefined) ??
DEFAULT_STUB_VOICE_ID;
const outputFormat =
typeof overrides.outputFormat === "string" &&
overrides.outputFormat.trim().length > 0
? overrides.outputFormat.trim()
: "mp3";
const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`);
url.searchParams.set("output_format", outputFormat);
const response = await globalThis.fetch(url.href, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
text: req.text,
...(typeof overrides.latencyTier === "number"
? { latency_optimization_level: overrides.latencyTier }
: {}),
}),
});
return {
audioBuffer: Buffer.from(await response.arrayBuffer()),
outputFormat,
fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3",
voiceCompatible: false,
};
},
const res = await withSpeechProviders(
[
{
pluginId: "elevenlabs-test",
source: "test",
provider: {
id: "elevenlabs",
label: "ElevenLabs",
isConfigured: () => true,
resolveTalkOverrides: ({ params }) => ({
...(typeof params.voiceId === "string" && params.voiceId.trim().length > 0
? { voiceId: params.voiceId.trim() }
: {}),
...(typeof params.outputFormat === "string" && params.outputFormat.trim().length > 0
? { outputFormat: params.outputFormat.trim() }
: {}),
...(typeof params.latencyTier === "number"
? { latencyTier: params.latencyTier }
: {}),
}),
synthesize: async () => {
throw new Error("synthesize should be mocked at the handler boundary");
},
},
],
async () =>
await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
voiceId: "clawd",
outputFormat: "pcm_44100",
latencyTier: 3,
}),
);
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("elevenlabs");
expect((res?.payload as TalkSpeakPayload | undefined)?.outputFormat).toBe("pcm_44100");
expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe(
Buffer.from([4, 5, 6]).toString("base64"),
);
},
],
async () =>
await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
voiceId: "clawd",
outputFormat: "pcm_44100",
latencyTier: 3,
}),
);
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("elevenlabs");
expect((res?.payload as TalkSpeakPayload | undefined)?.outputFormat).toBe("pcm_44100");
expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe(
Buffer.from([4, 5, 6]).toString("base64"),
);
expect(fetchMock).toHaveBeenCalled();
expect(fetchUrl).toContain(`/v1/text-to-speech/${ALIAS_STUB_VOICE_ID}`);
expect(fetchUrl).toContain("output_format=pcm_44100");
const init = requestInits[0];
const bodyText = typeof init?.body === "string" ? init.body : "{}";
const body = JSON.parse(bodyText) as Record<string, unknown>;
expect(body.latency_optimization_level).toBe(3);
} finally {
globalThis.fetch = originalFetch;
}
expect(synthesizeSpeechMock).toHaveBeenCalledWith(
expect.objectContaining({
text: "Hello from talk mode.",
overrides: {
provider: "elevenlabs",
providerOverrides: {
elevenlabs: {
voiceId: ALIAS_STUB_VOICE_ID,
outputFormat: "pcm_44100",
latencyTier: 3,
},
},
},
disableFallback: true,
}),
);
});
});

View File

@@ -1,8 +1,23 @@
import { describe, expect, it } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { createEmptyPluginRegistry } from "../plugins/registry-empty.js";
import { getActivePluginRegistry, setActivePluginRegistry } from "../plugins/runtime.js";
import { talkHandlers } from "./server-methods/talk.js";
// Stub for the TTS synthesis boundary, typed against the real module's
// `synthesizeSpeech` signature so the mock stays in sync with its contract.
// `vi.hoisted` guarantees the fn exists before the hoisted `vi.mock` factory
// below executes. Default resolution mimics a successful "acme" MP3 synthesis;
// the `beforeEach` later in this file resets and re-primes this same default.
const synthesizeSpeechMock = vi.hoisted(() =>
vi.fn<typeof import("../tts/tts.js").synthesizeSpeech>(async () => ({
success: true,
audioBuffer: Buffer.from([7, 8, 9]),
provider: "acme",
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
})),
);
// Swap out the real TTS module so runtime tests exercise only the gateway
// handler logic, never a concrete speech provider.
vi.mock("../tts/tts.js", () => ({
synthesizeSpeech: synthesizeSpeechMock,
}));
type TalkSpeakPayload = {
audioBase64?: string;
provider?: string;
@@ -46,6 +61,79 @@ async function withSpeechProviders<T>(
}
describe("gateway talk runtime", () => {
beforeEach(() => {
synthesizeSpeechMock.mockReset();
synthesizeSpeechMock.mockResolvedValue({
success: true,
audioBuffer: Buffer.from([7, 8, 9]),
provider: "acme",
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
});
});
it("allows extension speech providers through the talk setup", async () => {
const { writeConfigFile } = await import("../config/config.js");
await writeConfigFile({
talk: {
provider: "acme",
providers: {
acme: {
voiceId: "plugin-voice",
},
},
},
});
await withSpeechProviders(
[
{
pluginId: "acme-plugin",
source: "test",
provider: {
id: "acme",
label: "Acme Speech",
isConfigured: () => true,
resolveTalkConfig: ({ talkProviderConfig }) => ({
...talkProviderConfig,
resolvedBy: "acme-test-provider",
}),
synthesize: async () => {
throw new Error("synthesize should be mocked at the handler boundary");
},
},
},
],
async () => {
const res = await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
});
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect(synthesizeSpeechMock).toHaveBeenCalledWith(
expect.objectContaining({
text: "Hello from talk mode.",
overrides: { provider: "acme" },
disableFallback: true,
cfg: expect.objectContaining({
messages: expect.objectContaining({
tts: expect.objectContaining({
provider: "acme",
providers: expect.objectContaining({
acme: expect.objectContaining({
resolvedBy: "acme-test-provider",
voiceId: "plugin-voice",
}),
}),
}),
}),
}),
}),
);
},
);
});
it("allows extension speech providers through talk.speak", async () => {
const { writeConfigFile } = await import("../config/config.js");
await writeConfigFile({
@@ -125,13 +213,15 @@ describe("gateway talk runtime", () => {
id: "acme",
label: "Acme Speech",
isConfigured: () => true,
synthesize: async () => {
throw new Error("provider failed");
},
synthesize: async () => ({}) as never,
},
},
],
async () => {
synthesizeSpeechMock.mockResolvedValue({
success: false,
error: "provider failed",
});
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
@@ -164,16 +254,19 @@ describe("gateway talk runtime", () => {
id: "acme",
label: "Acme Speech",
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: Buffer.alloc(0),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
}),
synthesize: async () => ({}) as never,
},
},
],
async () => {
synthesizeSpeechMock.mockResolvedValue({
success: true,
audioBuffer: Buffer.alloc(0),
provider: "acme",
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
});
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({