// Mirror of https://github.com/openclaw/openclaw.git
// Synced 2026-05-06 05:20:43 +00:00
// 332 lines, 9.0 KiB, TypeScript
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
import { buildOpenAISpeechProvider } from "./speech-provider.js";
|
|
|
|
// Replace the SSRF-guarded fetch helper with a thin passthrough to
// `globalThis.fetch`, so each test can intercept outgoing requests simply by
// swapping `globalThis.fetch` for a mock.
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
  // Forwards `url` and `init` unchanged and pairs the response with a no-op
  // `release` mock.
  fetchWithSsrFGuard: async ({
    url,
    init,
  }: {
    url: string;
    init?: RequestInit;
  }): Promise<{ response: Response; release: () => Promise<void> }> => ({
    response: await globalThis.fetch(url, init),
    release: vi.fn(async () => {}),
  }),
  // The mocked policy builder always returns `undefined`.
  ssrfPolicyFromHttpBaseUrlAllowedHostname: () => undefined,
}));
|
|
|
|
/**
 * Type guard for a parsed speech request body.
 *
 * Only checks that `value` is a non-null, non-array object; the individual
 * fields (`model`, `voice`, `speed`, `response_format`) are optional and are
 * not validated here.
 *
 * @param value - Arbitrary value, typically the result of `JSON.parse`.
 * @returns `true` when `value` is a plain (non-array) object.
 */
function isSpeechRequestBody(value: unknown): value is {
  [key: string]: unknown;
  model?: string;
  voice?: string;
  speed?: number;
  response_format?: string;
} {
  // `typeof null === "object"`, so null must be excluded explicitly.
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
|
|
/**
 * Extracts and parses the JSON body of a captured fetch request.
 *
 * @param init - The `RequestInit` passed to the mocked `fetch`.
 * @returns The parsed body, narrowed to the speech request shape.
 * @throws Error when the body is not a string, or when the parsed JSON is not
 *   a non-array object. `JSON.parse` may also throw on malformed JSON.
 */
function parseRequestBody(init: RequestInit | undefined): {
  [key: string]: unknown;
  model?: string;
  voice?: string;
  speed?: number;
  response_format?: string;
} {
  if (typeof init?.body !== "string") {
    throw new Error("expected string request body");
  }
  // Keep the parsed value as `unknown` until the guard narrows it.
  const body: unknown = JSON.parse(init.body);
  if (!isSpeechRequestBody(body)) {
    throw new Error("expected OpenAI speech request body");
  }
  return body;
}
|
|
|
|
/**
 * Installs a `globalThis.fetch` mock that asserts the request's
 * `response_format` and answers with a three-byte 200 response.
 *
 * @param responseFormat - The `response_format` value the request body must carry.
 * @returns The installed mock, so callers can make further assertions on it.
 */
function mockSpeechFetchExpectingFormat(responseFormat: string) {
  const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => {
    const body = parseRequestBody(init);
    expect(body.response_format).toBe(responseFormat);
    return new Response(new Uint8Array([1, 2, 3]), { status: 200 });
  });
  // The mock only implements the (url, init) call shape, hence the cast.
  globalThis.fetch = fetchMock as unknown as typeof fetch;
  return fetchMock;
}
|
|
|
|
// Test suite for the provider object returned by buildOpenAISpeechProvider().
describe("buildOpenAISpeechProvider", () => {
  // Captured once so every test can replace globalThis.fetch freely.
  const originalFetch = globalThis.fetch;

  // Undo the fetch replacement and restore vitest mocks after each test.
  afterEach(() => {
    globalThis.fetch = originalFetch;
    vi.restoreAllMocks();
  });

  // resolveConfig is expected to strip the trailing slash from baseUrl, trim
  // instructions, and trim + lowercase responseFormat.
  it("normalizes provider-owned speech config from raw provider config", () => {
    const provider = buildOpenAISpeechProvider();
    const resolved = provider.resolveConfig?.({
      cfg: {} as never,
      timeoutMs: 30_000,
      rawConfig: {
        providers: {
          openai: {
            apiKey: "sk-test",
            baseUrl: "https://example.com/v1/",
            model: "tts-1",
            voice: "alloy",
            speed: 1.25,
            instructions: " Speak warmly ",
            responseFormat: " WAV ",
            extraBody: {
              lang: "en-US",
            },
          },
        },
      },
    });

    expect(resolved).toEqual({
      apiKey: "sk-test",
      baseUrl: "https://example.com/v1",
      model: "tts-1",
      voice: "alloy",
      speed: 1.25,
      instructions: "Speak warmly",
      responseFormat: "wav",
      extraBody: {
        lang: "en-US",
      },
    });
  });

  // With the api.openai.com base url, a voice token is handled while a custom
  // model id is reported as not handled.
  it("parses OpenAI directive tokens against the resolved base url", () => {
    const provider = buildOpenAISpeechProvider();

    expect(
      provider.parseDirectiveToken?.({
        key: "voice",
        value: "alloy",
        policy: {
          allowVoice: true,
          allowModelId: true,
        },
        providerConfig: {
          baseUrl: "https://api.openai.com/v1/",
        },
      } as never),
    ).toEqual({
      handled: true,
      overrides: { voice: "alloy" },
    });

    expect(
      provider.parseDirectiveToken?.({
        key: "model",
        value: "kokoro-custom-model",
        policy: {
          allowVoice: true,
          allowModelId: true,
        },
        providerConfig: {
          baseUrl: "https://api.openai.com/v1/",
        },
      } as never),
    ).toEqual({
      handled: false,
    });
  });

  // The talk-level apiKey and responseFormat are expected in the result
  // instead of the base TTS values, with responseFormat normalized.
  it("preserves talk responseFormat overrides", () => {
    const provider = buildOpenAISpeechProvider();

    expect(
      provider.resolveTalkConfig?.({
        cfg: {} as never,
        timeoutMs: 30_000,
        baseTtsConfig: {
          providers: {
            openai: {
              apiKey: "sk-base",
              responseFormat: "mp3",
            },
          },
        },
        talkProviderConfig: {
          apiKey: "sk-talk",
          responseFormat: " WAV ",
        },
      }),
    ).toMatchObject({
      apiKey: "sk-talk",
      responseFormat: "wav",
    });
  });

  // voiceId -> voice, modelId -> model, speed passed through unchanged.
  it("maps Talk speak params onto OpenAI speech overrides", () => {
    const provider = buildOpenAISpeechProvider();

    expect(
      provider.resolveTalkOverrides?.({
        talkProviderConfig: {},
        params: {
          text: "Hello from talk mode.",
          voiceId: "nova",
          modelId: "tts-1",
          speed: 218 / 175,
        },
      }),
    ).toEqual({
      voice: "nova",
      model: "tts-1",
      speed: 218 / 175,
    });
  });

  // No instructions are set in providerConfig, so the prepared instructions
  // are expected to be derived from the persona label and prompt fields.
  it("maps persona prompt fields to instructions when instructions are unset", async () => {
    const provider = buildOpenAISpeechProvider();

    const prepared = await provider.prepareSynthesis?.({
      text: "hello",
      cfg: {} as never,
      providerConfig: {
        apiKey: "sk-test",
        model: "gpt-4o-mini-tts",
        voice: "cedar",
      },
      persona: {
        id: "alfred",
        label: "Alfred",
        prompt: {
          profile: "A brilliant British butler.",
          scene: "A quiet late-night study.",
          sampleContext: "The speaker is answering a trusted operator.",
          style: "Refined and lightly amused.",
          accent: "British English.",
          pacing: "Measured.",
          constraints: ["Do not read configuration values aloud."],
        },
      },
      target: "audio-file",
      timeoutMs: 1_000,
    });

    expect(prepared?.providerConfig?.instructions).toContain("Persona: Alfred");
    expect(prepared?.providerConfig?.instructions).toContain(
      "Constraint: Do not read configuration values aloud.",
    );
  });

  // No responseFormat is configured; with a Groq base url the request and the
  // result are both expected to use wav.
  it("uses wav for Groq-compatible OpenAI TTS endpoints", async () => {
    const provider = buildOpenAISpeechProvider();
    const fetchMock = mockSpeechFetchExpectingFormat("wav");

    const result = await provider.synthesize({
      text: "hello",
      cfg: {} as never,
      providerConfig: {
        apiKey: "sk-test",
        baseUrl: "https://api.groq.com/openai/v1",
        model: "canopylabs/orpheus-v1-english",
        voice: "daniel",
      },
      target: "audio-file",
      timeoutMs: 1_000,
    });

    expect(result.outputFormat).toBe("wav");
    expect(result.fileExtension).toBe(".wav");
    expect(result.voiceCompatible).toBe(false);
    // Confirms the response_format assertion inside the fetch mock actually ran.
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  // The request body is expected to carry the providerOverrides values rather
  // than the providerConfig ones, with a pcm response format.
  it("applies provider overrides to telephony synthesis", async () => {
    const provider = buildOpenAISpeechProvider();
    const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => {
      const body = parseRequestBody(init);
      expect(body).toMatchObject({
        model: "tts-1",
        voice: "nova",
        speed: 1.25,
        response_format: "pcm",
      });
      return new Response(new Uint8Array([1, 2, 3]), { status: 200 });
    });
    globalThis.fetch = fetchMock as unknown as typeof fetch;

    const result = await provider.synthesizeTelephony?.({
      text: "hello",
      cfg: {} as never,
      providerConfig: {
        apiKey: "sk-test",
        model: "gpt-4o-mini-tts",
        voice: "alloy",
        speed: 1,
      },
      providerOverrides: {
        model: "tts-1",
        voice: "nova",
        speed: 1.25,
      },
      timeoutMs: 1_000,
    });

    expect(result?.outputFormat).toBe("pcm");
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  // An explicit wav responseFormat is expected to win even for a voice-note
  // target, and the result must then not be flagged voice compatible.
  it("honors explicit responseFormat overrides and clears voice-note compatibility when not opus", async () => {
    const provider = buildOpenAISpeechProvider();
    const fetchMock = mockSpeechFetchExpectingFormat("wav");

    const result = await provider.synthesize({
      text: "hello",
      cfg: {} as never,
      providerConfig: {
        apiKey: "sk-test",
        baseUrl: "https://proxy.example.com/openai/v1",
        model: "canopylabs/orpheus-v1-english",
        voice: "daniel",
        responseFormat: "wav",
      },
      target: "voice-note",
      timeoutMs: 1_000,
    });

    expect(result.outputFormat).toBe("wav");
    expect(result.fileExtension).toBe(".wav");
    expect(result.voiceCompatible).toBe(false);
    // Confirms the response_format assertion inside the fetch mock actually ran.
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  // Keys under extra_body are expected to appear at the top level of the
  // request body next to model, voice and response_format.
  it("passes extra_body config through to OpenAI-compatible speech requests", async () => {
    const provider = buildOpenAISpeechProvider();
    const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => {
      const body = parseRequestBody(init);
      expect(body).toMatchObject({
        model: "custom-tts",
        voice: "custom-voice",
        lang: "en-US",
        response_format: "mp3",
      });
      return new Response(new Uint8Array([1, 2, 3]), { status: 200 });
    });
    globalThis.fetch = fetchMock as unknown as typeof fetch;

    const result = await provider.synthesize({
      text: "hello",
      cfg: {} as never,
      providerConfig: {
        apiKey: "sk-test",
        baseUrl: "https://proxy.example.com/openai/v1",
        model: "custom-tts",
        voice: "custom-voice",
        responseFormat: "mp3",
        extra_body: {
          lang: "en-US",
        },
      },
      target: "audio-file",
      timeoutMs: 1_000,
    });

    expect(result.outputFormat).toBe("mp3");
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });
});
|