import type { AssistantMessage } from "@mariozechner/pi-ai"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../../src/config/config.js"; import { __testing as pluginLoaderTesting } from "../../../src/plugins/loader.js"; import { createEmptyPluginRegistry } from "../../../src/plugins/registry-empty.js"; import { setActivePluginRegistry } from "../../../src/plugins/runtime.js"; import type { SpeechProviderPlugin } from "../../../src/plugins/types.js"; import { withEnv } from "../../../src/test-utils/env.js"; import * as tts from "../../../src/tts/tts.js"; let completeSimple: typeof import("@mariozechner/pi-ai").completeSimple; let getApiKeyForModelMock: typeof import("../../../src/agents/model-auth.js").getApiKeyForModel; let requireApiKeyMock: typeof import("../../../src/agents/model-auth.js").requireApiKey; let resolveModelAsyncMock: typeof import("../../../src/agents/pi-embedded-runner/model.js").resolveModelAsync; let ensureCustomApiRegisteredMock: typeof import("../../../src/agents/custom-api-registry.js").ensureCustomApiRegistered; let prepareModelForSimpleCompletionMock: typeof import("../../../src/agents/simple-completion-transport.js").prepareModelForSimpleCompletion; vi.mock("@mariozechner/pi-ai", async () => { const original = await vi.importActual("@mariozechner/pi-ai"); return { ...original, completeSimple: vi.fn(), }; }); vi.mock("@mariozechner/pi-ai/oauth", async () => { const actual = await vi.importActual( "@mariozechner/pi-ai/oauth", ); return { ...actual, getOAuthProviders: () => [], getOAuthApiKey: vi.fn(async () => null), }; }); function createResolvedModel(provider: string, modelId: string, api = "openai-completions") { return { model: { provider, id: modelId, name: modelId, api, reasoning: false, input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: 128000, maxTokens: 8192, }, authStorage: { profiles: {} }, modelRegistry: { find: vi.fn() }, }; } vi.mock("../../../src/agents/pi-embedded-runner/model.js", () => ({ resolveModel: vi.fn((provider: string, modelId: string) => createResolvedModel(provider, modelId), ), resolveModelAsync: vi.fn(async (provider: string, modelId: string) => createResolvedModel(provider, modelId), ), })); vi.mock("../../../src/agents/model-auth.js", () => ({ getApiKeyForModel: vi.fn(async () => ({ apiKey: "test-api-key", source: "test", mode: "api-key", })), requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? ""), })); vi.mock("../../../src/agents/custom-api-registry.js", () => ({ ensureCustomApiRegistered: vi.fn(), })); const { _test, resolveTtsConfig, maybeApplyTtsToPayload, getTtsProvider } = tts; const { parseTtsDirectives, resolveModelOverridePolicy, summarizeText, getResolvedSpeechProviderConfig, formatTtsProviderError, sanitizeTtsErrorForLog, } = _test; function asLegacyTtsConfig(value: unknown): OpenClawConfig { return value as OpenClawConfig; } function asLegacyOpenClawConfig(value: Record): OpenClawConfig { return value as unknown as OpenClawConfig; } const mockAssistantMessage = (content: AssistantMessage["content"]): AssistantMessage => ({ role: "assistant", content, api: "openai-completions", provider: "openai", model: "gpt-4o-mini", usage: { input: 1, output: 1, cacheRead: 0, cacheWrite: 0, totalTokens: 2, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0, }, }, stopReason: "stop", timestamp: Date.now(), }); function createSummarizeTextDeps() { return { completeSimple, getApiKeyForModel: getApiKeyForModelMock, prepareModelForSimpleCompletion: prepareModelForSimpleCompletionMock, requireApiKey: requireApiKeyMock, resolveModelAsync: resolveModelAsyncMock, }; } function createOpenAiTelephonyCfg(model: "tts-1" | "gpt-4o-mini-tts"): OpenClawConfig { return asLegacyTtsConfig({ messages: { tts: { provider: "openai", providers: { openai: { apiKey: "test-key", model, voice: "alloy", instructions: "Speak warmly", }, }, }, }, }); } function createAudioBuffer(length = 2): Buffer { return Buffer.from(new Uint8Array(length).fill(1)); } async function withMockedSpeechFetch( run: (fetchMock: ReturnType) => Promise, audioLength: number, ) { const originalFetch = globalThis.fetch; const fetchMock = vi.fn(async () => ({ ok: true, arrayBuffer: async () => new ArrayBuffer(audioLength), })); globalThis.fetch = fetchMock as unknown as typeof fetch; try { await run(fetchMock); } finally { globalThis.fetch = originalFetch; } } function resolveBaseUrl(rawValue: unknown, fallback: string): string { return typeof rawValue === "string" && rawValue.trim() ? rawValue.replace(/\/+$/u, "") : fallback; } function resolveTestProviderConfig( rawConfig: Record, providerId: string, ...aliases: string[] ): Record { const providers = typeof rawConfig.providers === "object" && rawConfig.providers !== null && !Array.isArray(rawConfig.providers) ? (rawConfig.providers as Record) : {}; for (const key of [providerId, ...aliases]) { const direct = rawConfig[key]; if (typeof direct === "object" && direct !== null && !Array.isArray(direct)) { return direct as Record; } const nested = providers[key]; if (typeof nested === "object" && nested !== null && !Array.isArray(nested)) { return nested as Record; } } return {}; } function buildTestOpenAISpeechProvider(): SpeechProviderPlugin { return { id: "openai", label: "OpenAI", autoSelectOrder: 10, resolveConfig: ({ rawConfig }) => { const config = resolveTestProviderConfig(rawConfig, "openai"); return { ...config, baseUrl: resolveBaseUrl( config.baseUrl ?? process.env.OPENAI_TTS_BASE_URL, "https://api.openai.com/v1", ), }; }, parseDirectiveToken: ({ key, value, providerConfig }) => { if (key === "voice") { const baseUrl = resolveBaseUrl( (providerConfig as Record | undefined)?.baseUrl, "https://api.openai.com/v1", ); const isDefaultEndpoint = baseUrl === "https://api.openai.com/v1"; const allowedVoices = new Set([ "alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", ]); if (isDefaultEndpoint && !allowedVoices.has(value)) { return { handled: true, warnings: [`invalid OpenAI voice "${value}"`] }; } return { handled: true, overrides: { voice: value } }; } if (key === "model") { const baseUrl = resolveBaseUrl( (providerConfig as Record | undefined)?.baseUrl, "https://api.openai.com/v1", ); const isDefaultEndpoint = baseUrl === "https://api.openai.com/v1"; const allowedModels = new Set(["tts-1", "tts-1-hd", "gpt-4o-mini-tts"]); if (isDefaultEndpoint && !allowedModels.has(value)) { return { handled: true, warnings: [`invalid OpenAI model "${value}"`] }; } return { handled: true, overrides: { model: value } }; } return { handled: false }; }, isConfigured: ({ providerConfig }) => typeof (providerConfig as Record | undefined)?.apiKey === "string" || typeof process.env.OPENAI_API_KEY === "string", synthesize: async ({ text, providerConfig, providerOverrides }) => { const config = providerConfig as Record | undefined; await fetch(`${resolveBaseUrl(config?.baseUrl, "https://api.openai.com/v1")}/audio/speech`, { method: "POST", body: JSON.stringify({ input: text, model: providerOverrides?.model ?? config?.model ?? "gpt-4o-mini-tts", voice: providerOverrides?.voice ?? config?.voice ?? "alloy", }), }); return { audioBuffer: createAudioBuffer(1), outputFormat: "mp3", fileExtension: ".mp3", voiceCompatible: true, }; }, synthesizeTelephony: async ({ text, providerConfig }) => { const config = providerConfig as Record | undefined; const configuredModel = typeof config?.model === "string" ? config.model : undefined; const model = configuredModel ?? "tts-1"; const configuredInstructions = typeof config?.instructions === "string" ? config.instructions : undefined; const instructions = model === "gpt-4o-mini-tts" ? configuredInstructions || undefined : undefined; await fetch(`${resolveBaseUrl(config?.baseUrl, "https://api.openai.com/v1")}/audio/speech`, { method: "POST", body: JSON.stringify({ input: text, model, voice: config?.voice ?? "alloy", instructions, }), }); return { audioBuffer: createAudioBuffer(2), outputFormat: "mp3", sampleRate: 24000, }; }, listVoices: async () => [{ id: "alloy", label: "Alloy" }], }; } function buildTestMicrosoftSpeechProvider(): SpeechProviderPlugin { return { id: "microsoft", label: "Microsoft", aliases: ["edge"], autoSelectOrder: 30, resolveConfig: ({ rawConfig }) => { const edgeConfig = resolveTestProviderConfig(rawConfig, "microsoft", "edge"); return { ...edgeConfig, outputFormat: edgeConfig.outputFormat ?? "audio-24khz-48kbitrate-mono-mp3", }; }, isConfigured: () => true, synthesize: async () => ({ audioBuffer: createAudioBuffer(), outputFormat: "mp3", fileExtension: ".mp3", voiceCompatible: true, }), listVoices: async () => [{ id: "edge", label: "Edge" }], }; } function buildTestElevenLabsSpeechProvider(): SpeechProviderPlugin { return { id: "elevenlabs", label: "ElevenLabs", autoSelectOrder: 20, resolveConfig: ({ rawConfig }) => resolveTestProviderConfig(rawConfig, "elevenlabs"), parseDirectiveToken: ({ key, value, currentOverrides }) => { if (key === "voiceid") { return { handled: true, overrides: { voiceId: value } }; } if (key === "stability") { return { handled: true, overrides: { voiceSettings: { ...(currentOverrides as { voiceSettings?: Record } | undefined) ?.voiceSettings, stability: Number(value), }, }, }; } if (key === "speed") { return { handled: true, overrides: { voiceSettings: { ...(currentOverrides as { voiceSettings?: Record } | undefined) ?.voiceSettings, speed: Number(value), }, }, }; } return { handled: false }; }, isConfigured: ({ providerConfig }) => typeof (providerConfig as Record | undefined)?.apiKey === "string" || typeof process.env.ELEVENLABS_API_KEY === "string" || typeof process.env.XI_API_KEY === "string", synthesize: async () => ({ audioBuffer: createAudioBuffer(), outputFormat: "mp3", fileExtension: ".mp3", voiceCompatible: true, }), listVoices: async () => [{ id: "eleven", label: "Eleven" }], }; } beforeEach(async () => { ({ completeSimple } = await import("@mariozechner/pi-ai")); ({ getApiKeyForModel: getApiKeyForModelMock, requireApiKey: requireApiKeyMock } = await import("../../../src/agents/model-auth.js")); ({ resolveModelAsync: resolveModelAsyncMock } = await import("../../../src/agents/pi-embedded-runner/model.js")); ({ ensureCustomApiRegistered: ensureCustomApiRegisteredMock } = await import("../../../src/agents/custom-api-registry.js")); prepareModelForSimpleCompletionMock = vi.fn(({ model }) => model); const registry = createEmptyPluginRegistry(); registry.speechProviders = [ { pluginId: "openai", provider: buildTestOpenAISpeechProvider(), source: "test" }, { pluginId: "microsoft", provider: buildTestMicrosoftSpeechProvider(), source: "test" }, { pluginId: "elevenlabs", provider: buildTestElevenLabsSpeechProvider(), source: "test" }, ]; const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} }); setActivePluginRegistry(registry, cacheKey); vi.clearAllMocks(); vi.mocked(completeSimple).mockResolvedValue( mockAssistantMessage([{ type: "text", text: "Summary" }]), ); }); export function describeTtsConfigContract() { describe("tts config contract", () => { describe("resolveEdgeOutputFormat", () => { const baseCfg: OpenClawConfig = { agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } }, messages: { tts: {} }, }; it.each([ { name: "default", cfg: baseCfg, expected: "audio-24khz-48kbitrate-mono-mp3", }, { name: "override", cfg: { ...baseCfg, messages: { tts: { edge: { outputFormat: "audio-24khz-96kbitrate-mono-mp3" }, }, }, } as unknown as OpenClawConfig, expected: "audio-24khz-96kbitrate-mono-mp3", }, ] as const)("$name", ({ cfg, expected, name }) => { const config = resolveTtsConfig(cfg); const providerConfig = getResolvedSpeechProviderConfig(config, "microsoft") as { outputFormat?: string; }; expect(providerConfig.outputFormat, name).toBe(expected); }); }); describe("parseTtsDirectives", () => { it("extracts overrides and strips directives when enabled", () => { const policy = resolveModelOverridePolicy({ enabled: true, allowProvider: true }); const input = "Hello [[tts:provider=elevenlabs voiceId=pMsXgVXv3BLzUgSXRplE stability=0.4 speed=1.1]] world\n\n" + "[[tts:text]](laughs) Read the song once more.[[/tts:text]]"; const result = parseTtsDirectives(input, policy); const elevenlabsOverrides = result.overrides.providerOverrides?.elevenlabs as | { voiceId?: string; voiceSettings?: { stability?: number; speed?: number }; } | undefined; expect(result.cleanedText).not.toContain("[[tts:"); expect(result.ttsText).toBe("(laughs) Read the song once more."); expect(result.overrides.provider).toBe("elevenlabs"); expect(elevenlabsOverrides?.voiceId).toBe("pMsXgVXv3BLzUgSXRplE"); expect(elevenlabsOverrides?.voiceSettings?.stability).toBe(0.4); expect(elevenlabsOverrides?.voiceSettings?.speed).toBe(1.1); }); it("accepts edge as a legacy microsoft provider override", () => { const policy = resolveModelOverridePolicy({ enabled: true, allowProvider: true }); const input = "Hello [[tts:provider=edge]] world"; const result = parseTtsDirectives(input, policy); expect(result.overrides.provider).toBe("edge"); }); it("rejects provider override by default while keeping voice overrides enabled", () => { const policy = resolveModelOverridePolicy({ enabled: true }); const input = "Hello [[tts:provider=edge voice=alloy]] world"; const result = parseTtsDirectives(input, policy); const openaiOverrides = result.overrides.providerOverrides?.openai as | { voice?: string } | undefined; expect(result.overrides.provider).toBeUndefined(); expect(openaiOverrides?.voice).toBe("alloy"); }); it("keeps text intact when overrides are disabled", () => { const policy = resolveModelOverridePolicy({ enabled: false }); const input = "Hello [[tts:voice=alloy]] world"; const result = parseTtsDirectives(input, policy); expect(result.cleanedText).toBe(input); expect(result.overrides.provider).toBeUndefined(); }); it("accepts custom voices and models when openaiBaseUrl is a non-default endpoint", () => { const policy = resolveModelOverridePolicy({ enabled: true }); const input = "Hello [[tts:voice=kokoro-chinese model=kokoro-v1]] world"; const result = parseTtsDirectives(input, policy, { providerConfigs: { openai: { baseUrl: "http://localhost:8880/v1" }, }, }); const openaiOverrides = result.overrides.providerOverrides?.openai as | { voice?: string; model?: string } | undefined; expect(openaiOverrides?.voice).toBe("kokoro-chinese"); expect(openaiOverrides?.model).toBe("kokoro-v1"); expect(result.warnings).toHaveLength(0); }); it("rejects unknown voices and models when openaiBaseUrl is the default OpenAI endpoint", () => { const policy = resolveModelOverridePolicy({ enabled: true }); const input = "Hello [[tts:voice=kokoro-chinese model=kokoro-v1]] world"; const result = parseTtsDirectives(input, policy, { providerConfigs: { openai: { baseUrl: "https://api.openai.com/v1" }, }, }); const openaiOverrides = result.overrides.providerOverrides?.openai as | { voice?: string } | undefined; expect(openaiOverrides?.voice).toBeUndefined(); expect(result.warnings).toContain('invalid OpenAI voice "kokoro-chinese"'); }); }); describe("getTtsProvider", () => { it.each([ { name: "openai key available", env: { OPENAI_API_KEY: "test-openai-key", ELEVENLABS_API_KEY: undefined, XI_API_KEY: undefined, }, prefsPath: "/tmp/tts-prefs-openai.json", expected: "openai", }, { name: "elevenlabs key available", env: { OPENAI_API_KEY: undefined, ELEVENLABS_API_KEY: "test-elevenlabs-key", XI_API_KEY: undefined, }, prefsPath: "/tmp/tts-prefs-elevenlabs.json", expected: "elevenlabs", }, { name: "falls back to microsoft", env: { OPENAI_API_KEY: undefined, ELEVENLABS_API_KEY: undefined, XI_API_KEY: undefined, }, prefsPath: "/tmp/tts-prefs-microsoft.json", expected: "microsoft", }, ] as const)("selects provider based on available API keys: $name", (testCase) => { withEnv(testCase.env, () => { const config = { auto: "off", mode: "final", provider: "openai", providerSource: "default", summaryModel: undefined, modelOverrides: resolveModelOverridePolicy(undefined), providerConfigs: { openai: {}, microsoft: {}, elevenlabs: {}, }, prefsPath: undefined, maxTextLength: 4000, timeoutMs: 30_000, } as ReturnType; const provider = getTtsProvider(config, testCase.prefsPath); expect(provider).toBe(testCase.expected); }); }); }); describe("resolveTtsConfig provider normalization", () => { it("normalizes legacy edge provider ids to microsoft", () => { const config = resolveTtsConfig( asLegacyOpenClawConfig({ agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } }, messages: { tts: { provider: "edge", providers: { edge: { enabled: true, }, }, }, }, }), ); expect(config.provider).toBe("microsoft"); expect(getTtsProvider(config, "/tmp/tts-prefs-normalized.json")).toBe("microsoft"); }); }); describe("resolveTtsConfig – openai.baseUrl", () => { const baseCfg: OpenClawConfig = { agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } }, messages: { tts: {} }, }; it.each([ { name: "default endpoint", cfg: baseCfg, env: { OPENAI_TTS_BASE_URL: undefined }, expected: "https://api.openai.com/v1", }, { name: "env override", cfg: baseCfg, env: { OPENAI_TTS_BASE_URL: "http://localhost:8880/v1" }, expected: "http://localhost:8880/v1", }, { name: "config wins over env", cfg: { ...baseCfg, messages: { tts: { ...baseCfg.messages!.tts, openai: { baseUrl: "http://my-server:9000/v1" } }, }, } as unknown as OpenClawConfig, env: { OPENAI_TTS_BASE_URL: "http://localhost:8880/v1" }, expected: "http://my-server:9000/v1", }, { name: "config slash trimming", cfg: { ...baseCfg, messages: { tts: { ...baseCfg.messages!.tts, openai: { baseUrl: "http://my-server:9000/v1///" }, }, }, } as unknown as OpenClawConfig, env: { OPENAI_TTS_BASE_URL: undefined }, expected: "http://my-server:9000/v1", }, { name: "env slash trimming", cfg: baseCfg, env: { OPENAI_TTS_BASE_URL: "http://localhost:8880/v1/" }, expected: "http://localhost:8880/v1", }, ] as const)( "resolves openai.baseUrl from config/env with config precedence and slash trimming: $name", (testCase) => { withEnv(testCase.env, () => { const config = resolveTtsConfig(testCase.cfg); const openaiConfig = getResolvedSpeechProviderConfig(config, "openai") as { baseUrl?: string; }; expect(openaiConfig.baseUrl, testCase.name).toBe(testCase.expected); }); }, ); it("hydrates provider config lazily when no explicit speech provider is configured", () => { withEnv({ OPENAI_TTS_BASE_URL: "http://localhost:8880/v1" }, () => { const config = resolveTtsConfig(baseCfg); const openaiConfig = getResolvedSpeechProviderConfig(config, "openai", baseCfg) as { baseUrl?: string; }; expect(config.provider).toBe(""); expect(openaiConfig.baseUrl).toBe("http://localhost:8880/v1"); }); }); }); }); } export function describeTtsSummarizationContract() { describe("tts summarization contract", () => { const baseCfg: OpenClawConfig = { agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } }, messages: { tts: {} }, }; async function runSummarizeText(params?: { text?: string; targetLength?: number; cfg?: OpenClawConfig; }) { const cfg = params?.cfg ?? baseCfg; const config = resolveTtsConfig(cfg); return await summarizeText( { text: params?.text ?? "Long text to summarize", targetLength: params?.targetLength ?? 500, cfg, config, timeoutMs: 30_000, }, createSummarizeTextDeps(), ); } it("summarizes text and returns result with metrics", async () => { const mockSummary = "This is a summarized version of the text."; vi.mocked(completeSimple).mockResolvedValue( mockAssistantMessage([{ type: "text", text: mockSummary }]), ); const longText = "A".repeat(2000); const result = await runSummarizeText({ text: longText, targetLength: 1500, }); expect(result.summary).toBe(mockSummary); expect(result.inputLength).toBe(2000); expect(result.outputLength).toBe(mockSummary.length); expect(result.latencyMs).toBeGreaterThanOrEqual(0); expect(completeSimple).toHaveBeenCalledTimes(1); }); it("calls the summary model with the expected parameters", async () => { await runSummarizeText(); const callArgs = vi.mocked(completeSimple).mock.calls[0]; expect(callArgs?.[1]?.messages?.[0]?.role).toBe("user"); expect(callArgs?.[2]?.maxTokens).toBe(250); expect(callArgs?.[2]?.temperature).toBe(0.3); expect(getApiKeyForModelMock).toHaveBeenCalledTimes(1); }); it("uses summaryModel override when configured", async () => { const cfg: OpenClawConfig = { agents: { defaults: { model: { primary: "anthropic/claude-opus-4-5" } } }, messages: { tts: { summaryModel: "openai/gpt-4.1-mini" } }, }; await runSummarizeText({ cfg }); expect(resolveModelAsyncMock).toHaveBeenCalledWith("openai", "gpt-4.1-mini", undefined, cfg); }); it("keeps the Ollama api for direct summarization", async () => { vi.mocked(resolveModelAsyncMock).mockResolvedValue({ ...createResolvedModel("ollama", "qwen3:8b", "ollama"), model: { ...createResolvedModel("ollama", "qwen3:8b", "ollama").model, baseUrl: "http://127.0.0.1:11434", }, } as never); await runSummarizeText(); expect(vi.mocked(completeSimple).mock.calls[0]?.[0]?.api).toBe("ollama"); expect(ensureCustomApiRegisteredMock).not.toHaveBeenCalled(); }); it.each([ { targetLength: 99, shouldThrow: true }, { targetLength: 100, shouldThrow: false }, { targetLength: 10000, shouldThrow: false }, { targetLength: 10001, shouldThrow: true }, ] as const)("validates targetLength bounds: $targetLength", async (testCase) => { const call = runSummarizeText({ text: "text", targetLength: testCase.targetLength }); if (testCase.shouldThrow) { await expect(call, String(testCase.targetLength)).rejects.toThrow( `Invalid targetLength: ${testCase.targetLength}`, ); } else { await expect(call, String(testCase.targetLength)).resolves.toBeDefined(); } }); it.each([ { name: "no summary blocks", message: mockAssistantMessage([]) }, { name: "empty summary content", message: mockAssistantMessage([{ type: "text", text: " " }]), }, ] as const)("throws when summary output is missing or empty: $name", async (testCase) => { vi.mocked(completeSimple).mockResolvedValue(testCase.message); await expect(runSummarizeText({ text: "text" }), testCase.name).rejects.toThrow( "No summary returned", ); }); }); } export function describeTtsProviderRuntimeContract() { describe("tts provider runtime contract", () => { describe("provider error redaction", () => { it("redacts sensitive tokens in provider errors", () => { const result = formatTtsProviderError( "openai", new Error("Authorization: Bearer sk-super-secret-token-1234567890"), ); expect(result).toContain("openai:"); expect(result).toContain("Authorization: Bearer"); expect(result).not.toContain("sk-super-secret-token-1234567890"); }); it("escapes control characters in verbose fallback error logs", () => { const result = sanitizeTtsErrorForLog( new Error("failed\nAuthorization: Bearer sk-super-secret-token-1234567890\tboom"), ); expect(result).toContain("\\n"); expect(result).toContain("\\t"); expect(result).not.toContain("sk-super-secret-token-1234567890"); }); }); describe("fallback readiness errors", () => { it("continues synthesize fallback when primary readiness checks throw", async () => { const throwingPrimary: SpeechProviderPlugin = { id: "openai", label: "OpenAI", autoSelectOrder: 10, resolveConfig: () => ({}), isConfigured: () => { throw new Error("Authorization: Bearer sk-readiness-throw-token-1234567890\nboom"); }, synthesize: async () => { throw new Error("unexpected synthesize call"); }, }; const fallback: SpeechProviderPlugin = { id: "microsoft", label: "Microsoft", autoSelectOrder: 20, resolveConfig: () => ({}), isConfigured: () => true, synthesize: async () => ({ audioBuffer: createAudioBuffer(2), outputFormat: "mp3", fileExtension: ".mp3", voiceCompatible: true, }), }; const registry = createEmptyPluginRegistry(); registry.speechProviders = [ { pluginId: "openai", provider: throwingPrimary, source: "test" }, { pluginId: "microsoft", provider: fallback, source: "test" }, ]; const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} }); setActivePluginRegistry(registry, cacheKey); const result = await tts.synthesizeSpeech({ text: "hello fallback", cfg: { messages: { tts: { provider: "openai", }, }, }, }); expect(result.success).toBe(true); if (!result.success) { throw new Error("expected fallback synthesis success"); } expect(result.provider).toBe("microsoft"); expect(result.fallbackFrom).toBe("openai"); expect(result.attemptedProviders).toEqual(["openai", "microsoft"]); expect(result.attempts?.[0]).toMatchObject({ provider: "openai", outcome: "failed", reasonCode: "provider_error", }); expect(result.attempts?.[1]).toMatchObject({ provider: "microsoft", outcome: "success", reasonCode: "success", }); }); it("continues telephony fallback when primary readiness checks throw", async () => { const throwingPrimary: SpeechProviderPlugin = { id: "primary-throws", label: "PrimaryThrows", autoSelectOrder: 10, resolveConfig: () => ({}), isConfigured: () => { throw new Error("Authorization: Bearer sk-telephony-throw-token-1234567890\tboom"); }, synthesize: async () => { throw new Error("unexpected synthesize call"); }, }; const fallback: SpeechProviderPlugin = { id: "microsoft", label: "Microsoft", autoSelectOrder: 20, resolveConfig: () => ({}), isConfigured: () => true, synthesize: async () => ({ audioBuffer: createAudioBuffer(2), outputFormat: "mp3", fileExtension: ".mp3", voiceCompatible: true, }), synthesizeTelephony: async () => ({ audioBuffer: createAudioBuffer(2), outputFormat: "mp3", sampleRate: 24000, }), }; const registry = createEmptyPluginRegistry(); registry.speechProviders = [ { pluginId: "primary-throws", provider: throwingPrimary, source: "test" }, { pluginId: "microsoft", provider: fallback, source: "test" }, ]; const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} }); setActivePluginRegistry(registry, cacheKey); const result = await tts.textToSpeechTelephony({ text: "hello telephony fallback", cfg: { messages: { tts: { provider: "primary-throws", }, }, }, }); expect(result.success).toBe(true); if (!result.success) { throw new Error("expected telephony fallback success"); } expect(result.provider).toBe("microsoft"); expect(result.fallbackFrom).toBe("primary-throws"); expect(result.attemptedProviders).toEqual(["primary-throws", "microsoft"]); expect(result.attempts?.[0]).toMatchObject({ provider: "primary-throws", outcome: "failed", reasonCode: "provider_error", }); expect(result.attempts?.[1]).toMatchObject({ provider: "microsoft", outcome: "success", reasonCode: "success", }); }); it("does not double-prefix textToSpeech failure messages", async () => { const failingProvider: SpeechProviderPlugin = { id: "openai", label: "OpenAI", autoSelectOrder: 10, resolveConfig: () => ({}), isConfigured: () => true, synthesize: async () => { throw new Error("provider failed"); }, }; const registry = createEmptyPluginRegistry(); registry.speechProviders = [ { pluginId: "openai", provider: failingProvider, source: "test" }, ]; const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} }); setActivePluginRegistry(registry, cacheKey); const result = await tts.textToSpeech({ text: "hello", cfg: { messages: { tts: { provider: "openai", }, }, }, disableFallback: true, }); expect(result.success).toBe(false); if (result.success) { throw new Error("expected synthesis failure"); } expect(result.error).toBeDefined(); const errorMessage = result.error ?? ""; expect(errorMessage).toBe("TTS conversion failed: openai: provider failed"); expect(errorMessage).not.toContain("TTS conversion failed: TTS conversion failed:"); expect(errorMessage.match(/TTS conversion failed:/g)).toHaveLength(1); }); }); describe("textToSpeechTelephony – openai instructions", () => { async function expectTelephonyInstructions( model: "tts-1" | "gpt-4o-mini-tts", expectedInstructions: string | undefined, ) { await withMockedSpeechFetch(async (fetchMock) => { const result = await tts.textToSpeechTelephony({ text: "Hello there, friendly caller.", cfg: createOpenAiTelephonyCfg(model), }); expect(result.success).toBe(true); expect(fetchMock).toHaveBeenCalledTimes(1); const [, init] = fetchMock.mock.calls[0] as [string, RequestInit]; expect(typeof init.body).toBe("string"); const body = JSON.parse(init.body as string) as Record; expect(body.instructions).toBe(expectedInstructions); }, 2); } it.each([ { name: "tts-1 omits instructions", model: "tts-1", expectedInstructions: undefined }, { name: "gpt-4o-mini-tts keeps instructions", model: "gpt-4o-mini-tts", expectedInstructions: "Speak warmly", }, ] as const)( "only includes instructions for supported telephony models: $name", async (testCase) => { await expectTelephonyInstructions(testCase.model, testCase.expectedInstructions); }, ); }); }); } export function describeTtsAutoApplyContract() { describe("tts auto-apply contract", () => { const baseCfg: OpenClawConfig = asLegacyOpenClawConfig({ agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } }, messages: { tts: { auto: "inbound", provider: "openai", providers: { openai: { apiKey: "test-key", model: "gpt-4o-mini-tts", voice: "alloy" }, }, }, }, }); const withMockedAutoTtsFetch = async ( run: (fetchMock: ReturnType) => Promise, ) => { const prevPrefs = process.env.OPENCLAW_TTS_PREFS; process.env.OPENCLAW_TTS_PREFS = `/tmp/tts-test-${Date.now()}.json`; try { await withMockedSpeechFetch(run, 1); } finally { process.env.OPENCLAW_TTS_PREFS = prevPrefs; } }; const taggedCfg: OpenClawConfig = { ...baseCfg, messages: { ...baseCfg.messages!, tts: { ...baseCfg.messages!.tts, auto: "tagged" }, }, }; async function expectAutoTtsOutcome(params: { cfg: OpenClawConfig; payload: { text: string }; inboundAudio?: boolean; expectedFetchCalls: number; expectSamePayload: boolean; }) { await withMockedAutoTtsFetch(async (fetchMock) => { const result = await maybeApplyTtsToPayload({ payload: params.payload, cfg: params.cfg, kind: "final", ...(params.inboundAudio !== undefined ? { inboundAudio: params.inboundAudio } : {}), }); expect(fetchMock).toHaveBeenCalledTimes(params.expectedFetchCalls); if (params.expectSamePayload) { expect(result).toBe(params.payload); } else { expect(result.mediaUrl).toBeDefined(); } }); } it.each([ { name: "inbound gating blocks non-audio", payload: { text: "Hello world" }, inboundAudio: false, expectedFetchCalls: 0, expectSamePayload: true, }, { name: "inbound gating blocks too-short cleaned text", payload: { text: "### **bold**" }, inboundAudio: true, expectedFetchCalls: 0, expectSamePayload: true, }, { name: "inbound gating allows audio with real text", payload: { text: "Hello world" }, inboundAudio: true, expectedFetchCalls: 1, expectSamePayload: false, }, ] as const)( "applies inbound auto-TTS gating by audio status and cleaned text length: $name", async (testCase) => { await expectAutoTtsOutcome({ cfg: baseCfg, payload: testCase.payload, inboundAudio: testCase.inboundAudio, expectedFetchCalls: testCase.expectedFetchCalls, expectSamePayload: testCase.expectSamePayload, }); }, ); it.each([ { name: "plain text is skipped", payload: { text: "Hello world" }, expectedFetchCalls: 0, expectSamePayload: true, }, { name: "tagged text is synthesized", payload: { text: "[[tts:text]]Hello world[[/tts:text]]" }, expectedFetchCalls: 1, expectSamePayload: false, }, ] as const)("respects tagged-mode auto-TTS gating: $name", async (testCase) => { await expectAutoTtsOutcome({ cfg: taggedCfg, payload: testCase.payload, expectedFetchCalls: testCase.expectedFetchCalls, expectSamePayload: testCase.expectSamePayload, }); }); }); }