diff --git a/CHANGELOG.md b/CHANGELOG.md index 052509381bf..9ae81b9f1ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ Docs: https://docs.openclaw.ai - Providers/MiniMax TTS: mark MP3 output voice-compatible for Telegram voice-note delivery. Fixes #63540. - Providers/Microsoft TTS: keep allowlisted bundled speech providers discoverable even when another speech plugin has already registered, so Edge/Microsoft TTS is available alongside OpenAI. Fixes #62117 and #66850. - Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153. +- Providers/OpenRouter: add an OpenRouter TTS provider using the OpenAI-compatible `/audio/speech` endpoint and `OPENROUTER_API_KEY`. Fixes #71268. - macOS Talk Mode: retry failed local ElevenLabs stream playback through gateway `talk.speak` before falling back to the system voice, so configured ElevenLabs voices still play when streaming playback fails. Fixes #65662. - Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens. - Plugins/Voice Call: terminate expired restored call sessions with the provider and restart restored max-duration timers with only the remaining duration, preventing stale outbound retry loops after Gateway restarts. Fixes #48739. Thanks @mira-solari. diff --git a/docs/providers/openrouter.md b/docs/providers/openrouter.md index 0c623648ec9..8b66a950871 100644 --- a/docs/providers/openrouter.md +++ b/docs/providers/openrouter.md @@ -79,6 +79,32 @@ OpenRouter can also back the `image_generate` tool. Use an OpenRouter image mode OpenClaw sends image requests to OpenRouter's chat completions image API with `modalities: ["image", "text"]`. 
Gemini image models receive supported `aspectRatio` and `resolution` hints through OpenRouter's `image_config`. +## Text-to-speech + +OpenRouter can also be used as a TTS provider through its OpenAI-compatible +`/audio/speech` endpoint. + +```json5 +{ + messages: { + tts: { + auto: "always", + provider: "openrouter", + providers: { + openrouter: { + model: "hexgrad/kokoro-82m", + voice: "af_alloy", + responseFormat: "mp3", + }, + }, + }, + }, +} +``` + +If `messages.tts.providers.openrouter.apiKey` is omitted, TTS reuses +`models.providers.openrouter.apiKey`, then `OPENROUTER_API_KEY`. + ## Authentication and headers OpenRouter uses a Bearer token with your API key under the hood. diff --git a/docs/tools/tts.md b/docs/tools/tts.md index d8d6bbdd1ce..7871538c2aa 100644 --- a/docs/tools/tts.md +++ b/docs/tools/tts.md @@ -231,6 +231,32 @@ Resolution order is `messages.tts.providers.xai.apiKey` -> `XAI_API_KEY`. Current live voices are `ara`, `eve`, `leo`, `rex`, `sal`, and `una`; `eve` is the default. `language` accepts a BCP-47 tag or `auto`. +### OpenRouter primary + +```json5 +{ + messages: { + tts: { + auto: "always", + provider: "openrouter", + providers: { + openrouter: { + apiKey: "openrouter_api_key", + model: "hexgrad/kokoro-82m", + voice: "af_alloy", + responseFormat: "mp3", + }, + }, + }, + }, +} +``` + +OpenRouter TTS uses the same `OPENROUTER_API_KEY` path as the bundled +OpenRouter model provider. Resolution order is +`messages.tts.providers.openrouter.apiKey` -> +`models.providers.openrouter.apiKey` -> `OPENROUTER_API_KEY`. + ### Gradium primary ```json5 @@ -361,6 +387,12 @@ Then run: - `providers.xai.language`: BCP-47 language code or `auto` (default `en`). - `providers.xai.responseFormat`: `mp3`, `wav`, `pcm`, `mulaw`, or `alaw` (default `mp3`). - `providers.xai.speed`: provider-native speed override. +- `providers.openrouter.apiKey`: OpenRouter API key (env: `OPENROUTER_API_KEY`; can reuse `models.providers.openrouter.apiKey`). 
+- `providers.openrouter.baseUrl`: override the OpenRouter TTS base URL (default `https://openrouter.ai/api/v1`; legacy `https://openrouter.ai/v1` is normalized). +- `providers.openrouter.model`: OpenRouter TTS model id (default `hexgrad/kokoro-82m`; `modelId` is also accepted). +- `providers.openrouter.voice`: provider-specific voice id (default `af_alloy`; `voiceId` is also accepted). +- `providers.openrouter.responseFormat`: `mp3` or `pcm` (default `mp3`). +- `providers.openrouter.speed`: provider-native speed override. - `providers.microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key). - `providers.microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`). - `providers.microsoft.lang`: language code (e.g. `en-US`). diff --git a/extensions/openrouter/api.ts b/extensions/openrouter/api.ts index 326b015355d..bf455d4a64a 100644 --- a/extensions/openrouter/api.ts +++ b/extensions/openrouter/api.ts @@ -1,5 +1,6 @@ export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js"; export { buildOpenrouterProvider } from "./provider-catalog.js"; +export { buildOpenRouterSpeechProvider } from "./speech-provider.js"; export { applyOpenrouterConfig, applyOpenrouterProviderConfig, diff --git a/extensions/openrouter/index.test.ts b/extensions/openrouter/index.test.ts index 8891701b2ed..f1d84387a3d 100644 --- a/extensions/openrouter/index.test.ts +++ b/extensions/openrouter/index.test.ts @@ -1,10 +1,25 @@ import { describe, expect, it, vi } from "vitest"; import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js"; +import { registerProviderPlugin } from "../../test/helpers/plugins/provider-registration.js"; import { expectPassthroughReplayPolicy } from "../../test/helpers/provider-replay-policy.ts"; import openrouterPlugin from "./index.js"; import { buildOpenrouterProvider } from "./provider-catalog.js"; describe("openrouter provider hooks", () => { + it("registers 
OpenRouter speech alongside model and media providers", async () => { + const { providers, speechProviders, mediaProviders, imageProviders } = + await registerProviderPlugin({ + plugin: openrouterPlugin, + id: "openrouter", + name: "OpenRouter Provider", + }); + + expect(providers).toEqual([expect.objectContaining({ id: "openrouter" })]); + expect(speechProviders).toEqual([expect.objectContaining({ id: "openrouter" })]); + expect(mediaProviders).toEqual([expect.objectContaining({ id: "openrouter" })]); + expect(imageProviders).toEqual([expect.objectContaining({ id: "openrouter" })]); + }); + it("includes Kimi K2.6 in the bundled catalog", () => { expect(buildOpenrouterProvider().models?.map((model) => model.id)).toContain( "moonshotai/kimi-k2.6", diff --git a/extensions/openrouter/index.ts b/extensions/openrouter/index.ts index 9ff7d5958f2..e133576e983 100644 --- a/extensions/openrouter/index.ts +++ b/extensions/openrouter/index.ts @@ -20,6 +20,7 @@ import { normalizeOpenRouterBaseUrl, OPENROUTER_BASE_URL, } from "./provider-catalog.js"; +import { buildOpenRouterSpeechProvider } from "./speech-provider.js"; import { wrapOpenRouterProviderStream } from "./stream.js"; const PROVIDER_ID = "openrouter"; @@ -145,5 +146,6 @@ export default definePluginEntry({ }); api.registerMediaUnderstandingProvider(openrouterMediaUnderstandingProvider); api.registerImageGenerationProvider(buildOpenRouterImageGenerationProvider()); + api.registerSpeechProvider(buildOpenRouterSpeechProvider()); }, }); diff --git a/extensions/openrouter/openclaw.plugin.json b/extensions/openrouter/openclaw.plugin.json index 0ea814f01dc..b3651a177e7 100644 --- a/extensions/openrouter/openclaw.plugin.json +++ b/extensions/openrouter/openclaw.plugin.json @@ -22,7 +22,8 @@ ], "contracts": { "mediaUnderstandingProviders": ["openrouter"], - "imageGenerationProviders": ["openrouter"] + "imageGenerationProviders": ["openrouter"], + "speechProviders": ["openrouter"] }, "mediaUnderstandingProviderMetadata": { 
"openrouter": { diff --git a/extensions/openrouter/register.runtime.ts b/extensions/openrouter/register.runtime.ts index b3c75f0cafc..2f890d25857 100644 --- a/extensions/openrouter/register.runtime.ts +++ b/extensions/openrouter/register.runtime.ts @@ -14,10 +14,12 @@ import { import { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js"; import { applyOpenrouterConfig, OPENROUTER_DEFAULT_MODEL_REF } from "./onboard.js"; import { buildOpenrouterProvider } from "./provider-catalog.js"; +import { buildOpenRouterSpeechProvider } from "./speech-provider.js"; export { applyOpenrouterConfig, buildOpenrouterProvider, + buildOpenRouterSpeechProvider, buildProviderReplayFamilyHooks, buildProviderStreamFamilyHooks, createOpenRouterSystemCacheWrapper, diff --git a/extensions/openrouter/speech-provider.test.ts b/extensions/openrouter/speech-provider.test.ts new file mode 100644 index 00000000000..ac4bc53d076 --- /dev/null +++ b/extensions/openrouter/speech-provider.test.ts @@ -0,0 +1,155 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { buildOpenRouterSpeechProvider } from "./speech-provider.js"; + +const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } = + vi.hoisted(() => ({ + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + postJsonRequestMock: vi.fn(), + resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({ + baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? 
"https://openrouter.ai/api/v1", + allowPrivateNetwork: false, + headers: new Headers(params.defaultHeaders as HeadersInit | undefined), + dispatcherPolicy: undefined, + })), + })); + +vi.mock("openclaw/plugin-sdk/provider-http", () => ({ + assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, + postJsonRequest: postJsonRequestMock, + resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, +})); + +describe("openrouter speech provider", () => { + afterEach(() => { + assertOkOrThrowHttpErrorMock.mockClear(); + postJsonRequestMock.mockReset(); + resolveProviderHttpRequestConfigMock.mockClear(); + vi.unstubAllEnvs(); + }); + + it("normalizes provider-owned speech config", () => { + const provider = buildOpenRouterSpeechProvider(); + const resolved = provider.resolveConfig?.({ + cfg: {} as never, + timeoutMs: 30_000, + rawConfig: { + providers: { + openrouter: { + apiKey: "sk-test", + baseUrl: "https://openrouter.ai/v1/", + modelId: "google/gemini-3.1-flash-tts-preview", + voiceId: "Kore", + speed: 1.1, + responseFormat: " MP3 ", + provider: { + options: { + openai: { + instructions: "Speak warmly.", + }, + }, + }, + }, + }, + }, + }); + + expect(resolved).toEqual({ + apiKey: "sk-test", + baseUrl: "https://openrouter.ai/api/v1", + model: "google/gemini-3.1-flash-tts-preview", + voice: "Kore", + speed: 1.1, + responseFormat: "mp3", + provider: { + options: { + openai: { + instructions: "Speak warmly.", + }, + }, + }, + }); + }); + + it("synthesizes OpenAI-compatible speech through OpenRouter", async () => { + const release = vi.fn(async () => {}); + postJsonRequestMock.mockResolvedValue({ + response: new Response(new Uint8Array([1, 2, 3]), { status: 200 }), + release, + }); + + const provider = buildOpenRouterSpeechProvider(); + const result = await provider.synthesize({ + text: "hello", + cfg: { + models: { + providers: { + openrouter: { + apiKey: "sk-openrouter", + baseUrl: "https://openrouter.ai/v1/", + }, + }, + }, + } as never, + providerConfig: { 
+ model: "openai/gpt-4o-mini-tts-2025-12-15", + voice: "nova", + speed: 1.2, + }, + target: "voice-note", + timeoutMs: 12_345, + }); + + expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "openrouter", + capability: "audio", + baseUrl: "https://openrouter.ai/api/v1", + defaultHeaders: expect.objectContaining({ + "Content-Type": "application/json", + }), + }), + ); + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://openrouter.ai/api/v1/audio/speech", + timeoutMs: 12_345, + body: { + model: "openai/gpt-4o-mini-tts-2025-12-15", + input: "hello", + voice: "nova", + response_format: "mp3", + speed: 1.2, + }, + }), + ); + expect(result.audioBuffer).toEqual(Buffer.from([1, 2, 3])); + expect(result.outputFormat).toBe("mp3"); + expect(result.fileExtension).toBe(".mp3"); + expect(result.voiceCompatible).toBe(true); + expect(release).toHaveBeenCalledOnce(); + }); + + it("defaults to a live-proven OpenRouter TTS model", () => { + const provider = buildOpenRouterSpeechProvider(); + + expect( + provider.resolveConfig?.({ cfg: {} as never, rawConfig: {}, timeoutMs: 30_000 }), + ).toMatchObject({ + model: "hexgrad/kokoro-82m", + voice: "af_alloy", + }); + }); + + it("uses OPENROUTER_API_KEY when provider config omits apiKey", () => { + vi.stubEnv("OPENROUTER_API_KEY", "sk-env"); + const provider = buildOpenRouterSpeechProvider(); + + expect( + provider.isConfigured({ + cfg: {} as never, + providerConfig: {}, + timeoutMs: 30_000, + }), + ).toBe(true); + }); +}); diff --git a/extensions/openrouter/speech-provider.ts b/extensions/openrouter/speech-provider.ts new file mode 100644 index 00000000000..e60102ea96f --- /dev/null +++ b/extensions/openrouter/speech-provider.ts @@ -0,0 +1,303 @@ +import { + assertOkOrThrowHttpError, + postJsonRequest, + resolveProviderHttpRequestConfig, +} from "openclaw/plugin-sdk/provider-http"; +import { normalizeResolvedSecretInputString } from 
"openclaw/plugin-sdk/secret-input"; +import { + asFiniteNumber, + asObject, + trimToUndefined, + type SpeechDirectiveTokenParseContext, + type SpeechProviderConfig, + type SpeechProviderOverrides, + type SpeechProviderPlugin, +} from "openclaw/plugin-sdk/speech"; +import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime"; +import { normalizeOpenRouterBaseUrl, OPENROUTER_BASE_URL } from "./provider-catalog.js"; + +const DEFAULT_OPENROUTER_TTS_MODEL = "hexgrad/kokoro-82m"; +const DEFAULT_OPENROUTER_TTS_VOICE = "af_alloy"; +const OPENROUTER_TTS_MODELS = [ + DEFAULT_OPENROUTER_TTS_MODEL, + "google/gemini-3.1-flash-tts-preview", + "mistralai/voxtral-mini-tts-2603", + "elevenlabs/eleven-turbo-v2", +] as const; +const OPENROUTER_TTS_RESPONSE_FORMATS = ["mp3", "pcm"] as const; + +type OpenRouterTtsResponseFormat = (typeof OPENROUTER_TTS_RESPONSE_FORMATS)[number]; + +type OpenRouterTtsProviderConfig = { + apiKey?: string; + baseUrl?: string; + model: string; + voice: string; + speed?: number; + responseFormat?: OpenRouterTtsResponseFormat; + provider?: Record<string, unknown>; +}; + +type OpenRouterTtsProviderOverrides = { + model?: string; + voice?: string; + speed?: number; +}; + +function normalizeOpenRouterTtsResponseFormat( + value: unknown, +): OpenRouterTtsResponseFormat | undefined { + const next = normalizeOptionalLowercaseString(value); + if (!next) { + return undefined; + } + if (OPENROUTER_TTS_RESPONSE_FORMATS.some((format) => format === next)) { + return next as OpenRouterTtsResponseFormat; + } + throw new Error(`Invalid OpenRouter speech responseFormat: ${next}`); +} + +function normalizeOpenRouterTtsBaseUrl(value: unknown): string { + return ( + normalizeOpenRouterBaseUrl(trimToUndefined(value) ?? OPENROUTER_BASE_URL) ?? OPENROUTER_BASE_URL + ); +} + +function resolveOpenRouterProviderConfigRecord( + rawConfig: Record<string, unknown>, +): Record<string, unknown> | undefined { + const providers = asObject(rawConfig.providers); + return asObject(providers?.openrouter) ?? 
asObject(rawConfig.openrouter); +} + +function normalizeOpenRouterTtsProviderConfig( + rawConfig: Record<string, unknown>, +): OpenRouterTtsProviderConfig { + const raw = resolveOpenRouterProviderConfigRecord(rawConfig); + return { + apiKey: normalizeResolvedSecretInputString({ + value: raw?.apiKey, + path: "messages.tts.providers.openrouter.apiKey", + }), + baseUrl: + trimToUndefined(raw?.baseUrl) == null + ? undefined + : normalizeOpenRouterTtsBaseUrl(raw?.baseUrl), + model: trimToUndefined(raw?.model ?? raw?.modelId) ?? DEFAULT_OPENROUTER_TTS_MODEL, + voice: trimToUndefined(raw?.voice ?? raw?.voiceId) ?? DEFAULT_OPENROUTER_TTS_VOICE, + speed: asFiniteNumber(raw?.speed), + responseFormat: normalizeOpenRouterTtsResponseFormat(raw?.responseFormat), + provider: asObject(raw?.provider), + }; +} + +function readOpenRouterTtsProviderConfig( + config: SpeechProviderConfig, +): OpenRouterTtsProviderConfig { + const normalized = normalizeOpenRouterTtsProviderConfig({}); + return { + apiKey: trimToUndefined(config.apiKey) ?? normalized.apiKey, + baseUrl: + trimToUndefined(config.baseUrl) == null + ? normalized.baseUrl + : normalizeOpenRouterTtsBaseUrl(config.baseUrl), + model: trimToUndefined(config.model ?? config.modelId) ?? normalized.model, + voice: trimToUndefined(config.voice ?? config.voiceId) ?? normalized.voice, + speed: asFiniteNumber(config.speed) ?? normalized.speed, + responseFormat: + normalizeOpenRouterTtsResponseFormat(config.responseFormat) ?? normalized.responseFormat, + provider: asObject(config.provider) ?? normalized.provider, + }; +} + +function readOpenRouterTtsOverrides( + overrides: SpeechProviderOverrides | undefined, +): OpenRouterTtsProviderOverrides { + if (!overrides) { + return {}; + } + return { + model: trimToUndefined(overrides.model ?? overrides.modelId), + voice: trimToUndefined(overrides.voice ?? 
overrides.voiceId), + speed: asFiniteNumber(overrides.speed), + }; +} + +function resolveOpenRouterTtsApiKey(params: { + cfg?: { models?: { providers?: { openrouter?: { apiKey?: unknown } } } }; + providerConfig: OpenRouterTtsProviderConfig; +}): string | undefined { + return ( + params.providerConfig.apiKey ?? + normalizeResolvedSecretInputString({ + value: params.cfg?.models?.providers?.openrouter?.apiKey, + path: "models.providers.openrouter.apiKey", + }) ?? + trimToUndefined(process.env.OPENROUTER_API_KEY) + ); +} + +function resolveOpenRouterTtsBaseUrl(params: { + cfg?: { models?: { providers?: { openrouter?: { baseUrl?: unknown } } } }; + providerConfig: OpenRouterTtsProviderConfig; +}): string { + return normalizeOpenRouterTtsBaseUrl( + params.providerConfig.baseUrl ?? + trimToUndefined(params.cfg?.models?.providers?.openrouter?.baseUrl) ?? + OPENROUTER_BASE_URL, + ); +} + +function resolveOpenRouterTtsResponseFormat( + configuredFormat?: OpenRouterTtsResponseFormat, +): OpenRouterTtsResponseFormat { + if (configuredFormat) { + return configuredFormat; + } + return "mp3"; +} + +function responseFormatToFileExtension(format: OpenRouterTtsResponseFormat): ".mp3" | ".pcm" { + return format === "pcm" ? 
".pcm" : ".mp3"; +} + +function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): { + handled: boolean; + overrides?: SpeechProviderOverrides; +} { + switch (ctx.key) { + case "voice": + case "voice_id": + case "voiceid": + case "openrouter_voice": + case "openroutervoice": + if (!ctx.policy.allowVoice) { + return { handled: true }; + } + return { handled: true, overrides: { voice: ctx.value } }; + case "model": + case "model_id": + case "modelid": + case "openrouter_model": + case "openroutermodel": + if (!ctx.policy.allowModelId) { + return { handled: true }; + } + return { handled: true, overrides: { model: ctx.value } }; + default: + return { handled: false }; + } +} + +export function buildOpenRouterSpeechProvider(): SpeechProviderPlugin { + return { + id: "openrouter", + label: "OpenRouter", + autoSelectOrder: 35, + models: OPENROUTER_TTS_MODELS, + voices: [DEFAULT_OPENROUTER_TTS_VOICE], + resolveConfig: ({ rawConfig }) => normalizeOpenRouterTtsProviderConfig(rawConfig), + parseDirectiveToken, + resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => { + const base = normalizeOpenRouterTtsProviderConfig(baseTtsConfig); + const responseFormat = normalizeOpenRouterTtsResponseFormat( + talkProviderConfig.responseFormat, + ); + return { + ...base, + ...(talkProviderConfig.apiKey === undefined + ? {} + : { + apiKey: normalizeResolvedSecretInputString({ + value: talkProviderConfig.apiKey, + path: "talk.providers.openrouter.apiKey", + }), + }), + ...(trimToUndefined(talkProviderConfig.baseUrl) == null + ? {} + : { baseUrl: normalizeOpenRouterTtsBaseUrl(talkProviderConfig.baseUrl) }), + ...(trimToUndefined(talkProviderConfig.modelId) == null + ? {} + : { model: trimToUndefined(talkProviderConfig.modelId) }), + ...(trimToUndefined(talkProviderConfig.voiceId) == null + ? {} + : { voice: trimToUndefined(talkProviderConfig.voiceId) }), + ...(asFiniteNumber(talkProviderConfig.speed) == null + ? 
{} + : { speed: asFiniteNumber(talkProviderConfig.speed) }), + ...(responseFormat == null ? {} : { responseFormat }), + }; + }, + resolveTalkOverrides: ({ params }) => ({ + ...(trimToUndefined(params.voiceId ?? params.voice) == null + ? {} + : { voice: trimToUndefined(params.voiceId ?? params.voice) }), + ...(trimToUndefined(params.modelId ?? params.model) == null + ? {} + : { model: trimToUndefined(params.modelId ?? params.model) }), + ...(asFiniteNumber(params.speed) == null ? {} : { speed: asFiniteNumber(params.speed) }), + }), + listVoices: async () => [ + { id: DEFAULT_OPENROUTER_TTS_VOICE, name: DEFAULT_OPENROUTER_TTS_VOICE }, + ], + isConfigured: ({ cfg, providerConfig }) => { + const config = readOpenRouterTtsProviderConfig(providerConfig); + return Boolean(resolveOpenRouterTtsApiKey({ cfg, providerConfig: config })); + }, + synthesize: async (req) => { + const config = readOpenRouterTtsProviderConfig(req.providerConfig); + const overrides = readOpenRouterTtsOverrides(req.providerOverrides); + const apiKey = resolveOpenRouterTtsApiKey({ cfg: req.cfg, providerConfig: config }); + if (!apiKey) { + throw new Error("OpenRouter API key missing"); + } + + const baseUrl = resolveOpenRouterTtsBaseUrl({ cfg: req.cfg, providerConfig: config }); + const responseFormat = resolveOpenRouterTtsResponseFormat(config.responseFormat); + const speed = overrides.speed ?? config.speed; + const { allowPrivateNetwork, headers, dispatcherPolicy } = resolveProviderHttpRequestConfig({ + baseUrl, + defaultBaseUrl: OPENROUTER_BASE_URL, + allowPrivateNetwork: false, + defaultHeaders: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + "HTTP-Referer": "https://openclaw.ai", + "X-OpenRouter-Title": "OpenClaw", + }, + provider: "openrouter", + capability: "audio", + transport: "http", + }); + + const { response, release } = await postJsonRequest({ + url: `${baseUrl}/audio/speech`, + headers, + body: { + model: overrides.model ?? 
config.model, + input: req.text, + voice: overrides.voice ?? config.voice, + response_format: responseFormat, + ...(speed == null ? {} : { speed }), + ...(config.provider == null ? {} : { provider: config.provider }), + }, + timeoutMs: req.timeoutMs, + fetchFn: fetch, + allowPrivateNetwork, + dispatcherPolicy, + }); + + try { + await assertOkOrThrowHttpError(response, "OpenRouter TTS API error"); + return { + audioBuffer: Buffer.from(await response.arrayBuffer()), + outputFormat: responseFormat, + fileExtension: responseFormatToFileExtension(responseFormat), + voiceCompatible: responseFormat === "mp3", + }; + } finally { + await release(); + } + }, + }; +} diff --git a/extensions/openrouter/test-api.ts b/extensions/openrouter/test-api.ts index efe25467ef4..8c6d24e135e 100644 --- a/extensions/openrouter/test-api.ts +++ b/extensions/openrouter/test-api.ts @@ -1,2 +1,3 @@ export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js"; export { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js"; +export { buildOpenRouterSpeechProvider } from "./speech-provider.js";