feat(openrouter): add tts provider

This commit is contained in:
Peter Steinberger
2026-04-25 04:36:40 +01:00
parent c7f18a6b9d
commit 7875092f4d
11 changed files with 540 additions and 1 deletions

View File

@@ -92,6 +92,7 @@ Docs: https://docs.openclaw.ai
- Providers/MiniMax TTS: mark MP3 output voice-compatible for Telegram voice-note delivery. Fixes #63540.
- Providers/Microsoft TTS: keep allowlisted bundled speech providers discoverable even when another speech plugin has already registered, so Edge/Microsoft TTS is available alongside OpenAI. Fixes #62117 and #66850.
- Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153.
- Providers/OpenRouter: add an OpenRouter TTS provider using the OpenAI-compatible `/audio/speech` endpoint and `OPENROUTER_API_KEY`. Fixes #71268.
- macOS Talk Mode: retry failed local ElevenLabs stream playback through gateway `talk.speak` before falling back to the system voice, so configured ElevenLabs voices still play when streaming playback fails. Fixes #65662.
- Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens.
- Plugins/Voice Call: terminate expired restored call sessions with the provider and restart restored max-duration timers with only the remaining duration, preventing stale outbound retry loops after Gateway restarts. Fixes #48739. Thanks @mira-solari.

View File

@@ -79,6 +79,32 @@ OpenRouter can also back the `image_generate` tool. Use an OpenRouter image mode
OpenClaw sends image requests to OpenRouter's chat completions image API with `modalities: ["image", "text"]`. Gemini image models receive supported `aspectRatio` and `resolution` hints through OpenRouter's `image_config`.
## Text-to-speech
OpenRouter can also be used as a TTS provider through its OpenAI-compatible
`/audio/speech` endpoint.
```json5
{
messages: {
tts: {
auto: "always",
provider: "openrouter",
providers: {
openrouter: {
model: "hexgrad/kokoro-82m",
voice: "af_alloy",
responseFormat: "mp3",
},
},
},
},
}
```
If `messages.tts.providers.openrouter.apiKey` is omitted, TTS falls back to
`models.providers.openrouter.apiKey`, then to the `OPENROUTER_API_KEY` environment variable.
## Authentication and headers
OpenRouter uses a Bearer token with your API key under the hood.

View File

@@ -231,6 +231,32 @@ Resolution order is `messages.tts.providers.xai.apiKey` -> `XAI_API_KEY`.
Current live voices are `ara`, `eve`, `leo`, `rex`, `sal`, and `una`; `eve` is
the default. `language` accepts a BCP-47 tag or `auto`.
### OpenRouter primary
```json5
{
messages: {
tts: {
auto: "always",
provider: "openrouter",
providers: {
openrouter: {
apiKey: "openrouter_api_key",
model: "hexgrad/kokoro-82m",
voice: "af_alloy",
responseFormat: "mp3",
},
},
},
},
}
```
OpenRouter TTS can share its API key with the bundled OpenRouter model
provider. The key is resolved in this order:
`messages.tts.providers.openrouter.apiKey` ->
`models.providers.openrouter.apiKey` -> `OPENROUTER_API_KEY`.
### Gradium primary
```json5
@@ -361,6 +387,12 @@ Then run:
- `providers.xai.language`: BCP-47 language code or `auto` (default `en`).
- `providers.xai.responseFormat`: `mp3`, `wav`, `pcm`, `mulaw`, or `alaw` (default `mp3`).
- `providers.xai.speed`: provider-native speed override.
- `providers.openrouter.apiKey`: OpenRouter API key (env: `OPENROUTER_API_KEY`; can reuse `models.providers.openrouter.apiKey`).
- `providers.openrouter.baseUrl`: override the OpenRouter TTS base URL (default `https://openrouter.ai/api/v1`; legacy `https://openrouter.ai/v1` is normalized).
- `providers.openrouter.model`: OpenRouter TTS model id (default `hexgrad/kokoro-82m`; `modelId` is also accepted).
- `providers.openrouter.voice`: provider-specific voice id (default `af_alloy`; `voiceId` is also accepted).
- `providers.openrouter.responseFormat`: `mp3` or `pcm` (default `mp3`).
- `providers.openrouter.speed`: provider-native speed override.
- `providers.microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key).
- `providers.microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`).
- `providers.microsoft.lang`: language code (e.g. `en-US`).

View File

@@ -1,5 +1,6 @@
export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
export { buildOpenrouterProvider } from "./provider-catalog.js";
export { buildOpenRouterSpeechProvider } from "./speech-provider.js";
export {
applyOpenrouterConfig,
applyOpenrouterProviderConfig,

View File

@@ -1,10 +1,25 @@
import { describe, expect, it, vi } from "vitest";
import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
import { registerProviderPlugin } from "../../test/helpers/plugins/provider-registration.js";
import { expectPassthroughReplayPolicy } from "../../test/helpers/provider-replay-policy.ts";
import openrouterPlugin from "./index.js";
import { buildOpenrouterProvider } from "./provider-catalog.js";
describe("openrouter provider hooks", () => {
it("registers OpenRouter speech alongside model and media providers", async () => {
const { providers, speechProviders, mediaProviders, imageProviders } =
await registerProviderPlugin({
plugin: openrouterPlugin,
id: "openrouter",
name: "OpenRouter Provider",
});
expect(providers).toEqual([expect.objectContaining({ id: "openrouter" })]);
expect(speechProviders).toEqual([expect.objectContaining({ id: "openrouter" })]);
expect(mediaProviders).toEqual([expect.objectContaining({ id: "openrouter" })]);
expect(imageProviders).toEqual([expect.objectContaining({ id: "openrouter" })]);
});
it("includes Kimi K2.6 in the bundled catalog", () => {
expect(buildOpenrouterProvider().models?.map((model) => model.id)).toContain(
"moonshotai/kimi-k2.6",

View File

@@ -20,6 +20,7 @@ import {
normalizeOpenRouterBaseUrl,
OPENROUTER_BASE_URL,
} from "./provider-catalog.js";
import { buildOpenRouterSpeechProvider } from "./speech-provider.js";
import { wrapOpenRouterProviderStream } from "./stream.js";
const PROVIDER_ID = "openrouter";
@@ -145,5 +146,6 @@ export default definePluginEntry({
});
api.registerMediaUnderstandingProvider(openrouterMediaUnderstandingProvider);
api.registerImageGenerationProvider(buildOpenRouterImageGenerationProvider());
api.registerSpeechProvider(buildOpenRouterSpeechProvider());
},
});

View File

@@ -22,7 +22,8 @@
],
"contracts": {
"mediaUnderstandingProviders": ["openrouter"],
"imageGenerationProviders": ["openrouter"]
"imageGenerationProviders": ["openrouter"],
"speechProviders": ["openrouter"]
},
"mediaUnderstandingProviderMetadata": {
"openrouter": {

View File

@@ -14,10 +14,12 @@ import {
import { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js";
import { applyOpenrouterConfig, OPENROUTER_DEFAULT_MODEL_REF } from "./onboard.js";
import { buildOpenrouterProvider } from "./provider-catalog.js";
import { buildOpenRouterSpeechProvider } from "./speech-provider.js";
export {
applyOpenrouterConfig,
buildOpenrouterProvider,
buildOpenRouterSpeechProvider,
buildProviderReplayFamilyHooks,
buildProviderStreamFamilyHooks,
createOpenRouterSystemCacheWrapper,

View File

@@ -0,0 +1,155 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildOpenRouterSpeechProvider } from "./speech-provider.js";
// Mock functions are created inside vi.hoisted so they already exist when the
// vi.mock factory below runs (Vitest hoists vi.mock calls above the imports).
const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } =
vi.hoisted(() => ({
// Resolves without throwing, i.e. every response is treated as OK.
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
// No default implementation; each test that performs a request configures it.
postJsonRequestMock: vi.fn(),
// Echoes the caller's base URL (falling back to the default, then to the
// OpenRouter API URL) and wraps the caller's default headers in a Headers object.
resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://openrouter.ai/api/v1",
allowPrivateNetwork: false,
headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
dispatcherPolicy: undefined,
})),
}));
// Replace the provider HTTP helpers so no test performs real network I/O.
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
postJsonRequest: postJsonRequestMock,
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
}));
// Unit tests for the OpenRouter speech provider; all HTTP helpers are mocked.
describe("openrouter speech provider", () => {
// Clear recorded calls between tests. postJsonRequestMock is fully reset so a
// resolved value configured by one test does not carry over; env stubs are undone.
afterEach(() => {
assertOkOrThrowHttpErrorMock.mockClear();
postJsonRequestMock.mockReset();
resolveProviderHttpRequestConfigMock.mockClear();
vi.unstubAllEnvs();
});
// resolveConfig: the legacy `/v1/` base URL becomes `/api/v1`, the `modelId` /
// `voiceId` aliases map to `model` / `voice`, the response format is trimmed and
// lowercased, and the nested `provider` object is passed through unchanged.
it("normalizes provider-owned speech config", () => {
const provider = buildOpenRouterSpeechProvider();
const resolved = provider.resolveConfig?.({
cfg: {} as never,
timeoutMs: 30_000,
rawConfig: {
providers: {
openrouter: {
apiKey: "sk-test",
baseUrl: "https://openrouter.ai/v1/",
modelId: "google/gemini-3.1-flash-tts-preview",
voiceId: "Kore",
speed: 1.1,
responseFormat: " MP3 ",
provider: {
options: {
openai: {
instructions: "Speak warmly.",
},
},
},
},
},
},
});
expect(resolved).toEqual({
apiKey: "sk-test",
baseUrl: "https://openrouter.ai/api/v1",
model: "google/gemini-3.1-flash-tts-preview",
voice: "Kore",
speed: 1.1,
responseFormat: "mp3",
provider: {
options: {
openai: {
instructions: "Speak warmly.",
},
},
},
});
});
// synthesize: providerConfig carries no apiKey/baseUrl here, so both come from
// cfg.models.providers.openrouter. The request must target `/audio/speech` with
// the default mp3 format, and the response handle must be released exactly once.
it("synthesizes OpenAI-compatible speech through OpenRouter", async () => {
const release = vi.fn(async () => {});
postJsonRequestMock.mockResolvedValue({
response: new Response(new Uint8Array([1, 2, 3]), { status: 200 }),
release,
});
const provider = buildOpenRouterSpeechProvider();
const result = await provider.synthesize({
text: "hello",
cfg: {
models: {
providers: {
openrouter: {
apiKey: "sk-openrouter",
baseUrl: "https://openrouter.ai/v1/",
},
},
},
} as never,
providerConfig: {
model: "openai/gpt-4o-mini-tts-2025-12-15",
voice: "nova",
speed: 1.2,
},
target: "voice-note",
timeoutMs: 12_345,
});
expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
expect.objectContaining({
provider: "openrouter",
capability: "audio",
baseUrl: "https://openrouter.ai/api/v1",
defaultHeaders: expect.objectContaining({
"Content-Type": "application/json",
}),
}),
);
expect(postJsonRequestMock).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://openrouter.ai/api/v1/audio/speech",
timeoutMs: 12_345,
body: {
model: "openai/gpt-4o-mini-tts-2025-12-15",
input: "hello",
voice: "nova",
response_format: "mp3",
speed: 1.2,
},
}),
);
expect(result.audioBuffer).toEqual(Buffer.from([1, 2, 3]));
expect(result.outputFormat).toBe("mp3");
expect(result.fileExtension).toBe(".mp3");
expect(result.voiceCompatible).toBe(true);
expect(release).toHaveBeenCalledOnce();
});
// resolveConfig with an empty raw config falls back to the default model and voice.
it("defaults to a live-proven OpenRouter TTS model", () => {
const provider = buildOpenRouterSpeechProvider();
expect(
provider.resolveConfig?.({ cfg: {} as never, rawConfig: {}, timeoutMs: 30_000 }),
).toMatchObject({
model: "hexgrad/kokoro-82m",
voice: "af_alloy",
});
});
// isConfigured: with no key in either config object, the stubbed
// OPENROUTER_API_KEY environment variable is enough.
it("uses OPENROUTER_API_KEY when provider config omits apiKey", () => {
vi.stubEnv("OPENROUTER_API_KEY", "sk-env");
const provider = buildOpenRouterSpeechProvider();
expect(
provider.isConfigured({
cfg: {} as never,
providerConfig: {},
timeoutMs: 30_000,
}),
).toBe(true);
});
});

View File

@@ -0,0 +1,303 @@
import {
assertOkOrThrowHttpError,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
import {
asFiniteNumber,
asObject,
trimToUndefined,
type SpeechDirectiveTokenParseContext,
type SpeechProviderConfig,
type SpeechProviderOverrides,
type SpeechProviderPlugin,
} from "openclaw/plugin-sdk/speech";
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
import { normalizeOpenRouterBaseUrl, OPENROUTER_BASE_URL } from "./provider-catalog.js";
// Model id used when the TTS config names neither `model` nor `modelId`.
const DEFAULT_OPENROUTER_TTS_MODEL = "hexgrad/kokoro-82m";
// Voice id used when the TTS config names neither `voice` nor `voiceId`.
const DEFAULT_OPENROUTER_TTS_VOICE = "af_alloy";
// Model ids exposed through the provider's `models` field. Nothing in this file
// validates configured model ids against this list.
const OPENROUTER_TTS_MODELS = [
DEFAULT_OPENROUTER_TTS_MODEL,
"google/gemini-3.1-flash-tts-preview",
"mistralai/voxtral-mini-tts-2603",
"elevenlabs/eleven-turbo-v2",
] as const;
// Response formats accepted by normalizeOpenRouterTtsResponseFormat; any other
// non-empty value makes that function throw.
const OPENROUTER_TTS_RESPONSE_FORMATS = ["mp3", "pcm"] as const;
// Union of the literal format strings above ("mp3" | "pcm").
type OpenRouterTtsResponseFormat = (typeof OPENROUTER_TTS_RESPONSE_FORMATS)[number];
// Normalized OpenRouter TTS settings used by the rest of this file.
type OpenRouterTtsProviderConfig = {
// TTS-specific API key; when absent, the key is resolved from the model
// provider config and then the OPENROUTER_API_KEY environment variable.
apiKey?: string;
// Normalized base URL override; undefined when none was configured.
baseUrl?: string;
// Always populated: configured model id or the default model.
model: string;
// Always populated: configured voice id or the default voice.
voice: string;
// Optional speed; only sent in the request body when set.
speed?: number;
// Optional output format; synthesis falls back to "mp3" when unset.
responseFormat?: OpenRouterTtsResponseFormat;
// Free-form object forwarded as the `provider` field of the request body.
provider?: Record<string, unknown>;
};
// Per-request overrides; during synthesis each set field takes precedence over
// the corresponding value from the provider config.
type OpenRouterTtsProviderOverrides = {
model?: string;
voice?: string;
speed?: number;
};
/**
 * Normalizes a raw `responseFormat` value.
 *
 * @param value - Raw config value of any type.
 * @returns The matching supported format, or `undefined` when the lowercased
 *   value is empty or missing.
 * @throws Error when a non-empty value is not one of the supported formats.
 */
function normalizeOpenRouterTtsResponseFormat(
  value: unknown,
): OpenRouterTtsResponseFormat | undefined {
  const candidate = normalizeOptionalLowercaseString(value);
  if (!candidate) {
    return undefined;
  }
  const supported = OPENROUTER_TTS_RESPONSE_FORMATS.find((format) => format === candidate);
  if (supported === undefined) {
    throw new Error(`Invalid OpenRouter speech responseFormat: ${candidate}`);
  }
  return supported;
}
/**
 * Resolves a raw base URL value to a normalized OpenRouter base URL.
 *
 * A blank or missing value is replaced by `OPENROUTER_BASE_URL` before
 * normalization, and the same default is returned if normalization yields
 * nothing.
 */
function normalizeOpenRouterTtsBaseUrl(value: unknown): string {
  const requested = trimToUndefined(value) ?? OPENROUTER_BASE_URL;
  const normalized = normalizeOpenRouterBaseUrl(requested);
  return normalized ?? OPENROUTER_BASE_URL;
}
/**
 * Locates the OpenRouter section inside a raw TTS config object.
 *
 * Prefers `rawConfig.providers.openrouter` and falls back to a top-level
 * `rawConfig.openrouter` entry when the nested one is not an object.
 */
function resolveOpenRouterProviderConfigRecord(
  rawConfig: Record<string, unknown>,
): Record<string, unknown> | undefined {
  const nested = asObject(asObject(rawConfig.providers)?.openrouter);
  if (nested != null) {
    return nested;
  }
  return asObject(rawConfig.openrouter);
}
/**
 * Builds the normalized OpenRouter TTS config from a raw TTS config object.
 *
 * `model` and `voice` accept the `modelId` / `voiceId` aliases and fall back to
 * the module defaults. `baseUrl` stays `undefined` when nothing was configured.
 *
 * @throws Error when `responseFormat` is set to an unsupported value.
 */
function normalizeOpenRouterTtsProviderConfig(
  rawConfig: Record<string, unknown>,
): OpenRouterTtsProviderConfig {
  const section = resolveOpenRouterProviderConfigRecord(rawConfig);

  const apiKey = normalizeResolvedSecretInputString({
    value: section?.apiKey,
    path: "messages.tts.providers.openrouter.apiKey",
  });
  const hasBaseUrl = trimToUndefined(section?.baseUrl) != null;
  const baseUrl = hasBaseUrl ? normalizeOpenRouterTtsBaseUrl(section?.baseUrl) : undefined;
  const model = trimToUndefined(section?.model ?? section?.modelId) ?? DEFAULT_OPENROUTER_TTS_MODEL;
  const voice = trimToUndefined(section?.voice ?? section?.voiceId) ?? DEFAULT_OPENROUTER_TTS_VOICE;
  const speed = asFiniteNumber(section?.speed);
  const responseFormat = normalizeOpenRouterTtsResponseFormat(section?.responseFormat);
  const provider = asObject(section?.provider);

  return { apiKey, baseUrl, model, voice, speed, responseFormat, provider };
}
/**
 * Reads an already-resolved provider config, filling any missing field from
 * the defaults produced by normalizing an empty raw config.
 *
 * @throws Error when `config.responseFormat` is set to an unsupported value.
 */
function readOpenRouterTtsProviderConfig(
  config: SpeechProviderConfig,
): OpenRouterTtsProviderConfig {
  const defaults = normalizeOpenRouterTtsProviderConfig({});

  const apiKey = trimToUndefined(config.apiKey) ?? defaults.apiKey;
  const hasBaseUrl = trimToUndefined(config.baseUrl) != null;
  const baseUrl = hasBaseUrl ? normalizeOpenRouterTtsBaseUrl(config.baseUrl) : defaults.baseUrl;
  const model = trimToUndefined(config.model ?? config.modelId) ?? defaults.model;
  const voice = trimToUndefined(config.voice ?? config.voiceId) ?? defaults.voice;
  const speed = asFiniteNumber(config.speed) ?? defaults.speed;
  const responseFormat =
    normalizeOpenRouterTtsResponseFormat(config.responseFormat) ?? defaults.responseFormat;
  const provider = asObject(config.provider) ?? defaults.provider;

  return { apiKey, baseUrl, model, voice, speed, responseFormat, provider };
}
/**
 * Extracts the model, voice and speed overrides from a request.
 *
 * Returns an empty object when no overrides were supplied. The `modelId` and
 * `voiceId` aliases are accepted.
 */
function readOpenRouterTtsOverrides(
  overrides: SpeechProviderOverrides | undefined,
): OpenRouterTtsProviderOverrides {
  if (!overrides) {
    return {};
  }
  const model = trimToUndefined(overrides.model ?? overrides.modelId);
  const voice = trimToUndefined(overrides.voice ?? overrides.voiceId);
  const speed = asFiniteNumber(overrides.speed);
  return { model, voice, speed };
}
/**
 * Resolves the API key for a TTS request.
 *
 * Order: the TTS provider config key, then `models.providers.openrouter.apiKey`
 * from the app config, then the `OPENROUTER_API_KEY` environment variable.
 *
 * @returns The first key found, or `undefined` when none is available.
 */
function resolveOpenRouterTtsApiKey(params: {
  cfg?: { models?: { providers?: { openrouter?: { apiKey?: unknown } } } };
  providerConfig: OpenRouterTtsProviderConfig;
}): string | undefined {
  const { cfg, providerConfig } = params;

  const ttsKey = providerConfig.apiKey;
  if (ttsKey != null) {
    return ttsKey;
  }

  const modelProviderKey = normalizeResolvedSecretInputString({
    value: cfg?.models?.providers?.openrouter?.apiKey,
    path: "models.providers.openrouter.apiKey",
  });
  if (modelProviderKey != null) {
    return modelProviderKey;
  }

  return trimToUndefined(process.env.OPENROUTER_API_KEY);
}
/**
 * Resolves the base URL for a TTS request.
 *
 * Order: the TTS provider config URL, then `models.providers.openrouter.baseUrl`
 * from the app config, then `OPENROUTER_BASE_URL`. The result is normalized.
 */
function resolveOpenRouterTtsBaseUrl(params: {
  cfg?: { models?: { providers?: { openrouter?: { baseUrl?: unknown } } } };
  providerConfig: OpenRouterTtsProviderConfig;
}): string {
  const { cfg, providerConfig } = params;
  const candidate =
    providerConfig.baseUrl ??
    trimToUndefined(cfg?.models?.providers?.openrouter?.baseUrl) ??
    OPENROUTER_BASE_URL;
  return normalizeOpenRouterTtsBaseUrl(candidate);
}
/**
 * Picks the response format for a request.
 *
 * @param configuredFormat - Format from the provider config, if any.
 * @returns The configured format, or "mp3" when none is set.
 */
function resolveOpenRouterTtsResponseFormat(
  configuredFormat?: OpenRouterTtsResponseFormat,
): OpenRouterTtsResponseFormat {
  return configuredFormat ? configuredFormat : "mp3";
}
/**
 * Maps a response format to the file extension reported with the audio.
 *
 * @returns ".pcm" for "pcm"; ".mp3" otherwise.
 */
function responseFormatToFileExtension(format: OpenRouterTtsResponseFormat): ".mp3" | ".pcm" {
  if (format === "pcm") {
    return ".pcm";
  }
  return ".mp3";
}
// Directive keys that select a voice.
const OPENROUTER_VOICE_DIRECTIVE_KEYS = new Set<string>([
  "voice",
  "voice_id",
  "voiceid",
  "openrouter_voice",
  "openroutervoice",
]);
// Directive keys that select a model.
const OPENROUTER_MODEL_DIRECTIVE_KEYS = new Set<string>([
  "model",
  "model_id",
  "modelid",
  "openrouter_model",
  "openroutermodel",
]);

/**
 * Interprets a single speech directive token.
 *
 * Voice and model keys are always reported as handled. An override is only
 * returned when the policy allows it (`allowVoice` / `allowModelId`). Any other
 * key is reported as not handled.
 */
function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
  handled: boolean;
  overrides?: SpeechProviderOverrides;
} {
  if (OPENROUTER_VOICE_DIRECTIVE_KEYS.has(ctx.key)) {
    return ctx.policy.allowVoice
      ? { handled: true, overrides: { voice: ctx.value } }
      : { handled: true };
  }
  if (OPENROUTER_MODEL_DIRECTIVE_KEYS.has(ctx.key)) {
    return ctx.policy.allowModelId
      ? { handled: true, overrides: { model: ctx.value } }
      : { handled: true };
  }
  return { handled: false };
}
/**
 * Creates the OpenRouter speech (TTS) provider plugin.
 *
 * Synthesis posts to `<baseUrl>/audio/speech` with `model`, `input`, `voice`
 * and `response_format`; `speed` and `provider` are added only when set.
 *
 * @returns A speech provider plugin with id "openrouter".
 */
export function buildOpenRouterSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "openrouter",
    label: "OpenRouter",
    autoSelectOrder: 35,
    models: OPENROUTER_TTS_MODELS,
    voices: [DEFAULT_OPENROUTER_TTS_VOICE],
    resolveConfig: ({ rawConfig }) => normalizeOpenRouterTtsProviderConfig(rawConfig),
    parseDirectiveToken,
    // Start from the base TTS config and overlay each talk setting that is present.
    resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
      const merged = normalizeOpenRouterTtsProviderConfig(baseTtsConfig);
      const talkFormat = normalizeOpenRouterTtsResponseFormat(talkProviderConfig.responseFormat);

      // An explicitly provided talk apiKey replaces the base key even when it
      // normalizes to undefined.
      if (talkProviderConfig.apiKey !== undefined) {
        merged.apiKey = normalizeResolvedSecretInputString({
          value: talkProviderConfig.apiKey,
          path: "talk.providers.openrouter.apiKey",
        });
      }
      if (trimToUndefined(talkProviderConfig.baseUrl) != null) {
        merged.baseUrl = normalizeOpenRouterTtsBaseUrl(talkProviderConfig.baseUrl);
      }
      const talkModel = trimToUndefined(talkProviderConfig.modelId);
      if (talkModel != null) {
        merged.model = talkModel;
      }
      const talkVoice = trimToUndefined(talkProviderConfig.voiceId);
      if (talkVoice != null) {
        merged.voice = talkVoice;
      }
      const talkSpeed = asFiniteNumber(talkProviderConfig.speed);
      if (talkSpeed != null) {
        merged.speed = talkSpeed;
      }
      if (talkFormat != null) {
        merged.responseFormat = talkFormat;
      }
      return merged;
    },
    // Only keys with a usable value are included in the returned overrides.
    resolveTalkOverrides: ({ params }) => {
      const picked: SpeechProviderOverrides = {};
      const voice = trimToUndefined(params.voiceId ?? params.voice);
      if (voice != null) {
        picked.voice = voice;
      }
      const model = trimToUndefined(params.modelId ?? params.model);
      if (model != null) {
        picked.model = model;
      }
      const speed = asFiniteNumber(params.speed);
      if (speed != null) {
        picked.speed = speed;
      }
      return picked;
    },
    // Only the default voice is listed.
    listVoices: async () => {
      const defaultVoice = {
        id: DEFAULT_OPENROUTER_TTS_VOICE,
        name: DEFAULT_OPENROUTER_TTS_VOICE,
      };
      return [defaultVoice];
    },
    // Configured as soon as an API key can be resolved from any source.
    isConfigured: ({ cfg, providerConfig }) => {
      const apiKey = resolveOpenRouterTtsApiKey({
        cfg,
        providerConfig: readOpenRouterTtsProviderConfig(providerConfig),
      });
      return Boolean(apiKey);
    },
    synthesize: async (req) => {
      const config = readOpenRouterTtsProviderConfig(req.providerConfig);
      const overrides = readOpenRouterTtsOverrides(req.providerOverrides);

      const apiKey = resolveOpenRouterTtsApiKey({ cfg: req.cfg, providerConfig: config });
      if (!apiKey) {
        throw new Error("OpenRouter API key missing");
      }

      const baseUrl = resolveOpenRouterTtsBaseUrl({ cfg: req.cfg, providerConfig: config });
      const responseFormat = resolveOpenRouterTtsResponseFormat(config.responseFormat);
      const speed = overrides.speed ?? config.speed;

      const defaultHeaders = {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
        "HTTP-Referer": "https://openclaw.ai",
        "X-OpenRouter-Title": "OpenClaw",
      };
      const requestConfig = resolveProviderHttpRequestConfig({
        baseUrl,
        defaultBaseUrl: OPENROUTER_BASE_URL,
        allowPrivateNetwork: false,
        defaultHeaders,
        provider: "openrouter",
        capability: "audio",
        transport: "http",
      });

      // Per-request overrides win over the configured model and voice.
      const payload = {
        model: overrides.model ?? config.model,
        input: req.text,
        voice: overrides.voice ?? config.voice,
        response_format: responseFormat,
        ...(speed != null ? { speed } : {}),
        ...(config.provider != null ? { provider: config.provider } : {}),
      };

      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/audio/speech`,
        headers: requestConfig.headers,
        body: payload,
        timeoutMs: req.timeoutMs,
        fetchFn: fetch,
        allowPrivateNetwork: requestConfig.allowPrivateNetwork,
        dispatcherPolicy: requestConfig.dispatcherPolicy,
      });

      try {
        await assertOkOrThrowHttpError(response, "OpenRouter TTS API error");
        const audioBuffer = Buffer.from(await response.arrayBuffer());
        return {
          audioBuffer,
          outputFormat: responseFormat,
          fileExtension: responseFormatToFileExtension(responseFormat),
          // Only mp3 output is flagged as voice-compatible.
          voiceCompatible: responseFormat === "mp3",
        };
      } finally {
        // Release the response handle whether or not the request succeeded.
        await release();
      }
    },
  };
}

View File

@@ -1,2 +1,3 @@
export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
export { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js";
export { buildOpenRouterSpeechProvider } from "./speech-provider.js";