mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
feat(openrouter): add tts provider
This commit is contained in:
@@ -92,6 +92,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers/MiniMax TTS: mark MP3 output voice-compatible for Telegram voice-note delivery. Fixes #63540.
|
||||
- Providers/Microsoft TTS: keep allowlisted bundled speech providers discoverable even when another speech plugin has already registered, so Edge/Microsoft TTS is available alongside OpenAI. Fixes #62117 and #66850.
|
||||
- Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153.
|
||||
- Providers/OpenRouter: add an OpenRouter TTS provider using the OpenAI-compatible `/audio/speech` endpoint and `OPENROUTER_API_KEY`. Fixes #71268.
|
||||
- macOS Talk Mode: retry failed local ElevenLabs stream playback through gateway `talk.speak` before falling back to the system voice, so configured ElevenLabs voices still play when streaming playback fails. Fixes #65662.
|
||||
- Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens.
|
||||
- Plugins/Voice Call: terminate expired restored call sessions with the provider and restart restored max-duration timers with only the remaining duration, preventing stale outbound retry loops after Gateway restarts. Fixes #48739. Thanks @mira-solari.
|
||||
|
||||
@@ -79,6 +79,32 @@ OpenRouter can also back the `image_generate` tool. Use an OpenRouter image mode
|
||||
|
||||
OpenClaw sends image requests to OpenRouter's chat completions image API with `modalities: ["image", "text"]`. Gemini image models receive supported `aspectRatio` and `resolution` hints through OpenRouter's `image_config`.
|
||||
|
||||
## Text-to-speech
|
||||
|
||||
OpenRouter can also be used as a TTS provider through its OpenAI-compatible
|
||||
`/audio/speech` endpoint.
|
||||
|
||||
```json5
|
||||
{
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openrouter",
|
||||
providers: {
|
||||
openrouter: {
|
||||
model: "hexgrad/kokoro-82m",
|
||||
voice: "af_alloy",
|
||||
responseFormat: "mp3",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
If `messages.tts.providers.openrouter.apiKey` is omitted, TTS reuses
|
||||
`models.providers.openrouter.apiKey`, then `OPENROUTER_API_KEY`.
|
||||
|
||||
## Authentication and headers
|
||||
|
||||
OpenRouter uses a Bearer token with your API key under the hood.
|
||||
|
||||
@@ -231,6 +231,32 @@ Resolution order is `messages.tts.providers.xai.apiKey` -> `XAI_API_KEY`.
|
||||
Current live voices are `ara`, `eve`, `leo`, `rex`, `sal`, and `una`; `eve` is
|
||||
the default. `language` accepts a BCP-47 tag or `auto`.
|
||||
|
||||
### OpenRouter primary
|
||||
|
||||
```json5
|
||||
{
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openrouter",
|
||||
providers: {
|
||||
openrouter: {
|
||||
apiKey: "openrouter_api_key",
|
||||
model: "hexgrad/kokoro-82m",
|
||||
voice: "af_alloy",
|
||||
responseFormat: "mp3",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
OpenRouter TTS uses the same `OPENROUTER_API_KEY` path as the bundled
|
||||
OpenRouter model provider. Resolution order is
|
||||
`messages.tts.providers.openrouter.apiKey` ->
|
||||
`models.providers.openrouter.apiKey` -> `OPENROUTER_API_KEY`.
|
||||
|
||||
### Gradium primary
|
||||
|
||||
```json5
|
||||
@@ -361,6 +387,12 @@ Then run:
|
||||
- `providers.xai.language`: BCP-47 language code or `auto` (default `en`).
|
||||
- `providers.xai.responseFormat`: `mp3`, `wav`, `pcm`, `mulaw`, or `alaw` (default `mp3`).
|
||||
- `providers.xai.speed`: provider-native speed override.
|
||||
- `providers.openrouter.apiKey`: OpenRouter API key (env: `OPENROUTER_API_KEY`; can reuse `models.providers.openrouter.apiKey`).
|
||||
- `providers.openrouter.baseUrl`: override the OpenRouter TTS base URL (default `https://openrouter.ai/api/v1`; legacy `https://openrouter.ai/v1` is normalized).
|
||||
- `providers.openrouter.model`: OpenRouter TTS model id (default `hexgrad/kokoro-82m`; `modelId` is also accepted).
|
||||
- `providers.openrouter.voice`: provider-specific voice id (default `af_alloy`; `voiceId` is also accepted).
|
||||
- `providers.openrouter.responseFormat`: `mp3` or `pcm` (default `mp3`).
|
||||
- `providers.openrouter.speed`: provider-native speed override.
|
||||
- `providers.microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key).
|
||||
- `providers.microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`).
|
||||
- `providers.microsoft.lang`: language code (e.g. `en-US`).
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
|
||||
export { buildOpenrouterProvider } from "./provider-catalog.js";
|
||||
export { buildOpenRouterSpeechProvider } from "./speech-provider.js";
|
||||
export {
|
||||
applyOpenrouterConfig,
|
||||
applyOpenrouterProviderConfig,
|
||||
|
||||
@@ -1,10 +1,25 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
|
||||
import { registerProviderPlugin } from "../../test/helpers/plugins/provider-registration.js";
|
||||
import { expectPassthroughReplayPolicy } from "../../test/helpers/provider-replay-policy.ts";
|
||||
import openrouterPlugin from "./index.js";
|
||||
import { buildOpenrouterProvider } from "./provider-catalog.js";
|
||||
|
||||
describe("openrouter provider hooks", () => {
|
||||
it("registers OpenRouter speech alongside model and media providers", async () => {
|
||||
const { providers, speechProviders, mediaProviders, imageProviders } =
|
||||
await registerProviderPlugin({
|
||||
plugin: openrouterPlugin,
|
||||
id: "openrouter",
|
||||
name: "OpenRouter Provider",
|
||||
});
|
||||
|
||||
expect(providers).toEqual([expect.objectContaining({ id: "openrouter" })]);
|
||||
expect(speechProviders).toEqual([expect.objectContaining({ id: "openrouter" })]);
|
||||
expect(mediaProviders).toEqual([expect.objectContaining({ id: "openrouter" })]);
|
||||
expect(imageProviders).toEqual([expect.objectContaining({ id: "openrouter" })]);
|
||||
});
|
||||
|
||||
it("includes Kimi K2.6 in the bundled catalog", () => {
|
||||
expect(buildOpenrouterProvider().models?.map((model) => model.id)).toContain(
|
||||
"moonshotai/kimi-k2.6",
|
||||
|
||||
@@ -20,6 +20,7 @@ import {
|
||||
normalizeOpenRouterBaseUrl,
|
||||
OPENROUTER_BASE_URL,
|
||||
} from "./provider-catalog.js";
|
||||
import { buildOpenRouterSpeechProvider } from "./speech-provider.js";
|
||||
import { wrapOpenRouterProviderStream } from "./stream.js";
|
||||
|
||||
const PROVIDER_ID = "openrouter";
|
||||
@@ -145,5 +146,6 @@ export default definePluginEntry({
|
||||
});
|
||||
api.registerMediaUnderstandingProvider(openrouterMediaUnderstandingProvider);
|
||||
api.registerImageGenerationProvider(buildOpenRouterImageGenerationProvider());
|
||||
api.registerSpeechProvider(buildOpenRouterSpeechProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -22,7 +22,8 @@
|
||||
],
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["openrouter"],
|
||||
"imageGenerationProviders": ["openrouter"]
|
||||
"imageGenerationProviders": ["openrouter"],
|
||||
"speechProviders": ["openrouter"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"openrouter": {
|
||||
|
||||
@@ -14,10 +14,12 @@ import {
|
||||
import { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
import { applyOpenrouterConfig, OPENROUTER_DEFAULT_MODEL_REF } from "./onboard.js";
|
||||
import { buildOpenrouterProvider } from "./provider-catalog.js";
|
||||
import { buildOpenRouterSpeechProvider } from "./speech-provider.js";
|
||||
|
||||
export {
|
||||
applyOpenrouterConfig,
|
||||
buildOpenrouterProvider,
|
||||
buildOpenRouterSpeechProvider,
|
||||
buildProviderReplayFamilyHooks,
|
||||
buildProviderStreamFamilyHooks,
|
||||
createOpenRouterSystemCacheWrapper,
|
||||
|
||||
155
extensions/openrouter/speech-provider.test.ts
Normal file
155
extensions/openrouter/speech-provider.test.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildOpenRouterSpeechProvider } from "./speech-provider.js";

// Mocks must be hoisted so the vi.mock factory below can reference them.
const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } =
  vi.hoisted(() => ({
    assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
    postJsonRequestMock: vi.fn(),
    // Minimal stand-in for the real resolver: echoes the base URL and wraps
    // the default headers in a Headers instance.
    resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
      baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://openrouter.ai/api/v1",
      allowPrivateNetwork: false,
      headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
      dispatcherPolicy: undefined,
    })),
  }));

// Replace the SDK HTTP layer so no real network requests are made.
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
  assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
  postJsonRequest: postJsonRequestMock,
  resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
}));

describe("openrouter speech provider", () => {
  afterEach(() => {
    assertOkOrThrowHttpErrorMock.mockClear();
    postJsonRequestMock.mockReset();
    resolveProviderHttpRequestConfigMock.mockClear();
    // Undo any vi.stubEnv calls (e.g. OPENROUTER_API_KEY).
    vi.unstubAllEnvs();
  });

  // resolveConfig should apply the modelId/voiceId aliases, normalize the
  // legacy /v1 base URL, lowercase/trim responseFormat, and pass the
  // `provider` routing object through untouched.
  it("normalizes provider-owned speech config", () => {
    const provider = buildOpenRouterSpeechProvider();
    const resolved = provider.resolveConfig?.({
      cfg: {} as never,
      timeoutMs: 30_000,
      rawConfig: {
        providers: {
          openrouter: {
            apiKey: "sk-test",
            baseUrl: "https://openrouter.ai/v1/",
            modelId: "google/gemini-3.1-flash-tts-preview",
            voiceId: "Kore",
            speed: 1.1,
            responseFormat: " MP3 ",
            provider: {
              options: {
                openai: {
                  instructions: "Speak warmly.",
                },
              },
            },
          },
        },
      },
    });

    expect(resolved).toEqual({
      apiKey: "sk-test",
      baseUrl: "https://openrouter.ai/api/v1",
      model: "google/gemini-3.1-flash-tts-preview",
      voice: "Kore",
      speed: 1.1,
      responseFormat: "mp3",
      provider: {
        options: {
          openai: {
            instructions: "Speak warmly.",
          },
        },
      },
    });
  });

  // End-to-end synthesize path against the mocked HTTP layer: verifies the
  // request shape, the MP3 result metadata, and that the pooled connection
  // is released exactly once.
  it("synthesizes OpenAI-compatible speech through OpenRouter", async () => {
    const release = vi.fn(async () => {});
    postJsonRequestMock.mockResolvedValue({
      response: new Response(new Uint8Array([1, 2, 3]), { status: 200 }),
      release,
    });

    const provider = buildOpenRouterSpeechProvider();
    const result = await provider.synthesize({
      text: "hello",
      cfg: {
        models: {
          providers: {
            openrouter: {
              apiKey: "sk-openrouter",
              baseUrl: "https://openrouter.ai/v1/",
            },
          },
        },
      } as never,
      providerConfig: {
        model: "openai/gpt-4o-mini-tts-2025-12-15",
        voice: "nova",
        speed: 1.2,
      },
      target: "voice-note",
      timeoutMs: 12_345,
    });

    expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "openrouter",
        capability: "audio",
        baseUrl: "https://openrouter.ai/api/v1",
        defaultHeaders: expect.objectContaining({
          "Content-Type": "application/json",
        }),
      }),
    );
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://openrouter.ai/api/v1/audio/speech",
        timeoutMs: 12_345,
        body: {
          model: "openai/gpt-4o-mini-tts-2025-12-15",
          input: "hello",
          voice: "nova",
          response_format: "mp3",
          speed: 1.2,
        },
      }),
    );
    expect(result.audioBuffer).toEqual(Buffer.from([1, 2, 3]));
    expect(result.outputFormat).toBe("mp3");
    expect(result.fileExtension).toBe(".mp3");
    expect(result.voiceCompatible).toBe(true);
    expect(release).toHaveBeenCalledOnce();
  });

  // Pins the default model/voice so a catalog change cannot silently swap
  // the out-of-the-box TTS behavior.
  it("defaults to a live-proven OpenRouter TTS model", () => {
    const provider = buildOpenRouterSpeechProvider();

    expect(
      provider.resolveConfig?.({ cfg: {} as never, rawConfig: {}, timeoutMs: 30_000 }),
    ).toMatchObject({
      model: "hexgrad/kokoro-82m",
      voice: "af_alloy",
    });
  });

  // The env var is the last entry in the apiKey resolution chain.
  it("uses OPENROUTER_API_KEY when provider config omits apiKey", () => {
    vi.stubEnv("OPENROUTER_API_KEY", "sk-env");
    const provider = buildOpenRouterSpeechProvider();

    expect(
      provider.isConfigured({
        cfg: {} as never,
        providerConfig: {},
        timeoutMs: 30_000,
      }),
    ).toBe(true);
  });
});
|
||||
303
extensions/openrouter/speech-provider.ts
Normal file
303
extensions/openrouter/speech-provider.ts
Normal file
@@ -0,0 +1,303 @@
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
|
||||
import {
|
||||
asFiniteNumber,
|
||||
asObject,
|
||||
trimToUndefined,
|
||||
type SpeechDirectiveTokenParseContext,
|
||||
type SpeechProviderConfig,
|
||||
type SpeechProviderOverrides,
|
||||
type SpeechProviderPlugin,
|
||||
} from "openclaw/plugin-sdk/speech";
|
||||
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import { normalizeOpenRouterBaseUrl, OPENROUTER_BASE_URL } from "./provider-catalog.js";
|
||||
|
||||
// Defaults applied when the user config specifies no model/voice; the test
// suite pins these values ("defaults to a live-proven OpenRouter TTS model").
const DEFAULT_OPENROUTER_TTS_MODEL = "hexgrad/kokoro-82m";
const DEFAULT_OPENROUTER_TTS_VOICE = "af_alloy";
// Bundled model catalog surfaced through the speech-provider `models` field.
const OPENROUTER_TTS_MODELS = [
  DEFAULT_OPENROUTER_TTS_MODEL,
  "google/gemini-3.1-flash-tts-preview",
  "mistralai/voxtral-mini-tts-2603",
  "elevenlabs/eleven-turbo-v2",
] as const;
// Accepted output formats; anything else is rejected during normalization.
const OPENROUTER_TTS_RESPONSE_FORMATS = ["mp3", "pcm"] as const;

type OpenRouterTtsResponseFormat = (typeof OPENROUTER_TTS_RESPONSE_FORMATS)[number];

// Normalized provider config after defaults and the `modelId`/`voiceId`
// aliases have been resolved.
type OpenRouterTtsProviderConfig = {
  apiKey?: string;
  baseUrl?: string;
  model: string;
  voice: string;
  speed?: number;
  responseFormat?: OpenRouterTtsResponseFormat;
  // Forwarded verbatim as the request-body `provider` object (routing hints).
  provider?: Record<string, unknown>;
};

// Per-request overrides parsed from speech directive tokens.
type OpenRouterTtsProviderOverrides = {
  model?: string;
  voice?: string;
  speed?: number;
};
|
||||
|
||||
function normalizeOpenRouterTtsResponseFormat(
|
||||
value: unknown,
|
||||
): OpenRouterTtsResponseFormat | undefined {
|
||||
const next = normalizeOptionalLowercaseString(value);
|
||||
if (!next) {
|
||||
return undefined;
|
||||
}
|
||||
if (OPENROUTER_TTS_RESPONSE_FORMATS.some((format) => format === next)) {
|
||||
return next as OpenRouterTtsResponseFormat;
|
||||
}
|
||||
throw new Error(`Invalid OpenRouter speech responseFormat: ${next}`);
|
||||
}
|
||||
|
||||
function normalizeOpenRouterTtsBaseUrl(value: unknown): string {
|
||||
return (
|
||||
normalizeOpenRouterBaseUrl(trimToUndefined(value) ?? OPENROUTER_BASE_URL) ?? OPENROUTER_BASE_URL
|
||||
);
|
||||
}
|
||||
|
||||
function resolveOpenRouterProviderConfigRecord(
|
||||
rawConfig: Record<string, unknown>,
|
||||
): Record<string, unknown> | undefined {
|
||||
const providers = asObject(rawConfig.providers);
|
||||
return asObject(providers?.openrouter) ?? asObject(rawConfig.openrouter);
|
||||
}
|
||||
|
||||
function normalizeOpenRouterTtsProviderConfig(
|
||||
rawConfig: Record<string, unknown>,
|
||||
): OpenRouterTtsProviderConfig {
|
||||
const raw = resolveOpenRouterProviderConfigRecord(rawConfig);
|
||||
return {
|
||||
apiKey: normalizeResolvedSecretInputString({
|
||||
value: raw?.apiKey,
|
||||
path: "messages.tts.providers.openrouter.apiKey",
|
||||
}),
|
||||
baseUrl:
|
||||
trimToUndefined(raw?.baseUrl) == null
|
||||
? undefined
|
||||
: normalizeOpenRouterTtsBaseUrl(raw?.baseUrl),
|
||||
model: trimToUndefined(raw?.model ?? raw?.modelId) ?? DEFAULT_OPENROUTER_TTS_MODEL,
|
||||
voice: trimToUndefined(raw?.voice ?? raw?.voiceId) ?? DEFAULT_OPENROUTER_TTS_VOICE,
|
||||
speed: asFiniteNumber(raw?.speed),
|
||||
responseFormat: normalizeOpenRouterTtsResponseFormat(raw?.responseFormat),
|
||||
provider: asObject(raw?.provider),
|
||||
};
|
||||
}
|
||||
|
||||
function readOpenRouterTtsProviderConfig(
|
||||
config: SpeechProviderConfig,
|
||||
): OpenRouterTtsProviderConfig {
|
||||
const normalized = normalizeOpenRouterTtsProviderConfig({});
|
||||
return {
|
||||
apiKey: trimToUndefined(config.apiKey) ?? normalized.apiKey,
|
||||
baseUrl:
|
||||
trimToUndefined(config.baseUrl) == null
|
||||
? normalized.baseUrl
|
||||
: normalizeOpenRouterTtsBaseUrl(config.baseUrl),
|
||||
model: trimToUndefined(config.model ?? config.modelId) ?? normalized.model,
|
||||
voice: trimToUndefined(config.voice ?? config.voiceId) ?? normalized.voice,
|
||||
speed: asFiniteNumber(config.speed) ?? normalized.speed,
|
||||
responseFormat:
|
||||
normalizeOpenRouterTtsResponseFormat(config.responseFormat) ?? normalized.responseFormat,
|
||||
provider: asObject(config.provider) ?? normalized.provider,
|
||||
};
|
||||
}
|
||||
|
||||
function readOpenRouterTtsOverrides(
|
||||
overrides: SpeechProviderOverrides | undefined,
|
||||
): OpenRouterTtsProviderOverrides {
|
||||
if (!overrides) {
|
||||
return {};
|
||||
}
|
||||
return {
|
||||
model: trimToUndefined(overrides.model ?? overrides.modelId),
|
||||
voice: trimToUndefined(overrides.voice ?? overrides.voiceId),
|
||||
speed: asFiniteNumber(overrides.speed),
|
||||
};
|
||||
}
|
||||
|
||||
function resolveOpenRouterTtsApiKey(params: {
|
||||
cfg?: { models?: { providers?: { openrouter?: { apiKey?: unknown } } } };
|
||||
providerConfig: OpenRouterTtsProviderConfig;
|
||||
}): string | undefined {
|
||||
return (
|
||||
params.providerConfig.apiKey ??
|
||||
normalizeResolvedSecretInputString({
|
||||
value: params.cfg?.models?.providers?.openrouter?.apiKey,
|
||||
path: "models.providers.openrouter.apiKey",
|
||||
}) ??
|
||||
trimToUndefined(process.env.OPENROUTER_API_KEY)
|
||||
);
|
||||
}
|
||||
|
||||
function resolveOpenRouterTtsBaseUrl(params: {
|
||||
cfg?: { models?: { providers?: { openrouter?: { baseUrl?: unknown } } } };
|
||||
providerConfig: OpenRouterTtsProviderConfig;
|
||||
}): string {
|
||||
return normalizeOpenRouterTtsBaseUrl(
|
||||
params.providerConfig.baseUrl ??
|
||||
trimToUndefined(params.cfg?.models?.providers?.openrouter?.baseUrl) ??
|
||||
OPENROUTER_BASE_URL,
|
||||
);
|
||||
}
|
||||
|
||||
function resolveOpenRouterTtsResponseFormat(
|
||||
configuredFormat?: OpenRouterTtsResponseFormat,
|
||||
): OpenRouterTtsResponseFormat {
|
||||
if (configuredFormat) {
|
||||
return configuredFormat;
|
||||
}
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
function responseFormatToFileExtension(format: OpenRouterTtsResponseFormat): ".mp3" | ".pcm" {
|
||||
return format === "pcm" ? ".pcm" : ".mp3";
|
||||
}
|
||||
|
||||
function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
|
||||
handled: boolean;
|
||||
overrides?: SpeechProviderOverrides;
|
||||
} {
|
||||
switch (ctx.key) {
|
||||
case "voice":
|
||||
case "voice_id":
|
||||
case "voiceid":
|
||||
case "openrouter_voice":
|
||||
case "openroutervoice":
|
||||
if (!ctx.policy.allowVoice) {
|
||||
return { handled: true };
|
||||
}
|
||||
return { handled: true, overrides: { voice: ctx.value } };
|
||||
case "model":
|
||||
case "model_id":
|
||||
case "modelid":
|
||||
case "openrouter_model":
|
||||
case "openroutermodel":
|
||||
if (!ctx.policy.allowModelId) {
|
||||
return { handled: true };
|
||||
}
|
||||
return { handled: true, overrides: { model: ctx.value } };
|
||||
default:
|
||||
return { handled: false };
|
||||
}
|
||||
}
|
||||
|
||||
export function buildOpenRouterSpeechProvider(): SpeechProviderPlugin {
|
||||
return {
|
||||
id: "openrouter",
|
||||
label: "OpenRouter",
|
||||
autoSelectOrder: 35,
|
||||
models: OPENROUTER_TTS_MODELS,
|
||||
voices: [DEFAULT_OPENROUTER_TTS_VOICE],
|
||||
resolveConfig: ({ rawConfig }) => normalizeOpenRouterTtsProviderConfig(rawConfig),
|
||||
parseDirectiveToken,
|
||||
resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
|
||||
const base = normalizeOpenRouterTtsProviderConfig(baseTtsConfig);
|
||||
const responseFormat = normalizeOpenRouterTtsResponseFormat(
|
||||
talkProviderConfig.responseFormat,
|
||||
);
|
||||
return {
|
||||
...base,
|
||||
...(talkProviderConfig.apiKey === undefined
|
||||
? {}
|
||||
: {
|
||||
apiKey: normalizeResolvedSecretInputString({
|
||||
value: talkProviderConfig.apiKey,
|
||||
path: "talk.providers.openrouter.apiKey",
|
||||
}),
|
||||
}),
|
||||
...(trimToUndefined(talkProviderConfig.baseUrl) == null
|
||||
? {}
|
||||
: { baseUrl: normalizeOpenRouterTtsBaseUrl(talkProviderConfig.baseUrl) }),
|
||||
...(trimToUndefined(talkProviderConfig.modelId) == null
|
||||
? {}
|
||||
: { model: trimToUndefined(talkProviderConfig.modelId) }),
|
||||
...(trimToUndefined(talkProviderConfig.voiceId) == null
|
||||
? {}
|
||||
: { voice: trimToUndefined(talkProviderConfig.voiceId) }),
|
||||
...(asFiniteNumber(talkProviderConfig.speed) == null
|
||||
? {}
|
||||
: { speed: asFiniteNumber(talkProviderConfig.speed) }),
|
||||
...(responseFormat == null ? {} : { responseFormat }),
|
||||
};
|
||||
},
|
||||
resolveTalkOverrides: ({ params }) => ({
|
||||
...(trimToUndefined(params.voiceId ?? params.voice) == null
|
||||
? {}
|
||||
: { voice: trimToUndefined(params.voiceId ?? params.voice) }),
|
||||
...(trimToUndefined(params.modelId ?? params.model) == null
|
||||
? {}
|
||||
: { model: trimToUndefined(params.modelId ?? params.model) }),
|
||||
...(asFiniteNumber(params.speed) == null ? {} : { speed: asFiniteNumber(params.speed) }),
|
||||
}),
|
||||
listVoices: async () => [
|
||||
{ id: DEFAULT_OPENROUTER_TTS_VOICE, name: DEFAULT_OPENROUTER_TTS_VOICE },
|
||||
],
|
||||
isConfigured: ({ cfg, providerConfig }) => {
|
||||
const config = readOpenRouterTtsProviderConfig(providerConfig);
|
||||
return Boolean(resolveOpenRouterTtsApiKey({ cfg, providerConfig: config }));
|
||||
},
|
||||
synthesize: async (req) => {
|
||||
const config = readOpenRouterTtsProviderConfig(req.providerConfig);
|
||||
const overrides = readOpenRouterTtsOverrides(req.providerOverrides);
|
||||
const apiKey = resolveOpenRouterTtsApiKey({ cfg: req.cfg, providerConfig: config });
|
||||
if (!apiKey) {
|
||||
throw new Error("OpenRouter API key missing");
|
||||
}
|
||||
|
||||
const baseUrl = resolveOpenRouterTtsBaseUrl({ cfg: req.cfg, providerConfig: config });
|
||||
const responseFormat = resolveOpenRouterTtsResponseFormat(config.responseFormat);
|
||||
const speed = overrides.speed ?? config.speed;
|
||||
const { allowPrivateNetwork, headers, dispatcherPolicy } = resolveProviderHttpRequestConfig({
|
||||
baseUrl,
|
||||
defaultBaseUrl: OPENROUTER_BASE_URL,
|
||||
allowPrivateNetwork: false,
|
||||
defaultHeaders: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
"HTTP-Referer": "https://openclaw.ai",
|
||||
"X-OpenRouter-Title": "OpenClaw",
|
||||
},
|
||||
provider: "openrouter",
|
||||
capability: "audio",
|
||||
transport: "http",
|
||||
});
|
||||
|
||||
const { response, release } = await postJsonRequest({
|
||||
url: `${baseUrl}/audio/speech`,
|
||||
headers,
|
||||
body: {
|
||||
model: overrides.model ?? config.model,
|
||||
input: req.text,
|
||||
voice: overrides.voice ?? config.voice,
|
||||
response_format: responseFormat,
|
||||
...(speed == null ? {} : { speed }),
|
||||
...(config.provider == null ? {} : { provider: config.provider }),
|
||||
},
|
||||
timeoutMs: req.timeoutMs,
|
||||
fetchFn: fetch,
|
||||
allowPrivateNetwork,
|
||||
dispatcherPolicy,
|
||||
});
|
||||
|
||||
try {
|
||||
await assertOkOrThrowHttpError(response, "OpenRouter TTS API error");
|
||||
return {
|
||||
audioBuffer: Buffer.from(await response.arrayBuffer()),
|
||||
outputFormat: responseFormat,
|
||||
fileExtension: responseFormatToFileExtension(responseFormat),
|
||||
voiceCompatible: responseFormat === "mp3",
|
||||
};
|
||||
} finally {
|
||||
await release();
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -1,2 +1,3 @@
|
||||
export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
|
||||
export { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
export { buildOpenRouterSpeechProvider } from "./speech-provider.js";
|
||||
|
||||
Reference in New Issue
Block a user