diff --git a/docs/gateway/doctor.md b/docs/gateway/doctor.md index 20dd0f1123a..3f67df8a56d 100644 --- a/docs/gateway/doctor.md +++ b/docs/gateway/doctor.md @@ -172,9 +172,11 @@ Current migrations: - `routing.agentToAgent` → `tools.agentToAgent` - `routing.transcribeAudio` → `tools.media.audio.models` - `messages.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `messages.tts.providers.<provider>` +- `messages.tts.provider: "edge"` and `messages.tts.providers.edge` → `messages.tts.provider: "microsoft"` and `messages.tts.providers.microsoft` - `channels.discord.voice.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `channels.discord.voice.tts.providers.<provider>` - `channels.discord.accounts.<id>.voice.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `channels.discord.accounts.<id>.voice.tts.providers.<provider>` - `plugins.entries.voice-call.config.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `plugins.entries.voice-call.config.tts.providers.<provider>` +- `plugins.entries.voice-call.config.tts.provider: "edge"` and `plugins.entries.voice-call.config.tts.providers.edge` → `plugins.entries.voice-call.config.tts.provider: "microsoft"` and `plugins.entries.voice-call.config.tts.providers.microsoft` - `plugins.entries.voice-call.config.provider: "log"` → `"mock"` - `plugins.entries.voice-call.config.twilio.from` → `plugins.entries.voice-call.config.fromNumber` - `plugins.entries.voice-call.config.streaming.sttProvider` → `plugins.entries.voice-call.config.streaming.provider` diff --git a/docs/tools/tts.md b/docs/tools/tts.md index 5a6fc117906..30a7104b3c3 100644 --- a/docs/tools/tts.md +++ b/docs/tools/tts.md @@ -347,13 +347,15 @@ Then run: - `mode`: `"final"` (default) or `"all"` (includes tool/block replies). - `provider`: speech provider id such as `"elevenlabs"`, `"google"`, `"gradium"`, `"microsoft"`, `"minimax"`, `"openai"`, `"vydra"`, or `"xai"` (fallback is automatic). - If `provider` is **unset**, OpenClaw uses the first configured speech provider in registry auto-select order. -- Legacy `provider: "edge"` still works and is normalized to `microsoft`. 
+- Legacy `provider: "edge"` config is repaired by `openclaw doctor --fix` and + rewritten to `provider: "microsoft"`. - `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`. - Accepts `provider/model` or a configured model alias. - `modelOverrides`: allow the model to emit TTS directives (on by default). - `allowProvider` defaults to `false` (provider switching is opt-in). - `providers.<id>`: provider-owned settings keyed by speech provider id. - Legacy direct provider blocks (`messages.tts.openai`, `messages.tts.elevenlabs`, `messages.tts.microsoft`, `messages.tts.edge`) are repaired by `openclaw doctor --fix`; committed config should use `messages.tts.providers.<provider>`. +- Legacy `messages.tts.providers.edge` is also repaired by `openclaw doctor --fix`; committed config should use `messages.tts.providers.microsoft`. - `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded. - `timeoutMs`: request timeout (ms). - `prefsPath`: override the local prefs JSON path (provider/limit/summary). @@ -402,7 +404,8 @@ Then run: - `providers.microsoft.saveSubtitles`: write JSON subtitles alongside the audio file. - `providers.microsoft.proxy`: proxy URL for Microsoft speech requests. - `providers.microsoft.timeoutMs`: request timeout override (ms). -- `edge.*`: legacy alias for the same Microsoft settings. +- `edge.*`: legacy alias for the same Microsoft settings. Run + `openclaw doctor --fix` to rewrite persisted config to `providers.microsoft`. 
## Model-driven overrides (default on) diff --git a/extensions/microsoft/speech-provider.test.ts b/extensions/microsoft/speech-provider.test.ts index 9e77593fc2b..fbe52717da3 100644 --- a/extensions/microsoft/speech-provider.test.ts +++ b/extensions/microsoft/speech-provider.test.ts @@ -184,27 +184,6 @@ describe("buildMicrosoftSpeechProvider", () => { vi.restoreAllMocks(); }); - it("accepts legacy providers.edge voice config", () => { - const provider = buildMicrosoftSpeechProvider(); - - const resolved = provider.resolveConfig?.({ - cfg: TEST_CFG, - rawConfig: { - provider: "edge", - providers: { - edge: { - voice: "en-US-AvaNeural", - }, - }, - }, - timeoutMs: 1000, - }); - - expect(resolved).toMatchObject({ - voice: "en-US-AvaNeural", - }); - }); - it("switches to a Chinese voice for CJK text when no explicit voice override is set", async () => { const provider = buildMicrosoftSpeechProvider(); const edgeSpy = vi.spyOn(ttsModule, "edgeTTS").mockImplementation(async ({ outputPath }) => { diff --git a/extensions/microsoft/speech-provider.ts b/extensions/microsoft/speech-provider.ts index ce07bd4f7eb..3c021648964 100644 --- a/extensions/microsoft/speech-provider.ts +++ b/extensions/microsoft/speech-provider.ts @@ -59,9 +59,8 @@ function normalizeMicrosoftProviderConfig( const providers = asObject(rawConfig.providers); const rawEdge = asObject(rawConfig.edge); const rawMicrosoft = asObject(rawConfig.microsoft); - const rawProviderEdge = asObject(providers?.edge); const rawProviderMicrosoft = asObject(providers?.microsoft); - const raw = { ...rawEdge, ...rawProviderEdge, ...rawMicrosoft, ...rawProviderMicrosoft }; + const raw = { ...rawEdge, ...rawMicrosoft, ...rawProviderMicrosoft }; const outputFormat = trimToUndefined(raw.outputFormat); return { enabled: asBoolean(raw.enabled) ?? 
true, diff --git a/src/commands/doctor/shared/legacy-config-migrate.provider-shapes.test.ts b/src/commands/doctor/shared/legacy-config-migrate.provider-shapes.test.ts index ae56943baf3..a6999f1ae9b 100644 --- a/src/commands/doctor/shared/legacy-config-migrate.provider-shapes.test.ts +++ b/src/commands/doctor/shared/legacy-config-migrate.provider-shapes.test.ts @@ -48,6 +48,41 @@ describe("legacy migrate provider-shaped config", () => { }); }); + it("moves legacy edge provider aliases into microsoft tts config", () => { + const res = migrateLegacyConfig({ + messages: { + tts: { + provider: "edge", + providers: { + edge: { + voice: "en-US-AvaNeural", + rate: "+8%", + }, + microsoft: { + lang: "en-US", + rate: "+4%", + }, + }, + }, + }, + }); + + expect(res.changes).toContain('Moved messages.tts.provider "edge" → "microsoft".'); + expect(res.changes).toContain( + "Moved messages.tts.providers.edge → messages.tts.providers.microsoft.", + ); + expect(res.config?.messages?.tts).toEqual({ + provider: "microsoft", + providers: { + microsoft: { + lang: "en-US", + rate: "+4%", + voice: "en-US-AvaNeural", + }, + }, + }); + }); + it("moves plugins.entries.voice-call.config.tts. 
keys into providers", () => { const res = migrateLegacyConfig({ plugins: { @@ -86,6 +121,47 @@ describe("legacy migrate provider-shaped config", () => { }); }); + it("moves voice-call legacy edge provider aliases into microsoft tts config", () => { + const res = migrateLegacyConfig({ + plugins: { + entries: { + "voice-call": { + config: { + tts: { + provider: "edge", + providers: { + edge: { + voice: "en-US-AvaNeural", + }, + }, + }, + }, + }, + }, + }, + }); + + expect(res.changes).toContain( + 'Moved plugins.entries.voice-call.config.tts.provider "edge" → "microsoft".', + ); + expect(res.changes).toContain( + "Moved plugins.entries.voice-call.config.tts.providers.edge → plugins.entries.voice-call.config.tts.providers.microsoft.", + ); + const voiceCallTts = ( + res.config?.plugins?.entries as + | Record } }> + | undefined + )?.["voice-call"]?.config?.tts; + expect(voiceCallTts).toEqual({ + provider: "microsoft", + providers: { + microsoft: { + voice: "en-US-AvaNeural", + }, + }, + }); + }); + it("does not migrate legacy tts provider keys for unknown plugin ids", () => { const res = migrateLegacyConfig({ plugins: { diff --git a/src/commands/doctor/shared/legacy-config-migrations.runtime.tts.ts b/src/commands/doctor/shared/legacy-config-migrations.runtime.tts.ts index 8877f3f098f..d3531b34fc2 100644 --- a/src/commands/doctor/shared/legacy-config-migrations.runtime.tts.ts +++ b/src/commands/doctor/shared/legacy-config-migrations.runtime.tts.ts @@ -10,12 +10,23 @@ import { isBlockedObjectKey } from "../../../config/prototype-keys.js"; const LEGACY_TTS_PROVIDER_KEYS = ["openai", "elevenlabs", "microsoft", "edge"] as const; const LEGACY_TTS_PLUGIN_IDS = new Set(["voice-call"]); +function isLegacyEdgeProviderId(value: unknown): boolean { + return typeof value === "string" && value.trim().toLowerCase() === "edge"; +} + function hasLegacyTtsProviderKeys(value: unknown): boolean { const tts = getRecord(value); if (!tts) { return false; } - return 
LEGACY_TTS_PROVIDER_KEYS.some((key) => Object.prototype.hasOwnProperty.call(tts, key)); + if (isLegacyEdgeProviderId(tts.provider)) { + return true; + } + if (LEGACY_TTS_PROVIDER_KEYS.some((key) => Object.prototype.hasOwnProperty.call(tts, key))) { + return true; + } + const providers = getRecord(tts.providers); + return Boolean(providers && Object.prototype.hasOwnProperty.call(providers, "edge")); } function hasLegacyPluginEntryTtsProviderKeys(value: unknown): boolean { @@ -57,6 +68,24 @@ function mergeLegacyTtsProviderConfig( return true; } +function mergeLegacyTtsProviderAliasConfig( + tts: Record, + aliasKey: string, + providerId: string, +): boolean { + const providers = getRecord(tts.providers); + const aliasValue = getRecord(providers?.[aliasKey]); + if (!providers || !aliasValue) { + return false; + } + const existing = getRecord(providers[providerId]) ?? {}; + const merged = structuredClone(existing); + mergeMissing(merged, aliasValue); + providers[providerId] = merged; + delete providers[aliasKey]; + return true; +} + function migrateLegacyTtsConfig( tts: Record | null | undefined, pathLabel: string, @@ -65,9 +94,14 @@ function migrateLegacyTtsConfig( if (!tts) { return; } + if (isLegacyEdgeProviderId(tts.provider)) { + tts.provider = "microsoft"; + changes.push(`Moved ${pathLabel}.provider "edge" → "microsoft".`); + } const movedOpenAI = mergeLegacyTtsProviderConfig(tts, "openai", "openai"); const movedElevenLabs = mergeLegacyTtsProviderConfig(tts, "elevenlabs", "elevenlabs"); const movedMicrosoft = mergeLegacyTtsProviderConfig(tts, "microsoft", "microsoft"); + const movedProviderEdge = mergeLegacyTtsProviderAliasConfig(tts, "edge", "microsoft"); const movedEdge = mergeLegacyTtsProviderConfig(tts, "edge", "microsoft"); if (movedOpenAI) { @@ -79,6 +113,9 @@ function migrateLegacyTtsConfig( if (movedMicrosoft) { changes.push(`Moved ${pathLabel}.microsoft → ${pathLabel}.providers.microsoft.`); } + if (movedProviderEdge) { + changes.push(`Moved 
${pathLabel}.providers.edge → ${pathLabel}.providers.microsoft.`); + } if (movedEdge) { changes.push(`Moved ${pathLabel}.edge → ${pathLabel}.providers.microsoft.`); } @@ -88,13 +125,13 @@ const LEGACY_TTS_RULES: LegacyConfigRule[] = [ { path: ["messages", "tts"], message: - 'messages.tts. keys (openai/elevenlabs/microsoft/edge) are legacy; use messages.tts.providers.. Run "openclaw doctor --fix".', + 'messages.tts legacy provider aliases/keys are legacy; use provider: "microsoft" and messages.tts.providers.. Run "openclaw doctor --fix".', match: (value) => hasLegacyTtsProviderKeys(value), }, { path: ["plugins", "entries"], message: - 'plugins.entries.voice-call.config.tts. keys (openai/elevenlabs/microsoft/edge) are legacy; use plugins.entries.voice-call.config.tts.providers.. Run "openclaw doctor --fix".', + 'plugins.entries.voice-call.config.tts legacy provider aliases/keys are legacy; use provider: "microsoft" and plugins.entries.voice-call.config.tts.providers.. Run "openclaw doctor --fix".', match: (value) => hasLegacyPluginEntryTtsProviderKeys(value), }, ];