diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index 8e17037f518..8b57b47a2f9 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -77,8 +77,19 @@ class TalkModeManager( return trimmed.takeIf { it.isNotEmpty() } } + private fun selectResolvedTalkProviderConfig(talk: JsonObject): TalkProviderConfigSelection? { + val resolved = talk["resolved"].asObjectOrNull() ?: return null + val providerId = normalizeTalkProviderId(resolved["provider"].asStringOrNull()) ?: return null + return TalkProviderConfigSelection( + provider = providerId, + config = resolved["config"].asObjectOrNull() ?: buildJsonObject {}, + normalizedPayload = true, + ) + } + internal fun selectTalkProviderConfig(talk: JsonObject?): TalkProviderConfigSelection? { if (talk == null) return null + selectResolvedTalkProviderConfig(talk)?.let { return it } val rawProvider = talk["provider"].asStringOrNull() val rawProviders = talk["providers"].asObjectOrNull() val hasNormalizedPayload = rawProvider != null || rawProviders != null diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt index a1a06e6aac3..218f7511cf0 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt @@ -13,6 +13,36 @@ import org.junit.Test class TalkModeConfigParsingTest { private val json = Json { ignoreUnknownKeys = true } + @Test + fun prefersCanonicalResolvedTalkProviderPayload() { + val talk = + json.parseToJsonElement( + """ + { + "resolved": { + "provider": "elevenlabs", + "config": { + "voiceId": "voice-resolved" + } + }, + "provider": "elevenlabs", + "providers": { + "elevenlabs": { + "voiceId": "voice-normalized" + } + } + } + """.trimIndent(), + ) + .jsonObject + + val selection = TalkModeManager.selectTalkProviderConfig(talk) + assertNotNull(selection) + assertEquals("elevenlabs", selection?.provider) + assertTrue(selection?.normalizedPayload == true) + assertEquals("voice-resolved", selection?.config?.get("voiceId")?.jsonPrimitive?.content) + } + @Test fun prefersNormalizedTalkProviderPayload() { val talk = diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift index f25eba5aa6f..05c587b2e9d 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift @@ -23,6 +23,9 @@ public enum TalkConfigParsing { allowLegacyFallback: Bool = true, ) -> TalkProviderConfigSelection? { guard let talk else { return nil } + if let resolvedSelection = self.resolvedProviderConfig(talk) { + return resolvedSelection + } let rawProvider = talk["provider"]?.stringValue let rawProviders = talk["providers"] let hasNormalizedPayload = rawProvider != nil || rawProviders != nil @@ -68,6 +71,19 @@ public enum TalkConfigParsing { return trimmed.isEmpty ? nil : trimmed } + private static func resolvedProviderConfig( + _ talk: [String: AnyCodable] + ) -> TalkProviderConfigSelection? { + guard + let resolved = talk["resolved"]?.dictionaryValue, + let providerID = self.normalizedTalkProviderID(resolved["provider"]?.stringValue) + else { return nil } + return TalkProviderConfigSelection( + provider: providerID, + config: resolved["config"]?.dictionaryValue ?? [:], + normalizedPayload: true) + } + private static func normalizedTalkProviders(_ raw: AnyCodable?) -> [String: [String: AnyCodable]] { guard let providerMap = raw?.dictionaryValue else { return [:] } return providerMap.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in diff --git a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift index aa2f8081d34..5edd2ff3368 100644 --- a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift +++ b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift @@ -2,6 +2,28 @@ import OpenClawKit import Testing struct TalkConfigParsingTests { + @Test func prefersCanonicalResolvedTalkProviderPayload() { + let talk: [String: AnyCodable] = [ + "resolved": AnyCodable([ + "provider": "elevenlabs", + "config": [ + "voiceId": "voice-resolved", + ], + ]), + "provider": AnyCodable("elevenlabs"), + "providers": AnyCodable([ + "elevenlabs": [ + "voiceId": "voice-normalized", + ], + ]), + ] + + let selection = TalkConfigParsing.selectProviderConfig(talk, defaultProvider: "elevenlabs") + #expect(selection?.provider == "elevenlabs") + #expect(selection?.normalizedPayload == true) + #expect(selection?.config["voiceId"]?.stringValue == "voice-resolved") + } + @Test func prefersNormalizedTalkProviderPayload() { let talk: [String: AnyCodable] = [ "provider": AnyCodable("elevenlabs"), diff --git a/src/config/defaults.ts b/src/config/defaults.ts index b8e20f260a1..2febc3869ee 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -178,17 +178,17 @@ export function applyTalkApiKey(config: OpenClawConfig): OpenClawConfig { const talk = normalized.talk; const active = resolveActiveTalkProviderConfig(talk); - if (active.provider && active.provider !== DEFAULT_TALK_PROVIDER) { + if (active?.provider && active.provider !== DEFAULT_TALK_PROVIDER) { return normalized; } - const existingProviderApiKeyConfigured = hasConfiguredSecretInput(active.config?.apiKey); + const existingProviderApiKeyConfigured = hasConfiguredSecretInput(active?.config?.apiKey); const existingLegacyApiKeyConfigured = hasConfiguredSecretInput(talk?.apiKey); if (existingProviderApiKeyConfigured || existingLegacyApiKeyConfigured) { return normalized; } - const providerId = active.provider ?? DEFAULT_TALK_PROVIDER; + const providerId = active?.provider ?? DEFAULT_TALK_PROVIDER; const providers = { ...talk?.providers }; const providerConfig = { ...providers[providerId], apiKey: resolved }; providers[providerId] = providerConfig; diff --git a/src/config/talk.normalize.test.ts b/src/config/talk.normalize.test.ts index ebca72326ab..f2b1ddff1a1 100644 --- a/src/config/talk.normalize.test.ts +++ b/src/config/talk.normalize.test.ts @@ -4,7 +4,7 @@ import path from "node:path"; import { describe, expect, it } from "vitest"; import { withEnvAsync } from "../test-utils/env.js"; import { createConfigIO } from "./io.js"; -import { normalizeTalkSection } from "./talk.js"; +import { buildTalkConfigResponse, normalizeTalkSection } from "./talk.js"; const envVar = (...parts: string[]) => parts.join("_"); const elevenLabsApiKeyEnv = ["ELEVENLABS_API", "KEY"].join("_"); @@ -82,6 +82,40 @@ describe("talk normalization", () => { }); }); + it("builds a canonical resolved talk payload for clients", () => { + const payload = buildTalkConfigResponse({ + provider: "acme", + providers: { + acme: { + voiceId: "acme-voice", + modelId: "acme-model", + }, + }, + voiceId: "legacy-voice", + interruptOnSpeech: true, + }); + + expect(payload).toEqual({ + provider: "acme", + providers: { + acme: { + voiceId: "acme-voice", + modelId: "acme-model", + }, + }, + resolved: { + provider: "acme", + config: { + voiceId: "acme-voice", + modelId: "acme-model", + }, + }, + voiceId: "acme-voice", + modelId: "acme-model", + interruptOnSpeech: true, + }); + }); + it("preserves SecretRef apiKey values during normalization", () => { const normalized = normalizeTalkSection({ provider: "elevenlabs", diff --git a/src/config/talk.ts b/src/config/talk.ts index 557153d99d8..2d8f4b79c3d 100644 --- a/src/config/talk.ts +++ b/src/config/talk.ts @@ -1,7 +1,12 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import type { TalkConfig, TalkProviderConfig } from "./types.gateway.js"; +import type { + ResolvedTalkConfig, + TalkConfig, + TalkConfigResponse, + TalkProviderConfig, +} from "./types.gateway.js"; import type { OpenClawConfig } from "./types.js"; import { coerceSecretRef } from "./types.secrets.js"; @@ -247,25 +252,24 @@ export function normalizeTalkConfig(config: OpenClawConfig): OpenClawConfig { }; } -export function resolveActiveTalkProviderConfig(talk: TalkConfig | undefined): { - provider?: string; - config?: TalkProviderConfig; -} { +export function resolveActiveTalkProviderConfig( + talk: TalkConfig | undefined, +): ResolvedTalkConfig | undefined { const normalizedTalk = normalizeTalkSection(talk); if (!normalizedTalk) { - return {}; + return undefined; } const provider = activeProviderFromTalk(normalizedTalk); if (!provider) { - return {}; + return undefined; } return { provider, - config: normalizedTalk.providers?.[provider], + config: normalizedTalk.providers?.[provider] ?? {}, }; } -export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined { +export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | undefined { if (!isPlainObject(value)) { return undefined; } @@ -274,7 +278,7 @@ export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined return undefined; } - const payload: TalkConfig = {}; + const payload: TalkConfigResponse = {}; if (typeof normalized.interruptOnSpeech === "boolean") { payload.interruptOnSpeech = normalized.interruptOnSpeech; } @@ -288,8 +292,12 @@ export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined payload.provider = normalized.provider; } - const activeProvider = activeProviderFromTalk(normalized); - const providerConfig = activeProvider ? normalized.providers?.[activeProvider] : undefined; + const resolved = resolveActiveTalkProviderConfig(normalized); + if (resolved) { + payload.resolved = resolved; + } + + const providerConfig = resolved?.config; const providerCompatibilityLegacy = legacyTalkFieldsFromProviderConfig(providerConfig); const compatibilityLegacy = Object.keys(providerCompatibilityLegacy).length > 0 diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts index 482dd09aeb9..58b061682a1 100644 --- a/src/config/types.gateway.ts +++ b/src/config/types.gateway.ts @@ -63,6 +63,13 @@ export type TalkProviderConfig = { [key: string]: unknown; }; +export type ResolvedTalkConfig = { + /** Active Talk TTS provider resolved from the current config payload. */ + provider: string; + /** Provider config for the active Talk provider. */ + config: TalkProviderConfig; +}; + export type TalkConfig = { /** Active Talk TTS provider (for example "elevenlabs"). */ provider?: string; @@ -84,6 +91,11 @@ export type TalkConfig = { apiKey?: SecretInput; }; +export type TalkConfigResponse = TalkConfig & { + /** Canonical active Talk payload for clients. */ + resolved?: ResolvedTalkConfig; +}; + export type GatewayControlUiConfig = { /** If false, the Gateway will not serve the Control UI (default /). */ enabled?: boolean; diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts index 24088fcaffd..cfe05819caa 100644 --- a/src/gateway/protocol/schema/channels.ts +++ b/src/gateway/protocol/schema/channels.ts @@ -27,6 +27,14 @@ const TalkProviderConfigSchema = Type.Object( { additionalProperties: true }, ); +const ResolvedTalkConfigSchema = Type.Object( + { + provider: Type.String(), + config: TalkProviderConfigSchema, + }, + { additionalProperties: false }, +); + export const TalkConfigResultSchema = Type.Object( { config: Type.Object( @@ -36,6 +44,7 @@ export const TalkConfigResultSchema = Type.Object( { provider: Type.Optional(Type.String()), providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)), + resolved: Type.Optional(ResolvedTalkConfigSchema), voiceId: Type.Optional(Type.String()), voiceAliases: Type.Optional(Type.Record(Type.String(), Type.String())), modelId: Type.Optional(Type.String()), diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index 1dcb29ea496..5c7fb760ed9 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -91,6 +91,10 @@ describe("gateway talk.config", () => { providers?: { elevenlabs?: { voiceId?: string; apiKey?: string }; }; + resolved?: { + provider?: string; + config?: { voiceId?: string; apiKey?: string }; + }; apiKey?: string; voiceId?: string; silenceTimeoutMs?: number; @@ -103,6 +107,9 @@ describe("gateway talk.config", () => { expect(res.payload?.config?.talk?.providers?.elevenlabs?.apiKey).toBe( "__OPENCLAW_REDACTED__", ); + expect(res.payload?.config?.talk?.resolved?.provider).toBe("elevenlabs"); + expect(res.payload?.config?.talk?.resolved?.config?.voiceId).toBe("voice-123"); + expect(res.payload?.config?.talk?.resolved?.config?.apiKey).toBe("__OPENCLAW_REDACTED__"); expect(res.payload?.config?.talk?.voiceId).toBe("voice-123"); expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__"); expect(res.payload?.config?.talk?.silenceTimeoutMs).toBe(1500); @@ -156,6 +163,10 @@ describe("gateway talk.config", () => { providers?: { elevenlabs?: { voiceId?: string }; }; + resolved?: { + provider?: string; + config?: { voiceId?: string }; + }; voiceId?: string; }; }; @@ -163,6 +174,8 @@ describe("gateway talk.config", () => { expect(res.ok).toBe(true); expect(res.payload?.config?.talk?.provider).toBe("elevenlabs"); expect(res.payload?.config?.talk?.providers?.elevenlabs?.voiceId).toBe("voice-normalized"); + expect(res.payload?.config?.talk?.resolved?.provider).toBe("elevenlabs"); + expect(res.payload?.config?.talk?.resolved?.config?.voiceId).toBe("voice-normalized"); expect(res.payload?.config?.talk?.voiceId).toBe("voice-normalized"); }); });