From 8ce4f8fc84b3585fdcc9bed5eb8e455aa6c01578 Mon Sep 17 00:00:00 2001 From: Omar Shahine <10343873+omarshahine@users.noreply.github.com> Date: Sun, 26 Apr 2026 21:20:47 -0700 Subject: [PATCH] fix(gateway): redact SecretRef apiKey through talk.config without throwing The talk.config discovery RPC was handing the source-snapshot's talkProviderConfig (with the unresolved SecretRef wrapper still on apiKey) to speechProvider.resolveTalkConfig. ElevenLabs/OpenAI's strict normalizeResolvedSecretInputString helper threw 'unresolved SecretRef' there, so iOS / macOS / Control UI Talk overlays never learned the configured provider and silently fell back to local AVSpeechSynthesizer ('robot voice') even though talk.realtime.session and talk.speak both worked end-to-end with the same SecretRef. Prefer the runtime-resolved provider config when calling resolveTalkConfig, strip the apiKey field if it's still a SecretRef wrapper at the call site, and restore the source-shaped apiKey onto the response so the UI keeps the SecretRef context. Redaction strips the value when includeSecrets=false. Adds a regression test using a strict resolver speech provider that mirrors ElevenLabs/OpenAI behavior so the path stays covered for SecretRef apiKeys. Fixes #72496 Thanks @omarshahine --- src/gateway/server-methods/talk.ts | 30 ++++++++-- src/gateway/server.talk-config.test.ts | 82 ++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts index 4d77b08ca16..4662337bce2 100644 --- a/src/gateway/server-methods/talk.ts +++ b/src/gateway/server-methods/talk.ts @@ -360,30 +360,52 @@ function resolveTalkResponseFromConfig(params: { const speechProvider = getSpeechProvider(provider, params.runtimeConfig); const sourceBaseTts = asRecord(params.sourceConfig.messages?.tts) ?? {}; const runtimeBaseTts = asRecord(params.runtimeConfig.messages?.tts) ?? {}; - const talkProviderConfig = sourceResolved?.config ?? runtimeResolved?.config ?? {}; + const sourceProviderConfig = sourceResolved?.config ?? {}; + const runtimeProviderConfig = runtimeResolved?.config ?? {}; + // Prefer runtime-resolved provider config (already-substituted secrets) and + // fall back to source. Strip any apiKey that is still a SecretRef wrapper — + // provider plugins (ElevenLabs/OpenAI) call strict secret helpers that throw + // on unresolved wrappers, and the discovery path doesn't need the resolved + // value: the response's apiKey is restored from source so the UI keeps the + // SecretRef shape, and redaction strips the value when includeSecrets=false. + const providerInputConfig = stripUnresolvedSecretApiKey( + Object.keys(runtimeProviderConfig).length > 0 ? runtimeProviderConfig : sourceProviderConfig, + ); const resolvedConfig = speechProvider?.resolveTalkConfig?.({ cfg: params.runtimeConfig, baseTtsConfig: Object.keys(sourceBaseTts).length > 0 ? sourceBaseTts : runtimeBaseTts, - talkProviderConfig, + talkProviderConfig: providerInputConfig, timeoutMs: typeof sourceBaseTts.timeoutMs === "number" ? sourceBaseTts.timeoutMs : typeof runtimeBaseTts.timeoutMs === "number" ? runtimeBaseTts.timeoutMs : 30_000, - }) ?? talkProviderConfig; + }) ?? providerInputConfig; + const responseConfig = + sourceProviderConfig.apiKey === undefined + ? resolvedConfig + : { ...resolvedConfig, apiKey: sourceProviderConfig.apiKey }; return { ...payload, provider, resolved: { provider, - config: resolvedConfig, + config: responseConfig, }, }; } +function stripUnresolvedSecretApiKey(config: TalkProviderConfig): TalkProviderConfig { + if (config.apiKey === undefined || typeof config.apiKey === "string") { + return config; + } + const { apiKey: _omit, ...rest } = config; + return rest; +} + export const talkHandlers: GatewayRequestHandlers = { "talk.config": async ({ params, respond, client, context }) => { if (!validateTalkConfigParams(params)) { diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index dbc85861092..fb2a3c8d9ba 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -1,6 +1,7 @@ import os from "node:os"; import path from "node:path"; import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { normalizeResolvedSecretInputString } from "../config/types.secrets.js"; import { loadOrCreateDeviceIdentity, publicKeyRawBase64UrlFromPem, @@ -323,6 +324,87 @@ describe("gateway talk.config", () => { }); }); + it("does not throw when SecretRef apiKey flows through a strict provider resolver", async () => { + // Regression for #72496: ElevenLabs/OpenAI speech providers call the strict + // normalizeResolvedSecretInputString helper inside resolveTalkConfig. The + // discovery path used to hand them the raw source config (with the SecretRef + // wrapper still intact), causing talk.config to throw "unresolved SecretRef" + // and pushing iOS/macOS Talk overlays onto local AVSpeechSynthesizer. + const apiKeyPath = `talk.providers.${GENERIC_TALK_PROVIDER_ID}.apiKey`; + await writeTalkConfig({ + apiKey: { source: "env", provider: "default", id: GENERIC_TALK_API_ENV }, + voiceId: "voice-secretref", + }); + + await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => { + await withSpeechProviders( + [ + { + pluginId: "acme-strict-talk-provider-test", + source: "test", + provider: { + id: GENERIC_TALK_PROVIDER_ID, + label: "Acme Strict Speech", + isConfigured: () => true, + resolveTalkConfig: ({ talkProviderConfig }) => { + const apiKey = normalizeResolvedSecretInputString({ + value: talkProviderConfig.apiKey, + path: apiKeyPath, + }); + return { + ...talkProviderConfig, + ...(apiKey === undefined ? {} : { apiKey }), + }; + }, + synthesize: async () => ({ + audioBuffer: Buffer.from([1]), + outputFormat: "mp3", + fileExtension: ".mp3", + voiceCompatible: false, + }), + }, + }, + ], + async () => { + const secretRef = { + source: "env", + provider: "default", + id: GENERIC_TALK_API_ENV, + } satisfies SecretRef; + + await withTalkConfigConnection(["operator.read"], async (ws) => { + const res = await fetchTalkConfig(ws); + expect(res.ok, JSON.stringify(res.error)).toBe(true); + const talk = res.payload?.config?.talk; + expect(talk?.provider).toBe(GENERIC_TALK_PROVIDER_ID); + expect(talk?.providers?.[GENERIC_TALK_PROVIDER_ID]?.voiceId).toBe("voice-secretref"); + // SecretRef apiKey is redacted in-place; the wrapper shape stays so + // the UI keeps the SecretRef context, but every field becomes the + // sentinel so no credential material leaks to read-scope callers. + const redactedApiKey = talk?.providers?.[GENERIC_TALK_PROVIDER_ID]?.apiKey; + expect(redactedApiKey).toBeTypeOf("object"); + expect((redactedApiKey as SecretRef).id).toBe("__OPENCLAW_REDACTED__"); + expect(talk?.resolved?.config?.apiKey).toEqual(redactedApiKey); + }); + + await withTalkConfigConnection( + ["operator.read", "operator.write", "operator.talk.secrets"], + async (ws) => { + const res = await fetchTalkConfig(ws, { includeSecrets: true }); + expect(res.ok, JSON.stringify(res.error)).toBe(true); + expect(validateTalkConfigResult(res.payload)).toBe(true); + expectTalkConfig(res.payload?.config?.talk, { + provider: GENERIC_TALK_PROVIDER_ID, + voiceId: "voice-secretref", + apiKey: secretRef, + }); + }, + ); + }, + ); + }); + }); + it("returns canonical provider talk payloads", async () => { await writeTalkConfig({ provider: GENERIC_TALK_PROVIDER_ID,