From 4b760be1ddea72630e41d4a4cfc8d44dd6ff9329 Mon Sep 17 00:00:00 2001
From: Omar Shahine <omarshahine@users.noreply.github.com>
Date: Mon, 27 Apr 2026 21:06:28 -0700
Subject: [PATCH] fix(gateway): strip SecretRef secret inputs from
 messages.tts.providers before talk.config hands them to speech providers
 (#73111)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the gap left by #72496 on the parallel `messages.tts.providers.<id>` site. After #72496 landed, `talk.config` still threw `unresolved SecretRef` whenever an operator pinned a TTS apiKey or token as a SecretRef on the messages.tts side — same user-facing symptom (iOS / macOS / Control UI Talk overlays falling back to local AVSpeechSynthesizer).

Adds `stripUnresolvedSecretInputsFromBaseTtsProviders` in `src/gateway/server-methods/talk.ts` that walks each entry in `messages.tts.providers` and strips any unresolved SecretRef wrappers from the configured secret-input keys (`apiKey`, `token`) before handing the base TTS config down to `speechProvider.resolveTalkConfig`. Mirrors the `talk.providers` strip pattern from #72496.

Hardening: rebuilds the providers map with `Object.create(null)` instead of `{}` so an operator-config payload carrying `messages.tts.providers.__proto__` (or `constructor`/`prototype`) cannot mutate Object.prototype via the dynamic `cleaned[providerId] = ...` assignment. Caught by Aisle security review.

Adds three regression tests covering: SecretRef apiKey on messages.tts (the original bug), SecretRef token on messages.tts (Peter's generalization), and `__proto__`-keyed providers (Aisle hardening). All pass; full CI green (57/57) on the rebased branch.

Fixes #73109. Refs #72496.

Co-authored-by: Peter Steinberger <steipete@gmail.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                           |   1 +
 src/gateway/server-methods/talk.ts     |  56 ++++++++-
 src/gateway/server.talk-config.test.ts | 167 +++++++++++++++++++++++++
 3 files changed, 223 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 00e090f1764..a3c80b965ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai
 - Backup: skip installed plugin `extensions/*/node_modules` dependency trees while keeping plugin manifests and source files in archives, so local backups avoid rebuildable npm payload bloat. Fixes #64144. Thanks @BrilliantWang.
 - Cron/models: fail isolated cron runs closed when an explicit `payload.model` is not allowed or cannot be resolved, so scheduled jobs do not silently fall back to an unrelated agent default or paid route before configured provider proxies such as LiteLLM can run. Fixes #73146. Thanks @oneandrewwang.
 - Memory/QMD: back off repeated chat-turn QMD open failures while still letting memory status and CLI probes recheck immediately, so a broken sidecar dependency cannot trigger active-memory or cron retry storms. Fixes #73188 and #73176. Thanks @leonlushgit and @w3i-William.
+- Talk Mode: keep `talk.config` callable when `messages.tts.providers.<id>` stores SecretRef-backed `apiKey` or `token` values, so Talk overlays can discover the configured speech provider without falling back to local speech. Fixes #73109. (#73111) Thanks @omarshahine.
 - Memory/Ollama: resolve `memorySearch.provider` custom provider ids through their configured `models.providers.<id>.api` owner, so multi-GPU Ollama setups can dedicate embeddings to providers such as `ollama-5080` without losing the Ollama adapter or local auth semantics. Fixes #73150. Thanks @oneandrewwang.
 - CLI/memory: skip eager context-window warmup for `openclaw memory` commands so memory search does not race unrelated model metadata discovery. Fixes #73123. Thanks @oalansilva and @neeravmakwana.
 - CLI/Telegram: route Telegram `message send` and poll actions through the running Gateway when available, so packaged installs use the staged `grammy` runtime deps and CLI sends return instead of hanging after the Telegram channel is active. Fixes #73140. Thanks @oalansilva.
diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts
index 4662337bce2..794f088ec61 100644
--- a/src/gateway/server-methods/talk.ts
+++ b/src/gateway/server-methods/talk.ts
@@ -371,10 +371,15 @@ function resolveTalkResponseFromConfig(params: {
   const providerInputConfig = stripUnresolvedSecretApiKey(
     Object.keys(runtimeProviderConfig).length > 0 ? runtimeProviderConfig : sourceProviderConfig,
   );
+  // The same SecretRef-wrapper hazard exists on `messages.tts.providers.*`:
+  // strict speech resolvers normalize base TTS secrets before merging talk config.
+  const baseTtsConfig = stripUnresolvedSecretInputsFromBaseTtsProviders(
+    Object.keys(sourceBaseTts).length > 0 ? sourceBaseTts : runtimeBaseTts,
+  );
   const resolvedConfig =
     speechProvider?.resolveTalkConfig?.({
       cfg: params.runtimeConfig,
-      baseTtsConfig: Object.keys(sourceBaseTts).length > 0 ? sourceBaseTts : runtimeBaseTts,
+      baseTtsConfig,
       talkProviderConfig: providerInputConfig,
       timeoutMs:
         typeof sourceBaseTts.timeoutMs === "number"
@@ -406,6 +411,55 @@ function stripUnresolvedSecretApiKey(config: TalkProviderConfig): TalkProviderCo
   return rest;
 }
 
+const BASE_TTS_PROVIDER_SECRET_INPUT_KEYS = ["apiKey", "token"] as const;
+
+function stripUnresolvedSecretInputsFromProviderConfig(
+  config: Record<string, unknown>,
+): Record<string, unknown> {
+  let next: Record<string, unknown> | undefined;
+  for (const key of BASE_TTS_PROVIDER_SECRET_INPUT_KEYS) {
+    const value = config[key];
+    if (value === undefined || typeof value === "string") {
+      continue;
+    }
+    next ??= { ...config };
+    delete next[key];
+  }
+  return next ?? config;
+}
+
+function stripUnresolvedSecretInputsFromBaseTtsProviders(
+  base: Record<string, unknown>,
+): Record<string, unknown> {
+  const providers = asRecord(base.providers);
+  if (!providers) {
+    return base;
+  }
+  let mutated = false;
+  // Null-prototype map so an attacker-influenced provider id like `__proto__`,
+  // `constructor`, or `prototype` cannot pollute Object.prototype via the
+  // dynamic `cleaned[providerId] = ...` assignment below. Provider-id keys
+  // come from operator config and may be plain JSON, so we cannot assume
+  // they're already validated upstream.
+  const cleaned: Record<string, unknown> = Object.create(null);
+  for (const [providerId, providerConfig] of Object.entries(providers)) {
+    const cfg = asRecord(providerConfig);
+    if (!cfg) {
+      cleaned[providerId] = providerConfig;
+      continue;
+    }
+    const next = stripUnresolvedSecretInputsFromProviderConfig(cfg);
+    if (next !== cfg) {
+      mutated = true;
+    }
+    cleaned[providerId] = next;
+  }
+  if (!mutated) {
+    return base;
+  }
+  return { ...base, providers: cleaned };
+}
+
 export const talkHandlers: GatewayRequestHandlers = {
   "talk.config": async ({ params, respond, client, context }) => {
     if (!validateTalkConfigParams(params)) {
diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts
index fb2a3c8d9ba..88dc5134e12 100644
--- a/src/gateway/server.talk-config.test.ts
+++ b/src/gateway/server.talk-config.test.ts
@@ -405,6 +405,173 @@ describe("gateway talk.config", () => {
     });
   });
 
+  it("does not throw when SecretRef secrets on messages.tts.providers flow through a strict provider resolver", async () => {
+    // Regression for the messages.tts.providers.<id> secret-input side of the same
+    // bug fixed by #72496 for talk.providers.<id>.apiKey. Speech provider
+    // resolvers read the active provider's secret fields out of
+    // baseTtsConfig.providers[id] to merge with talkProviderConfig, and call
+    // the same strict normalizeResolvedSecretInputString helper that throws
+    // on an unresolved SecretRef. Without stripping that wrapper from the
+    // base TTS providers map before handing it down, talk.config errors out
+    // even when talk.providers.<id>.apiKey is configured cleanly.
+    const messagesTtsProviderPath = `messages.tts.providers.${GENERIC_TALK_PROVIDER_ID}`;
+    const { writeConfigFile } = await import("../config/config.js");
+    await writeConfigFile({
+      talk: {
+        provider: GENERIC_TALK_PROVIDER_ID,
+        providers: {
+          [GENERIC_TALK_PROVIDER_ID]: {
+            voiceId: "voice-from-talk-config",
+          },
+        },
+      },
+      messages: {
+        tts: {
+          provider: GENERIC_TALK_PROVIDER_ID,
+          providers: {
+            [GENERIC_TALK_PROVIDER_ID]: {
+              apiKey: { source: "env", provider: "default", id: GENERIC_TALK_API_ENV },
+              token: { source: "env", provider: "default", id: GENERIC_TALK_API_ENV },
+            },
+          },
+        },
+      },
+    });
+
+    await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => {
+      await withSpeechProviders(
+        [
+          {
+            pluginId: "acme-strict-tts-base-test",
+            source: "test",
+            provider: {
+              id: GENERIC_TALK_PROVIDER_ID,
+              label: "Acme Strict Speech (messages.tts)",
+              isConfigured: () => true,
+              resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
+                // Mirrors strict speech providers: dig into secret inputs on
+                // the base TTS providers map and feed them through the strict
+                // resolver that throws on unresolved SecretRefs.
+                const baseProviders =
+                  (baseTtsConfig as { providers?: Record<string, unknown> }).providers ?? {};
+                const baseEntry = (baseProviders[GENERIC_TALK_PROVIDER_ID] ?? {}) as {
+                  apiKey?: unknown;
+                  token?: unknown;
+                };
+                const apiKey = normalizeResolvedSecretInputString({
+                  value: baseEntry.apiKey,
+                  path: `${messagesTtsProviderPath}.apiKey`,
+                });
+                const token = normalizeResolvedSecretInputString({
+                  value: baseEntry.token,
+                  path: `${messagesTtsProviderPath}.token`,
+                });
+                return {
+                  ...talkProviderConfig,
+                  ...(apiKey === undefined ? {} : { apiKey }),
+                  ...(token === undefined ? {} : { token }),
+                };
+              },
+              synthesize: async () => ({
+                audioBuffer: Buffer.from([1]),
+                outputFormat: "mp3",
+                fileExtension: ".mp3",
+                voiceCompatible: false,
+              }),
+            },
+          },
+        ],
+        async () => {
+          await withTalkConfigConnection(["operator.read"], async (ws) => {
+            const res = await fetchTalkConfig(ws);
+            expect(res.ok, JSON.stringify(res.error)).toBe(true);
+            const talk = res.payload?.config?.talk;
+            expect(talk?.provider).toBe(GENERIC_TALK_PROVIDER_ID);
+            expect(talk?.providers?.[GENERIC_TALK_PROVIDER_ID]?.voiceId).toBe(
+              "voice-from-talk-config",
+            );
+          });
+        },
+      );
+    });
+  });
+
+  it("does not pollute Object.prototype when messages.tts.providers contains a __proto__ key", async () => {
+    // Hardening regression: stripUnresolvedSecretInputsFromBaseTtsProviders
+    // rebuilds the providers map with dynamic keys from operator config. Using
+    // a plain `{}` would let `cleaned['__proto__'] = {...}` mutate
+    // Object.prototype. The helper uses `Object.create(null)` to make that
+    // assignment a normal property write on the local map instead.
+    const { writeConfigFile } = await import("../config/config.js");
+    await writeConfigFile({
+      talk: {
+        provider: GENERIC_TALK_PROVIDER_ID,
+        providers: {
+          [GENERIC_TALK_PROVIDER_ID]: {
+            voiceId: "voice-proto-pollution-guard",
+          },
+        },
+      },
+      messages: {
+        tts: {
+          provider: GENERIC_TALK_PROVIDER_ID,
+          providers: {
+            [GENERIC_TALK_PROVIDER_ID]: {
+              apiKey: { source: "env", provider: "default", id: GENERIC_TALK_API_ENV },
+            },
+            // Hostile operator-config payload — not a real provider id, just
+            // a value-shaped key with a SecretRef-shaped apiKey to force the
+            // strip path.
+            __proto__: {
+              apiKey: { source: "env", provider: "default", id: GENERIC_TALK_API_ENV },
+              polluted: "yes",
+            },
+          },
+        },
+      },
+    });
+
+    const sentinelKeyBefore = ({} as Record<string, unknown>).polluted;
+
+    await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => {
+      await withSpeechProviders(
+        [
+          {
+            pluginId: "acme-strict-tts-proto-test",
+            source: "test",
+            provider: {
+              id: GENERIC_TALK_PROVIDER_ID,
+              label: "Acme Strict Speech (proto guard)",
+              isConfigured: () => true,
+              resolveTalkConfig: ({ talkProviderConfig }) => talkProviderConfig,
+              synthesize: async () => ({
+                audioBuffer: Buffer.from([1]),
+                outputFormat: "mp3",
+                fileExtension: ".mp3",
+                voiceCompatible: false,
+              }),
+            },
+          },
+        ],
+        async () => {
+          await withTalkConfigConnection(["operator.read"], async (ws) => {
+            const res = await fetchTalkConfig(ws);
+            expect(res.ok, JSON.stringify(res.error)).toBe(true);
+            // The active provider's voice still comes through cleanly.
+            expect(res.payload?.config?.talk?.provider).toBe(GENERIC_TALK_PROVIDER_ID);
+          });
+        },
+      );
+    });
+
+    // The strip helper must not have leaked the hostile `polluted` field onto
+    // Object.prototype: a fresh empty object should not gain a `.polluted`
+    // property as a side effect of processing the request.
+    const sentinelKeyAfter = ({} as Record<string, unknown>).polluted;
+    expect(sentinelKeyAfter).toBe(sentinelKeyBefore);
+    expect(sentinelKeyAfter).toBeUndefined();
+  });
+
   it("returns canonical provider talk payloads", async () => {
     await writeTalkConfig({
       provider: GENERIC_TALK_PROVIDER_ID,