fix: use runtime snapshot for TTS SecretRefs (#72581)

* fix: use runtime snapshot for tts secrets

* fix: keep tts secret snapshot selection local

* docs: add tts secretref changelog entry
This commit is contained in:
Josh Avant
2026-04-27 01:02:17 -05:00
committed by GitHub
parent ac5a1d1622
commit b3d9948c4c
3 changed files with 154 additions and 42 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
- macOS Gateway: detect installed-but-unloaded LaunchAgent split-brain states during status, doctor, and restart, and re-bootstrap launchd supervision before falling back to unmanaged listener restarts. Fixes #67335, #53475, and #71060; refs #58890, #60885, and #70801. Thanks @ze1tgeist88, @dafacto, and @vishutdhar.
- Plugins/install: stage bundled plugin runtime dependencies before Gateway startup and drain update restarts while preserving per-plugin isolation when pre-stage scan or install fails. Thanks @codex.
- TTS/SecretRef: resolve `messages.tts.providers.*.apiKey` from the active runtime snapshot so SecretRef-backed MiniMax and other TTS provider keys work in runtime reply/audio paths. Fixes #68690. Thanks @joshavant.
- CLI/startup: read generated startup metadata from the bundled `dist` layout before falling back to live help rendering, so root/browser help and channel-option bootstrap stay on the fast path. Thanks @vincentkoc.
- CLI/help: treat positional `help` invocations like `openclaw channels help` as help paths for startup gating, avoiding model/auth warmup while preserving positional arguments such as `openclaw docs help`. Thanks @gumadeiras.
- Web search: route plugin-scoped web_search SecretRefs through the active runtime config snapshot so provider execution receives resolved credentials across app/runtime paths, including `plugins.entries.brave.config.webSearch.apiKey`. Fixes #68690. Thanks @VACInc.

View File

@@ -1,6 +1,10 @@
import { rmSync } from "node:fs";
import path from "node:path";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import {
clearRuntimeConfigSnapshot,
setRuntimeConfigSnapshot,
type OpenClawConfig,
} from "openclaw/plugin-sdk/config-runtime";
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-payload";
import type {
SpeechProviderPlugin,
@@ -163,6 +167,7 @@ async function expectTtsPayloadResult(params: {
describe("speech-core native voice-note routing", () => {
afterEach(() => {
clearRuntimeConfigSnapshot();
synthesizeMock.mockClear();
prepareSynthesisMock.mockClear();
installSpeechProviders([createMockSpeechProvider()]);
@@ -214,6 +219,63 @@ describe("speech-core native voice-note routing", () => {
});
});
// Regression test: when the caller passes the source config (whose TTS apiKey is
// still an unresolved SecretRef object), synthesis must run against the active
// runtime snapshot that carries the resolved key.
it("uses the active runtime snapshot when source config still contains TTS SecretRefs", async () => {
// Source config: apiKey is a SecretRef-shaped object, not a usable string.
const sourceConfig = {
messages: {
tts: {
enabled: true,
provider: "mock",
providers: {
mock: {
apiKey: { source: "exec", provider: "mockexec", id: "minimax/tts/apiKey" },
},
},
},
},
} as unknown as OpenClawConfig;
// Runtime config: same shape, but apiKey is the resolved string value.
const runtimeConfig = {
messages: {
tts: {
enabled: true,
provider: "mock",
providers: {
mock: {
apiKey: "resolved-minimax-key",
},
},
},
},
} as unknown as OpenClawConfig;
// The mock provider only reports itself configured when it sees the resolved key,
// and its resolveConfig simply forwards whatever apiKey the raw config holds.
installSpeechProviders([
createMockSpeechProvider("mock", {
isConfigured: ({ providerConfig }) => providerConfig.apiKey === "resolved-minimax-key",
resolveConfig: ({ rawConfig }) => {
const providers = rawConfig.providers as Record<string, { apiKey?: unknown }> | undefined;
return {
apiKey: providers?.mock?.apiKey,
};
},
}),
]);
// Register the runtime snapshot together with the source config it was derived from.
setRuntimeConfigSnapshot(runtimeConfig, sourceConfig);
// Deliberately pass the source config, not the runtime one.
const result = await synthesizeSpeech({
text: "Runtime snapshot TTS SecretRef",
cfg: sourceConfig,
disableFallback: true,
});
expect(result.success).toBe(true);
// The provider must have been invoked with the runtime config and the resolved key.
expect(synthesizeMock).toHaveBeenCalledWith(
expect.objectContaining({
cfg: runtimeConfig,
providerConfig: expect.objectContaining({
apiKey: "resolved-minimax-key",
}),
}),
);
});
it.each(["feishu", "whatsapp"] as const)(
"marks %s voice-note TTS for channel-side transcoding when provider returns mp3",
async (channel) => {

View File

@@ -10,13 +10,15 @@ import {
} from "node:fs";
import path from "node:path";
import { resolveChannelTtsVoiceDelivery } from "openclaw/plugin-sdk/channel-targets";
import type {
OpenClawConfig,
ResolvedTtsPersona,
TtsAutoMode,
TtsConfig,
TtsModelOverrideConfig,
TtsProvider,
import {
getRuntimeConfigSnapshot,
getRuntimeConfigSourceSnapshot,
type OpenClawConfig,
type ResolvedTtsPersona,
type TtsAutoMode,
type TtsConfig,
type TtsModelOverrideConfig,
type TtsProvider,
} from "openclaw/plugin-sdk/config-runtime";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { redactSensitiveText } from "openclaw/plugin-sdk/logging-core";
@@ -230,6 +232,43 @@ function _resolveRegistryDefaultSpeechProviderId(cfg?: OpenClawConfig): TtsProvi
return sortSpeechProvidersForAutoSelection(cfg)[0]?.id ?? "";
}
/**
 * Serializes a value to JSON-like text with object keys emitted in sorted order,
 * so two structurally equal values produce the same string regardless of the
 * order in which their properties were inserted.
 *
 * Follows `JSON.stringify` conventions for values that have no JSON form:
 * - a top-level or array-element `undefined`/function/symbol becomes `null`;
 * - an object property holding `undefined`, a function, or a symbol is omitted,
 *   so `{ a: undefined }` and `{}` serialize identically and `{ a: undefined }`
 *   is no longer confused with `{ a: null }`.
 *
 * @param value - Any value; objects are walked recursively.
 * @returns The canonical string form of `value`.
 * @throws Whatever `JSON.stringify` throws for unsupported primitives (for
 *   example `bigint`), and a `RangeError` on cyclic structures because the
 *   recursion has no cycle detection.
 */
function stableConfigStringify(value: unknown): string {
  if (value === null || typeof value !== "object") {
    // JSON.stringify returns undefined for undefined/functions/symbols.
    return JSON.stringify(value) ?? "null";
  }
  if (Array.isArray(value)) {
    return `[${value.map((entry) => stableConfigStringify(entry)).join(",")}]`;
  }
  const record = value as Record<string, unknown>;
  const parts: string[] = [];
  // Object.keys returns a fresh array, so sorting it in place mutates nothing shared.
  for (const key of Object.keys(record).sort()) {
    const entry = record[key];
    if (entry === undefined || typeof entry === "function" || typeof entry === "symbol") {
      // Unserializable property values are dropped, matching JSON.stringify.
      continue;
    }
    parts.push(`${JSON.stringify(key)}:${stableConfigStringify(entry)}`);
  }
  return `{${parts.join(",")}}`;
}
/**
 * Reports whether two config objects are the same snapshot.
 *
 * Identical references match immediately. Otherwise the two values are compared
 * through their `stableConfigStringify` output. If serialization throws, the
 * configs are treated as not matching rather than propagating the error.
 *
 * @param left - First config to compare.
 * @param right - Second config to compare.
 * @returns `true` when both are the same reference or serialize identically.
 */
function configSnapshotsMatch(left: OpenClawConfig, right: OpenClawConfig): boolean {
  if (left !== right) {
    try {
      const leftText = stableConfigStringify(left);
      const rightText = stableConfigStringify(right);
      return leftText === rightText;
    } catch {
      // Unserializable config: be conservative and report a mismatch.
      return false;
    }
  }
  return true;
}
/**
 * Chooses the config that TTS code should operate on.
 *
 * - With no active runtime snapshot, or when `cfg` already is that snapshot,
 *   `cfg` is returned unchanged.
 * - When a runtime snapshot exists and either no source snapshot is recorded or
 *   `cfg` matches the recorded source snapshot, the runtime snapshot is returned.
 * - Any other `cfg` is returned unchanged.
 *
 * @param cfg - Config supplied by the caller.
 * @returns Either `cfg` itself or the active runtime snapshot.
 */
function resolveTtsRuntimeConfig(cfg: OpenClawConfig): OpenClawConfig {
  const activeSnapshot = getRuntimeConfigSnapshot();
  if (activeSnapshot && cfg !== activeSnapshot) {
    // The source snapshot is only looked up once a distinct runtime snapshot is known.
    const sourceSnapshot = getRuntimeConfigSourceSnapshot();
    const swapToRuntime = !sourceSnapshot || configSnapshotsMatch(cfg, sourceSnapshot);
    return swapToRuntime ? activeSnapshot : cfg;
  }
  return cfg;
}
function asProviderConfig(value: unknown): SpeechProviderConfig {
return typeof value === "object" && value !== null && !Array.isArray(value)
? (value as SpeechProviderConfig)
@@ -343,7 +382,7 @@ function resolveLazyProviderConfig(
const canonical =
normalizeConfiguredSpeechProviderId(providerId) ?? normalizeLowercaseStringOrEmpty(providerId);
const existing = config.providerConfigs[canonical];
const effectiveCfg = cfg ?? config.sourceConfig;
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : config.sourceConfig;
if (existing && !effectiveCfg) {
return existing;
}
@@ -403,17 +442,19 @@ export function getResolvedSpeechProviderConfig(
providerId: string,
cfg?: OpenClawConfig,
): SpeechProviderConfig {
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : config.sourceConfig;
const canonical =
canonicalizeSpeechProviderId(providerId, cfg) ??
canonicalizeSpeechProviderId(providerId, effectiveCfg) ??
normalizeConfiguredSpeechProviderId(providerId) ??
normalizeLowercaseStringOrEmpty(providerId);
return resolveLazyProviderConfig(config, canonical, cfg);
return resolveLazyProviderConfig(config, canonical, effectiveCfg);
}
export function resolveTtsConfig(
cfg: OpenClawConfig,
contextOrAgentId?: string | TtsConfigResolutionContext,
): ResolvedTtsConfig {
cfg = resolveTtsRuntimeConfig(cfg);
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, contextOrAgentId);
const providerSource = raw.provider ? "config" : "default";
const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS;
@@ -504,6 +545,7 @@ export function buildTtsSystemPromptHint(
cfg: OpenClawConfig,
agentId?: string,
): string | undefined {
cfg = resolveTtsRuntimeConfig(cfg);
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({ cfg, agentId });
if (autoMode === "off") {
return undefined;
@@ -667,17 +709,18 @@ export function resolveExplicitTtsOverrides(params: {
channelId?: string;
accountId?: string;
}): TtsDirectiveOverrides {
const cfg = resolveTtsRuntimeConfig(params.cfg);
const providerInput = params.provider?.trim();
const modelId = params.modelId?.trim();
const voiceId = params.voiceId?.trim();
const config = resolveTtsConfig(params.cfg, {
const config = resolveTtsConfig(cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
const selectedProvider =
canonicalizeSpeechProviderId(providerInput, params.cfg) ??
canonicalizeSpeechProviderId(providerInput, cfg) ??
(modelId || voiceId ? getTtsProvider(config, prefsPath) : undefined);
if (providerInput && !selectedProvider) {
@@ -692,7 +735,7 @@ export function resolveExplicitTtsOverrides(params: {
throw new Error("TTS model or voice overrides require a resolved provider.");
}
const provider = getSpeechProvider(selectedProvider, params.cfg);
const provider = getSpeechProvider(selectedProvider, cfg);
if (!provider) {
throw new Error(`speech provider ${selectedProvider} is not registered`);
}
@@ -812,9 +855,10 @@ function shouldDeliverTtsAsVoice(params: {
}
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary;
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : undefined;
const normalizedPrimary = canonicalizeSpeechProviderId(primary, effectiveCfg) ?? primary;
const ordered = new Set<TtsProvider>([normalizedPrimary]);
for (const provider of sortSpeechProvidersForAutoSelection(cfg)) {
for (const provider of sortSpeechProvidersForAutoSelection(effectiveCfg)) {
const normalized = provider.id;
if (normalized !== normalizedPrimary) {
ordered.add(normalized);
@@ -828,14 +872,15 @@ export function isTtsProviderConfigured(
provider: TtsProvider,
cfg?: OpenClawConfig,
): boolean {
const resolvedProvider = getSpeechProvider(provider, cfg);
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : config.sourceConfig;
const resolvedProvider = getSpeechProvider(provider, effectiveCfg);
if (!resolvedProvider) {
return false;
}
return (
resolvedProvider.isConfigured({
cfg,
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, cfg),
cfg: effectiveCfg,
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, effectiveCfg),
timeoutMs: config.timeoutMs,
}) ?? false
);
@@ -1011,6 +1056,7 @@ function resolveTtsRequestSetup(params: {
accountId?: string;
}):
| {
cfg: OpenClawConfig;
config: ResolvedTtsConfig;
persona?: ResolvedTtsPersona;
providers: TtsProvider[];
@@ -1018,7 +1064,8 @@ function resolveTtsRequestSetup(params: {
| {
error: string;
} {
const config = resolveTtsConfig(params.cfg, {
const cfg = resolveTtsRuntimeConfig(params.cfg);
const config = resolveTtsConfig(cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
@@ -1031,12 +1078,12 @@ function resolveTtsRequestSetup(params: {
}
const userProvider = getTtsProvider(config, prefsPath);
const provider =
canonicalizeSpeechProviderId(params.providerOverride, params.cfg) ?? userProvider;
const provider = canonicalizeSpeechProviderId(params.providerOverride, cfg) ?? userProvider;
return {
cfg,
config,
persona: getTtsPersona(config, prefsPath),
providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, params.cfg),
providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, cfg),
};
}
@@ -1116,7 +1163,7 @@ export async function synthesizeSpeech(params: {
return { success: false, error: setup.error };
}
const { config, persona, providers } = setup;
const { cfg, config, persona, providers } = setup;
const timeoutMs = params.timeoutMs ?? config.timeoutMs;
const target = resolveTtsSynthesisTarget(params.channel);
@@ -1134,7 +1181,7 @@ export async function synthesizeSpeech(params: {
try {
const resolvedProvider = resolveReadySpeechProvider({
provider,
cfg: params.cfg,
cfg,
config,
persona,
});
@@ -1156,7 +1203,7 @@ export async function synthesizeSpeech(params: {
const prepared = await prepareSpeechSynthesis({
provider: resolvedProvider.provider,
text: params.text,
cfg: params.cfg,
cfg,
providerConfig: resolvedProvider.providerConfig,
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
persona: resolvedProvider.synthesisPersona,
@@ -1166,7 +1213,7 @@ export async function synthesizeSpeech(params: {
});
const synthesis = await resolvedProvider.provider.synthesize({
text: prepared.text,
cfg: params.cfg,
cfg,
providerConfig: prepared.providerConfig,
target,
providerOverrides: prepared.providerOverrides,
@@ -1243,7 +1290,7 @@ export async function textToSpeechTelephony(params: {
return { success: false, error: setup.error };
}
const { config, persona, providers } = setup;
const { cfg, config, persona, providers } = setup;
const errors: string[] = [];
const attemptedProviders: string[] = [];
const attempts: TtsProviderAttempt[] = [];
@@ -1258,7 +1305,7 @@ export async function textToSpeechTelephony(params: {
try {
const resolvedProvider = resolveReadySpeechProvider({
provider,
cfg: params.cfg,
cfg,
config,
persona,
requireTelephony: true,
@@ -1284,7 +1331,7 @@ export async function textToSpeechTelephony(params: {
const prepared = await prepareSpeechSynthesis({
provider: resolvedProvider.provider,
text: params.text,
cfg: params.cfg,
cfg,
providerConfig: resolvedProvider.providerConfig,
persona: resolvedProvider.synthesisPersona,
personaProviderConfig: resolvedProvider.personaProviderConfig,
@@ -1293,7 +1340,7 @@ export async function textToSpeechTelephony(params: {
});
const synthesis = await synthesizeTelephony({
text: prepared.text,
cfg: params.cfg,
cfg,
providerConfig: prepared.providerConfig,
timeoutMs: config.timeoutMs,
});
@@ -1360,15 +1407,16 @@ export async function listSpeechVoices(params: {
apiKey?: string;
baseUrl?: string;
}): Promise<SpeechVoiceOption[]> {
const provider = canonicalizeSpeechProviderId(params.provider, params.cfg);
const cfg = params.cfg ? resolveTtsRuntimeConfig(params.cfg) : undefined;
const provider = canonicalizeSpeechProviderId(params.provider, cfg);
if (!provider) {
throw new Error("speech provider id is required");
}
const config = params.config ?? (params.cfg ? resolveTtsConfig(params.cfg) : undefined);
const config = params.config ?? (cfg ? resolveTtsConfig(cfg) : undefined);
if (!config) {
throw new Error(`speech provider ${provider} requires cfg or resolved config`);
}
const resolvedProvider = getSpeechProvider(provider, params.cfg);
const resolvedProvider = getSpeechProvider(provider, cfg);
if (!resolvedProvider) {
throw new Error(`speech provider ${provider} is not registered`);
}
@@ -1376,8 +1424,8 @@ export async function listSpeechVoices(params: {
throw new Error(`speech provider ${provider} does not support voice listing`);
}
return await resolvedProvider.listVoices({
cfg: params.cfg,
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
cfg,
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, cfg),
apiKey: params.apiKey,
baseUrl: params.baseUrl,
});
@@ -1396,8 +1444,9 @@ export async function maybeApplyTtsToPayload(params: {
if (params.payload.isCompactionNotice) {
return params.payload;
}
const cfg = resolveTtsRuntimeConfig(params.cfg);
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({
cfg: params.cfg,
cfg,
sessionAuto: params.ttsAuto,
agentId: params.agentId,
channelId: params.channel,
@@ -1406,7 +1455,7 @@ export async function maybeApplyTtsToPayload(params: {
if (autoMode === "off") {
return params.payload;
}
const config = resolveTtsConfig(params.cfg, {
const config = resolveTtsConfig(cfg, {
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
@@ -1416,7 +1465,7 @@ export async function maybeApplyTtsToPayload(params: {
const reply = resolveSendableOutboundReplyParts(params.payload);
const text = reply.text;
const directives = parseTtsDirectives(text, config.modelOverrides, {
cfg: params.cfg,
cfg,
providerConfigs: config.providerConfigs,
preferredProviderId: activeProvider,
});
@@ -1426,7 +1475,7 @@ export async function maybeApplyTtsToPayload(params: {
if (isVerbose()) {
const effectiveProvider = directives.overrides?.provider
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ?? activeProvider)
? (canonicalizeSpeechProviderId(directives.overrides.provider, cfg) ?? activeProvider)
: activeProvider;
logVerbose(
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
@@ -1486,7 +1535,7 @@ export async function maybeApplyTtsToPayload(params: {
const summary = await summarizeText({
text: textForAudio,
targetLength: maxLength,
cfg: params.cfg,
cfg,
config,
timeoutMs: config.timeoutMs,
});
@@ -1514,7 +1563,7 @@ export async function maybeApplyTtsToPayload(params: {
const ttsStart = Date.now();
const result = await textToSpeech({
text: textForAudio,
cfg: params.cfg,
cfg,
prefsPath,
channel: params.channel,
overrides: directives.overrides,