mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:00:42 +00:00
fix: use runtime snapshot for TTS SecretRefs (#72581)
* fix: use runtime snapshot for tts secrets
* fix: keep tts secret snapshot selection local
* docs: add tts secretref changelog entry
This commit is contained in:
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- macOS Gateway: detect installed-but-unloaded LaunchAgent split-brain states during status, doctor, and restart, and re-bootstrap launchd supervision before falling back to unmanaged listener restarts. Fixes #67335, #53475, and #71060; refs #58890, #60885, and #70801. Thanks @ze1tgeist88, @dafacto, and @vishutdhar.
|
||||
- Plugins/install: stage bundled plugin runtime dependencies before Gateway startup and drain update restarts while preserving per-plugin isolation when pre-stage scan or install fails. Thanks @codex.
|
||||
- TTS/SecretRef: resolve `messages.tts.providers.*.apiKey` from the active runtime snapshot so SecretRef-backed MiniMax and other TTS provider keys work in runtime reply/audio paths. Fixes #68690. Thanks @joshavant.
|
||||
- CLI/startup: read generated startup metadata from the bundled `dist` layout before falling back to live help rendering, so root/browser help and channel-option bootstrap stay on the fast path. Thanks @vincentkoc.
|
||||
- CLI/help: treat positional `help` invocations like `openclaw channels help` as help paths for startup gating, avoiding model/auth warmup while preserving positional arguments such as `openclaw docs help`. Thanks @gumadeiras.
|
||||
- Web search: route plugin-scoped web_search SecretRefs through the active runtime config snapshot so provider execution receives resolved credentials across app/runtime paths, including `plugins.entries.brave.config.webSearch.apiKey`. Fixes #68690. Thanks @VACInc.
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import { rmSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import {
|
||||
clearRuntimeConfigSnapshot,
|
||||
setRuntimeConfigSnapshot,
|
||||
type OpenClawConfig,
|
||||
} from "openclaw/plugin-sdk/config-runtime";
|
||||
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-payload";
|
||||
import type {
|
||||
SpeechProviderPlugin,
|
||||
@@ -163,6 +167,7 @@ async function expectTtsPayloadResult(params: {
|
||||
|
||||
describe("speech-core native voice-note routing", () => {
|
||||
afterEach(() => {
|
||||
clearRuntimeConfigSnapshot();
|
||||
synthesizeMock.mockClear();
|
||||
prepareSynthesisMock.mockClear();
|
||||
installSpeechProviders([createMockSpeechProvider()]);
|
||||
@@ -214,6 +219,63 @@ describe("speech-core native voice-note routing", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("uses the active runtime snapshot when source config still contains TTS SecretRefs", async () => {
  // Both configs share the same shape; only the mock provider's apiKey differs
  // (unresolved SecretRef object vs. the already-resolved string).
  const buildTtsConfig = (apiKey: unknown): OpenClawConfig =>
    ({
      messages: {
        tts: {
          enabled: true,
          provider: "mock",
          providers: {
            mock: {
              apiKey,
            },
          },
        },
      },
    }) as unknown as OpenClawConfig;

  const resolvedApiKey = "resolved-minimax-key";
  const unresolvedConfig = buildTtsConfig({
    source: "exec",
    provider: "mockexec",
    id: "minimax/tts/apiKey",
  });
  const snapshotConfig = buildTtsConfig(resolvedApiKey);

  // The mock provider only reports itself as configured when it is handed the
  // resolved key, so a SecretRef object leaking through makes the test fail.
  installSpeechProviders([
    createMockSpeechProvider("mock", {
      isConfigured: ({ providerConfig }) => providerConfig.apiKey === resolvedApiKey,
      resolveConfig: ({ rawConfig }) => {
        const providers = rawConfig.providers as Record<string, { apiKey?: unknown }> | undefined;
        return { apiKey: providers?.mock?.apiKey };
      },
    }),
  ]);

  // Register the resolved config as the runtime snapshot and the SecretRef
  // config as its source.
  setRuntimeConfigSnapshot(snapshotConfig, unresolvedConfig);

  // The caller still passes the unresolved source config.
  const result = await synthesizeSpeech({
    text: "Runtime snapshot TTS SecretRef",
    cfg: unresolvedConfig,
    disableFallback: true,
  });

  expect(result.success).toBe(true);

  // The provider must have been invoked with the runtime snapshot and its key.
  const expectedProviderConfig = expect.objectContaining({ apiKey: resolvedApiKey });
  expect(synthesizeMock).toHaveBeenCalledWith(
    expect.objectContaining({
      cfg: snapshotConfig,
      providerConfig: expectedProviderConfig,
    }),
  );
});
|
||||
|
||||
it.each(["feishu", "whatsapp"] as const)(
|
||||
"marks %s voice-note TTS for channel-side transcoding when provider returns mp3",
|
||||
async (channel) => {
|
||||
|
||||
@@ -10,13 +10,15 @@ import {
|
||||
} from "node:fs";
|
||||
import path from "node:path";
|
||||
import { resolveChannelTtsVoiceDelivery } from "openclaw/plugin-sdk/channel-targets";
|
||||
import type {
|
||||
OpenClawConfig,
|
||||
ResolvedTtsPersona,
|
||||
TtsAutoMode,
|
||||
TtsConfig,
|
||||
TtsModelOverrideConfig,
|
||||
TtsProvider,
|
||||
import {
|
||||
getRuntimeConfigSnapshot,
|
||||
getRuntimeConfigSourceSnapshot,
|
||||
type OpenClawConfig,
|
||||
type ResolvedTtsPersona,
|
||||
type TtsAutoMode,
|
||||
type TtsConfig,
|
||||
type TtsModelOverrideConfig,
|
||||
type TtsProvider,
|
||||
} from "openclaw/plugin-sdk/config-runtime";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import { redactSensitiveText } from "openclaw/plugin-sdk/logging-core";
|
||||
@@ -230,6 +232,43 @@ function _resolveRegistryDefaultSpeechProviderId(cfg?: OpenClawConfig): TtsProvi
|
||||
return sortSpeechProvidersForAutoSelection(cfg)[0]?.id ?? "";
|
||||
}
|
||||
|
||||
/**
 * Serializes a config-like value to a canonical JSON-style string whose output
 * does not depend on object key insertion order, so two structurally equal
 * values produce the same string.
 *
 * Semantics follow `JSON.stringify` for plain data:
 * - object members whose value is `undefined`, a function, or a symbol are
 *   omitted, so `{ a: undefined }` serializes the same as `{}` and is no
 *   longer confused with `{ a: null }`;
 * - array entries that are not representable are emitted as `null`;
 * - a non-representable top-level value is emitted as `"null"`.
 *
 * @param value - The value to serialize.
 * @returns The canonical string form of `value`.
 * @throws Whatever `JSON.stringify` throws for a primitive it cannot encode
 *   (for example a BigInt); deeply cyclic input will exhaust the stack.
 */
function stableConfigStringify(value: unknown): string {
  if (value === null || typeof value !== "object") {
    // JSON.stringify yields undefined for undefined/function/symbol input.
    return JSON.stringify(value) ?? "null";
  }
  if (Array.isArray(value)) {
    return `[${value.map((entry) => stableConfigStringify(entry)).join(",")}]`;
  }
  const record = value as Record<string, unknown>;
  const members: string[] = [];
  // Object.keys returns a fresh array, so sorting it in place is safe and
  // gives the same ordering as toSorted() without requiring an ES2023 runtime.
  for (const key of Object.keys(record).sort()) {
    const member = record[key];
    if (member === undefined || typeof member === "function" || typeof member === "symbol") {
      // Absent and explicitly-undefined members must compare equal.
      continue;
    }
    members.push(`${JSON.stringify(key)}:${stableConfigStringify(member)}`);
  }
  return `{${members.join(",")}}`;
}
|
||||
|
||||
/**
 * Reports whether two configs are the same object or serialize to the same
 * canonical string via `stableConfigStringify`.
 *
 * Best-effort: if serialization throws for either side, the configs are
 * treated as not matching rather than propagating the error.
 *
 * @param left - First config to compare.
 * @param right - Second config to compare.
 * @returns `true` when the configs are identical or structurally equal.
 */
function configSnapshotsMatch(left: OpenClawConfig, right: OpenClawConfig): boolean {
  // Same reference: no need to serialize anything.
  if (left === right) {
    return true;
  }

  let leftCanonical: string;
  let rightCanonical: string;
  try {
    leftCanonical = stableConfigStringify(left);
    rightCanonical = stableConfigStringify(right);
  } catch {
    // A value that cannot be serialized cannot be proven equal.
    return false;
  }
  return leftCanonical === rightCanonical;
}
|
||||
|
||||
/**
 * Chooses which config TTS code should read from: the caller's `cfg` or the
 * active runtime config snapshot.
 *
 * The runtime snapshot is returned only when one exists, `cfg` is not already
 * that snapshot, and either no source snapshot is registered or `cfg` matches
 * the source snapshot. In every other case `cfg` is returned unchanged.
 *
 * @param cfg - The config supplied by the caller.
 * @returns The runtime snapshot when `cfg` stands for its source, else `cfg`.
 */
function resolveTtsRuntimeConfig(cfg: OpenClawConfig): OpenClawConfig {
  const activeSnapshot = getRuntimeConfigSnapshot();
  if (activeSnapshot && cfg !== activeSnapshot) {
    const sourceSnapshot = getRuntimeConfigSourceSnapshot();
    // Without a recorded source there is nothing to compare against, so the
    // active snapshot wins; otherwise cfg must match that source.
    const cfgStandsForSource = !sourceSnapshot || configSnapshotsMatch(cfg, sourceSnapshot);
    return cfgStandsForSource ? activeSnapshot : cfg;
  }
  // No snapshot installed, or the caller already holds it.
  return cfg;
}
|
||||
|
||||
function asProviderConfig(value: unknown): SpeechProviderConfig {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value)
|
||||
? (value as SpeechProviderConfig)
|
||||
@@ -343,7 +382,7 @@ function resolveLazyProviderConfig(
|
||||
const canonical =
|
||||
normalizeConfiguredSpeechProviderId(providerId) ?? normalizeLowercaseStringOrEmpty(providerId);
|
||||
const existing = config.providerConfigs[canonical];
|
||||
const effectiveCfg = cfg ?? config.sourceConfig;
|
||||
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : config.sourceConfig;
|
||||
if (existing && !effectiveCfg) {
|
||||
return existing;
|
||||
}
|
||||
@@ -403,17 +442,19 @@ export function getResolvedSpeechProviderConfig(
|
||||
providerId: string,
|
||||
cfg?: OpenClawConfig,
|
||||
): SpeechProviderConfig {
|
||||
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : config.sourceConfig;
|
||||
const canonical =
|
||||
canonicalizeSpeechProviderId(providerId, cfg) ??
|
||||
canonicalizeSpeechProviderId(providerId, effectiveCfg) ??
|
||||
normalizeConfiguredSpeechProviderId(providerId) ??
|
||||
normalizeLowercaseStringOrEmpty(providerId);
|
||||
return resolveLazyProviderConfig(config, canonical, cfg);
|
||||
return resolveLazyProviderConfig(config, canonical, effectiveCfg);
|
||||
}
|
||||
|
||||
export function resolveTtsConfig(
|
||||
cfg: OpenClawConfig,
|
||||
contextOrAgentId?: string | TtsConfigResolutionContext,
|
||||
): ResolvedTtsConfig {
|
||||
cfg = resolveTtsRuntimeConfig(cfg);
|
||||
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, contextOrAgentId);
|
||||
const providerSource = raw.provider ? "config" : "default";
|
||||
const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
||||
@@ -504,6 +545,7 @@ export function buildTtsSystemPromptHint(
|
||||
cfg: OpenClawConfig,
|
||||
agentId?: string,
|
||||
): string | undefined {
|
||||
cfg = resolveTtsRuntimeConfig(cfg);
|
||||
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({ cfg, agentId });
|
||||
if (autoMode === "off") {
|
||||
return undefined;
|
||||
@@ -667,17 +709,18 @@ export function resolveExplicitTtsOverrides(params: {
|
||||
channelId?: string;
|
||||
accountId?: string;
|
||||
}): TtsDirectiveOverrides {
|
||||
const cfg = resolveTtsRuntimeConfig(params.cfg);
|
||||
const providerInput = params.provider?.trim();
|
||||
const modelId = params.modelId?.trim();
|
||||
const voiceId = params.voiceId?.trim();
|
||||
const config = resolveTtsConfig(params.cfg, {
|
||||
const config = resolveTtsConfig(cfg, {
|
||||
agentId: params.agentId,
|
||||
channelId: params.channelId,
|
||||
accountId: params.accountId,
|
||||
});
|
||||
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
|
||||
const selectedProvider =
|
||||
canonicalizeSpeechProviderId(providerInput, params.cfg) ??
|
||||
canonicalizeSpeechProviderId(providerInput, cfg) ??
|
||||
(modelId || voiceId ? getTtsProvider(config, prefsPath) : undefined);
|
||||
|
||||
if (providerInput && !selectedProvider) {
|
||||
@@ -692,7 +735,7 @@ export function resolveExplicitTtsOverrides(params: {
|
||||
throw new Error("TTS model or voice overrides require a resolved provider.");
|
||||
}
|
||||
|
||||
const provider = getSpeechProvider(selectedProvider, params.cfg);
|
||||
const provider = getSpeechProvider(selectedProvider, cfg);
|
||||
if (!provider) {
|
||||
throw new Error(`speech provider ${selectedProvider} is not registered`);
|
||||
}
|
||||
@@ -812,9 +855,10 @@ function shouldDeliverTtsAsVoice(params: {
|
||||
}
|
||||
|
||||
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
|
||||
const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary;
|
||||
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : undefined;
|
||||
const normalizedPrimary = canonicalizeSpeechProviderId(primary, effectiveCfg) ?? primary;
|
||||
const ordered = new Set<TtsProvider>([normalizedPrimary]);
|
||||
for (const provider of sortSpeechProvidersForAutoSelection(cfg)) {
|
||||
for (const provider of sortSpeechProvidersForAutoSelection(effectiveCfg)) {
|
||||
const normalized = provider.id;
|
||||
if (normalized !== normalizedPrimary) {
|
||||
ordered.add(normalized);
|
||||
@@ -828,14 +872,15 @@ export function isTtsProviderConfigured(
|
||||
provider: TtsProvider,
|
||||
cfg?: OpenClawConfig,
|
||||
): boolean {
|
||||
const resolvedProvider = getSpeechProvider(provider, cfg);
|
||||
const effectiveCfg = cfg ? resolveTtsRuntimeConfig(cfg) : config.sourceConfig;
|
||||
const resolvedProvider = getSpeechProvider(provider, effectiveCfg);
|
||||
if (!resolvedProvider) {
|
||||
return false;
|
||||
}
|
||||
return (
|
||||
resolvedProvider.isConfigured({
|
||||
cfg,
|
||||
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, cfg),
|
||||
cfg: effectiveCfg,
|
||||
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, effectiveCfg),
|
||||
timeoutMs: config.timeoutMs,
|
||||
}) ?? false
|
||||
);
|
||||
@@ -1011,6 +1056,7 @@ function resolveTtsRequestSetup(params: {
|
||||
accountId?: string;
|
||||
}):
|
||||
| {
|
||||
cfg: OpenClawConfig;
|
||||
config: ResolvedTtsConfig;
|
||||
persona?: ResolvedTtsPersona;
|
||||
providers: TtsProvider[];
|
||||
@@ -1018,7 +1064,8 @@ function resolveTtsRequestSetup(params: {
|
||||
| {
|
||||
error: string;
|
||||
} {
|
||||
const config = resolveTtsConfig(params.cfg, {
|
||||
const cfg = resolveTtsRuntimeConfig(params.cfg);
|
||||
const config = resolveTtsConfig(cfg, {
|
||||
agentId: params.agentId,
|
||||
channelId: params.channelId,
|
||||
accountId: params.accountId,
|
||||
@@ -1031,12 +1078,12 @@ function resolveTtsRequestSetup(params: {
|
||||
}
|
||||
|
||||
const userProvider = getTtsProvider(config, prefsPath);
|
||||
const provider =
|
||||
canonicalizeSpeechProviderId(params.providerOverride, params.cfg) ?? userProvider;
|
||||
const provider = canonicalizeSpeechProviderId(params.providerOverride, cfg) ?? userProvider;
|
||||
return {
|
||||
cfg,
|
||||
config,
|
||||
persona: getTtsPersona(config, prefsPath),
|
||||
providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, params.cfg),
|
||||
providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, cfg),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1116,7 +1163,7 @@ export async function synthesizeSpeech(params: {
|
||||
return { success: false, error: setup.error };
|
||||
}
|
||||
|
||||
const { config, persona, providers } = setup;
|
||||
const { cfg, config, persona, providers } = setup;
|
||||
const timeoutMs = params.timeoutMs ?? config.timeoutMs;
|
||||
const target = resolveTtsSynthesisTarget(params.channel);
|
||||
|
||||
@@ -1134,7 +1181,7 @@ export async function synthesizeSpeech(params: {
|
||||
try {
|
||||
const resolvedProvider = resolveReadySpeechProvider({
|
||||
provider,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
config,
|
||||
persona,
|
||||
});
|
||||
@@ -1156,7 +1203,7 @@ export async function synthesizeSpeech(params: {
|
||||
const prepared = await prepareSpeechSynthesis({
|
||||
provider: resolvedProvider.provider,
|
||||
text: params.text,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
providerConfig: resolvedProvider.providerConfig,
|
||||
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
|
||||
persona: resolvedProvider.synthesisPersona,
|
||||
@@ -1166,7 +1213,7 @@ export async function synthesizeSpeech(params: {
|
||||
});
|
||||
const synthesis = await resolvedProvider.provider.synthesize({
|
||||
text: prepared.text,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
providerConfig: prepared.providerConfig,
|
||||
target,
|
||||
providerOverrides: prepared.providerOverrides,
|
||||
@@ -1243,7 +1290,7 @@ export async function textToSpeechTelephony(params: {
|
||||
return { success: false, error: setup.error };
|
||||
}
|
||||
|
||||
const { config, persona, providers } = setup;
|
||||
const { cfg, config, persona, providers } = setup;
|
||||
const errors: string[] = [];
|
||||
const attemptedProviders: string[] = [];
|
||||
const attempts: TtsProviderAttempt[] = [];
|
||||
@@ -1258,7 +1305,7 @@ export async function textToSpeechTelephony(params: {
|
||||
try {
|
||||
const resolvedProvider = resolveReadySpeechProvider({
|
||||
provider,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
config,
|
||||
persona,
|
||||
requireTelephony: true,
|
||||
@@ -1284,7 +1331,7 @@ export async function textToSpeechTelephony(params: {
|
||||
const prepared = await prepareSpeechSynthesis({
|
||||
provider: resolvedProvider.provider,
|
||||
text: params.text,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
providerConfig: resolvedProvider.providerConfig,
|
||||
persona: resolvedProvider.synthesisPersona,
|
||||
personaProviderConfig: resolvedProvider.personaProviderConfig,
|
||||
@@ -1293,7 +1340,7 @@ export async function textToSpeechTelephony(params: {
|
||||
});
|
||||
const synthesis = await synthesizeTelephony({
|
||||
text: prepared.text,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
providerConfig: prepared.providerConfig,
|
||||
timeoutMs: config.timeoutMs,
|
||||
});
|
||||
@@ -1360,15 +1407,16 @@ export async function listSpeechVoices(params: {
|
||||
apiKey?: string;
|
||||
baseUrl?: string;
|
||||
}): Promise<SpeechVoiceOption[]> {
|
||||
const provider = canonicalizeSpeechProviderId(params.provider, params.cfg);
|
||||
const cfg = params.cfg ? resolveTtsRuntimeConfig(params.cfg) : undefined;
|
||||
const provider = canonicalizeSpeechProviderId(params.provider, cfg);
|
||||
if (!provider) {
|
||||
throw new Error("speech provider id is required");
|
||||
}
|
||||
const config = params.config ?? (params.cfg ? resolveTtsConfig(params.cfg) : undefined);
|
||||
const config = params.config ?? (cfg ? resolveTtsConfig(cfg) : undefined);
|
||||
if (!config) {
|
||||
throw new Error(`speech provider ${provider} requires cfg or resolved config`);
|
||||
}
|
||||
const resolvedProvider = getSpeechProvider(provider, params.cfg);
|
||||
const resolvedProvider = getSpeechProvider(provider, cfg);
|
||||
if (!resolvedProvider) {
|
||||
throw new Error(`speech provider ${provider} is not registered`);
|
||||
}
|
||||
@@ -1376,8 +1424,8 @@ export async function listSpeechVoices(params: {
|
||||
throw new Error(`speech provider ${provider} does not support voice listing`);
|
||||
}
|
||||
return await resolvedProvider.listVoices({
|
||||
cfg: params.cfg,
|
||||
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
|
||||
cfg,
|
||||
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, cfg),
|
||||
apiKey: params.apiKey,
|
||||
baseUrl: params.baseUrl,
|
||||
});
|
||||
@@ -1396,8 +1444,9 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
if (params.payload.isCompactionNotice) {
|
||||
return params.payload;
|
||||
}
|
||||
const cfg = resolveTtsRuntimeConfig(params.cfg);
|
||||
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
sessionAuto: params.ttsAuto,
|
||||
agentId: params.agentId,
|
||||
channelId: params.channel,
|
||||
@@ -1406,7 +1455,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
if (autoMode === "off") {
|
||||
return params.payload;
|
||||
}
|
||||
const config = resolveTtsConfig(params.cfg, {
|
||||
const config = resolveTtsConfig(cfg, {
|
||||
agentId: params.agentId,
|
||||
channelId: params.channel,
|
||||
accountId: params.accountId,
|
||||
@@ -1416,7 +1465,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
const reply = resolveSendableOutboundReplyParts(params.payload);
|
||||
const text = reply.text;
|
||||
const directives = parseTtsDirectives(text, config.modelOverrides, {
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
providerConfigs: config.providerConfigs,
|
||||
preferredProviderId: activeProvider,
|
||||
});
|
||||
@@ -1426,7 +1475,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
|
||||
if (isVerbose()) {
|
||||
const effectiveProvider = directives.overrides?.provider
|
||||
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ?? activeProvider)
|
||||
? (canonicalizeSpeechProviderId(directives.overrides.provider, cfg) ?? activeProvider)
|
||||
: activeProvider;
|
||||
logVerbose(
|
||||
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
|
||||
@@ -1486,7 +1535,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
const summary = await summarizeText({
|
||||
text: textForAudio,
|
||||
targetLength: maxLength,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
config,
|
||||
timeoutMs: config.timeoutMs,
|
||||
});
|
||||
@@ -1514,7 +1563,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
const ttsStart = Date.now();
|
||||
const result = await textToSpeech({
|
||||
text: textForAudio,
|
||||
cfg: params.cfg,
|
||||
cfg,
|
||||
prefsPath,
|
||||
channel: params.channel,
|
||||
overrides: directives.overrides,
|
||||
|
||||
Reference in New Issue
Block a user