diff --git a/CHANGELOG.md b/CHANGELOG.md index babcf824d06..fae1966d0f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai - Codex/agent: translate `--thinking minimal` to `low` for modern Codex models (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.2) at request build time so the first turn is accepted instead of paying a wasted call + retry-with-low fallback. Older Codex models still receive `minimal` directly. Fixes #71946. Thanks @hclsys. - TTS/WhatsApp: add `/tts latest` read-aloud support with duplicate suppression and `/tts chat on|off|default` session-scoped auto-TTS overrides, completing the on-demand voice-note UX for current-chat replies. Fixes #66032. +- TTS/channels: resolve channel and account TTS overrides generically, enabling Feishu and QQBot accounts to deep-merge `channels.<channel>.accounts.<id>.tts` over global and per-agent TTS config. Thanks @sahilsatralkar. - Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc. - TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface. - TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override. diff --git a/docs/channels/feishu.md b/docs/channels/feishu.md index c3ba93b3424..e6158534923 100644 --- a/docs/channels/feishu.md +++ b/docs/channels/feishu.md @@ -213,6 +213,11 @@ openclaw pairing list feishu appId: "cli_xxx", appSecret: "xxx", name: "Primary bot", + tts: { + providers: { + openai: { voice: "shimmer" }, + }, + }, }, backup: { appId: "cli_yyy", @@ -227,6 +232,10 @@ openclaw pairing list feishu ``` `defaultAccount` controls which account is used when outbound APIs do not specify an `accountId`. 
+`accounts.<id>.tts` uses the same shape as `messages.tts` and deep-merges over +global TTS config, so multi-bot Feishu setups can keep shared provider +credentials globally while overriding only voice, model, persona, or auto mode +per account. ### Message limits @@ -386,6 +395,7 @@ Full configuration: [Gateway configuration](/gateway/configuration) | `channels.feishu.accounts.<id>.appId` | App ID | — | | `channels.feishu.accounts.<id>.appSecret` | App Secret | — | | `channels.feishu.accounts.<id>.domain` | Per-account domain override | `feishu` | +| `channels.feishu.accounts.<id>.tts` | Per-account TTS override | `messages.tts` | | `channels.feishu.dmPolicy` | DM policy | `allowlist` | | `channels.feishu.allowFrom` | DM allowlist (open_id list) | [BotOwnerId] | | `channels.feishu.groupPolicy` | Group policy | `allowlist` | diff --git a/docs/channels/qqbot.md b/docs/channels/qqbot.md index e8b0ca42ed3..c98c527c826 100644 --- a/docs/channels/qqbot.md +++ b/docs/channels/qqbot.md @@ -122,10 +122,10 @@ openclaw channels add --channel qqbot --account bot2 --token "222222222:secret-o STT and TTS support two-level configuration with priority fallback: -| Setting | Plugin-specific | Framework fallback | -| ------- | -------------------- | ----------------------------- | -| STT | `channels.qqbot.stt` | `tools.media.audio.models[0]` | -| TTS | `channels.qqbot.tts` | `messages.tts` | +| Setting | Plugin-specific | Framework fallback | +| ------- | -------------------------------------------------------- | ----------------------------- | +| STT | `channels.qqbot.stt` | `tools.media.audio.models[0]` | +| TTS | `channels.qqbot.tts`, `channels.qqbot.accounts.<id>.tts` | `messages.tts` | ```json5 { @@ -140,12 +140,23 @@ STT and TTS support two-level configuration with priority fallback: model: "your-tts-model", voice: "your-voice", }, + accounts: { + "qq-main": { + tts: { + providers: { + openai: { voice: "shimmer" }, + }, + }, + }, + }, }, }, } ``` Set `enabled: false` on either to disable. 
+Account-level TTS overrides use the same shape as `messages.tts` and deep-merge +over the channel/global TTS config. Inbound QQ voice attachments are exposed to agents as audio media metadata while keeping raw voice files out of generic `MediaPaths`. `[[audio_as_voice]]` plain diff --git a/docs/tools/tts.md b/docs/tools/tts.md index c84b328aad1..305541ecddd 100644 --- a/docs/tools/tts.md +++ b/docs/tools/tts.md @@ -403,8 +403,41 @@ Precedence order for automatic replies, `/tts audio`, `/tts status`, and the 1. `messages.tts` 2. active `agents.list[].tts` -3. local `/tts` preferences for this host -4. inline `[[tts:...]]` directives when [model overrides](#model-driven-directives) are enabled +3. channel override, when the channel supports `channels.<channel>.tts` +4. account override, when the channel passes `channels.<channel>.accounts.<id>.tts` +5. local `/tts` preferences for this host +6. inline `[[tts:...]]` directives when [model overrides](#model-driven-directives) are enabled + +Channel and account overrides use the same shape as `messages.tts` and +deep-merge over the earlier layers, so shared provider credentials can stay in +`messages.tts` while a channel or bot account changes only voice, model, persona, +or auto mode: + +```json5 +{ + messages: { + tts: { + provider: "openai", + providers: { + openai: { apiKey: "${OPENAI_API_KEY}", model: "gpt-4o-mini-tts" }, + }, + }, + }, + channels: { + feishu: { + accounts: { + english: { + tts: { + providers: { + openai: { voice: "shimmer" }, + }, + }, + }, + }, + }, + }, +} +``` ## Personas diff --git a/extensions/feishu/src/config-schema.test.ts b/extensions/feishu/src/config-schema.test.ts index e937af2f39e..a5e7854d17b 100644 --- a/extensions/feishu/src/config-schema.test.ts +++ b/extensions/feishu/src/config-schema.test.ts @@ -220,6 +220,45 @@ describe("FeishuConfigSchema optimization flags", () => { }); }); +describe("FeishuConfigSchema TTS overrides", () => { + it("accepts top-level and account-level TTS overrides", () => { + 
const result = FeishuConfigSchema.parse({ + tts: { + auto: "always", + provider: "openai", + providers: { + openai: { + voice: "alloy", + }, + }, + }, + accounts: { + english: { + tts: { + providers: { + openai: { + voice: "shimmer", + }, + }, + }, + }, + }, + }); + + expect(result.tts).toMatchObject({ + auto: "always", + provider: "openai", + }); + expect(result.accounts?.english?.tts).toMatchObject({ + providers: { + openai: { + voice: "shimmer", + }, + }, + }); + }); +}); + describe("FeishuConfigSchema actions", () => { it("accepts top-level reactions action gate", () => { const result = FeishuConfigSchema.parse({ diff --git a/extensions/feishu/src/config-schema.ts b/extensions/feishu/src/config-schema.ts index c75fd98fbfc..9a824f6df66 100644 --- a/extensions/feishu/src/config-schema.ts +++ b/extensions/feishu/src/config-schema.ts @@ -20,6 +20,23 @@ const FeishuDomainSchema = z.union([ z.string().url().startsWith("https://"), ]); const FeishuConnectionModeSchema = z.enum(["websocket", "webhook"]); +const TtsOverrideSchema = z + .object({ + auto: z.enum(["off", "always", "inbound", "tagged"]).optional(), + enabled: z.boolean().optional(), + mode: z.enum(["final", "all"]).optional(), + provider: z.string().optional(), + persona: z.string().optional(), + personas: z.record(z.string(), z.record(z.string(), z.unknown())).optional(), + summaryModel: z.string().optional(), + modelOverrides: z.record(z.string(), z.unknown()).optional(), + providers: z.record(z.string(), z.record(z.string(), z.unknown())).optional(), + prefsPath: z.string().optional(), + maxTextLength: z.number().int().min(1).optional(), + timeoutMs: z.number().int().min(1000).max(120000).optional(), + }) + .strict() + .optional(); const ToolPolicySchema = z .object({ @@ -183,6 +200,7 @@ const FeishuSharedConfigShape = { reactionNotifications: ReactionNotificationModeSchema, typingIndicator: z.boolean().optional(), resolveSenderNames: z.boolean().optional(), + tts: TtsOverrideSchema, }; /** diff --git 
a/extensions/qqbot/src/engine/gateway/outbound-dispatch.test.ts b/extensions/qqbot/src/engine/gateway/outbound-dispatch.test.ts index 16f2e04899e..0d9041f6ef8 100644 --- a/extensions/qqbot/src/engine/gateway/outbound-dispatch.test.ts +++ b/extensions/qqbot/src/engine/gateway/outbound-dispatch.test.ts @@ -185,6 +185,7 @@ describe("dispatchOutbound", () => { text: "read this aloud", cfg: {}, channel: "qqbot", + accountId: "qq-main", }); expect(audioFileToSilkBase64Mock).toHaveBeenCalledWith("/tmp/openclaw-qqbot/tts.wav"); expect(sendVoiceMessageMock).toHaveBeenCalledWith( diff --git a/extensions/qqbot/src/engine/gateway/types.ts b/extensions/qqbot/src/engine/gateway/types.ts index a6662dc6118..dfbf61a4cbe 100644 --- a/extensions/qqbot/src/engine/gateway/types.ts +++ b/extensions/qqbot/src/engine/gateway/types.ts @@ -57,7 +57,12 @@ export interface GatewayPluginRuntime { }; }; tts: { - textToSpeech: (params: { text: string; cfg: unknown; channel: string }) => Promise<{ + textToSpeech: (params: { + text: string; + cfg: unknown; + channel: string; + accountId?: string; + }) => Promise<{ success: boolean; audioPath?: string; provider?: string; diff --git a/extensions/qqbot/src/engine/messaging/reply-dispatcher.ts b/extensions/qqbot/src/engine/messaging/reply-dispatcher.ts index 17af09a2653..42bf02cb341 100644 --- a/extensions/qqbot/src/engine/messaging/reply-dispatcher.ts +++ b/extensions/qqbot/src/engine/messaging/reply-dispatcher.ts @@ -37,7 +37,12 @@ import { /** TTS provider interface — injected from the outer layer. */ export interface TTSProvider { /** Framework TTS: text → audio file path. 
*/ - textToSpeech(params: { text: string; cfg: unknown; channel: string }): Promise<{ + textToSpeech(params: { + text: string; + cfg: unknown; + channel: string; + accountId?: string; + }): Promise<{ success: boolean; audioPath?: string; provider?: string; @@ -406,6 +411,7 @@ export async function sendTextAsVoiceReply( text: ttsText, cfg, channel: "qqbot", + accountId: account.accountId, }); if (!ttsResult.success || !ttsResult.audioPath) { log?.error(`TTS failed: ${ttsResult.error ?? "unknown"}`); diff --git a/extensions/speech-core/src/tts.ts b/extensions/speech-core/src/tts.ts index c0932a682b2..1dacf4ba8a5 100644 --- a/extensions/speech-core/src/tts.ts +++ b/extensions/speech-core/src/tts.ts @@ -51,6 +51,7 @@ import { type SpeechVoiceOption, type TtsDirectiveOverrides, type TtsDirectiveParseResult, + type TtsConfigResolutionContext, } from "../api.js"; export type { @@ -409,8 +410,11 @@ export function getResolvedSpeechProviderConfig( return resolveLazyProviderConfig(config, canonical, cfg); } -export function resolveTtsConfig(cfg: OpenClawConfig, agentId?: string): ResolvedTtsConfig { - const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, agentId); +export function resolveTtsConfig( + cfg: OpenClawConfig, + contextOrAgentId?: string | TtsConfigResolutionContext, +): ResolvedTtsConfig { + const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, contextOrAgentId); const providerSource = raw.provider ? "config" : "default"; const timeoutMs = raw.timeoutMs ?? 
DEFAULT_TIMEOUT_MS; const auto = resolveConfiguredTtsAutoMode(raw); @@ -470,11 +474,17 @@ function resolveEffectiveTtsAutoState(params: { cfg: OpenClawConfig; sessionAuto?: string; agentId?: string; + channelId?: string; + accountId?: string; }): { autoMode: TtsAutoMode; prefsPath: string; } { - const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId); + const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, { + agentId: params.agentId, + channelId: params.channelId, + accountId: params.accountId, + }); const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath); const sessionAuto = normalizeTtsAutoMode(params.sessionAuto); if (sessionAuto) { @@ -654,11 +664,17 @@ export function resolveExplicitTtsOverrides(params: { modelId?: string; voiceId?: string; agentId?: string; + channelId?: string; + accountId?: string; }): TtsDirectiveOverrides { const providerInput = params.provider?.trim(); const modelId = params.modelId?.trim(); const voiceId = params.voiceId?.trim(); - const config = resolveTtsConfig(params.cfg, params.agentId); + const config = resolveTtsConfig(params.cfg, { + agentId: params.agentId, + channelId: params.channelId, + accountId: params.accountId, + }); const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config); const selectedProvider = canonicalizeSpeechProviderId(providerInput, params.cfg) ?? @@ -991,6 +1007,8 @@ function resolveTtsRequestSetup(params: { providerOverride?: TtsProvider; disableFallback?: boolean; agentId?: string; + channelId?: string; + accountId?: string; }): | { config: ResolvedTtsConfig; @@ -1000,7 +1018,11 @@ function resolveTtsRequestSetup(params: { | { error: string; } { - const config = resolveTtsConfig(params.cfg, params.agentId); + const config = resolveTtsConfig(params.cfg, { + agentId: params.agentId, + channelId: params.channelId, + accountId: params.accountId, + }); const prefsPath = params.prefsPath ?? 
resolveTtsPrefsPath(config); if (params.text.length > config.maxTextLength) { return { @@ -1027,6 +1049,7 @@ export async function textToSpeech(params: { disableFallback?: boolean; timeoutMs?: number; agentId?: string; + accountId?: string; }): Promise { const synthesis = await synthesizeSpeech(params); if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) { @@ -1077,6 +1100,7 @@ export async function synthesizeSpeech(params: { disableFallback?: boolean; timeoutMs?: number; agentId?: string; + accountId?: string; }): Promise { const setup = resolveTtsRequestSetup({ text: params.text, @@ -1085,6 +1109,8 @@ export async function synthesizeSpeech(params: { providerOverride: params.overrides?.provider, disableFallback: params.disableFallback, agentId: params.agentId, + channelId: params.channel, + accountId: params.accountId, }); if ("error" in setup) { return { success: false, error: setup.error }; @@ -1365,6 +1391,7 @@ export async function maybeApplyTtsToPayload(params: { inboundAudio?: boolean; ttsAuto?: string; agentId?: string; + accountId?: string; }): Promise { if (params.payload.isCompactionNotice) { return params.payload; @@ -1373,11 +1400,17 @@ export async function maybeApplyTtsToPayload(params: { cfg: params.cfg, sessionAuto: params.ttsAuto, agentId: params.agentId, + channelId: params.channel, + accountId: params.accountId, }); if (autoMode === "off") { return params.payload; } - const config = resolveTtsConfig(params.cfg, params.agentId); + const config = resolveTtsConfig(params.cfg, { + agentId: params.agentId, + channelId: params.channel, + accountId: params.accountId, + }); const activeProvider = getTtsProvider(config, prefsPath); const reply = resolveSendableOutboundReplyParts(params.payload); @@ -1486,6 +1519,7 @@ export async function maybeApplyTtsToPayload(params: { channel: params.channel, overrides: directives.overrides, agentId: params.agentId, + accountId: params.accountId, }); if (result.success && result.audioPath) { 
diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index ad4d0ac90b3..ada50e41873 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -254,6 +254,7 @@ export function createOpenClawTools( agentChannel: options?.agentChannel, config: resolvedConfig, agentId: sessionAgentId, + agentAccountId: options?.agentAccountId, }), ...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]), ...(embedded diff --git a/src/agents/openclaw-tools.tts-config.test.ts b/src/agents/openclaw-tools.tts-config.test.ts index 4e04d1319f1..a1ce683d6bb 100644 --- a/src/agents/openclaw-tools.tts-config.test.ts +++ b/src/agents/openclaw-tools.tts-config.test.ts @@ -201,6 +201,51 @@ describe("createOpenClawTools TTS config wiring", () => { __testing.setDepsForTest(); } }); + + it("passes the active account id into the tts tool", async () => { + const injectedConfig = { + channels: { + feishu: { + accounts: { + "feishu-main": { + tts: { + provider: "microsoft", + }, + }, + }, + }, + }, + } satisfies OpenClawConfig; + + const { __testing, createOpenClawTools } = await import("./openclaw-tools.js"); + __testing.setDepsForTest({ config: injectedConfig }); + + try { + const tool = createOpenClawTools({ + agentChannel: "feishu", + agentAccountId: "feishu-main", + disableMessageTool: true, + disablePluginTools: true, + }).find((candidate) => candidate.name === "tts"); + + if (!tool) { + throw new Error("missing tts tool"); + } + + await tool.execute("call-1", { text: "hello from account" }); + + expect(mocks.textToSpeech).toHaveBeenCalledWith( + expect.objectContaining({ + text: "hello from account", + cfg: injectedConfig, + channel: "feishu", + accountId: "feishu-main", + }), + ); + } finally { + __testing.setDepsForTest(); + } + }); }); describe("createOpenClawTools cron context wiring", () => { diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts index e9cba32a24e..c78cf85f701 100644 --- 
a/src/agents/tools/tts-tool.test.ts +++ b/src/agents/tools/tts-tool.test.ts @@ -104,6 +104,25 @@ describe("createTtsTool", () => { ); }); + it("passes the active account id to speech generation", async () => { + textToSpeechSpy.mockResolvedValue({ + success: true, + audioPath: "/tmp/reply.opus", + provider: "test", + voiceCompatible: true, + }); + + const tool = createTtsTool({ agentAccountId: "feishu-main" }); + await tool.execute("call-1", { text: "hello" }); + + expect(textToSpeechSpy).toHaveBeenCalledWith( + expect.objectContaining({ + text: "hello", + accountId: "feishu-main", + }), + ); + }); + it("echoes longer utterances verbatim into the tool-result content", async () => { textToSpeechSpy.mockResolvedValue({ success: true, diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts index 862d0700484..6d96638833e 100644 --- a/src/agents/tools/tts-tool.ts +++ b/src/agents/tools/tts-tool.ts @@ -58,6 +58,7 @@ export function createTtsTool(opts?: { config?: OpenClawConfig; agentChannel?: GatewayMessageChannel; agentId?: string; + agentAccountId?: string; }): AnyAgentTool { return { label: "TTS", @@ -77,6 +78,7 @@ export function createTtsTool(opts?: { channel: channel ?? 
opts?.agentChannel, timeoutMs, agentId: opts?.agentId, + accountId: opts?.agentAccountId, }); if (result.success && result.audioPath) { diff --git a/src/auto-reply/reply/commands-tts.test.ts b/src/auto-reply/reply/commands-tts.test.ts index 611b6956d1b..7458d92110e 100644 --- a/src/auto-reply/reply/commands-tts.test.ts +++ b/src/auto-reply/reply/commands-tts.test.ts @@ -213,10 +213,13 @@ describe("handleTtsCommands status fallback reporting", () => { const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true); expect(result?.shouldContinue).toBe(false); - expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(cfg, "reader"); + expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith( + cfg, + expect.objectContaining({ agentId: "reader", channelId: "forum" }), + ); }); - it("passes the active agent id to /tts audio synthesis", async () => { + it("passes the active agent and account ids to /tts audio synthesis", async () => { ttsMocks.textToSpeech.mockResolvedValue({ success: true, audioPath: "/tmp/reader.ogg", @@ -227,7 +230,12 @@ describe("handleTtsCommands status fallback reporting", () => { agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] }, } as OpenClawConfig; - const result = await handleTtsCommands(buildTtsParams("/tts audio hello", cfg, "reader"), true); + const result = await handleTtsCommands( + buildTtsParams("/tts audio hello", cfg, "reader", { + ctx: { AccountId: "feishu-main" }, + }), + true, + ); expect(result?.shouldContinue).toBe(false); expect(ttsMocks.textToSpeech).toHaveBeenCalledWith( @@ -235,6 +243,7 @@ describe("handleTtsCommands status fallback reporting", () => { text: "hello", cfg, agentId: "reader", + accountId: "feishu-main", }), ); }); diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts index bc37a91e27c..142019c36ff 100644 --- a/src/auto-reply/reply/commands-tts.ts +++ b/src/auto-reply/reply/commands-tts.ts @@ -119,6 +119,7 @@ async function 
buildTtsAudioReply(params: { text: string; cfg: Parameters[0]["cfg"]; channel: string; + accountId?: string; prefsPath: string; agentId?: string; }): Promise<{ reply: ReplyPayload; provider?: string; hash?: string } | { error: string }> { @@ -127,6 +128,7 @@ async function buildTtsAudioReply(params: { text: params.text, cfg: params.cfg, channel: params.channel, + accountId: params.accountId, prefsPath: params.prefsPath, agentId: params.agentId, }); @@ -185,7 +187,12 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand return { shouldContinue: false }; } - const config = resolveTtsConfig(params.cfg, params.agentId); + const accountId = params.ctx?.AccountId; + const config = resolveTtsConfig(params.cfg, { + agentId: params.agentId, + channelId: params.command.channel, + accountId, + }); const prefsPath = resolveTtsPrefsPath(config); const action = parsed.action; const args = parsed.args; @@ -268,6 +275,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand text: latestText, cfg: params.cfg, channel: params.command.channel, + accountId, prefsPath, agentId: params.agentId, }); @@ -301,6 +309,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand text: args, cfg: params.cfg, channel: params.command.channel, + accountId, prefsPath, agentId: params.agentId, }); diff --git a/src/auto-reply/reply/dispatch-acp-delivery.ts b/src/auto-reply/reply/dispatch-acp-delivery.ts index 1a114f44ad2..016bb626f43 100644 --- a/src/auto-reply/reply/dispatch-acp-delivery.ts +++ b/src/auto-reply/reply/dispatch-acp-delivery.ts @@ -91,6 +91,7 @@ async function maybeApplyAcpTts(params: { cfg: OpenClawConfig; agentId?: string; channel?: string; + accountId?: string; kind: ReplyDispatchKind; inboundAudio: boolean; ttsAuto?: TtsAutoMode; @@ -103,6 +104,8 @@ async function maybeApplyAcpTts(params: { cfg: params.cfg, sessionAuto: params.ttsAuto, agentId: params.agentId, + channelId: params.channel, + 
accountId: params.accountId, }); if (!ttsStatus) { return params.payload; @@ -110,7 +113,14 @@ async function maybeApplyAcpTts(params: { if (ttsStatus.autoMode === "inbound" && !params.inboundAudio) { return params.payload; } - if (params.kind !== "final" && resolveConfiguredTtsMode(params.cfg, params.agentId) === "final") { + if ( + params.kind !== "final" && + resolveConfiguredTtsMode(params.cfg, { + agentId: params.agentId, + channelId: params.channel, + accountId: params.accountId, + }) === "final" + ) { return params.payload; } const { maybeApplyTtsToPayload } = await loadDispatchAcpTtsRuntime(); @@ -122,6 +132,7 @@ async function maybeApplyAcpTts(params: { inboundAudio: params.inboundAudio, ttsAuto: params.ttsAuto, agentId: params.agentId, + accountId: params.accountId, }); } @@ -175,6 +186,17 @@ export function createAcpDispatchDeliveryCoordinator(params: { originatingTo?: string; onReplyStart?: () => Promise | void; }): AcpDispatchDeliveryCoordinator { + const directChannel = normalizeOptionalLowercaseString(params.ctx.Provider ?? params.ctx.Surface); + const routedChannel = normalizeOptionalLowercaseString(params.originatingChannel); + const deliverySessionKey = normalizeOptionalString(params.sessionKey) ?? params.ctx.SessionKey; + const explicitAccountId = normalizeOptionalString(params.ctx.AccountId); + const resolvedAccountId = + explicitAccountId ?? + normalizeOptionalString( + ( + params.cfg.channels as Record | undefined + )?.[routedChannel ?? directChannel ?? ""]?.defaultAccount, + ); const state: AcpDispatchDeliveryState = { startedReplyLifecycle: false, accumulatedBlockText: "", @@ -184,6 +206,8 @@ export function createAcpDispatchDeliveryCoordinator(params: { cfg: params.cfg, ttsAuto: params.sessionTtsAuto, agentId: params.agentId, + channelId: params.ttsChannel, + accountId: resolvedAccountId, }) ? 
createTtsDirectiveTextStreamCleaner() : undefined, @@ -200,18 +224,6 @@ export function createAcpDispatchDeliveryCoordinator(params: { }, toolMessageByCallId: new Map(), }; - const directChannel = normalizeOptionalLowercaseString(params.ctx.Provider ?? params.ctx.Surface); - const routedChannel = normalizeOptionalLowercaseString(params.originatingChannel); - const deliverySessionKey = normalizeOptionalString(params.sessionKey) ?? params.ctx.SessionKey; - const explicitAccountId = normalizeOptionalString(params.ctx.AccountId); - const resolvedAccountId = - explicitAccountId ?? - normalizeOptionalString( - ( - params.cfg.channels as Record | undefined - )?.[routedChannel ?? directChannel ?? ""]?.defaultAccount, - ); - const settleDirectVisibleText = async () => { if (state.settledDirectVisibleText || state.queuedDirectVisibleTextDeliveries === 0) { return; @@ -336,6 +348,7 @@ export function createAcpDispatchDeliveryCoordinator(params: { cfg: params.cfg, agentId: params.agentId, channel: params.ttsChannel, + accountId: resolvedAccountId, kind, inboundAudio: params.inboundAudio, ttsAuto: params.sessionTtsAuto, diff --git a/src/auto-reply/reply/dispatch-acp.ts b/src/auto-reply/reply/dispatch-acp.ts index badaa93f5e0..2ab86fa4138 100644 --- a/src/auto-reply/reply/dispatch-acp.ts +++ b/src/auto-reply/reply/dispatch-acp.ts @@ -191,12 +191,17 @@ async function finalizeAcpTurnOutput(params: { inboundAudio: boolean; sessionTtsAuto?: TtsAutoMode; ttsChannel?: string; + ttsAccountId?: string; shouldEmitResolvedIdentityNotice: boolean; }): Promise { await params.delivery.settleVisibleText(); let queuedFinal = params.delivery.hasDeliveredVisibleText() && !params.delivery.hasFailedVisibleTextDelivery(); - const ttsMode = resolveConfiguredTtsMode(params.cfg, params.agentId); + const ttsMode = resolveConfiguredTtsMode(params.cfg, { + agentId: params.agentId, + channelId: params.ttsChannel, + accountId: params.ttsAccountId, + }); const accumulatedVisibleBlockText = 
params.delivery.getAccumulatedVisibleBlockText(); const accumulatedBlockTtsText = params.delivery.getAccumulatedBlockTtsText(); const hasAccumulatedBlockText = accumulatedBlockTtsText.trim().length > 0; @@ -204,6 +209,8 @@ async function finalizeAcpTurnOutput(params: { cfg: params.cfg, sessionAuto: params.sessionTtsAuto, agentId: params.agentId, + channelId: params.ttsChannel, + accountId: params.ttsAccountId, }); const canAttemptFinalTts = ttsStatus != null && !(ttsStatus.autoMode === "inbound" && !params.inboundAudio); @@ -220,6 +227,7 @@ async function finalizeAcpTurnOutput(params: { inboundAudio: params.inboundAudio, ttsAuto: params.sessionTtsAuto, agentId: params.agentId, + accountId: params.ttsAccountId, }); if (ttsSyntheticReply.mediaUrl) { const delivered = await params.delivery.deliver("final", { @@ -487,6 +495,7 @@ export async function tryDispatchAcpReply(params: { inboundAudio: params.inboundAudio, sessionTtsAuto: params.sessionTtsAuto, ttsChannel: params.ttsChannel, + ttsAccountId: effectiveDispatchAccountId, shouldEmitResolvedIdentityNotice, })) || queuedFinal; diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts index 413ec418f60..e89b7d6841f 100644 --- a/src/auto-reply/reply/dispatch-from-config.ts +++ b/src/auto-reply/reply/dispatch-from-config.ts @@ -122,7 +122,13 @@ async function maybeApplyTtsToReplyPayload( params: Parameters>["maybeApplyTtsToPayload"]>[0], ) { if ( - !shouldAttemptTtsPayload({ cfg: params.cfg, ttsAuto: params.ttsAuto, agentId: params.agentId }) + !shouldAttemptTtsPayload({ + cfg: params.cfg, + ttsAuto: params.ttsAuto, + agentId: params.agentId, + channelId: params.channel, + accountId: params.accountId, + }) ) { return params.payload; } @@ -734,6 +740,7 @@ export async function dispatchReplyFromConfig( inboundAudio, ttsAuto: sessionTtsAuto, agentId: sessionAgentId, + accountId: replyRoute.accountId, }); const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload); 
const result = await routeReplyToOriginating(normalizedPayload); @@ -939,6 +946,8 @@ export async function dispatchReplyFromConfig( cfg, ttsAuto: sessionTtsAuto, agentId: sessionAgentId, + channelId: deliveryChannel, + accountId: replyRoute.accountId, }) ? createTtsDirectiveTextStreamCleaner() : undefined; @@ -1010,6 +1019,7 @@ export async function dispatchReplyFromConfig( inboundAudio, ttsAuto: sessionTtsAuto, agentId: sessionAgentId, + accountId: replyRoute.accountId, }); const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload); const deliveryPayload = resolveToolDeliveryPayload(normalizedPayload); @@ -1128,6 +1138,7 @@ export async function dispatchReplyFromConfig( inboundAudio, ttsAuto: sessionTtsAuto, agentId: sessionAgentId, + accountId: replyRoute.accountId, }); const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload); if (shouldRouteToOriginating) { @@ -1198,7 +1209,11 @@ export async function dispatchReplyFromConfig( routedFinalCount += finalReply.routedFinalCount; } - const ttsMode = resolveConfiguredTtsMode(cfg, sessionAgentId); + const ttsMode = resolveConfiguredTtsMode(cfg, { + agentId: sessionAgentId, + channelId: deliveryChannel, + accountId: replyRoute.accountId, + }); // Generate TTS-only reply after block streaming completes (when there's no final reply). // This handles the case where block streaming succeeds and drops final payloads, // but we still want TTS audio to be generated from the accumulated block content. 
@@ -1217,6 +1232,7 @@ export async function dispatchReplyFromConfig( inboundAudio, ttsAuto: sessionTtsAuto, agentId: sessionAgentId, + accountId: replyRoute.accountId, }); // Only send if TTS was actually applied (mediaUrl exists) if (ttsSyntheticReply.mediaUrl) { diff --git a/src/plugin-sdk/speech-core.ts b/src/plugin-sdk/speech-core.ts index 42bbc7629dc..76de5dfe624 100644 --- a/src/plugin-sdk/speech-core.ts +++ b/src/plugin-sdk/speech-core.ts @@ -39,6 +39,7 @@ export { normalizeSpeechProviderId, } from "../tts/provider-registry.js"; export { resolveEffectiveTtsConfig } from "../tts/tts-config.js"; +export type { TtsConfigResolutionContext } from "../tts/tts-config.js"; export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js"; export { asBoolean, diff --git a/src/plugin-sdk/tts-runtime.types.ts b/src/plugin-sdk/tts-runtime.types.ts index a3a608daaa8..ae1724314ce 100644 --- a/src/plugin-sdk/tts-runtime.types.ts +++ b/src/plugin-sdk/tts-runtime.types.ts @@ -6,10 +6,12 @@ import type { TtsDirectiveOverrides, TtsDirectiveParseResult, } from "../tts/provider-types.js"; +import type { TtsConfigResolutionContext } from "../tts/tts-config.js"; import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js"; import type { ReplyPayload } from "./reply-payload.js"; export type { ResolvedTtsConfig, ResolvedTtsModelOverrides }; +export type { TtsConfigResolutionContext }; export type { TtsDirectiveOverrides, TtsDirectiveParseResult }; export type TtsAttemptReasonCode = @@ -66,6 +68,8 @@ export type ResolveExplicitTtsOverridesParams = { modelId?: string; voiceId?: string; agentId?: string; + channelId?: string; + accountId?: string; }; export type TtsRequestParams = { @@ -77,6 +81,7 @@ export type TtsRequestParams = { disableFallback?: boolean; timeoutMs?: number; agentId?: string; + accountId?: string; }; export type TtsTelephonyRequestParams = { @@ -101,6 +106,7 @@ export type MaybeApplyTtsToPayloadParams = { inboundAudio?: 
boolean; ttsAuto?: string; agentId?: string; + accountId?: string; }; export type TtsTestFacade = { @@ -201,7 +207,10 @@ export type TtsRuntimeFacade = { maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise; resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides; resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode; - resolveTtsConfig: (cfg: OpenClawConfig, agentId?: string) => ResolvedTtsConfig; + resolveTtsConfig: ( + cfg: OpenClawConfig, + contextOrAgentId?: string | TtsConfigResolutionContext, + ) => ResolvedTtsConfig; resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string; resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[]; setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void; diff --git a/src/tts/status-config.ts b/src/tts/status-config.ts index 0415088f08c..a3c79903f7f 100644 --- a/src/tts/status-config.ts +++ b/src/tts/status-config.ts @@ -8,7 +8,7 @@ import { } from "../shared/string-coerce.js"; import { resolveConfigDir, resolveUserPath } from "../utils.js"; import { normalizeTtsAutoMode } from "./tts-auto-mode.js"; -import { resolveEffectiveTtsConfig } from "./tts-config.js"; +import { resolveEffectiveTtsConfig, type TtsConfigResolutionContext } from "./tts-config.js"; const DEFAULT_TTS_MAX_LENGTH = 1500; const DEFAULT_TTS_SUMMARIZE = true; @@ -222,8 +222,15 @@ export function resolveStatusTtsSnapshot(params: { cfg: OpenClawConfig; sessionAuto?: string; agentId?: string; + channelId?: string; + accountId?: string; }): TtsStatusSnapshot | null { - const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId); + const context: TtsConfigResolutionContext = { + agentId: params.agentId, + channelId: params.channelId, + accountId: params.accountId, + }; + const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, context); const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath); const prefs = 
readPrefs(prefsPath); const autoMode = diff --git a/src/tts/tts-config.test.ts b/src/tts/tts-config.test.ts index c0f72d666e6..f3a7d6b9d17 100644 --- a/src/tts/tts-config.test.ts +++ b/src/tts/tts-config.test.ts @@ -3,7 +3,11 @@ import { tmpdir } from "node:os"; import path from "node:path"; import { afterAll, beforeAll, afterEach, beforeEach, describe, expect, it } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; -import { resolveConfiguredTtsMode, shouldAttemptTtsPayload } from "./tts-config.js"; +import { + resolveConfiguredTtsMode, + resolveEffectiveTtsConfig, + shouldAttemptTtsPayload, +} from "./tts-config.js"; describe("shouldAttemptTtsPayload", () => { let originalPrefsPath: string | undefined; @@ -88,4 +92,73 @@ describe("shouldAttemptTtsPayload", () => { expect(shouldAttemptTtsPayload({ cfg, agentId: "main" })).toBe(false); expect(resolveConfiguredTtsMode(cfg, "main")).toBe("final"); }); + + it("merges channel and account TTS overrides after agent overrides", () => { + const cfg = { + messages: { + tts: { + auto: "off", + mode: "final", + provider: "openai", + providers: { + openai: { + model: "gpt-4o-mini-tts", + voice: "alloy", + }, + }, + }, + }, + agents: { + list: [ + { + id: "reader", + tts: { + providers: { + openai: { + voice: "nova", + }, + }, + }, + }, + ], + }, + channels: { + feishu: { + tts: { + auto: "always", + }, + accounts: { + EnglishBot: { + tts: { + mode: "all", + providers: { + openai: { + voice: "shimmer", + }, + }, + }, + }, + }, + }, + }, + } as OpenClawConfig; + + const resolved = resolveEffectiveTtsConfig(cfg, { + agentId: "reader", + channelId: "FEISHU", + accountId: "englishbot", + }); + + expect(resolved).toMatchObject({ + auto: "always", + mode: "all", + provider: "openai", + providers: { + openai: { + model: "gpt-4o-mini-tts", + voice: "shimmer", + }, + }, + }); + }); }); diff --git a/src/tts/tts-config.ts b/src/tts/tts-config.ts index 8208c4c3a62..3a6d8a0dbc9 100644 --- a/src/tts/tts-config.ts +++ 
b/src/tts/tts-config.ts @@ -2,13 +2,23 @@ import { existsSync, readFileSync } from "node:fs"; import path from "node:path"; import type { OpenClawConfig } from "../config/types.js"; import type { TtsAutoMode, TtsConfig, TtsMode } from "../config/types.tts.js"; -import { normalizeAgentId } from "../routing/session-key.js"; +import { normalizeAccountId, normalizeAgentId } from "../routing/session-key.js"; +import { + normalizeLowercaseStringOrEmpty, + normalizeOptionalString, +} from "../shared/string-coerce.js"; import { resolveConfigDir, resolveUserPath } from "../utils.js"; import { normalizeTtsAutoMode } from "./tts-auto-mode.js"; export { normalizeTtsAutoMode } from "./tts-auto-mode.js"; const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]); +export type TtsConfigResolutionContext = { + agentId?: string; + channelId?: string; + accountId?: string; +}; + function isPlainObject(value: unknown): value is Record { return Boolean(value) && typeof value === "object" && !Array.isArray(value); } @@ -41,14 +51,97 @@ function resolveAgentTtsOverride( return agent?.tts; } -export function resolveEffectiveTtsConfig(cfg: OpenClawConfig, agentId?: string): TtsConfig { - const base = cfg.messages?.tts ?? {}; - const override = resolveAgentTtsOverride(cfg, agentId); - return deepMergeDefined(base, override ?? {}) as TtsConfig; +function resolveTtsConfigContext( + contextOrAgentId?: string | TtsConfigResolutionContext, +): TtsConfigResolutionContext { + return typeof contextOrAgentId === "string" + ? { agentId: contextOrAgentId } + : (contextOrAgentId ?? {}); } -export function resolveConfiguredTtsMode(cfg: OpenClawConfig, agentId?: string): TtsMode { - return resolveEffectiveTtsConfig(cfg, agentId).mode ?? 
"final"; +function resolveRecordEntry( + entries: Record | undefined, + id: string | undefined, + normalize: (value: string) => string, +): T | undefined { + const normalizedId = normalizeOptionalString(id); + if (!entries || !normalizedId) { + return undefined; + } + if (Object.hasOwn(entries, normalizedId)) { + return entries[normalizedId]; + } + const normalized = normalize(normalizedId); + const key = Object.keys(entries).find((candidate) => normalize(candidate) === normalized); + return key ? entries[key] : undefined; +} + +function asTtsConfig(value: unknown): TtsConfig | undefined { + return isPlainObject(value) ? (value as TtsConfig) : undefined; +} + +function asObjectRecord(value: unknown): Record | undefined { + return isPlainObject(value) ? value : undefined; +} + +function resolveChannelConfig( + cfg: OpenClawConfig, + channelId: string | undefined, +): Record | undefined { + if (!isPlainObject(cfg.channels)) { + return undefined; + } + const normalizedChannelId = normalizeOptionalString(channelId); + if (!normalizedChannelId) { + return undefined; + } + return asObjectRecord( + resolveRecordEntry( + cfg.channels as Record, + normalizedChannelId, + normalizeLowercaseStringOrEmpty, + ), + ); +} + +function resolveChannelTtsOverride( + cfg: OpenClawConfig, + context: TtsConfigResolutionContext, +): TtsConfig | undefined { + return asTtsConfig(resolveChannelConfig(cfg, context.channelId)?.tts); +} + +function resolveAccountTtsOverride( + cfg: OpenClawConfig, + context: TtsConfigResolutionContext, +): TtsConfig | undefined { + const channelConfig = resolveChannelConfig(cfg, context.channelId); + const accounts = isPlainObject(channelConfig?.accounts) ? 
channelConfig.accounts : undefined; + const accountConfig = resolveRecordEntry(accounts, context.accountId, normalizeAccountId); + return asTtsConfig(asObjectRecord(accountConfig)?.tts); +} + +export function resolveEffectiveTtsConfig( + cfg: OpenClawConfig, + contextOrAgentId?: string | TtsConfigResolutionContext, +): TtsConfig { + const context = resolveTtsConfigContext(contextOrAgentId); + const base = cfg.messages?.tts ?? {}; + const agentOverride = resolveAgentTtsOverride(cfg, context.agentId); + const channelOverride = resolveChannelTtsOverride(cfg, context); + const accountOverride = resolveAccountTtsOverride(cfg, context); + let merged: unknown = base; + for (const override of [agentOverride, channelOverride, accountOverride]) { + merged = deepMergeDefined(merged, override ?? {}); + } + return merged as TtsConfig; +} + +export function resolveConfiguredTtsMode( + cfg: OpenClawConfig, + contextOrAgentId?: string | TtsConfigResolutionContext, +): TtsMode { + return resolveEffectiveTtsConfig(cfg, contextOrAgentId).mode ?? 
"final"; } function resolveTtsPrefsPathValue(prefsPath: string | undefined): string { @@ -87,13 +180,15 @@ export function shouldAttemptTtsPayload(params: { cfg: OpenClawConfig; ttsAuto?: string; agentId?: string; + channelId?: string; + accountId?: string; }): boolean { const sessionAuto = normalizeTtsAutoMode(params.ttsAuto); if (sessionAuto) { return sessionAuto !== "off"; } - const raw = resolveEffectiveTtsConfig(params.cfg, params.agentId); + const raw = resolveEffectiveTtsConfig(params.cfg, params); const prefsAuto = readTtsPrefsAutoMode(resolveTtsPrefsPathValue(raw?.prefsPath)); if (prefsAuto) { return prefsAuto !== "off"; @@ -110,9 +205,11 @@ export function shouldCleanTtsDirectiveText(params: { cfg: OpenClawConfig; ttsAuto?: string; agentId?: string; + channelId?: string; + accountId?: string; }): boolean { if (!shouldAttemptTtsPayload(params)) { return false; } - return resolveEffectiveTtsConfig(params.cfg, params.agentId).modelOverrides?.enabled !== false; + return resolveEffectiveTtsConfig(params.cfg, params).modelOverrides?.enabled !== false; }