feat(tts): resolve channel account config generically

This commit is contained in:
Peter Steinberger
2026-04-26 08:10:25 +01:00
parent 6c60cd2b72
commit d419fb561d
24 changed files with 515 additions and 47 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
- Codex/agent: translate `--thinking minimal` to `low` for modern Codex models (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.2) at request build time so the first turn is accepted instead of paying a wasted call + retry-with-low fallback. Older Codex models still receive `minimal` directly. Fixes #71946. Thanks @hclsys. - Codex/agent: translate `--thinking minimal` to `low` for modern Codex models (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.2) at request build time so the first turn is accepted instead of paying a wasted call + retry-with-low fallback. Older Codex models still receive `minimal` directly. Fixes #71946. Thanks @hclsys.
- TTS/WhatsApp: add `/tts latest` read-aloud support with duplicate suppression and `/tts chat on|off|default` session-scoped auto-TTS overrides, completing the on-demand voice-note UX for current-chat replies. Fixes #66032. - TTS/WhatsApp: add `/tts latest` read-aloud support with duplicate suppression and `/tts chat on|off|default` session-scoped auto-TTS overrides, completing the on-demand voice-note UX for current-chat replies. Fixes #66032.
- TTS/channels: resolve channel and account TTS overrides generically, enabling Feishu and QQBot accounts to deep-merge `channels.<channel>.accounts.<id>.tts` over global and per-agent TTS config. Thanks @sahilsatralkar.
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc. - Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc.
- TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface. - TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface.
- TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override. - TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override.

View File

@@ -213,6 +213,11 @@ openclaw pairing list feishu
appId: "cli_xxx", appId: "cli_xxx",
appSecret: "xxx", appSecret: "xxx",
name: "Primary bot", name: "Primary bot",
tts: {
providers: {
openai: { voice: "shimmer" },
},
},
}, },
backup: { backup: {
appId: "cli_yyy", appId: "cli_yyy",
@@ -227,6 +232,10 @@ openclaw pairing list feishu
``` ```
`defaultAccount` controls which account is used when outbound APIs do not specify an `accountId`. `defaultAccount` controls which account is used when outbound APIs do not specify an `accountId`.
`accounts.<id>.tts` uses the same shape as `messages.tts` and deep-merges over
the global, per-agent, and channel-level TTS config, so multi-bot Feishu setups
can keep shared provider credentials globally while overriding only voice,
model, persona, or auto mode per account.
### Message limits ### Message limits
@@ -386,6 +395,7 @@ Full configuration: [Gateway configuration](/gateway/configuration)
| `channels.feishu.accounts.<id>.appId` | App ID | — | | `channels.feishu.accounts.<id>.appId` | App ID | — |
| `channels.feishu.accounts.<id>.appSecret` | App Secret | — | | `channels.feishu.accounts.<id>.appSecret` | App Secret | — |
| `channels.feishu.accounts.<id>.domain` | Per-account domain override | `feishu` | | `channels.feishu.accounts.<id>.domain` | Per-account domain override | `feishu` |
| `channels.feishu.accounts.<id>.tts` | Per-account TTS override | `messages.tts` |
| `channels.feishu.dmPolicy` | DM policy | `allowlist` | | `channels.feishu.dmPolicy` | DM policy | `allowlist` |
| `channels.feishu.allowFrom` | DM allowlist (open_id list) | [BotOwnerId] | | `channels.feishu.allowFrom` | DM allowlist (open_id list) | [BotOwnerId] |
| `channels.feishu.groupPolicy` | Group policy | `allowlist` | | `channels.feishu.groupPolicy` | Group policy | `allowlist` |

View File

@@ -122,10 +122,10 @@ openclaw channels add --channel qqbot --account bot2 --token "222222222:secret-o
STT and TTS support two-level configuration with priority fallback: STT and TTS support two-level configuration with priority fallback:
| Setting | Plugin-specific | Framework fallback | | Setting | Plugin-specific | Framework fallback |
| ------- | -------------------- | ----------------------------- | | ------- | -------------------------------------------------------- | ----------------------------- |
| STT | `channels.qqbot.stt` | `tools.media.audio.models[0]` | | STT | `channels.qqbot.stt` | `tools.media.audio.models[0]` |
| TTS | `channels.qqbot.tts` | `messages.tts` | | TTS | `channels.qqbot.tts`, `channels.qqbot.accounts.<id>.tts` | `messages.tts` |
```json5 ```json5
{ {
@@ -140,12 +140,23 @@ STT and TTS support two-level configuration with priority fallback:
model: "your-tts-model", model: "your-tts-model",
voice: "your-voice", voice: "your-voice",
}, },
accounts: {
"qq-main": {
tts: {
providers: {
openai: { voice: "shimmer" },
},
},
},
},
}, },
}, },
} }
``` ```
Set `enabled: false` on either to disable. Set `enabled: false` on either to disable.
Account-level TTS overrides use the same shape as `messages.tts` and deep-merge
over the channel/global TTS config.
Inbound QQ voice attachments are exposed to agents as audio media metadata while Inbound QQ voice attachments are exposed to agents as audio media metadata while
keeping raw voice files out of generic `MediaPaths`. `[[audio_as_voice]]` plain keeping raw voice files out of generic `MediaPaths`. `[[audio_as_voice]]` plain

View File

@@ -403,8 +403,41 @@ Precedence order for automatic replies, `/tts audio`, `/tts status`, and the
1. `messages.tts` 1. `messages.tts`
2. active `agents.list[].tts` 2. active `agents.list[].tts`
3. local `/tts` preferences for this host 3. channel override, when the channel supports `channels.<channel>.tts`
4. inline `[[tts:...]]` directives when [model overrides](#model-driven-directives) are enabled 4. account override, when the channel passes `channels.<channel>.accounts.<id>.tts`
5. local `/tts` preferences for this host
6. inline `[[tts:...]]` directives when [model overrides](#model-driven-directives) are enabled
Channel and account overrides use the same shape as `messages.tts` and
deep-merge over the earlier layers, so shared provider credentials can stay in
`messages.tts` while a channel or bot account changes only voice, model, persona,
or auto mode:
```json5
{
messages: {
tts: {
provider: "openai",
providers: {
openai: { apiKey: "${OPENAI_API_KEY}", model: "gpt-4o-mini-tts" },
},
},
},
channels: {
feishu: {
accounts: {
english: {
tts: {
providers: {
openai: { voice: "shimmer" },
},
},
},
},
},
},
}
```
## Personas ## Personas

View File

@@ -220,6 +220,45 @@ describe("FeishuConfigSchema optimization flags", () => {
}); });
}); });
// Checks that FeishuConfigSchema parses a `tts` block both at the top level of
// the channel config and under an individual entry in `accounts`.
describe("FeishuConfigSchema TTS overrides", () => {
it("accepts top-level and account-level TTS overrides", () => {
// The account entry sets only a provider voice; the top-level block also sets
// `auto` and `provider`.
const result = FeishuConfigSchema.parse({
tts: {
auto: "always",
provider: "openai",
providers: {
openai: {
voice: "alloy",
},
},
},
accounts: {
english: {
tts: {
providers: {
openai: {
voice: "shimmer",
},
},
},
},
},
});
// Top-level values come back from the parsed result.
expect(result.tts).toMatchObject({
auto: "always",
provider: "openai",
});
// The account-level override is preserved under its account id.
expect(result.accounts?.english?.tts).toMatchObject({
providers: {
openai: {
voice: "shimmer",
},
},
});
});
});
describe("FeishuConfigSchema actions", () => { describe("FeishuConfigSchema actions", () => {
it("accepts top-level reactions action gate", () => { it("accepts top-level reactions action gate", () => {
const result = FeishuConfigSchema.parse({ const result = FeishuConfigSchema.parse({

View File

@@ -20,6 +20,23 @@ const FeishuDomainSchema = z.union([
z.string().url().startsWith("https://"), z.string().url().startsWith("https://"),
]); ]);
const FeishuConnectionModeSchema = z.enum(["websocket", "webhook"]); const FeishuConnectionModeSchema = z.enum(["websocket", "webhook"]);
/**
 * Validation schema for an optional `tts` override block in the Feishu config.
 *
 * Every field is optional, so an override may set only the values it wants to
 * change. Unknown keys are rejected (`.strict()`), and the block itself may be
 * omitted entirely (`.optional()`).
 */
const TtsOverrideSchema = z
.object({
auto: z.enum(["off", "always", "inbound", "tagged"]).optional(),
enabled: z.boolean().optional(),
mode: z.enum(["final", "all"]).optional(),
provider: z.string().optional(),
persona: z.string().optional(),
// Persona entries are only checked to be string-keyed objects; their inner
// fields are not validated here.
personas: z.record(z.string(), z.record(z.string(), z.unknown())).optional(),
summaryModel: z.string().optional(),
modelOverrides: z.record(z.string(), z.unknown()).optional(),
// Same as `personas`: per-provider settings are accepted as opaque objects.
providers: z.record(z.string(), z.record(z.string(), z.unknown())).optional(),
prefsPath: z.string().optional(),
// Must be a whole number of at least 1.
maxTextLength: z.number().int().min(1).optional(),
// Must be a whole number from 1000 to 120000 (presumably milliseconds, per the
// field name).
timeoutMs: z.number().int().min(1000).max(120000).optional(),
})
.strict()
.optional();
const ToolPolicySchema = z const ToolPolicySchema = z
.object({ .object({
@@ -183,6 +200,7 @@ const FeishuSharedConfigShape = {
reactionNotifications: ReactionNotificationModeSchema, reactionNotifications: ReactionNotificationModeSchema,
typingIndicator: z.boolean().optional(), typingIndicator: z.boolean().optional(),
resolveSenderNames: z.boolean().optional(), resolveSenderNames: z.boolean().optional(),
tts: TtsOverrideSchema,
}; };
/** /**

View File

@@ -185,6 +185,7 @@ describe("dispatchOutbound", () => {
text: "read this aloud", text: "read this aloud",
cfg: {}, cfg: {},
channel: "qqbot", channel: "qqbot",
accountId: "qq-main",
}); });
expect(audioFileToSilkBase64Mock).toHaveBeenCalledWith("/tmp/openclaw-qqbot/tts.wav"); expect(audioFileToSilkBase64Mock).toHaveBeenCalledWith("/tmp/openclaw-qqbot/tts.wav");
expect(sendVoiceMessageMock).toHaveBeenCalledWith( expect(sendVoiceMessageMock).toHaveBeenCalledWith(

View File

@@ -57,7 +57,12 @@ export interface GatewayPluginRuntime {
}; };
}; };
tts: { tts: {
textToSpeech: (params: { text: string; cfg: unknown; channel: string }) => Promise<{ textToSpeech: (params: {
text: string;
cfg: unknown;
channel: string;
accountId?: string;
}) => Promise<{
success: boolean; success: boolean;
audioPath?: string; audioPath?: string;
provider?: string; provider?: string;

View File

@@ -37,7 +37,12 @@ import {
/** TTS provider interface — injected from the outer layer. */ /** TTS provider interface — injected from the outer layer. */
export interface TTSProvider { export interface TTSProvider {
/** Framework TTS: text → audio file path. */ /** Framework TTS: text → audio file path. */
textToSpeech(params: { text: string; cfg: unknown; channel: string }): Promise<{ textToSpeech(params: {
text: string;
cfg: unknown;
channel: string;
accountId?: string;
}): Promise<{
success: boolean; success: boolean;
audioPath?: string; audioPath?: string;
provider?: string; provider?: string;
@@ -406,6 +411,7 @@ export async function sendTextAsVoiceReply(
text: ttsText, text: ttsText,
cfg, cfg,
channel: "qqbot", channel: "qqbot",
accountId: account.accountId,
}); });
if (!ttsResult.success || !ttsResult.audioPath) { if (!ttsResult.success || !ttsResult.audioPath) {
log?.error(`TTS failed: ${ttsResult.error ?? "unknown"}`); log?.error(`TTS failed: ${ttsResult.error ?? "unknown"}`);

View File

@@ -51,6 +51,7 @@ import {
type SpeechVoiceOption, type SpeechVoiceOption,
type TtsDirectiveOverrides, type TtsDirectiveOverrides,
type TtsDirectiveParseResult, type TtsDirectiveParseResult,
type TtsConfigResolutionContext,
} from "../api.js"; } from "../api.js";
export type { export type {
@@ -409,8 +410,11 @@ export function getResolvedSpeechProviderConfig(
return resolveLazyProviderConfig(config, canonical, cfg); return resolveLazyProviderConfig(config, canonical, cfg);
} }
export function resolveTtsConfig(cfg: OpenClawConfig, agentId?: string): ResolvedTtsConfig { export function resolveTtsConfig(
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, agentId); cfg: OpenClawConfig,
contextOrAgentId?: string | TtsConfigResolutionContext,
): ResolvedTtsConfig {
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, contextOrAgentId);
const providerSource = raw.provider ? "config" : "default"; const providerSource = raw.provider ? "config" : "default";
const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS; const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS;
const auto = resolveConfiguredTtsAutoMode(raw); const auto = resolveConfiguredTtsAutoMode(raw);
@@ -470,11 +474,17 @@ function resolveEffectiveTtsAutoState(params: {
cfg: OpenClawConfig; cfg: OpenClawConfig;
sessionAuto?: string; sessionAuto?: string;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}): { }): {
autoMode: TtsAutoMode; autoMode: TtsAutoMode;
prefsPath: string; prefsPath: string;
} { } {
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId); const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath); const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
const sessionAuto = normalizeTtsAutoMode(params.sessionAuto); const sessionAuto = normalizeTtsAutoMode(params.sessionAuto);
if (sessionAuto) { if (sessionAuto) {
@@ -654,11 +664,17 @@ export function resolveExplicitTtsOverrides(params: {
modelId?: string; modelId?: string;
voiceId?: string; voiceId?: string;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}): TtsDirectiveOverrides { }): TtsDirectiveOverrides {
const providerInput = params.provider?.trim(); const providerInput = params.provider?.trim();
const modelId = params.modelId?.trim(); const modelId = params.modelId?.trim();
const voiceId = params.voiceId?.trim(); const voiceId = params.voiceId?.trim();
const config = resolveTtsConfig(params.cfg, params.agentId); const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config); const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
const selectedProvider = const selectedProvider =
canonicalizeSpeechProviderId(providerInput, params.cfg) ?? canonicalizeSpeechProviderId(providerInput, params.cfg) ??
@@ -991,6 +1007,8 @@ function resolveTtsRequestSetup(params: {
providerOverride?: TtsProvider; providerOverride?: TtsProvider;
disableFallback?: boolean; disableFallback?: boolean;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}): }):
| { | {
config: ResolvedTtsConfig; config: ResolvedTtsConfig;
@@ -1000,7 +1018,11 @@ function resolveTtsRequestSetup(params: {
| { | {
error: string; error: string;
} { } {
const config = resolveTtsConfig(params.cfg, params.agentId); const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config); const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
if (params.text.length > config.maxTextLength) { if (params.text.length > config.maxTextLength) {
return { return {
@@ -1027,6 +1049,7 @@ export async function textToSpeech(params: {
disableFallback?: boolean; disableFallback?: boolean;
timeoutMs?: number; timeoutMs?: number;
agentId?: string; agentId?: string;
accountId?: string;
}): Promise<TtsResult> { }): Promise<TtsResult> {
const synthesis = await synthesizeSpeech(params); const synthesis = await synthesizeSpeech(params);
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) { if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
@@ -1077,6 +1100,7 @@ export async function synthesizeSpeech(params: {
disableFallback?: boolean; disableFallback?: boolean;
timeoutMs?: number; timeoutMs?: number;
agentId?: string; agentId?: string;
accountId?: string;
}): Promise<TtsSynthesisResult> { }): Promise<TtsSynthesisResult> {
const setup = resolveTtsRequestSetup({ const setup = resolveTtsRequestSetup({
text: params.text, text: params.text,
@@ -1085,6 +1109,8 @@ export async function synthesizeSpeech(params: {
providerOverride: params.overrides?.provider, providerOverride: params.overrides?.provider,
disableFallback: params.disableFallback, disableFallback: params.disableFallback,
agentId: params.agentId, agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
}); });
if ("error" in setup) { if ("error" in setup) {
return { success: false, error: setup.error }; return { success: false, error: setup.error };
@@ -1365,6 +1391,7 @@ export async function maybeApplyTtsToPayload(params: {
inboundAudio?: boolean; inboundAudio?: boolean;
ttsAuto?: string; ttsAuto?: string;
agentId?: string; agentId?: string;
accountId?: string;
}): Promise<ReplyPayload> { }): Promise<ReplyPayload> {
if (params.payload.isCompactionNotice) { if (params.payload.isCompactionNotice) {
return params.payload; return params.payload;
@@ -1373,11 +1400,17 @@ export async function maybeApplyTtsToPayload(params: {
cfg: params.cfg, cfg: params.cfg,
sessionAuto: params.ttsAuto, sessionAuto: params.ttsAuto,
agentId: params.agentId, agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
}); });
if (autoMode === "off") { if (autoMode === "off") {
return params.payload; return params.payload;
} }
const config = resolveTtsConfig(params.cfg, params.agentId); const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
});
const activeProvider = getTtsProvider(config, prefsPath); const activeProvider = getTtsProvider(config, prefsPath);
const reply = resolveSendableOutboundReplyParts(params.payload); const reply = resolveSendableOutboundReplyParts(params.payload);
@@ -1486,6 +1519,7 @@ export async function maybeApplyTtsToPayload(params: {
channel: params.channel, channel: params.channel,
overrides: directives.overrides, overrides: directives.overrides,
agentId: params.agentId, agentId: params.agentId,
accountId: params.accountId,
}); });
if (result.success && result.audioPath) { if (result.success && result.audioPath) {

View File

@@ -254,6 +254,7 @@ export function createOpenClawTools(
agentChannel: options?.agentChannel, agentChannel: options?.agentChannel,
config: resolvedConfig, config: resolvedConfig,
agentId: sessionAgentId, agentId: sessionAgentId,
agentAccountId: options?.agentAccountId,
}), }),
...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]), ...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]),
...(embedded ...(embedded

View File

@@ -201,6 +201,51 @@ describe("createOpenClawTools TTS config wiring", () => {
__testing.setDepsForTest(); __testing.setDepsForTest();
} }
}); });
// Builds the tool set with a Feishu channel and account context, runs the `tts`
// tool, and checks that the channel and account id reach `textToSpeech`.
it("passes the active account id into the tts tool", async () => {
const injectedConfig = {
channels: {
feishu: {
accounts: {
"feishu-main": {
tts: {
provider: "microsoft",
},
},
},
},
},
} satisfies OpenClawConfig;
const { __testing, createOpenClawTools } = await import("./openclaw-tools.js");
__testing.setDepsForTest({ config: injectedConfig });
try {
const tool = createOpenClawTools({
agentChannel: "feishu",
agentAccountId: "feishu-main",
disableMessageTool: true,
disablePluginTools: true,
}).find((candidate) => candidate.name === "tts");
if (!tool) {
throw new Error("missing tts tool");
}
await tool.execute("call-1", { text: "hello from account" });
expect(mocks.textToSpeech).toHaveBeenCalledWith(
expect.objectContaining({
text: "hello from account",
cfg: injectedConfig,
channel: "feishu",
accountId: "feishu-main",
}),
);
} finally {
// Called with no arguments after the test body, whether or not it threw.
__testing.setDepsForTest();
}
});
}); });
describe("createOpenClawTools cron context wiring", () => { describe("createOpenClawTools cron context wiring", () => {

View File

@@ -104,6 +104,25 @@ describe("createTtsTool", () => {
); );
}); });
// Creates the tts tool with only `agentAccountId` set and checks that it is
// forwarded to the speech call as `accountId`.
it("passes the active account id to speech generation", async () => {
textToSpeechSpy.mockResolvedValue({
success: true,
audioPath: "/tmp/reply.opus",
provider: "test",
voiceCompatible: true,
});
const tool = createTtsTool({ agentAccountId: "feishu-main" });
await tool.execute("call-1", { text: "hello" });
expect(textToSpeechSpy).toHaveBeenCalledWith(
expect.objectContaining({
text: "hello",
accountId: "feishu-main",
}),
);
});
it("echoes longer utterances verbatim into the tool-result content", async () => { it("echoes longer utterances verbatim into the tool-result content", async () => {
textToSpeechSpy.mockResolvedValue({ textToSpeechSpy.mockResolvedValue({
success: true, success: true,

View File

@@ -58,6 +58,7 @@ export function createTtsTool(opts?: {
config?: OpenClawConfig; config?: OpenClawConfig;
agentChannel?: GatewayMessageChannel; agentChannel?: GatewayMessageChannel;
agentId?: string; agentId?: string;
agentAccountId?: string;
}): AnyAgentTool { }): AnyAgentTool {
return { return {
label: "TTS", label: "TTS",
@@ -77,6 +78,7 @@ export function createTtsTool(opts?: {
channel: channel ?? opts?.agentChannel, channel: channel ?? opts?.agentChannel,
timeoutMs, timeoutMs,
agentId: opts?.agentId, agentId: opts?.agentId,
accountId: opts?.agentAccountId,
}); });
if (result.success && result.audioPath) { if (result.success && result.audioPath) {

View File

@@ -213,10 +213,13 @@ describe("handleTtsCommands status fallback reporting", () => {
const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true); const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true);
expect(result?.shouldContinue).toBe(false); expect(result?.shouldContinue).toBe(false);
expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(cfg, "reader"); expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(
cfg,
expect.objectContaining({ agentId: "reader", channelId: "forum" }),
);
}); });
it("passes the active agent id to /tts audio synthesis", async () => { it("passes the active agent and account ids to /tts audio synthesis", async () => {
ttsMocks.textToSpeech.mockResolvedValue({ ttsMocks.textToSpeech.mockResolvedValue({
success: true, success: true,
audioPath: "/tmp/reader.ogg", audioPath: "/tmp/reader.ogg",
@@ -227,7 +230,12 @@ describe("handleTtsCommands status fallback reporting", () => {
agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] }, agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] },
} as OpenClawConfig; } as OpenClawConfig;
const result = await handleTtsCommands(buildTtsParams("/tts audio hello", cfg, "reader"), true); const result = await handleTtsCommands(
buildTtsParams("/tts audio hello", cfg, "reader", {
ctx: { AccountId: "feishu-main" },
}),
true,
);
expect(result?.shouldContinue).toBe(false); expect(result?.shouldContinue).toBe(false);
expect(ttsMocks.textToSpeech).toHaveBeenCalledWith( expect(ttsMocks.textToSpeech).toHaveBeenCalledWith(
@@ -235,6 +243,7 @@ describe("handleTtsCommands status fallback reporting", () => {
text: "hello", text: "hello",
cfg, cfg,
agentId: "reader", agentId: "reader",
accountId: "feishu-main",
}), }),
); );
}); });

View File

@@ -119,6 +119,7 @@ async function buildTtsAudioReply(params: {
text: string; text: string;
cfg: Parameters<typeof textToSpeech>[0]["cfg"]; cfg: Parameters<typeof textToSpeech>[0]["cfg"];
channel: string; channel: string;
accountId?: string;
prefsPath: string; prefsPath: string;
agentId?: string; agentId?: string;
}): Promise<{ reply: ReplyPayload; provider?: string; hash?: string } | { error: string }> { }): Promise<{ reply: ReplyPayload; provider?: string; hash?: string } | { error: string }> {
@@ -127,6 +128,7 @@ async function buildTtsAudioReply(params: {
text: params.text, text: params.text,
cfg: params.cfg, cfg: params.cfg,
channel: params.channel, channel: params.channel,
accountId: params.accountId,
prefsPath: params.prefsPath, prefsPath: params.prefsPath,
agentId: params.agentId, agentId: params.agentId,
}); });
@@ -185,7 +187,12 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
return { shouldContinue: false }; return { shouldContinue: false };
} }
const config = resolveTtsConfig(params.cfg, params.agentId); const accountId = params.ctx?.AccountId;
const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.command.channel,
accountId,
});
const prefsPath = resolveTtsPrefsPath(config); const prefsPath = resolveTtsPrefsPath(config);
const action = parsed.action; const action = parsed.action;
const args = parsed.args; const args = parsed.args;
@@ -268,6 +275,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
text: latestText, text: latestText,
cfg: params.cfg, cfg: params.cfg,
channel: params.command.channel, channel: params.command.channel,
accountId,
prefsPath, prefsPath,
agentId: params.agentId, agentId: params.agentId,
}); });
@@ -301,6 +309,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
text: args, text: args,
cfg: params.cfg, cfg: params.cfg,
channel: params.command.channel, channel: params.command.channel,
accountId,
prefsPath, prefsPath,
agentId: params.agentId, agentId: params.agentId,
}); });

View File

@@ -91,6 +91,7 @@ async function maybeApplyAcpTts(params: {
cfg: OpenClawConfig; cfg: OpenClawConfig;
agentId?: string; agentId?: string;
channel?: string; channel?: string;
accountId?: string;
kind: ReplyDispatchKind; kind: ReplyDispatchKind;
inboundAudio: boolean; inboundAudio: boolean;
ttsAuto?: TtsAutoMode; ttsAuto?: TtsAutoMode;
@@ -103,6 +104,8 @@ async function maybeApplyAcpTts(params: {
cfg: params.cfg, cfg: params.cfg,
sessionAuto: params.ttsAuto, sessionAuto: params.ttsAuto,
agentId: params.agentId, agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
}); });
if (!ttsStatus) { if (!ttsStatus) {
return params.payload; return params.payload;
@@ -110,7 +113,14 @@ async function maybeApplyAcpTts(params: {
if (ttsStatus.autoMode === "inbound" && !params.inboundAudio) { if (ttsStatus.autoMode === "inbound" && !params.inboundAudio) {
return params.payload; return params.payload;
} }
if (params.kind !== "final" && resolveConfiguredTtsMode(params.cfg, params.agentId) === "final") { if (
params.kind !== "final" &&
resolveConfiguredTtsMode(params.cfg, {
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
}) === "final"
) {
return params.payload; return params.payload;
} }
const { maybeApplyTtsToPayload } = await loadDispatchAcpTtsRuntime(); const { maybeApplyTtsToPayload } = await loadDispatchAcpTtsRuntime();
@@ -122,6 +132,7 @@ async function maybeApplyAcpTts(params: {
inboundAudio: params.inboundAudio, inboundAudio: params.inboundAudio,
ttsAuto: params.ttsAuto, ttsAuto: params.ttsAuto,
agentId: params.agentId, agentId: params.agentId,
accountId: params.accountId,
}); });
} }
@@ -175,6 +186,17 @@ export function createAcpDispatchDeliveryCoordinator(params: {
originatingTo?: string; originatingTo?: string;
onReplyStart?: () => Promise<void> | void; onReplyStart?: () => Promise<void> | void;
}): AcpDispatchDeliveryCoordinator { }): AcpDispatchDeliveryCoordinator {
const directChannel = normalizeOptionalLowercaseString(params.ctx.Provider ?? params.ctx.Surface);
const routedChannel = normalizeOptionalLowercaseString(params.originatingChannel);
const deliverySessionKey = normalizeOptionalString(params.sessionKey) ?? params.ctx.SessionKey;
const explicitAccountId = normalizeOptionalString(params.ctx.AccountId);
const resolvedAccountId =
explicitAccountId ??
normalizeOptionalString(
(
params.cfg.channels as Record<string, { defaultAccount?: unknown } | undefined> | undefined
)?.[routedChannel ?? directChannel ?? ""]?.defaultAccount,
);
const state: AcpDispatchDeliveryState = { const state: AcpDispatchDeliveryState = {
startedReplyLifecycle: false, startedReplyLifecycle: false,
accumulatedBlockText: "", accumulatedBlockText: "",
@@ -184,6 +206,8 @@ export function createAcpDispatchDeliveryCoordinator(params: {
cfg: params.cfg, cfg: params.cfg,
ttsAuto: params.sessionTtsAuto, ttsAuto: params.sessionTtsAuto,
agentId: params.agentId, agentId: params.agentId,
channelId: params.ttsChannel,
accountId: resolvedAccountId,
}) })
? createTtsDirectiveTextStreamCleaner() ? createTtsDirectiveTextStreamCleaner()
: undefined, : undefined,
@@ -200,18 +224,6 @@ export function createAcpDispatchDeliveryCoordinator(params: {
}, },
toolMessageByCallId: new Map(), toolMessageByCallId: new Map(),
}; };
const directChannel = normalizeOptionalLowercaseString(params.ctx.Provider ?? params.ctx.Surface);
const routedChannel = normalizeOptionalLowercaseString(params.originatingChannel);
const deliverySessionKey = normalizeOptionalString(params.sessionKey) ?? params.ctx.SessionKey;
const explicitAccountId = normalizeOptionalString(params.ctx.AccountId);
const resolvedAccountId =
explicitAccountId ??
normalizeOptionalString(
(
params.cfg.channels as Record<string, { defaultAccount?: unknown } | undefined> | undefined
)?.[routedChannel ?? directChannel ?? ""]?.defaultAccount,
);
const settleDirectVisibleText = async () => { const settleDirectVisibleText = async () => {
if (state.settledDirectVisibleText || state.queuedDirectVisibleTextDeliveries === 0) { if (state.settledDirectVisibleText || state.queuedDirectVisibleTextDeliveries === 0) {
return; return;
@@ -336,6 +348,7 @@ export function createAcpDispatchDeliveryCoordinator(params: {
cfg: params.cfg, cfg: params.cfg,
agentId: params.agentId, agentId: params.agentId,
channel: params.ttsChannel, channel: params.ttsChannel,
accountId: resolvedAccountId,
kind, kind,
inboundAudio: params.inboundAudio, inboundAudio: params.inboundAudio,
ttsAuto: params.sessionTtsAuto, ttsAuto: params.sessionTtsAuto,

View File

@@ -191,12 +191,17 @@ async function finalizeAcpTurnOutput(params: {
inboundAudio: boolean; inboundAudio: boolean;
sessionTtsAuto?: TtsAutoMode; sessionTtsAuto?: TtsAutoMode;
ttsChannel?: string; ttsChannel?: string;
ttsAccountId?: string;
shouldEmitResolvedIdentityNotice: boolean; shouldEmitResolvedIdentityNotice: boolean;
}): Promise<boolean> { }): Promise<boolean> {
await params.delivery.settleVisibleText(); await params.delivery.settleVisibleText();
let queuedFinal = let queuedFinal =
params.delivery.hasDeliveredVisibleText() && !params.delivery.hasFailedVisibleTextDelivery(); params.delivery.hasDeliveredVisibleText() && !params.delivery.hasFailedVisibleTextDelivery();
const ttsMode = resolveConfiguredTtsMode(params.cfg, params.agentId); const ttsMode = resolveConfiguredTtsMode(params.cfg, {
agentId: params.agentId,
channelId: params.ttsChannel,
accountId: params.ttsAccountId,
});
const accumulatedVisibleBlockText = params.delivery.getAccumulatedVisibleBlockText(); const accumulatedVisibleBlockText = params.delivery.getAccumulatedVisibleBlockText();
const accumulatedBlockTtsText = params.delivery.getAccumulatedBlockTtsText(); const accumulatedBlockTtsText = params.delivery.getAccumulatedBlockTtsText();
const hasAccumulatedBlockText = accumulatedBlockTtsText.trim().length > 0; const hasAccumulatedBlockText = accumulatedBlockTtsText.trim().length > 0;
@@ -204,6 +209,8 @@ async function finalizeAcpTurnOutput(params: {
cfg: params.cfg, cfg: params.cfg,
sessionAuto: params.sessionTtsAuto, sessionAuto: params.sessionTtsAuto,
agentId: params.agentId, agentId: params.agentId,
channelId: params.ttsChannel,
accountId: params.ttsAccountId,
}); });
const canAttemptFinalTts = const canAttemptFinalTts =
ttsStatus != null && !(ttsStatus.autoMode === "inbound" && !params.inboundAudio); ttsStatus != null && !(ttsStatus.autoMode === "inbound" && !params.inboundAudio);
@@ -220,6 +227,7 @@ async function finalizeAcpTurnOutput(params: {
inboundAudio: params.inboundAudio, inboundAudio: params.inboundAudio,
ttsAuto: params.sessionTtsAuto, ttsAuto: params.sessionTtsAuto,
agentId: params.agentId, agentId: params.agentId,
accountId: params.ttsAccountId,
}); });
if (ttsSyntheticReply.mediaUrl) { if (ttsSyntheticReply.mediaUrl) {
const delivered = await params.delivery.deliver("final", { const delivered = await params.delivery.deliver("final", {
@@ -487,6 +495,7 @@ export async function tryDispatchAcpReply(params: {
inboundAudio: params.inboundAudio, inboundAudio: params.inboundAudio,
sessionTtsAuto: params.sessionTtsAuto, sessionTtsAuto: params.sessionTtsAuto,
ttsChannel: params.ttsChannel, ttsChannel: params.ttsChannel,
ttsAccountId: effectiveDispatchAccountId,
shouldEmitResolvedIdentityNotice, shouldEmitResolvedIdentityNotice,
})) || queuedFinal; })) || queuedFinal;

View File

@@ -122,7 +122,13 @@ async function maybeApplyTtsToReplyPayload(
params: Parameters<Awaited<ReturnType<typeof loadTtsRuntime>>["maybeApplyTtsToPayload"]>[0], params: Parameters<Awaited<ReturnType<typeof loadTtsRuntime>>["maybeApplyTtsToPayload"]>[0],
) { ) {
if ( if (
!shouldAttemptTtsPayload({ cfg: params.cfg, ttsAuto: params.ttsAuto, agentId: params.agentId }) !shouldAttemptTtsPayload({
cfg: params.cfg,
ttsAuto: params.ttsAuto,
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
})
) { ) {
return params.payload; return params.payload;
} }
@@ -734,6 +740,7 @@ export async function dispatchReplyFromConfig(
inboundAudio, inboundAudio,
ttsAuto: sessionTtsAuto, ttsAuto: sessionTtsAuto,
agentId: sessionAgentId, agentId: sessionAgentId,
accountId: replyRoute.accountId,
}); });
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload); const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
const result = await routeReplyToOriginating(normalizedPayload); const result = await routeReplyToOriginating(normalizedPayload);
@@ -939,6 +946,8 @@ export async function dispatchReplyFromConfig(
cfg, cfg,
ttsAuto: sessionTtsAuto, ttsAuto: sessionTtsAuto,
agentId: sessionAgentId, agentId: sessionAgentId,
channelId: deliveryChannel,
accountId: replyRoute.accountId,
}) })
? createTtsDirectiveTextStreamCleaner() ? createTtsDirectiveTextStreamCleaner()
: undefined; : undefined;
@@ -1010,6 +1019,7 @@ export async function dispatchReplyFromConfig(
inboundAudio, inboundAudio,
ttsAuto: sessionTtsAuto, ttsAuto: sessionTtsAuto,
agentId: sessionAgentId, agentId: sessionAgentId,
accountId: replyRoute.accountId,
}); });
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload); const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
const deliveryPayload = resolveToolDeliveryPayload(normalizedPayload); const deliveryPayload = resolveToolDeliveryPayload(normalizedPayload);
@@ -1128,6 +1138,7 @@ export async function dispatchReplyFromConfig(
inboundAudio, inboundAudio,
ttsAuto: sessionTtsAuto, ttsAuto: sessionTtsAuto,
agentId: sessionAgentId, agentId: sessionAgentId,
accountId: replyRoute.accountId,
}); });
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload); const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
if (shouldRouteToOriginating) { if (shouldRouteToOriginating) {
@@ -1198,7 +1209,11 @@ export async function dispatchReplyFromConfig(
routedFinalCount += finalReply.routedFinalCount; routedFinalCount += finalReply.routedFinalCount;
} }
const ttsMode = resolveConfiguredTtsMode(cfg, sessionAgentId); const ttsMode = resolveConfiguredTtsMode(cfg, {
agentId: sessionAgentId,
channelId: deliveryChannel,
accountId: replyRoute.accountId,
});
// Generate TTS-only reply after block streaming completes (when there's no final reply). // Generate TTS-only reply after block streaming completes (when there's no final reply).
// This handles the case where block streaming succeeds and drops final payloads, // This handles the case where block streaming succeeds and drops final payloads,
// but we still want TTS audio to be generated from the accumulated block content. // but we still want TTS audio to be generated from the accumulated block content.
@@ -1217,6 +1232,7 @@ export async function dispatchReplyFromConfig(
inboundAudio, inboundAudio,
ttsAuto: sessionTtsAuto, ttsAuto: sessionTtsAuto,
agentId: sessionAgentId, agentId: sessionAgentId,
accountId: replyRoute.accountId,
}); });
// Only send if TTS was actually applied (mediaUrl exists) // Only send if TTS was actually applied (mediaUrl exists)
if (ttsSyntheticReply.mediaUrl) { if (ttsSyntheticReply.mediaUrl) {

View File

@@ -39,6 +39,7 @@ export {
normalizeSpeechProviderId, normalizeSpeechProviderId,
} from "../tts/provider-registry.js"; } from "../tts/provider-registry.js";
export { resolveEffectiveTtsConfig } from "../tts/tts-config.js"; export { resolveEffectiveTtsConfig } from "../tts/tts-config.js";
export type { TtsConfigResolutionContext } from "../tts/tts-config.js";
export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js"; export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js";
export { export {
asBoolean, asBoolean,

View File

@@ -6,10 +6,12 @@ import type {
TtsDirectiveOverrides, TtsDirectiveOverrides,
TtsDirectiveParseResult, TtsDirectiveParseResult,
} from "../tts/provider-types.js"; } from "../tts/provider-types.js";
import type { TtsConfigResolutionContext } from "../tts/tts-config.js";
import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js"; import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
import type { ReplyPayload } from "./reply-payload.js"; import type { ReplyPayload } from "./reply-payload.js";
export type { ResolvedTtsConfig, ResolvedTtsModelOverrides }; export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
export type { TtsConfigResolutionContext };
export type { TtsDirectiveOverrides, TtsDirectiveParseResult }; export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
export type TtsAttemptReasonCode = export type TtsAttemptReasonCode =
@@ -66,6 +68,8 @@ export type ResolveExplicitTtsOverridesParams = {
modelId?: string; modelId?: string;
voiceId?: string; voiceId?: string;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}; };
export type TtsRequestParams = { export type TtsRequestParams = {
@@ -77,6 +81,7 @@ export type TtsRequestParams = {
disableFallback?: boolean; disableFallback?: boolean;
timeoutMs?: number; timeoutMs?: number;
agentId?: string; agentId?: string;
accountId?: string;
}; };
export type TtsTelephonyRequestParams = { export type TtsTelephonyRequestParams = {
@@ -101,6 +106,7 @@ export type MaybeApplyTtsToPayloadParams = {
inboundAudio?: boolean; inboundAudio?: boolean;
ttsAuto?: string; ttsAuto?: string;
agentId?: string; agentId?: string;
accountId?: string;
}; };
export type TtsTestFacade = { export type TtsTestFacade = {
@@ -201,7 +207,10 @@ export type TtsRuntimeFacade = {
maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>; maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides; resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode; resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
resolveTtsConfig: (cfg: OpenClawConfig, agentId?: string) => ResolvedTtsConfig; resolveTtsConfig: (
cfg: OpenClawConfig,
contextOrAgentId?: string | TtsConfigResolutionContext,
) => ResolvedTtsConfig;
resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string; resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[]; resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void; setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;

View File

@@ -8,7 +8,7 @@ import {
} from "../shared/string-coerce.js"; } from "../shared/string-coerce.js";
import { resolveConfigDir, resolveUserPath } from "../utils.js"; import { resolveConfigDir, resolveUserPath } from "../utils.js";
import { normalizeTtsAutoMode } from "./tts-auto-mode.js"; import { normalizeTtsAutoMode } from "./tts-auto-mode.js";
import { resolveEffectiveTtsConfig } from "./tts-config.js"; import { resolveEffectiveTtsConfig, type TtsConfigResolutionContext } from "./tts-config.js";
const DEFAULT_TTS_MAX_LENGTH = 1500; const DEFAULT_TTS_MAX_LENGTH = 1500;
const DEFAULT_TTS_SUMMARIZE = true; const DEFAULT_TTS_SUMMARIZE = true;
@@ -222,8 +222,15 @@ export function resolveStatusTtsSnapshot(params: {
cfg: OpenClawConfig; cfg: OpenClawConfig;
sessionAuto?: string; sessionAuto?: string;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}): TtsStatusSnapshot | null { }): TtsStatusSnapshot | null {
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId); const context: TtsConfigResolutionContext = {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
};
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, context);
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath); const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
const prefs = readPrefs(prefsPath); const prefs = readPrefs(prefsPath);
const autoMode = const autoMode =

View File

@@ -3,7 +3,11 @@ import { tmpdir } from "node:os";
import path from "node:path"; import path from "node:path";
import { afterAll, beforeAll, afterEach, beforeEach, describe, expect, it } from "vitest"; import { afterAll, beforeAll, afterEach, beforeEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js"; import type { OpenClawConfig } from "../config/config.js";
import { resolveConfiguredTtsMode, shouldAttemptTtsPayload } from "./tts-config.js"; import {
resolveConfiguredTtsMode,
resolveEffectiveTtsConfig,
shouldAttemptTtsPayload,
} from "./tts-config.js";
describe("shouldAttemptTtsPayload", () => { describe("shouldAttemptTtsPayload", () => {
let originalPrefsPath: string | undefined; let originalPrefsPath: string | undefined;
@@ -88,4 +92,73 @@ describe("shouldAttemptTtsPayload", () => {
expect(shouldAttemptTtsPayload({ cfg, agentId: "main" })).toBe(false); expect(shouldAttemptTtsPayload({ cfg, agentId: "main" })).toBe(false);
expect(resolveConfiguredTtsMode(cfg, "main")).toBe("final"); expect(resolveConfiguredTtsMode(cfg, "main")).toBe("final");
}); });
it("merges channel and account TTS overrides after agent overrides", () => {
  // Four layers contribute here: global messages.tts, the "reader" agent,
  // the feishu channel, and the EnglishBot account under that channel.
  const cfg = {
    messages: {
      tts: {
        auto: "off",
        mode: "final",
        provider: "openai",
        providers: {
          openai: {
            model: "gpt-4o-mini-tts",
            voice: "alloy",
          },
        },
      },
    },
    agents: {
      list: [
        {
          id: "reader",
          tts: {
            providers: {
              openai: {
                voice: "nova",
              },
            },
          },
        },
      ],
    },
    channels: {
      feishu: {
        tts: {
          auto: "always",
        },
        accounts: {
          EnglishBot: {
            tts: {
              mode: "all",
              providers: {
                openai: {
                  voice: "shimmer",
                },
              },
            },
          },
        },
      },
    },
  } as OpenClawConfig;

  // The ids deliberately differ in casing from the config keys ("feishu",
  // "EnglishBot") so the lookup has to match them case-insensitively.
  const context = {
    agentId: "reader",
    channelId: "FEISHU",
    accountId: "englishbot",
  };

  // auto comes from the channel, mode and voice from the account, while
  // provider and model are inherited untouched from the global config.
  const expected = {
    auto: "always",
    mode: "all",
    provider: "openai",
    providers: {
      openai: {
        model: "gpt-4o-mini-tts",
        voice: "shimmer",
      },
    },
  };

  expect(resolveEffectiveTtsConfig(cfg, context)).toMatchObject(expected);
});
}); });

View File

@@ -2,13 +2,23 @@ import { existsSync, readFileSync } from "node:fs";
import path from "node:path"; import path from "node:path";
import type { OpenClawConfig } from "../config/types.js"; import type { OpenClawConfig } from "../config/types.js";
import type { TtsAutoMode, TtsConfig, TtsMode } from "../config/types.tts.js"; import type { TtsAutoMode, TtsConfig, TtsMode } from "../config/types.tts.js";
import { normalizeAgentId } from "../routing/session-key.js"; import { normalizeAccountId, normalizeAgentId } from "../routing/session-key.js";
import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalString,
} from "../shared/string-coerce.js";
import { resolveConfigDir, resolveUserPath } from "../utils.js"; import { resolveConfigDir, resolveUserPath } from "../utils.js";
import { normalizeTtsAutoMode } from "./tts-auto-mode.js"; import { normalizeTtsAutoMode } from "./tts-auto-mode.js";
export { normalizeTtsAutoMode } from "./tts-auto-mode.js"; export { normalizeTtsAutoMode } from "./tts-auto-mode.js";
const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]); const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
/**
 * Identifies which config layers should contribute when resolving the
 * effective TTS config. Every field is optional; a missing field simply
 * means that layer contributes no override.
 */
export type TtsConfigResolutionContext = {
/** Agent whose TTS override is applied on top of the global `messages.tts`. */
agentId?: string;
/** Channel whose `tts` section (under `cfg.channels`) is applied after the agent override. */
channelId?: string;
/** Account under the channel's `accounts` map whose `tts` section is applied last. */
accountId?: string;
};
function isPlainObject(value: unknown): value is Record<string, unknown> { function isPlainObject(value: unknown): value is Record<string, unknown> {
return Boolean(value) && typeof value === "object" && !Array.isArray(value); return Boolean(value) && typeof value === "object" && !Array.isArray(value);
} }
@@ -41,14 +51,97 @@ function resolveAgentTtsOverride(
return agent?.tts; return agent?.tts;
} }
export function resolveEffectiveTtsConfig(cfg: OpenClawConfig, agentId?: string): TtsConfig { function resolveTtsConfigContext(
const base = cfg.messages?.tts ?? {}; contextOrAgentId?: string | TtsConfigResolutionContext,
const override = resolveAgentTtsOverride(cfg, agentId); ): TtsConfigResolutionContext {
return deepMergeDefined(base, override ?? {}) as TtsConfig; return typeof contextOrAgentId === "string"
? { agentId: contextOrAgentId }
: (contextOrAgentId ?? {});
} }
export function resolveConfiguredTtsMode(cfg: OpenClawConfig, agentId?: string): TtsMode { function resolveRecordEntry<T>(
return resolveEffectiveTtsConfig(cfg, agentId).mode ?? "final"; entries: Record<string, T> | undefined,
id: string | undefined,
normalize: (value: string) => string,
): T | undefined {
const normalizedId = normalizeOptionalString(id);
if (!entries || !normalizedId) {
return undefined;
}
if (Object.hasOwn(entries, normalizedId)) {
return entries[normalizedId];
}
const normalized = normalize(normalizedId);
const key = Object.keys(entries).find((candidate) => normalize(candidate) === normalized);
return key ? entries[key] : undefined;
}
/**
 * Treats an arbitrary config value as a TTS config section.
 *
 * Only the overall shape is checked (a non-array object); individual fields
 * are not validated.
 *
 * @param value - Untyped value read from the config tree.
 * @returns The same value typed as `TtsConfig`, or `undefined` when it is not
 *   a plain object.
 */
function asTtsConfig(value: unknown): TtsConfig | undefined {
  if (!isPlainObject(value)) {
    return undefined;
  }
  return value as TtsConfig;
}
/**
 * Narrows an arbitrary value to a string-keyed record.
 *
 * @param value - Untyped value read from the config tree.
 * @returns The value itself when it is a plain (non-array) object, otherwise
 *   `undefined`.
 */
function asObjectRecord(value: unknown): Record<string, unknown> | undefined {
  if (isPlainObject(value)) {
    return value;
  }
  return undefined;
}
/**
 * Looks up the raw config object for a channel under `cfg.channels`.
 *
 * The lookup is delegated to `resolveRecordEntry` with a lowercase
 * normalizer, so a channel id may match a config key that differs in casing.
 *
 * @param cfg - Full application config.
 * @param channelId - Channel identifier; blank or missing ids resolve to nothing.
 * @returns The channel's config object, or `undefined` when `cfg.channels` is
 *   not an object, the id is empty, or the matched entry is not a plain object.
 */
function resolveChannelConfig(
  cfg: OpenClawConfig,
  channelId: string | undefined,
): Record<string, unknown> | undefined {
  const { channels } = cfg;
  if (!isPlainObject(channels)) {
    return undefined;
  }

  const wantedId = normalizeOptionalString(channelId);
  if (!wantedId) {
    return undefined;
  }

  const entry = resolveRecordEntry(
    channels as Record<string, unknown>,
    wantedId,
    normalizeLowercaseStringOrEmpty,
  );
  return asObjectRecord(entry);
}
/**
 * Reads the channel-level TTS override (`channels.<channel>.tts`).
 *
 * @param cfg - Full application config.
 * @param context - Resolution context; only `channelId` is consulted here.
 * @returns The channel's `tts` section, or `undefined` when the channel is
 *   unknown or has no object-valued `tts` entry.
 */
function resolveChannelTtsOverride(
  cfg: OpenClawConfig,
  context: TtsConfigResolutionContext,
): TtsConfig | undefined {
  const channelConfig = resolveChannelConfig(cfg, context.channelId);
  return asTtsConfig(channelConfig?.tts);
}
/**
 * Reads the account-level TTS override
 * (`channels.<channel>.accounts.<account>.tts`).
 *
 * The account key is matched through `resolveRecordEntry` using
 * `normalizeAccountId`, so the id does not have to match the config key
 * verbatim.
 *
 * @param cfg - Full application config.
 * @param context - Resolution context; `channelId` and `accountId` are consulted.
 * @returns The account's `tts` section, or `undefined` when the channel, its
 *   `accounts` map, the account, or its `tts` entry is missing or not an object.
 */
function resolveAccountTtsOverride(
  cfg: OpenClawConfig,
  context: TtsConfigResolutionContext,
): TtsConfig | undefined {
  const accounts = asObjectRecord(resolveChannelConfig(cfg, context.channelId)?.accounts);
  const account = asObjectRecord(
    resolveRecordEntry(accounts, context.accountId, normalizeAccountId),
  );
  return asTtsConfig(account?.tts);
}
/**
 * Computes the effective TTS config for a given agent/channel/account.
 *
 * Starts from the global `messages.tts` section and folds the override layers
 * over it with `deepMergeDefined`, in this order: agent, channel, account.
 * Layers applied later therefore take precedence over earlier ones.
 *
 * @param cfg - Full application config.
 * @param contextOrAgentId - Either a resolution context, or a bare agent id
 *   string (the older call shape), or nothing for the global config only.
 * @returns The merged TTS config.
 */
export function resolveEffectiveTtsConfig(
  cfg: OpenClawConfig,
  contextOrAgentId?: string | TtsConfigResolutionContext,
): TtsConfig {
  const context = resolveTtsConfigContext(contextOrAgentId);
  const globalTts = cfg.messages?.tts ?? {};

  // Ordered from lowest to highest precedence.
  const layers = [
    resolveAgentTtsOverride(cfg, context.agentId),
    resolveChannelTtsOverride(cfg, context),
    resolveAccountTtsOverride(cfg, context),
  ];

  // Absent layers are merged as an empty object rather than skipped, so the
  // merge helper runs once per layer regardless of what is configured.
  const merged = layers.reduce<unknown>(
    (acc, layer) => deepMergeDefined(acc, layer ?? {}),
    globalTts,
  );
  return merged as TtsConfig;
}
export function resolveConfiguredTtsMode(
cfg: OpenClawConfig,
contextOrAgentId?: string | TtsConfigResolutionContext,
): TtsMode {
return resolveEffectiveTtsConfig(cfg, contextOrAgentId).mode ?? "final";
} }
function resolveTtsPrefsPathValue(prefsPath: string | undefined): string { function resolveTtsPrefsPathValue(prefsPath: string | undefined): string {
@@ -87,13 +180,15 @@ export function shouldAttemptTtsPayload(params: {
cfg: OpenClawConfig; cfg: OpenClawConfig;
ttsAuto?: string; ttsAuto?: string;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}): boolean { }): boolean {
const sessionAuto = normalizeTtsAutoMode(params.ttsAuto); const sessionAuto = normalizeTtsAutoMode(params.ttsAuto);
if (sessionAuto) { if (sessionAuto) {
return sessionAuto !== "off"; return sessionAuto !== "off";
} }
const raw = resolveEffectiveTtsConfig(params.cfg, params.agentId); const raw = resolveEffectiveTtsConfig(params.cfg, params);
const prefsAuto = readTtsPrefsAutoMode(resolveTtsPrefsPathValue(raw?.prefsPath)); const prefsAuto = readTtsPrefsAutoMode(resolveTtsPrefsPathValue(raw?.prefsPath));
if (prefsAuto) { if (prefsAuto) {
return prefsAuto !== "off"; return prefsAuto !== "off";
@@ -110,9 +205,11 @@ export function shouldCleanTtsDirectiveText(params: {
cfg: OpenClawConfig; cfg: OpenClawConfig;
ttsAuto?: string; ttsAuto?: string;
agentId?: string; agentId?: string;
channelId?: string;
accountId?: string;
}): boolean { }): boolean {
if (!shouldAttemptTtsPayload(params)) { if (!shouldAttemptTtsPayload(params)) {
return false; return false;
} }
return resolveEffectiveTtsConfig(params.cfg, params.agentId).modelOverrides?.enabled !== false; return resolveEffectiveTtsConfig(params.cfg, params).modelOverrides?.enabled !== false;
} }