feat(tts): resolve channel account config generically

This commit is contained in:
Peter Steinberger
2026-04-26 08:10:25 +01:00
parent 6c60cd2b72
commit d419fb561d
24 changed files with 515 additions and 47 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
- Codex/agent: translate `--thinking minimal` to `low` for modern Codex models (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.2) at request build time so the first turn is accepted instead of paying a wasted call + retry-with-low fallback. Older Codex models still receive `minimal` directly. Fixes #71946. Thanks @hclsys.
- TTS/WhatsApp: add `/tts latest` read-aloud support with duplicate suppression and `/tts chat on|off|default` session-scoped auto-TTS overrides, completing the on-demand voice-note UX for current-chat replies. Fixes #66032.
- TTS/channels: resolve channel and account TTS overrides generically, enabling Feishu and QQBot accounts to deep-merge `channels.<channel>.accounts.<id>.tts` over global and per-agent TTS config. Thanks @sahilsatralkar.
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc.
- TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface.
- TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override.

View File

@@ -213,6 +213,11 @@ openclaw pairing list feishu
appId: "cli_xxx",
appSecret: "xxx",
name: "Primary bot",
tts: {
providers: {
openai: { voice: "shimmer" },
},
},
},
backup: {
appId: "cli_yyy",
@@ -227,6 +232,10 @@ openclaw pairing list feishu
```
`defaultAccount` controls which account is used when outbound APIs do not specify an `accountId`.
`accounts.<id>.tts` uses the same shape as `messages.tts` and deep-merges over
global TTS config, so multi-bot Feishu setups can keep shared provider
credentials globally while overriding only voice, model, persona, or auto mode
per account.
### Message limits
@@ -386,6 +395,7 @@ Full configuration: [Gateway configuration](/gateway/configuration)
| `channels.feishu.accounts.<id>.appId` | App ID | — |
| `channels.feishu.accounts.<id>.appSecret` | App Secret | — |
| `channels.feishu.accounts.<id>.domain` | Per-account domain override | `feishu` |
| `channels.feishu.accounts.<id>.tts` | Per-account TTS override | `messages.tts` |
| `channels.feishu.dmPolicy` | DM policy | `allowlist` |
| `channels.feishu.allowFrom` | DM allowlist (open_id list) | [BotOwnerId] |
| `channels.feishu.groupPolicy` | Group policy | `allowlist` |

View File

@@ -122,10 +122,10 @@ openclaw channels add --channel qqbot --account bot2 --token "222222222:secret-o
STT and TTS support two-level configuration with priority fallback:
| Setting | Plugin-specific | Framework fallback |
| ------- | -------------------- | ----------------------------- |
| STT | `channels.qqbot.stt` | `tools.media.audio.models[0]` |
| TTS | `channels.qqbot.tts` | `messages.tts` |
| Setting | Plugin-specific | Framework fallback |
| ------- | -------------------------------------------------------- | ----------------------------- |
| STT | `channels.qqbot.stt` | `tools.media.audio.models[0]` |
| TTS | `channels.qqbot.tts`, `channels.qqbot.accounts.<id>.tts` | `messages.tts` |
```json5
{
@@ -140,12 +140,23 @@ STT and TTS support two-level configuration with priority fallback:
model: "your-tts-model",
voice: "your-voice",
},
accounts: {
"qq-main": {
tts: {
providers: {
openai: { voice: "shimmer" },
},
},
},
},
},
},
}
```
Set `enabled: false` on either to disable.
Account-level TTS overrides use the same shape as `messages.tts` and deep-merge
over the channel/global TTS config.
Inbound QQ voice attachments are exposed to agents as audio media metadata while
keeping raw voice files out of generic `MediaPaths`. `[[audio_as_voice]]` plain

View File

@@ -403,8 +403,41 @@ Precedence order for automatic replies, `/tts audio`, `/tts status`, and the
1. `messages.tts`
2. active `agents.list[].tts`
3. local `/tts` preferences for this host
4. inline `[[tts:...]]` directives when [model overrides](#model-driven-directives) are enabled
3. channel override, when the channel supports `channels.<channel>.tts`
4. account override, when the channel passes an account id and `channels.<channel>.accounts.<id>.tts` is configured
5. local `/tts` preferences for this host
6. inline `[[tts:...]]` directives when [model overrides](#model-driven-directives) are enabled
Channel and account overrides use the same shape as `messages.tts` and
deep-merge over the earlier layers, so shared provider credentials can stay in
`messages.tts` while a channel or bot account changes only voice, model, persona,
or auto mode:
```json5
{
messages: {
tts: {
provider: "openai",
providers: {
openai: { apiKey: "${OPENAI_API_KEY}", model: "gpt-4o-mini-tts" },
},
},
},
channels: {
feishu: {
accounts: {
english: {
tts: {
providers: {
openai: { voice: "shimmer" },
},
},
},
},
},
},
}
```
## Personas

View File

@@ -220,6 +220,45 @@ describe("FeishuConfigSchema optimization flags", () => {
});
});
describe("FeishuConfigSchema TTS overrides", () => {
  it("accepts top-level and account-level TTS overrides", () => {
    // One override at the channel root and one nested under a named account.
    const parsed = FeishuConfigSchema.parse({
      tts: {
        auto: "always",
        provider: "openai",
        providers: { openai: { voice: "alloy" } },
      },
      accounts: {
        english: {
          tts: { providers: { openai: { voice: "shimmer" } } },
        },
      },
    });

    // The top-level override keeps its scalar fields after parsing.
    const topLevelTts = parsed.tts;
    expect(topLevelTts).toMatchObject({ auto: "always", provider: "openai" });

    // The account-level override keeps its nested provider settings.
    const englishAccountTts = parsed.accounts?.english?.tts;
    expect(englishAccountTts).toMatchObject({
      providers: { openai: { voice: "shimmer" } },
    });
  });
});
describe("FeishuConfigSchema actions", () => {
it("accepts top-level reactions action gate", () => {
const result = FeishuConfigSchema.parse({

View File

@@ -20,6 +20,23 @@ const FeishuDomainSchema = z.union([
z.string().url().startsWith("https://"),
]);
const FeishuConnectionModeSchema = z.enum(["websocket", "webhook"]);
// Field validators for a TTS override block. Every field is optional.
const TtsOverrideShape = {
  auto: z.enum(["off", "always", "inbound", "tagged"]).optional(),
  enabled: z.boolean().optional(),
  mode: z.enum(["final", "all"]).optional(),
  provider: z.string().optional(),
  persona: z.string().optional(),
  // Keyed map of free-form settings objects.
  personas: z.record(z.string(), z.record(z.string(), z.unknown())).optional(),
  summaryModel: z.string().optional(),
  modelOverrides: z.record(z.string(), z.unknown()).optional(),
  // Keyed map of free-form settings objects.
  providers: z.record(z.string(), z.record(z.string(), z.unknown())).optional(),
  prefsPath: z.string().optional(),
  // Positive integer.
  maxTextLength: z.number().int().min(1).optional(),
  // Integer within [1000, 120000].
  timeoutMs: z.number().int().min(1000).max(120000).optional(),
};

/**
 * Optional TTS override block.
 *
 * The object is strict, so keys that are not listed in the shape fail
 * validation instead of being silently dropped. The whole block may be omitted.
 */
const TtsOverrideSchema = z.object(TtsOverrideShape).strict().optional();
const ToolPolicySchema = z
.object({
@@ -183,6 +200,7 @@ const FeishuSharedConfigShape = {
reactionNotifications: ReactionNotificationModeSchema,
typingIndicator: z.boolean().optional(),
resolveSenderNames: z.boolean().optional(),
tts: TtsOverrideSchema,
};
/**

View File

@@ -185,6 +185,7 @@ describe("dispatchOutbound", () => {
text: "read this aloud",
cfg: {},
channel: "qqbot",
accountId: "qq-main",
});
expect(audioFileToSilkBase64Mock).toHaveBeenCalledWith("/tmp/openclaw-qqbot/tts.wav");
expect(sendVoiceMessageMock).toHaveBeenCalledWith(

View File

@@ -57,7 +57,12 @@ export interface GatewayPluginRuntime {
};
};
tts: {
textToSpeech: (params: { text: string; cfg: unknown; channel: string }) => Promise<{
textToSpeech: (params: {
text: string;
cfg: unknown;
channel: string;
accountId?: string;
}) => Promise<{
success: boolean;
audioPath?: string;
provider?: string;

View File

@@ -37,7 +37,12 @@ import {
/** TTS provider interface — injected from the outer layer. */
export interface TTSProvider {
/** Framework TTS: text → audio file path. */
textToSpeech(params: { text: string; cfg: unknown; channel: string }): Promise<{
textToSpeech(params: {
text: string;
cfg: unknown;
channel: string;
accountId?: string;
}): Promise<{
success: boolean;
audioPath?: string;
provider?: string;
@@ -406,6 +411,7 @@ export async function sendTextAsVoiceReply(
text: ttsText,
cfg,
channel: "qqbot",
accountId: account.accountId,
});
if (!ttsResult.success || !ttsResult.audioPath) {
log?.error(`TTS failed: ${ttsResult.error ?? "unknown"}`);

View File

@@ -51,6 +51,7 @@ import {
type SpeechVoiceOption,
type TtsDirectiveOverrides,
type TtsDirectiveParseResult,
type TtsConfigResolutionContext,
} from "../api.js";
export type {
@@ -409,8 +410,11 @@ export function getResolvedSpeechProviderConfig(
return resolveLazyProviderConfig(config, canonical, cfg);
}
export function resolveTtsConfig(cfg: OpenClawConfig, agentId?: string): ResolvedTtsConfig {
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, agentId);
export function resolveTtsConfig(
cfg: OpenClawConfig,
contextOrAgentId?: string | TtsConfigResolutionContext,
): ResolvedTtsConfig {
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, contextOrAgentId);
const providerSource = raw.provider ? "config" : "default";
const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS;
const auto = resolveConfiguredTtsAutoMode(raw);
@@ -470,11 +474,17 @@ function resolveEffectiveTtsAutoState(params: {
cfg: OpenClawConfig;
sessionAuto?: string;
agentId?: string;
channelId?: string;
accountId?: string;
}): {
autoMode: TtsAutoMode;
prefsPath: string;
} {
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId);
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
const sessionAuto = normalizeTtsAutoMode(params.sessionAuto);
if (sessionAuto) {
@@ -654,11 +664,17 @@ export function resolveExplicitTtsOverrides(params: {
modelId?: string;
voiceId?: string;
agentId?: string;
channelId?: string;
accountId?: string;
}): TtsDirectiveOverrides {
const providerInput = params.provider?.trim();
const modelId = params.modelId?.trim();
const voiceId = params.voiceId?.trim();
const config = resolveTtsConfig(params.cfg, params.agentId);
const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
const selectedProvider =
canonicalizeSpeechProviderId(providerInput, params.cfg) ??
@@ -991,6 +1007,8 @@ function resolveTtsRequestSetup(params: {
providerOverride?: TtsProvider;
disableFallback?: boolean;
agentId?: string;
channelId?: string;
accountId?: string;
}):
| {
config: ResolvedTtsConfig;
@@ -1000,7 +1018,11 @@ function resolveTtsRequestSetup(params: {
| {
error: string;
} {
const config = resolveTtsConfig(params.cfg, params.agentId);
const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
});
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
if (params.text.length > config.maxTextLength) {
return {
@@ -1027,6 +1049,7 @@ export async function textToSpeech(params: {
disableFallback?: boolean;
timeoutMs?: number;
agentId?: string;
accountId?: string;
}): Promise<TtsResult> {
const synthesis = await synthesizeSpeech(params);
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
@@ -1077,6 +1100,7 @@ export async function synthesizeSpeech(params: {
disableFallback?: boolean;
timeoutMs?: number;
agentId?: string;
accountId?: string;
}): Promise<TtsSynthesisResult> {
const setup = resolveTtsRequestSetup({
text: params.text,
@@ -1085,6 +1109,8 @@ export async function synthesizeSpeech(params: {
providerOverride: params.overrides?.provider,
disableFallback: params.disableFallback,
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
});
if ("error" in setup) {
return { success: false, error: setup.error };
@@ -1365,6 +1391,7 @@ export async function maybeApplyTtsToPayload(params: {
inboundAudio?: boolean;
ttsAuto?: string;
agentId?: string;
accountId?: string;
}): Promise<ReplyPayload> {
if (params.payload.isCompactionNotice) {
return params.payload;
@@ -1373,11 +1400,17 @@ export async function maybeApplyTtsToPayload(params: {
cfg: params.cfg,
sessionAuto: params.ttsAuto,
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
});
if (autoMode === "off") {
return params.payload;
}
const config = resolveTtsConfig(params.cfg, params.agentId);
const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
});
const activeProvider = getTtsProvider(config, prefsPath);
const reply = resolveSendableOutboundReplyParts(params.payload);
@@ -1486,6 +1519,7 @@ export async function maybeApplyTtsToPayload(params: {
channel: params.channel,
overrides: directives.overrides,
agentId: params.agentId,
accountId: params.accountId,
});
if (result.success && result.audioPath) {

View File

@@ -254,6 +254,7 @@ export function createOpenClawTools(
agentChannel: options?.agentChannel,
config: resolvedConfig,
agentId: sessionAgentId,
agentAccountId: options?.agentAccountId,
}),
...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]),
...(embedded

View File

@@ -201,6 +201,51 @@ describe("createOpenClawTools TTS config wiring", () => {
__testing.setDepsForTest();
}
});
it("passes the active account id into the tts tool", async () => {
  // Config carrying a TTS override under a specific Feishu account.
  const accountScopedConfig = {
    channels: {
      feishu: {
        accounts: {
          "feishu-main": { tts: { provider: "microsoft" } },
        },
      },
    },
  } satisfies OpenClawConfig;

  const { __testing: testHooks, createOpenClawTools: buildTools } = await import(
    "./openclaw-tools.js"
  );
  testHooks.setDepsForTest({ config: accountScopedConfig });

  try {
    const tools = buildTools({
      agentChannel: "feishu",
      agentAccountId: "feishu-main",
      disableMessageTool: true,
      disablePluginTools: true,
    });
    const ttsTool = tools.find((candidate) => candidate.name === "tts");
    if (!ttsTool) {
      throw new Error("missing tts tool");
    }

    await ttsTool.execute("call-1", { text: "hello from account" });

    // The tool must forward the channel and account it was created with.
    const expectedRequest = expect.objectContaining({
      text: "hello from account",
      cfg: accountScopedConfig,
      channel: "feishu",
      accountId: "feishu-main",
    });
    expect(mocks.textToSpeech).toHaveBeenCalledWith(expectedRequest);
  } finally {
    // Reset injected deps so later tests see the defaults again.
    testHooks.setDepsForTest();
  }
});
});
describe("createOpenClawTools cron context wiring", () => {

View File

@@ -104,6 +104,25 @@ describe("createTtsTool", () => {
);
});
it("passes the active account id to speech generation", async () => {
  // Stub a successful synthesis so the tool completes normally.
  const synthesisResult = {
    success: true,
    audioPath: "/tmp/reply.opus",
    provider: "test",
    voiceCompatible: true,
  };
  textToSpeechSpy.mockResolvedValue(synthesisResult);

  const ttsTool = createTtsTool({ agentAccountId: "feishu-main" });
  await ttsTool.execute("call-1", { text: "hello" });

  // `agentAccountId` on the tool options should surface as `accountId` on the request.
  const expectedRequest = expect.objectContaining({
    text: "hello",
    accountId: "feishu-main",
  });
  expect(textToSpeechSpy).toHaveBeenCalledWith(expectedRequest);
});
it("echoes longer utterances verbatim into the tool-result content", async () => {
textToSpeechSpy.mockResolvedValue({
success: true,

View File

@@ -58,6 +58,7 @@ export function createTtsTool(opts?: {
config?: OpenClawConfig;
agentChannel?: GatewayMessageChannel;
agentId?: string;
agentAccountId?: string;
}): AnyAgentTool {
return {
label: "TTS",
@@ -77,6 +78,7 @@ export function createTtsTool(opts?: {
channel: channel ?? opts?.agentChannel,
timeoutMs,
agentId: opts?.agentId,
accountId: opts?.agentAccountId,
});
if (result.success && result.audioPath) {

View File

@@ -213,10 +213,13 @@ describe("handleTtsCommands status fallback reporting", () => {
const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true);
expect(result?.shouldContinue).toBe(false);
expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(cfg, "reader");
expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(
cfg,
expect.objectContaining({ agentId: "reader", channelId: "forum" }),
);
});
it("passes the active agent id to /tts audio synthesis", async () => {
it("passes the active agent and account ids to /tts audio synthesis", async () => {
ttsMocks.textToSpeech.mockResolvedValue({
success: true,
audioPath: "/tmp/reader.ogg",
@@ -227,7 +230,12 @@ describe("handleTtsCommands status fallback reporting", () => {
agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] },
} as OpenClawConfig;
const result = await handleTtsCommands(buildTtsParams("/tts audio hello", cfg, "reader"), true);
const result = await handleTtsCommands(
buildTtsParams("/tts audio hello", cfg, "reader", {
ctx: { AccountId: "feishu-main" },
}),
true,
);
expect(result?.shouldContinue).toBe(false);
expect(ttsMocks.textToSpeech).toHaveBeenCalledWith(
@@ -235,6 +243,7 @@ describe("handleTtsCommands status fallback reporting", () => {
text: "hello",
cfg,
agentId: "reader",
accountId: "feishu-main",
}),
);
});

View File

@@ -119,6 +119,7 @@ async function buildTtsAudioReply(params: {
text: string;
cfg: Parameters<typeof textToSpeech>[0]["cfg"];
channel: string;
accountId?: string;
prefsPath: string;
agentId?: string;
}): Promise<{ reply: ReplyPayload; provider?: string; hash?: string } | { error: string }> {
@@ -127,6 +128,7 @@ async function buildTtsAudioReply(params: {
text: params.text,
cfg: params.cfg,
channel: params.channel,
accountId: params.accountId,
prefsPath: params.prefsPath,
agentId: params.agentId,
});
@@ -185,7 +187,12 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
return { shouldContinue: false };
}
const config = resolveTtsConfig(params.cfg, params.agentId);
const accountId = params.ctx?.AccountId;
const config = resolveTtsConfig(params.cfg, {
agentId: params.agentId,
channelId: params.command.channel,
accountId,
});
const prefsPath = resolveTtsPrefsPath(config);
const action = parsed.action;
const args = parsed.args;
@@ -268,6 +275,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
text: latestText,
cfg: params.cfg,
channel: params.command.channel,
accountId,
prefsPath,
agentId: params.agentId,
});
@@ -301,6 +309,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
text: args,
cfg: params.cfg,
channel: params.command.channel,
accountId,
prefsPath,
agentId: params.agentId,
});

View File

@@ -91,6 +91,7 @@ async function maybeApplyAcpTts(params: {
cfg: OpenClawConfig;
agentId?: string;
channel?: string;
accountId?: string;
kind: ReplyDispatchKind;
inboundAudio: boolean;
ttsAuto?: TtsAutoMode;
@@ -103,6 +104,8 @@ async function maybeApplyAcpTts(params: {
cfg: params.cfg,
sessionAuto: params.ttsAuto,
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
});
if (!ttsStatus) {
return params.payload;
@@ -110,7 +113,14 @@ async function maybeApplyAcpTts(params: {
if (ttsStatus.autoMode === "inbound" && !params.inboundAudio) {
return params.payload;
}
if (params.kind !== "final" && resolveConfiguredTtsMode(params.cfg, params.agentId) === "final") {
if (
params.kind !== "final" &&
resolveConfiguredTtsMode(params.cfg, {
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
}) === "final"
) {
return params.payload;
}
const { maybeApplyTtsToPayload } = await loadDispatchAcpTtsRuntime();
@@ -122,6 +132,7 @@ async function maybeApplyAcpTts(params: {
inboundAudio: params.inboundAudio,
ttsAuto: params.ttsAuto,
agentId: params.agentId,
accountId: params.accountId,
});
}
@@ -175,6 +186,17 @@ export function createAcpDispatchDeliveryCoordinator(params: {
originatingTo?: string;
onReplyStart?: () => Promise<void> | void;
}): AcpDispatchDeliveryCoordinator {
const directChannel = normalizeOptionalLowercaseString(params.ctx.Provider ?? params.ctx.Surface);
const routedChannel = normalizeOptionalLowercaseString(params.originatingChannel);
const deliverySessionKey = normalizeOptionalString(params.sessionKey) ?? params.ctx.SessionKey;
const explicitAccountId = normalizeOptionalString(params.ctx.AccountId);
const resolvedAccountId =
explicitAccountId ??
normalizeOptionalString(
(
params.cfg.channels as Record<string, { defaultAccount?: unknown } | undefined> | undefined
)?.[routedChannel ?? directChannel ?? ""]?.defaultAccount,
);
const state: AcpDispatchDeliveryState = {
startedReplyLifecycle: false,
accumulatedBlockText: "",
@@ -184,6 +206,8 @@ export function createAcpDispatchDeliveryCoordinator(params: {
cfg: params.cfg,
ttsAuto: params.sessionTtsAuto,
agentId: params.agentId,
channelId: params.ttsChannel,
accountId: resolvedAccountId,
})
? createTtsDirectiveTextStreamCleaner()
: undefined,
@@ -200,18 +224,6 @@ export function createAcpDispatchDeliveryCoordinator(params: {
},
toolMessageByCallId: new Map(),
};
const directChannel = normalizeOptionalLowercaseString(params.ctx.Provider ?? params.ctx.Surface);
const routedChannel = normalizeOptionalLowercaseString(params.originatingChannel);
const deliverySessionKey = normalizeOptionalString(params.sessionKey) ?? params.ctx.SessionKey;
const explicitAccountId = normalizeOptionalString(params.ctx.AccountId);
const resolvedAccountId =
explicitAccountId ??
normalizeOptionalString(
(
params.cfg.channels as Record<string, { defaultAccount?: unknown } | undefined> | undefined
)?.[routedChannel ?? directChannel ?? ""]?.defaultAccount,
);
const settleDirectVisibleText = async () => {
if (state.settledDirectVisibleText || state.queuedDirectVisibleTextDeliveries === 0) {
return;
@@ -336,6 +348,7 @@ export function createAcpDispatchDeliveryCoordinator(params: {
cfg: params.cfg,
agentId: params.agentId,
channel: params.ttsChannel,
accountId: resolvedAccountId,
kind,
inboundAudio: params.inboundAudio,
ttsAuto: params.sessionTtsAuto,

View File

@@ -191,12 +191,17 @@ async function finalizeAcpTurnOutput(params: {
inboundAudio: boolean;
sessionTtsAuto?: TtsAutoMode;
ttsChannel?: string;
ttsAccountId?: string;
shouldEmitResolvedIdentityNotice: boolean;
}): Promise<boolean> {
await params.delivery.settleVisibleText();
let queuedFinal =
params.delivery.hasDeliveredVisibleText() && !params.delivery.hasFailedVisibleTextDelivery();
const ttsMode = resolveConfiguredTtsMode(params.cfg, params.agentId);
const ttsMode = resolveConfiguredTtsMode(params.cfg, {
agentId: params.agentId,
channelId: params.ttsChannel,
accountId: params.ttsAccountId,
});
const accumulatedVisibleBlockText = params.delivery.getAccumulatedVisibleBlockText();
const accumulatedBlockTtsText = params.delivery.getAccumulatedBlockTtsText();
const hasAccumulatedBlockText = accumulatedBlockTtsText.trim().length > 0;
@@ -204,6 +209,8 @@ async function finalizeAcpTurnOutput(params: {
cfg: params.cfg,
sessionAuto: params.sessionTtsAuto,
agentId: params.agentId,
channelId: params.ttsChannel,
accountId: params.ttsAccountId,
});
const canAttemptFinalTts =
ttsStatus != null && !(ttsStatus.autoMode === "inbound" && !params.inboundAudio);
@@ -220,6 +227,7 @@ async function finalizeAcpTurnOutput(params: {
inboundAudio: params.inboundAudio,
ttsAuto: params.sessionTtsAuto,
agentId: params.agentId,
accountId: params.ttsAccountId,
});
if (ttsSyntheticReply.mediaUrl) {
const delivered = await params.delivery.deliver("final", {
@@ -487,6 +495,7 @@ export async function tryDispatchAcpReply(params: {
inboundAudio: params.inboundAudio,
sessionTtsAuto: params.sessionTtsAuto,
ttsChannel: params.ttsChannel,
ttsAccountId: effectiveDispatchAccountId,
shouldEmitResolvedIdentityNotice,
})) || queuedFinal;

View File

@@ -122,7 +122,13 @@ async function maybeApplyTtsToReplyPayload(
params: Parameters<Awaited<ReturnType<typeof loadTtsRuntime>>["maybeApplyTtsToPayload"]>[0],
) {
if (
!shouldAttemptTtsPayload({ cfg: params.cfg, ttsAuto: params.ttsAuto, agentId: params.agentId })
!shouldAttemptTtsPayload({
cfg: params.cfg,
ttsAuto: params.ttsAuto,
agentId: params.agentId,
channelId: params.channel,
accountId: params.accountId,
})
) {
return params.payload;
}
@@ -734,6 +740,7 @@ export async function dispatchReplyFromConfig(
inboundAudio,
ttsAuto: sessionTtsAuto,
agentId: sessionAgentId,
accountId: replyRoute.accountId,
});
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
const result = await routeReplyToOriginating(normalizedPayload);
@@ -939,6 +946,8 @@ export async function dispatchReplyFromConfig(
cfg,
ttsAuto: sessionTtsAuto,
agentId: sessionAgentId,
channelId: deliveryChannel,
accountId: replyRoute.accountId,
})
? createTtsDirectiveTextStreamCleaner()
: undefined;
@@ -1010,6 +1019,7 @@ export async function dispatchReplyFromConfig(
inboundAudio,
ttsAuto: sessionTtsAuto,
agentId: sessionAgentId,
accountId: replyRoute.accountId,
});
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
const deliveryPayload = resolveToolDeliveryPayload(normalizedPayload);
@@ -1128,6 +1138,7 @@ export async function dispatchReplyFromConfig(
inboundAudio,
ttsAuto: sessionTtsAuto,
agentId: sessionAgentId,
accountId: replyRoute.accountId,
});
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
if (shouldRouteToOriginating) {
@@ -1198,7 +1209,11 @@ export async function dispatchReplyFromConfig(
routedFinalCount += finalReply.routedFinalCount;
}
const ttsMode = resolveConfiguredTtsMode(cfg, sessionAgentId);
const ttsMode = resolveConfiguredTtsMode(cfg, {
agentId: sessionAgentId,
channelId: deliveryChannel,
accountId: replyRoute.accountId,
});
// Generate TTS-only reply after block streaming completes (when there's no final reply).
// This handles the case where block streaming succeeds and drops final payloads,
// but we still want TTS audio to be generated from the accumulated block content.
@@ -1217,6 +1232,7 @@ export async function dispatchReplyFromConfig(
inboundAudio,
ttsAuto: sessionTtsAuto,
agentId: sessionAgentId,
accountId: replyRoute.accountId,
});
// Only send if TTS was actually applied (mediaUrl exists)
if (ttsSyntheticReply.mediaUrl) {

View File

@@ -39,6 +39,7 @@ export {
normalizeSpeechProviderId,
} from "../tts/provider-registry.js";
export { resolveEffectiveTtsConfig } from "../tts/tts-config.js";
export type { TtsConfigResolutionContext } from "../tts/tts-config.js";
export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js";
export {
asBoolean,

View File

@@ -6,10 +6,12 @@ import type {
TtsDirectiveOverrides,
TtsDirectiveParseResult,
} from "../tts/provider-types.js";
import type { TtsConfigResolutionContext } from "../tts/tts-config.js";
import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
import type { ReplyPayload } from "./reply-payload.js";
export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
export type { TtsConfigResolutionContext };
export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
export type TtsAttemptReasonCode =
@@ -66,6 +68,8 @@ export type ResolveExplicitTtsOverridesParams = {
modelId?: string;
voiceId?: string;
agentId?: string;
channelId?: string;
accountId?: string;
};
export type TtsRequestParams = {
@@ -77,6 +81,7 @@ export type TtsRequestParams = {
disableFallback?: boolean;
timeoutMs?: number;
agentId?: string;
accountId?: string;
};
export type TtsTelephonyRequestParams = {
@@ -101,6 +106,7 @@ export type MaybeApplyTtsToPayloadParams = {
inboundAudio?: boolean;
ttsAuto?: string;
agentId?: string;
accountId?: string;
};
export type TtsTestFacade = {
@@ -201,7 +207,10 @@ export type TtsRuntimeFacade = {
maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
resolveTtsConfig: (cfg: OpenClawConfig, agentId?: string) => ResolvedTtsConfig;
resolveTtsConfig: (
cfg: OpenClawConfig,
contextOrAgentId?: string | TtsConfigResolutionContext,
) => ResolvedTtsConfig;
resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;

View File

@@ -8,7 +8,7 @@ import {
} from "../shared/string-coerce.js";
import { resolveConfigDir, resolveUserPath } from "../utils.js";
import { normalizeTtsAutoMode } from "./tts-auto-mode.js";
import { resolveEffectiveTtsConfig } from "./tts-config.js";
import { resolveEffectiveTtsConfig, type TtsConfigResolutionContext } from "./tts-config.js";
const DEFAULT_TTS_MAX_LENGTH = 1500;
const DEFAULT_TTS_SUMMARIZE = true;
@@ -222,8 +222,15 @@ export function resolveStatusTtsSnapshot(params: {
cfg: OpenClawConfig;
sessionAuto?: string;
agentId?: string;
channelId?: string;
accountId?: string;
}): TtsStatusSnapshot | null {
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId);
const context: TtsConfigResolutionContext = {
agentId: params.agentId,
channelId: params.channelId,
accountId: params.accountId,
};
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, context);
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
const prefs = readPrefs(prefsPath);
const autoMode =

View File

@@ -3,7 +3,11 @@ import { tmpdir } from "node:os";
import path from "node:path";
import { afterAll, beforeAll, afterEach, beforeEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resolveConfiguredTtsMode, shouldAttemptTtsPayload } from "./tts-config.js";
import {
resolveConfiguredTtsMode,
resolveEffectiveTtsConfig,
shouldAttemptTtsPayload,
} from "./tts-config.js";
describe("shouldAttemptTtsPayload", () => {
let originalPrefsPath: string | undefined;
@@ -88,4 +92,73 @@ describe("shouldAttemptTtsPayload", () => {
expect(shouldAttemptTtsPayload({ cfg, agentId: "main" })).toBe(false);
expect(resolveConfiguredTtsMode(cfg, "main")).toBe("final");
});
it("merges channel and account TTS overrides after agent overrides", () => {
  // Four layers, lowest precedence first: global, agent, channel, account.
  const layeredConfig = {
    messages: {
      tts: {
        auto: "off",
        mode: "final",
        provider: "openai",
        providers: {
          openai: { model: "gpt-4o-mini-tts", voice: "alloy" },
        },
      },
    },
    agents: {
      list: [{ id: "reader", tts: { providers: { openai: { voice: "nova" } } } }],
    },
    channels: {
      feishu: {
        tts: { auto: "always" },
        accounts: {
          EnglishBot: {
            tts: {
              mode: "all",
              providers: { openai: { voice: "shimmer" } },
            },
          },
        },
      },
    },
  } as OpenClawConfig;

  // The channel and account ids differ in letter case from the config keys.
  const effective = resolveEffectiveTtsConfig(layeredConfig, {
    agentId: "reader",
    channelId: "FEISHU",
    accountId: "englishbot",
  });

  // auto comes from the channel, mode and voice from the account, the rest from global.
  expect(effective).toMatchObject({
    auto: "always",
    mode: "all",
    provider: "openai",
    providers: {
      openai: { model: "gpt-4o-mini-tts", voice: "shimmer" },
    },
  });
});
});

View File

@@ -2,13 +2,23 @@ import { existsSync, readFileSync } from "node:fs";
import path from "node:path";
import type { OpenClawConfig } from "../config/types.js";
import type { TtsAutoMode, TtsConfig, TtsMode } from "../config/types.tts.js";
import { normalizeAgentId } from "../routing/session-key.js";
import { normalizeAccountId, normalizeAgentId } from "../routing/session-key.js";
import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalString,
} from "../shared/string-coerce.js";
import { resolveConfigDir, resolveUserPath } from "../utils.js";
import { normalizeTtsAutoMode } from "./tts-auto-mode.js";
export { normalizeTtsAutoMode } from "./tts-auto-mode.js";
const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
export type TtsConfigResolutionContext = {
agentId?: string;
channelId?: string;
accountId?: string;
};
/**
 * Type guard for "non-null object that is not an array".
 *
 * Note that this does not check the prototype: any non-array object value
 * (including class instances) passes.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
@@ -41,14 +51,97 @@ function resolveAgentTtsOverride(
return agent?.tts;
}
export function resolveEffectiveTtsConfig(cfg: OpenClawConfig, agentId?: string): TtsConfig {
const base = cfg.messages?.tts ?? {};
const override = resolveAgentTtsOverride(cfg, agentId);
return deepMergeDefined(base, override ?? {}) as TtsConfig;
/**
 * Normalizes the "agent id or context object" argument into a context object.
 *
 * A bare string is treated as the agent id; a missing argument becomes an
 * empty context; a context object is returned as-is.
 */
function resolveTtsConfigContext(
  contextOrAgentId?: string | TtsConfigResolutionContext,
): TtsConfigResolutionContext {
  if (typeof contextOrAgentId === "string") {
    return { agentId: contextOrAgentId };
  }
  return contextOrAgentId ?? {};
}
export function resolveConfiguredTtsMode(cfg: OpenClawConfig, agentId?: string): TtsMode {
return resolveEffectiveTtsConfig(cfg, agentId).mode ?? "final";
/**
 * Looks up an entry in a keyed record by id.
 *
 * An exact own-property match on the id wins. Otherwise the first key whose
 * normalized form equals the normalized id is used.
 *
 * @param entries - Record to search; `undefined` yields `undefined`.
 * @param id - Requested id; when `normalizeOptionalString` yields nothing for it, the result is `undefined`.
 * @param normalize - Canonicalizer applied to both the id and each candidate key for the fallback match.
 * @returns The matching entry, or `undefined` when nothing matches.
 */
function resolveRecordEntry<T>(
  entries: Record<string, T> | undefined,
  id: string | undefined,
  normalize: (value: string) => string,
): T | undefined {
  const normalizedId = normalizeOptionalString(id);
  if (!entries || !normalizedId) {
    return undefined;
  }
  // Own-property check so inherited keys (e.g. from Object.prototype) never match.
  if (Object.hasOwn(entries, normalizedId)) {
    return entries[normalizedId];
  }
  const normalized = normalize(normalizedId);
  const key = Object.keys(entries).find((candidate) => normalize(candidate) === normalized);
  // `find` signals "no match" with undefined; a truthiness test would also
  // discard a matching empty-string key.
  return key !== undefined ? entries[key] : undefined;
}
/**
 * Returns `value` typed as a TTS config when it is a non-array object,
 * otherwise `undefined`. No field-level validation is performed.
 */
function asTtsConfig(value: unknown): TtsConfig | undefined {
  if (!isPlainObject(value)) {
    return undefined;
  }
  return value as TtsConfig;
}
/** Returns `value` when it is a non-array object, otherwise `undefined`. */
function asObjectRecord(value: unknown): Record<string, unknown> | undefined {
  if (isPlainObject(value)) {
    return value;
  }
  return undefined;
}
/**
 * Finds the config object for a channel under `cfg.channels`.
 *
 * The lookup goes through `resolveRecordEntry` with
 * `normalizeLowercaseStringOrEmpty` as the key normalizer.
 *
 * @returns The channel's config object, or `undefined` when `cfg.channels` is
 * not an object, the channel id is missing, or the matched entry is not an object.
 */
function resolveChannelConfig(
  cfg: OpenClawConfig,
  channelId: string | undefined,
): Record<string, unknown> | undefined {
  if (!isPlainObject(cfg.channels)) {
    return undefined;
  }
  const wantedChannelId = normalizeOptionalString(channelId);
  if (!wantedChannelId) {
    return undefined;
  }
  const channelsById = cfg.channels as Record<string, unknown>;
  const matchedChannel = resolveRecordEntry(
    channelsById,
    wantedChannelId,
    normalizeLowercaseStringOrEmpty,
  );
  return asObjectRecord(matchedChannel);
}
/**
 * Reads the `tts` block of the channel named by `context.channelId`.
 *
 * @returns The channel-level TTS override, or `undefined` when the channel or
 * its `tts` object is absent.
 */
function resolveChannelTtsOverride(
  cfg: OpenClawConfig,
  context: TtsConfigResolutionContext,
): TtsConfig | undefined {
  const channelConfig = resolveChannelConfig(cfg, context.channelId);
  return asTtsConfig(channelConfig?.tts);
}
/**
 * Reads the `tts` block of the account named by `context.accountId` under the
 * channel named by `context.channelId`.
 *
 * Account keys are matched through `resolveRecordEntry` using
 * `normalizeAccountId` as the normalizer.
 *
 * @returns The account-level TTS override, or `undefined` when the channel,
 * its `accounts` map, the account, or the account's `tts` object is absent.
 */
function resolveAccountTtsOverride(
  cfg: OpenClawConfig,
  context: TtsConfigResolutionContext,
): TtsConfig | undefined {
  const accountsValue = resolveChannelConfig(cfg, context.channelId)?.accounts;
  const accountsById = isPlainObject(accountsValue) ? accountsValue : undefined;
  const matchedAccount = asObjectRecord(
    resolveRecordEntry(accountsById, context.accountId, normalizeAccountId),
  );
  return asTtsConfig(matchedAccount?.tts);
}
/**
 * Builds the effective TTS config for a resolution context.
 *
 * Starts from `cfg.messages.tts` (or an empty object) and merges the agent,
 * channel, and account overrides over it in that order via `deepMergeDefined`,
 * so later layers take precedence.
 *
 * @param cfg - Full application config.
 * @param contextOrAgentId - Either a bare agent id or a context carrying
 * agent, channel, and account ids.
 */
export function resolveEffectiveTtsConfig(
  cfg: OpenClawConfig,
  contextOrAgentId?: string | TtsConfigResolutionContext,
): TtsConfig {
  const context = resolveTtsConfigContext(contextOrAgentId);
  const globalTts = cfg.messages?.tts ?? {};
  // Lowest precedence first; a missing layer merges as an empty object.
  const overrideLayers = [
    resolveAgentTtsOverride(cfg, context.agentId),
    resolveChannelTtsOverride(cfg, context),
    resolveAccountTtsOverride(cfg, context),
  ];
  const effective = overrideLayers.reduce<unknown>(
    (accumulated, layer) => deepMergeDefined(accumulated, layer ?? {}),
    globalTts,
  );
  return effective as TtsConfig;
}
/**
 * Returns the TTS mode from the effective config for the given context,
 * falling back to `"final"` when no mode is configured.
 */
export function resolveConfiguredTtsMode(
  cfg: OpenClawConfig,
  contextOrAgentId?: string | TtsConfigResolutionContext,
): TtsMode {
  const { mode } = resolveEffectiveTtsConfig(cfg, contextOrAgentId);
  return mode ?? "final";
}
function resolveTtsPrefsPathValue(prefsPath: string | undefined): string {
@@ -87,13 +180,15 @@ export function shouldAttemptTtsPayload(params: {
cfg: OpenClawConfig;
ttsAuto?: string;
agentId?: string;
channelId?: string;
accountId?: string;
}): boolean {
const sessionAuto = normalizeTtsAutoMode(params.ttsAuto);
if (sessionAuto) {
return sessionAuto !== "off";
}
const raw = resolveEffectiveTtsConfig(params.cfg, params.agentId);
const raw = resolveEffectiveTtsConfig(params.cfg, params);
const prefsAuto = readTtsPrefsAutoMode(resolveTtsPrefsPathValue(raw?.prefsPath));
if (prefsAuto) {
return prefsAuto !== "off";
@@ -110,9 +205,11 @@ export function shouldCleanTtsDirectiveText(params: {
cfg: OpenClawConfig;
ttsAuto?: string;
agentId?: string;
channelId?: string;
accountId?: string;
}): boolean {
if (!shouldAttemptTtsPayload(params)) {
return false;
}
return resolveEffectiveTtsConfig(params.cfg, params.agentId).modelOverrides?.enabled !== false;
return resolveEffectiveTtsConfig(params.cfg, params).modelOverrides?.enabled !== false;
}