fix(tts): honor per-agent config in tts commands

This commit is contained in:
Peter Steinberger
2026-04-26 03:11:52 +01:00
parent 6a688e33f6
commit 9b4f0779ce
8 changed files with 97 additions and 2 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc.
- TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface.
- TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override.
- Providers/Azure Speech: add Azure Speech as a bundled TTS provider with Speech-resource auth, voice listing, SSML escaping, native Ogg/Opus voice-note output, and telephony output. (#51776) Thanks @leonchui.
- CLI/image generation: expose generic `--background` on `openclaw infer image generate` and `openclaw infer image edit`, keep `--openai-background` as an OpenAI alias, and let fal image generation honor `--output-format png|jpeg`. Thanks @steipete.
- Browser/config: allow local managed Chrome launch discovery and post-launch CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi. Fixes #66803. Thanks @beat843796.

View File

@@ -146,7 +146,8 @@ voice, model, style, or auto-TTS mode. The agent block deep-merges over
}
```
Precedence for automatic replies is:
Precedence for automatic replies, `/tts audio`, `/tts status`, and the `tts`
agent tool is (lowest to highest; later entries override earlier ones):
1. `messages.tts`
2. active `agents.list[].tts`

View File

@@ -253,6 +253,7 @@ export function createOpenClawTools(
createTtsTool({
agentChannel: options?.agentChannel,
config: resolvedConfig,
agentId: sessionAgentId,
}),
...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]),
...(embedded

View File

@@ -167,6 +167,40 @@ describe("createOpenClawTools TTS config wiring", () => {
__testing.setDepsForTest();
}
});
// Wiring test: the tts tool built by createOpenClawTools must forward the agent id
// taken from the session key ("reader" in "agent:reader:telegram:chat:123") to textToSpeech.
it("passes the resolved session agent id into the tts tool", async () => {
// Two agents are listed; the assertion below expects "reader", the id named in the session key.
const injectedConfig = {
agents: {
list: [{ id: "reader" }, { id: "main" }],
},
} satisfies OpenClawConfig;
const { __testing, createOpenClawTools } = await import("./openclaw-tools.js");
// Inject the config through the module's test hook instead of passing it as an option.
__testing.setDepsForTest({ config: injectedConfig });
try {
const tool = createOpenClawTools({
agentSessionKey: "agent:reader:telegram:chat:123",
disableMessageTool: true,
disablePluginTools: true,
}).find((candidate) => candidate.name === "tts");
// Fail with a clear message rather than a TypeError if the tts tool was not registered.
if (!tool) {
throw new Error("missing tts tool");
}
await tool.execute("call-1", { text: "hello from reader" });
// objectContaining: only text and agentId are pinned; other call fields are not checked.
expect(mocks.textToSpeech).toHaveBeenCalledWith(
expect.objectContaining({
text: "hello from reader",
agentId: "reader",
}),
);
} finally {
// Called with no arguments — presumably resets the injected deps; confirm against the hook.
__testing.setDepsForTest();
}
});
});
describe("createOpenClawTools cron context wiring", () => {

View File

@@ -85,6 +85,25 @@ describe("createTtsTool", () => {
expect(result.details).toMatchObject({ timeoutMs: 12_345 });
});
// Unit test for createTtsTool: an agentId given at construction time must be
// included in the arguments of the textToSpeech call made by execute().
it("passes the active agent id to speech generation", async () => {
// Stub a successful synthesis result so execute() can complete.
textToSpeechSpy.mockResolvedValue({
success: true,
audioPath: "/tmp/reply.opus",
provider: "test",
voiceCompatible: true,
});
const tool = createTtsTool({ agentId: "voice-agent" });
await tool.execute("call-1", { text: "hello" });
// objectContaining: only text and agentId are pinned; other call fields are not checked.
expect(textToSpeechSpy).toHaveBeenCalledWith(
expect.objectContaining({
text: "hello",
agentId: "voice-agent",
}),
);
});
it("echoes longer utterances verbatim into the tool-result content", async () => {
textToSpeechSpy.mockResolvedValue({
success: true,

View File

@@ -57,6 +57,7 @@ function sanitizeTranscriptForToolContent(text: string): string {
export function createTtsTool(opts?: {
config?: OpenClawConfig;
agentChannel?: GatewayMessageChannel;
agentId?: string;
}): AnyAgentTool {
return {
label: "TTS",
@@ -75,6 +76,7 @@ export function createTtsTool(opts?: {
cfg,
channel: channel ?? opts?.agentChannel,
timeoutMs,
agentId: opts?.agentId,
});
if (result.success && result.audioPath) {

View File

@@ -38,9 +38,11 @@ const FALLBACK_TTS_PROVIDER = "backup-speech";
function buildTtsParams(
commandBodyNormalized: string,
cfg: OpenClawConfig = {},
agentId?: string,
): Parameters<typeof handleTtsCommands>[0] {
return {
cfg,
agentId,
command: {
commandBodyNormalized,
isAuthorizedSender: true,
@@ -189,4 +191,38 @@ describe("handleTtsCommands status fallback reporting", () => {
expect(result?.shouldContinue).toBe(false);
expect(result?.reply?.text).toContain("TTS status");
});
// `/tts status` must resolve the TTS config with the active agent id, i.e. call
// resolveTtsConfig(cfg, "reader") rather than resolveTtsConfig(cfg) alone.
it("resolves status config for the active agent", async () => {
// Config with one agent that carries its own tts override.
const cfg = {
agents: { list: [{ id: "reader", tts: { provider: "elevenlabs" } }] },
} as OpenClawConfig;
// Third buildTtsParams argument is the agentId placed on the handler params.
const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true);
expect(result?.shouldContinue).toBe(false);
expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(cfg, "reader");
});
// `/tts audio <text>` must pass the active agent id, together with the text and the
// config, to the textToSpeech call.
it("passes the active agent id to /tts audio synthesis", async () => {
// Stub a successful synthesis result so the command handler can complete.
ttsMocks.textToSpeech.mockResolvedValue({
success: true,
audioPath: "/tmp/reader.ogg",
provider: PRIMARY_TTS_PROVIDER,
voiceCompatible: true,
});
// Config with one agent that carries its own tts override.
const cfg = {
agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] },
} as OpenClawConfig;
const result = await handleTtsCommands(buildTtsParams("/tts audio hello", cfg, "reader"), true);
expect(result?.shouldContinue).toBe(false);
// The expected text is "hello", without the "/tts audio" command prefix.
// objectContaining: only text, cfg and agentId are pinned; other call fields are not checked.
expect(ttsMocks.textToSpeech).toHaveBeenCalledWith(
expect.objectContaining({
text: "hello",
cfg,
agentId: "reader",
}),
);
});
});

View File

@@ -111,7 +111,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
return { shouldContinue: false };
}
const config = resolveTtsConfig(params.cfg);
const config = resolveTtsConfig(params.cfg, params.agentId);
const prefsPath = resolveTtsPrefsPath(config);
const action = parsed.action;
const args = parsed.args;
@@ -149,6 +149,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
cfg: params.cfg,
channel: params.command.channel,
prefsPath,
agentId: params.agentId,
});
if (result.success && result.audioPath) {