diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bd47b44803..27548868f08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai - Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc. - TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface. +- TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override. - Providers/Azure Speech: add Azure Speech as a bundled TTS provider with Speech-resource auth, voice listing, SSML escaping, native Ogg/Opus voice-note output, and telephony output. (#51776) Thanks @leonchui. - CLI/image generation: expose generic `--background` on `openclaw infer image generate` and `openclaw infer image edit`, keep `--openai-background` as an OpenAI alias, and let fal image generation honor `--output-format png|jpeg`. Thanks @steipete. - Browser/config: allow local managed Chrome launch discovery and post-launch CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi. Fixes #66803. Thanks @beat843796. diff --git a/docs/tools/tts.md b/docs/tools/tts.md index ef19241702d..7e2e9adc8ff 100644 --- a/docs/tools/tts.md +++ b/docs/tools/tts.md @@ -146,7 +146,8 @@ voice, model, style, or auto-TTS mode. The agent block deep-merges over } ``` -Precedence for automatic replies is: +Precedence for automatic replies, `/tts audio`, `/tts status`, and the `tts` +agent tool is: 1. `messages.tts` 2. active `agents.list[].tts` diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index dae4583c87e..ad4d0ac90b3 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -253,6 +253,7 @@ export function createOpenClawTools( createTtsTool({ agentChannel: options?.agentChannel, config: resolvedConfig, + agentId: sessionAgentId, }), ...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]), ...(embedded diff --git a/src/agents/openclaw-tools.tts-config.test.ts b/src/agents/openclaw-tools.tts-config.test.ts index 6ab362dd949..4e04d1319f1 100644 --- a/src/agents/openclaw-tools.tts-config.test.ts +++ b/src/agents/openclaw-tools.tts-config.test.ts @@ -167,6 +167,40 @@ describe("createOpenClawTools TTS config wiring", () => { __testing.setDepsForTest(); } }); + + it("passes the resolved session agent id into the tts tool", async () => { + const injectedConfig = { + agents: { + list: [{ id: "reader" }, { id: "main" }], + }, + } satisfies OpenClawConfig; + + const { __testing, createOpenClawTools } = await import("./openclaw-tools.js"); + __testing.setDepsForTest({ config: injectedConfig }); + + try { + const tool = createOpenClawTools({ + agentSessionKey: "agent:reader:telegram:chat:123", + disableMessageTool: true, + disablePluginTools: true, + }).find((candidate) => candidate.name === "tts"); + + if (!tool) { + throw new Error("missing tts tool"); + } + + await tool.execute("call-1", { text: "hello from reader" }); + + expect(mocks.textToSpeech).toHaveBeenCalledWith( + expect.objectContaining({ + text: "hello from reader", + agentId: "reader", + }), + ); + } finally { + __testing.setDepsForTest(); + } + }); }); describe("createOpenClawTools cron context wiring", () => { diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts index 412e42b8667..e9cba32a24e 100644 --- a/src/agents/tools/tts-tool.test.ts +++ b/src/agents/tools/tts-tool.test.ts @@ -85,6 +85,25 @@ describe("createTtsTool", () => { expect(result.details).toMatchObject({ timeoutMs: 12_345 }); }); + it("passes the active agent id to speech generation", async () => { + textToSpeechSpy.mockResolvedValue({ + success: true, + audioPath: "/tmp/reply.opus", + provider: "test", + voiceCompatible: true, + }); + + const tool = createTtsTool({ agentId: "voice-agent" }); + await tool.execute("call-1", { text: "hello" }); + + expect(textToSpeechSpy).toHaveBeenCalledWith( + expect.objectContaining({ + text: "hello", + agentId: "voice-agent", + }), + ); + }); + it("echoes longer utterances verbatim into the tool-result content", async () => { textToSpeechSpy.mockResolvedValue({ success: true, diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts index 8861cf3ad0b..862d0700484 100644 --- a/src/agents/tools/tts-tool.ts +++ b/src/agents/tools/tts-tool.ts @@ -57,6 +57,7 @@ function sanitizeTranscriptForToolContent(text: string): string { export function createTtsTool(opts?: { config?: OpenClawConfig; agentChannel?: GatewayMessageChannel; + agentId?: string; }): AnyAgentTool { return { label: "TTS", @@ -75,6 +76,7 @@ export function createTtsTool(opts?: { cfg, channel: channel ?? opts?.agentChannel, timeoutMs, + agentId: opts?.agentId, }); if (result.success && result.audioPath) { diff --git a/src/auto-reply/reply/commands-tts.test.ts b/src/auto-reply/reply/commands-tts.test.ts index a22b6f33494..25e672c5098 100644 --- a/src/auto-reply/reply/commands-tts.test.ts +++ b/src/auto-reply/reply/commands-tts.test.ts @@ -38,9 +38,11 @@ const FALLBACK_TTS_PROVIDER = "backup-speech"; function buildTtsParams( commandBodyNormalized: string, cfg: OpenClawConfig = {}, + agentId?: string, ): Parameters[0] { return { cfg, + agentId, command: { commandBodyNormalized, isAuthorizedSender: true, @@ -189,4 +191,38 @@ describe("handleTtsCommands status fallback reporting", () => { expect(result?.shouldContinue).toBe(false); expect(result?.reply?.text).toContain("TTS status"); }); + + it("resolves status config for the active agent", async () => { + const cfg = { + agents: { list: [{ id: "reader", tts: { provider: "elevenlabs" } }] }, + } as OpenClawConfig; + + const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true); + + expect(result?.shouldContinue).toBe(false); + expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(cfg, "reader"); + }); + + it("passes the active agent id to /tts audio synthesis", async () => { + ttsMocks.textToSpeech.mockResolvedValue({ + success: true, + audioPath: "/tmp/reader.ogg", + provider: PRIMARY_TTS_PROVIDER, + voiceCompatible: true, + }); + const cfg = { + agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] }, + } as OpenClawConfig; + + const result = await handleTtsCommands(buildTtsParams("/tts audio hello", cfg, "reader"), true); + + expect(result?.shouldContinue).toBe(false); + expect(ttsMocks.textToSpeech).toHaveBeenCalledWith( + expect.objectContaining({ + text: "hello", + cfg, + agentId: "reader", + }), + ); + }); }); diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts index 5dba5527f82..b211776120d 100644 --- a/src/auto-reply/reply/commands-tts.ts +++ b/src/auto-reply/reply/commands-tts.ts @@ -111,7 +111,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand return { shouldContinue: false }; } - const config = resolveTtsConfig(params.cfg); + const config = resolveTtsConfig(params.cfg, params.agentId); const prefsPath = resolveTtsPrefsPath(config); const action = parsed.action; const args = parsed.args; @@ -149,6 +149,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand cfg: params.cfg, channel: params.command.channel, prefsPath, + agentId: params.agentId, }); if (result.success && result.audioPath) {