mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:30:42 +00:00
fix(tts): honor per-agent config in tts commands
This commit is contained in:
@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc.
|
||||
- TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface.
|
||||
- TTS/agents: make `/tts audio`, `/tts status`, and the `tts` agent tool honor the active `agents.list[].tts` voice/provider override.
|
||||
- Providers/Azure Speech: add Azure Speech as a bundled TTS provider with Speech-resource auth, voice listing, SSML escaping, native Ogg/Opus voice-note output, and telephony output. (#51776) Thanks @leonchui.
|
||||
- CLI/image generation: expose generic `--background` on `openclaw infer image generate` and `openclaw infer image edit`, keep `--openai-background` as an OpenAI alias, and let fal image generation honor `--output-format png|jpeg`. Thanks @steipete.
|
||||
- Browser/config: allow local managed Chrome launch discovery and post-launch CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi. Fixes #66803. Thanks @beat843796.
|
||||
|
||||
@@ -146,7 +146,8 @@ voice, model, style, or auto-TTS mode. The agent block deep-merges over
|
||||
}
|
||||
```
|
||||
|
||||
Precedence for automatic replies is:
|
||||
Precedence for automatic replies, `/tts audio`, `/tts status`, and the `tts`
|
||||
agent tool is:
|
||||
|
||||
1. `messages.tts`
|
||||
2. active `agents.list[].tts`
|
||||
|
||||
@@ -253,6 +253,7 @@ export function createOpenClawTools(
|
||||
createTtsTool({
|
||||
agentChannel: options?.agentChannel,
|
||||
config: resolvedConfig,
|
||||
agentId: sessionAgentId,
|
||||
}),
|
||||
...collectPresentOpenClawTools([imageGenerateTool, musicGenerateTool, videoGenerateTool]),
|
||||
...(embedded
|
||||
|
||||
@@ -167,6 +167,40 @@ describe("createOpenClawTools TTS config wiring", () => {
|
||||
__testing.setDepsForTest();
|
||||
}
|
||||
});
|
||||
|
||||
it("passes the resolved session agent id into the tts tool", async () => {
|
||||
const injectedConfig = {
|
||||
agents: {
|
||||
list: [{ id: "reader" }, { id: "main" }],
|
||||
},
|
||||
} satisfies OpenClawConfig;
|
||||
|
||||
const { __testing, createOpenClawTools } = await import("./openclaw-tools.js");
|
||||
__testing.setDepsForTest({ config: injectedConfig });
|
||||
|
||||
try {
|
||||
const tool = createOpenClawTools({
|
||||
agentSessionKey: "agent:reader:telegram:chat:123",
|
||||
disableMessageTool: true,
|
||||
disablePluginTools: true,
|
||||
}).find((candidate) => candidate.name === "tts");
|
||||
|
||||
if (!tool) {
|
||||
throw new Error("missing tts tool");
|
||||
}
|
||||
|
||||
await tool.execute("call-1", { text: "hello from reader" });
|
||||
|
||||
expect(mocks.textToSpeech).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
text: "hello from reader",
|
||||
agentId: "reader",
|
||||
}),
|
||||
);
|
||||
} finally {
|
||||
__testing.setDepsForTest();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("createOpenClawTools cron context wiring", () => {
|
||||
|
||||
@@ -85,6 +85,25 @@ describe("createTtsTool", () => {
|
||||
expect(result.details).toMatchObject({ timeoutMs: 12_345 });
|
||||
});
|
||||
|
||||
it("passes the active agent id to speech generation", async () => {
|
||||
textToSpeechSpy.mockResolvedValue({
|
||||
success: true,
|
||||
audioPath: "/tmp/reply.opus",
|
||||
provider: "test",
|
||||
voiceCompatible: true,
|
||||
});
|
||||
|
||||
const tool = createTtsTool({ agentId: "voice-agent" });
|
||||
await tool.execute("call-1", { text: "hello" });
|
||||
|
||||
expect(textToSpeechSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
text: "hello",
|
||||
agentId: "voice-agent",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("echoes longer utterances verbatim into the tool-result content", async () => {
|
||||
textToSpeechSpy.mockResolvedValue({
|
||||
success: true,
|
||||
|
||||
@@ -57,6 +57,7 @@ function sanitizeTranscriptForToolContent(text: string): string {
|
||||
export function createTtsTool(opts?: {
|
||||
config?: OpenClawConfig;
|
||||
agentChannel?: GatewayMessageChannel;
|
||||
agentId?: string;
|
||||
}): AnyAgentTool {
|
||||
return {
|
||||
label: "TTS",
|
||||
@@ -75,6 +76,7 @@ export function createTtsTool(opts?: {
|
||||
cfg,
|
||||
channel: channel ?? opts?.agentChannel,
|
||||
timeoutMs,
|
||||
agentId: opts?.agentId,
|
||||
});
|
||||
|
||||
if (result.success && result.audioPath) {
|
||||
|
||||
@@ -38,9 +38,11 @@ const FALLBACK_TTS_PROVIDER = "backup-speech";
|
||||
function buildTtsParams(
|
||||
commandBodyNormalized: string,
|
||||
cfg: OpenClawConfig = {},
|
||||
agentId?: string,
|
||||
): Parameters<typeof handleTtsCommands>[0] {
|
||||
return {
|
||||
cfg,
|
||||
agentId,
|
||||
command: {
|
||||
commandBodyNormalized,
|
||||
isAuthorizedSender: true,
|
||||
@@ -189,4 +191,38 @@ describe("handleTtsCommands status fallback reporting", () => {
|
||||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(result?.reply?.text).toContain("TTS status");
|
||||
});
|
||||
|
||||
it("resolves status config for the active agent", async () => {
|
||||
const cfg = {
|
||||
agents: { list: [{ id: "reader", tts: { provider: "elevenlabs" } }] },
|
||||
} as OpenClawConfig;
|
||||
|
||||
const result = await handleTtsCommands(buildTtsParams("/tts status", cfg, "reader"), true);
|
||||
|
||||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(ttsMocks.resolveTtsConfig).toHaveBeenCalledWith(cfg, "reader");
|
||||
});
|
||||
|
||||
it("passes the active agent id to /tts audio synthesis", async () => {
|
||||
ttsMocks.textToSpeech.mockResolvedValue({
|
||||
success: true,
|
||||
audioPath: "/tmp/reader.ogg",
|
||||
provider: PRIMARY_TTS_PROVIDER,
|
||||
voiceCompatible: true,
|
||||
});
|
||||
const cfg = {
|
||||
agents: { list: [{ id: "reader", tts: { provider: PRIMARY_TTS_PROVIDER } }] },
|
||||
} as OpenClawConfig;
|
||||
|
||||
const result = await handleTtsCommands(buildTtsParams("/tts audio hello", cfg, "reader"), true);
|
||||
|
||||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(ttsMocks.textToSpeech).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
text: "hello",
|
||||
cfg,
|
||||
agentId: "reader",
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -111,7 +111,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
||||
return { shouldContinue: false };
|
||||
}
|
||||
|
||||
const config = resolveTtsConfig(params.cfg);
|
||||
const config = resolveTtsConfig(params.cfg, params.agentId);
|
||||
const prefsPath = resolveTtsPrefsPath(config);
|
||||
const action = parsed.action;
|
||||
const args = parsed.args;
|
||||
@@ -149,6 +149,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
||||
cfg: params.cfg,
|
||||
channel: params.command.channel,
|
||||
prefsPath,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
|
||||
if (result.success && result.audioPath) {
|
||||
|
||||
Reference in New Issue
Block a user