mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
fix: require explicit TTS intent
This commit is contained in:
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- TTS/Telegram: keep trusted local audio generated by the TTS tool queued for voice-note delivery even when the run-level built-in tool list omits the raw `tts` name. Fixes #74752. Thanks @Loveworld3033 and @andyliu.
|
||||
- TTS: require explicit user or config audio intent for the agent speech tool so dashboard chats stay text unless audio is requested. Fixes #69777. Thanks @alexandre-leng.
|
||||
- Heartbeat: strip legacy `[TOOL_CALL]...[/TOOL_CALL]` and `[TOOL_RESULT]...[/TOOL_RESULT]` pseudo-call blocks from heartbeat replies before channel delivery. Fixes #54138. Thanks @Deniable9570.
|
||||
- macOS/Voice Wake: send wake-word and Push-to-Talk transcripts through the selected macOS session target instead of always falling back to main WebChat. Fixes #51040. Thanks @carl-jeffrolc.
|
||||
- Providers/xAI: give Grok `web_search` a 60s default timeout, harden malformed xAI Responses parsing, and return structured timeout errors instead of aborting the tool call. Fixes #58063 and #58733. Thanks @dnishimura, @marvcasasola-svg, and @Nanako0129.
|
||||
|
||||
@@ -48,6 +48,9 @@ audio attachments everywhere else, and PCM/Ulaw streams for telephony and Talk.
|
||||
<Note>
|
||||
Auto-TTS is **off** by default. When `messages.tts.provider` is unset,
|
||||
OpenClaw picks the first configured provider in registry auto-select order.
|
||||
The built-in `tts` agent tool is explicit-intent only: ordinary chat stays
|
||||
text unless the user asks for audio, uses `/tts`, or enables Auto-TTS/directive
|
||||
speech.
|
||||
</Note>
|
||||
|
||||
## Supported providers
|
||||
|
||||
@@ -168,6 +168,27 @@ describe("createOpenClawTools TTS config wiring", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps direct TTS tool guidance explicit even when the tool is available", async () => {
|
||||
const { __testing, createOpenClawTools } = await import("./openclaw-tools.js");
|
||||
__testing.setDepsForTest({ config: {} });
|
||||
|
||||
try {
|
||||
const tool = createOpenClawTools({
|
||||
disableMessageTool: true,
|
||||
disablePluginTools: true,
|
||||
}).find((candidate) => candidate.name === "tts");
|
||||
|
||||
if (!tool) {
|
||||
throw new Error("missing tts tool");
|
||||
}
|
||||
|
||||
expect(tool.description).toContain("Use only for explicit audio intent");
|
||||
expect(tool.description).toContain("Never use for ordinary text replies");
|
||||
} finally {
|
||||
__testing.setDepsForTest();
|
||||
}
|
||||
});
|
||||
|
||||
it("passes the resolved session agent id into the tts tool", async () => {
|
||||
const injectedConfig = {
|
||||
agents: {
|
||||
|
||||
@@ -17,6 +17,14 @@ describe("createTtsTool", () => {
|
||||
expect(tool.description).toContain(SILENT_REPLY_TOKEN);
|
||||
});
|
||||
|
||||
it("requires explicit user or config audio intent in guidance text", () => {
|
||||
const tool = createTtsTool();
|
||||
|
||||
expect(tool.description).toContain("Use only for explicit audio intent");
|
||||
expect(tool.description).toContain("active TTS config");
|
||||
expect(tool.description).toContain("Never use for ordinary text replies");
|
||||
});
|
||||
|
||||
it("stores audio delivery in details.media and preserves the spoken text in content", async () => {
|
||||
textToSpeechSpy.mockResolvedValue({
|
||||
success: true,
|
||||
|
||||
@@ -64,7 +64,9 @@ export function createTtsTool(opts?: {
|
||||
label: "TTS",
|
||||
name: "tts",
|
||||
displaySummary: "Convert text to speech and return audio.",
|
||||
description: `Convert text to speech. Audio is delivered automatically from the tool result — reply with ${SILENT_REPLY_TOKEN} after a successful call to avoid duplicate messages.`,
|
||||
description:
|
||||
"Use only for explicit audio intent (audio, voice, speech, TTS) or active TTS config. Never use for ordinary text replies. " +
|
||||
`Audio is delivered automatically from the tool result — reply with ${SILENT_REPLY_TOKEN} after a successful call to avoid duplicate messages.`,
|
||||
parameters: TtsToolSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const params = args as Record<string, unknown>;
|
||||
|
||||
@@ -46,6 +46,17 @@ describe("shouldAttemptTtsPayload", () => {
|
||||
expect(shouldAttemptTtsPayload({ cfg: {} as OpenClawConfig })).toBe(false);
|
||||
});
|
||||
|
||||
it("does not infer automatic TTS from a dashboard text turn without opt-in state", () => {
|
||||
expect(
|
||||
shouldAttemptTtsPayload({
|
||||
cfg: {} as OpenClawConfig,
|
||||
agentId: "main",
|
||||
channelId: "webchat",
|
||||
accountId: "dashboard",
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("honors session auto state before prefs and config", () => {
|
||||
writeFileSync(prefsPath, JSON.stringify({ tts: { auto: "off" } }));
|
||||
const cfg = { messages: { tts: { auto: "off" } } } as OpenClawConfig;
|
||||
|
||||
Reference in New Issue
Block a user