diff --git a/CHANGELOG.md b/CHANGELOG.md index 25d0a996963..bac2f1b65aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- TTS/Telegram: keep trusted local audio generated by the TTS tool queued for voice-note delivery even when the run-level built-in tool list omits the raw `tts` name. Fixes #74752. Thanks @Loveworld3033 and @andyliu. - Heartbeat: strip legacy `[TOOL_CALL]...[/TOOL_CALL]` and `[TOOL_RESULT]...[/TOOL_RESULT]` pseudo-call blocks from heartbeat replies before channel delivery. Fixes #54138. Thanks @Deniable9570. - macOS/Voice Wake: send wake-word and Push-to-Talk transcripts through the selected macOS session target instead of always falling back to main WebChat. Fixes #51040. Thanks @carl-jeffrolc. - Providers/xAI: give Grok `web_search` a 60s default timeout, harden malformed xAI Responses parsing, and return structured timeout errors instead of aborting the tool call. Fixes #58063 and #58733. Thanks @dnishimura, @marvcasasola-svg, and @Nanako0129. 
diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts index 96be6c19f9d..053a4507224 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts @@ -590,4 +590,33 @@ describe("handleToolExecutionEnd media emission", () => { expect(ctx.state.pendingToolAudioAsVoice).toBe(true); expect(ctx.state.pendingToolTrustedLocalMedia).toBe(true); }); + + it("queues trusted TTS local media when the exact built-in name is absent", async () => { + const ctx = createMockContext({ + shouldEmitToolOutput: false, + onToolResult: vi.fn(), + builtinToolNames: new Set(["web_search"]), + }); + + await handleToolExecutionEnd(ctx, { + type: "tool_execution_end", + toolName: "tts", + toolCallId: "tc-1", + isError: false, + result: { + content: [{ type: "text", text: "(spoken) hello" }], + details: { + media: { + mediaUrl: "/tmp/reply.opus", + audioAsVoice: true, + trustedLocalMedia: true, + }, + }, + }, + }); + + expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]); + expect(ctx.state.pendingToolAudioAsVoice).toBe(true); + expect(ctx.state.pendingToolTrustedLocalMedia).toBe(true); + }); }); diff --git a/src/agents/pi-embedded-subscribe.tools.media.test.ts b/src/agents/pi-embedded-subscribe.tools.media.test.ts index b688e2829d5..6ee51c571f8 100644 --- a/src/agents/pi-embedded-subscribe.tools.media.test.ts +++ b/src/agents/pi-embedded-subscribe.tools.media.test.ts @@ -340,6 +340,24 @@ describe("extractToolResultMediaPaths", () => { ).toEqual(["/tmp/screenshot.png"]); }); + it("keeps trusted TTS local media when the raw built-in name is absent", () => { + expect( + filterToolResultMediaUrls( + "tts", + ["/tmp/reply.opus"], + { + details: { + media: { + mediaUrl: "/tmp/reply.opus", + trustedLocalMedia: true, + }, + }, + }, + new Set(["web_search"]), + ), + ).toEqual(["/tmp/reply.opus"]); + }); + it("keeps local 
media for bundled plugin tool names registered in this run", () => { // music_generate is a bundled-plugin trusted tool; when the runner // registers it for this run, its raw name must be allowed through the @@ -365,6 +383,24 @@ describe("extractToolResultMediaPaths", () => { ).toEqual([]); }); + it("does not let non-TTS trustedLocalMedia bypass the exact-name gate", () => { + expect( + filterToolResultMediaUrls( + "Web_Search", + ["/etc/passwd"], + { + details: { + media: { + mediaUrl: "/etc/passwd", + trustedLocalMedia: true, + }, + }, + }, + new Set(["web_search"]), + ), + ).toEqual([]); + }); + it("still allows remote media for colliding aliases", () => { expect( filterToolResultMediaUrls( @@ -387,6 +423,21 @@ describe("extractToolResultMediaPaths", () => { ).toEqual([]); }); + it("does not trust external TTS results with trustedLocalMedia", () => { + expect( + filterToolResultMediaUrls("tts", ["/tmp/reply.opus"], { + details: { + mcpServer: "probe", + mcpTool: "tts", + media: { + mediaUrl: "/tmp/reply.opus", + trustedLocalMedia: true, + }, + }, + }), + ).toEqual([]); + }); + it("still allows remote MEDIA urls for MCP-provenance results", () => { expect( filterToolResultMediaUrls("browser", ["https://example.com/screenshot.png"], { diff --git a/src/agents/pi-embedded-subscribe.tools.ts b/src/agents/pi-embedded-subscribe.tools.ts index e027662a3e2..2346fa091f8 100644 --- a/src/agents/pi-embedded-subscribe.tools.ts +++ b/src/agents/pi-embedded-subscribe.tools.ts @@ -286,6 +286,21 @@ export function isToolResultMediaTrusted(toolName?: string, result?: unknown): b ); } +function isTrustedOwnedTtsLocalMedia(toolName: string | undefined, result: unknown): boolean { + if ( + !toolName || + !isToolResultMediaTrusted(toolName, result) || + normalizeToolName(toolName) !== "tts" + ) { + return false; + } + const media = readToolResultDetails(result)?.media; + if (!media || typeof media !== "object" || Array.isArray(media)) { + return false; + } + return (media as 
Record<string, unknown>).trustedLocalMedia === true; +} + export function filterToolResultMediaUrls( toolName: string | undefined, mediaUrls: string[], @@ -295,14 +310,17 @@ export function filterToolResultMediaUrls( if (mediaUrls.length === 0) { return mediaUrls; } + const trustedOwnedTtsLocalMedia = isTrustedOwnedTtsLocalMedia(toolName, result); if (isToolResultMediaTrusted(toolName, result)) { // When the current run provides its exact registered tool names (core // built-ins plus bundled/trusted plugin tools), require the raw emitted // tool name to match one of them before allowing local MEDIA: paths. // This blocks normalized aliases and case-variant collisions such as // "Bash" -> "bash" or "Web_Search" -> "web_search" from inheriting a - // registered tool's media trust. - if (builtinToolNames !== undefined) { + // registered tool's media trust. TTS-generated local files carry a + // separate trusted-media flag from the owned tool result, so they can + // survive runs whose exact built-in set omitted the raw tts name. + if (builtinToolNames !== undefined && !trustedOwnedTtsLocalMedia) { const registeredName = toolName?.trim(); if (!registeredName || !builtinToolNames.has(registeredName)) { return mediaUrls.filter((url) => HTTP_URL_RE.test(url.trim()));