fix: keep tts voice media queued

This commit is contained in:
Peter Steinberger
2026-05-02 03:15:57 +01:00
parent 3800e49aa5
commit dd1c6cc38f
4 changed files with 101 additions and 2 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- TTS/Telegram: keep trusted local audio generated by the TTS tool queued for voice-note delivery even when the run-level built-in tool list omits the raw `tts` name. Fixes #74752. Thanks @Loveworld3033 and @andyliu.
- Heartbeat: strip legacy `[TOOL_CALL]...[/TOOL_CALL]` and `[TOOL_RESULT]...[/TOOL_RESULT]` pseudo-call blocks from heartbeat replies before channel delivery. Fixes #54138. Thanks @Deniable9570.
- macOS/Voice Wake: send wake-word and Push-to-Talk transcripts through the selected macOS session target instead of always falling back to main WebChat. Fixes #51040. Thanks @carl-jeffrolc.
- Providers/xAI: give Grok `web_search` a 60s default timeout, harden malformed xAI Responses parsing, and return structured timeout errors instead of aborting the tool call. Fixes #58063 and #58733. Thanks @dnishimura, @marvcasasola-svg, and @Nanako0129.

View File

@@ -590,4 +590,33 @@ describe("handleToolExecutionEnd media emission", () => {
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
expect(ctx.state.pendingToolTrustedLocalMedia).toBe(true);
});
it("queues trusted TTS local media when the exact built-in name is absent", async () => {
  // The run's exact built-in set lists only "web_search"; the raw "tts"
  // name is intentionally missing from it.
  const runBuiltins = new Set(["web_search"]);
  const voiceNotePath = "/tmp/reply.opus";

  const ctx = createMockContext({
    builtinToolNames: runBuiltins,
    onToolResult: vi.fn(),
    shouldEmitToolOutput: false,
  });

  // Successful "tts" tool completion whose details flag the media as a
  // trusted local voice note.
  await handleToolExecutionEnd(ctx, {
    type: "tool_execution_end",
    toolName: "tts",
    toolCallId: "tc-1",
    isError: false,
    result: {
      content: [{ type: "text", text: "(spoken) hello" }],
      details: {
        media: {
          mediaUrl: voiceNotePath,
          audioAsVoice: true,
          trustedLocalMedia: true,
        },
      },
    },
  });

  // The local file stays queued and both voice/trust flags are set.
  expect(ctx.state.pendingToolMediaUrls).toEqual([voiceNotePath]);
  expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
  expect(ctx.state.pendingToolTrustedLocalMedia).toBe(true);
});
});

View File

@@ -340,6 +340,24 @@ describe("extractToolResultMediaPaths", () => {
).toEqual(["/tmp/screenshot.png"]);
});
it("keeps trusted TTS local media when the raw built-in name is absent", () => {
  const voiceNotePath = "/tmp/reply.opus";
  // Tool result whose media details carry the trusted-local-media flag.
  const ttsResult = {
    details: {
      media: {
        mediaUrl: voiceNotePath,
        trustedLocalMedia: true,
      },
    },
  };
  // Exact registered names for this run; "tts" is not among them.
  const runBuiltins = new Set(["web_search"]);

  const kept = filterToolResultMediaUrls("tts", [voiceNotePath], ttsResult, runBuiltins);

  // The local path must pass through the filter unchanged.
  expect(kept).toEqual([voiceNotePath]);
});
it("keeps local media for bundled plugin tool names registered in this run", () => {
// music_generate is a bundled-plugin trusted tool; when the runner
// registers it for this run, its raw name must be allowed through the
@@ -365,6 +383,24 @@ describe("extractToolResultMediaPaths", () => {
).toEqual([]);
});
it("does not let non-TTS trustedLocalMedia bypass the exact-name gate", () => {
  const localPath = "/etc/passwd";
  // A non-TTS result that nevertheless sets the trusted-local-media flag.
  const spoofedResult = {
    details: {
      media: {
        mediaUrl: localPath,
        trustedLocalMedia: true,
      },
    },
  };
  // Only the lowercase name is registered; the emitted name below is a
  // case variant of it.
  const runBuiltins = new Set(["web_search"]);

  const kept = filterToolResultMediaUrls("Web_Search", [localPath], spoofedResult, runBuiltins);

  // The flag alone must not let the local path through.
  expect(kept).toEqual([]);
});
it("still allows remote media for colliding aliases", () => {
expect(
filterToolResultMediaUrls(
@@ -387,6 +423,21 @@ describe("extractToolResultMediaPaths", () => {
).toEqual([]);
});
it("does not trust external TTS results with trustedLocalMedia", () => {
  const voiceNotePath = "/tmp/reply.opus";
  // The details name an MCP server and tool alongside the media payload,
  // so the result does not come from the owned tts tool.
  const externalResult = {
    details: {
      mcpServer: "probe",
      mcpTool: "tts",
      media: {
        mediaUrl: voiceNotePath,
        trustedLocalMedia: true,
      },
    },
  };

  // No built-in name set is supplied for this call.
  const kept = filterToolResultMediaUrls("tts", [voiceNotePath], externalResult);

  expect(kept).toEqual([]);
});
it("still allows remote MEDIA urls for MCP-provenance results", () => {
expect(
filterToolResultMediaUrls("browser", ["https://example.com/screenshot.png"], {

View File

@@ -286,6 +286,21 @@ export function isToolResultMediaTrusted(toolName?: string, result?: unknown): b
);
}
/**
 * Decides whether a tool result is trusted local media emitted by the `tts` tool.
 *
 * All of the following must hold for the answer to be `true`:
 *  - a non-empty tool name was supplied,
 *  - `isToolResultMediaTrusted` accepts the name/result pair,
 *  - the normalized tool name is exactly `"tts"`,
 *  - the result details contain a non-array object `media` whose
 *    `trustedLocalMedia` property is strictly `true`.
 *
 * @param toolName - Raw tool name as emitted for the tool call; may be undefined.
 * @param result - Tool result payload to inspect.
 * @returns `true` only when every condition above is satisfied.
 */
function isTrustedOwnedTtsLocalMedia(toolName: string | undefined, result: unknown): boolean {
  // Without a tool name there is nothing to attribute the media to.
  if (!toolName) {
    return false;
  }
  // The general media-trust check has to pass before the TTS-specific one.
  if (!isToolResultMediaTrusted(toolName, result)) {
    return false;
  }
  // Only results whose normalized name is "tts" qualify.
  if (normalizeToolName(toolName) !== "tts") {
    return false;
  }

  const mediaDetails = readToolResultDetails(result)?.media;
  if (mediaDetails && typeof mediaDetails === "object" && !Array.isArray(mediaDetails)) {
    // Strict comparison: truthy non-boolean values (e.g. "true") do not count.
    return (mediaDetails as Record<string, unknown>).trustedLocalMedia === true;
  }
  return false;
}
export function filterToolResultMediaUrls(
toolName: string | undefined,
mediaUrls: string[],
@@ -295,14 +310,17 @@ export function filterToolResultMediaUrls(
if (mediaUrls.length === 0) {
return mediaUrls;
}
const trustedOwnedTtsLocalMedia = isTrustedOwnedTtsLocalMedia(toolName, result);
if (isToolResultMediaTrusted(toolName, result)) {
// When the current run provides its exact registered tool names (core
// built-ins plus bundled/trusted plugin tools), require the raw emitted
// tool name to match one of them before allowing local MEDIA: paths.
// This blocks normalized aliases and case-variant collisions such as
// "Bash" -> "bash" or "Web_Search" -> "web_search" from inheriting a
// registered tool's media trust.
if (builtinToolNames !== undefined) {
// registered tool's media trust. TTS-generated local files carry a
// separate trusted-media flag from the owned tool result, so they can
// survive runs whose exact built-in set omitted the raw tts name.
if (builtinToolNames !== undefined && !trustedOwnedTtsLocalMedia) {
const registeredName = toolName?.trim();
if (!registeredName || !builtinToolNames.has(registeredName)) {
return mediaUrls.filter((url) => HTTP_URL_RE.test(url.trim()));