diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts index e72e6ed105c..6a607aed011 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts @@ -286,6 +286,35 @@ describe("handleToolExecutionEnd media emission", () => { expect(ctx.state.pendingToolAudioAsVoice).toBe(true); }); + it("keeps verbose TTS text when structured local media is not trusted", async () => { + const ctx = createMockContext({ + shouldEmitToolOutput: true, + onToolResult: vi.fn(), + toolResultFormat: "plain", + builtinToolNames: new Set(["tts"]), + }); + + await handleToolExecutionEnd(ctx, { + type: "tool_execution_end", + toolName: "TTS", + toolCallId: "tc-1", + isError: false, + result: { + content: [{ type: "text", text: "(spoken) hello" }], + details: { + media: { + mediaUrl: "/tmp/reply.opus", + audioAsVoice: true, + }, + }, + }, + }); + + expect(ctx.emitToolOutput).toHaveBeenCalled(); + expect(ctx.state.pendingToolMediaUrls).toEqual([]); + expect(ctx.state.pendingToolAudioAsVoice).toBe(false); + }); + async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") { const ctx = createMockContext({ shouldEmitToolOutput: true, diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.ts b/src/agents/pi-embedded-subscribe.handlers.tools.ts index 0736e3e4de6..2aeb852b59a 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts @@ -192,9 +192,9 @@ function readApplyPatchSummary(result: unknown): ApplyPatchSummary | null { function shouldSuppressStructuredMediaToolOutput(params: { toolName: string; isToolError: boolean; - hasStructuredMedia: boolean; + hasDeliverableStructuredMedia: boolean; }): boolean { - return params.toolName === "tts" && !params.isToolError && params.hasStructuredMedia; + return params.toolName === "tts" && !params.isToolError && params.hasDeliverableStructuredMedia; } function buildPatchSummaryText(summary: ApplyPatchSummary): string { @@ -520,8 +520,16 @@ async function emitToolResultOutput(params: { } const outputText = extractToolResultText(sanitizedResult); + const mediaReply = isToolError ? undefined : extractToolResultMediaArtifact(result); + const mediaUrls = mediaReply + ? filterToolResultMediaUrls(rawToolName, mediaReply.mediaUrls, result, ctx.builtinToolNames) + : []; const shouldEmitOutput = - !shouldSuppressStructuredMediaToolOutput({ toolName, isToolError, hasStructuredMedia }) && + !shouldSuppressStructuredMediaToolOutput({ + toolName, + isToolError, + hasDeliverableStructuredMedia: hasStructuredMedia && mediaUrls.length > 0, + }) && (ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText })); if (shouldEmitOutput) { if (outputText) { @@ -543,16 +551,9 @@ async function emitToolResultOutput(params: { return; } - const mediaReply = extractToolResultMediaArtifact(result); if (!mediaReply) { return; } - const mediaUrls = filterToolResultMediaUrls( - rawToolName, - mediaReply.mediaUrls, - result, - ctx.builtinToolNames, - ); const pendingMediaUrls = emittedToolOutputMediaUrls.length === 0 ? mediaUrls