fix: gate tts output suppression on deliverable media

This commit is contained in:
Neerav Makwana
2026-04-24 21:57:11 -04:00
committed by Peter Steinberger
parent db8f7478b4
commit 628f0e8055
2 changed files with 40 additions and 10 deletions

View File

@@ -286,6 +286,35 @@ describe("handleToolExecutionEnd media emission", () => {
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
it("keeps verbose TTS text when structured local media is not trusted", async () => {
  const mockCtx = createMockContext({
    shouldEmitToolOutput: true,
    onToolResult: vi.fn(),
    toolResultFormat: "plain",
    builtinToolNames: new Set(["tts"]),
  });
  // Structured media payload that points at a local file path; the
  // expectations below require that it is NOT queued for delivery.
  const localVoiceMedia = {
    mediaUrl: "/tmp/reply.opus",
    audioAsVoice: true,
  };

  await handleToolExecutionEnd(mockCtx, {
    type: "tool_execution_end",
    toolName: "TTS",
    toolCallId: "tc-1",
    isError: false,
    result: {
      content: [{ type: "text", text: "(spoken) hello" }],
      details: { media: localVoiceMedia },
    },
  });

  // The tool's text output must still be emitted...
  expect(mockCtx.emitToolOutput).toHaveBeenCalled();
  // ...while no media URL and no voice flag are left pending.
  expect(mockCtx.state.pendingToolMediaUrls).toEqual([]);
  expect(mockCtx.state.pendingToolAudioAsVoice).toBe(false);
});
async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") {
const ctx = createMockContext({
shouldEmitToolOutput: true,

View File

@@ -192,9 +192,9 @@ function readApplyPatchSummary(result: unknown): ApplyPatchSummary | null {
/**
 * Reports whether a tool result's text output should be suppressed because
 * the result is being delivered as structured media instead.
 *
 * Suppression applies only when all of the following hold:
 * - `toolName` is exactly `"tts"`,
 * - the tool call did not fail, and
 * - the result carries structured media that is actually deliverable.
 *
 * Gating on *deliverable* media (rather than the mere presence of structured
 * media) keeps the text output available when the media cannot be delivered.
 *
 * @param params.toolName - Tool name, compared verbatim against `"tts"`.
 * @param params.isToolError - Whether the tool execution ended in an error.
 * @param params.hasStructuredMedia - Deprecated and ignored; accepted only so
 *   callers that still pass it keep compiling.
 * @param params.hasDeliverableStructuredMedia - Whether the result has
 *   structured media that will actually be delivered.
 * @returns `true` when the text output should be suppressed.
 */
function shouldSuppressStructuredMediaToolOutput(params: {
  toolName: string;
  isToolError: boolean;
  /** @deprecated Not consulted; use `hasDeliverableStructuredMedia`. */
  hasStructuredMedia?: boolean;
  hasDeliverableStructuredMedia: boolean;
}): boolean {
  return params.toolName === "tts" && !params.isToolError && params.hasDeliverableStructuredMedia;
}
function buildPatchSummaryText(summary: ApplyPatchSummary): string {
@@ -520,8 +520,16 @@ async function emitToolResultOutput(params: {
}
const outputText = extractToolResultText(sanitizedResult);
const mediaReply = isToolError ? undefined : extractToolResultMediaArtifact(result);
const mediaUrls = mediaReply
? filterToolResultMediaUrls(rawToolName, mediaReply.mediaUrls, result, ctx.builtinToolNames)
: [];
const shouldEmitOutput =
!shouldSuppressStructuredMediaToolOutput({ toolName, isToolError, hasStructuredMedia }) &&
!shouldSuppressStructuredMediaToolOutput({
toolName,
isToolError,
hasDeliverableStructuredMedia: hasStructuredMedia && mediaUrls.length > 0,
}) &&
(ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText }));
if (shouldEmitOutput) {
if (outputText) {
@@ -543,16 +551,9 @@ async function emitToolResultOutput(params: {
return;
}
const mediaReply = extractToolResultMediaArtifact(result);
if (!mediaReply) {
return;
}
const mediaUrls = filterToolResultMediaUrls(
rawToolName,
mediaReply.mediaUrls,
result,
ctx.builtinToolNames,
);
const pendingMediaUrls =
emittedToolOutputMediaUrls.length === 0
? mediaUrls