fix: suppress tts transcript tool output

This commit is contained in:
Neerav Makwana
2026-04-24 21:47:04 -04:00
committed by Peter Steinberger
parent 6abab7555e
commit db8f7478b4
2 changed files with 19 additions and 9 deletions

View File

@@ -230,7 +230,7 @@ describe("handleToolExecutionEnd media emission", () => {
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("still queues structured media when verbose is full", async () => {
it("queues TTS structured media without leaking spoken text when verbose is full", async () => {
const ctx = createMockContext({
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
@@ -253,12 +253,12 @@ describe("handleToolExecutionEnd media emission", () => {
},
});
expect(ctx.emitToolOutput).toHaveBeenCalled();
expect(ctx.emitToolOutput).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
it("does not queue a duplicate voice copy when emitted tool output already sent the same audio", async () => {
it("queues one voice copy when TTS output also contains a legacy media directive", async () => {
const ctx = createMockContext({
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
@@ -281,9 +281,9 @@ describe("handleToolExecutionEnd media emission", () => {
},
});
expect(ctx.emitToolOutput).toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(false);
expect(ctx.emitToolOutput).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") {

View File

@@ -189,6 +189,14 @@ function readApplyPatchSummary(result: unknown): ApplyPatchSummary | null {
return { added, modified, deleted };
}
/**
 * Decides whether a tool result's text output should be withheld.
 *
 * Suppression applies only when all three conditions hold: the tool is
 * named "tts", the call did not fail, and the result carries structured
 * media. In every other case the normal emission rules apply.
 *
 * @param params.toolName - Name of the tool that produced the result.
 * @param params.isToolError - Whether the tool call ended in an error.
 * @param params.hasStructuredMedia - Whether the result carries structured media.
 * @returns `true` when the text output should be suppressed.
 */
function shouldSuppressStructuredMediaToolOutput(params: {
  toolName: string;
  isToolError: boolean;
  hasStructuredMedia: boolean;
}): boolean {
  const { toolName, isToolError, hasStructuredMedia } = params;
  // Only the "tts" tool is subject to suppression.
  if (toolName !== "tts") {
    return false;
  }
  // Failed calls are never suppressed.
  if (isToolError) {
    return false;
  }
  return hasStructuredMedia;
}
function buildPatchSummaryText(summary: ApplyPatchSummary): string {
const parts: string[] = [];
if (summary.added.length > 0) {
@@ -443,7 +451,7 @@ async function emitToolResultOutput(params: {
sanitizedResult: unknown;
}) {
const { ctx, toolName, rawToolName, meta, isToolError, result, sanitizedResult } = params;
const hasStructuredMedia =
const hasStructuredMedia = Boolean(
result &&
typeof result === "object" &&
(result as { details?: unknown }).details &&
@@ -451,7 +459,8 @@ async function emitToolResultOutput(params: {
!Array.isArray((result as { details?: unknown }).details) &&
typeof ((result as { details?: { media?: unknown } }).details?.media ?? undefined) ===
"object" &&
!Array.isArray((result as { details?: { media?: unknown } }).details?.media);
!Array.isArray((result as { details?: { media?: unknown } }).details?.media),
);
const approvalPending = readExecApprovalPendingDetails(result);
let emittedToolOutputMediaUrls: string[] = [];
if (!isToolError && approvalPending) {
@@ -512,7 +521,8 @@ async function emitToolResultOutput(params: {
const outputText = extractToolResultText(sanitizedResult);
const shouldEmitOutput =
ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText });
!shouldSuppressStructuredMediaToolOutput({ toolName, isToolError, hasStructuredMedia }) &&
(ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText }));
if (shouldEmitOutput) {
if (outputText) {
ctx.emitToolOutput(rawToolName, meta, outputText, result);