fix(agents): dedupe emitted TTS media

This commit is contained in:
Peter Steinberger
2026-04-22 20:24:19 +01:00
parent e5b67b7ebd
commit 81f247b1ae
2 changed files with 34 additions and 2 deletions

View File

@@ -231,7 +231,11 @@ describe("handleToolExecutionEnd media emission", () => {
});
it("still queues structured media when verbose is full", async () => {
const ctx = createMockContext({ shouldEmitToolOutput: true, onToolResult: vi.fn() });
const ctx = createMockContext({
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
toolResultFormat: "plain",
});
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
@@ -254,6 +258,34 @@ describe("handleToolExecutionEnd media emission", () => {
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
// Regression test for duplicate TTS media: the tool result below references
// /tmp/reply.opus twice, once as a MEDIA: line in its text content and once as
// structured details.media flagged audioAsVoice. With tool-output emission
// enabled, the handler is expected to leave nothing queued as pending media.
it("does not queue a duplicate voice copy when emitted tool output already sent the same audio", async () => {
// Emission is switched on and the result format is plain text.
const ctx = createMockContext({
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
toolResultFormat: "plain",
});
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "tts",
toolCallId: "tc-1",
isError: false,
result: {
// Text content carries the same path that details.media.mediaUrl points at.
content: [{ type: "text", text: "Generated audio reply.\nMEDIA:/tmp/reply.opus" }],
details: {
media: {
mediaUrl: "/tmp/reply.opus",
audioAsVoice: true,
},
},
},
});
// The tool output must have been emitted.
expect(ctx.emitToolOutput).toHaveBeenCalled();
// No media URL may remain queued, and the voice flag must stay off even
// though the structured media requested audioAsVoice.
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(false);
});
async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") {
const ctx = createMockContext({
shouldEmitToolOutput: true,

View File

@@ -544,7 +544,7 @@ async function emitToolResultOutput(params: {
ctx.builtinToolNames,
);
const pendingMediaUrls =
mediaReply.audioAsVoice || emittedToolOutputMediaUrls.length === 0
emittedToolOutputMediaUrls.length === 0
? mediaUrls
: mediaUrls.filter((url) => !emittedToolOutputMediaUrls.includes(url));
if (pendingMediaUrls.length === 0) {