fix(agents): trust-gate tts transcript suppression

Co-authored-by: Neerav Makwana <261249544+neeravmakwana@users.noreply.github.com>
This commit is contained in:
Peter Steinberger
2026-04-25 03:03:48 +01:00
parent 628f0e8055
commit 250d13de53
3 changed files with 48 additions and 1 deletion

View File

@@ -70,6 +70,7 @@ Docs: https://docs.openclaw.ai
- Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler.
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
- Agents/TTS: suppress the spoken transcript of a successful built-in `tts` call from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tools that collide with the `tts` name. Fixes #71282. Thanks @neeravmakwana.
- Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi.
- Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9.
- Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete.

View File

@@ -235,6 +235,7 @@ describe("handleToolExecutionEnd media emission", () => {
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
toolResultFormat: "plain",
builtinToolNames: new Set(["tts"]),
});
await handleToolExecutionEnd(ctx, {
@@ -263,6 +264,7 @@ describe("handleToolExecutionEnd media emission", () => {
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
toolResultFormat: "plain",
builtinToolNames: new Set(["tts"]),
});
await handleToolExecutionEnd(ctx, {
@@ -315,6 +317,40 @@ describe("handleToolExecutionEnd media emission", () => {
expect(ctx.state.pendingToolAudioAsVoice).toBe(false);
});
// Collision case: the finished tool is named "tts", but "tts" is not in the
// builtin tool set handed to the context. The text output must still be
// emitted, and the structured media must still be queued as voice audio.
it("keeps verbose TTS text for non-builtin remote media collisions", async () => {
  const remoteOutputText = "remote tool output";
  const remoteMediaUrl = "https://example.com/reply.opus";
  // Deliberately omits "tts" so the tool name below is only a name collision.
  const builtinToolNames = new Set(["web_search"]);

  const context = createMockContext({
    shouldEmitToolOutput: true,
    onToolResult: vi.fn(),
    toolResultFormat: "plain",
    builtinToolNames,
  });

  await handleToolExecutionEnd(context, {
    type: "tool_execution_end",
    toolName: "tts",
    toolCallId: "tc-1",
    isError: false,
    result: {
      content: [{ type: "text", text: remoteOutputText }],
      details: {
        media: {
          mediaUrl: remoteMediaUrl,
          audioAsVoice: true,
        },
      },
    },
  });

  // The text was forwarded rather than suppressed.
  expect(context.emitToolOutput).toHaveBeenCalledWith(
    "tts",
    undefined,
    remoteOutputText,
    expect.any(Object),
  );
  // The media payload was queued alongside the text.
  expect(context.state.pendingToolMediaUrls).toEqual([remoteMediaUrl]);
  expect(context.state.pendingToolAudioAsVoice).toBe(true);
});
async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") {
const ctx = createMockContext({
shouldEmitToolOutput: true,

View File

@@ -191,10 +191,18 @@ function readApplyPatchSummary(result: unknown): ApplyPatchSummary | null {
/**
 * Decides whether the text output of a finished tool call should be withheld
 * because its structured media is going to be delivered instead.
 *
 * Suppression is trust-gated: it applies only when every condition holds.
 *
 * @param params.toolName - Tool name as resolved by the caller; must equal `"tts"`.
 * @param params.rawToolName - Tool name as originally reported; must equal
 *   `"tts"` after trimming surrounding whitespace.
 * @param params.isToolError - Whether the tool call failed; failures are never suppressed.
 * @param params.hasDeliverableStructuredMedia - Whether the result carries
 *   structured media that can actually be delivered.
 * @param params.builtinToolNames - Set of builtin tool names. When it is
 *   omitted or does not contain `"tts"`, nothing is suppressed.
 * @returns `true` only when the text output should be suppressed.
 */
function shouldSuppressStructuredMediaToolOutput(params: {
  toolName: string;
  rawToolName: string;
  isToolError: boolean;
  hasDeliverableStructuredMedia: boolean;
  builtinToolNames?: ReadonlySet<string>;
}): boolean {
  return (
    params.toolName === "tts" &&
    // The raw name must match too, so a differently named tool that merely
    // resolves to "tts" keeps its text output.
    params.rawToolName.trim() === "tts" &&
    // `=== true` turns the `undefined` from an absent set into `false`.
    params.builtinToolNames?.has("tts") === true &&
    !params.isToolError &&
    params.hasDeliverableStructuredMedia
  );
}
function buildPatchSummaryText(summary: ApplyPatchSummary): string {
@@ -527,8 +535,10 @@ async function emitToolResultOutput(params: {
const shouldEmitOutput =
!shouldSuppressStructuredMediaToolOutput({
toolName,
rawToolName,
isToolError,
hasDeliverableStructuredMedia: hasStructuredMedia && mediaUrls.length > 0,
builtinToolNames: ctx.builtinToolNames,
}) &&
(ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText }));
if (shouldEmitOutput) {