mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:50:43 +00:00
fix(agents): trust-gate tts transcript suppression
Co-authored-by: Neerav Makwana <261249544+neeravmakwana@users.noreply.github.com>
This commit is contained in:
@@ -70,6 +70,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler.
|
||||
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
|
||||
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
|
||||
- Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana.
|
||||
- Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi.
|
||||
- Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9.
|
||||
- Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete.
|
||||
|
||||
@@ -235,6 +235,7 @@ describe("handleToolExecutionEnd media emission", () => {
|
||||
shouldEmitToolOutput: true,
|
||||
onToolResult: vi.fn(),
|
||||
toolResultFormat: "plain",
|
||||
builtinToolNames: new Set(["tts"]),
|
||||
});
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
@@ -263,6 +264,7 @@ describe("handleToolExecutionEnd media emission", () => {
|
||||
shouldEmitToolOutput: true,
|
||||
onToolResult: vi.fn(),
|
||||
toolResultFormat: "plain",
|
||||
builtinToolNames: new Set(["tts"]),
|
||||
});
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
@@ -315,6 +317,40 @@ describe("handleToolExecutionEnd media emission", () => {
|
||||
expect(ctx.state.pendingToolAudioAsVoice).toBe(false);
|
||||
});
|
||||
|
||||
it("keeps verbose TTS text for non-builtin remote media collisions", async () => {
  // The builtin set passed here contains only "web_search", so the tool that
  // reports itself as "tts" below is not listed as a builtin in this context.
  const remoteMediaUrl = "https://example.com/reply.opus";
  const remoteText = "remote tool output";

  const ctx = createMockContext({
    shouldEmitToolOutput: true,
    onToolResult: vi.fn(),
    toolResultFormat: "plain",
    builtinToolNames: new Set(["web_search"]),
  });

  await handleToolExecutionEnd(ctx, {
    type: "tool_execution_end",
    toolName: "tts",
    toolCallId: "tc-1",
    isError: false,
    result: {
      content: [{ type: "text", text: remoteText }],
      details: {
        media: { mediaUrl: remoteMediaUrl, audioAsVoice: true },
      },
    },
  });

  // The textual tool output must still be emitted for the colliding tool name.
  expect(ctx.emitToolOutput).toHaveBeenCalledWith("tts", undefined, remoteText, expect.any(Object));

  // The structured media is expected to be queued as voice audio as well.
  expect(ctx.state.pendingToolMediaUrls).toEqual([remoteMediaUrl]);
  expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
|
||||
|
||||
async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") {
|
||||
const ctx = createMockContext({
|
||||
shouldEmitToolOutput: true,
|
||||
|
||||
@@ -191,10 +191,18 @@ function readApplyPatchSummary(result: unknown): ApplyPatchSummary | null {
|
||||
|
||||
/**
 * Decides whether the text output of a tool result should be suppressed
 * because structured media for it is already deliverable.
 *
 * Suppression applies only when every condition below holds:
 * - `toolName` is exactly `"tts"`,
 * - `rawToolName`, after trimming surrounding whitespace, is exactly `"tts"`,
 * - `builtinToolNames` is provided and contains `"tts"`,
 * - the tool call did not fail,
 * - deliverable structured media is present.
 *
 * @param params.toolName Tool name compared against `"tts"` (presumably the
 *   normalized name; confirm against the caller).
 * @param params.rawToolName Tool name as reported, trimmed before comparison.
 * @param params.isToolError Whether the tool execution ended in an error.
 * @param params.hasDeliverableStructuredMedia Whether structured media is
 *   available to deliver for this result.
 * @param params.builtinToolNames Optional set of builtin tool names; when it
 *   is omitted or lacks `"tts"`, output is never suppressed.
 * @returns `true` when the text output should be suppressed, else `false`.
 */
function shouldSuppressStructuredMediaToolOutput(params: {
  toolName: string;
  rawToolName: string;
  isToolError: boolean;
  hasDeliverableStructuredMedia: boolean;
  builtinToolNames?: ReadonlySet<string>;
}): boolean {
  // A single gated return: the earlier name-only check returned before the
  // raw-name and builtin-set checks could run, leaving them unreachable.
  return (
    params.toolName === "tts" &&
    params.rawToolName.trim() === "tts" &&
    params.builtinToolNames?.has("tts") === true &&
    !params.isToolError &&
    params.hasDeliverableStructuredMedia
  );
}
|
||||
|
||||
function buildPatchSummaryText(summary: ApplyPatchSummary): string {
|
||||
@@ -527,8 +535,10 @@ async function emitToolResultOutput(params: {
|
||||
const shouldEmitOutput =
|
||||
!shouldSuppressStructuredMediaToolOutput({
|
||||
toolName,
|
||||
rawToolName,
|
||||
isToolError,
|
||||
hasDeliverableStructuredMedia: hasStructuredMedia && mediaUrls.length > 0,
|
||||
builtinToolNames: ctx.builtinToolNames,
|
||||
}) &&
|
||||
(ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText }));
|
||||
if (shouldEmitOutput) {
|
||||
|
||||
Reference in New Issue
Block a user