From 2a4eea58a9bf25f61566c34756498ffc64caef4d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 20:43:20 +0100 Subject: [PATCH] fix: suppress commentary text in completed ws replies --- CHANGELOG.md | 1 + src/agents/openai-ws-message-conversion.ts | 41 +++++++++++--- src/agents/openai-ws-stream.test.ts | 63 ++++++++++++++++------ 3 files changed, 80 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c500406202..c3c59ed4196 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -120,6 +120,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI: make GPT-5 and Codex runs act sooner with lower-verbosity defaults, visible progress during tool work, and a one-shot retry when a turn only narrates the plan instead of taking action. - Providers/OpenAI: preserve native `reasoning.effort: “none”` and strict tool schemas on direct OpenAI-family endpoints, keep compat routes on compat shaping, fix Responses WebSocket warm-up behavior, keep stable session and turn metadata, and fall back more gracefully after early WebSocket failures. - Providers/OpenAI: support GPT-5.4 assistant `phase` metadata across OpenAI-family Responses replay and the Gateway `/v1/responses` compatibility layer, including `commentary` tool preambles and `final_answer` replies. +- Control UI/chat: prefer assistant `final_answer` text over commentary in completed WebSocket replies, hide commentary-only visible text in web chat and session previews, and keep mixed-phase persisted transcripts from leaking reasoning-style prose into final replies. Fixes #59150. - Providers/OpenAI GPT: treat short approval turns like `ok do it` and `go ahead` as immediate action turns, and trim overly memo-like GPT-5 chat confirmations so OpenAI replies stay shorter and more conversational by default. - Providers/OpenAI Codex: split native `contextWindow` from runtime `contextTokens`, keep the default effective cap at `272000`, and expose a per-model `contextTokens` override on `models.providers.*.models[]`. - Providers/OpenAI-compatible WS: compute fallback token totals from normalized usage when providers omit or zero `total_tokens`, so DashScope-compatible sessions stop storing zero totals after alias normalization. (#54940) Thanks @lyfuci. diff --git a/src/agents/openai-ws-message-conversion.ts b/src/agents/openai-ws-message-conversion.ts index 9a2e997a715..48586ce5122 100644 --- a/src/agents/openai-ws-message-conversion.ts +++ b/src/agents/openai-ws-message-conversion.ts @@ -497,19 +497,42 @@ export function buildAssistantMessageFromResponse( modelInfo: { api: string; provider: string; id: string }, ): AssistantMessage { const content: AssistantMessage["content"] = []; - const assistantPhases = new Set(); - let hasUnphasedAssistantText = false; + const assistantMessageOutputs = (response.output ?? []).filter( + (item): item is Extract => + item.type === "message", + ); + const hasExplicitPhasedAssistantText = assistantMessageOutputs.some((item) => { + const itemPhase = normalizeAssistantPhase(item.phase); + return Boolean( + itemPhase && item.content?.some((part) => part.type === "output_text" && Boolean(part.text)), + ); + }); + const hasFinalAnswerText = assistantMessageOutputs.some((item) => { + if (normalizeAssistantPhase(item.phase) !== "final_answer") { + return false; + } + return item.content?.some((part) => part.type === "output_text" && Boolean(part.text)) ?? false; + }); + const includedAssistantPhases = new Set(); + let hasIncludedUnphasedAssistantText = false; for (const item of response.output ?? []) { if (item.type === "message") { const itemPhase = normalizeAssistantPhase(item.phase); - if (itemPhase) { - assistantPhases.add(itemPhase); - } for (const part of item.content ?? []) { if (part.type === "output_text" && part.text) { - if (!itemPhase) { - hasUnphasedAssistantText = true; + const shouldIncludeText = hasFinalAnswerText + ? itemPhase === "final_answer" + : hasExplicitPhasedAssistantText + ? itemPhase === undefined + : true; + if (!shouldIncludeText) { + continue; + } + if (itemPhase) { + includedAssistantPhases.add(itemPhase); + } else { + hasIncludedUnphasedAssistantText = true; } content.push({ type: "text", @@ -584,7 +607,9 @@ export function buildAssistantMessageFromResponse( }); const finalAssistantPhase = - assistantPhases.size === 1 && !hasUnphasedAssistantText ? [...assistantPhases][0] : undefined; + includedAssistantPhases.size === 1 && !hasIncludedUnphasedAssistantText + ? [...includedAssistantPhases][0] + : undefined; return finalAssistantPhase ? ({ ...message, phase: finalAssistantPhase } as AssistantMessageWithPhase) diff --git a/src/agents/openai-ws-stream.test.ts b/src/agents/openai-ws-stream.test.ts index 73019004f6c..6b915902993 100644 --- a/src/agents/openai-ws-stream.test.ts +++ b/src/agents/openai-ws-stream.test.ts @@ -1128,7 +1128,7 @@ describe("buildAssistantMessageFromResponse", () => { expect(msg.content[0]?.text).toBe("Final answer"); }); - it("omits top-level phase when a response contains mixed assistant phases", () => { + it("keeps only final-answer text when a response contains mixed assistant phases", () => { const response = { id: "resp_mixed_phase", object: "response", @@ -1159,13 +1159,8 @@ describe("buildAssistantMessageFromResponse", () => { content: Array<{ type: string; text?: string; textSignature?: string }>; }; - expect(msg.phase).toBeUndefined(); + expect(msg.phase).toBe("final_answer"); expect(msg.content).toMatchObject([ - { - type: "text", - text: "Working... ", - textSignature: JSON.stringify({ v: 1, id: "item_commentary", phase: "commentary" }), - }, { type: "text", text: "Done.", @@ -1174,7 +1169,7 @@ describe("buildAssistantMessageFromResponse", () => { ]); }); - it("omits top-level phase when unphased legacy text and phased final text coexist", () => { + it("keeps only phased final text when unphased legacy text and phased final text coexist", () => { const response = { id: "resp_unphased_plus_final", object: "response", @@ -1204,13 +1199,8 @@ describe("buildAssistantMessageFromResponse", () => { content: Array<{ type: string; text?: string; textSignature?: string }>; }; - expect(msg.phase).toBeUndefined(); + expect(msg.phase).toBe("final_answer"); expect(msg.content).toMatchObject([ - { - type: "text", - text: "Legacy. ", - textSignature: JSON.stringify({ v: 1, id: "item_legacy" }), - }, { type: "text", text: "Done.", @@ -1219,6 +1209,44 @@ describe("buildAssistantMessageFromResponse", () => { ]); }); + it("drops commentary-only text from completed assistant messages but keeps tool calls", () => { + const response = { + id: "resp_commentary_only_tool", + object: "response", + created_at: Date.now(), + status: "completed", + model: "gpt-5.2", + output: [ + { + type: "message", + id: "item_commentary", + role: "assistant", + phase: "commentary", + content: [{ type: "output_text", text: "Working... " }], + }, + { + type: "function_call", + id: "item_tool", + call_id: "call_abc", + name: "exec", + arguments: '{"arg":"value"}', + }, + ], + usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 }, + } as unknown as ResponseObject; + + const msg = buildAssistantMessageFromResponse(response, modelInfo) as { + phase?: string; + content: Array<{ type: string; text?: string; name?: string }>; + stopReason: string; + }; + + expect(msg.phase).toBeUndefined(); + expect(msg.content.some((part) => part.type === "text")).toBe(false); + expect(msg.content).toMatchObject([{ type: "toolCall", name: "exec" }]); + expect(msg.stopReason).toBe("toolUse"); + }); + it("maps reasoning output items to thinking blocks with signature", () => { const response = { id: "resp_reasoning", @@ -1749,7 +1777,7 @@ describe("createOpenAIWebSocketStreamFn", () => { expect(doneEvent?.message.content[0]?.text).toBe("Hello back!"); }); - it("keeps assistant phase on completed WebSocket responses", async () => { + it("suppresses commentary-only text on completed WebSocket responses", async () => { const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-phase"); const stream = streamFn( modelStub as Parameters[0], @@ -1776,10 +1804,11 @@ describe("createOpenAIWebSocketStreamFn", () => { | { type: string; reason: string; - message: { phase?: string; stopReason: string }; + message: { phase?: string; stopReason: string; content?: Array<{ type?: string }> }; } | undefined; - expect(doneEvent?.message.phase).toBe("commentary"); + expect(doneEvent?.message.phase).toBeUndefined(); + expect(doneEvent?.message.content?.some((part) => part.type === "text")).toBe(false); expect(doneEvent?.message.stopReason).toBe("toolUse"); });