From 2a4eea58a9bf25f61566c34756498ffc64caef4d Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sun, 5 Apr 2026 20:43:20 +0100
Subject: [PATCH] fix: suppress commentary text in completed ws replies

---
 CHANGELOG.md                               |  1 +
 src/agents/openai-ws-message-conversion.ts | 41 +++++++++++---
 src/agents/openai-ws-stream.test.ts        | 63 ++++++++++++++++------
 3 files changed, 80 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c500406202..c3c59ed4196 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -120,6 +120,7 @@ Docs: https://docs.openclaw.ai
 - Providers/OpenAI: make GPT-5 and Codex runs act sooner with lower-verbosity defaults, visible progress during tool work, and a one-shot retry when a turn only narrates the plan instead of taking action.
 - Providers/OpenAI: preserve native `reasoning.effort: “none”` and strict tool schemas on direct OpenAI-family endpoints, keep compat routes on compat shaping, fix Responses WebSocket warm-up behavior, keep stable session and turn metadata, and fall back more gracefully after early WebSocket failures.
 - Providers/OpenAI: support GPT-5.4 assistant `phase` metadata across OpenAI-family Responses replay and the Gateway `/v1/responses` compatibility layer, including `commentary` tool preambles and `final_answer` replies.
+- Control UI/chat: prefer assistant `final_answer` text over commentary in completed WebSocket replies, hide commentary-only visible text in web chat and session previews, and keep mixed-phase persisted transcripts from leaking reasoning-style prose into final replies. Fixes #59150.
 - Providers/OpenAI GPT: treat short approval turns like `ok do it` and `go ahead` as immediate action turns, and trim overly memo-like GPT-5 chat confirmations so OpenAI replies stay shorter and more conversational by default.
 - Providers/OpenAI Codex: split native `contextWindow` from runtime `contextTokens`, keep the default effective cap at `272000`, and expose a per-model `contextTokens` override on `models.providers.*.models[]`.
 - Providers/OpenAI-compatible WS: compute fallback token totals from normalized usage when providers omit or zero `total_tokens`, so DashScope-compatible sessions stop storing zero totals after alias normalization. (#54940) Thanks @lyfuci.
diff --git a/src/agents/openai-ws-message-conversion.ts b/src/agents/openai-ws-message-conversion.ts
index 9a2e997a715..48586ce5122 100644
--- a/src/agents/openai-ws-message-conversion.ts
+++ b/src/agents/openai-ws-message-conversion.ts
@@ -497,19 +497,42 @@ export function buildAssistantMessageFromResponse(
   modelInfo: { api: string; provider: string; id: string },
 ): AssistantMessage {
   const content: AssistantMessage["content"] = [];
-  const assistantPhases = new Set<OpenAIResponsesAssistantPhase>();
-  let hasUnphasedAssistantText = false;
+  const assistantMessageOutputs = (response.output ?? []).filter(
+    (item): item is Extract<ResponseObject["output"][number], { type: "message" }> =>
+      item.type === "message",
+  );
+  const hasExplicitPhasedAssistantText = assistantMessageOutputs.some((item) => {
+    const itemPhase = normalizeAssistantPhase(item.phase);
+    return Boolean(
+      itemPhase && item.content?.some((part) => part.type === "output_text" && Boolean(part.text)),
+    );
+  });
+  const hasFinalAnswerText = assistantMessageOutputs.some((item) => {
+    if (normalizeAssistantPhase(item.phase) !== "final_answer") {
+      return false;
+    }
+    return item.content?.some((part) => part.type === "output_text" && Boolean(part.text)) ?? false;
+  });
+  const includedAssistantPhases = new Set<OpenAIResponsesAssistantPhase>();
+  let hasIncludedUnphasedAssistantText = false;
 
   for (const item of response.output ?? []) {
     if (item.type === "message") {
       const itemPhase = normalizeAssistantPhase(item.phase);
-      if (itemPhase) {
-        assistantPhases.add(itemPhase);
-      }
       for (const part of item.content ?? []) {
         if (part.type === "output_text" && part.text) {
-          if (!itemPhase) {
-            hasUnphasedAssistantText = true;
+          const shouldIncludeText = hasFinalAnswerText
+            ? itemPhase === "final_answer"
+            : hasExplicitPhasedAssistantText
+              ? itemPhase === undefined
+              : true;
+          if (!shouldIncludeText) {
+            continue;
+          }
+          if (itemPhase) {
+            includedAssistantPhases.add(itemPhase);
+          } else {
+            hasIncludedUnphasedAssistantText = true;
           }
           content.push({
             type: "text",
@@ -584,7 +607,9 @@ export function buildAssistantMessageFromResponse(
   });
 
   const finalAssistantPhase =
-    assistantPhases.size === 1 && !hasUnphasedAssistantText ? [...assistantPhases][0] : undefined;
+    includedAssistantPhases.size === 1 && !hasIncludedUnphasedAssistantText
+      ? [...includedAssistantPhases][0]
+      : undefined;
 
   return finalAssistantPhase
     ? ({ ...message, phase: finalAssistantPhase } as AssistantMessageWithPhase)
diff --git a/src/agents/openai-ws-stream.test.ts b/src/agents/openai-ws-stream.test.ts
index 73019004f6c..6b915902993 100644
--- a/src/agents/openai-ws-stream.test.ts
+++ b/src/agents/openai-ws-stream.test.ts
@@ -1128,7 +1128,7 @@ describe("buildAssistantMessageFromResponse", () => {
     expect(msg.content[0]?.text).toBe("Final answer");
   });
 
-  it("omits top-level phase when a response contains mixed assistant phases", () => {
+  it("keeps only final-answer text when a response contains mixed assistant phases", () => {
     const response = {
       id: "resp_mixed_phase",
       object: "response",
@@ -1159,13 +1159,8 @@ describe("buildAssistantMessageFromResponse", () => {
       content: Array<{ type: string; text?: string; textSignature?: string }>;
     };
 
-    expect(msg.phase).toBeUndefined();
+    expect(msg.phase).toBe("final_answer");
     expect(msg.content).toMatchObject([
-      {
-        type: "text",
-        text: "Working... ",
-        textSignature: JSON.stringify({ v: 1, id: "item_commentary", phase: "commentary" }),
-      },
       {
         type: "text",
         text: "Done.",
@@ -1174,7 +1169,7 @@ describe("buildAssistantMessageFromResponse", () => {
     ]);
   });
 
-  it("omits top-level phase when unphased legacy text and phased final text coexist", () => {
+  it("keeps only phased final text when unphased legacy text and phased final text coexist", () => {
     const response = {
       id: "resp_unphased_plus_final",
       object: "response",
@@ -1204,13 +1199,8 @@ describe("buildAssistantMessageFromResponse", () => {
       content: Array<{ type: string; text?: string; textSignature?: string }>;
     };
 
-    expect(msg.phase).toBeUndefined();
+    expect(msg.phase).toBe("final_answer");
     expect(msg.content).toMatchObject([
-      {
-        type: "text",
-        text: "Legacy. ",
-        textSignature: JSON.stringify({ v: 1, id: "item_legacy" }),
-      },
       {
         type: "text",
         text: "Done.",
@@ -1219,6 +1209,44 @@ describe("buildAssistantMessageFromResponse", () => {
     ]);
   });
 
+  it("drops commentary-only text from completed assistant messages but keeps tool calls", () => {
+    const response = {
+      id: "resp_commentary_only_tool",
+      object: "response",
+      created_at: Date.now(),
+      status: "completed",
+      model: "gpt-5.2",
+      output: [
+        {
+          type: "message",
+          id: "item_commentary",
+          role: "assistant",
+          phase: "commentary",
+          content: [{ type: "output_text", text: "Working... " }],
+        },
+        {
+          type: "function_call",
+          id: "item_tool",
+          call_id: "call_abc",
+          name: "exec",
+          arguments: '{"arg":"value"}',
+        },
+      ],
+      usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 },
+    } as unknown as ResponseObject;
+
+    const msg = buildAssistantMessageFromResponse(response, modelInfo) as {
+      phase?: string;
+      content: Array<{ type: string; text?: string; name?: string }>;
+      stopReason: string;
+    };
+
+    expect(msg.phase).toBeUndefined();
+    expect(msg.content.some((part) => part.type === "text")).toBe(false);
+    expect(msg.content).toMatchObject([{ type: "toolCall", name: "exec" }]);
+    expect(msg.stopReason).toBe("toolUse");
+  });
+
   it("maps reasoning output items to thinking blocks with signature", () => {
     const response = {
       id: "resp_reasoning",
@@ -1749,7 +1777,7 @@ describe("createOpenAIWebSocketStreamFn", () => {
     expect(doneEvent?.message.content[0]?.text).toBe("Hello back!");
   });
 
-  it("keeps assistant phase on completed WebSocket responses", async () => {
+  it("suppresses commentary-only text on completed WebSocket responses", async () => {
     const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-phase");
     const stream = streamFn(
       modelStub as Parameters<typeof streamFn>[0],
@@ -1776,10 +1804,11 @@ describe("createOpenAIWebSocketStreamFn", () => {
       | {
           type: string;
           reason: string;
-          message: { phase?: string; stopReason: string };
+          message: { phase?: string; stopReason: string; content?: Array<{ type?: string }> };
         }
       | undefined;
-    expect(doneEvent?.message.phase).toBe("commentary");
+    expect(doneEvent?.message.phase).toBeUndefined();
+    expect(doneEvent?.message.content?.some((part) => part.type === "text")).toBe(false);
     expect(doneEvent?.message.stopReason).toBe("toolUse");
   });