fix: suppress commentary text in completed ws replies

This commit is contained in:
Peter Steinberger
2026-04-05 20:43:20 +01:00
parent a4f16f572c
commit 2a4eea58a9
3 changed files with 80 additions and 25 deletions

View File

@@ -120,6 +120,7 @@ Docs: https://docs.openclaw.ai
- Providers/OpenAI: make GPT-5 and Codex runs act sooner with lower-verbosity defaults, visible progress during tool work, and a one-shot retry when a turn only narrates the plan instead of taking action.
- Providers/OpenAI: preserve native `reasoning.effort: "none"` and strict tool schemas on direct OpenAI-family endpoints, keep compat routes on compat shaping, fix Responses WebSocket warm-up behavior, keep stable session and turn metadata, and fall back more gracefully after early WebSocket failures.
- Providers/OpenAI: support GPT-5.4 assistant `phase` metadata across OpenAI-family Responses replay and the Gateway `/v1/responses` compatibility layer, including `commentary` tool preambles and `final_answer` replies.
- Control UI/chat: prefer assistant `final_answer` text over commentary in completed WebSocket replies, hide commentary-only visible text in web chat and session previews, and keep mixed-phase persisted transcripts from leaking reasoning-style prose into final replies. Fixes #59150.
- Providers/OpenAI GPT: treat short approval turns like `ok do it` and `go ahead` as immediate action turns, and trim overly memo-like GPT-5 chat confirmations so OpenAI replies stay shorter and more conversational by default.
- Providers/OpenAI Codex: split native `contextWindow` from runtime `contextTokens`, keep the default effective cap at `272000`, and expose a per-model `contextTokens` override on `models.providers.*.models[]`.
- Providers/OpenAI-compatible WS: compute fallback token totals from normalized usage when providers omit or zero `total_tokens`, so DashScope-compatible sessions stop storing zero totals after alias normalization. (#54940) Thanks @lyfuci.

View File

@@ -497,19 +497,42 @@ export function buildAssistantMessageFromResponse(
modelInfo: { api: string; provider: string; id: string },
): AssistantMessage {
const content: AssistantMessage["content"] = [];
const assistantPhases = new Set<OpenAIResponsesAssistantPhase>();
let hasUnphasedAssistantText = false;
const assistantMessageOutputs = (response.output ?? []).filter(
(item): item is Extract<ResponseObject["output"][number], { type: "message" }> =>
item.type === "message",
);
const hasExplicitPhasedAssistantText = assistantMessageOutputs.some((item) => {
const itemPhase = normalizeAssistantPhase(item.phase);
return Boolean(
itemPhase && item.content?.some((part) => part.type === "output_text" && Boolean(part.text)),
);
});
const hasFinalAnswerText = assistantMessageOutputs.some((item) => {
if (normalizeAssistantPhase(item.phase) !== "final_answer") {
return false;
}
return item.content?.some((part) => part.type === "output_text" && Boolean(part.text)) ?? false;
});
const includedAssistantPhases = new Set<OpenAIResponsesAssistantPhase>();
let hasIncludedUnphasedAssistantText = false;
for (const item of response.output ?? []) {
if (item.type === "message") {
const itemPhase = normalizeAssistantPhase(item.phase);
if (itemPhase) {
assistantPhases.add(itemPhase);
}
for (const part of item.content ?? []) {
if (part.type === "output_text" && part.text) {
if (!itemPhase) {
hasUnphasedAssistantText = true;
const shouldIncludeText = hasFinalAnswerText
? itemPhase === "final_answer"
: hasExplicitPhasedAssistantText
? itemPhase === undefined
: true;
if (!shouldIncludeText) {
continue;
}
if (itemPhase) {
includedAssistantPhases.add(itemPhase);
} else {
hasIncludedUnphasedAssistantText = true;
}
content.push({
type: "text",
@@ -584,7 +607,9 @@ export function buildAssistantMessageFromResponse(
});
const finalAssistantPhase =
assistantPhases.size === 1 && !hasUnphasedAssistantText ? [...assistantPhases][0] : undefined;
includedAssistantPhases.size === 1 && !hasIncludedUnphasedAssistantText
? [...includedAssistantPhases][0]
: undefined;
return finalAssistantPhase
? ({ ...message, phase: finalAssistantPhase } as AssistantMessageWithPhase)

View File

@@ -1128,7 +1128,7 @@ describe("buildAssistantMessageFromResponse", () => {
expect(msg.content[0]?.text).toBe("Final answer");
});
it("omits top-level phase when a response contains mixed assistant phases", () => {
it("keeps only final-answer text when a response contains mixed assistant phases", () => {
const response = {
id: "resp_mixed_phase",
object: "response",
@@ -1159,13 +1159,8 @@ describe("buildAssistantMessageFromResponse", () => {
content: Array<{ type: string; text?: string; textSignature?: string }>;
};
expect(msg.phase).toBeUndefined();
expect(msg.phase).toBe("final_answer");
expect(msg.content).toMatchObject([
{
type: "text",
text: "Working... ",
textSignature: JSON.stringify({ v: 1, id: "item_commentary", phase: "commentary" }),
},
{
type: "text",
text: "Done.",
@@ -1174,7 +1169,7 @@ describe("buildAssistantMessageFromResponse", () => {
]);
});
it("omits top-level phase when unphased legacy text and phased final text coexist", () => {
it("keeps only phased final text when unphased legacy text and phased final text coexist", () => {
const response = {
id: "resp_unphased_plus_final",
object: "response",
@@ -1204,13 +1199,8 @@ describe("buildAssistantMessageFromResponse", () => {
content: Array<{ type: string; text?: string; textSignature?: string }>;
};
expect(msg.phase).toBeUndefined();
expect(msg.phase).toBe("final_answer");
expect(msg.content).toMatchObject([
{
type: "text",
text: "Legacy. ",
textSignature: JSON.stringify({ v: 1, id: "item_legacy" }),
},
{
type: "text",
text: "Done.",
@@ -1219,6 +1209,44 @@ describe("buildAssistantMessageFromResponse", () => {
]);
});
it("drops commentary-only text from completed assistant messages but keeps tool calls", () => {
const response = {
id: "resp_commentary_only_tool",
object: "response",
created_at: Date.now(),
status: "completed",
model: "gpt-5.2",
output: [
{
type: "message",
id: "item_commentary",
role: "assistant",
phase: "commentary",
content: [{ type: "output_text", text: "Working... " }],
},
{
type: "function_call",
id: "item_tool",
call_id: "call_abc",
name: "exec",
arguments: '{"arg":"value"}',
},
],
usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 },
} as unknown as ResponseObject;
const msg = buildAssistantMessageFromResponse(response, modelInfo) as {
phase?: string;
content: Array<{ type: string; text?: string; name?: string }>;
stopReason: string;
};
expect(msg.phase).toBeUndefined();
expect(msg.content.some((part) => part.type === "text")).toBe(false);
expect(msg.content).toMatchObject([{ type: "toolCall", name: "exec" }]);
expect(msg.stopReason).toBe("toolUse");
});
it("maps reasoning output items to thinking blocks with signature", () => {
const response = {
id: "resp_reasoning",
@@ -1749,7 +1777,7 @@ describe("createOpenAIWebSocketStreamFn", () => {
expect(doneEvent?.message.content[0]?.text).toBe("Hello back!");
});
it("keeps assistant phase on completed WebSocket responses", async () => {
it("suppresses commentary-only text on completed WebSocket responses", async () => {
const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-phase");
const stream = streamFn(
modelStub as Parameters<typeof streamFn>[0],
@@ -1776,10 +1804,11 @@ describe("createOpenAIWebSocketStreamFn", () => {
| {
type: string;
reason: string;
message: { phase?: string; stopReason: string };
message: { phase?: string; stopReason: string; content?: Array<{ type?: string }> };
}
| undefined;
expect(doneEvent?.message.phase).toBe("commentary");
expect(doneEvent?.message.phase).toBeUndefined();
expect(doneEvent?.message.content?.some((part) => part.type === "text")).toBe(false);
expect(doneEvent?.message.stopReason).toBe("toolUse");
});