From 69d588cf2affd8f6cc6cd38088307ed9bfc1fc70 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 17 May 2026 09:28:39 +0100 Subject: [PATCH] fix(openai): remove GPT reply brevity cap --- CHANGELOG.md | 1 + .../run.incomplete-turn.test.ts | 10 ++ src/agents/pi-embedded-runner/run.ts | 18 ++- .../reply/agent-runner-execution.test.ts | 12 +- .../reply/agent-runner-execution.ts | 144 ------------------ 5 files changed, 34 insertions(+), 151 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a90d153ba06..2f4859f2789 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.7.1, bringing Codex hook approval compatibility, pre-tool command wrapping fixes, and Rolldown/Vitest output compaction improvements into the OpenClaw plugin. +- Agents/OpenAI: stop post-processing GPT-5 final replies with hardcoded brevity caps, preserving full channel responses instead of appending synthetic ellipses, and log when strict-agentic GPT-5 execution activates. Fixes #82910. - Agents/media: deliver failed async image, music, and video generation completions directly when requester-session completion handoff fails, so channel users see provider errors instead of silent fallback stalls. - Agents/music: steer song, jingle, beat, anthem, and instrumental requests toward `music_generate` audio creation instead of lyric-only replies, and reserve `lyrics` for exact sung words. - Codex app-server: record native Codex tool calls and results into trajectory artifacts so debug/trajectory exports capture the full Codex-native tool history, not just OpenClaw-bridged turns. Thanks @vyctorbrzezowski. diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index b8827bf9246..e336796dc30 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -53,6 +53,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { return mockedLog.warn.mock.calls.map(([message]) => String(message)); } + function infoMessages(): string[] { + return mockedLog.info.mock.calls.map(([message]) => String(message)); + } + function expectWarnMessageWith(text: string): void { expect(warnMessages().join("\n")).toContain(text); } @@ -349,6 +353,12 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { }, ]); expect(result.meta.livenessState).toBe("blocked"); + expect(infoMessages().join("\n")).toContain( + "strict-agentic execution contract active: runId=run-strict-agentic-auto-activated", + ); + expect(infoMessages().join("\n")).toContain( + "provider=openai-codex/gpt-5.4 harness=codex configured=unspecified", + ); }); it("respects explicit default contract opt-out on GPT-5 openai runs", async () => { diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index be387e03b28..03034f242cd 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -980,8 +980,10 @@ export async function runEmbeddedPiAgent( config: params.config, agentId: params.agentId, }); - const configuredExecutionContract = - resolveAgentExecutionContract(params.config, sessionAgentId) ?? "default"; + const configuredExecutionContract = resolveAgentExecutionContract( + params.config, + sessionAgentId, + ); const strictAgenticActive = isStrictAgenticExecutionContractActive({ config: params.config, sessionKey: params.sessionKey, @@ -990,6 +992,14 @@ export async function runEmbeddedPiAgent( modelId, }); const executionContract = strictAgenticActive ? "strict-agentic" : "default"; + const configuredExecutionContractForLog = configuredExecutionContract ?? "default"; + if (strictAgenticActive) { + log.info( + `strict-agentic execution contract active: runId=${params.runId} sessionId=${params.sessionId} ` + + `provider=${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} harness=${sanitizeForLog(agentHarness.id)} ` + + `configured=${configuredExecutionContract ?? "unspecified"}`, + ); + } const maxPlanningOnlyRetryAttempts = resolvePlanningOnlyRetryLimit(executionContract); const maxReasoningOnlyRetryAttempts = DEFAULT_REASONING_ONLY_RETRY_LIMIT; const maxEmptyResponseRetryAttempts = DEFAULT_EMPTY_RESPONSE_RETRY_LIMIT; @@ -2790,7 +2800,7 @@ export async function runEmbeddedPiAgent( planningOnlyRetryInstruction = nextPlanningOnlyRetryInstruction; log.warn( `planning-only turn detected: runId=${params.runId} sessionId=${params.sessionId} ` + - `provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContract} — retrying ` + + `provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContractForLog} — retrying ` + `${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} with act-now steer`, ); continue; @@ -2869,7 +2879,7 @@ export async function runEmbeddedPiAgent( if (!incompleteTurnText && nextPlanningOnlyRetryInstruction && strictAgenticActive) { log.warn( `strict-agentic run exhausted planning-only retries: runId=${params.runId} sessionId=${params.sessionId} ` + - `provider=${provider}/${modelId} configured=${configuredExecutionContract} — surfacing blocked state`, + `provider=${provider}/${modelId} configured=${configuredExecutionContractForLog} — surfacing blocked state`, ); // Criterion 4 of the GPT-5.4 parity gate requires every terminal // exit path to emit an explicit livenessState + replayInvalid so diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts index e0c9e37b987..591f9afcac8 100644 --- a/src/auto-reply/reply/agent-runner-execution.test.ts +++ b/src/auto-reply/reply/agent-runner-execution.test.ts @@ -2734,7 +2734,7 @@ describe("runAgentTurnWithFallback", () => { }); }); - it("trims chatty GPT ack-turn final prose", async () => { + it("preserves GPT ack-turn final prose without reply-side truncation", async () => { state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({ result: await params.run("openai", "gpt-5.4"), provider: "openai", @@ -2747,7 +2747,7 @@ describe("runAgentTurnWithFallback", () => { text: [ "I updated the prompt overlay and tightened the runtime guard.", "I also added the ack-turn fast path so short approvals skip the recap.", - "The reply-side brevity cap now trims long prose-heavy GPT confirmations.", + "The reply-side output now keeps long prose-heavy GPT confirmations intact.", "I updated tests for the overlay, retry guard, and reply normalization.", "Everything is wired together and ready for verification.", ].join(" "), @@ -2787,7 +2787,13 @@ describe("runAgentTurnWithFallback", () => { expect(result.kind).toBe("success"); if (result.kind === "success") { expect(result.runResult.payloads?.[0]?.text).toBe( - "I updated the prompt overlay and tightened the runtime guard. I also added the ack-turn fast path so short approvals skip the recap. The reply-side brevity cap now trims long prose-heavy GPT confirmations...", + [ + "I updated the prompt overlay and tightened the runtime guard.", + "I also added the ack-turn fast path so short approvals skip the recap.", + "The reply-side output now keeps long prose-heavy GPT confirmations intact.", + "I updated tests for the overlay, retry guard, and reply normalization.", + "Everything is wired together and ready for verification.", + ].join(" "), ); } }); diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index e400550996d..ec8c7374533 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -42,7 +42,6 @@ import { isTransientHttpError, } from "../../agents/pi-embedded-helpers.js"; import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers/sanitize-user-facing-text.js"; -import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js"; import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js"; import { buildAgentRuntimeOutcomePlan } from "../../agents/runtime-plan/build.js"; import { @@ -112,10 +111,6 @@ import type { TypingSignaler } from "./typing-mode.js"; // selection keeps conflicting with fallback model choices. // See: https://github.com/openclaw/openclaw/issues/58348 export const MAX_LIVE_SWITCH_RETRIES = 2; -const GPT_CHAT_BREVITY_ACK_MAX_CHARS = 420; -const GPT_CHAT_BREVITY_ACK_MAX_SENTENCES = 3; -const GPT_CHAT_BREVITY_SOFT_MAX_CHARS = 900; -const GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES = 6; function readApprovalScopeValue(value: unknown): "turn" | "session" | undefined { return value === "turn" || value === "session" ? value : undefined; @@ -857,137 +852,6 @@ export function buildContextOverflowRecoveryText(params: { ); } -function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean { - if (params.provider !== "openai" && params.provider !== "openai-codex") { - return false; - } - return /^gpt-5(?:[.-]|$)/i.test(params.model ?? ""); -} - -function countChatReplySentences(text: string): number { - return text - .trim() - .split(/(?<=[.!?])\s+/u) - .map((part) => part.trim()) - .filter(Boolean).length; -} - -function scoreChattyFinalReplyText(text: string): number { - const trimmed = text.trim(); - if (!trimmed) { - return 0; - } - let score = 0; - const sentenceCount = countChatReplySentences(trimmed); - if (trimmed.length > 900) { - score += 1; - } - if (trimmed.length > 1_500) { - score += 1; - } - if (sentenceCount > 6) { - score += 1; - } - if (sentenceCount > 10) { - score += 1; - } - if (trimmed.split(/\n{2,}/u).filter(Boolean).length >= 3) { - score += 1; - } - if ( - /\b(?:in summary|to summarize|here(?:'s| is) what|what changed|what I verified)\b/i.test( - trimmed, - ) - ) { - score += 1; - } - return score; -} - -function shortenChattyFinalReplyText( - text: string, - params: { maxChars: number; maxSentences: number }, -): string { - const trimmed = text.trim(); - if (!trimmed) { - return trimmed; - } - const sentences = trimmed - .split(/(?<=[.!?])\s+/u) - .map((part) => part.trim()) - .filter(Boolean); - let shortened = sentences.slice(0, params.maxSentences).join(" "); - if (!shortened) { - shortened = trimmed.slice(0, params.maxChars).trimEnd(); - } - if (shortened.length > params.maxChars) { - shortened = shortened.slice(0, params.maxChars).trimEnd(); - } - if (shortened.length >= trimmed.length) { - return trimmed; - } - return shortened.replace(/[.,;:!?-]*$/u, "").trimEnd() + "..."; -} - -function applyOpenAIGptChatReplyGuard(params: { - provider?: string; - model?: string; - commandBody: string; - isHeartbeat: boolean; - payloads?: ReplyPayload[]; -}): void { - if ( - params.isHeartbeat || - !shouldApplyOpenAIGptChatGuard({ - provider: params.provider, - model: params.model, - }) || - !params.payloads?.length - ) { - return; - } - - const trimmedCommand = params.commandBody.trim(); - const isAckTurn = isLikelyExecutionAckPrompt(trimmedCommand); - const allowSoftCap = - !isAckTurn && - trimmedCommand.length > 0 && - trimmedCommand.length <= 120 && - !/\b(?:detail|detailed|depth|deep dive|explain|compare|walk me through|why|how)\b/i.test( - trimmedCommand, - ); - - for (const payload of params.payloads) { - const text = normalizeOptionalString(payload.text); - if ( - !text || - payload.isError || - payload.isReasoning || - payload.mediaUrl || - (payload.mediaUrls?.length ?? 0) > 0 || - payload.interactive || - text.includes("```") - ) { - continue; - } - - if (isAckTurn) { - payload.text = shortenChattyFinalReplyText(text, { - maxChars: GPT_CHAT_BREVITY_ACK_MAX_CHARS, - maxSentences: GPT_CHAT_BREVITY_ACK_MAX_SENTENCES, - }); - continue; - } - - if (allowSoftCap && scoreChattyFinalReplyText(text) >= 4) { - payload.text = shortenChattyFinalReplyText(text, { - maxChars: GPT_CHAT_BREVITY_SOFT_MAX_CHARS, - maxSentences: GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES, - }); - } - } -} - function buildRestartLifecycleReplyText(): string { return "⚠️ Gateway is restarting. Please wait a few seconds and try again."; } @@ -2521,14 +2385,6 @@ export async function runAgentTurnWithFallback(params: { ]; } } - - applyOpenAIGptChatReplyGuard({ - provider: fallbackProvider, - model: fallbackModel, - commandBody: params.commandBody, - isHeartbeat: params.isHeartbeat, - payloads: runResult.payloads, - }); } return {