From 69d588cf2affd8f6cc6cd38088307ed9bfc1fc70 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sun, 17 May 2026 09:28:39 +0100
Subject: [PATCH] fix(openai): remove GPT reply brevity cap

---
 CHANGELOG.md                                  |   1 +
 .../run.incomplete-turn.test.ts               |  10 ++
 src/agents/pi-embedded-runner/run.ts          |  18 ++-
 .../reply/agent-runner-execution.test.ts      |  12 +-
 .../reply/agent-runner-execution.ts           | 144 ------------------
 5 files changed, 34 insertions(+), 151 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a90d153ba06..2f4859f2789 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
 - Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.7.1, bringing Codex hook approval compatibility, pre-tool command wrapping fixes, and Rolldown/Vitest output compaction improvements into the OpenClaw plugin.
+- Agents/OpenAI: stop post-processing GPT-5 final replies with hardcoded brevity caps, preserving full channel responses instead of appending synthetic ellipses, and log when strict-agentic GPT-5 execution activates. Fixes #82910.
 - Agents/media: deliver failed async image, music, and video generation completions directly when requester-session completion handoff fails, so channel users see provider errors instead of silent fallback stalls.
 - Agents/music: steer song, jingle, beat, anthem, and instrumental requests toward `music_generate` audio creation instead of lyric-only replies, and reserve `lyrics` for exact sung words.
 - Codex app-server: record native Codex tool calls and results into trajectory artifacts so debug/trajectory exports capture the full Codex-native tool history, not just OpenClaw-bridged turns. Thanks @vyctorbrzezowski.
diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
index b8827bf9246..e336796dc30 100644
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -53,6 +53,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     return mockedLog.warn.mock.calls.map(([message]) => String(message));
   }
 
+  function infoMessages(): string[] {
+    return mockedLog.info.mock.calls.map(([message]) => String(message));
+  }
+
   function expectWarnMessageWith(text: string): void {
     expect(warnMessages().join("\n")).toContain(text);
   }
@@ -349,6 +353,12 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
       },
     ]);
     expect(result.meta.livenessState).toBe("blocked");
+    expect(infoMessages().join("\n")).toContain(
+      "strict-agentic execution contract active: runId=run-strict-agentic-auto-activated",
+    );
+    expect(infoMessages().join("\n")).toContain(
+      "provider=openai-codex/gpt-5.4 harness=codex configured=unspecified",
+    );
   });
 
   it("respects explicit default contract opt-out on GPT-5 openai runs", async () => {
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index be387e03b28..03034f242cd 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -980,8 +980,10 @@ export async function runEmbeddedPiAgent(
         config: params.config,
         agentId: params.agentId,
       });
-      const configuredExecutionContract =
-        resolveAgentExecutionContract(params.config, sessionAgentId) ?? "default";
+      const configuredExecutionContract = resolveAgentExecutionContract(
+        params.config,
+        sessionAgentId,
+      );
       const strictAgenticActive = isStrictAgenticExecutionContractActive({
         config: params.config,
         sessionKey: params.sessionKey,
@@ -990,6 +992,14 @@ export async function runEmbeddedPiAgent(
         modelId,
       });
       const executionContract = strictAgenticActive ? "strict-agentic" : "default";
+      const configuredExecutionContractForLog = configuredExecutionContract ?? "default";
+      if (strictAgenticActive) {
+        log.info(
+          `strict-agentic execution contract active: runId=${params.runId} sessionId=${params.sessionId} ` +
+            `provider=${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} harness=${sanitizeForLog(agentHarness.id)} ` +
+            `configured=${configuredExecutionContract ?? "unspecified"}`,
+        );
+      }
       const maxPlanningOnlyRetryAttempts = resolvePlanningOnlyRetryLimit(executionContract);
       const maxReasoningOnlyRetryAttempts = DEFAULT_REASONING_ONLY_RETRY_LIMIT;
       const maxEmptyResponseRetryAttempts = DEFAULT_EMPTY_RESPONSE_RETRY_LIMIT;
@@ -2790,7 +2800,7 @@ export async function runEmbeddedPiAgent(
             planningOnlyRetryInstruction = nextPlanningOnlyRetryInstruction;
             log.warn(
               `planning-only turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
-                `provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContract} — retrying ` +
+                `provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContractForLog} — retrying ` +
                 `${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} with act-now steer`,
             );
             continue;
@@ -2869,7 +2879,7 @@ export async function runEmbeddedPiAgent(
           if (!incompleteTurnText && nextPlanningOnlyRetryInstruction && strictAgenticActive) {
             log.warn(
               `strict-agentic run exhausted planning-only retries: runId=${params.runId} sessionId=${params.sessionId} ` +
-                `provider=${provider}/${modelId} configured=${configuredExecutionContract} — surfacing blocked state`,
+                `provider=${provider}/${modelId} configured=${configuredExecutionContractForLog} — surfacing blocked state`,
             );
             // Criterion 4 of the GPT-5.4 parity gate requires every terminal
             // exit path to emit an explicit livenessState + replayInvalid so
diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts
index e0c9e37b987..591f9afcac8 100644
--- a/src/auto-reply/reply/agent-runner-execution.test.ts
+++ b/src/auto-reply/reply/agent-runner-execution.test.ts
@@ -2734,7 +2734,7 @@ describe("runAgentTurnWithFallback", () => {
     });
   });
 
-  it("trims chatty GPT ack-turn final prose", async () => {
+  it("preserves GPT ack-turn final prose without reply-side truncation", async () => {
     state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
       result: await params.run("openai", "gpt-5.4"),
       provider: "openai",
@@ -2747,7 +2747,7 @@ describe("runAgentTurnWithFallback", () => {
           text: [
             "I updated the prompt overlay and tightened the runtime guard.",
             "I also added the ack-turn fast path so short approvals skip the recap.",
-            "The reply-side brevity cap now trims long prose-heavy GPT confirmations.",
+            "The reply-side output now keeps long prose-heavy GPT confirmations intact.",
             "I updated tests for the overlay, retry guard, and reply normalization.",
             "Everything is wired together and ready for verification.",
           ].join(" "),
@@ -2787,7 +2787,13 @@ describe("runAgentTurnWithFallback", () => {
     expect(result.kind).toBe("success");
     if (result.kind === "success") {
       expect(result.runResult.payloads?.[0]?.text).toBe(
-        "I updated the prompt overlay and tightened the runtime guard. I also added the ack-turn fast path so short approvals skip the recap. The reply-side brevity cap now trims long prose-heavy GPT confirmations...",
+        [
+          "I updated the prompt overlay and tightened the runtime guard.",
+          "I also added the ack-turn fast path so short approvals skip the recap.",
+          "The reply-side output now keeps long prose-heavy GPT confirmations intact.",
+          "I updated tests for the overlay, retry guard, and reply normalization.",
+          "Everything is wired together and ready for verification.",
+        ].join(" "),
       );
     }
   });
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index e400550996d..ec8c7374533 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -42,7 +42,6 @@ import {
   isTransientHttpError,
 } from "../../agents/pi-embedded-helpers.js";
 import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers/sanitize-user-facing-text.js";
-import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js";
 import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
 import { buildAgentRuntimeOutcomePlan } from "../../agents/runtime-plan/build.js";
 import {
@@ -112,10 +111,6 @@ import type { TypingSignaler } from "./typing-mode.js";
 // selection keeps conflicting with fallback model choices.
 // See: https://github.com/openclaw/openclaw/issues/58348
 export const MAX_LIVE_SWITCH_RETRIES = 2;
-const GPT_CHAT_BREVITY_ACK_MAX_CHARS = 420;
-const GPT_CHAT_BREVITY_ACK_MAX_SENTENCES = 3;
-const GPT_CHAT_BREVITY_SOFT_MAX_CHARS = 900;
-const GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES = 6;
 
 function readApprovalScopeValue(value: unknown): "turn" | "session" | undefined {
   return value === "turn" || value === "session" ? value : undefined;
@@ -857,137 +852,6 @@ export function buildContextOverflowRecoveryText(params: {
   );
 }
 
-function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
-  if (params.provider !== "openai" && params.provider !== "openai-codex") {
-    return false;
-  }
-  return /^gpt-5(?:[.-]|$)/i.test(params.model ?? "");
-}
-
-function countChatReplySentences(text: string): number {
-  return text
-    .trim()
-    .split(/(?<=[.!?])\s+/u)
-    .map((part) => part.trim())
-    .filter(Boolean).length;
-}
-
-function scoreChattyFinalReplyText(text: string): number {
-  const trimmed = text.trim();
-  if (!trimmed) {
-    return 0;
-  }
-  let score = 0;
-  const sentenceCount = countChatReplySentences(trimmed);
-  if (trimmed.length > 900) {
-    score += 1;
-  }
-  if (trimmed.length > 1_500) {
-    score += 1;
-  }
-  if (sentenceCount > 6) {
-    score += 1;
-  }
-  if (sentenceCount > 10) {
-    score += 1;
-  }
-  if (trimmed.split(/\n{2,}/u).filter(Boolean).length >= 3) {
-    score += 1;
-  }
-  if (
-    /\b(?:in summary|to summarize|here(?:'s| is) what|what changed|what I verified)\b/i.test(
-      trimmed,
-    )
-  ) {
-    score += 1;
-  }
-  return score;
-}
-
-function shortenChattyFinalReplyText(
-  text: string,
-  params: { maxChars: number; maxSentences: number },
-): string {
-  const trimmed = text.trim();
-  if (!trimmed) {
-    return trimmed;
-  }
-  const sentences = trimmed
-    .split(/(?<=[.!?])\s+/u)
-    .map((part) => part.trim())
-    .filter(Boolean);
-  let shortened = sentences.slice(0, params.maxSentences).join(" ");
-  if (!shortened) {
-    shortened = trimmed.slice(0, params.maxChars).trimEnd();
-  }
-  if (shortened.length > params.maxChars) {
-    shortened = shortened.slice(0, params.maxChars).trimEnd();
-  }
-  if (shortened.length >= trimmed.length) {
-    return trimmed;
-  }
-  return shortened.replace(/[.,;:!?-]*$/u, "").trimEnd() + "...";
-}
-
-function applyOpenAIGptChatReplyGuard(params: {
-  provider?: string;
-  model?: string;
-  commandBody: string;
-  isHeartbeat: boolean;
-  payloads?: ReplyPayload[];
-}): void {
-  if (
-    params.isHeartbeat ||
-    !shouldApplyOpenAIGptChatGuard({
-      provider: params.provider,
-      model: params.model,
-    }) ||
-    !params.payloads?.length
-  ) {
-    return;
-  }
-
-  const trimmedCommand = params.commandBody.trim();
-  const isAckTurn = isLikelyExecutionAckPrompt(trimmedCommand);
-  const allowSoftCap =
-    !isAckTurn &&
-    trimmedCommand.length > 0 &&
-    trimmedCommand.length <= 120 &&
-    !/\b(?:detail|detailed|depth|deep dive|explain|compare|walk me through|why|how)\b/i.test(
-      trimmedCommand,
-    );
-
-  for (const payload of params.payloads) {
-    const text = normalizeOptionalString(payload.text);
-    if (
-      !text ||
-      payload.isError ||
-      payload.isReasoning ||
-      payload.mediaUrl ||
-      (payload.mediaUrls?.length ?? 0) > 0 ||
-      payload.interactive ||
-      text.includes("```")
-    ) {
-      continue;
-    }
-
-    if (isAckTurn) {
-      payload.text = shortenChattyFinalReplyText(text, {
-        maxChars: GPT_CHAT_BREVITY_ACK_MAX_CHARS,
-        maxSentences: GPT_CHAT_BREVITY_ACK_MAX_SENTENCES,
-      });
-      continue;
-    }
-
-    if (allowSoftCap && scoreChattyFinalReplyText(text) >= 4) {
-      payload.text = shortenChattyFinalReplyText(text, {
-        maxChars: GPT_CHAT_BREVITY_SOFT_MAX_CHARS,
-        maxSentences: GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES,
-      });
-    }
-  }
-}
-
 function buildRestartLifecycleReplyText(): string {
   return "⚠️ Gateway is restarting. Please wait a few seconds and try again.";
 }
@@ -2521,14 +2385,6 @@ export async function runAgentTurnWithFallback(params: {
         ];
       }
     }
-
-    applyOpenAIGptChatReplyGuard({
-      provider: fallbackProvider,
-      model: fallbackModel,
-      commandBody: params.commandBody,
-      isHeartbeat: params.isHeartbeat,
-      payloads: runResult.payloads,
-    });
   }
 
   return {