fix(openai): remove GPT reply brevity cap

2026-05-18 20:44:47 +00:00 · 2026-05-17 09:28:39 +01:00
parent 37806afd2d
commit 69d588cf2a
5 changed files with 34 additions and 151 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes

 - Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.7.1, bringing Codex hook approval compatibility, pre-tool command wrapping fixes, and Rolldown/Vitest output compaction improvements into the OpenClaw plugin.
+- Agents/OpenAI: stop post-processing GPT-5 final replies with hardcoded brevity caps, preserving full channel responses instead of cutting them short and appending synthetic ellipses, and log when strict-agentic GPT-5 execution activates. Fixes #82910.
 - Agents/media: deliver failed async image, music, and video generation completions directly when requester-session completion handoff fails, so channel users see provider errors instead of silent fallback stalls.
 - Agents/music: steer song, jingle, beat, anthem, and instrumental requests toward `music_generate` audio creation instead of lyric-only replies, and reserve `lyrics` for exact sung words.
 - Codex app-server: record native Codex tool calls and results into trajectory artifacts so debug/trajectory exports capture the full Codex-native tool history, not just OpenClaw-bridged turns. Thanks @vyctorbrzezowski.
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -53,6 +53,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
    return mockedLog.warn.mock.calls.map(([message]) => String(message));
  }

+  function infoMessages(): string[] {
+    return mockedLog.info.mock.calls.map(([message]) => String(message));
+  }
+
  function expectWarnMessageWith(text: string): void {
    expect(warnMessages().join("\n")).toContain(text);
  }
@@ -349,6 +353,12 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
      },
    ]);
    expect(result.meta.livenessState).toBe("blocked");
+    expect(infoMessages().join("\n")).toContain(
+      "strict-agentic execution contract active: runId=run-strict-agentic-auto-activated",
+    );
+    expect(infoMessages().join("\n")).toContain(
+      "provider=openai-codex/gpt-5.4 harness=codex configured=unspecified",
+    );
  });

  it("respects explicit default contract opt-out on GPT-5 openai runs", async () => {
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -980,8 +980,10 @@ export async function runEmbeddedPiAgent(
        config: params.config,
        agentId: params.agentId,
      });
-      const configuredExecutionContract =
-        resolveAgentExecutionContract(params.config, sessionAgentId) ?? "default";
+      const configuredExecutionContract = resolveAgentExecutionContract(
+        params.config,
+        sessionAgentId,
+      );
      const strictAgenticActive = isStrictAgenticExecutionContractActive({
        config: params.config,
        sessionKey: params.sessionKey,
@@ -990,6 +992,14 @@ export async function runEmbeddedPiAgent(
        modelId,
      });
      const executionContract = strictAgenticActive ? "strict-agentic" : "default";
+      const configuredExecutionContractForLog = configuredExecutionContract ?? "default";
+      if (strictAgenticActive) {
+        log.info(
+          `strict-agentic execution contract active: runId=${params.runId} sessionId=${params.sessionId} ` +
+            `provider=${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} harness=${sanitizeForLog(agentHarness.id)} ` +
+            `configured=${configuredExecutionContract ?? "unspecified"}`,
+        );
+      }
      const maxPlanningOnlyRetryAttempts = resolvePlanningOnlyRetryLimit(executionContract);
      const maxReasoningOnlyRetryAttempts = DEFAULT_REASONING_ONLY_RETRY_LIMIT;
      const maxEmptyResponseRetryAttempts = DEFAULT_EMPTY_RESPONSE_RETRY_LIMIT;
@@ -2790,7 +2800,7 @@ export async function runEmbeddedPiAgent(
            planningOnlyRetryInstruction = nextPlanningOnlyRetryInstruction;
            log.warn(
              `planning-only turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
-                `provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContract} — retrying ` +
+                `provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContractForLog} — retrying ` +
                `${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} with act-now steer`,
            );
            continue;
@@ -2869,7 +2879,7 @@ export async function runEmbeddedPiAgent(
          if (!incompleteTurnText && nextPlanningOnlyRetryInstruction && strictAgenticActive) {
            log.warn(
              `strict-agentic run exhausted planning-only retries: runId=${params.runId} sessionId=${params.sessionId} ` +
-                `provider=${provider}/${modelId} configured=${configuredExecutionContract} — surfacing blocked state`,
+                `provider=${provider}/${modelId} configured=${configuredExecutionContractForLog} — surfacing blocked state`,
            );
            // Criterion 4 of the GPT-5.4 parity gate requires every terminal
            // exit path to emit an explicit livenessState + replayInvalid so
--- a/src/auto-reply/reply/agent-runner-execution.test.ts
+++ b/src/auto-reply/reply/agent-runner-execution.test.ts
@@ -2734,7 +2734,7 @@ describe("runAgentTurnWithFallback", () => {
    });
  });

-  it("trims chatty GPT ack-turn final prose", async () => {
+  it("preserves GPT ack-turn final prose without reply-side truncation", async () => {
    state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => ({
      result: await params.run("openai", "gpt-5.4"),
      provider: "openai",
@@ -2747,7 +2747,7 @@ describe("runAgentTurnWithFallback", () => {
          text: [
            "I updated the prompt overlay and tightened the runtime guard.",
            "I also added the ack-turn fast path so short approvals skip the recap.",
-            "The reply-side brevity cap now trims long prose-heavy GPT confirmations.",
+            "The reply-side output now keeps long prose-heavy GPT confirmations intact.",
            "I updated tests for the overlay, retry guard, and reply normalization.",
            "Everything is wired together and ready for verification.",
          ].join(" "),
@@ -2787,7 +2787,13 @@ describe("runAgentTurnWithFallback", () => {
    expect(result.kind).toBe("success");
    if (result.kind === "success") {
      expect(result.runResult.payloads?.[0]?.text).toBe(
-        "I updated the prompt overlay and tightened the runtime guard. I also added the ack-turn fast path so short approvals skip the recap. The reply-side brevity cap now trims long prose-heavy GPT confirmations...",
+        [
+          "I updated the prompt overlay and tightened the runtime guard.",
+          "I also added the ack-turn fast path so short approvals skip the recap.",
+          "The reply-side output now keeps long prose-heavy GPT confirmations intact.",
+          "I updated tests for the overlay, retry guard, and reply normalization.",
+          "Everything is wired together and ready for verification.",
+        ].join(" "),
      );
    }
  });
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -42,7 +42,6 @@ import {
  isTransientHttpError,
 } from "../../agents/pi-embedded-helpers.js";
 import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers/sanitize-user-facing-text.js";
-import { isLikelyExecutionAckPrompt } from "../../agents/pi-embedded-runner/run/incomplete-turn.js";
 import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
 import { buildAgentRuntimeOutcomePlan } from "../../agents/runtime-plan/build.js";
 import {
@@ -112,10 +111,6 @@ import type { TypingSignaler } from "./typing-mode.js";
 // selection keeps conflicting with fallback model choices.
 // See: https://github.com/openclaw/openclaw/issues/58348
 export const MAX_LIVE_SWITCH_RETRIES = 2;
-const GPT_CHAT_BREVITY_ACK_MAX_CHARS = 420;
-const GPT_CHAT_BREVITY_ACK_MAX_SENTENCES = 3;
-const GPT_CHAT_BREVITY_SOFT_MAX_CHARS = 900;
-const GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES = 6;

 function readApprovalScopeValue(value: unknown): "turn" | "session" | undefined {
  return value === "turn" || value === "session" ? value : undefined;
@@ -857,137 +852,6 @@ export function buildContextOverflowRecoveryText(params: {
  );
 }

-function shouldApplyOpenAIGptChatGuard(params: { provider?: string; model?: string }): boolean {
-  if (params.provider !== "openai" && params.provider !== "openai-codex") {
-    return false;
-  }
-  return /^gpt-5(?:[.-]|$)/i.test(params.model ?? "");
-}
-
-function countChatReplySentences(text: string): number {
-  return text
-    .trim()
-    .split(/(?<=[.!?])\s+/u)
-    .map((part) => part.trim())
-    .filter(Boolean).length;
-}
-
-function scoreChattyFinalReplyText(text: string): number {
-  const trimmed = text.trim();
-  if (!trimmed) {
-    return 0;
-  }
-  let score = 0;
-  const sentenceCount = countChatReplySentences(trimmed);
-  if (trimmed.length > 900) {
-    score += 1;
-  }
-  if (trimmed.length > 1_500) {
-    score += 1;
-  }
-  if (sentenceCount > 6) {
-    score += 1;
-  }
-  if (sentenceCount > 10) {
-    score += 1;
-  }
-  if (trimmed.split(/\n{2,}/u).filter(Boolean).length >= 3) {
-    score += 1;
-  }
-  if (
-    /\b(?:in summary|to summarize|here(?:'s| is) what|what changed|what I verified)\b/i.test(
-      trimmed,
-    )
-  ) {
-    score += 1;
-  }
-  return score;
-}
-
-function shortenChattyFinalReplyText(
-  text: string,
-  params: { maxChars: number; maxSentences: number },
-): string {
-  const trimmed = text.trim();
-  if (!trimmed) {
-    return trimmed;
-  }
-  const sentences = trimmed
-    .split(/(?<=[.!?])\s+/u)
-    .map((part) => part.trim())
-    .filter(Boolean);
-  let shortened = sentences.slice(0, params.maxSentences).join(" ");
-  if (!shortened) {
-    shortened = trimmed.slice(0, params.maxChars).trimEnd();
-  }
-  if (shortened.length > params.maxChars) {
-    shortened = shortened.slice(0, params.maxChars).trimEnd();
-  }
-  if (shortened.length >= trimmed.length) {
-    return trimmed;
-  }
-  return shortened.replace(/[.,;:!?-]*$/u, "").trimEnd() + "...";
-}
-
-function applyOpenAIGptChatReplyGuard(params: {
-  provider?: string;
-  model?: string;
-  commandBody: string;
-  isHeartbeat: boolean;
-  payloads?: ReplyPayload[];
-}): void {
-  if (
-    params.isHeartbeat ||
-    !shouldApplyOpenAIGptChatGuard({
-      provider: params.provider,
-      model: params.model,
-    }) ||
-    !params.payloads?.length
-  ) {
-    return;
-  }
-
-  const trimmedCommand = params.commandBody.trim();
-  const isAckTurn = isLikelyExecutionAckPrompt(trimmedCommand);
-  const allowSoftCap =
-    !isAckTurn &&
-    trimmedCommand.length > 0 &&
-    trimmedCommand.length <= 120 &&
-    !/\b(?:detail|detailed|depth|deep dive|explain|compare|walk me through|why|how)\b/i.test(
-      trimmedCommand,
-    );
-
-  for (const payload of params.payloads) {
-    const text = normalizeOptionalString(payload.text);
-    if (
-      !text ||
-      payload.isError ||
-      payload.isReasoning ||
-      payload.mediaUrl ||
-      (payload.mediaUrls?.length ?? 0) > 0 ||
-      payload.interactive ||
-      text.includes("```")
-    ) {
-      continue;
-    }
-
-    if (isAckTurn) {
-      payload.text = shortenChattyFinalReplyText(text, {
-        maxChars: GPT_CHAT_BREVITY_ACK_MAX_CHARS,
-        maxSentences: GPT_CHAT_BREVITY_ACK_MAX_SENTENCES,
-      });
-      continue;
-    }
-
-    if (allowSoftCap && scoreChattyFinalReplyText(text) >= 4) {
-      payload.text = shortenChattyFinalReplyText(text, {
-        maxChars: GPT_CHAT_BREVITY_SOFT_MAX_CHARS,
-        maxSentences: GPT_CHAT_BREVITY_SOFT_MAX_SENTENCES,
-      });
-    }
-  }
-}
-
 function buildRestartLifecycleReplyText(): string {
  return "⚠️ Gateway is restarting. Please wait a few seconds and try again.";
 }
@@ -2521,14 +2385,6 @@ export async function runAgentTurnWithFallback(params: {
        ];
      }
    }
-
-    applyOpenAIGptChatReplyGuard({
-      provider: fallbackProvider,
-      model: fallbackModel,
-      commandBody: params.commandBody,
-      isHeartbeat: params.isHeartbeat,
-      payloads: runResult.payloads,
-    });
  }

  return {