fix(codex): translate minimal thinking for modern models

Fixes #71946
This commit is contained in:
hcl
2026-04-26 13:40:53 +08:00
committed by GitHub
parent 036b422fc6
commit c5c40b22af
4 changed files with 80 additions and 4 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Codex/agent: translate `--thinking minimal` to `low` for modern Codex models (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.2) at request build time so the first turn is accepted instead of paying a wasted call + retry-with-low fallback. Older Codex models still receive `minimal` directly. Fixes #71946. Thanks @hclsys.
- TTS/WhatsApp: add `/tts latest` read-aloud support with duplicate suppression and `/tts chat on|off|default` session-scoped auto-TTS overrides, completing the on-demand voice-note UX for current-chat replies. Fixes #66032.
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc.
- TTS/agents: allow `agents.list[].tts` to override global `messages.tts` for per-agent voices while keeping shared provider credentials and preferences in the existing TTS config surface.

View File

@@ -211,7 +211,11 @@ function isKnownXHighCodexModel(modelId: string): boolean {
);
}
function isModernCodexModel(modelId: string): boolean {
// Exported so adapter request paths (thread-lifecycle.resolveReasoningEffort)
// can branch on model-family enum support: modern Codex models use the
// none/low/medium/high/xhigh effort enum and reject "minimal", which is the
// CLI default. (#71946)
export function isModernCodexModel(modelId: string): boolean {
const lower = modelId.trim().toLowerCase();
return (
lower === "gpt-5.5" || lower === "gpt-5.4" || lower === "gpt-5.4-mini" || lower === "gpt-5.2"

View File

@@ -0,0 +1,59 @@
import { describe, expect, it } from "vitest";
import { resolveReasoningEffort } from "./thread-lifecycle.js";
// Regression coverage for #71946: the app-server adapter must translate the
// CLI-default "minimal" thinkLevel to "low" for modern Codex models (which
// reject "minimal"), while leaving legacy models and non-effort levels alone.
describe("resolveReasoningEffort (#71946)", () => {
  const modernModelIds = ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.2"] as const;

  describe("modern Codex models (none/low/medium/high/xhigh enum)", () => {
    it.each(modernModelIds)(
      "translates 'minimal' -> 'low' for %s so the first request is accepted",
      (modelId) => {
        expect(resolveReasoningEffort("minimal", modelId)).toBe("low");
      },
    );

    it.each(modernModelIds)(
      "passes 'low' / 'medium' / 'high' / 'xhigh' through unchanged for %s",
      (modelId) => {
        // Every value already on the modern enum must survive untouched.
        for (const effort of ["low", "medium", "high", "xhigh"] as const) {
          expect(resolveReasoningEffort(effort, modelId)).toBe(effort);
        }
      },
    );

    it("normalizes case-variant model ids", () => {
      expect(resolveReasoningEffort("minimal", "GPT-5.5")).toBe("low");
      expect(resolveReasoningEffort("minimal", " gpt-5.4-mini ")).toBe("low");
    });
  });

  describe("legacy / non-modern Codex models", () => {
    const legacyModelIds = ["gpt-5", "gpt-4o", "o3-mini", "codex-mini-latest"] as const;

    it.each(legacyModelIds)(
      "preserves 'minimal' for %s — pre-modern enum still supports it",
      (modelId) => {
        expect(resolveReasoningEffort("minimal", modelId)).toBe("minimal");
      },
    );

    it("preserves 'minimal' for empty / unknown model ids (conservative default)", () => {
      // Unrecognized ids must not be treated as modern models.
      for (const modelId of ["", "unknown-model-xyz"]) {
        expect(resolveReasoningEffort("minimal", modelId)).toBe("minimal");
      }
    });
  });

  describe("non-effort thinkLevel values", () => {
    it("returns null for 'off'", () => {
      for (const modelId of ["gpt-5.5", "gpt-4o"]) {
        expect(resolveReasoningEffort("off", modelId)).toBeNull();
      }
    });

    it("returns null for 'adaptive' (non-effort enum value)", () => {
      for (const modelId of ["gpt-5.5", "gpt-4o"]) {
        expect(resolveReasoningEffort("adaptive", modelId)).toBeNull();
      }
    });

    it("returns null for 'max' (non-effort enum value)", () => {
      for (const modelId of ["gpt-5.5", "gpt-4o"]) {
        expect(resolveReasoningEffort("max", modelId)).toBeNull();
      }
    });
  });
});

View File

@@ -3,6 +3,7 @@ import {
type EmbeddedRunAttemptParams,
} from "openclaw/plugin-sdk/agent-harness-runtime";
import { renderCodexPromptOverlay } from "../../prompt-overlay.js";
import { isModernCodexModel } from "../../provider.js";
import type { CodexAppServerClient } from "./client.js";
import { codexSandboxPolicyForTurn, type CodexAppServerRuntimeOptions } from "./config.js";
import {
@@ -178,7 +179,7 @@ export function buildTurnStartParams(
sandboxPolicy: codexSandboxPolicyForTurn(options.appServer.sandbox, options.cwd),
model: params.modelId,
...(options.appServer.serviceTier ? { serviceTier: options.appServer.serviceTier } : {}),
effort: resolveReasoningEffort(params.thinkLevel),
effort: resolveReasoningEffort(params.thinkLevel, params.modelId),
};
}
@@ -283,11 +284,22 @@ function resolveCodexAppServerModelProvider(provider: string): string | undefine
return normalized === "openai-codex" ? "openai" : normalized;
}
function resolveReasoningEffort(
// Modern Codex models (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.2) use the
// none/low/medium/high/xhigh effort enum and reject "minimal". The CLI
// defaults thinkLevel to "minimal", so without translation EVERY agent turn
// on those models pays a wasted first request + retry-with-low fallback in
// pi-embedded-runner. Map "minimal" -> "low" upfront for modern models so the
// first request is accepted. Older Codex models still accept "minimal"
// directly. (#71946)
// Exported for unit-test coverage of the model-aware translation path.
export function resolveReasoningEffort(
thinkLevel: EmbeddedRunAttemptParams["thinkLevel"],
modelId: string,
): "minimal" | "low" | "medium" | "high" | "xhigh" | null {
if (thinkLevel === "minimal") {
return isModernCodexModel(modelId) ? "low" : "minimal";
}
if (
thinkLevel === "minimal" ||
thinkLevel === "low" ||
thinkLevel === "medium" ||
thinkLevel === "high" ||