Recover Codex context overflow prompt errors (#85542)

* fix: recover codex context overflow prompt errors

* test: align Codex overflow prompt proof

* test: satisfy manifest registry mock contract

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Jason (Json)
2026-05-25 10:44:48 -06:00
committed by GitHub
parent d967760b41
commit 5cfa577778
7 changed files with 62 additions and 4 deletions

View File

@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
- Agents/media: send direct fallback for generated media still missing after an active requester wake fails. (#85489) Thanks @fuller-stack-dev.
- Agents: derive overflow compaction budgets from provider-reported and synthetic over-budget token counts so confirmed context overflows compact before retrying. (#70473) Thanks @fuller-stack-dev.
- Agents/Codex: recover Codex context-window prompt errors through overflow compaction and surface reset guidance when recovery is exhausted. (#85542) Thanks @fuller-stack-dev.
- Agent transcript: include OpenClaw agent session logs when finding local transcript candidates.
- Sessions/doctor: load large session stores without clone amplification during read-only doctor checks and reclaim stale `sessions.json.*.tmp` sidecars. Fixes #56827. Thanks @openperf.
- Tests: clean successful plugin gateway gauntlet isolated temp roots while keeping an explicit preservation switch for failed/debug runs.

View File

@@ -3,7 +3,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { MALFORMED_STREAMING_FRAGMENT_ERROR_MESSAGE } from "../../shared/assistant-error-format.js";
import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
import { formatAssistantErrorText } from "./errors.js";
import { formatAssistantErrorText, isLikelyContextOverflowError } from "./errors.js";
const { toolPolicyAuditInfo } = vi.hoisted(() => ({
toolPolicyAuditInfo: vi.fn(),
@@ -92,3 +92,13 @@ describe("formatAssistantErrorText streaming JSON parse classification", () => {
);
});
});
// Pins the overflow classifier against the "ran out of room" wording that the
// Codex prompt error uses for a full context window.
describe("isLikelyContextOverflowError", () => {
  it("detects Codex promptError wording for a full context window", () => {
    const codexPromptErrorText =
      "Codex ran out of room in the model's context window. Start a new thread or clear earlier history before retrying.";

    const detected = isLikelyContextOverflowError(codexPromptErrorText);

    expect(detected).toBe(true);
  });
});

View File

@@ -108,6 +108,8 @@ export function isContextOverflowError(errorMessage?: string): boolean {
lower.includes("context window") ||
lower.includes("context length") ||
lower.includes("maximum context length");
const hasContextWindowOutOfRoom =
hasContextWindow && (lower.includes("ran out of room") || lower.includes("ran out of space"));
return (
lower.includes("request_too_large") ||
(lower.includes("invalid_argument") && lower.includes("maximum number of tokens")) ||
@@ -119,6 +121,7 @@ export function isContextOverflowError(errorMessage?: string): boolean {
lower.includes("exceeds model context window") ||
lower.includes("model token limit") ||
(lower.includes("input exceeds") && lower.includes("maximum number of tokens")) ||
hasContextWindowOutOfRoom ||
(hasRequestSizeExceeds && hasContextWindow) ||
lower.includes("context overflow:") ||
lower.includes("exceed context limit") ||

View File

@@ -182,6 +182,7 @@ export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
return (
lower.includes("request_too_large") ||
lower.includes("context window exceeded") ||
(lower.includes("context window") && lower.includes("ran out of room")) ||
lower.includes("prompt is too long")
);
});
@@ -360,6 +361,7 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
return (
lower.includes("request_too_large") ||
lower.includes("context window exceeded") ||
(lower.includes("context window") && lower.includes("ran out of room")) ||
lower.includes("prompt is too long")
);
});

View File

@@ -23,6 +23,7 @@ import {
mockedExtractObservedOverflowTokenCount,
mockedGlobalHookRunner,
mockedGetApiKeyForModel,
mockedIsLikelyContextOverflowError,
mockedMarkAuthProfileSuccess,
mockedPickFallbackThinkingLevel,
mockedResolveAuthProfileOrder,
@@ -1543,6 +1544,39 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
expect(result.meta.error).toBeUndefined();
});
it("surfaces a visible blocked payload for Codex promptError overflow without assistant text", async () => {
  // Arrange: a single attempt that fails with the Codex context-window prompt
  // error, produces no assistant text, and reports zero token usage.
  const promptError = new Error(
    "Codex ran out of room in the model's context window. Start a new thread or clear earlier history before retrying.",
  );
  // Every meta object handed to setTerminalLifecycleMeta is recorded here so the
  // final lifecycle state can be inspected after the run.
  const recordedLifecycleMeta: Array<Record<string, unknown>> = [];
  const failedAttempt = makeAttemptResult({
    promptError,
    promptErrorSource: "prompt",
    assistantTexts: [],
    attemptUsage: { input: 0, output: 0, total: 0 },
    setTerminalLifecycleMeta: (meta) => {
      recordedLifecycleMeta.push(meta);
    },
  });
  mockedRunEmbeddedAttempt.mockResolvedValueOnce(failedAttempt);

  // Act
  const result = await runEmbeddedPiAgent(overflowBaseRunParams);
  const blockedPayload = result.payloads?.[0];
  const blockedText = blockedPayload?.text;

  // Assert: the prompt error was classified as an overflow and compaction ran once.
  expect(mockedIsLikelyContextOverflowError).toHaveBeenCalledWith(promptError.message);
  expect(mockedCompactDirect).toHaveBeenCalledTimes(1);

  // Assert: the user-visible payload is an error carrying reset guidance.
  expect(blockedPayload).toMatchObject({
    isError: true,
    text: expect.stringContaining("Context overflow"),
  });
  expect(blockedText).toContain("/reset");
  expect(blockedText).toContain("/new");

  // Assert: run metadata and the last recorded lifecycle meta both report the blocked state.
  expect(result.meta.error?.kind).toBe("context_overflow");
  expect(result.meta.livenessState).toBe("blocked");
  expect(result.meta.finalAssistantVisibleText).toBe(blockedText);
  expect(recordedLifecycleMeta.at(-1)).toMatchObject({ livenessState: "blocked" });
});
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
mockedRunEmbeddedAttempt,

View File

@@ -2149,6 +2149,13 @@ export async function runEmbeddedPiAgent(
);
}
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
const overflowRecoveryText =
"Context overflow: prompt too large for the model. " +
"Try /reset (or /new) to start a fresh session, or use a larger-context model.";
log.warn(
`[context-overflow-recovery] exhausted provider overflow recovery for ${provider}/${modelId}; ` +
`livenessState=blocked suggestedAction=reset_or_new kind=${kind}`,
);
attempt.setTerminalLifecycleMeta?.({
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",
@@ -2156,9 +2163,7 @@ export async function runEmbeddedPiAgent(
return {
payloads: [
{
text:
"Context overflow: prompt too large for the model. " +
"Try /reset (or /new) to start a fresh session, or use a larger-context model.",
text: overflowRecoveryText,
isError: true,
},
],
@@ -2176,6 +2181,8 @@ export async function runEmbeddedPiAgent(
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
finalAssistantVisibleText: overflowRecoveryText,
finalAssistantRawText: overflowRecoveryText,
finalPromptText: attempt.finalPromptText,
replayInvalid: resolveReplayInvalidForAttempt(),
livenessState: "blocked",

View File

@@ -14,6 +14,7 @@ vi.mock("../plugins/setup-registry.js", () => ({
vi.mock("../plugins/manifest-registry.js", () => ({
loadPluginManifestRegistry: () => ({
diagnostics: [],
plugins: [
{
id: "brave",