From 9987e7797fc6a9247e7cbe7829c5863ef040f2b7 Mon Sep 17 00:00:00 2001 From: stainlu Date: Sun, 26 Apr 2026 22:03:04 +0800 Subject: [PATCH] fix(agents): scope claude-cli fallback seed and pair summary with boundary Addresses review on #72069: - Codex P1 ("Gate Claude prelude seeding by source provider"): the guard checked the *current* fallback candidate but not the failed attempt. A session that still carried a stale cliSessionBindings["claude-cli"] from an unrelated past run would inject Claude transcript context into a fallback chain that started on a different provider (e.g. openai -> openai-codex), leaking irrelevant prior conversation. Plumb `originalProvider` (the user-requested provider for the chain) through to runAgentAttempt and require `isClaudeCliProvider(originalProvider)` before reading Claude history. - Codex P2 ("Prefer latest compact boundary when summary is missing"): the resolver always preferred the most recent explicit summary, so a later compaction without its own summary entry (rare crash case) paired stale summary text with post-latest-boundary turns. Restructure readClaudeCliFallbackSeed to queue summaries into pendingSummary and flush each boundary's pair atomically. A boundary with no preceding summary now correctly falls back to the boundary's own content rather than serving an older summary alongside fresh turns. - Greptile P2 (newest-first break vs sparse coverage): the formatFallbackTurns walk intentionally stops on the first oversized turn so the prelude stays a contiguous "what was happening just before the failure" window. Document the design choice inline so a future maintainer doesn't reflexively change it to skip-and-continue. Tests: - New gateway cases for the boundary-without-summary edge case and for trailing summaries written without a paired boundary. - existing 33 attempt-execution + 14 cli-session-history tests still pass; broader src/agents/command suite stays green (63/63). --- src/agents/agent-command.ts | 1 + .../command/attempt-execution.helpers.ts | 15 +++-- src/agents/command/attempt-execution.ts | 18 +++++- src/gateway/cli-session-history.claude.ts | 37 +++++++---- src/gateway/cli-session-history.test.ts | 64 +++++++++++++++++++ 5 files changed, 116 insertions(+), 19 deletions(-) diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 88b7d393987..934d872ff7e 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -892,6 +892,7 @@ async function agentCommandInternal( return attemptExecutionRuntime.runAgentAttempt({ providerOverride, modelOverride, + originalProvider: provider, cfg, sessionEntry, sessionId, diff --git a/src/agents/command/attempt-execution.helpers.ts b/src/agents/command/attempt-execution.helpers.ts index f6c01ac3dfb..1126566c75c 100644 --- a/src/agents/command/attempt-execution.helpers.ts +++ b/src/agents/command/attempt-execution.helpers.ts @@ -189,9 +189,15 @@ function formatFallbackTurns( if (turns.length === 0 || remainingBudget <= 0) { return { text: "", consumed: 0 }; } - // Walk newest -> oldest, prepending lines until we exceed the budget. - // Stops at the oldest turn we can include in full so we never deliver a - // truncated mid-turn fragment to the fallback model. + // Walk newest -> oldest, prepending lines until one does not fit. + // + // We stop on the FIRST oversized turn instead of skipping it and then + // continuing into older ones. The fallback prelude is a "most recent + // contiguous window" summary — what was happening just before the + // failed attempt — so a non-contiguous slice (newest + something from + // 20 turns ago, gap in the middle) would mislead the fallback model + // about the actual flow. Sparse coverage is worse than fewer turns: + // greptile flagged this as a P2 on #72069; behavior is intentional. const lines: string[] = []; let consumed = 0; for (let i = turns.length - 1; i >= 0; i -= 1) { @@ -209,9 +215,6 @@ function formatFallbackTurns( } const line = `${role}: ${text}`; if (consumed + line.length + 1 > remainingBudget) { - // Skip this turn rather than chop it; if even the most recent turn - // is too large to include cleanly, stop emitting (the prelude is a - // best-effort sketch, not a transcript). break; } lines.unshift(line); diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts index 4b2aff5a5ea..f2689b5ed9c 100644 --- a/src/agents/command/attempt-execution.ts +++ b/src/agents/command/attempt-execution.ts @@ -233,6 +233,15 @@ export async function persistCliTurnTranscript(params: { export function runAgentAttempt(params: { providerOverride: string; modelOverride: string; + /** + * The provider the user originally requested for this turn (i.e. the + * primary candidate of the fallback chain). Used to scope claude-cli + * fallback context seeding to chains that actually started on claude-cli; + * a stale `cliSessionBindings["claude-cli"]` from an unrelated past run + * must not contaminate fallbacks that started on another provider + * (Codex review #72069 P1). + */ + originalProvider: string; cfg: OpenClawConfig; sessionEntry: SessionEntry | undefined; sessionId: string; @@ -266,8 +275,15 @@ export function runAgentAttempt(params: { // Harvest a compacted context (Claude's own `/compact` summary plus the // most recent post-boundary turns) and prepend it to the retry prompt. // This mirrors what Claude Code itself replays after compaction. + // + // Gate explicitly on `originalProvider === "claude-cli"`: if the user- + // requested provider for this run was not claude-cli, any claude-cli + // session binding on the entry is stale state from an earlier run and + // must not bleed into this fallback chain. const claudeCliFallbackPrelude = - params.isFallbackRetry && !isClaudeCliProvider(params.providerOverride) + params.isFallbackRetry && + isClaudeCliProvider(params.originalProvider) && + !isClaudeCliProvider(params.providerOverride) ? buildClaudeCliFallbackContextPrelude({ cliSessionId: getCliSessionBinding(params.sessionEntry, "claude-cli")?.sessionId, }) diff --git a/src/gateway/cli-session-history.claude.ts b/src/gateway/cli-session-history.claude.ts index 0e0d5afc02b..780b5a1a901 100644 --- a/src/gateway/cli-session-history.claude.ts +++ b/src/gateway/cli-session-history.claude.ts @@ -420,12 +420,20 @@ export function readClaudeCliFallbackSeed(params: { return undefined; } - let summaryText: string | undefined; - let boundaryFallbackText: string | undefined; - // Buffer turns into a window that resets every time we cross a compact - // boundary. After the walk completes, `windowedTurns` holds turns from - // the most recent (post-boundary) window, which matches what Claude Code - // would replay alongside the summary on its own resume. + // Pair each compact_boundary with the summary entry that preceded it + // since the previous boundary, so a later compaction whose summary is + // missing (e.g. crash mid-write) does not silently keep an older + // compaction's summary alive (#72069 Codex P2). Walk shape: + // - explicit `summary` entry queues into `pendingSummary` until the + // next boundary "consumes" it. + // - `compact_boundary` flushes the pending summary into `lastSummary`, + // refreshes `lastBoundaryFallback` from the boundary's content, and + // drops the windowed turns and tool registry. + // - everything after the latest boundary forms the recent-window the + // fallback runner will replay alongside the summary. + let pendingSummary: string | undefined; + let lastSummary: string | undefined; + let lastBoundaryFallback: string | undefined; let windowedTurns: TranscriptLikeMessage[] = []; const toolNameRegistry: ToolNameRegistry = new Map(); @@ -442,15 +450,17 @@ export function readClaudeCliFallbackSeed(params: { const explicitSummary = extractSummaryText(parsed); if (explicitSummary) { - // Explicit summary entries are written by `/compact`; later entries - // supersede earlier ones the same way Claude Code itself replays - // only the most recent summary. - summaryText = explicitSummary; + // Queue the summary; the next boundary will pair it with itself. + // Multiple summaries between boundaries (rare) take the last one, + // matching how Claude Code's resume picks the most recent summary. + pendingSummary = explicitSummary; continue; } if (isCompactBoundary(parsed)) { - boundaryFallbackText = extractCompactBoundaryFallbackText(parsed) ?? boundaryFallbackText; + lastSummary = pendingSummary; // may be undefined if no preceding summary + pendingSummary = undefined; + lastBoundaryFallback = extractCompactBoundaryFallbackText(parsed) ?? lastBoundaryFallback; // Drop turns that lived before this boundary — they are now // represented by the summary, and replaying them would double-count // their tokens against the fallback model's budget. @@ -468,7 +478,10 @@ export function readClaudeCliFallbackSeed(params: { } const recentTurns = coalesceClaudeCliToolMessages(windowedTurns); - const resolvedSummaryText = summaryText ?? boundaryFallbackText; + // Honor a `/compact` summary that was written but never followed by a + // boundary marker (older Claude Code build, or graceful-degrade case). + // `pendingSummary` then carries the latest such summary into the result. + const resolvedSummaryText = lastSummary ?? pendingSummary ?? lastBoundaryFallback; if (!resolvedSummaryText && recentTurns.length === 0) { return undefined; } diff --git a/src/gateway/cli-session-history.test.ts b/src/gateway/cli-session-history.test.ts index 3d4e7232eb9..0da58262ecf 100644 --- a/src/gateway/cli-session-history.test.ts +++ b/src/gateway/cli-session-history.test.ts @@ -511,4 +511,68 @@ describe("readClaudeCliFallbackSeed", () => { const seed = readClaudeCliFallbackSeed({ cliSessionId: "../escape" }); expect(seed).toBeUndefined(); }); + + // Codex P2 on #72069: each compact_boundary must pair with the summary + // entry that preceded it since the previous boundary. A later boundary + // without its own summary must not silently keep an older compaction's + // summary alive — that paired stale text with fresh post-boundary turns. + it("falls back to the latest boundary content when a newer compaction has no summary", async () => { + await writeJsonl([ + { type: "summary", summary: "FIRST compact summary", leafUuid: "x" }, + { + type: "system", + subtype: "compact_boundary", + content: "Conversation compacted (1)", + compactMetadata: { trigger: "manual", preTokens: 1000 }, + }, + { + type: "user", + uuid: "u-mid", + message: { role: "user", content: "post-first-compact turn" }, + }, + // Second compaction: boundary written, but the summary entry never + // landed (e.g. crash between writes). The seed must NOT serve the + // FIRST summary alongside post-second-boundary turns. + { + type: "system", + subtype: "compact_boundary", + content: "Conversation compacted (2)", + compactMetadata: { trigger: "auto", preTokens: 2000 }, + }, + { + type: "user", + uuid: "u-tail", + message: { role: "user", content: "post-second-compact turn" }, + }, + ]); + + const seed = readClaudeCliFallbackSeed({ cliSessionId: SESSION_ID }); + expect(seed).toBeDefined(); + expect(seed?.summaryText).toBe("Conversation compacted (2)"); + expect(seed?.summaryText).not.toBe("FIRST compact summary"); + expect(seed?.recentTurns).toHaveLength(1); + expect(JSON.stringify(seed?.recentTurns)).toContain("post-second-compact turn"); + }); + + it("uses a trailing summary that has no following compact_boundary marker", async () => { + // Older Claude Code builds wrote the summary entry without a paired + // boundary marker. The seed should still surface that summary so a + // subsequent fallback at least has a labeled context block. + await writeJsonl([ + { + type: "user", + uuid: "u-1", + message: { role: "user", content: "earlier turn" }, + }, + { type: "summary", summary: "trailing summary without boundary", leafUuid: "x" }, + { + type: "user", + uuid: "u-2", + message: { role: "user", content: "later turn" }, + }, + ]); + + const seed = readClaudeCliFallbackSeed({ cliSessionId: SESSION_ID }); + expect(seed?.summaryText).toBe("trailing summary without boundary"); + }); });