From 9987e7797fc6a9247e7cbe7829c5863ef040f2b7 Mon Sep 17 00:00:00 2001
From: stainlu <stainlu@grimo.ai>
Date: Sun, 26 Apr 2026 22:03:04 +0800
Subject: [PATCH] fix(agents): scope claude-cli fallback seed and pair summary
 with boundary

Addresses review on #72069:

- Codex P1 ("Gate Claude prelude seeding by source provider"): the
  guard checked the *current* fallback candidate but not the failed
  attempt. A session that still carried a stale
  cliSessionBindings["claude-cli"] from an unrelated past run would
  inject Claude transcript context into a fallback chain that started
  on a different provider (e.g. openai -> openai-codex), leaking
  irrelevant prior conversation. Plumb `originalProvider` (the
  user-requested provider for the chain) through to runAgentAttempt
  and require `isClaudeCliProvider(originalProvider)` before reading
  Claude history.

- Codex P2 ("Prefer latest compact boundary when summary is missing"):
  the resolver always preferred the most recent explicit summary, so
  a later compaction without its own summary entry (rare crash case)
  paired stale summary text with post-latest-boundary turns. Restructure
  readClaudeCliFallbackSeed to queue summaries into pendingSummary and
  flush each boundary's pair atomically. A boundary with no preceding
  summary now correctly falls back to the boundary's own content
  rather than serving an older summary alongside fresh turns.

- Greptile P2 (newest-first break vs sparse coverage): the
  formatFallbackTurns walk intentionally stops on the first oversized
  turn so the prelude stays a contiguous "what was happening just
  before the failure" window. Document the design choice inline so a
  future maintainer doesn't reflexively change it to skip-and-continue.

Tests:
- New gateway cases for the boundary-without-summary edge case and
  for trailing summaries written without a paired boundary.
- existing 33 attempt-execution + 14 cli-session-history tests still
  pass; broader src/agents/command suite stays green (63/63).
---
 src/agents/agent-command.ts                   |  1 +
 .../command/attempt-execution.helpers.ts      | 15 +++--
 src/agents/command/attempt-execution.ts       | 18 +++++-
 src/gateway/cli-session-history.claude.ts     | 37 +++++++----
 src/gateway/cli-session-history.test.ts       | 64 +++++++++++++++++++
 5 files changed, 116 insertions(+), 19 deletions(-)

diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts
index 88b7d393987..934d872ff7e 100644
--- a/src/agents/agent-command.ts
+++ b/src/agents/agent-command.ts
@@ -892,6 +892,7 @@ async function agentCommandInternal(
             return attemptExecutionRuntime.runAgentAttempt({
               providerOverride,
               modelOverride,
+              originalProvider: provider,
               cfg,
               sessionEntry,
               sessionId,
diff --git a/src/agents/command/attempt-execution.helpers.ts b/src/agents/command/attempt-execution.helpers.ts
index f6c01ac3dfb..1126566c75c 100644
--- a/src/agents/command/attempt-execution.helpers.ts
+++ b/src/agents/command/attempt-execution.helpers.ts
@@ -189,9 +189,15 @@ function formatFallbackTurns(
   if (turns.length === 0 || remainingBudget <= 0) {
     return { text: "", consumed: 0 };
   }
-  // Walk newest -> oldest, prepending lines until we exceed the budget.
-  // Stops at the oldest turn we can include in full so we never deliver a
-  // truncated mid-turn fragment to the fallback model.
+  // Walk newest -> oldest, prepending lines until one does not fit.
+  //
+  // We stop on the FIRST oversized turn instead of skipping it and then
+  // continuing into older ones. The fallback prelude is a "most recent
+  // contiguous window" summary — what was happening just before the
+  // failed attempt — so a non-contiguous slice (newest + something from
+  // 20 turns ago, gap in the middle) would mislead the fallback model
+  // about the actual flow. Sparse coverage is worse than fewer turns:
+  // greptile flagged this as a P2 on #72069; behavior is intentional.
   const lines: string[] = [];
   let consumed = 0;
   for (let i = turns.length - 1; i >= 0; i -= 1) {
@@ -209,9 +215,6 @@ function formatFallbackTurns(
     }
     const line = `${role}: ${text}`;
     if (consumed + line.length + 1 > remainingBudget) {
-      // Skip this turn rather than chop it; if even the most recent turn
-      // is too large to include cleanly, stop emitting (the prelude is a
-      // best-effort sketch, not a transcript).
       break;
     }
     lines.unshift(line);
diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts
index 4b2aff5a5ea..f2689b5ed9c 100644
--- a/src/agents/command/attempt-execution.ts
+++ b/src/agents/command/attempt-execution.ts
@@ -233,6 +233,15 @@ export async function persistCliTurnTranscript(params: {
 export function runAgentAttempt(params: {
   providerOverride: string;
   modelOverride: string;
+  /**
+   * The provider the user originally requested for this turn (i.e. the
+   * primary candidate of the fallback chain). Used to scope claude-cli
+   * fallback context seeding to chains that actually started on claude-cli;
+   * a stale `cliSessionBindings["claude-cli"]` from an unrelated past run
+   * must not contaminate fallbacks that started on another provider
+   * (Codex review #72069 P1).
+   */
+  originalProvider: string;
   cfg: OpenClawConfig;
   sessionEntry: SessionEntry | undefined;
   sessionId: string;
@@ -266,8 +275,15 @@ export function runAgentAttempt(params: {
   // Harvest a compacted context (Claude's own `/compact` summary plus the
   // most recent post-boundary turns) and prepend it to the retry prompt.
   // This mirrors what Claude Code itself replays after compaction.
+  //
+  // Gate explicitly on `originalProvider === "claude-cli"`: if the user-
+  // requested provider for this run was not claude-cli, any claude-cli
+  // session binding on the entry is stale state from an earlier run and
+  // must not bleed into this fallback chain.
   const claudeCliFallbackPrelude =
-    params.isFallbackRetry && !isClaudeCliProvider(params.providerOverride)
+    params.isFallbackRetry &&
+    isClaudeCliProvider(params.originalProvider) &&
+    !isClaudeCliProvider(params.providerOverride)
       ? buildClaudeCliFallbackContextPrelude({
           cliSessionId: getCliSessionBinding(params.sessionEntry, "claude-cli")?.sessionId,
         })
diff --git a/src/gateway/cli-session-history.claude.ts b/src/gateway/cli-session-history.claude.ts
index 0e0d5afc02b..780b5a1a901 100644
--- a/src/gateway/cli-session-history.claude.ts
+++ b/src/gateway/cli-session-history.claude.ts
@@ -420,12 +420,20 @@ export function readClaudeCliFallbackSeed(params: {
     return undefined;
   }
 
-  let summaryText: string | undefined;
-  let boundaryFallbackText: string | undefined;
-  // Buffer turns into a window that resets every time we cross a compact
-  // boundary. After the walk completes, `windowedTurns` holds turns from
-  // the most recent (post-boundary) window, which matches what Claude Code
-  // would replay alongside the summary on its own resume.
+  // Pair each compact_boundary with the summary entry that preceded it
+  // since the previous boundary, so a later compaction whose summary is
+  // missing (e.g. crash mid-write) does not silently keep an older
+  // compaction's summary alive (#72069 Codex P2). Walk shape:
+  //   - explicit `summary` entry queues into `pendingSummary` until the
+  //     next boundary "consumes" it.
+  //   - `compact_boundary` flushes the pending summary into `lastSummary`,
+  //     refreshes `lastBoundaryFallback` from the boundary's content, and
+  //     drops the windowed turns and tool registry.
+  //   - everything after the latest boundary forms the recent-window the
+  //     fallback runner will replay alongside the summary.
+  let pendingSummary: string | undefined;
+  let lastSummary: string | undefined;
+  let lastBoundaryFallback: string | undefined;
   let windowedTurns: TranscriptLikeMessage[] = [];
   const toolNameRegistry: ToolNameRegistry = new Map();
 
@@ -442,15 +450,17 @@ export function readClaudeCliFallbackSeed(params: {
 
     const explicitSummary = extractSummaryText(parsed);
     if (explicitSummary) {
-      // Explicit summary entries are written by `/compact`; later entries
-      // supersede earlier ones the same way Claude Code itself replays
-      // only the most recent summary.
-      summaryText = explicitSummary;
+      // Queue the summary; the next boundary will pair it with itself.
+      // Multiple summaries between boundaries (rare) take the last one,
+      // matching how Claude Code's resume picks the most recent summary.
+      pendingSummary = explicitSummary;
       continue;
     }
 
     if (isCompactBoundary(parsed)) {
-      boundaryFallbackText = extractCompactBoundaryFallbackText(parsed) ?? boundaryFallbackText;
+      lastSummary = pendingSummary; // may be undefined if no preceding summary
+      pendingSummary = undefined;
+      lastBoundaryFallback = extractCompactBoundaryFallbackText(parsed) ?? lastBoundaryFallback;
       // Drop turns that lived before this boundary — they are now
       // represented by the summary, and replaying them would double-count
       // their tokens against the fallback model's budget.
@@ -468,7 +478,10 @@ export function readClaudeCliFallbackSeed(params: {
   }
 
   const recentTurns = coalesceClaudeCliToolMessages(windowedTurns);
-  const resolvedSummaryText = summaryText ?? boundaryFallbackText;
+  // Honor a `/compact` summary that was written but never followed by a
+  // boundary marker (older Claude Code build, or graceful-degrade case).
+  // `pendingSummary` then carries the latest such summary into the result.
+  const resolvedSummaryText = lastSummary ?? pendingSummary ?? lastBoundaryFallback;
   if (!resolvedSummaryText && recentTurns.length === 0) {
     return undefined;
   }
diff --git a/src/gateway/cli-session-history.test.ts b/src/gateway/cli-session-history.test.ts
index 3d4e7232eb9..0da58262ecf 100644
--- a/src/gateway/cli-session-history.test.ts
+++ b/src/gateway/cli-session-history.test.ts
@@ -511,4 +511,68 @@ describe("readClaudeCliFallbackSeed", () => {
     const seed = readClaudeCliFallbackSeed({ cliSessionId: "../escape" });
     expect(seed).toBeUndefined();
   });
+
+  // Codex P2 on #72069: each compact_boundary must pair with the summary
+  // entry that preceded it since the previous boundary. A later boundary
+  // without its own summary must not silently keep an older compaction's
+  // summary alive — that paired stale text with fresh post-boundary turns.
+  it("falls back to the latest boundary content when a newer compaction has no summary", async () => {
+    await writeJsonl([
+      { type: "summary", summary: "FIRST compact summary", leafUuid: "x" },
+      {
+        type: "system",
+        subtype: "compact_boundary",
+        content: "Conversation compacted (1)",
+        compactMetadata: { trigger: "manual", preTokens: 1000 },
+      },
+      {
+        type: "user",
+        uuid: "u-mid",
+        message: { role: "user", content: "post-first-compact turn" },
+      },
+      // Second compaction: boundary written, but the summary entry never
+      // landed (e.g. crash between writes). The seed must NOT serve the
+      // FIRST summary alongside post-second-boundary turns.
+      {
+        type: "system",
+        subtype: "compact_boundary",
+        content: "Conversation compacted (2)",
+        compactMetadata: { trigger: "auto", preTokens: 2000 },
+      },
+      {
+        type: "user",
+        uuid: "u-tail",
+        message: { role: "user", content: "post-second-compact turn" },
+      },
+    ]);
+
+    const seed = readClaudeCliFallbackSeed({ cliSessionId: SESSION_ID });
+    expect(seed).toBeDefined();
+    expect(seed?.summaryText).toBe("Conversation compacted (2)");
+    expect(seed?.summaryText).not.toBe("FIRST compact summary");
+    expect(seed?.recentTurns).toHaveLength(1);
+    expect(JSON.stringify(seed?.recentTurns)).toContain("post-second-compact turn");
+  });
+
+  it("uses a trailing summary that has no following compact_boundary marker", async () => {
+    // Older Claude Code builds wrote the summary entry without a paired
+    // boundary marker. The seed should still surface that summary so a
+    // subsequent fallback at least has a labeled context block.
+    await writeJsonl([
+      {
+        type: "user",
+        uuid: "u-1",
+        message: { role: "user", content: "earlier turn" },
+      },
+      { type: "summary", summary: "trailing summary without boundary", leafUuid: "x" },
+      {
+        type: "user",
+        uuid: "u-2",
+        message: { role: "user", content: "later turn" },
+      },
+    ]);
+
+    const seed = readClaudeCliFallbackSeed({ cliSessionId: SESSION_ID });
+    expect(seed?.summaryText).toBe("trailing summary without boundary");
+  });
 });