From 5e4c29e9bca4d7333fd6a5c0702faffc91de6d8f Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Tue, 28 Apr 2026 13:27:06 +0530 Subject: [PATCH] fix(agents): require claude fallback source provider --- .../auth-profile-runtime-contract.test.ts | 1 + .../command/attempt-execution.cli.test.ts | 10 ++++ .../command/attempt-execution.helpers.ts | 17 ------- src/agents/command/attempt-execution.test.ts | 23 ++++++--- src/agents/command/attempt-execution.ts | 27 +--------- src/gateway/cli-session-history.claude.ts | 50 +------------------ src/gateway/cli-session-history.test.ts | 16 +----- 7 files changed, 31 insertions(+), 113 deletions(-) diff --git a/src/agents/auth-profile-runtime-contract.test.ts b/src/agents/auth-profile-runtime-contract.test.ts index e09e3d42b6f..b36c11c3b12 100644 --- a/src/agents/auth-profile-runtime-contract.test.ts +++ b/src/agents/auth-profile-runtime-contract.test.ts @@ -147,6 +147,7 @@ async function runAuthContractAttempt(params: { await runAgentAttempt({ providerOverride: params.providerOverride, + originalProvider: params.providerOverride, modelOverride: "gpt-5.4", cfg, sessionEntry, diff --git a/src/agents/command/attempt-execution.cli.test.ts b/src/agents/command/attempt-execution.cli.test.ts index dbf0887d705..df95c4bae4d 100644 --- a/src/agents/command/attempt-execution.cli.test.ts +++ b/src/agents/command/attempt-execution.cli.test.ts @@ -102,6 +102,7 @@ describe("CLI attempt execution", () => { }) { await runAgentAttempt({ providerOverride: "claude-cli", + originalProvider: "claude-cli", modelOverride: "opus", cfg: {} as OpenClawConfig, sessionEntry: params.sessionEntry, @@ -166,6 +167,7 @@ describe("CLI attempt execution", () => { await runAgentAttempt({ providerOverride: "claude-cli", + originalProvider: "claude-cli", modelOverride: "opus", cfg: {} as OpenClawConfig, sessionEntry, @@ -317,6 +319,7 @@ describe("CLI attempt execution", () => { await runAgentAttempt({ providerOverride: "codex-cli", + originalProvider: "codex-cli", modelOverride: "gpt-5.4", cfg: {} as OpenClawConfig, sessionEntry, @@ -433,6 +436,7 @@ describe("CLI attempt execution", () => { await runAgentAttempt({ providerOverride: "claude-cli", + originalProvider: "claude-cli", modelOverride: "opus", cfg: {} as OpenClawConfig, sessionEntry, @@ -482,6 +486,7 @@ describe("CLI attempt execution", () => { await runAgentAttempt({ providerOverride: "anthropic", + originalProvider: "anthropic", modelOverride: "claude-opus-4-7", cfg: { agents: { @@ -536,6 +541,7 @@ describe("CLI attempt execution", () => { await runAgentAttempt({ providerOverride: "claude-cli", + originalProvider: "claude-cli", modelOverride: "claude-opus-4-7", cfg: {} as OpenClawConfig, sessionEntry, @@ -601,6 +607,7 @@ describe("embedded attempt harness pinning", () => { await runAgentAttempt({ providerOverride: "openai", + originalProvider: "openai", modelOverride: "gpt-5.4", cfg: {} as OpenClawConfig, sessionEntry, @@ -644,6 +651,7 @@ describe("embedded attempt harness pinning", () => { await runAgentAttempt({ providerOverride: "codex", + originalProvider: "codex", modelOverride: "gpt-5.4", cfg: { agents: { @@ -693,6 +701,7 @@ describe("embedded attempt harness pinning", () => { await runAgentAttempt({ providerOverride: "openai", + originalProvider: "openai", modelOverride: "gpt-5.4", cfg: {} as OpenClawConfig, sessionEntry, @@ -736,6 +745,7 @@ describe("embedded attempt harness pinning", () => { await runAgentAttempt({ providerOverride: "openai", + originalProvider: "claude-cli", modelOverride: "gpt-5.4", cfg: { agents: { diff --git a/src/agents/command/attempt-execution.helpers.ts b/src/agents/command/attempt-execution.helpers.ts index 1126566c75c..523615fbef3 100644 --- a/src/agents/command/attempt-execution.helpers.ts +++ b/src/agents/command/attempt-execution.helpers.ts @@ -109,14 +109,6 @@ export function resolveFallbackRetryPrompt(params: { body: string; isFallbackRetry: boolean; sessionHasHistory?: boolean; - /** - * Optional context prelude (e.g., a compacted summary harvested from a - * non-OpenClaw transcript such as Claude Code's local JSONL). Prepended - * before the retry marker so the fallback candidate has prior context - * even when OpenClaw's own session file is empty for the current - * provider — see `buildClaudeCliFallbackContextPrelude` for the - * claude-cli case (#69973). - */ priorContextPrelude?: string; }): string { if (!params.isFallbackRetry) { @@ -189,15 +181,6 @@ function formatFallbackTurns( if (turns.length === 0 || remainingBudget <= 0) { return { text: "", consumed: 0 }; } - // Walk newest -> oldest, prepending lines until one does not fit. - // - // We stop on the FIRST oversized turn instead of skipping it and then - // continuing into older ones. The fallback prelude is a "most recent - // contiguous window" summary — what was happening just before the - // failed attempt — so a non-contiguous slice (newest + something from - // 20 turns ago, gap in the middle) would mislead the fallback model - // about the actual flow. Sparse coverage is worse than fewer turns: - // greptile flagged this as a P2 on #72069; behavior is intentional. const lines: string[] = []; let consumed = 0; for (let i = turns.length - 1; i >= 0; i -= 1) { diff --git a/src/agents/command/attempt-execution.test.ts b/src/agents/command/attempt-execution.test.ts index b599449f606..3306e7b9554 100644 --- a/src/agents/command/attempt-execution.test.ts +++ b/src/agents/command/attempt-execution.test.ts @@ -80,10 +80,6 @@ describe("resolveFallbackRetryPrompt", () => { ).toBe(originalBody); }); - // #69973: even when OpenClaw's own session file is empty (the claude-cli - // case where Claude Code maintains its own JSONL), a harvested - // priorContextPrelude must still seed the retry prompt so the fallback - // candidate has prior context. it("prepends priorContextPrelude before the retry marker on fallback retry", () => { const prelude = "## Prior session context (from claude-cli)\nuser: prior question"; const result = resolveFallbackRetryPrompt({ @@ -191,8 +187,6 @@ describe("formatClaudeCliFallbackPrelude", () => { ); expect(out).toContain("Summary of earlier conversation (truncated):"); expect(out.length).toBeLessThan(800); - // Trailing ellipsis tells the model the summary was clipped — better - // than silently emitting a fragment that looks complete. expect(out).toMatch(/…$/); }); @@ -206,6 +200,23 @@ describe("formatClaudeCliFallbackPrelude", () => { expect(out).toContain("turn 10"); expect(out).not.toContain("turn 1 "); }); + + it("keeps the recent turn window contiguous when an adjacent turn is oversized", () => { + const out = formatClaudeCliFallbackPrelude( + { + recentTurns: [ + { role: "user", content: "older small turn" }, + { role: "assistant", content: `oversized adjacent turn ${"x".repeat(500)}` }, + { role: "user", content: "newest small turn" }, + ], + }, + { charBudget: 260 }, + ); + + expect(out).toContain("newest small turn"); + expect(out).not.toContain("oversized adjacent turn"); + expect(out).not.toContain("older small turn"); + }); }); describe("buildClaudeCliFallbackContextPrelude", () => { diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts index 46e2c5eb9a5..9faf8e40f74 100644 --- a/src/agents/command/attempt-execution.ts +++ b/src/agents/command/attempt-execution.ts @@ -234,19 +234,7 @@ export async function persistCliTurnTranscript(params: { export function runAgentAttempt(params: { providerOverride: string; modelOverride: string; - /** - * The provider the user originally requested for this turn (i.e. the - * primary candidate of the fallback chain). Used to scope claude-cli - * fallback context seeding to chains that actually started on claude-cli; - * a stale `cliSessionBindings["claude-cli"]` from an unrelated past run - * must not contaminate fallbacks that started on another provider - * (Codex review #72069 P1). Optional for callers that don't drive - * a fallback chain (tests, ad-hoc invocations); when omitted, the - * claude-cli fallback seed is skipped — that's a no-op for non-fallback - * paths and a defensive default for fallback paths that didn't plumb - * the original provider through. - */ - originalProvider?: string; + originalProvider: string; cfg: OpenClawConfig; sessionEntry: SessionEntry | undefined; sessionId: string; @@ -273,21 +261,8 @@ export function runAgentAttempt(params: { allowTransientCooldownProbe?: boolean; sessionHasHistory?: boolean; }) { - // #69973: when a fallback fires from claude-cli to a non-CLI candidate - // (or a different CLI backend), the next runner cannot see Claude Code's - // local JSONL history. Without a seed, the fallback model starts cold — - // even though the original Claude session is still alive on disk. - // Harvest a compacted context (Claude's own `/compact` summary plus the - // most recent post-boundary turns) and prepend it to the retry prompt. - // This mirrors what Claude Code itself replays after compaction. - // - // Gate explicitly on `originalProvider === "claude-cli"`: if the user- - // requested provider for this run was not claude-cli, any claude-cli - // session binding on the entry is stale state from an earlier run and - // must not bleed into this fallback chain. const claudeCliFallbackPrelude = params.isFallbackRetry && - typeof params.originalProvider === "string" && isClaudeCliProvider(params.originalProvider) && !isClaudeCliProvider(params.providerOverride) ? buildClaudeCliFallbackContextPrelude({ diff --git a/src/gateway/cli-session-history.claude.ts b/src/gateway/cli-session-history.claude.ts index 780b5a1a901..f75db6509c8 100644 --- a/src/gateway/cli-session-history.claude.ts +++ b/src/gateway/cli-session-history.claude.ts @@ -334,14 +334,6 @@ export function readClaudeCliSessionMessages(params: { return coalesceClaudeCliToolMessages(messages); } -// Compaction surface in Claude Code's JSONL: `/compact` writes a -// `type: "summary"` entry whose `summary` field holds the condensed text, -// and an associated `type: "system", subtype: "compact_boundary"` entry -// whose `compactMetadata` carries `trigger`/`preTokens`. After a boundary, -// only post-compaction `user`/`assistant` turns are written as individual -// entries; pre-compaction context lives in the summary. This shape mirrors -// what Claude Code itself sends to the model after compaction (summary plus -// recent turns), per the upstream session-management docs. type ClaudeCliCompactBoundaryEntry = { type: "system"; subtype?: unknown; @@ -361,20 +353,7 @@ type ClaudeCliSummaryEntry = { }; export type ClaudeCliFallbackSeed = { - /** - * The most recent compaction summary, if the session has been `/compact`-ed - * at any point. Sourced from the latest `type: "summary"` entry, falling - * back to the latest `compact_boundary` content when no explicit summary - * is present (older Claude Code builds). - */ summaryText?: string; - /** - * User/assistant turns after the most recent compact boundary, or all - * turns when the session has never been compacted. Tool-result turns are - * coalesced into adjacent assistant turns the same way - * `readClaudeCliSessionMessages` does, so consumers can format them like - * a regular transcript. - */ recentTurns: TranscriptLikeMessage[]; }; @@ -387,11 +366,6 @@ function isCompactBoundary(entry: ClaudeCliProjectEntry): boolean { } function extractCompactBoundaryFallbackText(entry: ClaudeCliProjectEntry): string | undefined { - // When `/compact` is invoked, Claude Code writes a separate summary entry - // — but on older builds the boundary's `content` ("Conversation compacted") - // is the only signal that compaction happened. Prefer the explicit summary - // when both exist; this fallback gives a non-empty hint when only the - // boundary is present so the seed at least labels the gap honestly. const content = (entry as ClaudeCliCompactBoundaryEntry).content; return typeof content === "string" && content.trim() ? content.trim() : undefined; } @@ -420,17 +394,6 @@ export function readClaudeCliFallbackSeed(params: { return undefined; } - // Pair each compact_boundary with the summary entry that preceded it - // since the previous boundary, so a later compaction whose summary is - // missing (e.g. crash mid-write) does not silently keep an older - // compaction's summary alive (#72069 Codex P2). Walk shape: - // - explicit `summary` entry queues into `pendingSummary` until the - // next boundary "consumes" it. - // - `compact_boundary` flushes the pending summary into `lastSummary`, - // refreshes `lastBoundaryFallback` from the boundary's content, and - // drops the windowed turns and tool registry. - // - everything after the latest boundary forms the recent-window the - // fallback runner will replay alongside the summary. let pendingSummary: string | undefined; let lastSummary: string | undefined; let lastBoundaryFallback: string | undefined; @@ -450,23 +413,15 @@ export function readClaudeCliFallbackSeed(params: { const explicitSummary = extractSummaryText(parsed); if (explicitSummary) { - // Queue the summary; the next boundary will pair it with itself. - // Multiple summaries between boundaries (rare) take the last one, - // matching how Claude Code's resume picks the most recent summary. pendingSummary = explicitSummary; continue; } if (isCompactBoundary(parsed)) { - lastSummary = pendingSummary; // may be undefined if no preceding summary + lastSummary = pendingSummary; pendingSummary = undefined; lastBoundaryFallback = extractCompactBoundaryFallbackText(parsed) ?? lastBoundaryFallback; - // Drop turns that lived before this boundary — they are now - // represented by the summary, and replaying them would double-count - // their tokens against the fallback model's budget. windowedTurns = []; - // Reset tool-name registry too: tool ids before a compact boundary - // are no longer visible to the post-boundary turns. toolNameRegistry.clear(); continue; } @@ -478,9 +433,6 @@ export function readClaudeCliFallbackSeed(params: { } const recentTurns = coalesceClaudeCliToolMessages(windowedTurns); - // Honor a `/compact` summary that was written but never followed by a - // boundary marker (older Claude Code build, or graceful-degrade case). - // `pendingSummary` then carries the latest such summary into the result. const resolvedSummaryText = lastSummary ?? pendingSummary ?? lastBoundaryFallback; if (!resolvedSummaryText && recentTurns.length === 0) { return undefined; diff --git a/src/gateway/cli-session-history.test.ts b/src/gateway/cli-session-history.test.ts index 0da58262ecf..9530e06bd15 100644 --- a/src/gateway/cli-session-history.test.ts +++ b/src/gateway/cli-session-history.test.ts @@ -299,10 +299,6 @@ describe("cli session history", () => { }); }); -// Regression coverage for #69973 — claude-cli fallback context loss. The -// new reader exposes the explicit `/compact` summary and the post-boundary -// turn window so a fallback to a non-CLI candidate can replay the same -// shape Claude Code itself uses on resume after compaction. describe("readClaudeCliFallbackSeed", () => { let tmpRoot: string; let homeDir: string; @@ -372,7 +368,7 @@ describe("readClaudeCliFallbackSeed", () => { { type: "user", uuid: "u-pre", - message: { role: "user", content: "PRE-COMPACT user turn that must NOT be in seed" }, + message: { role: "user", content: "pre-compact user turn excluded from seed" }, }, { type: "assistant", @@ -512,10 +508,6 @@ describe("readClaudeCliFallbackSeed", () => { expect(seed).toBeUndefined(); }); - // Codex P2 on #72069: each compact_boundary must pair with the summary - // entry that preceded it since the previous boundary. A later boundary - // without its own summary must not silently keep an older compaction's - // summary alive — that paired stale text with fresh post-boundary turns. it("falls back to the latest boundary content when a newer compaction has no summary", async () => { await writeJsonl([ { type: "summary", summary: "FIRST compact summary", leafUuid: "x" }, @@ -530,9 +522,6 @@ describe("readClaudeCliFallbackSeed", () => { uuid: "u-mid", message: { role: "user", content: "post-first-compact turn" }, }, - // Second compaction: boundary written, but the summary entry never - // landed (e.g. crash between writes). The seed must NOT serve the - // FIRST summary alongside post-second-boundary turns. { type: "system", subtype: "compact_boundary", @@ -555,9 +544,6 @@ describe("readClaudeCliFallbackSeed", () => { }); it("uses a trailing summary that has no following compact_boundary marker", async () => { - // Older Claude Code builds wrote the summary entry without a paired - // boundary marker. The seed should still surface that summary so a - // subsequent fallback at least has a labeled context block. await writeJsonl([ { type: "user",