fix(agents): require claude fallback source provider

This commit is contained in:
Ayaan Zaidi
2026-04-28 13:27:06 +05:30
parent 4369c20bfe
commit 5e4c29e9bc
7 changed files with 31 additions and 113 deletions

View File

@@ -147,6 +147,7 @@ async function runAuthContractAttempt(params: {
await runAgentAttempt({
providerOverride: params.providerOverride,
originalProvider: params.providerOverride,
modelOverride: "gpt-5.4",
cfg,
sessionEntry,

View File

@@ -102,6 +102,7 @@ describe("CLI attempt execution", () => {
}) {
await runAgentAttempt({
providerOverride: "claude-cli",
originalProvider: "claude-cli",
modelOverride: "opus",
cfg: {} as OpenClawConfig,
sessionEntry: params.sessionEntry,
@@ -166,6 +167,7 @@ describe("CLI attempt execution", () => {
await runAgentAttempt({
providerOverride: "claude-cli",
originalProvider: "claude-cli",
modelOverride: "opus",
cfg: {} as OpenClawConfig,
sessionEntry,
@@ -317,6 +319,7 @@ describe("CLI attempt execution", () => {
await runAgentAttempt({
providerOverride: "codex-cli",
originalProvider: "codex-cli",
modelOverride: "gpt-5.4",
cfg: {} as OpenClawConfig,
sessionEntry,
@@ -433,6 +436,7 @@ describe("CLI attempt execution", () => {
await runAgentAttempt({
providerOverride: "claude-cli",
originalProvider: "claude-cli",
modelOverride: "opus",
cfg: {} as OpenClawConfig,
sessionEntry,
@@ -482,6 +486,7 @@ describe("CLI attempt execution", () => {
await runAgentAttempt({
providerOverride: "anthropic",
originalProvider: "anthropic",
modelOverride: "claude-opus-4-7",
cfg: {
agents: {
@@ -536,6 +541,7 @@ describe("CLI attempt execution", () => {
await runAgentAttempt({
providerOverride: "claude-cli",
originalProvider: "claude-cli",
modelOverride: "claude-opus-4-7",
cfg: {} as OpenClawConfig,
sessionEntry,
@@ -601,6 +607,7 @@ describe("embedded attempt harness pinning", () => {
await runAgentAttempt({
providerOverride: "openai",
originalProvider: "openai",
modelOverride: "gpt-5.4",
cfg: {} as OpenClawConfig,
sessionEntry,
@@ -644,6 +651,7 @@ describe("embedded attempt harness pinning", () => {
await runAgentAttempt({
providerOverride: "codex",
originalProvider: "codex",
modelOverride: "gpt-5.4",
cfg: {
agents: {
@@ -693,6 +701,7 @@ describe("embedded attempt harness pinning", () => {
await runAgentAttempt({
providerOverride: "openai",
originalProvider: "openai",
modelOverride: "gpt-5.4",
cfg: {} as OpenClawConfig,
sessionEntry,
@@ -736,6 +745,7 @@ describe("embedded attempt harness pinning", () => {
await runAgentAttempt({
providerOverride: "openai",
originalProvider: "claude-cli",
modelOverride: "gpt-5.4",
cfg: {
agents: {

View File

@@ -109,14 +109,6 @@ export function resolveFallbackRetryPrompt(params: {
body: string;
isFallbackRetry: boolean;
sessionHasHistory?: boolean;
/**
* Optional context prelude (e.g., a compacted summary harvested from a
* non-OpenClaw transcript such as Claude Code's local JSONL). Prepended
* before the retry marker so the fallback candidate has prior context
* even when OpenClaw's own session file is empty for the current
* provider — see `buildClaudeCliFallbackContextPrelude` for the
* claude-cli case (#69973).
*/
priorContextPrelude?: string;
}): string {
if (!params.isFallbackRetry) {
@@ -189,15 +181,6 @@ function formatFallbackTurns(
if (turns.length === 0 || remainingBudget <= 0) {
return { text: "", consumed: 0 };
}
// Walk newest -> oldest, prepending lines until one does not fit.
//
// We stop on the FIRST oversized turn instead of skipping it and then
// continuing into older ones. The fallback prelude is a "most recent
// contiguous window" summary — what was happening just before the
// failed attempt — so a non-contiguous slice (newest + something from
// 20 turns ago, gap in the middle) would mislead the fallback model
// about the actual flow. Sparse coverage is worse than fewer turns:
// greptile flagged this as a P2 on #72069; behavior is intentional.
const lines: string[] = [];
let consumed = 0;
for (let i = turns.length - 1; i >= 0; i -= 1) {

View File

@@ -80,10 +80,6 @@ describe("resolveFallbackRetryPrompt", () => {
).toBe(originalBody);
});
// #69973: even when OpenClaw's own session file is empty (the claude-cli
// case where Claude Code maintains its own JSONL), a harvested
// priorContextPrelude must still seed the retry prompt so the fallback
// candidate has prior context.
it("prepends priorContextPrelude before the retry marker on fallback retry", () => {
const prelude = "## Prior session context (from claude-cli)\nuser: prior question";
const result = resolveFallbackRetryPrompt({
@@ -191,8 +187,6 @@ describe("formatClaudeCliFallbackPrelude", () => {
);
expect(out).toContain("Summary of earlier conversation (truncated):");
expect(out.length).toBeLessThan(800);
// Trailing ellipsis tells the model the summary was clipped — better
// than silently emitting a fragment that looks complete.
expect(out).toMatch(/…$/);
});
@@ -206,6 +200,23 @@ describe("formatClaudeCliFallbackPrelude", () => {
expect(out).toContain("turn 10");
expect(out).not.toContain("turn 1 ");
});
it("keeps the recent turn window contiguous when an adjacent turn is oversized", () => {
const out = formatClaudeCliFallbackPrelude(
{
recentTurns: [
{ role: "user", content: "older small turn" },
{ role: "assistant", content: `oversized adjacent turn ${"x".repeat(500)}` },
{ role: "user", content: "newest small turn" },
],
},
{ charBudget: 260 },
);
expect(out).toContain("newest small turn");
expect(out).not.toContain("oversized adjacent turn");
expect(out).not.toContain("older small turn");
});
});
describe("buildClaudeCliFallbackContextPrelude", () => {

View File

@@ -234,19 +234,7 @@ export async function persistCliTurnTranscript(params: {
export function runAgentAttempt(params: {
providerOverride: string;
modelOverride: string;
/**
* The provider the user originally requested for this turn (i.e. the
* primary candidate of the fallback chain). Used to scope claude-cli
* fallback context seeding to chains that actually started on claude-cli;
* a stale `cliSessionBindings["claude-cli"]` from an unrelated past run
* must not contaminate fallbacks that started on another provider
* (Codex review #72069 P1). Optional for callers that don't drive
* a fallback chain (tests, ad-hoc invocations); when omitted, the
* claude-cli fallback seed is skipped — that's a no-op for non-fallback
* paths and a defensive default for fallback paths that didn't plumb
* the original provider through.
*/
originalProvider?: string;
originalProvider: string;
cfg: OpenClawConfig;
sessionEntry: SessionEntry | undefined;
sessionId: string;
@@ -273,21 +261,8 @@ export function runAgentAttempt(params: {
allowTransientCooldownProbe?: boolean;
sessionHasHistory?: boolean;
}) {
// #69973: when a fallback fires from claude-cli to a non-CLI candidate
// (or a different CLI backend), the next runner cannot see Claude Code's
// local JSONL history. Without a seed, the fallback model starts cold —
// even though the original Claude session is still alive on disk.
// Harvest a compacted context (Claude's own `/compact` summary plus the
// most recent post-boundary turns) and prepend it to the retry prompt.
// This mirrors what Claude Code itself replays after compaction.
//
// Gate explicitly on `originalProvider === "claude-cli"`: if the user-
// requested provider for this run was not claude-cli, any claude-cli
// session binding on the entry is stale state from an earlier run and
// must not bleed into this fallback chain.
const claudeCliFallbackPrelude =
params.isFallbackRetry &&
typeof params.originalProvider === "string" &&
isClaudeCliProvider(params.originalProvider) &&
!isClaudeCliProvider(params.providerOverride)
? buildClaudeCliFallbackContextPrelude({

View File

@@ -334,14 +334,6 @@ export function readClaudeCliSessionMessages(params: {
return coalesceClaudeCliToolMessages(messages);
}
// Compaction surface in Claude Code's JSONL: `/compact` writes a
// `type: "summary"` entry whose `summary` field holds the condensed text,
// and an associated `type: "system", subtype: "compact_boundary"` entry
// whose `compactMetadata` carries `trigger`/`preTokens`. After a boundary,
// only post-compaction `user`/`assistant` turns are written as individual
// entries; pre-compaction context lives in the summary. This shape mirrors
// what Claude Code itself sends to the model after compaction (summary plus
// recent turns), per the upstream session-management docs.
type ClaudeCliCompactBoundaryEntry = {
type: "system";
subtype?: unknown;
@@ -361,20 +353,7 @@ type ClaudeCliSummaryEntry = {
};
export type ClaudeCliFallbackSeed = {
/**
* The most recent compaction summary, if the session has been `/compact`-ed
* at any point. Sourced from the latest `type: "summary"` entry, falling
* back to the latest `compact_boundary` content when no explicit summary
* is present (older Claude Code builds).
*/
summaryText?: string;
/**
* User/assistant turns after the most recent compact boundary, or all
* turns when the session has never been compacted. Tool-result turns are
* coalesced into adjacent assistant turns the same way
* `readClaudeCliSessionMessages` does, so consumers can format them like
* a regular transcript.
*/
recentTurns: TranscriptLikeMessage[];
};
@@ -387,11 +366,6 @@ function isCompactBoundary(entry: ClaudeCliProjectEntry): boolean {
}
function extractCompactBoundaryFallbackText(entry: ClaudeCliProjectEntry): string | undefined {
// When `/compact` is invoked, Claude Code writes a separate summary entry
// — but on older builds the boundary's `content` ("Conversation compacted")
// is the only signal that compaction happened. Prefer the explicit summary
// when both exist; this fallback gives a non-empty hint when only the
// boundary is present so the seed at least labels the gap honestly.
const content = (entry as ClaudeCliCompactBoundaryEntry).content;
return typeof content === "string" && content.trim() ? content.trim() : undefined;
}
@@ -420,17 +394,6 @@ export function readClaudeCliFallbackSeed(params: {
return undefined;
}
// Pair each compact_boundary with the summary entry that preceded it
// since the previous boundary, so a later compaction whose summary is
// missing (e.g. crash mid-write) does not silently keep an older
// compaction's summary alive (#72069 Codex P2). Walk shape:
// - explicit `summary` entry queues into `pendingSummary` until the
// next boundary "consumes" it.
// - `compact_boundary` flushes the pending summary into `lastSummary`,
// refreshes `lastBoundaryFallback` from the boundary's content, and
// drops the windowed turns and tool registry.
// - everything after the latest boundary forms the recent-window the
// fallback runner will replay alongside the summary.
let pendingSummary: string | undefined;
let lastSummary: string | undefined;
let lastBoundaryFallback: string | undefined;
@@ -450,23 +413,15 @@ export function readClaudeCliFallbackSeed(params: {
const explicitSummary = extractSummaryText(parsed);
if (explicitSummary) {
// Queue the summary; the next boundary will pair it with itself.
// Multiple summaries between boundaries (rare) take the last one,
// matching how Claude Code's resume picks the most recent summary.
pendingSummary = explicitSummary;
continue;
}
if (isCompactBoundary(parsed)) {
lastSummary = pendingSummary; // may be undefined if no preceding summary
lastSummary = pendingSummary;
pendingSummary = undefined;
lastBoundaryFallback = extractCompactBoundaryFallbackText(parsed) ?? lastBoundaryFallback;
// Drop turns that lived before this boundary — they are now
// represented by the summary, and replaying them would double-count
// their tokens against the fallback model's budget.
windowedTurns = [];
// Reset tool-name registry too: tool ids before a compact boundary
// are no longer visible to the post-boundary turns.
toolNameRegistry.clear();
continue;
}
@@ -478,9 +433,6 @@ export function readClaudeCliFallbackSeed(params: {
}
const recentTurns = coalesceClaudeCliToolMessages(windowedTurns);
// Honor a `/compact` summary that was written but never followed by a
// boundary marker (older Claude Code build, or graceful-degrade case).
// `pendingSummary` then carries the latest such summary into the result.
const resolvedSummaryText = lastSummary ?? pendingSummary ?? lastBoundaryFallback;
if (!resolvedSummaryText && recentTurns.length === 0) {
return undefined;

View File

@@ -299,10 +299,6 @@ describe("cli session history", () => {
});
});
// Regression coverage for #69973 — claude-cli fallback context loss. The
// new reader exposes the explicit `/compact` summary and the post-boundary
// turn window so a fallback to a non-CLI candidate can replay the same
// shape Claude Code itself uses on resume after compaction.
describe("readClaudeCliFallbackSeed", () => {
let tmpRoot: string;
let homeDir: string;
@@ -372,7 +368,7 @@ describe("readClaudeCliFallbackSeed", () => {
{
type: "user",
uuid: "u-pre",
message: { role: "user", content: "PRE-COMPACT user turn that must NOT be in seed" },
message: { role: "user", content: "pre-compact user turn excluded from seed" },
},
{
type: "assistant",
@@ -512,10 +508,6 @@ describe("readClaudeCliFallbackSeed", () => {
expect(seed).toBeUndefined();
});
// Codex P2 on #72069: each compact_boundary must pair with the summary
// entry that preceded it since the previous boundary. A later boundary
// without its own summary must not silently keep an older compaction's
// summary alive — that paired stale text with fresh post-boundary turns.
it("falls back to the latest boundary content when a newer compaction has no summary", async () => {
await writeJsonl([
{ type: "summary", summary: "FIRST compact summary", leafUuid: "x" },
@@ -530,9 +522,6 @@ describe("readClaudeCliFallbackSeed", () => {
uuid: "u-mid",
message: { role: "user", content: "post-first-compact turn" },
},
// Second compaction: boundary written, but the summary entry never
// landed (e.g. crash between writes). The seed must NOT serve the
// FIRST summary alongside post-second-boundary turns.
{
type: "system",
subtype: "compact_boundary",
@@ -555,9 +544,6 @@ describe("readClaudeCliFallbackSeed", () => {
});
it("uses a trailing summary that has no following compact_boundary marker", async () => {
// Older Claude Code builds wrote the summary entry without a paired
// boundary marker. The seed should still surface that summary so a
// subsequent fallback at least has a labeled context block.
await writeJsonl([
{
type: "user",