mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:50:43 +00:00
fix(agents): scope claude-cli fallback seed and pair summary with boundary
Addresses review on #72069: - Codex P1 ("Gate Claude prelude seeding by source provider"): the guard checked the *current* fallback candidate but not the failed attempt. A session that still carried a stale cliSessionBindings["claude-cli"] from an unrelated past run would inject Claude transcript context into a fallback chain that started on a different provider (e.g. openai -> openai-codex), leaking irrelevant prior conversation. Plumb `originalProvider` (the user-requested provider for the chain) through to runAgentAttempt and require `isClaudeCliProvider(originalProvider)` before reading Claude history. - Codex P2 ("Prefer latest compact boundary when summary is missing"): the resolver always preferred the most recent explicit summary, so a later compaction without its own summary entry (rare crash case) paired stale summary text with post-latest-boundary turns. Restructure readClaudeCliFallbackSeed to queue summaries into pendingSummary and flush each boundary's pair atomically. A boundary with no preceding summary now correctly falls back to the boundary's own content rather than serving an older summary alongside fresh turns. - Greptile P2 (newest-first break vs sparse coverage): the formatFallbackTurns walk intentionally stops on the first oversized turn so the prelude stays a contiguous "what was happening just before the failure" window. Document the design choice inline so a future maintainer doesn't reflexively change it to skip-and-continue. Tests: - New gateway cases for the boundary-without-summary edge case and for trailing summaries written without a paired boundary. - existing 33 attempt-execution + 14 cli-session-history tests still pass; broader src/agents/command suite stays green (63/63).
This commit is contained in:
@@ -892,6 +892,7 @@ async function agentCommandInternal(
|
|||||||
return attemptExecutionRuntime.runAgentAttempt({
|
return attemptExecutionRuntime.runAgentAttempt({
|
||||||
providerOverride,
|
providerOverride,
|
||||||
modelOverride,
|
modelOverride,
|
||||||
|
originalProvider: provider,
|
||||||
cfg,
|
cfg,
|
||||||
sessionEntry,
|
sessionEntry,
|
||||||
sessionId,
|
sessionId,
|
||||||
|
|||||||
@@ -189,9 +189,15 @@ function formatFallbackTurns(
|
|||||||
if (turns.length === 0 || remainingBudget <= 0) {
|
if (turns.length === 0 || remainingBudget <= 0) {
|
||||||
return { text: "", consumed: 0 };
|
return { text: "", consumed: 0 };
|
||||||
}
|
}
|
||||||
// Walk newest -> oldest, prepending lines until we exceed the budget.
|
// Walk newest -> oldest, prepending lines until one does not fit.
|
||||||
// Stops at the oldest turn we can include in full so we never deliver a
|
//
|
||||||
// truncated mid-turn fragment to the fallback model.
|
// We stop on the FIRST oversized turn instead of skipping it and then
|
||||||
|
// continuing into older ones. The fallback prelude is a "most recent
|
||||||
|
// contiguous window" summary — what was happening just before the
|
||||||
|
// failed attempt — so a non-contiguous slice (newest + something from
|
||||||
|
// 20 turns ago, gap in the middle) would mislead the fallback model
|
||||||
|
// about the actual flow. Sparse coverage is worse than fewer turns:
|
||||||
|
// greptile flagged this as a P2 on #72069; behavior is intentional.
|
||||||
const lines: string[] = [];
|
const lines: string[] = [];
|
||||||
let consumed = 0;
|
let consumed = 0;
|
||||||
for (let i = turns.length - 1; i >= 0; i -= 1) {
|
for (let i = turns.length - 1; i >= 0; i -= 1) {
|
||||||
@@ -209,9 +215,6 @@ function formatFallbackTurns(
|
|||||||
}
|
}
|
||||||
const line = `${role}: ${text}`;
|
const line = `${role}: ${text}`;
|
||||||
if (consumed + line.length + 1 > remainingBudget) {
|
if (consumed + line.length + 1 > remainingBudget) {
|
||||||
// Skip this turn rather than chop it; if even the most recent turn
|
|
||||||
// is too large to include cleanly, stop emitting (the prelude is a
|
|
||||||
// best-effort sketch, not a transcript).
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
lines.unshift(line);
|
lines.unshift(line);
|
||||||
|
|||||||
@@ -233,6 +233,15 @@ export async function persistCliTurnTranscript(params: {
|
|||||||
export function runAgentAttempt(params: {
|
export function runAgentAttempt(params: {
|
||||||
providerOverride: string;
|
providerOverride: string;
|
||||||
modelOverride: string;
|
modelOverride: string;
|
||||||
|
/**
|
||||||
|
* The provider the user originally requested for this turn (i.e. the
|
||||||
|
* primary candidate of the fallback chain). Used to scope claude-cli
|
||||||
|
* fallback context seeding to chains that actually started on claude-cli;
|
||||||
|
* a stale `cliSessionBindings["claude-cli"]` from an unrelated past run
|
||||||
|
* must not contaminate fallbacks that started on another provider
|
||||||
|
* (Codex review #72069 P1).
|
||||||
|
*/
|
||||||
|
originalProvider: string;
|
||||||
cfg: OpenClawConfig;
|
cfg: OpenClawConfig;
|
||||||
sessionEntry: SessionEntry | undefined;
|
sessionEntry: SessionEntry | undefined;
|
||||||
sessionId: string;
|
sessionId: string;
|
||||||
@@ -266,8 +275,15 @@ export function runAgentAttempt(params: {
|
|||||||
// Harvest a compacted context (Claude's own `/compact` summary plus the
|
// Harvest a compacted context (Claude's own `/compact` summary plus the
|
||||||
// most recent post-boundary turns) and prepend it to the retry prompt.
|
// most recent post-boundary turns) and prepend it to the retry prompt.
|
||||||
// This mirrors what Claude Code itself replays after compaction.
|
// This mirrors what Claude Code itself replays after compaction.
|
||||||
|
//
|
||||||
|
// Gate explicitly on `originalProvider === "claude-cli"`: if the user-
|
||||||
|
// requested provider for this run was not claude-cli, any claude-cli
|
||||||
|
// session binding on the entry is stale state from an earlier run and
|
||||||
|
// must not bleed into this fallback chain.
|
||||||
const claudeCliFallbackPrelude =
|
const claudeCliFallbackPrelude =
|
||||||
params.isFallbackRetry && !isClaudeCliProvider(params.providerOverride)
|
params.isFallbackRetry &&
|
||||||
|
isClaudeCliProvider(params.originalProvider) &&
|
||||||
|
!isClaudeCliProvider(params.providerOverride)
|
||||||
? buildClaudeCliFallbackContextPrelude({
|
? buildClaudeCliFallbackContextPrelude({
|
||||||
cliSessionId: getCliSessionBinding(params.sessionEntry, "claude-cli")?.sessionId,
|
cliSessionId: getCliSessionBinding(params.sessionEntry, "claude-cli")?.sessionId,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -420,12 +420,20 @@ export function readClaudeCliFallbackSeed(params: {
|
|||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
let summaryText: string | undefined;
|
// Pair each compact_boundary with the summary entry that preceded it
|
||||||
let boundaryFallbackText: string | undefined;
|
// since the previous boundary, so a later compaction whose summary is
|
||||||
// Buffer turns into a window that resets every time we cross a compact
|
// missing (e.g. crash mid-write) does not silently keep an older
|
||||||
// boundary. After the walk completes, `windowedTurns` holds turns from
|
// compaction's summary alive (#72069 Codex P2). Walk shape:
|
||||||
// the most recent (post-boundary) window, which matches what Claude Code
|
// - explicit `summary` entry queues into `pendingSummary` until the
|
||||||
// would replay alongside the summary on its own resume.
|
// next boundary "consumes" it.
|
||||||
|
// - `compact_boundary` flushes the pending summary into `lastSummary`,
|
||||||
|
// refreshes `lastBoundaryFallback` from the boundary's content, and
|
||||||
|
// drops the windowed turns and tool registry.
|
||||||
|
// - everything after the latest boundary forms the recent-window the
|
||||||
|
// fallback runner will replay alongside the summary.
|
||||||
|
let pendingSummary: string | undefined;
|
||||||
|
let lastSummary: string | undefined;
|
||||||
|
let lastBoundaryFallback: string | undefined;
|
||||||
let windowedTurns: TranscriptLikeMessage[] = [];
|
let windowedTurns: TranscriptLikeMessage[] = [];
|
||||||
const toolNameRegistry: ToolNameRegistry = new Map();
|
const toolNameRegistry: ToolNameRegistry = new Map();
|
||||||
|
|
||||||
@@ -442,15 +450,17 @@ export function readClaudeCliFallbackSeed(params: {
|
|||||||
|
|
||||||
const explicitSummary = extractSummaryText(parsed);
|
const explicitSummary = extractSummaryText(parsed);
|
||||||
if (explicitSummary) {
|
if (explicitSummary) {
|
||||||
// Explicit summary entries are written by `/compact`; later entries
|
// Queue the summary; the next boundary will pair it with itself.
|
||||||
// supersede earlier ones the same way Claude Code itself replays
|
// Multiple summaries between boundaries (rare) take the last one,
|
||||||
// only the most recent summary.
|
// matching how Claude Code's resume picks the most recent summary.
|
||||||
summaryText = explicitSummary;
|
pendingSummary = explicitSummary;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isCompactBoundary(parsed)) {
|
if (isCompactBoundary(parsed)) {
|
||||||
boundaryFallbackText = extractCompactBoundaryFallbackText(parsed) ?? boundaryFallbackText;
|
lastSummary = pendingSummary; // may be undefined if no preceding summary
|
||||||
|
pendingSummary = undefined;
|
||||||
|
lastBoundaryFallback = extractCompactBoundaryFallbackText(parsed) ?? lastBoundaryFallback;
|
||||||
// Drop turns that lived before this boundary — they are now
|
// Drop turns that lived before this boundary — they are now
|
||||||
// represented by the summary, and replaying them would double-count
|
// represented by the summary, and replaying them would double-count
|
||||||
// their tokens against the fallback model's budget.
|
// their tokens against the fallback model's budget.
|
||||||
@@ -468,7 +478,10 @@ export function readClaudeCliFallbackSeed(params: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const recentTurns = coalesceClaudeCliToolMessages(windowedTurns);
|
const recentTurns = coalesceClaudeCliToolMessages(windowedTurns);
|
||||||
const resolvedSummaryText = summaryText ?? boundaryFallbackText;
|
// Honor a `/compact` summary that was written but never followed by a
|
||||||
|
// boundary marker (older Claude Code build, or graceful-degrade case).
|
||||||
|
// `pendingSummary` then carries the latest such summary into the result.
|
||||||
|
const resolvedSummaryText = lastSummary ?? pendingSummary ?? lastBoundaryFallback;
|
||||||
if (!resolvedSummaryText && recentTurns.length === 0) {
|
if (!resolvedSummaryText && recentTurns.length === 0) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -511,4 +511,68 @@ describe("readClaudeCliFallbackSeed", () => {
|
|||||||
const seed = readClaudeCliFallbackSeed({ cliSessionId: "../escape" });
|
const seed = readClaudeCliFallbackSeed({ cliSessionId: "../escape" });
|
||||||
expect(seed).toBeUndefined();
|
expect(seed).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Codex P2 on #72069: each compact_boundary must pair with the summary
|
||||||
|
// entry that preceded it since the previous boundary. A later boundary
|
||||||
|
// without its own summary must not silently keep an older compaction's
|
||||||
|
// summary alive — that paired stale text with fresh post-boundary turns.
|
||||||
|
it("falls back to the latest boundary content when a newer compaction has no summary", async () => {
|
||||||
|
await writeJsonl([
|
||||||
|
{ type: "summary", summary: "FIRST compact summary", leafUuid: "x" },
|
||||||
|
{
|
||||||
|
type: "system",
|
||||||
|
subtype: "compact_boundary",
|
||||||
|
content: "Conversation compacted (1)",
|
||||||
|
compactMetadata: { trigger: "manual", preTokens: 1000 },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: "user",
|
||||||
|
uuid: "u-mid",
|
||||||
|
message: { role: "user", content: "post-first-compact turn" },
|
||||||
|
},
|
||||||
|
// Second compaction: boundary written, but the summary entry never
|
||||||
|
// landed (e.g. crash between writes). The seed must NOT serve the
|
||||||
|
// FIRST summary alongside post-second-boundary turns.
|
||||||
|
{
|
||||||
|
type: "system",
|
||||||
|
subtype: "compact_boundary",
|
||||||
|
content: "Conversation compacted (2)",
|
||||||
|
compactMetadata: { trigger: "auto", preTokens: 2000 },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: "user",
|
||||||
|
uuid: "u-tail",
|
||||||
|
message: { role: "user", content: "post-second-compact turn" },
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
|
const seed = readClaudeCliFallbackSeed({ cliSessionId: SESSION_ID });
|
||||||
|
expect(seed).toBeDefined();
|
||||||
|
expect(seed?.summaryText).toBe("Conversation compacted (2)");
|
||||||
|
expect(seed?.summaryText).not.toBe("FIRST compact summary");
|
||||||
|
expect(seed?.recentTurns).toHaveLength(1);
|
||||||
|
expect(JSON.stringify(seed?.recentTurns)).toContain("post-second-compact turn");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("uses a trailing summary that has no following compact_boundary marker", async () => {
|
||||||
|
// Older Claude Code builds wrote the summary entry without a paired
|
||||||
|
// boundary marker. The seed should still surface that summary so a
|
||||||
|
// subsequent fallback at least has a labeled context block.
|
||||||
|
await writeJsonl([
|
||||||
|
{
|
||||||
|
type: "user",
|
||||||
|
uuid: "u-1",
|
||||||
|
message: { role: "user", content: "earlier turn" },
|
||||||
|
},
|
||||||
|
{ type: "summary", summary: "trailing summary without boundary", leafUuid: "x" },
|
||||||
|
{
|
||||||
|
type: "user",
|
||||||
|
uuid: "u-2",
|
||||||
|
message: { role: "user", content: "later turn" },
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
|
const seed = readClaudeCliFallbackSeed({ cliSessionId: SESSION_ID });
|
||||||
|
expect(seed?.summaryText).toBe("trailing summary without boundary");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user