fix(agents): require claude fallback source provider

2026-05-06 07:30:43 +00:00 · 2026-04-28 13:27:06 +05:30
parent 4369c20bfe
commit 5e4c29e9bc
7 changed files with 31 additions and 113 deletions
--- a/src/agents/auth-profile-runtime-contract.test.ts
+++ b/src/agents/auth-profile-runtime-contract.test.ts
@@ -147,6 +147,7 @@ async function runAuthContractAttempt(params: {

  await runAgentAttempt({
    providerOverride: params.providerOverride,
+    originalProvider: params.providerOverride,
    modelOverride: "gpt-5.4",
    cfg,
    sessionEntry,
--- a/src/agents/command/attempt-execution.cli.test.ts
+++ b/src/agents/command/attempt-execution.cli.test.ts
@@ -102,6 +102,7 @@ describe("CLI attempt execution", () => {
  }) {
    await runAgentAttempt({
      providerOverride: "claude-cli",
+      originalProvider: "claude-cli",
      modelOverride: "opus",
      cfg: {} as OpenClawConfig,
      sessionEntry: params.sessionEntry,
@@ -166,6 +167,7 @@ describe("CLI attempt execution", () => {

    await runAgentAttempt({
      providerOverride: "claude-cli",
+      originalProvider: "claude-cli",
      modelOverride: "opus",
      cfg: {} as OpenClawConfig,
      sessionEntry,
@@ -317,6 +319,7 @@ describe("CLI attempt execution", () => {

    await runAgentAttempt({
      providerOverride: "codex-cli",
+      originalProvider: "codex-cli",
      modelOverride: "gpt-5.4",
      cfg: {} as OpenClawConfig,
      sessionEntry,
@@ -433,6 +436,7 @@ describe("CLI attempt execution", () => {

    await runAgentAttempt({
      providerOverride: "claude-cli",
+      originalProvider: "claude-cli",
      modelOverride: "opus",
      cfg: {} as OpenClawConfig,
      sessionEntry,
@@ -482,6 +486,7 @@ describe("CLI attempt execution", () => {

    await runAgentAttempt({
      providerOverride: "anthropic",
+      originalProvider: "anthropic",
      modelOverride: "claude-opus-4-7",
      cfg: {
        agents: {
@@ -536,6 +541,7 @@ describe("CLI attempt execution", () => {

    await runAgentAttempt({
      providerOverride: "claude-cli",
+      originalProvider: "claude-cli",
      modelOverride: "claude-opus-4-7",
      cfg: {} as OpenClawConfig,
      sessionEntry,
@@ -601,6 +607,7 @@ describe("embedded attempt harness pinning", () => {

    await runAgentAttempt({
      providerOverride: "openai",
+      originalProvider: "openai",
      modelOverride: "gpt-5.4",
      cfg: {} as OpenClawConfig,
      sessionEntry,
@@ -644,6 +651,7 @@ describe("embedded attempt harness pinning", () => {

    await runAgentAttempt({
      providerOverride: "codex",
+      originalProvider: "codex",
      modelOverride: "gpt-5.4",
      cfg: {
        agents: {
@@ -693,6 +701,7 @@ describe("embedded attempt harness pinning", () => {

    await runAgentAttempt({
      providerOverride: "openai",
+      originalProvider: "openai",
      modelOverride: "gpt-5.4",
      cfg: {} as OpenClawConfig,
      sessionEntry,
@@ -736,6 +745,7 @@ describe("embedded attempt harness pinning", () => {

    await runAgentAttempt({
      providerOverride: "openai",
+      originalProvider: "claude-cli",
      modelOverride: "gpt-5.4",
      cfg: {
        agents: {
--- a/src/agents/command/attempt-execution.helpers.ts
+++ b/src/agents/command/attempt-execution.helpers.ts
@@ -109,14 +109,6 @@ export function resolveFallbackRetryPrompt(params: {
  body: string;
  isFallbackRetry: boolean;
  sessionHasHistory?: boolean;
-  /**
-   * Optional context prelude (e.g., a compacted summary harvested from a
-   * non-OpenClaw transcript such as Claude Code's local JSONL). Prepended
-   * before the retry marker so the fallback candidate has prior context
-   * even when OpenClaw's own session file is empty for the current
-   * provider — see `buildClaudeCliFallbackContextPrelude` for the
-   * claude-cli case (#69973).
-   */
  priorContextPrelude?: string;
 }): string {
  if (!params.isFallbackRetry) {
@@ -189,15 +181,6 @@ function formatFallbackTurns(
  if (turns.length === 0 || remainingBudget <= 0) {
    return { text: "", consumed: 0 };
  }
-  // Walk newest -> oldest, prepending lines until one does not fit.
-  //
-  // We stop on the FIRST oversized turn instead of skipping it and then
-  // continuing into older ones. The fallback prelude is a "most recent
-  // contiguous window" summary — what was happening just before the
-  // failed attempt — so a non-contiguous slice (newest + something from
-  // 20 turns ago, gap in the middle) would mislead the fallback model
-  // about the actual flow. Sparse coverage is worse than fewer turns:
-  // greptile flagged this as a P2 on #72069; behavior is intentional.
  const lines: string[] = [];
  let consumed = 0;
  for (let i = turns.length - 1; i >= 0; i -= 1) {
--- a/src/agents/command/attempt-execution.test.ts
+++ b/src/agents/command/attempt-execution.test.ts
@@ -80,10 +80,6 @@ describe("resolveFallbackRetryPrompt", () => {
    ).toBe(originalBody);
  });

-  // #69973: even when OpenClaw's own session file is empty (the claude-cli
-  // case where Claude Code maintains its own JSONL), a harvested
-  // priorContextPrelude must still seed the retry prompt so the fallback
-  // candidate has prior context.
  it("prepends priorContextPrelude before the retry marker on fallback retry", () => {
    const prelude = "## Prior session context (from claude-cli)\nuser: prior question";
    const result = resolveFallbackRetryPrompt({
@@ -191,8 +187,6 @@ describe("formatClaudeCliFallbackPrelude", () => {
    );
    expect(out).toContain("Summary of earlier conversation (truncated):");
    expect(out.length).toBeLessThan(800);
-    // Trailing ellipsis tells the model the summary was clipped — better
-    // than silently emitting a fragment that looks complete.
    expect(out).toMatch(/…$/);
  });

@@ -206,6 +200,23 @@ describe("formatClaudeCliFallbackPrelude", () => {
    expect(out).toContain("turn 10");
    expect(out).not.toContain("turn 1 ");
  });
+
+  it("keeps the recent turn window contiguous when an adjacent turn is oversized", () => {
+    const out = formatClaudeCliFallbackPrelude(
+      {
+        recentTurns: [
+          { role: "user", content: "older small turn" },
+          { role: "assistant", content: `oversized adjacent turn ${"x".repeat(500)}` },
+          { role: "user", content: "newest small turn" },
+        ],
+      },
+      { charBudget: 260 },
+    );
+
+    expect(out).toContain("newest small turn");
+    expect(out).not.toContain("oversized adjacent turn");
+    expect(out).not.toContain("older small turn");
+  });
 });

 describe("buildClaudeCliFallbackContextPrelude", () => {
--- a/src/agents/command/attempt-execution.ts
+++ b/src/agents/command/attempt-execution.ts
@@ -234,19 +234,7 @@ export async function persistCliTurnTranscript(params: {
 export function runAgentAttempt(params: {
  providerOverride: string;
  modelOverride: string;
-  /**
-   * The provider the user originally requested for this turn (i.e. the
-   * primary candidate of the fallback chain). Used to scope claude-cli
-   * fallback context seeding to chains that actually started on claude-cli;
-   * a stale `cliSessionBindings["claude-cli"]` from an unrelated past run
-   * must not contaminate fallbacks that started on another provider
-   * (Codex review #72069 P1). Optional for callers that don't drive
-   * a fallback chain (tests, ad-hoc invocations); when omitted, the
-   * claude-cli fallback seed is skipped — that's a no-op for non-fallback
-   * paths and a defensive default for fallback paths that didn't plumb
-   * the original provider through.
-   */
-  originalProvider?: string;
+  originalProvider: string;
  cfg: OpenClawConfig;
  sessionEntry: SessionEntry | undefined;
  sessionId: string;
@@ -273,21 +261,8 @@ export function runAgentAttempt(params: {
  allowTransientCooldownProbe?: boolean;
  sessionHasHistory?: boolean;
 }) {
-  // #69973: when a fallback fires from claude-cli to a non-CLI candidate
-  // (or a different CLI backend), the next runner cannot see Claude Code's
-  // local JSONL history. Without a seed, the fallback model starts cold —
-  // even though the original Claude session is still alive on disk.
-  // Harvest a compacted context (Claude's own `/compact` summary plus the
-  // most recent post-boundary turns) and prepend it to the retry prompt.
-  // This mirrors what Claude Code itself replays after compaction.
-  //
-  // Gate explicitly on `originalProvider === "claude-cli"`: if the user-
-  // requested provider for this run was not claude-cli, any claude-cli
-  // session binding on the entry is stale state from an earlier run and
-  // must not bleed into this fallback chain.
  const claudeCliFallbackPrelude =
    params.isFallbackRetry &&
-    typeof params.originalProvider === "string" &&
    isClaudeCliProvider(params.originalProvider) &&
    !isClaudeCliProvider(params.providerOverride)
      ? buildClaudeCliFallbackContextPrelude({
--- a/src/gateway/cli-session-history.claude.ts
+++ b/src/gateway/cli-session-history.claude.ts
@@ -334,14 +334,6 @@ export function readClaudeCliSessionMessages(params: {
  return coalesceClaudeCliToolMessages(messages);
 }

-// Compaction surface in Claude Code's JSONL: `/compact` writes a
-// `type: "summary"` entry whose `summary` field holds the condensed text,
-// and an associated `type: "system", subtype: "compact_boundary"` entry
-// whose `compactMetadata` carries `trigger`/`preTokens`. After a boundary,
-// only post-compaction `user`/`assistant` turns are written as individual
-// entries; pre-compaction context lives in the summary. This shape mirrors
-// what Claude Code itself sends to the model after compaction (summary plus
-// recent turns), per the upstream session-management docs.
 type ClaudeCliCompactBoundaryEntry = {
  type: "system";
  subtype?: unknown;
@@ -361,20 +353,7 @@ type ClaudeCliSummaryEntry = {
 };

 export type ClaudeCliFallbackSeed = {
-  /**
-   * The most recent compaction summary, if the session has been `/compact`-ed
-   * at any point. Sourced from the latest `type: "summary"` entry, falling
-   * back to the latest `compact_boundary` content when no explicit summary
-   * is present (older Claude Code builds).
-   */
  summaryText?: string;
-  /**
-   * User/assistant turns after the most recent compact boundary, or all
-   * turns when the session has never been compacted. Tool-result turns are
-   * coalesced into adjacent assistant turns the same way
-   * `readClaudeCliSessionMessages` does, so consumers can format them like
-   * a regular transcript.
-   */
  recentTurns: TranscriptLikeMessage[];
 };

@@ -387,11 +366,6 @@ function isCompactBoundary(entry: ClaudeCliProjectEntry): boolean {
 }

 function extractCompactBoundaryFallbackText(entry: ClaudeCliProjectEntry): string | undefined {
-  // When `/compact` is invoked, Claude Code writes a separate summary entry
-  // — but on older builds the boundary's `content` ("Conversation compacted")
-  // is the only signal that compaction happened. Prefer the explicit summary
-  // when both exist; this fallback gives a non-empty hint when only the
-  // boundary is present so the seed at least labels the gap honestly.
  const content = (entry as ClaudeCliCompactBoundaryEntry).content;
  return typeof content === "string" && content.trim() ? content.trim() : undefined;
 }
@@ -420,17 +394,6 @@ export function readClaudeCliFallbackSeed(params: {
    return undefined;
  }

-  // Pair each compact_boundary with the summary entry that preceded it
-  // since the previous boundary, so a later compaction whose summary is
-  // missing (e.g. crash mid-write) does not silently keep an older
-  // compaction's summary alive (#72069 Codex P2). Walk shape:
-  //   - explicit `summary` entry queues into `pendingSummary` until the
-  //     next boundary "consumes" it.
-  //   - `compact_boundary` flushes the pending summary into `lastSummary`,
-  //     refreshes `lastBoundaryFallback` from the boundary's content, and
-  //     drops the windowed turns and tool registry.
-  //   - everything after the latest boundary forms the recent-window the
-  //     fallback runner will replay alongside the summary.
  let pendingSummary: string | undefined;
  let lastSummary: string | undefined;
  let lastBoundaryFallback: string | undefined;
@@ -450,23 +413,15 @@ export function readClaudeCliFallbackSeed(params: {

    const explicitSummary = extractSummaryText(parsed);
    if (explicitSummary) {
-      // Queue the summary; the next boundary will pair it with itself.
-      // Multiple summaries between boundaries (rare) take the last one,
-      // matching how Claude Code's resume picks the most recent summary.
      pendingSummary = explicitSummary;
      continue;
    }

    if (isCompactBoundary(parsed)) {
-      lastSummary = pendingSummary; // may be undefined if no preceding summary
+      lastSummary = pendingSummary;
      pendingSummary = undefined;
      lastBoundaryFallback = extractCompactBoundaryFallbackText(parsed) ?? lastBoundaryFallback;
-      // Drop turns that lived before this boundary — they are now
-      // represented by the summary, and replaying them would double-count
-      // their tokens against the fallback model's budget.
      windowedTurns = [];
-      // Reset tool-name registry too: tool ids before a compact boundary
-      // are no longer visible to the post-boundary turns.
      toolNameRegistry.clear();
      continue;
    }
@@ -478,9 +433,6 @@ export function readClaudeCliFallbackSeed(params: {
  }

  const recentTurns = coalesceClaudeCliToolMessages(windowedTurns);
-  // Honor a `/compact` summary that was written but never followed by a
-  // boundary marker (older Claude Code build, or graceful-degrade case).
-  // `pendingSummary` then carries the latest such summary into the result.
  const resolvedSummaryText = lastSummary ?? pendingSummary ?? lastBoundaryFallback;
  if (!resolvedSummaryText && recentTurns.length === 0) {
    return undefined;
--- a/src/gateway/cli-session-history.test.ts
+++ b/src/gateway/cli-session-history.test.ts
@@ -299,10 +299,6 @@ describe("cli session history", () => {
  });
 });

-// Regression coverage for #69973 — claude-cli fallback context loss. The
-// new reader exposes the explicit `/compact` summary and the post-boundary
-// turn window so a fallback to a non-CLI candidate can replay the same
-// shape Claude Code itself uses on resume after compaction.
 describe("readClaudeCliFallbackSeed", () => {
  let tmpRoot: string;
  let homeDir: string;
@@ -372,7 +368,7 @@ describe("readClaudeCliFallbackSeed", () => {
      {
        type: "user",
        uuid: "u-pre",
-        message: { role: "user", content: "PRE-COMPACT user turn that must NOT be in seed" },
+        message: { role: "user", content: "pre-compact user turn excluded from seed" },
      },
      {
        type: "assistant",
@@ -512,10 +508,6 @@ describe("readClaudeCliFallbackSeed", () => {
    expect(seed).toBeUndefined();
  });

-  // Codex P2 on #72069: each compact_boundary must pair with the summary
-  // entry that preceded it since the previous boundary. A later boundary
-  // without its own summary must not silently keep an older compaction's
-  // summary alive — that paired stale text with fresh post-boundary turns.
  it("falls back to the latest boundary content when a newer compaction has no summary", async () => {
    await writeJsonl([
      { type: "summary", summary: "FIRST compact summary", leafUuid: "x" },
@@ -530,9 +522,6 @@ describe("readClaudeCliFallbackSeed", () => {
        uuid: "u-mid",
        message: { role: "user", content: "post-first-compact turn" },
      },
-      // Second compaction: boundary written, but the summary entry never
-      // landed (e.g. crash between writes). The seed must NOT serve the
-      // FIRST summary alongside post-second-boundary turns.
      {
        type: "system",
        subtype: "compact_boundary",
@@ -555,9 +544,6 @@ describe("readClaudeCliFallbackSeed", () => {
  });

  it("uses a trailing summary that has no following compact_boundary marker", async () => {
-    // Older Claude Code builds wrote the summary entry without a paired
-    // boundary marker. The seed should still surface that summary so a
-    // subsequent fallback at least has a labeled context block.
    await writeJsonl([
      {
        type: "user",