fix: per-model cooldown scope, stepped backoff, and user-facing rate-limit message (#49834)

Merged via squash.

Prepared head SHA: 7c488c070c
Co-authored-by: kiranvk-2011 <91108465+kiranvk-2011@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
kiranvk2011
2026-03-26 00:33:49 +05:30
committed by GitHub
parent 6efc4e8ef2
commit 84401223c7
14 changed files with 647 additions and 34 deletions

View File

@@ -74,4 +74,45 @@ describe("getSoonestCooldownExpiry", () => {
expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000);
});
it("ignores unrelated model-scoped rate limits for the requested model", () => {
  const now = 1_700_000_000_000;
  // p1 is cooling down for gpt-5.4 while p2 is cooling down for gpt-5.2.
  // Asking about gpt-5.2 must bypass p1's unrelated scope and report only
  // p2's expiry.
  const cooldownStore = makeStore({
    "openai:p1": {
      cooldownUntil: now + 10_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.4",
    },
    "openai:p2": {
      cooldownUntil: now + 30_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.2",
    },
  });
  const soonest = getSoonestCooldownExpiry(cooldownStore, ["openai:p1", "openai:p2"], {
    now,
    forModel: "gpt-5.2",
  });
  expect(soonest).toBe(now + 30_000);
});
it("still counts profile-wide disables for other models", () => {
  const now = 1_700_000_000_000;
  // p1's rate limit is scoped to gpt-5.4, but its disabledUntil window is
  // profile-wide, so it must still contribute when asking about gpt-5.2 —
  // and its earlier expiry (now + 20s) wins over p2's (now + 30s).
  const cooldownStore = makeStore({
    "openai:p1": {
      cooldownUntil: now + 10_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.4",
      disabledUntil: now + 20_000,
    },
    "openai:p2": {
      cooldownUntil: now + 30_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.2",
    },
  });
  const soonest = getSoonestCooldownExpiry(cooldownStore, ["openai:p1", "openai:p2"], {
    now,
    forModel: "gpt-5.2",
  });
  expect(soonest).toBe(now + 20_000);
});
});

View File

@@ -299,12 +299,12 @@ describe("markAuthProfileFailure", () => {
const stats = store.usageStats?.["anthropic:default"];
// Error count should reset to 1 (not escalate to 4) because the
// previous cooldown expired. Cooldown should be ~1 min, not ~60 min.
// previous cooldown expired. Cooldown should be ~30s, not ~5 min.
expect(stats?.errorCount).toBe(1);
expect(stats?.failureCounts?.rate_limit).toBe(1);
const cooldownMs = (stats?.cooldownUntil ?? 0) - now;
// calculateAuthProfileCooldownMs(1) = 60_000 (1 minute)
expect(cooldownMs).toBeLessThan(120_000);
// calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
expect(cooldownMs).toBeLessThan(60_000);
expect(cooldownMs).toBeGreaterThan(0);
} finally {
fs.rmSync(agentDir, { recursive: true, force: true });
@@ -336,11 +336,11 @@ describe("markAuthProfileFailure", () => {
});
describe("calculateAuthProfileCooldownMs", () => {
it("applies exponential backoff with a 1h cap", () => {
expect(calculateAuthProfileCooldownMs(1)).toBe(60_000);
expect(calculateAuthProfileCooldownMs(2)).toBe(5 * 60_000);
expect(calculateAuthProfileCooldownMs(3)).toBe(25 * 60_000);
expect(calculateAuthProfileCooldownMs(4)).toBe(60 * 60_000);
expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
it("applies stepped backoff with a 5-min cap", () => {
expect(calculateAuthProfileCooldownMs(1)).toBe(30_000); // 30 seconds
expect(calculateAuthProfileCooldownMs(2)).toBe(60_000); // 1 minute
expect(calculateAuthProfileCooldownMs(3)).toBe(5 * 60_000); // 5 minutes
expect(calculateAuthProfileCooldownMs(4)).toBe(5 * 60_000); // 5 minutes (cap)
expect(calculateAuthProfileCooldownMs(5)).toBe(5 * 60_000); // 5 minutes (cap)
});
});

View File

@@ -54,6 +54,8 @@ export type AuthProfileFailureReason =
export type ProfileUsageStats = {
lastUsed?: number;
cooldownUntil?: number;
cooldownReason?: AuthProfileFailureReason;
cooldownModel?: string;
disabledUntil?: number;
disabledReason?: AuthProfileFailureReason;
errorCount?: number;

View File

@@ -147,6 +147,53 @@ describe("isProfileInCooldown", () => {
});
expect(isProfileInCooldown(store, "kilocode:default")).toBe(false);
});
it("returns false for a different model when cooldown is model-scoped (rate_limit)", () => {
  const profileId = "github-copilot:github";
  const store = makeStore({
    [profileId]: {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-sonnet-4.6",
    },
  });
  // A model outside the cooldown's scope may proceed immediately.
  expect(isProfileInCooldown(store, profileId, undefined, "gpt-4.1")).toBe(false);
  // The rate-limited model itself remains blocked.
  expect(isProfileInCooldown(store, profileId, undefined, "claude-sonnet-4.6")).toBe(true);
  // When no model is given, stay conservative and keep the profile blocked.
  expect(isProfileInCooldown(store, profileId)).toBe(true);
});
it("returns true for all models when cooldownModel is undefined (profile-wide)", () => {
  const profileId = "github-copilot:github";
  // Without a recorded cooldownModel, the rate-limit cooldown has no model
  // scope and must apply to every model.
  const store = makeStore({
    [profileId]: {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: undefined,
    },
  });
  for (const model of ["claude-sonnet-4.6", "gpt-4.1"]) {
    expect(isProfileInCooldown(store, profileId, undefined, model)).toBe(true);
  }
});
it("does not bypass model-scoped cooldown when disabledUntil is active", () => {
  const now = Date.now();
  const store = makeStore({
    "github-copilot:github": {
      cooldownUntil: now + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-sonnet-4.6",
      disabledUntil: now + 120_000,
      disabledReason: "billing",
    },
  });
  // The billing disable is profile-wide: even a model outside the
  // rate-limit scope (gpt-4.1) must stay blocked.
  const blocked = isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1");
  expect(blocked).toBe(true);
});
});
describe("resolveProfilesUnavailableReason", () => {
@@ -636,8 +683,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
errorCount: 3,
lastFailureAt: now - 60_000,
}),
// errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
expectedUntil: (now: number) => now + 60_000,
// errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
expectedUntil: (now: number) => now + 30_000,
readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
},
{
@@ -690,3 +737,142 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
});
}
});
describe("markAuthProfileFailure — per-model cooldown metadata", () => {
// Builds a store containing a real github-copilot profile so that
// markAuthProfileFailure does not bail out on an unknown profileId.
function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
const store = makeStore(usageStats);
store.profiles["github-copilot:github"] = {
type: "api_key",
provider: "github-copilot",
key: "ghu_test",
};
return store;
}
// Records a rate_limit failure at a fixed timestamp. Fake timers pin
// Date.now() so the computed cooldown windows are deterministic.
async function markFailure(params: {
store: ReturnType<typeof makeStoreWithCopilot>;
now: number;
modelId?: string;
}): Promise<void> {
vi.useFakeTimers();
vi.setSystemTime(params.now);
try {
await markAuthProfileFailure({
store: params.store,
profileId: "github-copilot:github",
reason: "rate_limit",
modelId: params.modelId,
});
} finally {
// Always restore real timers so later tests are unaffected.
vi.useRealTimers();
}
}
it("records cooldownModel on first rate_limit failure", async () => {
const now = 1_000_000;
const store = makeStoreWithCopilot({});
await markFailure({ store, now, modelId: "claude-sonnet-4.6" });
const stats = store.usageStats?.["github-copilot:github"];
// First failure starts a cooldown scoped to the failing model only.
expect(stats?.cooldownReason).toBe("rate_limit");
expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
});
it("widens cooldownModel to undefined when a different model fails during active cooldown", async () => {
const now = 1_000_000;
const store = makeStoreWithCopilot({
"github-copilot:github": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
errorCount: 1,
lastFailureAt: now - 1000,
},
});
// Different model fails during active cooldown
await markFailure({ store, now, modelId: "gpt-4.1" });
const stats = store.usageStats?.["github-copilot:github"];
// Scope widened to all models
expect(stats?.cooldownModel).toBeUndefined();
expect(stats?.cooldownReason).toBe("rate_limit");
});
it("preserves cooldownModel when the same model fails again during active cooldown", async () => {
const now = 1_000_000;
const store = makeStoreWithCopilot({
"github-copilot:github": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
errorCount: 1,
lastFailureAt: now - 1000,
},
});
await markFailure({ store, now, modelId: "claude-sonnet-4.6" });
const stats = store.usageStats?.["github-copilot:github"];
// Same model failing again keeps the narrow, model-scoped cooldown.
expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
});
it("widens cooldownModel when rate_limit failure during active cooldown has no modelId", async () => {
const now = 1_000_000;
const store = makeStoreWithCopilot({
"github-copilot:github": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
errorCount: 1,
lastFailureAt: now - 1000,
},
});
await markFailure({ store, now, modelId: undefined });
const stats = store.usageStats?.["github-copilot:github"];
// Unknown originating model: widen conservatively so no model can
// bypass the cooldown on stale scope metadata.
expect(stats?.cooldownReason).toBe("rate_limit");
expect(stats?.cooldownModel).toBeUndefined();
});
it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => {
const now = 1_000_000;
const store = makeStoreWithCopilot({
"github-copilot:github": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
errorCount: 1,
lastFailureAt: now - 1000,
},
});
await markAuthProfileFailure({
store,
profileId: "github-copilot:github",
reason: "auth",
modelId: "claude-opus-4.6",
});
const stats = store.usageStats?.["github-copilot:github"];
// Reason should update to the new failure type, not stay as rate_limit
expect(stats?.cooldownReason).toBe("auth");
// Model scope should be cleared — auth failures are profile-wide
expect(stats?.cooldownModel).toBeUndefined();
});
it("clears cooldownModel when non-rate_limit failure hits same model during active window", async () => {
const now = 1_000_000;
const store = makeStoreWithCopilot({
"github-copilot:github": {
cooldownUntil: now + 30_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
errorCount: 1,
lastFailureAt: now - 1000,
},
});
await markAuthProfileFailure({
store,
profileId: "github-copilot:github",
reason: "auth",
modelId: "claude-sonnet-4.6",
});
const stats = store.usageStats?.["github-copilot:github"];
// Even same-model auth failure should clear model scope (auth is profile-wide)
expect(stats?.cooldownReason).toBe("auth");
expect(stats?.cooldownModel).toBeUndefined();
});
});

View File

@@ -63,6 +63,7 @@ export function isProfileInCooldown(
store: AuthProfileStore,
profileId: string,
now?: number,
forModel?: string,
): boolean {
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
return false;
@@ -71,8 +72,15 @@ export function isProfileInCooldown(
if (!stats) {
return false;
}
const unusableUntil = resolveProfileUnusableUntil(stats);
const ts = now ?? Date.now();
// Model-aware bypass: if the cooldown was caused by a rate_limit on a
// specific model and the caller is requesting a *different* model, allow it.
// We still honour any active billing/auth disable (`disabledUntil`) — those
// are profile-wide and must not be short-circuited by model scoping.
if (shouldBypassModelScopedCooldown(stats, ts, forModel)) {
return false;
}
const unusableUntil = resolveProfileUnusableUntil(stats);
return unusableUntil ? ts < unusableUntil : false;
}
@@ -167,13 +175,18 @@ export function resolveProfilesUnavailableReason(params: {
export function getSoonestCooldownExpiry(
store: AuthProfileStore,
profileIds: string[],
options?: { now?: number; forModel?: string },
): number | null {
const ts = options?.now ?? Date.now();
let soonest: number | null = null;
for (const id of profileIds) {
const stats = store.usageStats?.[id];
if (!stats) {
continue;
}
if (shouldBypassModelScopedCooldown(stats, ts, options?.forModel)) {
continue;
}
const until = resolveProfileUnusableUntil(stats);
if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) {
continue;
@@ -185,6 +198,20 @@ export function getSoonestCooldownExpiry(
return soonest;
}
/**
 * Decides whether an active cooldown may be skipped for the model being
 * requested. A cooldown is bypassable only when it was caused by a
 * rate_limit scoped to a *different* model and no profile-wide disable
 * window (`disabledUntil`) is currently active.
 */
function shouldBypassModelScopedCooldown(
  stats: Pick<ProfileUsageStats, "cooldownReason" | "cooldownModel" | "disabledUntil">,
  now: number,
  forModel?: string,
): boolean {
  // Only rate-limit cooldowns carry a model scope; without a requested
  // model there is nothing to compare against.
  if (!forModel || stats.cooldownReason !== "rate_limit") {
    return false;
  }
  // An absent scope means the cooldown covers every model; a matching
  // scope means this model is exactly the one being throttled.
  if (!stats.cooldownModel || stats.cooldownModel === forModel) {
    return false;
  }
  // Billing/auth disables are profile-wide and always win over scoping.
  return !isActiveUnusableWindow(stats.disabledUntil, now);
}
/**
* Clear expired cooldowns from all profiles in the store.
*
@@ -231,6 +258,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
if (cooldownExpired) {
stats.cooldownUntil = undefined;
stats.cooldownReason = undefined;
stats.cooldownModel = undefined;
profileMutated = true;
}
if (disabledExpired) {
@@ -294,10 +323,13 @@ export async function markAuthProfileUsed(params: {
/**
 * Stepped backoff schedule for auth-profile cooldowns: 30s → 1m → 5m (cap).
 *
 * The legacy exponential `return Math.min(…)` that preceded the stepped
 * branches made them unreachable dead code and returned the old 1m/5m/25m/1h
 * values; it has been removed so the function matches its tests.
 *
 * @param errorCount - Consecutive failure count; values below 1 are clamped.
 * @returns Cooldown duration in milliseconds.
 */
export function calculateAuthProfileCooldownMs(errorCount: number): number {
  // Clamp so a zero/negative count still yields the minimum step.
  const normalized = Math.max(1, errorCount);
  if (normalized <= 1) {
    return 30_000; // 30 seconds
  }
  if (normalized <= 2) {
    return 60_000; // 1 minute
  }
  return 5 * 60_000; // 5 minutes max
}
type ResolvedAuthCooldownConfig = {
@@ -385,6 +417,8 @@ function resetUsageStats(
...existing,
errorCount: 0,
cooldownUntil: undefined,
cooldownReason: undefined,
cooldownModel: undefined,
disabledUntil: undefined,
disabledReason: undefined,
failureCounts: undefined,
@@ -417,6 +451,7 @@ function computeNextProfileUsageStats(params: {
now: number;
reason: AuthProfileFailureReason;
cfgResolved: ResolvedAuthCooldownConfig;
modelId?: string;
}): ProfileUsageStats {
const windowMs = params.cfgResolved.failureWindowMs;
const windowExpired =
@@ -470,6 +505,44 @@ function computeNextProfileUsageStats(params: {
now: params.now,
recomputedUntil: params.now + backoffMs,
});
// Update cooldown metadata based on whether the window is still active
// and whether the same or a different model is failing.
const existingCooldownActive =
typeof params.existing.cooldownUntil === "number" &&
params.existing.cooldownUntil > params.now;
if (existingCooldownActive) {
// Always use the latest failure reason so that downstream consumers
// (e.g. isProfileInCooldown model-bypass) see the most recent signal.
// A non-rate_limit failure (auth, billing, …) is profile-wide, so
// upgrading from rate_limit → auth correctly blocks all models.
updatedStats.cooldownReason = params.reason;
// If a different model fails during an active window, widen the scope
// to all models (undefined) so neither model bypasses the cooldown.
if (
params.existing.cooldownModel &&
params.modelId &&
params.existing.cooldownModel !== params.modelId
) {
updatedStats.cooldownModel = undefined;
} else if (
params.reason === "rate_limit" &&
!params.modelId &&
params.existing.cooldownModel
) {
// Unknown originating model during an active model-scoped cooldown:
// widen scope conservatively so no model can bypass on stale metadata.
updatedStats.cooldownModel = undefined;
} else if (params.reason !== "rate_limit") {
// Non-rate-limit failures are profile-wide — clear model scope even
// when the same model fails, so that no model can bypass.
updatedStats.cooldownModel = undefined;
} else {
updatedStats.cooldownModel = params.existing.cooldownModel;
}
} else {
updatedStats.cooldownReason = params.reason;
updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
}
}
return updatedStats;
@@ -487,8 +560,9 @@ export async function markAuthProfileFailure(params: {
cfg?: OpenClawConfig;
agentDir?: string;
runId?: string;
modelId?: string;
}): Promise<void> {
const { store, profileId, reason, agentDir, cfg, runId } = params;
const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
const profile = store.profiles[profileId];
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
return;
@@ -517,6 +591,7 @@ export async function markAuthProfileFailure(params: {
now,
reason,
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(freshStore, profileId, () => computed);
@@ -555,6 +630,7 @@ export async function markAuthProfileFailure(params: {
now,
reason,
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(store, profileId, () => computed);
@@ -571,8 +647,8 @@ export async function markAuthProfileFailure(params: {
}
/**
* Mark a profile as transiently failed. Applies exponential backoff cooldown.
* Cooldown times: 1min, 5min, 25min, max 1 hour.
* Mark a profile as transiently failed. Applies stepped backoff cooldown.
* Cooldown times: 30s, 1min, 5min (capped).
* Uses store lock to avoid overwriting concurrent usage updates.
*/
export async function markAuthProfileCooldown(params: {

View File

@@ -679,6 +679,119 @@ describe("runWithModelFallback", () => {
]);
});
it("refreshes cooldown expiry from persisted auth state before fallback summary", async () => {
const expiry = Date.now() + 120_000;
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-5",
fallbacks: ["openai/gpt-5.2"],
},
},
},
});
const store: AuthProfileStore = {
version: AUTH_STORE_VERSION,
profiles: {
"anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
"openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
},
};
await withTempAuthStore(store, async (tempDir) => {
// Simulates an embedded attempt persisting a fresh cooldown to disk via a
// separate store instance while the fallback loop is still running: the
// first (anthropic) attempt writes the cooldown, then every attempt throws
// a 429 so the loop exhausts all candidates.
const run = vi.fn().mockImplementation(async (provider: string, model: string) => {
if (provider === "anthropic" && model === "claude-opus-4-5") {
saveAuthProfileStore(
{
...store,
usageStats: {
"anthropic:default": {
cooldownUntil: expiry,
cooldownReason: "rate_limit",
cooldownModel: "claude-opus-4-5",
failureCounts: { rate_limit: 1 },
},
},
},
tempDir,
);
}
throw Object.assign(new Error("rate limited"), { status: 429 });
});
// The summary error must carry the expiry written mid-run, proving the
// fallback path re-reads persisted state rather than its stale in-memory copy.
await expect(
runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-5",
agentDir: tempDir,
run,
}),
).rejects.toMatchObject({
name: "FallbackSummaryError",
soonestCooldownExpiry: expiry,
});
});
});
it("filters fallback summary cooldown expiry to attempted model scopes", async () => {
const now = Date.now();
// unrelatedExpiry is earlier but scoped to a model never attempted;
// relevantExpiry is later but scoped to the attempted fallback model.
const unrelatedExpiry = now + 15_000;
const relevantExpiry = now + 90_000;
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-5",
fallbacks: ["openai/gpt-5.2"],
},
},
},
});
const store: AuthProfileStore = {
version: AUTH_STORE_VERSION,
profiles: {
"anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
"openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
},
usageStats: {
"anthropic:default": {
cooldownUntil: unrelatedExpiry,
cooldownReason: "rate_limit",
cooldownModel: "claude-haiku-3-5",
failureCounts: { rate_limit: 1 },
},
"openai:default": {
cooldownUntil: relevantExpiry,
cooldownReason: "rate_limit",
cooldownModel: "gpt-5.2",
failureCounts: { rate_limit: 1 },
},
},
};
await withTempAuthStore(store, async (tempDir) => {
// Every attempt fails with a 429 so the loop exhausts all candidates.
const run = vi
.fn()
.mockRejectedValue(Object.assign(new Error("rate limited"), { status: 429 }));
// The summary must report only the expiry relevant to an attempted model
// (gpt-5.2), ignoring the sooner-but-unrelated claude-haiku-3-5 cooldown.
await expect(
runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-5",
agentDir: tempDir,
run,
}),
).rejects.toMatchObject({
name: "FallbackSummaryError",
soonestCooldownExpiry: relevantExpiry,
});
});
});
it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => {
const cfg = makeFallbacksOnlyCfg();

View File

@@ -9,6 +9,7 @@ import {
ensureAuthProfileStore,
getSoonestCooldownExpiry,
isProfileInCooldown,
loadAuthProfileStoreForRuntime,
resolveProfilesUnavailableReason,
resolveAuthProfileOrder,
} from "./auth-profiles.js";
@@ -39,6 +40,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
const log = createSubsystemLogger("model-fallback");
/**
 * Structured error thrown once every model fallback candidate has been
 * exhausted. It carries the per-attempt record plus the soonest cooldown
 * expiry so callers can build informative user-facing messages
 * (e.g. "rate-limited, retry in 30 s").
 */
export class FallbackSummaryError extends Error {
  constructor(
    message: string,
    /** One entry per provider/model attempt, in the order tried. */
    readonly attempts: FallbackAttempt[],
    /** Epoch-ms of the earliest relevant cooldown expiry, or null if none. */
    readonly soonestCooldownExpiry: number | null,
    cause?: Error,
  ) {
    super(message, { cause });
    this.name = "FallbackSummaryError";
  }
}
/** Type guard narrowing an unknown error to {@link FallbackSummaryError}. */
export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
return err instanceof FallbackSummaryError;
}
export type ModelFallbackRunOptions = {
allowTransientCooldownProbe?: boolean;
};
@@ -194,20 +221,59 @@ function throwFallbackFailureSummary(params: {
lastError: unknown;
label: string;
formatAttempt: (attempt: FallbackAttempt) => string;
soonestCooldownExpiry?: number | null;
}): never {
if (params.attempts.length <= 1 && params.lastError) {
throw params.lastError;
}
const summary =
params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
throw new Error(
throw new FallbackSummaryError(
`All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
{
cause: params.lastError instanceof Error ? params.lastError : undefined,
},
params.attempts,
params.soonestCooldownExpiry ?? null,
params.lastError instanceof Error ? params.lastError : undefined,
);
}
/**
 * Computes the earliest cooldown expiry across all fallback candidates,
 * model-scope-aware, from freshly persisted auth state.
 *
 * Returns null when there is no auth store or no finite expiry applies.
 */
function resolveFallbackSoonestCooldownExpiry(params: {
  authStore: ReturnType<typeof ensureAuthProfileStore> | null;
  agentDir?: string;
  cfg: OpenClawConfig | undefined;
  candidates: ModelCandidate[];
}): number | null {
  const { authStore, agentDir, cfg, candidates } = params;
  if (!authStore) {
    return null;
  }
  // Re-read persisted state: embedded attempts can update auth cooldowns
  // through a separate store instance while the fallback loop runs.
  const store = loadAuthProfileStoreForRuntime(agentDir, {
    readOnly: true,
    allowKeychainPrompt: false,
  });
  let earliest: number | null = null;
  for (const { provider, model } of candidates) {
    const profileIds = resolveAuthProfileOrder({ cfg, store, provider });
    // Scope the lookup to the candidate's model so unrelated model-scoped
    // cooldowns are ignored.
    const expiry = getSoonestCooldownExpiry(store, profileIds, { forModel: model });
    if (typeof expiry !== "number" || !Number.isFinite(expiry)) {
      continue;
    }
    earliest = earliest === null ? expiry : Math.min(earliest, expiry);
  }
  return earliest;
}
function resolveImageFallbackCandidates(params: {
cfg: OpenClawConfig | undefined;
defaultProvider: string;
@@ -393,6 +459,7 @@ function shouldProbePrimaryDuringCooldown(params: {
throttleKey: string;
authStore: ReturnType<typeof ensureAuthProfileStore>;
profileIds: string[];
model: string;
}): boolean {
if (!params.isPrimary || !params.hasFallbackCandidates) {
return false;
@@ -402,7 +469,10 @@ function shouldProbePrimaryDuringCooldown(params: {
return false;
}
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds, {
now: params.now,
forModel: params.model,
});
if (soonest === null || !Number.isFinite(soonest)) {
return true;
}
@@ -453,6 +523,7 @@ function resolveCooldownDecision(params: {
throttleKey: params.probeThrottleKey,
authStore: params.authStore,
profileIds: params.profileIds,
model: params.candidate.model,
});
const inferredReason =
@@ -553,7 +624,9 @@ export async function runWithModelFallback<T>(params: {
store: authStore,
provider: candidate.provider,
});
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
const isAnyProfileAvailable = profileIds.some(
(id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
);
if (profileIds.length > 0 && !isAnyProfileAvailable) {
// All profiles for this provider are in cooldown.
@@ -762,6 +835,12 @@ export async function runWithModelFallback<T>(params: {
`${attempt.provider}/${attempt.model}: ${attempt.error}${
attempt.reason ? ` (${attempt.reason})` : ""
}`,
soonestCooldownExpiry: resolveFallbackSoonestCooldownExpiry({
authStore,
agentDir: params.agentDir,
cfg: params.cfg,
candidates,
}),
});
}

View File

@@ -661,7 +661,7 @@ export async function runEmbeddedPiAgent(
let nextIndex = profileIndex + 1;
while (nextIndex < profileCandidates.length) {
const candidate = profileCandidates[nextIndex];
if (candidate && isProfileInCooldown(authStore, candidate)) {
if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
nextIndex += 1;
continue;
}
@@ -688,7 +688,9 @@ export async function runEmbeddedPiAgent(
);
const allAutoProfilesInCooldown =
autoProfileCandidates.length > 0 &&
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
autoProfileCandidates.every((candidate) =>
isProfileInCooldown(authStore, candidate, undefined, modelId),
);
const unavailableReason = allAutoProfilesInCooldown
? (resolveProfilesUnavailableReason({
store: authStore,
@@ -704,7 +706,9 @@ export async function runEmbeddedPiAgent(
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
const inCooldown =
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
candidate &&
candidate !== lockedProfileId &&
isProfileInCooldown(authStore, candidate, undefined, modelId);
if (inCooldown) {
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
didTransientCooldownProbe = true;
@@ -774,6 +778,7 @@ export async function runEmbeddedPiAgent(
reason?: AuthProfileFailureReason | null;
config?: RunEmbeddedPiAgentParams["config"];
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
modelId?: string;
}) => {
const { profileId, reason } = failure;
if (!profileId || !reason || reason === "timeout") {
@@ -786,6 +791,7 @@ export async function runEmbeddedPiAgent(
cfg: params.config,
agentDir,
runId: params.runId,
modelId: failure.modelId,
});
};
const resolveAuthProfileFailureReason = (
@@ -1336,6 +1342,7 @@ export async function runEmbeddedPiAgent(
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: promptProfileFailureReason,
modelId,
});
const promptFailoverFailure =
promptFailoverReason !== null || isFailoverErrorMessage(errorText);
@@ -1477,6 +1484,7 @@ export async function runEmbeddedPiAgent(
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason,
modelId,
});
if (timedOut && !isProbeSession) {
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);