diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts index 261eae6efd5..6dd5697cc99 100644 --- a/src/agents/auth-profiles/usage.test.ts +++ b/src/agents/auth-profiles/usage.test.ts @@ -207,7 +207,7 @@ describe("resolveProfilesUnavailableReason", () => { ).toBe("overloaded"); }); - it("falls back to rate_limit when active cooldown has no reason history", () => { + it("falls back to unknown when active cooldown has no reason history", () => { const now = Date.now(); const store = makeStore({ "anthropic:default": { @@ -221,7 +221,7 @@ describe("resolveProfilesUnavailableReason", () => { profileIds: ["anthropic:default"], now, }), - ).toBe("rate_limit"); + ).toBe("unknown"); }); it("ignores expired windows and returns null when no profile is actively unavailable", () => { diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 273fd754595..20e1cbaa497 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -110,7 +110,11 @@ export function resolveProfilesUnavailableReason(params: { recordedReason = true; } if (!recordedReason) { - addScore("rate_limit", 1); + // No failure counts recorded for this cooldown window. Previously this + // defaulted to "rate_limit", which caused false "rate limit reached" + // warnings when the actual reason was unknown (e.g. transient network + // blip or server error without a classified failure count). + addScore("unknown", 1); } } diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index cda7771d329..d14ede7658b 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -449,7 +449,7 @@ function resolveCooldownDecision(params: { store: params.authStore, profileIds: params.profileIds, now: params.now, - }) ?? "rate_limit"; + }) ?? "unknown"; const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent"; if (isPersistentAuthIssue) { return { @@ -483,7 +483,10 @@ function resolveCooldownDecision(params: { // limits, which are often model-scoped and can recover on a sibling model. const shouldAttemptDespiteCooldown = (params.isPrimary && (!params.requestedModel || shouldProbe)) || - (!params.isPrimary && (inferredReason === "rate_limit" || inferredReason === "overloaded")); + (!params.isPrimary && + (inferredReason === "rate_limit" || + inferredReason === "overloaded" || + inferredReason === "unknown")); if (!shouldAttemptDespiteCooldown) { return { type: "skip", @@ -588,13 +591,16 @@ export async function runWithModelFallback(params: { if ( decision.reason === "rate_limit" || decision.reason === "overloaded" || - decision.reason === "billing" + decision.reason === "billing" || + decision.reason === "unknown" ) { // Probe at most once per provider per fallback run when all profiles // are cooldowned. Re-probing every same-provider candidate can stall // cross-provider fallback on providers with long internal retries. const isTransientCooldownReason = - decision.reason === "rate_limit" || decision.reason === "overloaded"; + decision.reason === "rate_limit" || + decision.reason === "overloaded" || + decision.reason === "unknown"; if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) { const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`; attempts.push({