fix: stop defaulting to rate_limit when cooldown reason is unknown

When a profile entered cooldown with no failure counts recorded (e.g.
after a transient network error or an unclassified server error), the
inferred reason was hardcoded to "rate_limit". This caused misleading
"API rate limit reached" warnings for users who had not actually hit
any rate limit.

Change both default-reason sites to use "unknown" instead:
- resolveProfilesUnavailableReason fallback in usage.ts
- resolveCooldownDecision nullish-coalescing (??) default in model-fallback.ts

Treat "unknown" as a transient reason for probe and same-provider
fallback purposes, matching the existing behavior for rate_limit and
overloaded cooldowns so recovery attempts still work.

Fixes #32828
This commit is contained in:
VibhorGautam
2026-03-11 12:16:38 +05:30
committed by Altay
parent 60aed95346
commit 8b8be59532
3 changed files with 17 additions and 7 deletions

View File

@@ -207,7 +207,7 @@ describe("resolveProfilesUnavailableReason", () => {
).toBe("overloaded");
});
it("falls back to rate_limit when active cooldown has no reason history", () => {
it("falls back to unknown when active cooldown has no reason history", () => {
const now = Date.now();
const store = makeStore({
"anthropic:default": {
@@ -221,7 +221,7 @@ describe("resolveProfilesUnavailableReason", () => {
profileIds: ["anthropic:default"],
now,
}),
).toBe("rate_limit");
).toBe("unknown");
});
it("ignores expired windows and returns null when no profile is actively unavailable", () => {

View File

@@ -110,7 +110,11 @@ export function resolveProfilesUnavailableReason(params: {
recordedReason = true;
}
if (!recordedReason) {
addScore("rate_limit", 1);
// No failure counts recorded for this cooldown window. Previously this
// defaulted to "rate_limit", which caused false "rate limit reached"
// warnings when the actual reason was unknown (e.g. transient network
// blip or server error without a classified failure count).
addScore("unknown", 1);
}
}

View File

@@ -449,7 +449,7 @@ function resolveCooldownDecision(params: {
store: params.authStore,
profileIds: params.profileIds,
now: params.now,
}) ?? "rate_limit";
}) ?? "unknown";
const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
if (isPersistentAuthIssue) {
return {
@@ -483,7 +483,10 @@ function resolveCooldownDecision(params: {
// limits, which are often model-scoped and can recover on a sibling model.
const shouldAttemptDespiteCooldown =
(params.isPrimary && (!params.requestedModel || shouldProbe)) ||
(!params.isPrimary && (inferredReason === "rate_limit" || inferredReason === "overloaded"));
(!params.isPrimary &&
(inferredReason === "rate_limit" ||
inferredReason === "overloaded" ||
inferredReason === "unknown"));
if (!shouldAttemptDespiteCooldown) {
return {
type: "skip",
@@ -588,13 +591,16 @@ export async function runWithModelFallback<T>(params: {
if (
decision.reason === "rate_limit" ||
decision.reason === "overloaded" ||
decision.reason === "billing"
decision.reason === "billing" ||
decision.reason === "unknown"
) {
// Probe at most once per provider per fallback run when all profiles
// are cooldowned. Re-probing every same-provider candidate can stall
// cross-provider fallback on providers with long internal retries.
const isTransientCooldownReason =
decision.reason === "rate_limit" || decision.reason === "overloaded";
decision.reason === "rate_limit" ||
decision.reason === "overloaded" ||
decision.reason === "unknown";
if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) {
const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
attempts.push({