mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-02 16:20:23 +00:00
fix(auth): use shorter backoff for auth_permanent failures
auth_permanent errors (e.g. API_KEY_INVALID) can be caused by transient provider outages rather than genuinely revoked credentials. Previously these used the same 5h-24h billing backoff, which left providers disabled long after the upstream issue resolved. Introduce separate authPermanentBackoffMinutes (default: 10) and authPermanentMaxMinutes (default: 60) config options so auth_permanent failures recover in minutes rather than hours. Fixes #56838
This commit is contained in:
committed by
Peter Steinberger
parent
022a24ec48
commit
42e1d489fd
@@ -516,6 +516,8 @@ export function calculateAuthProfileCooldownMs(errorCount: number): number {
|
||||
type ResolvedAuthCooldownConfig = {
|
||||
billingBackoffMs: number;
|
||||
billingMaxMs: number;
|
||||
authPermanentBackoffMs: number;
|
||||
authPermanentMaxMs: number;
|
||||
failureWindowMs: number;
|
||||
};
|
||||
|
||||
@@ -556,9 +558,17 @@ function resolveAuthCooldownConfig(params: {
|
||||
defaults.failureWindowHours,
|
||||
);
|
||||
|
||||
const resolveMinutes = (value: unknown, fallback: number) =>
|
||||
typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
|
||||
|
||||
const authPermanentBackoffMinutes = resolveMinutes(cooldowns?.authPermanentBackoffMinutes, 10);
|
||||
const authPermanentMaxMinutes = resolveMinutes(cooldowns?.authPermanentMaxMinutes, 60);
|
||||
|
||||
return {
|
||||
billingBackoffMs: billingBackoffHours * 60 * 60 * 1000,
|
||||
billingMaxMs: billingMaxHours * 60 * 60 * 1000,
|
||||
authPermanentBackoffMs: authPermanentBackoffMinutes * 60 * 1000,
|
||||
authPermanentMaxMs: authPermanentMaxMinutes * 60 * 1000,
|
||||
failureWindowMs: failureWindowHours * 60 * 60 * 1000,
|
||||
};
|
||||
}
|
||||
@@ -662,7 +672,7 @@ function computeNextProfileUsageStats(params: {
|
||||
lastFailureAt: params.now,
|
||||
};
|
||||
|
||||
- if (params.reason === "billing" || params.reason === "auth_permanent") {
|
||||
+ if (params.reason === "billing") {
|
||||
const billingCount = failureCounts[params.reason] ?? 1;
|
||||
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
|
||||
errorCount: billingCount,
|
||||
@@ -677,6 +687,23 @@ function computeNextProfileUsageStats(params: {
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
updatedStats.disabledReason = params.reason;
|
||||
} else if (params.reason === "auth_permanent") {
|
||||
// auth_permanent errors can be caused by transient provider outages (e.g.
|
||||
// GCP returning API_KEY_INVALID during an incident). Use a much shorter
|
||||
// backoff than billing so the provider recovers automatically once the
|
||||
// upstream issue resolves.
|
||||
const authPermCount = failureCounts[params.reason] ?? 1;
|
||||
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
|
||||
errorCount: authPermCount,
|
||||
baseMs: params.cfgResolved.authPermanentBackoffMs,
|
||||
maxMs: params.cfgResolved.authPermanentMaxMs,
|
||||
});
|
||||
updatedStats.disabledUntil = keepActiveWindowOrRecompute({
|
||||
existingUntil: params.existing.disabledUntil,
|
||||
now: params.now,
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
updatedStats.disabledReason = params.reason;
|
||||
} else {
|
||||
const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount);
|
||||
// Keep active cooldown windows immutable so retries within the window
|
||||
|
||||
Reference in New Issue
Block a user