mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-11 01:01:13 +00:00
refactor: clarify auth failover policy
This commit is contained in:
@@ -709,7 +709,7 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
lastFailureAt: now - 60_000,
|
||||
}),
|
||||
// errorCount resets, billing count resets to 1 →
|
||||
// calculateAuthProfileBillingDisableMsWithConfig(1, 5h, 24h) = 5h
|
||||
// calculateDisabledLaneBackoffMs(1, 5h, 24h) = 5h
|
||||
expectedUntil: (now: number) => now + 5 * 60 * 60 * 1000,
|
||||
readUntil: (stats: WindowStats | undefined) => stats?.disabledUntil,
|
||||
},
|
||||
@@ -724,7 +724,7 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
lastFailureAt: now - 60_000,
|
||||
}),
|
||||
// errorCount resets, auth_permanent count resets to 1 →
|
||||
// calculateAuthProfileBillingDisableMsWithConfig(1, 10m, 60m) = 10m
|
||||
// calculateDisabledLaneBackoffMs(1, 10m, 60m) = 10m
|
||||
expectedUntil: (now: number) => now + 10 * 60 * 1000,
|
||||
readUntil: (stats: WindowStats | undefined) => stats?.disabledUntil,
|
||||
},
|
||||
|
||||
@@ -518,6 +518,27 @@ type ResolvedAuthCooldownConfig = {
|
||||
failureWindowMs: number;
|
||||
};
|
||||
|
||||
// The subset of AuthProfileFailureReason values ("billing" and
// "auth_permanent") that key DISABLED_FAILURE_BACKOFF_POLICIES.
type DisabledFailureReason = Extract<AuthProfileFailureReason, "billing" | "auth_permanent">;
|
||||
|
||||
/**
 * Selectors that read the backoff bounds for one disabled-lane failure reason
 * out of the resolved cooldown configuration.
 */
type DisabledFailureBackoffPolicy = {
  /** Picks the value forwarded as `baseMs` to calculateDisabledLaneBackoffMs. */
  baseMs: (cfg: ResolvedAuthCooldownConfig) => number;
  /** Picks the value forwarded as `maxMs` to calculateDisabledLaneBackoffMs. */
  maxMs: (cfg: ResolvedAuthCooldownConfig) => number;
};
|
||||
|
||||
/**
 * Backoff policy for each disabled-lane failure reason. Every entry selects
 * its base and maximum backoff from the resolved cooldown configuration, so
 * callers can look the policy up by reason instead of branching per reason.
 */
const DISABLED_FAILURE_BACKOFF_POLICIES = {
  billing: {
    baseMs: (cfg) => cfg.billingBackoffMs,
    maxMs: (cfg) => cfg.billingMaxMs,
  },
  auth_permanent: {
    // Keep high-confidence permanent-auth failures in the disabled lane, but
    // recover much sooner than billing because some providers surface
    // auth-looking payloads transiently during incidents.
    baseMs: (cfg) => cfg.authPermanentBackoffMs,
    maxMs: (cfg) => cfg.authPermanentMaxMs,
  },
} as const satisfies Record<DisabledFailureReason, DisabledFailureBackoffPolicy>;
|
||||
|
||||
function resolveAuthCooldownConfig(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
providerId: string;
|
||||
@@ -530,9 +551,7 @@ function resolveAuthCooldownConfig(params: {
|
||||
failureWindowHours: 24,
|
||||
} as const;
|
||||
|
||||
const resolveHours = (value: unknown, fallback: number) =>
|
||||
typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
|
||||
const resolveMinutes = (value: unknown, fallback: number) =>
|
||||
const resolvePositiveNumber = (value: unknown, fallback: number) =>
|
||||
typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
|
||||
|
||||
const cooldowns = params.cfg?.auth?.cooldowns;
|
||||
@@ -549,20 +568,23 @@ function resolveAuthCooldownConfig(params: {
|
||||
return undefined;
|
||||
})();
|
||||
|
||||
const billingBackoffHours = resolveHours(
|
||||
const billingBackoffHours = resolvePositiveNumber(
|
||||
billingOverride ?? cooldowns?.billingBackoffHours,
|
||||
defaults.billingBackoffHours,
|
||||
);
|
||||
const billingMaxHours = resolveHours(cooldowns?.billingMaxHours, defaults.billingMaxHours);
|
||||
const authPermanentBackoffMinutes = resolveMinutes(
|
||||
const billingMaxHours = resolvePositiveNumber(
|
||||
cooldowns?.billingMaxHours,
|
||||
defaults.billingMaxHours,
|
||||
);
|
||||
const authPermanentBackoffMinutes = resolvePositiveNumber(
|
||||
cooldowns?.authPermanentBackoffMinutes,
|
||||
defaults.authPermanentBackoffMinutes,
|
||||
);
|
||||
const authPermanentMaxMinutes = resolveMinutes(
|
||||
const authPermanentMaxMinutes = resolvePositiveNumber(
|
||||
cooldowns?.authPermanentMaxMinutes,
|
||||
defaults.authPermanentMaxMinutes,
|
||||
);
|
||||
const failureWindowHours = resolveHours(
|
||||
const failureWindowHours = resolvePositiveNumber(
|
||||
cooldowns?.failureWindowHours,
|
||||
defaults.failureWindowHours,
|
||||
);
|
||||
@@ -576,7 +598,7 @@ function resolveAuthCooldownConfig(params: {
|
||||
};
|
||||
}
|
||||
|
||||
function calculateAuthProfileBillingDisableMsWithConfig(params: {
|
||||
function calculateDisabledLaneBackoffMs(params: {
|
||||
errorCount: number;
|
||||
baseMs: number;
|
||||
maxMs: number;
|
||||
@@ -589,6 +611,19 @@ function calculateAuthProfileBillingDisableMsWithConfig(params: {
|
||||
return Math.min(maxMs, raw);
|
||||
}
|
||||
|
||||
/**
 * Computes the disable backoff for a disabled-lane failure reason.
 *
 * Looks up the reason's entry in DISABLED_FAILURE_BACKOFF_POLICIES, reads the
 * base and maximum backoff from `cfgResolved` through that entry, and hands
 * them to calculateDisabledLaneBackoffMs together with `errorCount`.
 *
 * @param params.reason - Failure reason whose policy should be applied.
 * @param params.errorCount - Failure count forwarded unchanged as `errorCount`.
 * @param params.cfgResolved - Resolved cooldown configuration the policy reads.
 * @returns The value returned by calculateDisabledLaneBackoffMs.
 */
function resolveDisabledFailureBackoffMs(params: {
  reason: DisabledFailureReason;
  errorCount: number;
  cfgResolved: ResolvedAuthCooldownConfig;
}): number {
  const policy = DISABLED_FAILURE_BACKOFF_POLICIES[params.reason];
  return calculateDisabledLaneBackoffMs({
    errorCount: params.errorCount,
    baseMs: policy.baseMs(params.cfgResolved),
    maxMs: policy.maxMs(params.cfgResolved),
  });
}
|
||||
|
||||
export function resolveProfileUnusableUntilForDisplay(
|
||||
store: AuthProfileStore,
|
||||
profileId: string,
|
||||
@@ -675,12 +710,15 @@ function computeNextProfileUsageStats(params: {
|
||||
lastFailureAt: params.now,
|
||||
};
|
||||
|
||||
if (params.reason === "billing") {
|
||||
const billingCount = failureCounts[params.reason] ?? 1;
|
||||
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
|
||||
errorCount: billingCount,
|
||||
baseMs: params.cfgResolved.billingBackoffMs,
|
||||
maxMs: params.cfgResolved.billingMaxMs,
|
||||
const disabledFailureReason =
|
||||
params.reason === "billing" || params.reason === "auth_permanent" ? params.reason : null;
|
||||
|
||||
if (disabledFailureReason) {
|
||||
const disableCount = failureCounts[disabledFailureReason] ?? 1;
|
||||
const backoffMs = resolveDisabledFailureBackoffMs({
|
||||
reason: disabledFailureReason,
|
||||
errorCount: disableCount,
|
||||
cfgResolved: params.cfgResolved,
|
||||
});
|
||||
// Keep active disable windows immutable so retries within the window cannot
|
||||
// extend recovery time indefinitely.
|
||||
@@ -689,23 +727,7 @@ function computeNextProfileUsageStats(params: {
|
||||
now: params.now,
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
updatedStats.disabledReason = params.reason;
|
||||
} else if (params.reason === "auth_permanent") {
|
||||
// Keep permanent-auth failures in the disabled lane, but use a much
|
||||
// shorter backoff than billing. Some upstream incidents surface auth-ish
|
||||
// payloads transiently, so the provider should recover automatically.
|
||||
const authPermanentCount = failureCounts[params.reason] ?? 1;
|
||||
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
|
||||
errorCount: authPermanentCount,
|
||||
baseMs: params.cfgResolved.authPermanentBackoffMs,
|
||||
maxMs: params.cfgResolved.authPermanentMaxMs,
|
||||
});
|
||||
updatedStats.disabledUntil = keepActiveWindowOrRecompute({
|
||||
existingUntil: params.existing.disabledUntil,
|
||||
now: params.now,
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
updatedStats.disabledReason = params.reason;
|
||||
updatedStats.disabledReason = disabledFailureReason;
|
||||
} else {
|
||||
const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount);
|
||||
// Keep active cooldown windows immutable so retries within the window
|
||||
|
||||
@@ -109,7 +109,7 @@ describe("failover-error", () => {
|
||||
status: 410,
|
||||
message: "invalid_api_key",
|
||||
}),
|
||||
).toBe("auth_permanent");
|
||||
).toBe("auth");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 410,
|
||||
|
||||
@@ -68,14 +68,12 @@ describe("isAuthPermanentErrorMessage", () => {
|
||||
{
|
||||
name: "matches permanent auth failure patterns",
|
||||
samples: [
|
||||
"invalid_api_key",
|
||||
"api key revoked",
|
||||
"api key deactivated",
|
||||
"key has been disabled",
|
||||
"key has been revoked",
|
||||
"account has been deactivated",
|
||||
"could not authenticate api key",
|
||||
"could not validate credentials",
|
||||
"OAuth authentication is currently not allowed for this organization",
|
||||
"API_KEY_REVOKED",
|
||||
"api_key_deleted",
|
||||
],
|
||||
@@ -84,6 +82,8 @@ describe("isAuthPermanentErrorMessage", () => {
|
||||
{
|
||||
name: "does not match transient auth errors",
|
||||
samples: [
|
||||
"invalid_api_key",
|
||||
"permission_error",
|
||||
"unauthorized",
|
||||
"invalid token",
|
||||
"authentication failed",
|
||||
@@ -102,8 +102,12 @@ describe("isAuthErrorMessage", () => {
|
||||
it.each([
|
||||
'No credentials found for profile "anthropic:default".',
|
||||
"No API key found for profile openai.",
|
||||
"invalid_api_key",
|
||||
"permission_error",
|
||||
"OAuth token refresh failed for anthropic: Failed to refresh OAuth token for anthropic. Please try again or re-authenticate.",
|
||||
"Please re-authenticate to continue.",
|
||||
"could not authenticate api key",
|
||||
"could not validate credentials",
|
||||
"Failed to extract accountId from token",
|
||||
])("matches auth errors for %j", (sample) => {
|
||||
expect(isAuthErrorMessage(sample)).toBe(true);
|
||||
|
||||
@@ -3,6 +3,41 @@ type ErrorPattern = RegExp | string;
|
||||
// Case-insensitive match for periodic usage-limit phrases: a period word
// (daily / weekly / monthly, optionally slash-joined such as "weekly/monthly"),
// an optional "usage", "limit" or "limits", and an optional trailing
// "exhausted" / "reached" / "exceeded".
const PERIODIC_USAGE_LIMIT_RE =
  /\b(?:daily|weekly|monthly)(?:\/(?:daily|weekly|monthly))* (?:usage )?limit(?:s)?(?: (?:exhausted|reached|exceeded))?\b/i;
|
||||
|
||||
// Patterns used to classify an error message as a permanent auth failure
// (revoked / deactivated / deleted keys, deactivated accounts, organization
// level denial). This is the only group isAuthPermanentErrorMessage consults.
const HIGH_CONFIDENCE_AUTH_PERMANENT_PATTERNS = [
  /api[_ ]?key[_ ]?(?:revoked|deactivated|deleted)/i,
  "key has been disabled",
  "key has been revoked",
  "account has been deactivated",
  "not allowed for this organization",
] as const satisfies readonly ErrorPattern[];
|
||||
|
||||
// Auth-looking patterns that are treated as ordinary auth errors rather than
// permanent ones: isAuthErrorMessage consults this group, while
// isAuthPermanentErrorMessage does not.
// NOTE(review): the first regex has no `i` flag, unlike the second; this
// presumably relies on the matcher lower-casing its input. Confirm against
// matchesErrorPatterns before changing it.
const AMBIGUOUS_AUTH_ERROR_PATTERNS = [
  /invalid[_ ]?api[_ ]?key/,
  /could not (?:authenticate|validate).*(?:api[_ ]?key|credentials)/i,
  "permission_error",
] as const satisfies readonly ErrorPattern[];
|
||||
|
||||
// General authentication / authorization failure patterns (bad or expired
// tokens, 401/403 status text, missing credentials, permission problems).
// isAuthErrorMessage consults this group together with the ambiguous group.
const COMMON_AUTH_ERROR_PATTERNS = [
  "incorrect api key",
  "invalid token",
  "authentication",
  "re-authenticate",
  "oauth token refresh failed",
  "unauthorized",
  "forbidden",
  "access denied",
  "insufficient permissions",
  "insufficient permission",
  /missing scopes?:/i,
  "expired",
  "token has expired",
  /\b401\b/,
  /\b403\b/,
  "no credentials found",
  "no api key found",
  /\bfailed to (?:extract|parse|validate|decode)\b.*\btoken\b/,
] as const satisfies readonly ErrorPattern[];
|
||||
|
||||
const ERROR_PATTERNS = {
|
||||
rateLimit: [
|
||||
/rate[_ ]limit|too many requests|429/,
|
||||
@@ -80,36 +115,8 @@ const ERROR_PATTERNS = {
|
||||
"insufficient usd or diem balance",
|
||||
/requires?\s+more\s+credits/i,
|
||||
],
|
||||
authPermanent: [
|
||||
/api[_ ]?key[_ ]?(?:revoked|deactivated|deleted)/i,
|
||||
"key has been disabled",
|
||||
"key has been revoked",
|
||||
"account has been deactivated",
|
||||
"not allowed for this organization",
|
||||
],
|
||||
auth: [
|
||||
/invalid[_ ]?api[_ ]?key/,
|
||||
"incorrect api key",
|
||||
"invalid token",
|
||||
"authentication",
|
||||
"re-authenticate",
|
||||
"oauth token refresh failed",
|
||||
/could not (?:authenticate|validate).*(?:api[_ ]?key|credentials)/i,
|
||||
"permission_error",
|
||||
"unauthorized",
|
||||
"forbidden",
|
||||
"access denied",
|
||||
"insufficient permissions",
|
||||
"insufficient permission",
|
||||
/missing scopes?:/i,
|
||||
"expired",
|
||||
"token has expired",
|
||||
/\b401\b/,
|
||||
/\b403\b/,
|
||||
"no credentials found",
|
||||
"no api key found",
|
||||
/\bfailed to (?:extract|parse|validate|decode)\b.*\btoken\b/,
|
||||
],
|
||||
authPermanent: HIGH_CONFIDENCE_AUTH_PERMANENT_PATTERNS,
|
||||
auth: [...AMBIGUOUS_AUTH_ERROR_PATTERNS, ...COMMON_AUTH_ERROR_PATTERNS],
|
||||
format: [
|
||||
"string should match pattern",
|
||||
"tool_use.id",
|
||||
@@ -136,6 +143,13 @@ function matchesErrorPatterns(raw: string, patterns: readonly ErrorPattern[]): b
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Reports whether `raw` matches any pattern in any of the supplied groups.
 *
 * Each group is checked with matchesErrorPatterns; evaluation stops at the
 * first group that matches. An empty `groups` array yields `false`.
 *
 * @param raw - Error text to classify.
 * @param groups - Pattern lists to test, in order.
 * @returns `true` when at least one group matches, otherwise `false`.
 */
function matchesErrorPatternGroups(
  raw: string,
  groups: readonly (readonly ErrorPattern[])[],
): boolean {
  return groups.some((patterns) => matchesErrorPatterns(raw, patterns));
}
|
||||
|
||||
/**
 * Reports whether `raw` matches one of the `format` entries of ERROR_PATTERNS.
 *
 * @param raw - Error text to classify.
 * @returns The result of matchesErrorPatterns for the format pattern list.
 */
export function matchesFormatErrorPattern(raw: string): boolean {
  return matchesErrorPatterns(raw, ERROR_PATTERNS.format);
}
|
||||
@@ -176,11 +190,14 @@ export function isBillingErrorMessage(raw: string): boolean {
|
||||
}
|
||||
|
||||
/**
 * Reports whether `raw` looks like a permanent authentication failure.
 *
 * Only HIGH_CONFIDENCE_AUTH_PERMANENT_PATTERNS is consulted; the ambiguous and
 * common auth pattern groups are not, so messages matching only those groups
 * return `false` here.
 *
 * @param raw - Error text to classify.
 * @returns `true` when a high-confidence permanent-auth pattern matches.
 */
export function isAuthPermanentErrorMessage(raw: string): boolean {
  return matchesErrorPatternGroups(raw, [HIGH_CONFIDENCE_AUTH_PERMANENT_PATTERNS]);
}
|
||||
|
||||
/**
 * Reports whether `raw` looks like an authentication / authorization error.
 *
 * Checks AMBIGUOUS_AUTH_ERROR_PATTERNS first and COMMON_AUTH_ERROR_PATTERNS
 * second. HIGH_CONFIDENCE_AUTH_PERMANENT_PATTERNS is not consulted here.
 *
 * @param raw - Error text to classify.
 * @returns `true` when either pattern group matches.
 */
export function isAuthErrorMessage(raw: string): boolean {
  return matchesErrorPatternGroups(raw, [
    AMBIGUOUS_AUTH_ERROR_PATTERNS,
    COMMON_AUTH_ERROR_PATTERNS,
  ]);
}
|
||||
|
||||
export function isOverloadedErrorMessage(raw: string): boolean {
|
||||
|
||||
Reference in New Issue
Block a user