mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-06 14:51:08 +00:00
fix: narrow auth permanent lockouts
This commit is contained in:
@@ -7816,6 +7816,38 @@
|
||||
"help": "Cooldown/backoff controls for temporary profile suppression after billing-related failures and retry windows. Use these to prevent rapid re-selection of profiles that are still blocked.",
|
||||
"hasChildren": true
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.authPermanentBackoffMinutes",
|
||||
"kind": "core",
|
||||
"type": "number",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"reliability"
|
||||
],
|
||||
"label": "Auth-Permanent Backoff (minutes)",
|
||||
"help": "Base backoff (minutes) for high-confidence auth_permanent failures (default: 10). Keep this shorter than billing so providers recover automatically after transient upstream auth incidents.",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.authPermanentMaxMinutes",
|
||||
"kind": "core",
|
||||
"type": "number",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"performance"
|
||||
],
|
||||
"label": "Auth-Permanent Backoff Cap (minutes)",
|
||||
"help": "Cap (minutes) for auth_permanent backoff (default: 60).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.billingBackoffHours",
|
||||
"kind": "core",
|
||||
|
||||
@@ -7815,6 +7815,38 @@
|
||||
"help": "Cooldown/backoff controls for temporary profile suppression after billing-related failures and retry windows. Use these to prevent rapid re-selection of profiles that are still blocked.",
|
||||
"hasChildren": true
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.authPermanentBackoffMinutes",
|
||||
"kind": "core",
|
||||
"type": "number",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"reliability"
|
||||
],
|
||||
"label": "Auth-Permanent Backoff (minutes)",
|
||||
"help": "Base backoff (minutes) for high-confidence auth_permanent failures (default: 10). Keep this shorter than billing so providers recover automatically after transient upstream auth incidents.",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.authPermanentMaxMinutes",
|
||||
"kind": "core",
|
||||
"type": "number",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"performance"
|
||||
],
|
||||
"label": "Auth-Permanent Backoff Cap (minutes)",
|
||||
"help": "Cap (minutes) for auth_permanent backoff (default: 60).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.billingBackoffHours",
|
||||
"kind": "core",
|
||||
|
||||
@@ -3037,6 +3037,8 @@ Notes:
|
||||
billingBackoffHours: 5,
|
||||
billingBackoffHoursByProvider: { anthropic: 3, openai: 8 },
|
||||
billingMaxHours: 24,
|
||||
authPermanentBackoffMinutes: 10,
|
||||
authPermanentMaxMinutes: 60,
|
||||
failureWindowHours: 24,
|
||||
overloadedProfileRotations: 1,
|
||||
overloadedBackoffMs: 0,
|
||||
@@ -3049,6 +3051,8 @@ Notes:
|
||||
- `billingBackoffHours`: base backoff in hours when a profile fails due to billing/insufficient credits (default: `5`).
|
||||
- `billingBackoffHoursByProvider`: optional per-provider overrides for billing backoff hours.
|
||||
- `billingMaxHours`: cap in hours for billing backoff exponential growth (default: `24`).
|
||||
- `authPermanentBackoffMinutes`: base backoff in minutes for high-confidence `auth_permanent` failures (default: `10`).
|
||||
- `authPermanentMaxMinutes`: cap in minutes for `auth_permanent` backoff growth (default: `60`).
|
||||
- `failureWindowHours`: rolling window in hours used for backoff counters (default: `24`).
|
||||
- `overloadedProfileRotations`: maximum same-provider auth-profile rotations for overloaded errors before switching to model fallback (default: `1`).
|
||||
- `overloadedBackoffMs`: fixed delay before retrying an overloaded provider/profile rotation (default: `0`).
|
||||
|
||||
@@ -199,8 +199,9 @@ describe("markAuthProfileFailure", () => {
|
||||
expect(stats?.failureCounts?.overloaded).toBe(1);
|
||||
});
|
||||
});
|
||||
it("disables auth_permanent failures via disabledUntil (like billing)", async () => {
|
||||
it("disables auth_permanent failures for ~10 minutes by default", async () => {
|
||||
await withAuthProfileStore(async ({ agentDir, store }) => {
|
||||
const startedAt = Date.now();
|
||||
await markAuthProfileFailure({
|
||||
store,
|
||||
profileId: "anthropic:default",
|
||||
@@ -213,6 +214,33 @@ describe("markAuthProfileFailure", () => {
|
||||
expect(stats?.disabledReason).toBe("auth_permanent");
|
||||
// Should NOT set cooldownUntil (that's for transient errors)
|
||||
expect(stats?.cooldownUntil).toBeUndefined();
|
||||
const remainingMs = (stats?.disabledUntil as number) - startedAt;
|
||||
expectCooldownInRange(remainingMs, 9 * 60 * 1000, 11 * 60 * 1000);
|
||||
});
|
||||
});
|
||||
|
||||
it("honors auth_permanent backoff overrides", async () => {
|
||||
await withAuthProfileStore(async ({ agentDir, store }) => {
|
||||
const startedAt = Date.now();
|
||||
await markAuthProfileFailure({
|
||||
store,
|
||||
profileId: "anthropic:default",
|
||||
reason: "auth_permanent",
|
||||
agentDir,
|
||||
cfg: {
|
||||
auth: {
|
||||
cooldowns: {
|
||||
authPermanentBackoffMinutes: 15,
|
||||
authPermanentMaxMinutes: 45,
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
});
|
||||
|
||||
const disabledUntil = store.usageStats?.["anthropic:default"]?.disabledUntil;
|
||||
expect(typeof disabledUntil).toBe("number");
|
||||
const remainingMs = (disabledUntil as number) - startedAt;
|
||||
expectCooldownInRange(remainingMs, 14 * 60 * 1000, 16 * 60 * 1000);
|
||||
});
|
||||
});
|
||||
it("resets backoff counters outside the failure window", async () => {
|
||||
|
||||
@@ -130,10 +130,7 @@ function applyWhamCooldownResult(params: {
|
||||
: 0;
|
||||
return {
|
||||
...params.computed,
|
||||
cooldownUntil: Math.max(
|
||||
existingActiveCooldownUntil,
|
||||
params.now + params.whamResult.cooldownMs,
|
||||
),
|
||||
cooldownUntil: Math.max(existingActiveCooldownUntil, params.now + params.whamResult.cooldownMs),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -528,11 +525,15 @@ function resolveAuthCooldownConfig(params: {
|
||||
const defaults = {
|
||||
billingBackoffHours: 5,
|
||||
billingMaxHours: 24,
|
||||
authPermanentBackoffMinutes: 10,
|
||||
authPermanentMaxMinutes: 60,
|
||||
failureWindowHours: 24,
|
||||
} as const;
|
||||
|
||||
const resolveHours = (value: unknown, fallback: number) =>
|
||||
typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
|
||||
const resolveMinutes = (value: unknown, fallback: number) =>
|
||||
typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
|
||||
|
||||
const cooldowns = params.cfg?.auth?.cooldowns;
|
||||
const billingOverride = (() => {
|
||||
@@ -553,17 +554,19 @@ function resolveAuthCooldownConfig(params: {
|
||||
defaults.billingBackoffHours,
|
||||
);
|
||||
const billingMaxHours = resolveHours(cooldowns?.billingMaxHours, defaults.billingMaxHours);
|
||||
const authPermanentBackoffMinutes = resolveMinutes(
|
||||
cooldowns?.authPermanentBackoffMinutes,
|
||||
defaults.authPermanentBackoffMinutes,
|
||||
);
|
||||
const authPermanentMaxMinutes = resolveMinutes(
|
||||
cooldowns?.authPermanentMaxMinutes,
|
||||
defaults.authPermanentMaxMinutes,
|
||||
);
|
||||
const failureWindowHours = resolveHours(
|
||||
cooldowns?.failureWindowHours,
|
||||
defaults.failureWindowHours,
|
||||
);
|
||||
|
||||
const resolveMinutes = (value: unknown, fallback: number) =>
|
||||
typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
|
||||
|
||||
const authPermanentBackoffMinutes = resolveMinutes(cooldowns?.authPermanentBackoffMinutes, 10);
|
||||
const authPermanentMaxMinutes = resolveMinutes(cooldowns?.authPermanentMaxMinutes, 60);
|
||||
|
||||
return {
|
||||
billingBackoffMs: billingBackoffHours * 60 * 60 * 1000,
|
||||
billingMaxMs: billingMaxHours * 60 * 60 * 1000,
|
||||
@@ -688,13 +691,12 @@ function computeNextProfileUsageStats(params: {
|
||||
});
|
||||
updatedStats.disabledReason = params.reason;
|
||||
} else if (params.reason === "auth_permanent") {
|
||||
// auth_permanent errors can be caused by transient provider outages (e.g.
|
||||
// GCP returning API_KEY_INVALID during an incident). Use a much shorter
|
||||
// backoff than billing so the provider recovers automatically once the
|
||||
// upstream issue resolves.
|
||||
const authPermCount = failureCounts[params.reason] ?? 1;
|
||||
// Keep permanent-auth failures in the disabled lane, but use a much
|
||||
// shorter backoff than billing. Some upstream incidents surface auth-ish
|
||||
// payloads transiently, so the provider should recover automatically.
|
||||
const authPermanentCount = failureCounts[params.reason] ?? 1;
|
||||
const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({
|
||||
errorCount: authPermCount,
|
||||
errorCount: authPermanentCount,
|
||||
baseMs: params.cfgResolved.authPermanentBackoffMs,
|
||||
maxMs: params.cfgResolved.authPermanentMaxMs,
|
||||
});
|
||||
|
||||
@@ -500,9 +500,9 @@ describe("failover-error", () => {
|
||||
expect(resolveFailoverReasonFromError({ status: 403, message: "Forbidden" })).toBe("auth");
|
||||
});
|
||||
|
||||
it("401 with permanent auth message returns auth_permanent", () => {
|
||||
it("401 with ambiguous auth message returns auth", () => {
|
||||
expect(resolveFailoverReasonFromError({ status: 401, message: "invalid_api_key" })).toBe(
|
||||
"auth_permanent",
|
||||
"auth",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -516,26 +516,22 @@ describe("failover-error", () => {
|
||||
expect(resolveFailoverStatus("auth_permanent")).toBe(403);
|
||||
});
|
||||
|
||||
it("coerces permanent auth error with correct reason", () => {
|
||||
it("coerces ambiguous auth error into the short auth lane", () => {
|
||||
const err = coerceToFailoverError(
|
||||
{ status: 401, message: "invalid_api_key" },
|
||||
{ provider: "anthropic", model: "claude-opus-4-6" },
|
||||
);
|
||||
expect(err?.reason).toBe("auth_permanent");
|
||||
expect(err?.reason).toBe("auth");
|
||||
expect(err?.provider).toBe("anthropic");
|
||||
});
|
||||
|
||||
it("403 permission_error returns auth_permanent", () => {
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 403,
|
||||
message:
|
||||
"permission_error: OAuth authentication is currently not allowed for this organization.",
|
||||
}),
|
||||
).toBe("auth_permanent");
|
||||
it("403 bare permission_error returns auth", () => {
|
||||
expect(resolveFailoverReasonFromError({ status: 403, message: "permission_error" })).toBe(
|
||||
"auth",
|
||||
);
|
||||
});
|
||||
|
||||
it("permission_error in error message string classifies as auth_permanent", () => {
|
||||
it("permission_error with organization denial stays auth_permanent", () => {
|
||||
const err = coerceToFailoverError(
|
||||
"HTTP 403 permission_error: OAuth authentication is currently not allowed for this organization.",
|
||||
{ provider: "anthropic", model: "claude-opus-4-6" },
|
||||
|
||||
@@ -528,8 +528,8 @@ describe("isTransientHttpError", () => {
|
||||
});
|
||||
|
||||
describe("classifyFailoverReasonFromHttpStatus", () => {
|
||||
it("treats HTTP 401 permanent auth failures as auth_permanent", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(401, "invalid_api_key")).toBe("auth_permanent");
|
||||
it("treats HTTP 401 invalid_api_key as ambiguous auth", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(401, "invalid_api_key")).toBe("auth");
|
||||
});
|
||||
|
||||
it("treats HTTP 422 as format error", () => {
|
||||
@@ -591,7 +591,7 @@ describe("classifyFailoverReasonFromHttpStatus", () => {
|
||||
});
|
||||
|
||||
it("preserves explicit billing and auth signals on HTTP 410", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(410, "invalid_api_key")).toBe("auth_permanent");
|
||||
expect(classifyFailoverReasonFromHttpStatus(410, "invalid_api_key")).toBe("auth");
|
||||
expect(classifyFailoverReasonFromHttpStatus(410, "authentication failed")).toBe("auth");
|
||||
expect(classifyFailoverReasonFromHttpStatus(410, "insufficient credits")).toBe("billing");
|
||||
});
|
||||
@@ -613,7 +613,7 @@ describe("classifyFailoverReason", () => {
|
||||
});
|
||||
|
||||
it("keeps explicit billing and auth signals on 410 text", () => {
|
||||
expect(classifyFailoverReason("HTTP 410: invalid_api_key")).toBe("auth_permanent");
|
||||
expect(classifyFailoverReason("HTTP 410: invalid_api_key")).toBe("auth");
|
||||
expect(classifyFailoverReason("HTTP 410: authentication failed")).toBe("auth");
|
||||
expect(classifyFailoverReason("HTTP 410: insufficient credits")).toBe("billing");
|
||||
});
|
||||
@@ -935,11 +935,15 @@ describe("classifyFailoverReason", () => {
|
||||
expect(classifyFailoverReason("LLM error: monthly limit reached")).toBe("rate_limit");
|
||||
expect(classifyFailoverReason("LLM error: daily limit exceeded")).toBe("rate_limit");
|
||||
});
|
||||
it("classifies permanent auth errors as auth_permanent", () => {
|
||||
expect(classifyFailoverReason("invalid_api_key")).toBe("auth_permanent");
|
||||
it("keeps only high-confidence auth failures in auth_permanent", () => {
|
||||
expect(classifyFailoverReason("invalid_api_key")).toBe("auth");
|
||||
expect(classifyFailoverReason("permission_error")).toBe("auth");
|
||||
expect(classifyFailoverReason("Your api key has been revoked")).toBe("auth_permanent");
|
||||
expect(classifyFailoverReason("key has been disabled")).toBe("auth_permanent");
|
||||
expect(classifyFailoverReason("account has been deactivated")).toBe("auth_permanent");
|
||||
expect(
|
||||
classifyFailoverReason("OAuth authentication is currently not allowed for this organization"),
|
||||
).toBe("auth_permanent");
|
||||
});
|
||||
it("classifies JSON api_error with transient signal as timeout", () => {
|
||||
expect(
|
||||
@@ -1013,6 +1017,11 @@ describe("classifyFailoverReason", () => {
|
||||
classifyFailoverReason(
|
||||
'{"type":"error","error":{"type":"api_error","message":"permission_error"}}',
|
||||
),
|
||||
).toBe("auth");
|
||||
expect(
|
||||
classifyFailoverReason(
|
||||
'{"type":"error","error":{"type":"api_error","message":"permission_error: OAuth authentication is currently not allowed for this organization"}}',
|
||||
),
|
||||
).toBe("auth_permanent");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -81,13 +81,10 @@ const ERROR_PATTERNS = {
|
||||
/requires?\s+more\s+credits/i,
|
||||
],
|
||||
authPermanent: [
|
||||
/api[_ ]?key[_ ]?(?:revoked|invalid|deactivated|deleted)/i,
|
||||
"invalid_api_key",
|
||||
/api[_ ]?key[_ ]?(?:revoked|deactivated|deleted)/i,
|
||||
"key has been disabled",
|
||||
"key has been revoked",
|
||||
"account has been deactivated",
|
||||
/could not (?:authenticate|validate).*(?:api[_ ]?key|credentials)/i,
|
||||
"permission_error",
|
||||
"not allowed for this organization",
|
||||
],
|
||||
auth: [
|
||||
@@ -97,6 +94,8 @@ const ERROR_PATTERNS = {
|
||||
"authentication",
|
||||
"re-authenticate",
|
||||
"oauth token refresh failed",
|
||||
/could not (?:authenticate|validate).*(?:api[_ ]?key|credentials)/i,
|
||||
"permission_error",
|
||||
"unauthorized",
|
||||
"forbidden",
|
||||
"access denied",
|
||||
|
||||
@@ -52,6 +52,20 @@ describe("plugins.slots.contextEngine", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("auth.cooldowns auth_permanent backoff config", () => {
|
||||
it("accepts auth_permanent backoff knobs", () => {
|
||||
const result = OpenClawSchema.safeParse({
|
||||
auth: {
|
||||
cooldowns: {
|
||||
authPermanentBackoffMinutes: 10,
|
||||
authPermanentMaxMinutes: 60,
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("ui.seamColor", () => {
|
||||
it("accepts hex colors", () => {
|
||||
const res = validateConfigObject({ ui: { seamColor: "#FF4500" } });
|
||||
|
||||
@@ -782,6 +782,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
|
||||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
},
|
||||
authPermanentBackoffMinutes: {
|
||||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
},
|
||||
authPermanentMaxMinutes: {
|
||||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
},
|
||||
failureWindowHours: {
|
||||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
@@ -22567,6 +22575,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
|
||||
help: "Cap (hours) for billing backoff (default: 24).",
|
||||
tags: ["auth", "access", "performance"],
|
||||
},
|
||||
"auth.cooldowns.authPermanentBackoffMinutes": {
|
||||
label: "Auth-Permanent Backoff (minutes)",
|
||||
help: "Base backoff (minutes) for high-confidence auth_permanent failures (default: 10). Keep this shorter than billing so providers recover automatically after transient upstream auth incidents.",
|
||||
tags: ["auth", "access", "reliability"],
|
||||
},
|
||||
"auth.cooldowns.authPermanentMaxMinutes": {
|
||||
label: "Auth-Permanent Backoff Cap (minutes)",
|
||||
help: "Cap (minutes) for auth_permanent backoff (default: 60).",
|
||||
tags: ["auth", "access", "performance"],
|
||||
},
|
||||
"auth.cooldowns.failureWindowHours": {
|
||||
label: "Failover Window (hours)",
|
||||
help: "Failure window (hours) for backoff counters (default: 24).",
|
||||
|
||||
@@ -823,7 +823,7 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Optional per-provider overrides for billing backoff (hours).",
|
||||
"auth.cooldowns.billingMaxHours": "Cap (hours) for billing backoff (default: 24).",
|
||||
"auth.cooldowns.authPermanentBackoffMinutes":
|
||||
"Base backoff (minutes) for auth_permanent failures (default: 10). Shorter than billing because these errors are often caused by transient provider outages.",
|
||||
"Base backoff (minutes) for high-confidence auth_permanent failures (default: 10). Keep this shorter than billing so providers recover automatically after transient upstream auth incidents.",
|
||||
"auth.cooldowns.authPermanentMaxMinutes":
|
||||
"Cap (minutes) for auth_permanent backoff (default: 60).",
|
||||
"auth.cooldowns.failureWindowHours": "Failure window (hours) for backoff counters (default: 24).",
|
||||
|
||||
@@ -22,13 +22,12 @@ export type AuthConfig = {
|
||||
/** Billing backoff cap (hours). Default: 24. */
|
||||
billingMaxHours?: number;
|
||||
/**
|
||||
* Base backoff for permanent-auth failures (minutes). These errors (e.g.
|
||||
* API_KEY_INVALID) can be caused by transient provider outages, so the
|
||||
* default is much shorter than billing backoff. Default: 10.
|
||||
* Base backoff for high-confidence permanent-auth failures (minutes).
|
||||
* Default: 10.
|
||||
*/
|
||||
authPermanentBackoffMinutes?: number;
|
||||
/**
|
||||
* Cap for permanent-auth backoff (minutes). Default: 60.
|
||||
* Cap for high-confidence permanent-auth backoff (minutes). Default: 60.
|
||||
*/
|
||||
authPermanentMaxMinutes?: number;
|
||||
/**
|
||||
|
||||
@@ -443,6 +443,8 @@ export const OpenClawSchema = z
|
||||
billingBackoffHours: z.number().positive().optional(),
|
||||
billingBackoffHoursByProvider: z.record(z.string(), z.number().positive()).optional(),
|
||||
billingMaxHours: z.number().positive().optional(),
|
||||
authPermanentBackoffMinutes: z.number().positive().optional(),
|
||||
authPermanentMaxMinutes: z.number().positive().optional(),
|
||||
failureWindowHours: z.number().positive().optional(),
|
||||
overloadedProfileRotations: z.number().int().nonnegative().optional(),
|
||||
overloadedBackoffMs: z.number().int().nonnegative().optional(),
|
||||
|
||||
Reference in New Issue
Block a user