mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-05 18:10:21 +00:00
fix: per-model cooldown scope, stepped backoff, and user-facing rate-limit message (#49834)
Merged via squash.
Prepared head SHA: 7c488c070c
Co-authored-by: kiranvk-2011 <91108465+kiranvk-2011@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
@@ -74,4 +74,45 @@ describe("getSoonestCooldownExpiry", () => {
|
||||
|
||||
expect(getSoonestCooldownExpiry(store, ["openai:p1", "openai:p2"])).toBe(1_700_000_000_000);
|
||||
});
|
||||
|
||||
it("ignores unrelated model-scoped rate limits for the requested model", () => {
  // p1 is cooling down for a different model, so only p2's expiry is relevant
  // when asking on behalf of gpt-5.2.
  const now = 1_700_000_000_000;
  const requestedModel = "gpt-5.2";
  const profileIds = ["openai:p1", "openai:p2"];
  const store = makeStore({
    "openai:p1": {
      cooldownUntil: now + 10_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.4",
    },
    "openai:p2": {
      cooldownUntil: now + 30_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.2",
    },
  });

  const soonest = getSoonestCooldownExpiry(store, profileIds, {
    now,
    forModel: requestedModel,
  });

  expect(soonest).toBe(now + 30_000);
});
|
||||
|
||||
it("still counts profile-wide disables for other models", () => {
  // p1's rate limit targets another model, but its active disabledUntil is
  // profile-wide and must still be reported as the soonest expiry.
  const now = 1_700_000_000_000;
  const requestedModel = "gpt-5.2";
  const profileIds = ["openai:p1", "openai:p2"];
  const store = makeStore({
    "openai:p1": {
      cooldownUntil: now + 10_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.4",
      disabledUntil: now + 20_000,
    },
    "openai:p2": {
      cooldownUntil: now + 30_000,
      cooldownReason: "rate_limit",
      cooldownModel: "gpt-5.2",
    },
  });

  const soonest = getSoonestCooldownExpiry(store, profileIds, {
    now,
    forModel: requestedModel,
  });

  expect(soonest).toBe(now + 20_000);
});
|
||||
});
|
||||
|
||||
@@ -299,12 +299,12 @@ describe("markAuthProfileFailure", () => {
|
||||
|
||||
const stats = store.usageStats?.["anthropic:default"];
|
||||
// Error count should reset to 1 (not escalate to 4) because the
|
||||
// previous cooldown expired. Cooldown should be ~1 min, not ~60 min.
|
||||
// previous cooldown expired. Cooldown should be ~30s, not ~5 min.
|
||||
expect(stats?.errorCount).toBe(1);
|
||||
expect(stats?.failureCounts?.rate_limit).toBe(1);
|
||||
const cooldownMs = (stats?.cooldownUntil ?? 0) - now;
|
||||
// calculateAuthProfileCooldownMs(1) = 60_000 (1 minute)
|
||||
expect(cooldownMs).toBeLessThan(120_000);
|
||||
// calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
|
||||
expect(cooldownMs).toBeLessThan(60_000);
|
||||
expect(cooldownMs).toBeGreaterThan(0);
|
||||
} finally {
|
||||
fs.rmSync(agentDir, { recursive: true, force: true });
|
||||
@@ -336,11 +336,11 @@ describe("markAuthProfileFailure", () => {
|
||||
});
|
||||
|
||||
describe("calculateAuthProfileCooldownMs", () => {
  // The schedule is stepped (30s → 1m → 5m) and never exceeds five minutes.
  it("applies stepped backoff with a 5-min cap", () => {
    expect(calculateAuthProfileCooldownMs(1)).toBe(30_000); // 30 seconds
    expect(calculateAuthProfileCooldownMs(2)).toBe(60_000); // 1 minute
    expect(calculateAuthProfileCooldownMs(3)).toBe(5 * 60_000); // 5 minutes
    expect(calculateAuthProfileCooldownMs(4)).toBe(5 * 60_000); // 5 minutes (cap)
    expect(calculateAuthProfileCooldownMs(5)).toBe(5 * 60_000); // 5 minutes (cap)
  });
});
|
||||
|
||||
@@ -54,6 +54,8 @@ export type AuthProfileFailureReason =
|
||||
export type ProfileUsageStats = {
|
||||
lastUsed?: number;
|
||||
cooldownUntil?: number;
|
||||
cooldownReason?: AuthProfileFailureReason;
|
||||
cooldownModel?: string;
|
||||
disabledUntil?: number;
|
||||
disabledReason?: AuthProfileFailureReason;
|
||||
errorCount?: number;
|
||||
|
||||
@@ -147,6 +147,53 @@ describe("isProfileInCooldown", () => {
|
||||
});
|
||||
expect(isProfileInCooldown(store, "kilocode:default")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false for a different model when cooldown is model-scoped (rate_limit)", () => {
  const profileId = "github-copilot:github";
  const store = makeStore({
    "github-copilot:github": {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-sonnet-4.6",
    },
  });

  // Different model bypasses the cooldown
  const otherModelBlocked = isProfileInCooldown(store, profileId, undefined, "gpt-4.1");
  expect(otherModelBlocked).toBe(false);

  // Same model is still blocked
  const sameModelBlocked = isProfileInCooldown(store, profileId, undefined, "claude-sonnet-4.6");
  expect(sameModelBlocked).toBe(true);

  // No model specified — blocked (conservative)
  const unscopedBlocked = isProfileInCooldown(store, profileId);
  expect(unscopedBlocked).toBe(true);
});
|
||||
|
||||
it("returns true for all models when cooldownModel is undefined (profile-wide)", () => {
  const profileId = "github-copilot:github";
  const store = makeStore({
    "github-copilot:github": {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: undefined,
    },
  });

  // Without a recorded model the cooldown applies to every requested model.
  const sonnetBlocked = isProfileInCooldown(store, profileId, undefined, "claude-sonnet-4.6");
  expect(sonnetBlocked).toBe(true);

  const gptBlocked = isProfileInCooldown(store, profileId, undefined, "gpt-4.1");
  expect(gptBlocked).toBe(true);
});
|
||||
|
||||
it("does not bypass model-scoped cooldown when disabledUntil is active", () => {
  const profileId = "github-copilot:github";
  const store = makeStore({
    "github-copilot:github": {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-sonnet-4.6",
      disabledUntil: Date.now() + 120_000,
      disabledReason: "billing",
    },
  });

  // Even though cooldownModel is for a different model, billing disable
  // should keep the profile blocked for all models.
  const blocked = isProfileInCooldown(store, profileId, undefined, "gpt-4.1");
  expect(blocked).toBe(true);
});
|
||||
});
|
||||
|
||||
describe("resolveProfilesUnavailableReason", () => {
|
||||
@@ -636,8 +683,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
errorCount: 3,
|
||||
lastFailureAt: now - 60_000,
|
||||
}),
|
||||
// errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
|
||||
expectedUntil: (now: number) => now + 60_000,
|
||||
// errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
|
||||
expectedUntil: (now: number) => now + 30_000,
|
||||
readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
|
||||
},
|
||||
{
|
||||
@@ -690,3 +737,142 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
describe("markAuthProfileFailure — per-model cooldown metadata", () => {
  const profileId = "github-copilot:github";

  /** Build a store that contains the Copilot profile plus the given usage stats. */
  function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
    const store = makeStore(usageStats);
    store.profiles[profileId] = {
      type: "api_key",
      provider: "github-copilot",
      key: "ghu_test",
    };
    return store;
  }

  /**
   * Build a store whose Copilot profile is inside an active rate_limit
   * cooldown scoped to claude-sonnet-4.6, relative to the supplied `now`.
   */
  function makeActiveRateLimitedStore(now: number): AuthProfileStore {
    return makeStoreWithCopilot({
      "github-copilot:github": {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
  }

  /**
   * Record a failure with the system clock pinned to `params.now`.
   *
   * Pinning the clock matters: the fixtures use small synthetic timestamps, so
   * under real time their cooldown windows would already be expired and the
   * "active window" code path would never run. `reason` defaults to
   * "rate_limit" so existing callers keep their behaviour.
   */
  async function markFailure(params: {
    store: ReturnType<typeof makeStoreWithCopilot>;
    now: number;
    modelId?: string;
    reason?: Parameters<typeof markAuthProfileFailure>[0]["reason"];
  }): Promise<void> {
    vi.useFakeTimers();
    vi.setSystemTime(params.now);
    try {
      await markAuthProfileFailure({
        store: params.store,
        profileId,
        reason: params.reason ?? "rate_limit",
        modelId: params.modelId,
      });
    } finally {
      // Always restore real timers so later tests are not affected.
      vi.useRealTimers();
    }
  }

  it("records cooldownModel on first rate_limit failure", async () => {
    const now = 1_000_000;
    const store = makeStoreWithCopilot({});
    await markFailure({ store, now, modelId: "claude-sonnet-4.6" });
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownReason).toBe("rate_limit");
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

  it("widens cooldownModel to undefined when a different model fails during active cooldown", async () => {
    const now = 1_000_000;
    const store = makeActiveRateLimitedStore(now);
    // Different model fails during active cooldown
    await markFailure({ store, now, modelId: "gpt-4.1" });
    const stats = store.usageStats?.[profileId];
    // Scope widened to all models
    expect(stats?.cooldownModel).toBeUndefined();
    expect(stats?.cooldownReason).toBe("rate_limit");
  });

  it("preserves cooldownModel when the same model fails again during active cooldown", async () => {
    const now = 1_000_000;
    const store = makeActiveRateLimitedStore(now);
    await markFailure({ store, now, modelId: "claude-sonnet-4.6" });
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

  it("widens cooldownModel when rate_limit failure during active cooldown has no modelId", async () => {
    const now = 1_000_000;
    const store = makeActiveRateLimitedStore(now);
    await markFailure({ store, now, modelId: undefined });
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownReason).toBe("rate_limit");
    expect(stats?.cooldownModel).toBeUndefined();
  });

  it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => {
    const now = 1_000_000;
    const store = makeActiveRateLimitedStore(now);
    // Go through markFailure so the clock is pinned and the window is active.
    await markFailure({ store, now, reason: "auth", modelId: "claude-opus-4.6" });
    const stats = store.usageStats?.[profileId];
    // Reason should update to the new failure type, not stay as rate_limit
    expect(stats?.cooldownReason).toBe("auth");
    // Model scope should be cleared — auth failures are profile-wide
    expect(stats?.cooldownModel).toBeUndefined();
  });

  it("clears cooldownModel when non-rate_limit failure hits same model during active window", async () => {
    const now = 1_000_000;
    const store = makeActiveRateLimitedStore(now);
    // Go through markFailure so the clock is pinned and the window is active.
    await markFailure({ store, now, reason: "auth", modelId: "claude-sonnet-4.6" });
    const stats = store.usageStats?.[profileId];
    // Even same-model auth failure should clear model scope (auth is profile-wide)
    expect(stats?.cooldownReason).toBe("auth");
    expect(stats?.cooldownModel).toBeUndefined();
  });
});
|
||||
|
||||
@@ -63,6 +63,7 @@ export function isProfileInCooldown(
|
||||
store: AuthProfileStore,
|
||||
profileId: string,
|
||||
now?: number,
|
||||
forModel?: string,
|
||||
): boolean {
|
||||
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
|
||||
return false;
|
||||
@@ -71,8 +72,15 @@ export function isProfileInCooldown(
|
||||
if (!stats) {
|
||||
return false;
|
||||
}
|
||||
const unusableUntil = resolveProfileUnusableUntil(stats);
|
||||
const ts = now ?? Date.now();
|
||||
// Model-aware bypass: if the cooldown was caused by a rate_limit on a
|
||||
// specific model and the caller is requesting a *different* model, allow it.
|
||||
// We still honour any active billing/auth disable (`disabledUntil`) — those
|
||||
// are profile-wide and must not be short-circuited by model scoping.
|
||||
if (shouldBypassModelScopedCooldown(stats, ts, forModel)) {
|
||||
return false;
|
||||
}
|
||||
const unusableUntil = resolveProfileUnusableUntil(stats);
|
||||
return unusableUntil ? ts < unusableUntil : false;
|
||||
}
|
||||
|
||||
@@ -167,13 +175,18 @@ export function resolveProfilesUnavailableReason(params: {
|
||||
export function getSoonestCooldownExpiry(
|
||||
store: AuthProfileStore,
|
||||
profileIds: string[],
|
||||
options?: { now?: number; forModel?: string },
|
||||
): number | null {
|
||||
const ts = options?.now ?? Date.now();
|
||||
let soonest: number | null = null;
|
||||
for (const id of profileIds) {
|
||||
const stats = store.usageStats?.[id];
|
||||
if (!stats) {
|
||||
continue;
|
||||
}
|
||||
if (shouldBypassModelScopedCooldown(stats, ts, options?.forModel)) {
|
||||
continue;
|
||||
}
|
||||
const until = resolveProfileUnusableUntil(stats);
|
||||
if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) {
|
||||
continue;
|
||||
@@ -185,6 +198,20 @@ export function getSoonestCooldownExpiry(
|
||||
return soonest;
|
||||
}
|
||||
|
||||
/**
 * Decide whether a cooldown may be ignored for the requested model.
 *
 * The bypass applies only when all of the following hold:
 * - the caller names a model (`forModel`),
 * - the recorded cooldown reason is "rate_limit",
 * - the cooldown is scoped to a specific model that differs from `forModel`,
 * - `isActiveUnusableWindow(stats.disabledUntil, now)` is false.
 *
 * @param stats    Cooldown metadata of the profile being checked.
 * @param now      Timestamp forwarded to the disabled-window check.
 * @param forModel Model the caller wants to use; omitted means no bypass.
 * @returns `true` when the cooldown does not apply to `forModel`.
 */
function shouldBypassModelScopedCooldown(
  stats: Pick<ProfileUsageStats, "cooldownReason" | "cooldownModel" | "disabledUntil">,
  now: number,
  forModel?: string,
): boolean {
  // Without a requested model there is nothing to scope against.
  if (!forModel) {
    return false;
  }
  // Only rate limits are treated as model-scoped.
  if (stats.cooldownReason !== "rate_limit") {
    return false;
  }
  const scopedModel = stats.cooldownModel;
  // No recorded model means the cooldown covers every model; a matching
  // model means the caller is asking for exactly the limited one.
  if (!scopedModel || scopedModel === forModel) {
    return false;
  }
  // An active disable window blocks the profile regardless of model scope.
  return !isActiveUnusableWindow(stats.disabledUntil, now);
}
|
||||
|
||||
/**
|
||||
* Clear expired cooldowns from all profiles in the store.
|
||||
*
|
||||
@@ -231,6 +258,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
|
||||
|
||||
if (cooldownExpired) {
|
||||
stats.cooldownUntil = undefined;
|
||||
stats.cooldownReason = undefined;
|
||||
stats.cooldownModel = undefined;
|
||||
profileMutated = true;
|
||||
}
|
||||
if (disabledExpired) {
|
||||
@@ -294,10 +323,13 @@ export async function markAuthProfileUsed(params: {
|
||||
|
||||
/**
 * Compute the cooldown duration for a profile after `errorCount` failures.
 *
 * Uses a stepped schedule rather than exponential growth:
 * - 1st failure (or any count below 1): 30 seconds
 * - 2nd failure: 1 minute
 * - 3rd failure and beyond: 5 minutes (cap)
 *
 * @param errorCount Number of failures recorded so far; values below 1 are
 *   treated as 1.
 * @returns Cooldown duration in milliseconds.
 */
export function calculateAuthProfileCooldownMs(errorCount: number): number {
  // Clamp so zero or negative counts land on the first step.
  const normalized = Math.max(1, errorCount);
  if (normalized <= 1) {
    return 30_000; // 30 seconds
  }
  if (normalized <= 2) {
    return 60_000; // 1 minute
  }
  return 5 * 60_000; // 5 minutes max
}
|
||||
|
||||
type ResolvedAuthCooldownConfig = {
|
||||
@@ -385,6 +417,8 @@ function resetUsageStats(
|
||||
...existing,
|
||||
errorCount: 0,
|
||||
cooldownUntil: undefined,
|
||||
cooldownReason: undefined,
|
||||
cooldownModel: undefined,
|
||||
disabledUntil: undefined,
|
||||
disabledReason: undefined,
|
||||
failureCounts: undefined,
|
||||
@@ -417,6 +451,7 @@ function computeNextProfileUsageStats(params: {
|
||||
now: number;
|
||||
reason: AuthProfileFailureReason;
|
||||
cfgResolved: ResolvedAuthCooldownConfig;
|
||||
modelId?: string;
|
||||
}): ProfileUsageStats {
|
||||
const windowMs = params.cfgResolved.failureWindowMs;
|
||||
const windowExpired =
|
||||
@@ -470,6 +505,44 @@ function computeNextProfileUsageStats(params: {
|
||||
now: params.now,
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
// Update cooldown metadata based on whether the window is still active
|
||||
// and whether the same or a different model is failing.
|
||||
const existingCooldownActive =
|
||||
typeof params.existing.cooldownUntil === "number" &&
|
||||
params.existing.cooldownUntil > params.now;
|
||||
if (existingCooldownActive) {
|
||||
// Always use the latest failure reason so that downstream consumers
|
||||
// (e.g. isProfileInCooldown model-bypass) see the most recent signal.
|
||||
// A non-rate_limit failure (auth, billing, …) is profile-wide, so
|
||||
// upgrading from rate_limit → auth correctly blocks all models.
|
||||
updatedStats.cooldownReason = params.reason;
|
||||
// If a different model fails during an active window, widen the scope
|
||||
// to all models (undefined) so neither model bypasses the cooldown.
|
||||
if (
|
||||
params.existing.cooldownModel &&
|
||||
params.modelId &&
|
||||
params.existing.cooldownModel !== params.modelId
|
||||
) {
|
||||
updatedStats.cooldownModel = undefined;
|
||||
} else if (
|
||||
params.reason === "rate_limit" &&
|
||||
!params.modelId &&
|
||||
params.existing.cooldownModel
|
||||
) {
|
||||
// Unknown originating model during an active model-scoped cooldown:
|
||||
// widen scope conservatively so no model can bypass on stale metadata.
|
||||
updatedStats.cooldownModel = undefined;
|
||||
} else if (params.reason !== "rate_limit") {
|
||||
// Non-rate-limit failures are profile-wide — clear model scope even
|
||||
// when the same model fails, so that no model can bypass.
|
||||
updatedStats.cooldownModel = undefined;
|
||||
} else {
|
||||
updatedStats.cooldownModel = params.existing.cooldownModel;
|
||||
}
|
||||
} else {
|
||||
updatedStats.cooldownReason = params.reason;
|
||||
updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
|
||||
}
|
||||
}
|
||||
|
||||
return updatedStats;
|
||||
@@ -487,8 +560,9 @@ export async function markAuthProfileFailure(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
runId?: string;
|
||||
modelId?: string;
|
||||
}): Promise<void> {
|
||||
const { store, profileId, reason, agentDir, cfg, runId } = params;
|
||||
const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
|
||||
const profile = store.profiles[profileId];
|
||||
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
|
||||
return;
|
||||
@@ -517,6 +591,7 @@ export async function markAuthProfileFailure(params: {
|
||||
now,
|
||||
reason,
|
||||
cfgResolved,
|
||||
modelId,
|
||||
});
|
||||
nextStats = computed;
|
||||
updateUsageStatsEntry(freshStore, profileId, () => computed);
|
||||
@@ -555,6 +630,7 @@ export async function markAuthProfileFailure(params: {
|
||||
now,
|
||||
reason,
|
||||
cfgResolved,
|
||||
modelId,
|
||||
});
|
||||
nextStats = computed;
|
||||
updateUsageStatsEntry(store, profileId, () => computed);
|
||||
@@ -571,8 +647,8 @@ export async function markAuthProfileFailure(params: {
|
||||
}
|
||||
|
||||
/**
 * Mark a profile as transiently failed. Applies stepped backoff cooldown.
 * Cooldown times: 30s, 1min, 5min (capped).
 * Uses store lock to avoid overwriting concurrent usage updates.
 */
|
||||
export async function markAuthProfileCooldown(params: {
|
||||
|
||||
@@ -679,6 +679,119 @@ describe("runWithModelFallback", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("refreshes cooldown expiry from persisted auth state before fallback summary", async () => {
  const expiry = Date.now() + 120_000;
  const cfg = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "anthropic/claude-opus-4-5",
          fallbacks: ["openai/gpt-5.2"],
        },
      },
    },
  });
  const store: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      "anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
      "openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
    },
  };

  await withTempAuthStore(store, async (tempDir) => {
    // The primary attempt persists a cooldown through a separate store write
    // before failing; the summary must pick that expiry up from disk.
    const runAttempt = vi.fn().mockImplementation(async (provider: string, model: string) => {
      const isPrimary = provider === "anthropic" && model === "claude-opus-4-5";
      if (isPrimary) {
        const cooledStore: AuthProfileStore = {
          ...store,
          usageStats: {
            "anthropic:default": {
              cooldownUntil: expiry,
              cooldownReason: "rate_limit",
              cooldownModel: "claude-opus-4-5",
              failureCounts: { rate_limit: 1 },
            },
          },
        };
        saveAuthProfileStore(cooledStore, tempDir);
      }

      throw Object.assign(new Error("rate limited"), { status: 429 });
    });

    const outcome = runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-opus-4-5",
      agentDir: tempDir,
      run: runAttempt,
    });

    await expect(outcome).rejects.toMatchObject({
      name: "FallbackSummaryError",
      soonestCooldownExpiry: expiry,
    });
  });
});
|
||||
|
||||
it("filters fallback summary cooldown expiry to attempted model scopes", async () => {
  const now = Date.now();
  const unrelatedExpiry = now + 15_000;
  const relevantExpiry = now + 90_000;
  const cfg = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "anthropic/claude-opus-4-5",
          fallbacks: ["openai/gpt-5.2"],
        },
      },
    },
  });
  // The anthropic cooldown targets a model that is never attempted, so the
  // sooner `unrelatedExpiry` must not appear in the summary.
  const store: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      "anthropic:default": { type: "api_key", provider: "anthropic", key: "anthropic-key" },
      "openai:default": { type: "api_key", provider: "openai", key: "openai-key" },
    },
    usageStats: {
      "anthropic:default": {
        cooldownUntil: unrelatedExpiry,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-haiku-3-5",
        failureCounts: { rate_limit: 1 },
      },
      "openai:default": {
        cooldownUntil: relevantExpiry,
        cooldownReason: "rate_limit",
        cooldownModel: "gpt-5.2",
        failureCounts: { rate_limit: 1 },
      },
    },
  };

  await withTempAuthStore(store, async (tempDir) => {
    const rateLimited = Object.assign(new Error("rate limited"), { status: 429 });
    const runAttempt = vi.fn().mockRejectedValue(rateLimited);

    const outcome = runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-opus-4-5",
      agentDir: tempDir,
      run: runAttempt,
    });

    await expect(outcome).rejects.toMatchObject({
      name: "FallbackSummaryError",
      soonestCooldownExpiry: relevantExpiry,
    });
  });
});
|
||||
|
||||
it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => {
|
||||
const cfg = makeFallbacksOnlyCfg();
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
ensureAuthProfileStore,
|
||||
getSoonestCooldownExpiry,
|
||||
isProfileInCooldown,
|
||||
loadAuthProfileStoreForRuntime,
|
||||
resolveProfilesUnavailableReason,
|
||||
resolveAuthProfileOrder,
|
||||
} from "./auth-profiles.js";
|
||||
@@ -39,6 +40,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
|
||||
|
||||
const log = createSubsystemLogger("model-fallback");
|
||||
|
||||
/**
 * Error raised once every model fallback candidate has failed.
 *
 * Besides the summary message it exposes the individual attempts and the
 * soonest cooldown expiry (or `null` when none is known), so callers can
 * produce informative user-facing messages such as
 * "rate-limited, retry in 30 s".
 */
export class FallbackSummaryError extends Error {
  /** Attempts supplied by the thrower, kept as the same array reference. */
  readonly attempts: FallbackAttempt[];
  /** Timestamp of the soonest cooldown expiry, or `null` when unknown. */
  readonly soonestCooldownExpiry: number | null;

  /**
   * @param message               Human-readable summary of the failure.
   * @param attempts              Per-attempt details to carry on the error.
   * @param soonestCooldownExpiry Soonest cooldown expiry, or `null`.
   * @param cause                 Optional underlying error, forwarded to `Error`'s `cause` option.
   */
  constructor(
    message: string,
    attempts: FallbackAttempt[],
    soonestCooldownExpiry: number | null,
    cause?: Error,
  ) {
    super(message, { cause });
    // Set an explicit name so the error can be matched by name.
    this.name = "FallbackSummaryError";
    this.attempts = attempts;
    this.soonestCooldownExpiry = soonestCooldownExpiry;
  }
}
|
||||
|
||||
/**
 * Type guard for {@link FallbackSummaryError}.
 *
 * @param err Any caught value.
 * @returns `true` when `err` is an instance of `FallbackSummaryError`.
 */
export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
  const matches = err instanceof FallbackSummaryError;
  return matches;
}
|
||||
|
||||
export type ModelFallbackRunOptions = {
|
||||
allowTransientCooldownProbe?: boolean;
|
||||
};
|
||||
@@ -194,20 +221,59 @@ function throwFallbackFailureSummary(params: {
|
||||
lastError: unknown;
|
||||
label: string;
|
||||
formatAttempt: (attempt: FallbackAttempt) => string;
|
||||
soonestCooldownExpiry?: number | null;
|
||||
}): never {
|
||||
if (params.attempts.length <= 1 && params.lastError) {
|
||||
throw params.lastError;
|
||||
}
|
||||
const summary =
|
||||
params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
|
||||
throw new Error(
|
||||
throw new FallbackSummaryError(
|
||||
`All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
|
||||
{
|
||||
cause: params.lastError instanceof Error ? params.lastError : undefined,
|
||||
},
|
||||
params.attempts,
|
||||
params.soonestCooldownExpiry ?? null,
|
||||
params.lastError instanceof Error ? params.lastError : undefined,
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Find the soonest cooldown expiry across all fallback candidates.
 *
 * For each candidate, the provider's auth profiles are resolved and
 * `getSoonestCooldownExpiry` is asked for the soonest expiry with the
 * candidate's model passed as `forModel`.
 *
 * @returns The smallest finite expiry found, or `null` when there is no auth
 *   store or no candidate yields a finite expiry.
 */
function resolveFallbackSoonestCooldownExpiry(params: {
  authStore: ReturnType<typeof ensureAuthProfileStore> | null;
  agentDir?: string;
  cfg: OpenClawConfig | undefined;
  candidates: ModelCandidate[];
}): number | null {
  if (!params.authStore) {
    return null;
  }

  // Refresh from persisted state because embedded attempts can update auth
  // cooldowns through a separate store instance while the fallback loop runs.
  const persistedStore = loadAuthProfileStoreForRuntime(params.agentDir, {
    readOnly: true,
    allowKeychainPrompt: false,
  });

  let earliest: number | null = null;
  for (const { provider, model } of params.candidates) {
    const profileIds = resolveAuthProfileOrder({
      cfg: params.cfg,
      store: persistedStore,
      provider,
    });
    const expiry = getSoonestCooldownExpiry(persistedStore, profileIds, {
      forModel: model,
    });
    // Skip candidates that report no (or a non-finite) expiry.
    if (typeof expiry !== "number" || !Number.isFinite(expiry)) {
      continue;
    }
    if (earliest === null || expiry < earliest) {
      earliest = expiry;
    }
  }

  return earliest;
}
|
||||
|
||||
function resolveImageFallbackCandidates(params: {
|
||||
cfg: OpenClawConfig | undefined;
|
||||
defaultProvider: string;
|
||||
@@ -393,6 +459,7 @@ function shouldProbePrimaryDuringCooldown(params: {
|
||||
throttleKey: string;
|
||||
authStore: ReturnType<typeof ensureAuthProfileStore>;
|
||||
profileIds: string[];
|
||||
model: string;
|
||||
}): boolean {
|
||||
if (!params.isPrimary || !params.hasFallbackCandidates) {
|
||||
return false;
|
||||
@@ -402,7 +469,10 @@ function shouldProbePrimaryDuringCooldown(params: {
|
||||
return false;
|
||||
}
|
||||
|
||||
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
|
||||
const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds, {
|
||||
now: params.now,
|
||||
forModel: params.model,
|
||||
});
|
||||
if (soonest === null || !Number.isFinite(soonest)) {
|
||||
return true;
|
||||
}
|
||||
@@ -453,6 +523,7 @@ function resolveCooldownDecision(params: {
|
||||
throttleKey: params.probeThrottleKey,
|
||||
authStore: params.authStore,
|
||||
profileIds: params.profileIds,
|
||||
model: params.candidate.model,
|
||||
});
|
||||
|
||||
const inferredReason =
|
||||
@@ -553,7 +624,9 @@ export async function runWithModelFallback<T>(params: {
|
||||
store: authStore,
|
||||
provider: candidate.provider,
|
||||
});
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
|
||||
const isAnyProfileAvailable = profileIds.some(
|
||||
(id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
|
||||
);
|
||||
|
||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||
// All profiles for this provider are in cooldown.
|
||||
@@ -762,6 +835,12 @@ export async function runWithModelFallback<T>(params: {
|
||||
`${attempt.provider}/${attempt.model}: ${attempt.error}${
|
||||
attempt.reason ? ` (${attempt.reason})` : ""
|
||||
}`,
|
||||
soonestCooldownExpiry: resolveFallbackSoonestCooldownExpiry({
|
||||
authStore,
|
||||
agentDir: params.agentDir,
|
||||
cfg: params.cfg,
|
||||
candidates,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -661,7 +661,7 @@ export async function runEmbeddedPiAgent(
|
||||
let nextIndex = profileIndex + 1;
|
||||
while (nextIndex < profileCandidates.length) {
|
||||
const candidate = profileCandidates[nextIndex];
|
||||
if (candidate && isProfileInCooldown(authStore, candidate)) {
|
||||
if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
|
||||
nextIndex += 1;
|
||||
continue;
|
||||
}
|
||||
@@ -688,7 +688,9 @@ export async function runEmbeddedPiAgent(
|
||||
);
|
||||
const allAutoProfilesInCooldown =
|
||||
autoProfileCandidates.length > 0 &&
|
||||
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
|
||||
autoProfileCandidates.every((candidate) =>
|
||||
isProfileInCooldown(authStore, candidate, undefined, modelId),
|
||||
);
|
||||
const unavailableReason = allAutoProfilesInCooldown
|
||||
? (resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
@@ -704,7 +706,9 @@ export async function runEmbeddedPiAgent(
|
||||
while (profileIndex < profileCandidates.length) {
|
||||
const candidate = profileCandidates[profileIndex];
|
||||
const inCooldown =
|
||||
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
|
||||
candidate &&
|
||||
candidate !== lockedProfileId &&
|
||||
isProfileInCooldown(authStore, candidate, undefined, modelId);
|
||||
if (inCooldown) {
|
||||
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
|
||||
didTransientCooldownProbe = true;
|
||||
@@ -774,6 +778,7 @@ export async function runEmbeddedPiAgent(
|
||||
reason?: AuthProfileFailureReason | null;
|
||||
config?: RunEmbeddedPiAgentParams["config"];
|
||||
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
|
||||
modelId?: string;
|
||||
}) => {
|
||||
const { profileId, reason } = failure;
|
||||
if (!profileId || !reason || reason === "timeout") {
|
||||
@@ -786,6 +791,7 @@ export async function runEmbeddedPiAgent(
|
||||
cfg: params.config,
|
||||
agentDir,
|
||||
runId: params.runId,
|
||||
modelId: failure.modelId,
|
||||
});
|
||||
};
|
||||
const resolveAuthProfileFailureReason = (
|
||||
@@ -1336,6 +1342,7 @@ export async function runEmbeddedPiAgent(
|
||||
await maybeMarkAuthProfileFailure({
|
||||
profileId: lastProfileId,
|
||||
reason: promptProfileFailureReason,
|
||||
modelId,
|
||||
});
|
||||
const promptFailoverFailure =
|
||||
promptFailoverReason !== null || isFailoverErrorMessage(errorText);
|
||||
@@ -1477,6 +1484,7 @@ export async function runEmbeddedPiAgent(
|
||||
await maybeMarkAuthProfileFailure({
|
||||
profileId: lastProfileId,
|
||||
reason,
|
||||
modelId,
|
||||
});
|
||||
if (timedOut && !isProbeSession) {
|
||||
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
|
||||
|
||||
Reference in New Issue
Block a user