Agents: allow cooldown probe for timeout failover reason

This commit is contained in:
Neerav Makwana
2026-04-09 21:16:54 -04:00
committed by Ayaan Zaidi
parent a12c2ecd8a
commit 75deed54f3
5 changed files with 53 additions and 30 deletions

View File

@@ -70,8 +70,8 @@ const CASES: ReasonCase[] = [
},
{
reason: "timeout",
allowCooldownProbe: false,
useTransientProbeSlot: false,
allowCooldownProbe: true,
useTransientProbeSlot: true,
preserveTransientProbeSlot: false,
},
{

View File

@@ -7,14 +7,20 @@ export function shouldAllowCooldownProbeForReason(
reason === "rate_limit" ||
reason === "overloaded" ||
reason === "billing" ||
reason === "unknown"
reason === "unknown" ||
reason === "timeout"
);
}
/**
 * Reports whether a cooldown attributed to `reason` should use the transient
 * cooldown-probe slot.
 *
 * @param reason - Failover reason to classify; may be `null` or `undefined`.
 * @returns `true` for `"rate_limit"`, `"overloaded"`, `"unknown"`, and
 *   `"timeout"`; `false` for every other value, including `null`/`undefined`.
 */
export function shouldUseTransientCooldownProbeSlot(
  reason: FailoverReason | null | undefined,
): boolean {
  // Single return: a leftover earlier `return` without "timeout" used to
  // short-circuit this check and made the timeout branch unreachable.
  return (
    reason === "rate_limit" ||
    reason === "overloaded" ||
    reason === "unknown" ||
    reason === "timeout"
  );
}
export function shouldPreserveTransientCooldownProbeSlot(

View File

@@ -1277,11 +1277,10 @@ describe("runWithModelFallback", () => {
});
});
// Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction
describe("fallback behavior with provider cooldowns", () => {
async function makeAuthStoreWithCooldown(
provider: string,
reason: "rate_limit" | "overloaded" | "auth" | "billing",
reason: "rate_limit" | "overloaded" | "timeout" | "auth" | "billing",
): Promise<{ store: AuthProfileStore; dir: string }> {
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
const now = Date.now();
@@ -1292,15 +1291,12 @@ describe("runWithModelFallback", () => {
},
usageStats: {
[`${provider}:default`]:
reason === "rate_limit" || reason === "overloaded"
reason === "rate_limit" || reason === "overloaded" || reason === "timeout"
? {
// Transient cooldown reasons are tracked through
// cooldownUntil and failureCounts, not disabledReason.
cooldownUntil: now + 300000,
failureCounts: { [reason]: 1 },
}
: {
// Auth/billing issues use disabledUntil
disabledUntil: now + 300000,
disabledReason: reason,
},
@@ -1323,7 +1319,7 @@ describe("runWithModelFallback", () => {
},
});
const run = vi.fn().mockResolvedValueOnce("sonnet success"); // Fallback succeeds
const run = vi.fn().mockResolvedValueOnce("sonnet success");
const result = await runWithModelFallback({
cfg,
@@ -1334,7 +1330,7 @@ describe("runWithModelFallback", () => {
});
expect(result.result).toBe("sonnet success");
expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
@@ -1370,6 +1366,36 @@ describe("runWithModelFallback", () => {
});
});
// With the anthropic profile cooling down for "timeout", the primary model is
// not run; the first same-provider fallback is attempted with the transient
// cooldown probe flag set.
it("attempts same-provider fallbacks during timeout cooldown", async () => {
  const cooledDownStore = await makeAuthStoreWithCooldown("anthropic", "timeout");
  const modelSelection = {
    primary: "anthropic/claude-opus-4-6",
    fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
  };
  const config = makeCfg({
    agents: { defaults: { model: modelSelection } },
  });
  const runSpy = vi.fn().mockResolvedValueOnce("sonnet success");

  const outcome = await runWithModelFallback({
    cfg: config,
    provider: "anthropic",
    model: "claude-opus-4-6",
    run: runSpy,
    agentDir: cooledDownStore.dir,
  });

  expect(outcome.result).toBe("sonnet success");
  // Exactly one attempt: the sibling anthropic model, run as a cooldown probe.
  expect(runSpy).toHaveBeenCalledTimes(1);
  expect(runSpy).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
    allowTransientCooldownProbe: true,
  });
});
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
const cfg = makeCfg({
@@ -1427,7 +1453,6 @@ describe("runWithModelFallback", () => {
});
it("tries cross-provider fallbacks when same provider has rate limit", async () => {
// Anthropic in rate limit cooldown, Groq available
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
const store: AuthProfileStore = {
version: AUTH_STORE_VERSION,
@@ -1437,11 +1462,9 @@ describe("runWithModelFallback", () => {
},
usageStats: {
"anthropic:default": {
// Rate-limit reason is inferred from failureCounts for cooldown windows.
cooldownUntil: Date.now() + 300000,
failureCounts: { rate_limit: 2 },
},
// Groq not in cooldown
},
};
saveAuthProfileStore(store, tmpDir);
@@ -1459,8 +1482,8 @@ describe("runWithModelFallback", () => {
const run = vi
.fn()
.mockRejectedValueOnce(new Error("Still rate limited")) // Sonnet still fails
.mockResolvedValueOnce("groq success"); // Groq works
.mockRejectedValueOnce(new Error("Still rate limited"))
.mockResolvedValueOnce("groq success");
const result = await runWithModelFallback({
cfg,
@@ -1474,8 +1497,8 @@ describe("runWithModelFallback", () => {
expect(run).toHaveBeenCalledTimes(2);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
}); // Rate limit allows attempt
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
});
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
});
it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => {
@@ -1497,8 +1520,8 @@ describe("runWithModelFallback", () => {
const run = vi
.fn()
.mockRejectedValueOnce(new Error("Still rate limited")) // First same-provider probe fails
.mockResolvedValueOnce("groq success"); // Next provider succeeds
.mockRejectedValueOnce(new Error("Still rate limited"))
.mockResolvedValueOnce("groq success");
const result = await runWithModelFallback({
cfg,
@@ -1509,8 +1532,6 @@ describe("runWithModelFallback", () => {
});
expect(result.result).toBe("groq success");
// Primary is skipped, first same-provider fallback is probed, second same-provider fallback
// is skipped (probe already attempted), then cross-provider fallback runs.
expect(run).toHaveBeenCalledTimes(2);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,

View File

@@ -605,15 +605,9 @@ function resolveCooldownDecision(params: {
};
}
// For primary: try when requested model or when probe allows.
// For same-provider fallbacks: only relax cooldown on transient provider
// limits, which are often model-scoped and can recover on a sibling model.
const shouldAttemptDespiteCooldown =
(params.isPrimary && (!params.requestedModel || shouldProbe)) ||
(!params.isPrimary &&
(inferredReason === "rate_limit" ||
inferredReason === "overloaded" ||
inferredReason === "unknown"));
(!params.isPrimary && shouldUseTransientCooldownProbeSlot(inferredReason));
if (!shouldAttemptDespiteCooldown) {
return {
type: "skip",