mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-29 18:12:52 +00:00
Agents: allow cooldown probe for timeout failover reason
This commit is contained in:
committed by
Ayaan Zaidi
parent
a12c2ecd8a
commit
75deed54f3
@@ -70,8 +70,8 @@ const CASES: ReasonCase[] = [
|
||||
},
|
||||
{
|
||||
reason: "timeout",
|
||||
allowCooldownProbe: false,
|
||||
useTransientProbeSlot: false,
|
||||
allowCooldownProbe: true,
|
||||
useTransientProbeSlot: true,
|
||||
preserveTransientProbeSlot: false,
|
||||
},
|
||||
{
|
||||
|
||||
@@ -7,14 +7,20 @@ export function shouldAllowCooldownProbeForReason(
|
||||
reason === "rate_limit" ||
|
||||
reason === "overloaded" ||
|
||||
reason === "billing" ||
|
||||
reason === "unknown"
|
||||
reason === "unknown" ||
|
||||
reason === "timeout"
|
||||
);
|
||||
}
|
||||
|
||||
export function shouldUseTransientCooldownProbeSlot(
|
||||
reason: FailoverReason | null | undefined,
|
||||
): boolean {
|
||||
return reason === "rate_limit" || reason === "overloaded" || reason === "unknown";
|
||||
return (
|
||||
reason === "rate_limit" ||
|
||||
reason === "overloaded" ||
|
||||
reason === "unknown" ||
|
||||
reason === "timeout"
|
||||
);
|
||||
}
|
||||
|
||||
export function shouldPreserveTransientCooldownProbeSlot(
|
||||
|
||||
@@ -1277,11 +1277,10 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction
|
||||
describe("fallback behavior with provider cooldowns", () => {
|
||||
async function makeAuthStoreWithCooldown(
|
||||
provider: string,
|
||||
reason: "rate_limit" | "overloaded" | "auth" | "billing",
|
||||
reason: "rate_limit" | "overloaded" | "timeout" | "auth" | "billing",
|
||||
): Promise<{ store: AuthProfileStore; dir: string }> {
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
|
||||
const now = Date.now();
|
||||
@@ -1292,15 +1291,12 @@ describe("runWithModelFallback", () => {
|
||||
},
|
||||
usageStats: {
|
||||
[`${provider}:default`]:
|
||||
reason === "rate_limit" || reason === "overloaded"
|
||||
reason === "rate_limit" || reason === "overloaded" || reason === "timeout"
|
||||
? {
|
||||
// Transient cooldown reasons are tracked through
|
||||
// cooldownUntil and failureCounts, not disabledReason.
|
||||
cooldownUntil: now + 300000,
|
||||
failureCounts: { [reason]: 1 },
|
||||
}
|
||||
: {
|
||||
// Auth/billing issues use disabledUntil
|
||||
disabledUntil: now + 300000,
|
||||
disabledReason: reason,
|
||||
},
|
||||
@@ -1323,7 +1319,7 @@ describe("runWithModelFallback", () => {
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi.fn().mockResolvedValueOnce("sonnet success"); // Fallback succeeds
|
||||
const run = vi.fn().mockResolvedValueOnce("sonnet success");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
@@ -1334,7 +1330,7 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
|
||||
expect(result.result).toBe("sonnet success");
|
||||
expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
@@ -1370,6 +1366,36 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("attempts same-provider fallbacks during timeout cooldown", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "timeout");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi.fn().mockResolvedValueOnce("sonnet success");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("sonnet success");
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
|
||||
const cfg = makeCfg({
|
||||
@@ -1427,7 +1453,6 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
|
||||
it("tries cross-provider fallbacks when same provider has rate limit", async () => {
|
||||
// Anthropic in rate limit cooldown, Groq available
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
@@ -1437,11 +1462,9 @@ describe("runWithModelFallback", () => {
|
||||
},
|
||||
usageStats: {
|
||||
"anthropic:default": {
|
||||
// Rate-limit reason is inferred from failureCounts for cooldown windows.
|
||||
cooldownUntil: Date.now() + 300000,
|
||||
failureCounts: { rate_limit: 2 },
|
||||
},
|
||||
// Groq not in cooldown
|
||||
},
|
||||
};
|
||||
saveAuthProfileStore(store, tmpDir);
|
||||
@@ -1459,8 +1482,8 @@ describe("runWithModelFallback", () => {
|
||||
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("Still rate limited")) // Sonnet still fails
|
||||
.mockResolvedValueOnce("groq success"); // Groq works
|
||||
.mockRejectedValueOnce(new Error("Still rate limited"))
|
||||
.mockResolvedValueOnce("groq success");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
@@ -1474,8 +1497,8 @@ describe("runWithModelFallback", () => {
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
}); // Rate limit allows attempt
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
|
||||
});
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
|
||||
});
|
||||
|
||||
it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => {
|
||||
@@ -1497,8 +1520,8 @@ describe("runWithModelFallback", () => {
|
||||
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("Still rate limited")) // First same-provider probe fails
|
||||
.mockResolvedValueOnce("groq success"); // Next provider succeeds
|
||||
.mockRejectedValueOnce(new Error("Still rate limited"))
|
||||
.mockResolvedValueOnce("groq success");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
@@ -1509,8 +1532,6 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
|
||||
expect(result.result).toBe("groq success");
|
||||
// Primary is skipped, first same-provider fallback is probed, second same-provider fallback
|
||||
// is skipped (probe already attempted), then cross-provider fallback runs.
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
|
||||
@@ -605,15 +605,9 @@ function resolveCooldownDecision(params: {
|
||||
};
|
||||
}
|
||||
|
||||
// For primary: try when requested model or when probe allows.
|
||||
// For same-provider fallbacks: only relax cooldown on transient provider
|
||||
// limits, which are often model-scoped and can recover on a sibling model.
|
||||
const shouldAttemptDespiteCooldown =
|
||||
(params.isPrimary && (!params.requestedModel || shouldProbe)) ||
|
||||
(!params.isPrimary &&
|
||||
(inferredReason === "rate_limit" ||
|
||||
inferredReason === "overloaded" ||
|
||||
inferredReason === "unknown"));
|
||||
(!params.isPrimary && shouldUseTransientCooldownProbeSlot(inferredReason));
|
||||
if (!shouldAttemptDespiteCooldown) {
|
||||
return {
|
||||
type: "skip",
|
||||
|
||||
Reference in New Issue
Block a user