diff --git a/CHANGELOG.md b/CHANGELOG.md index 0df4711abaf..b4b4a5b4064 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - Security/Browser uploads: revalidate upload paths at use-time in Playwright file-chooser and direct-input flows so missing/rebound paths are rejected before `setFiles`, with regression coverage for strict missing-path handling. - Security/LINE: cap unsigned webhook body reads before auth/signature handling to bound unauthenticated body processing. (#26095) Thanks @bmendonca3. - Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231) +- Agents/Model fallback: keep same-provider fallback chains active when session model differs from configured primary, infer cooldown reason from provider profile state (instead of `disabledReason` only), keep no-profile fallback providers eligible (env/models.json paths), and only relax same-provider cooldown fallback attempts for `rate_limit`. (#23816) Thanks @ramezgaberiel. - Followups/Routing: when explicit origin routing fails, allow same-channel fallback dispatch (while still blocking cross-channel fallback) so followup replies do not get dropped on transient origin-adapter failures. (#26109) Thanks @Sid-Qin. - Agents/Model fallback: continue fallback traversal on unrecognized errors when candidates remain, while still throwing the original unknown error on the last candidate. (#26106) Thanks @Sid-Qin. 
- Telegram/Markdown spoilers: keep valid `||spoiler||` pairs while leaving unmatched trailing `||` delimiters as literal text, avoiding false all-or-nothing spoiler suppression. (#26105) Thanks @Sid-Qin. diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index 0c222ec2115..3e36366c4ad 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -163,7 +163,7 @@ describe("runWithModelFallback – probe logic", () => { expectPrimaryProbeSuccess(result, run, "recovered"); }); - it("does NOT probe non-primary candidates during cooldown", async () => { + it("attempts non-primary fallbacks during rate-limit cooldown after primary probe failure", async () => { const cfg = makeCfg({ agents: { defaults: { @@ -182,25 +182,23 @@ describe("runWithModelFallback – probe logic", () => { const almostExpired = NOW + 30 * 1000; // 30s remaining mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); - // Primary probe fails with 429 + // Primary probe fails with 429; fallback should still be attempted for rate_limit cooldowns. 
const run = vi .fn() .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 })) - .mockResolvedValue("should-not-reach"); + .mockResolvedValue("fallback-ok"); - try { - await runWithModelFallback({ - cfg, - provider: "openai", - model: "gpt-4.1-mini", - run, - }); - expect.unreachable("should have thrown since all candidates exhausted"); - } catch { - // Primary was probed (i === 0 + within margin), non-primary were skipped - expect(run).toHaveBeenCalledTimes(1); // only primary was actually called - expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); - } + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("fallback-ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini"); + expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5"); }); it("throttles probe when called within 30s interval", async () => { diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 16592cdb456..cd0217faafc 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -143,10 +143,22 @@ async function expectSkippedUnavailableProvider(params: { }) { const provider = `${params.providerPrefix}-${crypto.randomUUID()}`; const cfg = makeProviderFallbackCfg(provider); - const store = makeSingleProviderStore({ + const primaryStore = makeSingleProviderStore({ provider, usageStat: params.usageStat, }); + // Include fallback provider profile so the fallback is attempted (not skipped as no-profile). 
+ const store: AuthProfileStore = { + ...primaryStore, + profiles: { + ...primaryStore.profiles, + "fallback:default": { + type: "api_key", + provider: "fallback", + key: "test-key", + }, + }, + }; const run = createFallbackOnlyRun(); const result = await runWithStoredAuth({ @@ -436,11 +448,11 @@ describe("runWithModelFallback", () => { run, }); - // Override model failed with model_not_found → falls back to configured primary. + // Override model failed with model_not_found → tries fallbacks first (same provider). expect(result.result).toBe("ok"); expect(run).toHaveBeenCalledTimes(2); - expect(run.mock.calls[1]?.[0]).toBe("openai"); - expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini"); + expect(run.mock.calls[1]?.[0]).toBe("anthropic"); + expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); }); it("skips providers when all profiles are in cooldown", async () => { @@ -794,6 +806,296 @@ describe("runWithModelFallback", () => { expect(result.provider).toBe("openai"); expect(result.model).toBe("gpt-4.1-mini"); }); + + // Tests for Bug A fix: Model fallback with session overrides + describe("fallback behavior with session model overrides", () => { + it("allows fallbacks when session model differs from config within same provider", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "google/gemini-2.5-flash"], + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error("Rate limit exceeded")) // Session model fails + .mockResolvedValueOnce("fallback success"); // First fallback succeeds + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-sonnet-4-20250514", // Different from config primary + run, + }); + + expect(result.result).toBe("fallback success"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-20250514"); + 
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-sonnet-4-5"); // Fallback tried + }); + + it("allows fallbacks with model version differences within same provider", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["groq/llama-3.3-70b-versatile"], + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error("Weekly quota exceeded")) + .mockResolvedValueOnce("groq success"); + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-5", // Version difference from config + run, + }); + + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); + }); + + it("still skips fallbacks when using different provider than config", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: [], // Empty fallbacks to match working pattern + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error('No credentials found for profile "openai:default".')) + .mockResolvedValueOnce("config primary worked"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", // Different provider + model: "gpt-4.1-mini", + run, + }); + + // Cross-provider requests should skip configured fallbacks but still try configured primary + expect(result.result).toBe("config primary worked"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini"); // Original request + expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-opus-4-6"); // Config primary as final fallback + }); + + it("uses fallbacks when session model exactly matches config primary", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: 
["groq/llama-3.3-70b-versatile"], + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error("Quota exceeded")) + .mockResolvedValueOnce("fallback worked"); + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", // Exact match + run, + }); + + expect(result.result).toBe("fallback worked"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); + }); + }); + + // Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction + describe("fallback behavior with provider cooldowns", () => { + async function makeAuthStoreWithCooldown( + provider: string, + reason: "rate_limit" | "auth" | "billing", + ): Promise<{ store: AuthProfileStore; dir: string }> { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-")); + const now = Date.now(); + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + [`${provider}:default`]: { type: "api_key", provider, key: "test-key" }, + }, + usageStats: { + [`${provider}:default`]: + reason === "rate_limit" + ? { + // Real rate-limit cooldowns are tracked through cooldownUntil + // and failureCounts, not disabledReason. 
+ cooldownUntil: now + 300000, + failureCounts: { rate_limit: 1 }, + } + : { + // Auth/billing issues use disabledUntil + disabledUntil: now + 300000, + disabledReason: reason, + }, + }, + }; + saveAuthProfileStore(store, tmpDir); + return { store, dir: tmpDir }; + } + + it("attempts same-provider fallbacks during rate limit cooldown", async () => { + const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit"); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + }, + }, + }, + }); + + const run = vi.fn().mockResolvedValueOnce("sonnet success"); // Fallback succeeds + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); + + expect(result.result).toBe("sonnet success"); + expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted + expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); + }); + + it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => { + const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth"); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + }, + }, + }, + }); + + const run = vi.fn().mockResolvedValueOnce("groq success"); + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); + + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); + }); + + it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => { + const { dir } = await 
makeAuthStoreWithCooldown("anthropic", "billing"); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + }, + }, + }, + }); + + const run = vi.fn().mockResolvedValueOnce("groq success"); + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); + + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); + }); + + it("tries cross-provider fallbacks when same provider has rate limit", async () => { + // Anthropic in rate limit cooldown, Groq available + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-")); + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" }, + "groq:default": { type: "api_key", provider: "groq", key: "test-key" }, + }, + usageStats: { + "anthropic:default": { + // Rate-limit reason is inferred from failureCounts for cooldown windows. 
+ cooldownUntil: Date.now() + 300000, + failureCounts: { rate_limit: 2 }, + }, + // Groq not in cooldown + }, + }; + saveAuthProfileStore(store, tmpDir); + + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error("Still rate limited")) // Sonnet still fails + .mockResolvedValueOnce("groq success"); // Groq works + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: tmpDir, + }); + + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Rate limit allows attempt + expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works + }); + }); }); describe("runWithImageModelFallback", () => { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index e59d9e9357c..da03d88d847 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -224,21 +224,21 @@ function resolveFallbackCandidates(params: { const configuredFallbacks = resolveAgentModelFallbackValues( params.cfg?.agents?.defaults?.model, ); - if (sameModelCandidate(normalizedPrimary, configuredPrimary)) { - return configuredFallbacks; - } - // Preserve resilience after failover: when current model is one of the - // configured fallback refs, keep traversing the configured fallback chain. - const isConfiguredFallback = configuredFallbacks.some((raw) => { - const resolved = resolveModelRefFromString({ - raw: String(raw ?? ""), - defaultProvider, - aliasIndex, + // When user runs a different provider than config, only use configured fallbacks + // if the current model is already in that chain (e.g. session on first fallback). 
+ if (normalizedPrimary.provider !== configuredPrimary.provider) { + const isConfiguredFallback = configuredFallbacks.some((raw) => { + const resolved = resolveModelRefFromString({ + raw: String(raw ?? ""), + defaultProvider, + aliasIndex, + }); + return resolved ? sameModelCandidate(resolved.ref, normalizedPrimary) : false; }); - return resolved ? sameModelCandidate(resolved.ref, normalizedPrimary) : false; - }); - // Keep legacy override behavior for ad-hoc models outside configured chain. - return isConfiguredFallback ? configuredFallbacks : []; + return isConfiguredFallback ? configuredFallbacks : []; + } + // Same provider: always use full fallback chain (model version differences within provider). + return configuredFallbacks; })(); for (const raw of modelFallbacks) { @@ -306,6 +306,76 @@ export const _probeThrottleInternals = { resolveProbeThrottleKey, } as const; +type CooldownDecision = + | { + type: "skip"; + reason: FailoverReason; + error: string; + } + | { + type: "attempt"; + reason: FailoverReason; + markProbe: boolean; + }; + +function resolveCooldownDecision(params: { + candidate: ModelCandidate; + isPrimary: boolean; + requestedModel: boolean; + hasFallbackCandidates: boolean; + now: number; + probeThrottleKey: string; + authStore: ReturnType; + profileIds: string[]; +}): CooldownDecision { + const shouldProbe = shouldProbePrimaryDuringCooldown({ + isPrimary: params.isPrimary, + hasFallbackCandidates: params.hasFallbackCandidates, + now: params.now, + throttleKey: params.probeThrottleKey, + authStore: params.authStore, + profileIds: params.profileIds, + }); + + const inferredReason = + resolveProfilesUnavailableReason({ + store: params.authStore, + profileIds: params.profileIds, + now: params.now, + }) ?? 
"rate_limit"; + const isPersistentIssue = + inferredReason === "auth" || + inferredReason === "auth_permanent" || + inferredReason === "billing"; + if (isPersistentIssue) { + return { + type: "skip", + reason: inferredReason, + error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`, + }; + } + + // For primary: try when it is not the requested model, or when probe allows. + // For same-provider fallbacks: only relax cooldown on rate_limit, which + // is commonly model-scoped and can recover on a sibling model. + const shouldAttemptDespiteCooldown = + (params.isPrimary && (!params.requestedModel || shouldProbe)) || + (!params.isPrimary && inferredReason === "rate_limit"); + if (!shouldAttemptDespiteCooldown) { + return { + type: "skip", + reason: inferredReason, + error: `Provider ${params.candidate.provider} is in cooldown (all profiles unavailable)`, + }; + } + + return { + type: "attempt", + reason: inferredReason, + markProbe: params.isPrimary && shouldProbe, + }; +} + export async function runWithModelFallback(params: { cfg: OpenClawConfig | undefined; provider: string; @@ -342,41 +412,38 @@ export async function runWithModelFallback(params: { if (profileIds.length > 0 && !isAnyProfileAvailable) { // All profiles for this provider are in cooldown. - // For the primary model (i === 0), probe it if the soonest cooldown - // expiry is close or already past. This avoids staying on a fallback - // model long after the real rate-limit window clears. 
+ const isPrimary = i === 0; + const requestedModel = + params.provider === candidate.provider && params.model === candidate.model; const now = Date.now(); const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir); - const shouldProbe = shouldProbePrimaryDuringCooldown({ - isPrimary: i === 0, + const decision = resolveCooldownDecision({ + candidate, + isPrimary, + requestedModel, hasFallbackCandidates, now, - throttleKey: probeThrottleKey, + probeThrottleKey, authStore, profileIds, }); - if (!shouldProbe) { - const inferredReason = - resolveProfilesUnavailableReason({ - store: authStore, - profileIds, - now, - }) ?? "rate_limit"; - // Skip without attempting + + if (decision.type === "skip") { attempts.push({ provider: candidate.provider, model: candidate.model, - error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`, - reason: inferredReason, + error: decision.error, + reason: decision.reason, }); continue; } - // Primary model probe: attempt it despite cooldown to detect recovery. - // If it fails, the error is caught below and we fall through to the - // next candidate as usual. - lastProbeAttempt.set(probeThrottleKey, now); + + if (decision.markProbe) { + lastProbeAttempt.set(probeThrottleKey, now); + } } } + try { const result = await params.run(candidate.provider, candidate.model); return {