import type { OpenClawConfig } from "../config/config.js";
import {
  resolveAgentModelFallbackValues,
  resolveAgentModelPrimaryValue,
} from "../config/model-input.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { sanitizeForLog } from "../terminal/ansi.js";
import {
  ensureAuthProfileStore,
  getSoonestCooldownExpiry,
  isProfileInCooldown,
  resolveProfilesUnavailableReason,
  resolveAuthProfileOrder,
} from "./auth-profiles.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
import {
  coerceToFailoverError,
  describeFailoverError,
  isFailoverError,
  isTimeoutError,
} from "./failover-error.js";
import { logModelFallbackDecision } from "./model-fallback-observation.js";
import type { FallbackAttempt, ModelCandidate } from "./model-fallback.types.js";
import {
  buildConfiguredAllowlistKeys,
  buildModelAliasIndex,
  modelKey,
  normalizeModelRef,
  resolveConfiguredModelRef,
  resolveModelRefFromString,
} from "./model-selection.js";
import type { FailoverReason } from "./pi-embedded-helpers.js";
import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";

const log = createSubsystemLogger("model-fallback");

/** Per-attempt options handed to the run callback by the fallback loop. */
export type ModelFallbackRunOptions = {
  /** Set when the attempt is made although every auth profile of the provider is in cooldown. */
  allowTransientCooldownProbe?: boolean;
};

/** Callback that performs one attempt against a concrete provider/model pair. */
type ModelFallbackRunFn<T> = (
  provider: string,
  model: string,
  options?: ModelFallbackRunOptions,
) => Promise<T>;

/**
 * Fallback abort check. Only treats explicit AbortError names as user aborts.
 * Message-based checks (e.g., "aborted") can mask timeouts and skip fallback.
 */
function isFallbackAbortError(err: unknown): boolean {
  if (!err || typeof err !== "object") {
    return false;
  }
  if (isFailoverError(err)) {
    return false;
  }
  const name = "name" in err ? String(err.name) : "";
  return name === "AbortError";
}

/** True when `err` is an AbortError (per `isFallbackAbortError`) that `isTimeoutError` does not claim. */
function shouldRethrowAbort(err: unknown): boolean {
  return isFallbackAbortError(err) && !isTimeoutError(err);
}

/**
 * Builds an ordered, de-duplicated list of model candidates.
 *
 * Candidates with an empty provider or model are ignored, as are candidates whose
 * `modelKey` was already added. `addAllowlistedCandidate` additionally drops
 * candidates missing from `allowlist` (when one is given); `addExplicitCandidate`
 * bypasses the allowlist.
 *
 * @param allowlist Allowed model keys, or null/undefined for "no allowlist".
 * @returns The live `candidates` array plus the two adder functions.
 */
function createModelCandidateCollector(allowlist: Set<string> | null | undefined): {
  candidates: ModelCandidate[];
  addExplicitCandidate: (candidate: ModelCandidate) => void;
  addAllowlistedCandidate: (candidate: ModelCandidate) => void;
} {
  const seen = new Set<string>();
  const candidates: ModelCandidate[] = [];

  const addCandidate = (candidate: ModelCandidate, enforceAllowlist: boolean) => {
    if (!candidate.provider || !candidate.model) {
      return;
    }
    const key = modelKey(candidate.provider, candidate.model);
    if (seen.has(key)) {
      return;
    }
    if (enforceAllowlist && allowlist && !allowlist.has(key)) {
      return;
    }
    seen.add(key);
    candidates.push(candidate);
  };

  const addExplicitCandidate = (candidate: ModelCandidate) => {
    addCandidate(candidate, false);
  };
  const addAllowlistedCandidate = (candidate: ModelCandidate) => {
    addCandidate(candidate, true);
  };

  return { candidates, addExplicitCandidate, addAllowlistedCandidate };
}

/** Observer invoked (and awaited) after each failed candidate attempt. */
type ModelFallbackErrorHandler = (attempt: {
  provider: string;
  model: string;
  error: unknown;
  attempt: number;
  total: number;
}) => void | Promise<void>;

/** Successful outcome of a fallback run, including the failed/skipped attempts that preceded it. */
type ModelFallbackRunResult<T> = {
  result: T;
  provider: string;
  model: string;
  attempts: FallbackAttempt[];
};

/** Packs a successful attempt into a `ModelFallbackRunResult`. */
function buildFallbackSuccess<T>(params: {
  result: T;
  provider: string;
  model: string;
  attempts: FallbackAttempt[];
}): ModelFallbackRunResult<T> {
  return {
    result: params.result,
    provider: params.provider,
    model: params.model,
    attempts: params.attempts,
  };
}

/**
 * Runs one candidate and converts a thrown error into a result value.
 *
 * `options` is only forwarded to `run` when it is set, so callbacks that take two
 * arguments are called with exactly two.
 *
 * @throws Rethrows the original error when it is a non-timeout AbortError that
 *   `coerceToFailoverError` could not turn into a failover error.
 */
async function runFallbackCandidate<T>(params: {
  run: ModelFallbackRunFn<T>;
  provider: string;
  model: string;
  options?: ModelFallbackRunOptions;
}): Promise<{ ok: true; result: T } | { ok: false; error: unknown }> {
  try {
    const result = params.options
      ? await params.run(params.provider, params.model, params.options)
      : await params.run(params.provider, params.model);
    return { ok: true, result };
  } catch (err) {
    // Normalize abort-wrapped rate-limit errors (e.g. Google Vertex RESOURCE_EXHAUSTED)
    // so they become FailoverErrors and continue the fallback loop instead of aborting.
    const normalizedFailover = coerceToFailoverError(err, {
      provider: params.provider,
      model: params.model,
    });
    if (shouldRethrowAbort(err) && !normalizedFailover) {
      throw err;
    }
    return { ok: false, error: normalizedFailover ?? err };
  }
}

/**
 * Runs one candidate and, on success, wraps the result together with the attempts
 * recorded so far.
 *
 * @returns `{ success }` when the candidate succeeded, otherwise `{ error }`.
 */
async function runFallbackAttempt<T>(params: {
  run: ModelFallbackRunFn<T>;
  provider: string;
  model: string;
  attempts: FallbackAttempt[];
  options?: ModelFallbackRunOptions;
}): Promise<{ success: ModelFallbackRunResult<T> } | { error: unknown }> {
  const runResult = await runFallbackCandidate({
    run: params.run,
    provider: params.provider,
    model: params.model,
    options: params.options,
  });
  if (runResult.ok) {
    return {
      success: buildFallbackSuccess({
        result: runResult.result,
        provider: params.provider,
        model: params.model,
        attempts: params.attempts,
      }),
    };
  }
  return { error: runResult.error };
}

/** Strict equality on both provider and model. */
function sameModelCandidate(a: ModelCandidate, b: ModelCandidate): boolean {
  return a.provider === b.provider && a.model === b.model;
}

/**
 * Terminal failure path of a fallback run; never returns.
 *
 * @throws `lastError` itself when at most one attempt was recorded and `lastError`
 *   is truthy; otherwise an `Error` summarizing every attempt, with `lastError`
 *   attached as `cause` when it is an `Error` instance.
 */
function throwFallbackFailureSummary(params: {
  attempts: FallbackAttempt[];
  candidates: ModelCandidate[];
  lastError: unknown;
  label: string;
  formatAttempt: (attempt: FallbackAttempt) => string;
}): never {
  if (params.attempts.length <= 1 && params.lastError) {
    throw params.lastError;
  }
  const summary =
    params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
  throw new Error(
    `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
    {
      cause: params.lastError instanceof Error ? params.lastError : undefined,
    },
  );
}

/**
 * Resolves the ordered image-model candidates.
 *
 * The first candidate is `modelOverride` when it is non-blank, otherwise the
 * configured `agents.defaults.imageModel` primary (if any). The configured image
 * fallbacks follow. Unresolvable references and duplicates are dropped.
 */
function resolveImageFallbackCandidates(params: {
  cfg: OpenClawConfig | undefined;
  defaultProvider: string;
  modelOverride?: string;
}): ModelCandidate[] {
  const aliasIndex = buildModelAliasIndex({
    cfg: params.cfg ?? {},
    defaultProvider: params.defaultProvider,
  });
  const allowlist = buildConfiguredAllowlistKeys({
    cfg: params.cfg,
    defaultProvider: params.defaultProvider,
  });
  const { candidates, addExplicitCandidate, addAllowlistedCandidate } =
    createModelCandidateCollector(allowlist);

  const addRaw = (raw: string, opts?: { allowlist?: boolean }) => {
    const resolved = resolveModelRefFromString({
      raw: String(raw ?? ""),
      defaultProvider: params.defaultProvider,
      aliasIndex,
    });
    if (!resolved) {
      return;
    }
    if (opts?.allowlist) {
      addAllowlistedCandidate(resolved.ref);
      return;
    }
    addExplicitCandidate(resolved.ref);
  };

  if (params.modelOverride?.trim()) {
    addRaw(params.modelOverride);
  } else {
    const primary = resolveAgentModelPrimaryValue(params.cfg?.agents?.defaults?.imageModel);
    if (primary?.trim()) {
      addRaw(primary);
    }
  }

  const imageFallbacks = resolveAgentModelFallbackValues(params.cfg?.agents?.defaults?.imageModel);
  for (const raw of imageFallbacks) {
    // Explicitly configured image fallbacks should remain reachable even when a
    // model allowlist is present.
    addRaw(raw);
  }

  return candidates;
}

/**
 * Resolves the ordered text-model candidates for a run.
 *
 * Order: the requested provider/model (blank values fall back to the configured
 * or built-in defaults), then the fallback list, then — only when no
 * `fallbacksOverride` was given — the configured primary model. Duplicates and
 * unresolvable references are dropped.
 */
function resolveFallbackCandidates(params: {
  cfg: OpenClawConfig | undefined;
  provider: string;
  model: string;
  /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
  fallbacksOverride?: string[];
}): ModelCandidate[] {
  const primary = params.cfg
    ? resolveConfiguredModelRef({
        cfg: params.cfg,
        defaultProvider: DEFAULT_PROVIDER,
        defaultModel: DEFAULT_MODEL,
      })
    : null;
  const defaultProvider = primary?.provider ?? DEFAULT_PROVIDER;
  const defaultModel = primary?.model ?? DEFAULT_MODEL;
  const providerRaw = String(params.provider ?? "").trim() || defaultProvider;
  const modelRaw = String(params.model ?? "").trim() || defaultModel;
  const normalizedPrimary = normalizeModelRef(providerRaw, modelRaw);
  const configuredPrimary = normalizeModelRef(defaultProvider, defaultModel);
  const aliasIndex = buildModelAliasIndex({
    cfg: params.cfg ?? {},
    defaultProvider,
  });
  const allowlist = buildConfiguredAllowlistKeys({
    cfg: params.cfg,
    defaultProvider,
  });
  const { candidates, addExplicitCandidate } = createModelCandidateCollector(allowlist);

  addExplicitCandidate(normalizedPrimary);

  const modelFallbacks = (() => {
    if (params.fallbacksOverride !== undefined) {
      return params.fallbacksOverride;
    }
    const configuredFallbacks = resolveAgentModelFallbackValues(
      params.cfg?.agents?.defaults?.model,
    );
    // When user runs a different provider than config, only use configured fallbacks
    // if the current model is already in that chain (e.g. session on first fallback).
    if (normalizedPrimary.provider !== configuredPrimary.provider) {
      const isConfiguredFallback = configuredFallbacks.some((raw) => {
        const resolved = resolveModelRefFromString({
          raw: String(raw ?? ""),
          defaultProvider,
          aliasIndex,
        });
        return resolved ? sameModelCandidate(resolved.ref, normalizedPrimary) : false;
      });
      return isConfiguredFallback ? configuredFallbacks : [];
    }
    // Same provider: always use full fallback chain (model version differences within provider).
    return configuredFallbacks;
  })();

  for (const raw of modelFallbacks) {
    const resolved = resolveModelRefFromString({
      raw: String(raw ?? ""),
      defaultProvider,
      aliasIndex,
    });
    if (!resolved) {
      continue;
    }
    // Fallbacks are explicit user intent; do not silently filter them by the
    // model allowlist.
    addExplicitCandidate(resolved.ref);
  }

  if (params.fallbacksOverride === undefined && primary?.provider && primary.model) {
    addExplicitCandidate({ provider: primary.provider, model: primary.model });
  }

  return candidates;
}

/** Timestamp (ms) of the last cooldown probe, keyed by `resolveProbeThrottleKey`. */
const lastProbeAttempt = new Map<string, number>();
const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per key
/** A primary in cooldown may be probed once the soonest cooldown expiry is within this margin. */
const PROBE_MARGIN_MS = 2 * 60 * 1000;
const PROBE_SCOPE_DELIMITER = "::";
/** Probe timestamps older than this are pruned. */
const PROBE_STATE_TTL_MS = 24 * 60 * 60 * 1000;
/** Upper bound on the number of tracked probe keys. */
const MAX_PROBE_KEYS = 256;

/** Builds the throttle key: `<agentDir>::<provider>` when `agentDir` is non-blank, else `<provider>`. */
function resolveProbeThrottleKey(provider: string, agentDir?: string): string {
  const scope = String(agentDir ?? "").trim();
  return scope ? `${scope}${PROBE_SCOPE_DELIMITER}${provider}` : provider;
}

/** Drops probe entries that are non-finite, non-positive, or older than `PROBE_STATE_TTL_MS`. */
function pruneProbeState(now: number): void {
  for (const [key, ts] of lastProbeAttempt) {
    if (!Number.isFinite(ts) || ts <= 0 || now - ts > PROBE_STATE_TTL_MS) {
      lastProbeAttempt.delete(key);
    }
  }
}

/** Evicts the entries with the smallest timestamps until at most `MAX_PROBE_KEYS` remain. */
function enforceProbeStateCap(): void {
  while (lastProbeAttempt.size > MAX_PROBE_KEYS) {
    let oldestKey: string | null = null;
    let oldestTs = Number.POSITIVE_INFINITY;
    for (const [key, ts] of lastProbeAttempt) {
      if (ts < oldestTs) {
        oldestKey = key;
        oldestTs = ts;
      }
    }
    // Compare against null rather than truthiness: an empty-string key is a valid
    // map key and must still be evictable.
    if (oldestKey === null) {
      break;
    }
    lastProbeAttempt.delete(oldestKey);
  }
}

/** True when at least `MIN_PROBE_INTERVAL_MS` has passed since the last probe for `throttleKey`. */
function isProbeThrottleOpen(now: number, throttleKey: string): boolean {
  pruneProbeState(now);
  const lastProbe = lastProbeAttempt.get(throttleKey) ?? 0;
  return now - lastProbe >= MIN_PROBE_INTERVAL_MS;
}

/** Records a probe for `throttleKey` at `now`, pruning stale entries and enforcing the key cap. */
function markProbeAttempt(now: number, throttleKey: string): void {
  pruneProbeState(now);
  lastProbeAttempt.set(throttleKey, now);
  enforceProbeStateCap();
}

/**
 * Decides whether a primary candidate whose profiles are all in cooldown may be
 * probed anyway.
 *
 * Only the primary is probed, only when fallback candidates exist, and only when
 * the probe throttle is open. With no known (finite) cooldown expiry the probe
 * is allowed; otherwise it is allowed once `now` is within `PROBE_MARGIN_MS` of
 * the soonest expiry.
 */
function shouldProbePrimaryDuringCooldown(params: {
  isPrimary: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  throttleKey: string;
  authStore: ReturnType<typeof ensureAuthProfileStore>;
  profileIds: string[];
}): boolean {
  if (!params.isPrimary || !params.hasFallbackCandidates) {
    return false;
  }

  if (!isProbeThrottleOpen(params.now, params.throttleKey)) {
    return false;
  }

  const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
  if (soonest === null || !Number.isFinite(soonest)) {
    return true;
  }

  // Probe when cooldown already expired or within the configured margin.
  return params.now >= soonest - PROBE_MARGIN_MS;
}

/** @internal – exposed for unit tests only */
export const _probeThrottleInternals = {
  lastProbeAttempt,
  MIN_PROBE_INTERVAL_MS,
  PROBE_MARGIN_MS,
  PROBE_STATE_TTL_MS,
  MAX_PROBE_KEYS,
  resolveProbeThrottleKey,
  isProbeThrottleOpen,
  pruneProbeState,
  markProbeAttempt,
} as const;

/** Outcome of evaluating a candidate whose auth profiles are all in cooldown. */
type CooldownDecision =
  | {
      type: "skip";
      reason: FailoverReason;
      error: string;
    }
  | {
      type: "attempt";
      reason: FailoverReason;
      /** When true the caller must record the probe via `markProbeAttempt`. */
      markProbe: boolean;
    };

/**
 * Decides whether to skip or attempt a candidate whose profiles are all in cooldown.
 *
 * - `auth` / `auth_permanent`: always skip.
 * - `billing`: attempt only for the primary, when a probe is allowed.
 * - anything else: attempt the primary when it is not the requested model or a
 *   probe is allowed; attempt non-primary candidates only for `rate_limit`,
 *   `overloaded` or `unknown`.
 */
function resolveCooldownDecision(params: {
  candidate: ModelCandidate;
  isPrimary: boolean;
  requestedModel: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  probeThrottleKey: string;
  authStore: ReturnType<typeof ensureAuthProfileStore>;
  profileIds: string[];
}): CooldownDecision {
  const shouldProbe = shouldProbePrimaryDuringCooldown({
    isPrimary: params.isPrimary,
    hasFallbackCandidates: params.hasFallbackCandidates,
    now: params.now,
    throttleKey: params.probeThrottleKey,
    authStore: params.authStore,
    profileIds: params.profileIds,
  });

  const inferredReason =
    resolveProfilesUnavailableReason({
      store: params.authStore,
      profileIds: params.profileIds,
      now: params.now,
    }) ?? "unknown";
  const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
  if (isPersistentAuthIssue) {
    return {
      type: "skip",
      reason: inferredReason,
      error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
    };
  }

  // Billing is semi-persistent: the user may fix their balance, or a transient
  // 402 might have been misclassified. Probe single-provider setups on the
  // standard throttle so they can recover without a restart; when fallbacks
  // exist, only probe near cooldown expiry so the fallback chain stays preferred.
  if (inferredReason === "billing") {
    const shouldProbeSingleProviderBilling =
      params.isPrimary &&
      !params.hasFallbackCandidates &&
      isProbeThrottleOpen(params.now, params.probeThrottleKey);
    if (params.isPrimary && (shouldProbe || shouldProbeSingleProviderBilling)) {
      return { type: "attempt", reason: inferredReason, markProbe: true };
    }
    return {
      type: "skip",
      reason: inferredReason,
      error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
    };
  }

  // For primary: try when requested model or when probe allows.
  // For same-provider fallbacks: only relax cooldown on transient provider
  // limits, which are often model-scoped and can recover on a sibling model.
  const shouldAttemptDespiteCooldown =
    (params.isPrimary && (!params.requestedModel || shouldProbe)) ||
    (!params.isPrimary &&
      (inferredReason === "rate_limit" ||
        inferredReason === "overloaded" ||
        inferredReason === "unknown"));
  if (!shouldAttemptDespiteCooldown) {
    return {
      type: "skip",
      reason: inferredReason,
      error: `Provider ${params.candidate.provider} is in cooldown (all profiles unavailable)`,
    };
  }

  return {
    type: "attempt",
    reason: inferredReason,
    markProbe: params.isPrimary && shouldProbe,
  };
}

/**
 * Runs `run` against the requested model and, on failure, each fallback candidate
 * in order until one succeeds.
 *
 * When `cfg` is provided, candidates whose auth profiles are all in cooldown are
 * either skipped or probed according to `resolveCooldownDecision`; transient
 * cooldown probes are limited to one per provider per run.
 *
 * @param params.onError Awaited after every failed attempt (not after skips).
 * @returns The first successful result together with the attempts that preceded it.
 * @throws The original error for non-timeout aborts and likely context-overflow
 *   errors; the original error when the last candidate fails with a non-failover
 *   error; otherwise the single recorded failure or a summary of all attempts.
 */
export async function runWithModelFallback<T>(params: {
  cfg: OpenClawConfig | undefined;
  provider: string;
  model: string;
  runId?: string;
  agentDir?: string;
  /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
  fallbacksOverride?: string[];
  run: ModelFallbackRunFn<T>;
  onError?: ModelFallbackErrorHandler;
}): Promise<ModelFallbackRunResult<T>> {
  const candidates = resolveFallbackCandidates({
    cfg: params.cfg,
    provider: params.provider,
    model: params.model,
    fallbacksOverride: params.fallbacksOverride,
  });
  const authStore = params.cfg
    ? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false })
    : null;
  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;
  const cooldownProbeUsedProviders = new Set<string>();

  const hasFallbackCandidates = candidates.length > 1;

  for (let i = 0; i < candidates.length; i += 1) {
    const candidate = candidates[i];
    const isPrimary = i === 0;
    const requestedModel =
      params.provider === candidate.provider && params.model === candidate.model;
    let runOptions: ModelFallbackRunOptions | undefined;
    let attemptedDuringCooldown = false;
    let transientProbeProviderForAttempt: string | null = null;

    if (authStore) {
      const profileIds = resolveAuthProfileOrder({
        cfg: params.cfg,
        store: authStore,
        provider: candidate.provider,
      });
      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));

      if (profileIds.length > 0 && !isAnyProfileAvailable) {
        // All profiles for this provider are in cooldown.
        const now = Date.now();
        const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
        const decision = resolveCooldownDecision({
          candidate,
          isPrimary,
          requestedModel,
          hasFallbackCandidates,
          now,
          probeThrottleKey,
          authStore,
          profileIds,
        });

        if (decision.type === "skip") {
          attempts.push({
            provider: candidate.provider,
            model: candidate.model,
            error: decision.error,
            reason: decision.reason,
          });
          logModelFallbackDecision({
            decision: "skip_candidate",
            runId: params.runId,
            requestedProvider: params.provider,
            requestedModel: params.model,
            candidate,
            attempt: i + 1,
            total: candidates.length,
            reason: decision.reason,
            error: decision.error,
            nextCandidate: candidates[i + 1],
            isPrimary,
            requestedModelMatched: requestedModel,
            fallbackConfigured: hasFallbackCandidates,
            profileCount: profileIds.length,
          });
          continue;
        }

        if (decision.markProbe) {
          markProbeAttempt(now, probeThrottleKey);
        }

        if (
          decision.reason === "rate_limit" ||
          decision.reason === "overloaded" ||
          decision.reason === "billing" ||
          decision.reason === "unknown"
        ) {
          // Probe at most once per provider per fallback run when all profiles
          // are cooldowned. Re-probing every same-provider candidate can stall
          // cross-provider fallback on providers with long internal retries.
          const isTransientCooldownReason =
            decision.reason === "rate_limit" ||
            decision.reason === "overloaded" ||
            decision.reason === "unknown";
          if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) {
            const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
            attempts.push({
              provider: candidate.provider,
              model: candidate.model,
              error,
              reason: decision.reason,
            });
            logModelFallbackDecision({
              decision: "skip_candidate",
              runId: params.runId,
              requestedProvider: params.provider,
              requestedModel: params.model,
              candidate,
              attempt: i + 1,
              total: candidates.length,
              reason: decision.reason,
              error,
              nextCandidate: candidates[i + 1],
              isPrimary,
              requestedModelMatched: requestedModel,
              fallbackConfigured: hasFallbackCandidates,
              profileCount: profileIds.length,
            });
            continue;
          }
          runOptions = { allowTransientCooldownProbe: true };
          if (isTransientCooldownReason) {
            transientProbeProviderForAttempt = candidate.provider;
          }
        }

        attemptedDuringCooldown = true;
        logModelFallbackDecision({
          decision: "probe_cooldown_candidate",
          runId: params.runId,
          requestedProvider: params.provider,
          requestedModel: params.model,
          candidate,
          attempt: i + 1,
          total: candidates.length,
          reason: decision.reason,
          nextCandidate: candidates[i + 1],
          isPrimary,
          requestedModelMatched: requestedModel,
          fallbackConfigured: hasFallbackCandidates,
          allowTransientCooldownProbe: runOptions?.allowTransientCooldownProbe,
          profileCount: profileIds.length,
        });
      }
    }

    const attemptRun = await runFallbackAttempt({
      run: params.run,
      ...candidate,
      attempts,
      options: runOptions,
    });

    if ("success" in attemptRun) {
      // Only log success when something noteworthy preceded it (a fallback, a
      // skipped/failed attempt, or a cooldown probe).
      if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) {
        logModelFallbackDecision({
          decision: "candidate_succeeded",
          runId: params.runId,
          requestedProvider: params.provider,
          requestedModel: params.model,
          candidate,
          attempt: i + 1,
          total: candidates.length,
          previousAttempts: attempts,
          isPrimary,
          requestedModelMatched: requestedModel,
          fallbackConfigured: hasFallbackCandidates,
        });
      }
      const notFoundAttempt =
        i > 0 ? attempts.find((a) => a.reason === "model_not_found") : undefined;
      if (notFoundAttempt) {
        log.warn(
          `Model "${sanitizeForLog(notFoundAttempt.provider)}/${sanitizeForLog(notFoundAttempt.model)}" not found. Fell back to "${sanitizeForLog(candidate.provider)}/${sanitizeForLog(candidate.model)}".`,
        );
      }
      return attemptRun.success;
    }

    const err = attemptRun.error;

    if (transientProbeProviderForAttempt) {
      // Failures with these reasons do not consume the provider's single
      // transient-probe slot for this run.
      const probeFailureReason = describeFailoverError(err).reason;
      const shouldPreserveTransientProbeSlot =
        probeFailureReason === "model_not_found" ||
        probeFailureReason === "format" ||
        probeFailureReason === "auth" ||
        probeFailureReason === "auth_permanent" ||
        probeFailureReason === "session_expired";
      if (!shouldPreserveTransientProbeSlot) {
        cooldownProbeUsedProviders.add(transientProbeProviderForAttempt);
      }
    }

    // Context overflow errors should be handled by the inner runner's
    // compaction/retry logic, not by model fallback. If one escapes as a
    // throw, rethrow it immediately rather than trying a different model
    // that may have a smaller context window and fail worse.
    const errMessage = err instanceof Error ? err.message : String(err);
    if (isLikelyContextOverflowError(errMessage)) {
      throw err;
    }

    const normalized =
      coerceToFailoverError(err, {
        provider: candidate.provider,
        model: candidate.model,
      }) ?? err;

    // Even unrecognized errors should not abort the fallback loop when
    // there are remaining candidates. Only abort/context-overflow errors
    // (handled above) are truly non-retryable.
    const isKnownFailover = isFailoverError(normalized);
    if (!isKnownFailover && i === candidates.length - 1) {
      throw err;
    }

    lastError = isKnownFailover ? normalized : err;
    const described = describeFailoverError(normalized);
    attempts.push({
      provider: candidate.provider,
      model: candidate.model,
      error: described.message,
      reason: described.reason ?? "unknown",
      status: described.status,
      code: described.code,
    });
    logModelFallbackDecision({
      decision: "candidate_failed",
      runId: params.runId,
      requestedProvider: params.provider,
      requestedModel: params.model,
      candidate,
      attempt: i + 1,
      total: candidates.length,
      reason: described.reason,
      status: described.status,
      code: described.code,
      error: described.message,
      nextCandidate: candidates[i + 1],
      isPrimary,
      requestedModelMatched: requestedModel,
      fallbackConfigured: hasFallbackCandidates,
    });
    await params.onError?.({
      provider: candidate.provider,
      model: candidate.model,
      error: isKnownFailover ? normalized : err,
      attempt: i + 1,
      total: candidates.length,
    });
  }

  throwFallbackFailureSummary({
    attempts,
    candidates,
    lastError,
    label: "models",
    formatAttempt: (attempt) =>
      `${attempt.provider}/${attempt.model}: ${attempt.error}${
        attempt.reason ? ` (${attempt.reason})` : ""
      }`,
  });
}

/**
 * Runs `run` against each configured image-model candidate in order until one
 * succeeds. No cooldown handling is applied on this path.
 *
 * @param params.modelOverride When non-blank, replaces the configured primary image model.
 * @param params.onError Awaited after every failed attempt.
 * @returns The first successful result together with the attempts that preceded it.
 * @throws `Error` when no image model candidate can be resolved; otherwise the
 *   single recorded failure or a summary of all attempts. Non-timeout aborts are
 *   rethrown by `runFallbackCandidate`.
 */
export async function runWithImageModelFallback<T>(params: {
  cfg: OpenClawConfig | undefined;
  modelOverride?: string;
  run: (provider: string, model: string) => Promise<T>;
  onError?: ModelFallbackErrorHandler;
}): Promise<ModelFallbackRunResult<T>> {
  const candidates = resolveImageFallbackCandidates({
    cfg: params.cfg,
    defaultProvider: DEFAULT_PROVIDER,
    modelOverride: params.modelOverride,
  });
  if (candidates.length === 0) {
    throw new Error(
      "No image model configured. Set agents.defaults.imageModel.primary or agents.defaults.imageModel.fallbacks.",
    );
  }

  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  for (let i = 0; i < candidates.length; i += 1) {
    const candidate = candidates[i];
    const attemptRun = await runFallbackAttempt({ run: params.run, ...candidate, attempts });
    if ("success" in attemptRun) {
      return attemptRun.success;
    }

    const err = attemptRun.error;
    lastError = err;
    attempts.push({
      provider: candidate.provider,
      model: candidate.model,
      error: err instanceof Error ? err.message : String(err),
    });
    await params.onError?.({
      provider: candidate.provider,
      model: candidate.model,
      error: err,
      attempt: i + 1,
      total: candidates.length,
    });
  }

  throwFallbackFailureSummary({
    attempts,
    candidates,
    lastError,
    label: "image models",
    formatAttempt: (attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`,
  });
}