diff --git a/CHANGELOG.md b/CHANGELOG.md index 724ef66eb3e..53dcfac9c64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -136,7 +136,7 @@ Docs: https://docs.openclaw.ai - CLI/models: include explicitly configured provider models in `openclaw models list --provider ` without requiring the full catalog path, so configured Ollama models are visible. Fixes #65207. Thanks @drzeast-png. - Docker/QA: add observability coverage to the normal Docker aggregate so QA-lab OTEL and Prometheus diagnostics run inside Docker. Thanks @vincentkoc. - Auto-reply: poison inbound message dedupe after replay-unsafe provider/runtime failures so retries stay safe before visible progress but cannot duplicate messages after block output, tool side effects, or session progress. Fixes #69303; keeps #58549 and #64606 as duplicate validation. Thanks @martingarramon, @NikolaFC, and @zeroth-blip. -- Agents/model fallback: clear auto-persisted fallback model overrides before the next run so recovered primaries are retried instead of leaving sessions permanently pinned to the fallback provider. Fixes #72697. Thanks @kibedu. +- Agents/model fallback: keep auto-persisted fallback model overrides selected across turns until `/new` or reset clears them, avoiding repeated probes of a known-bad primary while `/status` shows the selected and active models. Thanks @kibedu. - Agents/model fallback: jump directly to a known later live-session model redirect instead of walking unrelated fallback candidates, while preserving the already-landed live-session/fallback loop guard. Fixes #57471; related loop family already closed via #58496. Thanks @yuxiaoyang2007-prog. - Gateway/Bonjour: keep @homebridge/ciao cancellation handlers registered across advertiser restarts so late probing cancellations cannot crash Linux and other mDNS-churned gateways. Thanks @codex. 
- Plugins/startup: load the default `memory-core` slot during Gateway startup when permitted so active-memory recall can call `memory_search` and `memory_get` without requiring an explicit `plugins.slots.memory` entry, while preserving `plugins.slots.memory: "none"`. Thanks @codex. diff --git a/docs/concepts/model-failover.md b/docs/concepts/model-failover.md index 66eadf6bddd..8c9984ca235 100644 --- a/docs/concepts/model-failover.md +++ b/docs/concepts/model-failover.md @@ -21,7 +21,7 @@ For a normal text run, OpenClaw evaluates candidates in this order: - Resolve the active session model and auth-profile preference. A session override with `modelOverrideSource: "auto"` came from an earlier fallback, so the next run clears it first and retries the configured primary; user-selected overrides stay sticky. + Resolve the active session model and auth-profile preference. Build the model candidate chain from the currently selected session model, then `agents.defaults.model.fallbacks` in order, ending with the configured primary when the run started from an override. @@ -47,6 +47,7 @@ This is intentionally narrower than "save and restore the whole session". The re - `providerOverride` - `modelOverride` +- `modelOverrideSource` - `authProfileOverride` - `authProfileOverrideSource` - `authProfileOverrideCompactionCount` @@ -264,8 +265,8 @@ That means fallback retries have to coordinate with live model switching: - Only explicit user-driven model changes mark a pending live switch. That includes `/model`, `session_status(model=...)`, and `sessions.patch`. - System-driven model changes such as fallback rotation, heartbeat overrides, or compaction never mark a pending live switch on their own. - Before a fallback retry starts, the reply runner persists the selected fallback override fields to the session entry. -- On the next run, auto fallback overrides are cleared before model selection so the configured primary is retried. 
If it is still unhealthy, the fallback loop records a fresh auto override for that new attempt. -- User model overrides (`modelOverrideSource: "user"`) and legacy overrides without a source field remain persistent across turns. +- Auto fallback overrides remain selected on subsequent turns so OpenClaw does not probe a known-bad primary on every message. `/new`, `/reset`, and `sessions.reset` clear auto-sourced overrides and return the session to the configured default. +- `/status` shows the selected model and, when the fallback state differs from that selection, the active fallback model and the reason for the fallback. - Live-session reconciliation prefers persisted session overrides over stale runtime model fields. - If a live-switch error points at a later candidate in the active fallback chain, OpenClaw jumps directly to that selected model instead of walking unrelated candidates first. - If the fallback attempt fails, the runner rolls back only the override fields it wrote, and only if they still match that failed candidate. 
diff --git a/src/agents/agent-command.live-model-switch.test.ts b/src/agents/agent-command.live-model-switch.test.ts index 21f1cc5a1ca..03bbef69dfc 100644 --- a/src/agents/agent-command.live-model-switch.test.ts +++ b/src/agents/agent-command.live-model-switch.test.ts @@ -15,15 +15,11 @@ const state = vi.hoisted(() => ({ registerAgentRunContextMock: vi.fn(), clearAgentRunContextMock: vi.fn(), updateSessionStoreAfterAgentRunMock: vi.fn(), - updateSessionStoreFileMock: vi.fn(), deliverAgentCommandResultMock: vi.fn(), clearSessionAuthProfileOverrideMock: vi.fn(), - applyModelOverrideToSessionEntryMock: vi.fn((..._args: unknown[]) => ({ updated: false })), authProfileStoreMock: { profiles: {} } as { profiles: Record }, sessionEntryMock: undefined as unknown, sessionStoreMock: undefined as unknown, - sessionStorePathMock: undefined as string | undefined, - persistedSessionStoreMock: undefined as Record | undefined, })); vi.mock("./model-fallback.js", () => ({ @@ -77,7 +73,7 @@ vi.mock("./command/session.js", () => ({ skillsSnapshot: { prompt: "", skills: [], version: 0 }, }, sessionStore: state.sessionStoreMock, - storePath: state.sessionStorePathMock, + storePath: undefined, isNewSession: false, persistedThinking: undefined, persistedVerbose: undefined, @@ -181,10 +177,6 @@ vi.mock("../config/sessions.js", () => ({ ), })); -vi.mock("../config/sessions/store.js", () => ({ - updateSessionStore: (...args: unknown[]) => state.updateSessionStoreFileMock(...args), -})); - vi.mock("../config/sessions/transcript-resolve.runtime.js", () => ({ resolveSessionTranscriptFile: async () => ({ sessionFile: "/tmp/session.jsonl", @@ -239,8 +231,7 @@ vi.mock("../sessions/level-overrides.js", () => ({ })); vi.mock("../sessions/model-overrides.js", () => ({ - applyModelOverrideToSessionEntry: (...args: unknown[]) => - state.applyModelOverrideToSessionEntryMock(...args), + applyModelOverrideToSessionEntry: () => ({ updated: false }), })); vi.mock("../sessions/send-policy.js", () => ({ @@ 
-409,38 +400,6 @@ function expectFallbackOverrideCalls(first: boolean, second: boolean) { }); } -function useRealisticDefaultModelOverrideReset() { - state.applyModelOverrideToSessionEntryMock.mockImplementation((params: unknown) => { - const { entry, selection } = params as { - entry: { - providerOverride?: string; - modelOverride?: string; - modelOverrideSource?: string; - authProfileOverride?: string; - authProfileOverrideSource?: string; - }; - selection: { isDefault?: boolean }; - }; - if (!selection.isDefault) { - return { updated: false }; - } - const before = { ...entry }; - delete entry.providerOverride; - delete entry.modelOverride; - delete entry.modelOverrideSource; - delete entry.authProfileOverride; - delete entry.authProfileOverrideSource; - return { - updated: - before.providerOverride !== entry.providerOverride || - before.modelOverride !== entry.modelOverride || - before.modelOverrideSource !== entry.modelOverrideSource || - before.authProfileOverride !== entry.authProfileOverride || - before.authProfileOverrideSource !== entry.authProfileOverrideSource, - }; - }); -} - describe("agentCommand – LiveSessionModelSwitchError retry", () => { beforeEach(() => { vi.clearAllMocks(); @@ -471,13 +430,6 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { state.authProfileStoreMock = { profiles: {} }; state.sessionEntryMock = undefined; state.sessionStoreMock = undefined; - state.sessionStorePathMock = undefined; - state.persistedSessionStoreMock = undefined; - state.applyModelOverrideToSessionEntryMock.mockReturnValue({ updated: false }); - state.updateSessionStoreFileMock.mockImplementation( - async (_path: string, fn: (store: Record) => unknown) => - fn(state.persistedSessionStoreMock ?? 
{}), - ); state.deliverAgentCommandResultMock.mockResolvedValue(undefined); state.updateSessionStoreAfterAgentRunMock.mockResolvedValue(undefined); }); @@ -690,64 +642,4 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { expectFallbackOverrideCalls(false, true); }); - - it("clears auto-fallback model overrides before the next command retries primary", async () => { - useRealisticDefaultModelOverrideReset(); - - const sessionEntry = { - sessionId: "session-1", - updatedAt: Date.now(), - providerOverride: "openai", - modelOverride: "gpt-5.4", - modelOverrideSource: "auto", - authProfileOverride: "openai:default", - authProfileOverrideSource: "auto", - skillsSnapshot: { prompt: "", skills: [], version: 0 }, - }; - state.sessionEntryMock = sessionEntry; - state.sessionStoreMock = { "agent:main": sessionEntry }; - state.sessionStorePathMock = "/tmp/sessions.json"; - state.persistedSessionStoreMock = { "agent:main": { ...sessionEntry } }; - state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => { - const result = await params.run(params.provider, params.model); - return { - result, - provider: params.provider, - model: params.model, - attempts: [], - }; - }); - state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("anthropic", "claude")); - - await agentCommand({ - message: "hello", - sessionKey: "agent:main", - senderIsOwner: true, - }); - - const fallbackParams = state.runWithModelFallbackMock.mock.calls[0]?.[0] as - | FallbackRunnerParams - | undefined; - expect(fallbackParams).toMatchObject({ - provider: "anthropic", - model: "claude", - }); - expect(state.resolveEffectiveModelFallbacksMock).toHaveBeenCalledWith( - expect.objectContaining({ hasSessionModelOverride: false }), - ); - const activeStore = state.sessionStoreMock as Record; - const persistedStore = state.persistedSessionStoreMock as Record; - expect(activeStore["agent:main"]).toMatchObject({ - sessionId: "session-1", - }); - 
expect(activeStore["agent:main"].providerOverride).toBeUndefined(); - expect(activeStore["agent:main"].modelOverride).toBeUndefined(); - expect(activeStore["agent:main"].modelOverrideSource).toBeUndefined(); - expect(persistedStore["agent:main"]).toMatchObject({ - sessionId: "session-1", - }); - expect(persistedStore["agent:main"].providerOverride).toBeUndefined(); - expect(persistedStore["agent:main"].modelOverride).toBeUndefined(); - expect(persistedStore["agent:main"].modelOverrideSource).toBeUndefined(); - }); }); diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index aefc34ef313..af28a3d17dd 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -191,7 +191,6 @@ type PersistSessionEntryParams = { type OverrideFieldClearedByDelete = | "providerOverride" | "modelOverride" - | "modelOverrideSource" | "authProfileOverride" | "authProfileOverrideSource" | "authProfileOverrideCompactionCount" @@ -203,7 +202,6 @@ type OverrideFieldClearedByDelete = const OVERRIDE_FIELDS_CLEARED_BY_DELETE: OverrideFieldClearedByDelete[] = [ "providerOverride", "modelOverride", - "modelOverrideSource", "authProfileOverride", "authProfileOverrideSource", "authProfileOverrideCompactionCount", @@ -724,28 +722,7 @@ async function agentCommandInternal( allowAnyModel = allowed.allowAny ?? 
false; } - const hasAutoStoredOverride = - sessionEntry?.modelOverrideSource === "auto" && Boolean(sessionEntry.modelOverride?.trim()); - - if (sessionEntry && sessionStore && sessionKey && hasAutoStoredOverride) { - const { updated } = applyModelOverrideToSessionEntry({ - entry: sessionEntry, - selection: { provider: defaultProvider, model: defaultModel, isDefault: true }, - }); - if (updated) { - sessionStore[sessionKey] = sessionEntry; - if (storePath) { - await persistSessionEntry({ - sessionStore, - sessionKey, - storePath, - entry: sessionEntry, - }); - } - } - } - - if (sessionEntry && sessionStore && sessionKey && hasStoredOverride && !hasAutoStoredOverride) { + if (sessionEntry && sessionStore && sessionKey && hasStoredOverride) { const entry = sessionEntry; const overrideProvider = sessionEntry.providerOverride?.trim() || defaultProvider; const overrideModel = sessionEntry.modelOverride?.trim(); @@ -769,12 +746,8 @@ async function agentCommandInternal( } } - const storedProviderOverride = hasAutoStoredOverride - ? undefined - : sessionEntry?.providerOverride?.trim(); - let storedModelOverride = hasAutoStoredOverride - ? 
undefined - : sessionEntry?.modelOverride?.trim(); + const storedProviderOverride = sessionEntry?.providerOverride?.trim(); + let storedModelOverride = sessionEntry?.modelOverride?.trim(); if (storedModelOverride) { const candidateProvider = storedProviderOverride || defaultProvider; const normalizedStored = normalizeModelRef(candidateProvider, storedModelOverride); diff --git a/src/auto-reply/reply/model-selection.test.ts b/src/auto-reply/reply/model-selection.test.ts index 88e745258de..2a5f184a644 100644 --- a/src/auto-reply/reply/model-selection.test.ts +++ b/src/auto-reply/reply/model-selection.test.ts @@ -600,7 +600,7 @@ describe("createModelSelectionState respects session model override", () => { }); }); -describe("createModelSelectionState auto-failover override self-healing", () => { +describe("createModelSelectionState auto-failover overrides", () => { const defaultProvider = "mac-studio"; const defaultModel = "MiniMax-M2.7-MLX"; const sessionKey = "agent:main:telegram:direct:1"; @@ -632,26 +632,22 @@ describe("createModelSelectionState auto-failover override self-healing", () => return { state, sessionEntry, sessionStore }; } - it("clears auto-failover override and retries the configured primary", async () => { + it("preserves auto-failover overrides across turns until reset", async () => { const { state, sessionStore } = await resolveStateWithOverride({ providerOverride: "openrouter", modelOverride: "minimax/minimax-m2.7", modelOverrideSource: "auto", }); - // Provider/model should revert to the configured primary, not the fallback. - expect(state.provider).toBe(defaultProvider); - expect(state.model).toBe(defaultModel); - // The auto override should be cleared from session state. 
- expect(sessionStore[sessionKey]?.providerOverride).toBeUndefined(); - expect(sessionStore[sessionKey]?.modelOverride).toBeUndefined(); - expect(sessionStore[sessionKey]?.modelOverrideSource).toBeUndefined(); - // resetModelOverride must NOT be set — it triggers a "Model override not allowed" - // system event which is incorrect for auto-heal (the override was valid). + expect(state.provider).toBe("openrouter"); + expect(state.model).toBe("minimax/minimax-m2.7"); + expect(sessionStore[sessionKey]?.providerOverride).toBe("openrouter"); + expect(sessionStore[sessionKey]?.modelOverride).toBe("minimax/minimax-m2.7"); + expect(sessionStore[sessionKey]?.modelOverrideSource).toBe("auto"); expect(state.resetModelOverride).toBe(false); }); - it("clears a disallowed auto-failover override without reporting an allowlist reset", async () => { + it("still clears disallowed auto-failover overrides through allowlist validation", async () => { const cfg = { agents: { defaults: { @@ -682,18 +678,13 @@ describe("createModelSelectionState auto-failover override self-healing", () => hasModelDirective: false, }); - expect(state.provider).toBe(defaultProvider); - expect(state.model).toBe(defaultModel); - expect(state.resetModelOverride).toBe(false); + expect(state.resetModelOverride).toBe(true); expect(sessionStore[sessionKey]?.providerOverride).toBeUndefined(); expect(sessionStore[sessionKey]?.modelOverride).toBeUndefined(); expect(sessionStore[sessionKey]?.modelOverrideSource).toBeUndefined(); }); - it("resets in-memory provider/model even when caller pre-loaded the fallback", async () => { - // Simulates get-reply-directives.ts preloading provider/model from stored override - // before calling createModelSelectionState. Our fix must update those in-memory - // values so the current turn retries the primary, not the fallback. 
+ it("keeps pre-loaded fallback provider/model for an auto-failover override", async () => { const cfg = {} as OpenClawConfig; const sessionEntry = makeEntry({ providerOverride: "openrouter", @@ -709,14 +700,14 @@ describe("createModelSelectionState auto-failover override self-healing", () => sessionKey, defaultProvider, defaultModel, - // Caller already preloaded fallback values from stored override provider: "openrouter", model: "minimax/minimax-m2.7", hasModelDirective: false, }); - expect(state.provider).toBe(defaultProvider); - expect(state.model).toBe(defaultModel); + expect(state.provider).toBe("openrouter"); + expect(state.model).toBe("minimax/minimax-m2.7"); + expect(sessionStore[sessionKey]?.modelOverrideSource).toBe("auto"); expect(state.resetModelOverride).toBe(false); }); diff --git a/src/auto-reply/reply/model-selection.ts b/src/auto-reply/reply/model-selection.ts index a4cb8566526..183c51ff68c 100644 --- a/src/auto-reply/reply/model-selection.ts +++ b/src/auto-reply/reply/model-selection.ts @@ -133,8 +133,6 @@ export async function createModelSelectionState(params: { overrideProvider: sessionEntry?.providerOverride, overrideModel: sessionEntry?.modelOverride, }); - const hadDirectAutoSessionOverride = - sessionEntry?.modelOverrideSource === "auto" && Boolean(directStoredOverride); if (needsModelCatalog) { modelCatalog = await (await loadModelCatalogRuntime()).loadModelCatalog({ config: cfg }); @@ -170,42 +168,7 @@ export async function createModelSelectionState(params: { logStage("configured-catalog-ready", `entries=${configuredModelCatalog.length}`); } - // Auto-failover overrides are transient: on this turn, retry the configured - // primary so the session self-heals when the primary recovers. The fallback loop - // in runWithModelFallback will re-set the override if the primary is still down. - // User-selected overrides (/model command) are preserved across turns. 
- // - // Clear this before allowlist validation so an old fallback outside the current - // agent allowlist does not emit the unrelated "Model override not allowed" event. - if (hadDirectAutoSessionOverride && sessionEntry && sessionStore && sessionKey) { - const { updated } = applyModelOverrideToSessionEntry({ - entry: sessionEntry, - selection: { provider: defaultProvider, model: defaultModel, isDefault: true }, - }); - if (updated) { - sessionStore[sessionKey] = sessionEntry; - if (storePath) { - await ( - await loadSessionStoreRuntime() - ).updateSessionStore(storePath, (store) => { - store[sessionKey] = sessionEntry; - }); - } - // Reset in-memory selection to the configured primary. The caller-provided - // provider/model may already be set to the fallback by stored-override preload - // in get-reply.ts; updating them here ensures this turn retries the primary. - provider = defaultProvider; - model = defaultModel; - } - } - - if ( - sessionEntry && - sessionStore && - sessionKey && - directStoredOverride && - !hadDirectAutoSessionOverride - ) { + if (sessionEntry && sessionStore && sessionKey && directStoredOverride) { const normalizedOverride = normalizeModelRef( directStoredOverride.provider, directStoredOverride.model, @@ -230,15 +193,13 @@ export async function createModelSelectionState(params: { } } - const storedOverride = hadDirectAutoSessionOverride - ? undefined - : resolveStoredModelOverride({ - sessionEntry, - sessionStore, - sessionKey, - parentSessionKey, - defaultProvider, - }); + const storedOverride = resolveStoredModelOverride({ + sessionEntry, + sessionStore, + sessionKey, + parentSessionKey, + defaultProvider, + }); // Skip stored session model override only when an explicit heartbeat.model // was resolved. Heartbeat runs without heartbeat.model should still inherit // the regular session/parent model override behavior.