From 622b91d04ede86b1c3cc91060b20d3d482ba2151 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 1 Apr 2026 16:12:17 +0900 Subject: [PATCH] fix: queue model switches behind busy runs --- CHANGELOG.md | 1 + docs/concepts/models.md | 1 + docs/tools/slash-commands.md | 1 + src/agents/pi-embedded-runner/run.ts | 40 ------------ .../reply/directive-handling.impl.ts | 21 ++++--- .../reply/directive-handling.model.test.ts | 39 +++++++++--- src/auto-reply/reply/followup-runner.test.ts | 45 +++++++++++--- src/auto-reply/reply/queue/state.test.ts | 62 +++++++++++++++++++ src/auto-reply/reply/queue/state.ts | 45 +++++++++++--- 9 files changed, 180 insertions(+), 75 deletions(-) create mode 100644 src/auto-reply/reply/queue/state.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 35697554ea7..bf4f8170495 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - MiniMax/plugins: auto-enable the bundled MiniMax plugin for API-key auth/config so MiniMax image generation and other plugin-owned capabilities load without manual plugin allowlisting. (#57127) Thanks @tars90percent. - Memory/QMD: prefer `--mask` over `--glob` when creating QMD collections so default memory collections keep their intended patterns and stop colliding on restart. (#58643) Thanks @GitZhangChi. - Gateway/HTTP: skip failing HTTP request stages so one broken facade no longer forces every HTTP endpoint to return 500. (#58746) Thanks @yelog +- Sessions/model switching: keep `/model` changes queued behind busy runs instead of interrupting the active turn, and retarget queued followups so later work picks up the new model as soon as the current turn finishes. ## 2026.3.31 diff --git a/docs/concepts/models.md b/docs/concepts/models.md index 4b92dd1d148..2bdafdfa859 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -108,6 +108,7 @@ Notes: - `/model` (and `/model list`) is a compact, numbered picker (model family + available providers). - On Discord, `/model` and `/models` open an interactive picker with provider and model dropdowns plus a Submit step. - `/model <#>` selects from that picker. +- `/model` updates the session selection immediately. If the agent is idle, the next run uses the new model right away. If the agent is busy, the in-flight run finishes first and queued/future work uses the new model after that. - `/model status` is the detailed view (auth candidates and, when configured, provider endpoint `baseUrl` + `api` mode). - Model refs are parsed by splitting on the **first** `/`. Use `provider/model` when typing `/model `. - If the model ID itself contains `/` (OpenRouter-style), you must include the provider prefix (example: `/model openrouter/moonshotai/kimi-k2`). diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md index c681adad59e..78ca32c42bb 100644 --- a/docs/tools/slash-commands.md +++ b/docs/tools/slash-commands.md @@ -146,6 +146,7 @@ Notes: - `/fast` is provider-specific: OpenAI/OpenAI Codex map it to `service_tier=priority` on native Responses endpoints, while direct public Anthropic requests, including OAuth-authenticated traffic sent to `api.anthropic.com`, map it to `service_tier=auto` or `standard_only`. See [OpenAI](/providers/openai) and [Anthropic](/providers/anthropic). - Tool failure summaries are still shown when relevant, but detailed failure text is only included when `/verbose` is `on` or `full`. - `/reasoning` (and `/verbose`) are risky in group settings: they may reveal internal reasoning or tool output you did not intend to expose. Prefer leaving them off, especially in group chats. +- `/model` persists the new session model immediately, but it does not interrupt a busy run. The current turn finishes first, then queued or future work uses the updated model. - **Fast path:** command-only messages from allowlisted senders are handled immediately (bypass queue + model). - **Group mention gating:** command-only messages from allowlisted senders bypass mention requirements. - **Inline shortcuts (allowlisted senders only):** certain commands also work when embedded in a normal message and are stripped before the model sees the remaining text. diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index debd19b5740..fc34f8ac152 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -28,8 +28,6 @@ import { import { hasDifferentLiveSessionModelSelection, LiveSessionModelSwitchError, - resolveLiveSessionModelSelection, - shouldTrackPersistedLiveSessionModelSelection, consumeLiveSessionModelSwitch, } from "../live-model-switch.js"; import { @@ -238,18 +236,6 @@ export async function runEmbeddedPiAgent( authProfileId: preferredProfileId, authProfileIdSource: params.authProfileIdSource, }); - const resolvePersistedLiveSelection = () => - resolveLiveSessionModelSelection({ - cfg: params.config, - sessionKey: params.sessionKey, - agentId: workspaceResolution.agentId, - defaultProvider: provider, - defaultModel: modelId, - }); - const shouldTrackPersistedLiveSelection = shouldTrackPersistedLiveSessionModelSelection( - resolveCurrentLiveSelection(), - resolvePersistedLiveSelection(), - ); const { advanceAuthProfile, initializeAuthProfile, @@ -457,15 +443,6 @@ export async function runEmbeddedPiAgent( }; } runLoopIterations += 1; - const nextSelection = shouldTrackPersistedLiveSelection - ? resolvePersistedLiveSelection() - : null; - if (hasDifferentLiveSessionModelSelection(resolveCurrentLiveSelection(), nextSelection)) { - log.info( - `live session model switch detected before attempt for ${params.sessionId}: ${provider}/${modelId} -> ${nextSelection.provider}/${nextSelection.model}`, - ); - throw new LiveSessionModelSwitchError(nextSelection); - } const runtimeAuthRetry = authRetryPending; authRetryPending = false; attemptedThinking.add(thinkLevel); @@ -614,23 +591,6 @@ export async function runEmbeddedPiAgent( ); throw new LiveSessionModelSwitchError(requestedSelection); } - const failedOrAbortedAttempt = - aborted || Boolean(promptError) || Boolean(assistantErrorText) || timedOut; - const persistedSelection = - failedOrAbortedAttempt && shouldTrackPersistedLiveSelection - ? resolvePersistedLiveSelection() - : null; - if ( - failedOrAbortedAttempt && - canRestartForLiveSwitch && - hasDifferentLiveSessionModelSelection(resolveCurrentLiveSelection(), persistedSelection) - ) { - log.info( - `live session model switch detected after failed attempt for ${params.sessionId}: ${provider}/${modelId} -> ${persistedSelection.provider}/${persistedSelection.model}`, - ); - throw new LiveSessionModelSwitchError(persistedSelection); - } - // ── Timeout-triggered compaction ────────────────────────────────── // When the LLM times out with high context usage, compact before // retrying to break the death spiral of repeated timeouts. diff --git a/src/auto-reply/reply/directive-handling.impl.ts b/src/auto-reply/reply/directive-handling.impl.ts index fc3fa0c7f43..ad3f7693126 100644 --- a/src/auto-reply/reply/directive-handling.impl.ts +++ b/src/auto-reply/reply/directive-handling.impl.ts @@ -5,7 +5,6 @@ import { } from "../../agents/agent-scope.js"; import { renderExecTargetLabel, resolveExecTarget } from "../../agents/bash-tools.exec-runtime.js"; import { resolveFastModeState } from "../../agents/fast-mode.js"; -import { requestLiveSessionModelSwitch } from "../../agents/live-model-switch.js"; import { resolveSandboxRuntimeStatus } from "../../agents/sandbox.js"; import type { OpenClawConfig } from "../../config/config.js"; import { type SessionEntry, updateSessionStore } from "../../config/sessions.js"; @@ -32,6 +31,7 @@ import { withOptions, } from "./directive-handling.shared.js"; import type { ElevatedLevel, ReasoningLevel, ThinkLevel } from "./directives.js"; +import { refreshQueuedFollowupSession } from "./queue.js"; function resolveExecDefaults(params: { cfg: OpenClawConfig; @@ -442,15 +442,16 @@ export async function handleDirectiveOnly( store[sessionKey] = sessionEntry; }); } - if (modelSelection && modelSelectionUpdated) { - requestLiveSessionModelSwitch({ - sessionEntry, - selection: { - provider: modelSelection.provider, - model: modelSelection.model, - authProfileId: profileOverride, - authProfileIdSource: profileOverride ? "user" : undefined, - }, + if (modelSelection && modelSelectionUpdated && sessionKey) { + // `/model` should retarget queued/future work without interrupting the + // active run. Refresh queued followups so they pick up the persisted + // selection once the current turn finishes. + refreshQueuedFollowupSession({ + key: sessionKey, + nextProvider: modelSelection.provider, + nextModel: modelSelection.model, + nextAuthProfileId: profileOverride, + nextAuthProfileIdSource: profileOverride ? "user" : undefined, }); } } diff --git a/src/auto-reply/reply/directive-handling.model.test.ts b/src/auto-reply/reply/directive-handling.model.test.ts index bb64f61254e..e9c9105d72b 100644 --- a/src/auto-reply/reply/directive-handling.model.test.ts +++ b/src/auto-reply/reply/directive-handling.model.test.ts @@ -17,6 +17,9 @@ import { persistInlineDirectives } from "./directive-handling.persist.js"; const liveModelSwitchMocks = vi.hoisted(() => ({ requestLiveSessionModelSwitch: vi.fn(), })); +const queueMocks = vi.hoisted(() => ({ + refreshQueuedFollowupSession: vi.fn(), +})); // Mock dependencies for directive handling persistence. vi.mock("../../agents/agent-scope.js", () => ({ @@ -42,6 +45,11 @@ vi.mock("../../agents/live-model-switch.js", () => ({ liveModelSwitchMocks.requestLiveSessionModelSwitch(...args), })); +vi.mock("./queue.js", () => ({ + refreshQueuedFollowupSession: (...args: unknown[]) => + queueMocks.refreshQueuedFollowupSession(...args), +})); + const TEST_AGENT_DIR = "/tmp/agent"; const OPENAI_DATE_PROFILE_ID = "20251001"; @@ -75,6 +83,7 @@ beforeEach(() => { }, ]); liveModelSwitchMocks.requestLiveSessionModelSwitch.mockReset().mockReturnValue(false); + queueMocks.refreshQueuedFollowupSession.mockReset(); }); afterEach(() => { @@ -507,7 +516,7 @@ describe("handleDirectiveOnly model persist behavior (fixes #1435)", () => { expect(result?.text).not.toContain("failed"); }); - it("requests a live restart when /model mutates an active session", async () => { + it("does not request a live restart when /model mutates an active session", async () => { const directives = parseInlineDirectives("/model openai/gpt-4o"); const sessionEntry = createSessionEntry(); @@ -518,14 +527,26 @@ describe("handleDirectiveOnly model persist behavior (fixes #1435)", () => { }), ); - expect(liveModelSwitchMocks.requestLiveSessionModelSwitch).toHaveBeenCalledWith({ - sessionEntry, - selection: { - provider: "openai", - model: "gpt-4o", - authProfileId: undefined, - authProfileIdSource: undefined, - }, + expect(liveModelSwitchMocks.requestLiveSessionModelSwitch).not.toHaveBeenCalled(); + }); + + it("retargets queued followups when /model mutates session state", async () => { + const directives = parseInlineDirectives("/model openai/gpt-4o"); + const sessionEntry = createSessionEntry(); + + await handleDirectiveOnly( + createHandleParams({ + directives, + sessionEntry, + }), + ); + + expect(queueMocks.refreshQueuedFollowupSession).toHaveBeenCalledWith({ + key: sessionKey, + nextProvider: "openai", + nextModel: "gpt-4o", + nextAuthProfileId: undefined, + nextAuthProfileIdSource: undefined, }); }); diff --git a/src/auto-reply/reply/followup-runner.test.ts b/src/auto-reply/reply/followup-runner.test.ts index c60cb225d98..fe983fd9a98 100644 --- a/src/auto-reply/reply/followup-runner.test.ts +++ b/src/auto-reply/reply/followup-runner.test.ts @@ -125,25 +125,54 @@ function refreshQueuedFollowupSessionForFollowupTest(params: { previousSessionId?: string; nextSessionId?: string; nextSessionFile?: string; + nextProvider?: string; + nextModel?: string; + nextAuthProfileId?: string; + nextAuthProfileIdSource?: "auto" | "user"; }): void { const cleaned = params.key.trim(); - if (!cleaned || !params.previousSessionId || !params.nextSessionId) { - return; - } - if (params.previousSessionId === params.nextSessionId) { + if (!cleaned) { return; } const queue = FOLLOWUP_TEST_QUEUES.get(cleaned); if (!queue) { return; } + const shouldRewriteSession = + Boolean(params.previousSessionId) && + Boolean(params.nextSessionId) && + params.previousSessionId !== params.nextSessionId; + const shouldRewriteSelection = + typeof params.nextProvider === "string" || + typeof params.nextModel === "string" || + Object.hasOwn(params, "nextAuthProfileId") || + Object.hasOwn(params, "nextAuthProfileIdSource"); + if (!shouldRewriteSession && !shouldRewriteSelection) { + return; + } const rewrite = (run?: FollowupRun["run"]) => { - if (!run || run.sessionId !== params.previousSessionId) { + if (!run) { return; } - run.sessionId = params.nextSessionId!; - if (params.nextSessionFile?.trim()) { - run.sessionFile = params.nextSessionFile; + if (shouldRewriteSession && run.sessionId === params.previousSessionId) { + run.sessionId = params.nextSessionId!; + if (params.nextSessionFile?.trim()) { + run.sessionFile = params.nextSessionFile; + } + } + if (shouldRewriteSelection) { + if (typeof params.nextProvider === "string") { + run.provider = params.nextProvider; + } + if (typeof params.nextModel === "string") { + run.model = params.nextModel; + } + if (Object.hasOwn(params, "nextAuthProfileId")) { + run.authProfileId = params.nextAuthProfileId?.trim() || undefined; + } + if (Object.hasOwn(params, "nextAuthProfileIdSource")) { + run.authProfileIdSource = run.authProfileId ? params.nextAuthProfileIdSource : undefined; + } } }; rewrite(queue.lastRun); diff --git a/src/auto-reply/reply/queue/state.test.ts b/src/auto-reply/reply/queue/state.test.ts new file mode 100644 index 00000000000..d2cddcb9b82 --- /dev/null +++ b/src/auto-reply/reply/queue/state.test.ts @@ -0,0 +1,62 @@ +import { afterEach, describe, expect, it } from "vitest"; +import { clearFollowupQueue, getFollowupQueue, refreshQueuedFollowupSession } from "./state.js"; +import type { FollowupRun } from "./types.js"; + +const QUEUE_KEY = "agent:main:dm:test"; + +afterEach(() => { + clearFollowupQueue(QUEUE_KEY); +}); + +function makeRun(): FollowupRun["run"] { + return { + agentId: "main", + agentDir: "/tmp/agent", + sessionId: "session-1", + sessionKey: QUEUE_KEY, + sessionFile: "/tmp/session-1.jsonl", + workspaceDir: "/tmp/workspace", + config: {} as FollowupRun["run"]["config"], + provider: "anthropic", + model: "claude-opus-4-5", + authProfileId: "profile-a", + authProfileIdSource: "user", + timeoutMs: 30_000, + blockReplyBreak: "message_end", + }; +} + +describe("refreshQueuedFollowupSession", () => { + it("retargets queued runs to the persisted selection", () => { + const queue = getFollowupQueue(QUEUE_KEY, { mode: "queue" }); + const lastRun = makeRun(); + const queuedRun: FollowupRun = { + prompt: "queued message", + enqueuedAt: Date.now(), + run: makeRun(), + }; + queue.lastRun = lastRun; + queue.items.push(queuedRun); + + refreshQueuedFollowupSession({ + key: QUEUE_KEY, + nextProvider: "openai", + nextModel: "gpt-4o", + nextAuthProfileId: undefined, + nextAuthProfileIdSource: undefined, + }); + + expect(queue.lastRun).toMatchObject({ + provider: "openai", + model: "gpt-4o", + authProfileId: undefined, + authProfileIdSource: undefined, + }); + expect(queue.items[0]?.run).toMatchObject({ + provider: "openai", + model: "gpt-4o", + authProfileId: undefined, + authProfileIdSource: undefined, + }); + }); +}); diff --git a/src/auto-reply/reply/queue/state.ts b/src/auto-reply/reply/queue/state.ts index 36a09ef1ebc..954251ac59e 100644 --- a/src/auto-reply/reply/queue/state.ts +++ b/src/auto-reply/reply/queue/state.ts @@ -91,26 +91,55 @@ export function refreshQueuedFollowupSession(params: { previousSessionId?: string; nextSessionId?: string; nextSessionFile?: string; + nextProvider?: string; + nextModel?: string; + nextAuthProfileId?: string; + nextAuthProfileIdSource?: "auto" | "user"; }): void { const cleaned = params.key.trim(); - if (!cleaned || !params.previousSessionId || !params.nextSessionId) { - return; - } - if (params.previousSessionId === params.nextSessionId) { + if (!cleaned) { return; } const queue = getExistingFollowupQueue(cleaned); if (!queue) { return; } + const shouldRewriteSession = + Boolean(params.previousSessionId) && + Boolean(params.nextSessionId) && + params.previousSessionId !== params.nextSessionId; + const shouldRewriteSelection = + typeof params.nextProvider === "string" || + typeof params.nextModel === "string" || + Object.hasOwn(params, "nextAuthProfileId") || + Object.hasOwn(params, "nextAuthProfileIdSource"); + if (!shouldRewriteSession && !shouldRewriteSelection) { + return; + } const rewriteRun = (run?: FollowupRun["run"]) => { - if (!run || run.sessionId !== params.previousSessionId) { + if (!run) { return; } - run.sessionId = params.nextSessionId!; - if (params.nextSessionFile?.trim()) { - run.sessionFile = params.nextSessionFile; + if (shouldRewriteSession && run.sessionId === params.previousSessionId) { + run.sessionId = params.nextSessionId!; + if (params.nextSessionFile?.trim()) { + run.sessionFile = params.nextSessionFile; + } + } + if (shouldRewriteSelection) { + if (typeof params.nextProvider === "string") { + run.provider = params.nextProvider; + } + if (typeof params.nextModel === "string") { + run.model = params.nextModel; + } + if (Object.hasOwn(params, "nextAuthProfileId")) { + run.authProfileId = params.nextAuthProfileId?.trim() || undefined; + } + if (Object.hasOwn(params, "nextAuthProfileIdSource")) { + run.authProfileIdSource = run.authProfileId ? params.nextAuthProfileIdSource : undefined; + } } };