From 622b91d04ede86b1c3cc91060b20d3d482ba2151 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 1 Apr 2026 16:12:17 +0900
Subject: [PATCH] fix: queue model switches behind busy runs

---
 CHANGELOG.md                                  |  1 +
 docs/concepts/models.md                       |  1 +
 docs/tools/slash-commands.md                  |  1 +
 src/agents/pi-embedded-runner/run.ts          | 40 ------------
 .../reply/directive-handling.impl.ts          | 21 ++++---
 .../reply/directive-handling.model.test.ts    | 39 +++++++++---
 src/auto-reply/reply/followup-runner.test.ts  | 45 +++++++++++---
 src/auto-reply/reply/queue/state.test.ts      | 62 +++++++++++++++++++
 src/auto-reply/reply/queue/state.ts           | 45 +++++++++++---
 9 files changed, 180 insertions(+), 75 deletions(-)
 create mode 100644 src/auto-reply/reply/queue/state.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 35697554ea7..bf4f8170495 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
 - MiniMax/plugins: auto-enable the bundled MiniMax plugin for API-key auth/config so MiniMax image generation and other plugin-owned capabilities load without manual plugin allowlisting. (#57127) Thanks @tars90percent.
 - Memory/QMD: prefer `--mask` over `--glob` when creating QMD collections so default memory collections keep their intended patterns and stop colliding on restart. (#58643) Thanks @GitZhangChi.
 - Gateway/HTTP: skip failing HTTP request stages so one broken facade no longer forces every HTTP endpoint to return 500. (#58746) Thanks @yelog
+- Sessions/model switching: keep `/model` changes queued behind busy runs instead of interrupting the active turn, and retarget queued followups so later work picks up the new model as soon as the current turn finishes.
 
 ## 2026.3.31
 
diff --git a/docs/concepts/models.md b/docs/concepts/models.md
index 4b92dd1d148..2bdafdfa859 100644
--- a/docs/concepts/models.md
+++ b/docs/concepts/models.md
@@ -108,6 +108,7 @@ Notes:
 - `/model` (and `/model list`) is a compact, numbered picker (model family + available providers).
 - On Discord, `/model` and `/models` open an interactive picker with provider and model dropdowns plus a Submit step.
 - `/model <#>` selects from that picker.
+- `/model` updates the session selection immediately. If the agent is idle, the next run uses the new model right away. If the agent is busy, the in-flight run finishes first and queued/future work uses the new model after that.
 - `/model status` is the detailed view (auth candidates and, when configured, provider endpoint `baseUrl` + `api` mode).
 - Model refs are parsed by splitting on the **first** `/`. Use `provider/model` when typing `/model <ref>`.
 - If the model ID itself contains `/` (OpenRouter-style), you must include the provider prefix (example: `/model openrouter/moonshotai/kimi-k2`).
diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md
index c681adad59e..78ca32c42bb 100644
--- a/docs/tools/slash-commands.md
+++ b/docs/tools/slash-commands.md
@@ -146,6 +146,7 @@ Notes:
 - `/fast` is provider-specific: OpenAI/OpenAI Codex map it to `service_tier=priority` on native Responses endpoints, while direct public Anthropic requests, including OAuth-authenticated traffic sent to `api.anthropic.com`, map it to `service_tier=auto` or `standard_only`. See [OpenAI](/providers/openai) and [Anthropic](/providers/anthropic).
 - Tool failure summaries are still shown when relevant, but detailed failure text is only included when `/verbose` is `on` or `full`.
 - `/reasoning` (and `/verbose`) are risky in group settings: they may reveal internal reasoning or tool output you did not intend to expose. Prefer leaving them off, especially in group chats.
+- `/model` persists the new session model immediately, but it does not interrupt a busy run. The current turn finishes first, then queued or future work uses the updated model.
 - **Fast path:** command-only messages from allowlisted senders are handled immediately (bypass queue + model).
 - **Group mention gating:** command-only messages from allowlisted senders bypass mention requirements.
 - **Inline shortcuts (allowlisted senders only):** certain commands also work when embedded in a normal message and are stripped before the model sees the remaining text.
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index debd19b5740..fc34f8ac152 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -28,8 +28,6 @@ import {
 import {
   hasDifferentLiveSessionModelSelection,
   LiveSessionModelSwitchError,
-  resolveLiveSessionModelSelection,
-  shouldTrackPersistedLiveSessionModelSelection,
   consumeLiveSessionModelSwitch,
 } from "../live-model-switch.js";
 import {
@@ -238,18 +236,6 @@ export async function runEmbeddedPiAgent(
         authProfileId: preferredProfileId,
         authProfileIdSource: params.authProfileIdSource,
       });
-      const resolvePersistedLiveSelection = () =>
-        resolveLiveSessionModelSelection({
-          cfg: params.config,
-          sessionKey: params.sessionKey,
-          agentId: workspaceResolution.agentId,
-          defaultProvider: provider,
-          defaultModel: modelId,
-        });
-      const shouldTrackPersistedLiveSelection = shouldTrackPersistedLiveSessionModelSelection(
-        resolveCurrentLiveSelection(),
-        resolvePersistedLiveSelection(),
-      );
       const {
         advanceAuthProfile,
         initializeAuthProfile,
@@ -457,15 +443,6 @@ export async function runEmbeddedPiAgent(
             };
           }
           runLoopIterations += 1;
-          const nextSelection = shouldTrackPersistedLiveSelection
-            ? resolvePersistedLiveSelection()
-            : null;
-          if (hasDifferentLiveSessionModelSelection(resolveCurrentLiveSelection(), nextSelection)) {
-            log.info(
-              `live session model switch detected before attempt for ${params.sessionId}: ${provider}/${modelId} -> ${nextSelection.provider}/${nextSelection.model}`,
-            );
-            throw new LiveSessionModelSwitchError(nextSelection);
-          }
           const runtimeAuthRetry = authRetryPending;
           authRetryPending = false;
           attemptedThinking.add(thinkLevel);
@@ -614,23 +591,6 @@ export async function runEmbeddedPiAgent(
             );
             throw new LiveSessionModelSwitchError(requestedSelection);
           }
-          const failedOrAbortedAttempt =
-            aborted || Boolean(promptError) || Boolean(assistantErrorText) || timedOut;
-          const persistedSelection =
-            failedOrAbortedAttempt && shouldTrackPersistedLiveSelection
-              ? resolvePersistedLiveSelection()
-              : null;
-          if (
-            failedOrAbortedAttempt &&
-            canRestartForLiveSwitch &&
-            hasDifferentLiveSessionModelSelection(resolveCurrentLiveSelection(), persistedSelection)
-          ) {
-            log.info(
-              `live session model switch detected after failed attempt for ${params.sessionId}: ${provider}/${modelId} -> ${persistedSelection.provider}/${persistedSelection.model}`,
-            );
-            throw new LiveSessionModelSwitchError(persistedSelection);
-          }
-
           // ── Timeout-triggered compaction ──────────────────────────────────
           // When the LLM times out with high context usage, compact before
           // retrying to break the death spiral of repeated timeouts.
diff --git a/src/auto-reply/reply/directive-handling.impl.ts b/src/auto-reply/reply/directive-handling.impl.ts
index fc3fa0c7f43..ad3f7693126 100644
--- a/src/auto-reply/reply/directive-handling.impl.ts
+++ b/src/auto-reply/reply/directive-handling.impl.ts
@@ -5,7 +5,6 @@ import {
 } from "../../agents/agent-scope.js";
 import { renderExecTargetLabel, resolveExecTarget } from "../../agents/bash-tools.exec-runtime.js";
 import { resolveFastModeState } from "../../agents/fast-mode.js";
-import { requestLiveSessionModelSwitch } from "../../agents/live-model-switch.js";
 import { resolveSandboxRuntimeStatus } from "../../agents/sandbox.js";
 import type { OpenClawConfig } from "../../config/config.js";
 import { type SessionEntry, updateSessionStore } from "../../config/sessions.js";
@@ -32,6 +31,7 @@ import {
   withOptions,
 } from "./directive-handling.shared.js";
 import type { ElevatedLevel, ReasoningLevel, ThinkLevel } from "./directives.js";
+import { refreshQueuedFollowupSession } from "./queue.js";
 
 function resolveExecDefaults(params: {
   cfg: OpenClawConfig;
@@ -442,15 +442,16 @@ export async function handleDirectiveOnly(
         store[sessionKey] = sessionEntry;
       });
     }
-    if (modelSelection && modelSelectionUpdated) {
-      requestLiveSessionModelSwitch({
-        sessionEntry,
-        selection: {
-          provider: modelSelection.provider,
-          model: modelSelection.model,
-          authProfileId: profileOverride,
-          authProfileIdSource: profileOverride ? "user" : undefined,
-        },
+    if (modelSelection && modelSelectionUpdated && sessionKey) {
+      // `/model` should retarget queued/future work without interrupting the
+      // active run. Refresh queued followups so they pick up the persisted
+      // selection once the current turn finishes.
+      refreshQueuedFollowupSession({
+        key: sessionKey,
+        nextProvider: modelSelection.provider,
+        nextModel: modelSelection.model,
+        nextAuthProfileId: profileOverride,
+        nextAuthProfileIdSource: profileOverride ? "user" : undefined,
       });
     }
   }
diff --git a/src/auto-reply/reply/directive-handling.model.test.ts b/src/auto-reply/reply/directive-handling.model.test.ts
index bb64f61254e..e9c9105d72b 100644
--- a/src/auto-reply/reply/directive-handling.model.test.ts
+++ b/src/auto-reply/reply/directive-handling.model.test.ts
@@ -17,6 +17,9 @@ import { persistInlineDirectives } from "./directive-handling.persist.js";
 const liveModelSwitchMocks = vi.hoisted(() => ({
   requestLiveSessionModelSwitch: vi.fn(),
 }));
+const queueMocks = vi.hoisted(() => ({
+  refreshQueuedFollowupSession: vi.fn(),
+}));
 
 // Mock dependencies for directive handling persistence.
 vi.mock("../../agents/agent-scope.js", () => ({
@@ -42,6 +45,11 @@ vi.mock("../../agents/live-model-switch.js", () => ({
     liveModelSwitchMocks.requestLiveSessionModelSwitch(...args),
 }));
 
+vi.mock("./queue.js", () => ({
+  refreshQueuedFollowupSession: (...args: unknown[]) =>
+    queueMocks.refreshQueuedFollowupSession(...args),
+}));
+
 const TEST_AGENT_DIR = "/tmp/agent";
 const OPENAI_DATE_PROFILE_ID = "20251001";
 
@@ -75,6 +83,7 @@ beforeEach(() => {
     },
   ]);
   liveModelSwitchMocks.requestLiveSessionModelSwitch.mockReset().mockReturnValue(false);
+  queueMocks.refreshQueuedFollowupSession.mockReset();
 });
 
 afterEach(() => {
@@ -507,7 +516,7 @@ describe("handleDirectiveOnly model persist behavior (fixes #1435)", () => {
     expect(result?.text).not.toContain("failed");
   });
 
-  it("requests a live restart when /model mutates an active session", async () => {
+  it("does not request a live restart when /model mutates an active session", async () => {
     const directives = parseInlineDirectives("/model openai/gpt-4o");
     const sessionEntry = createSessionEntry();
 
@@ -518,14 +527,26 @@ describe("handleDirectiveOnly model persist behavior (fixes #1435)", () => {
       }),
     );
 
-    expect(liveModelSwitchMocks.requestLiveSessionModelSwitch).toHaveBeenCalledWith({
-      sessionEntry,
-      selection: {
-        provider: "openai",
-        model: "gpt-4o",
-        authProfileId: undefined,
-        authProfileIdSource: undefined,
-      },
+    expect(liveModelSwitchMocks.requestLiveSessionModelSwitch).not.toHaveBeenCalled();
+  });
+
+  it("retargets queued followups when /model mutates session state", async () => {
+    const directives = parseInlineDirectives("/model openai/gpt-4o");
+    const sessionEntry = createSessionEntry();
+
+    await handleDirectiveOnly(
+      createHandleParams({
+        directives,
+        sessionEntry,
+      }),
+    );
+
+    expect(queueMocks.refreshQueuedFollowupSession).toHaveBeenCalledWith({
+      key: sessionKey,
+      nextProvider: "openai",
+      nextModel: "gpt-4o",
+      nextAuthProfileId: undefined,
+      nextAuthProfileIdSource: undefined,
     });
   });
 
diff --git a/src/auto-reply/reply/followup-runner.test.ts b/src/auto-reply/reply/followup-runner.test.ts
index c60cb225d98..fe983fd9a98 100644
--- a/src/auto-reply/reply/followup-runner.test.ts
+++ b/src/auto-reply/reply/followup-runner.test.ts
@@ -125,25 +125,54 @@ function refreshQueuedFollowupSessionForFollowupTest(params: {
   previousSessionId?: string;
   nextSessionId?: string;
   nextSessionFile?: string;
+  nextProvider?: string;
+  nextModel?: string;
+  nextAuthProfileId?: string;
+  nextAuthProfileIdSource?: "auto" | "user";
 }): void {
   const cleaned = params.key.trim();
-  if (!cleaned || !params.previousSessionId || !params.nextSessionId) {
-    return;
-  }
-  if (params.previousSessionId === params.nextSessionId) {
+  if (!cleaned) {
     return;
   }
   const queue = FOLLOWUP_TEST_QUEUES.get(cleaned);
   if (!queue) {
     return;
   }
+  const shouldRewriteSession =
+    Boolean(params.previousSessionId) &&
+    Boolean(params.nextSessionId) &&
+    params.previousSessionId !== params.nextSessionId;
+  const shouldRewriteSelection =
+    typeof params.nextProvider === "string" ||
+    typeof params.nextModel === "string" ||
+    Object.hasOwn(params, "nextAuthProfileId") ||
+    Object.hasOwn(params, "nextAuthProfileIdSource");
+  if (!shouldRewriteSession && !shouldRewriteSelection) {
+    return;
+  }
   const rewrite = (run?: FollowupRun["run"]) => {
-    if (!run || run.sessionId !== params.previousSessionId) {
+    if (!run) {
       return;
     }
-    run.sessionId = params.nextSessionId!;
-    if (params.nextSessionFile?.trim()) {
-      run.sessionFile = params.nextSessionFile;
+    if (shouldRewriteSession && run.sessionId === params.previousSessionId) {
+      run.sessionId = params.nextSessionId!;
+      if (params.nextSessionFile?.trim()) {
+        run.sessionFile = params.nextSessionFile;
+      }
+    }
+    if (shouldRewriteSelection) {
+      if (typeof params.nextProvider === "string") {
+        run.provider = params.nextProvider;
+      }
+      if (typeof params.nextModel === "string") {
+        run.model = params.nextModel;
+      }
+      if (Object.hasOwn(params, "nextAuthProfileId")) {
+        run.authProfileId = params.nextAuthProfileId?.trim() || undefined;
+      }
+      if (Object.hasOwn(params, "nextAuthProfileIdSource")) {
+        run.authProfileIdSource = run.authProfileId ? params.nextAuthProfileIdSource : undefined;
+      }
     }
   };
   rewrite(queue.lastRun);
diff --git a/src/auto-reply/reply/queue/state.test.ts b/src/auto-reply/reply/queue/state.test.ts
new file mode 100644
index 00000000000..d2cddcb9b82
--- /dev/null
+++ b/src/auto-reply/reply/queue/state.test.ts
@@ -0,0 +1,62 @@
+import { afterEach, describe, expect, it } from "vitest";
+import { clearFollowupQueue, getFollowupQueue, refreshQueuedFollowupSession } from "./state.js";
+import type { FollowupRun } from "./types.js";
+
+const QUEUE_KEY = "agent:main:dm:test";
+
+afterEach(() => {
+  clearFollowupQueue(QUEUE_KEY);
+});
+
+function makeRun(): FollowupRun["run"] {
+  return {
+    agentId: "main",
+    agentDir: "/tmp/agent",
+    sessionId: "session-1",
+    sessionKey: QUEUE_KEY,
+    sessionFile: "/tmp/session-1.jsonl",
+    workspaceDir: "/tmp/workspace",
+    config: {} as FollowupRun["run"]["config"],
+    provider: "anthropic",
+    model: "claude-opus-4-5",
+    authProfileId: "profile-a",
+    authProfileIdSource: "user",
+    timeoutMs: 30_000,
+    blockReplyBreak: "message_end",
+  };
+}
+
+describe("refreshQueuedFollowupSession", () => {
+  it("retargets queued runs to the persisted selection", () => {
+    const queue = getFollowupQueue(QUEUE_KEY, { mode: "queue" });
+    const lastRun = makeRun();
+    const queuedRun: FollowupRun = {
+      prompt: "queued message",
+      enqueuedAt: Date.now(),
+      run: makeRun(),
+    };
+    queue.lastRun = lastRun;
+    queue.items.push(queuedRun);
+
+    refreshQueuedFollowupSession({
+      key: QUEUE_KEY,
+      nextProvider: "openai",
+      nextModel: "gpt-4o",
+      nextAuthProfileId: undefined,
+      nextAuthProfileIdSource: undefined,
+    });
+
+    expect(queue.lastRun).toMatchObject({
+      provider: "openai",
+      model: "gpt-4o",
+      authProfileId: undefined,
+      authProfileIdSource: undefined,
+    });
+    expect(queue.items[0]?.run).toMatchObject({
+      provider: "openai",
+      model: "gpt-4o",
+      authProfileId: undefined,
+      authProfileIdSource: undefined,
+    });
+  });
+});
diff --git a/src/auto-reply/reply/queue/state.ts b/src/auto-reply/reply/queue/state.ts
index 36a09ef1ebc..954251ac59e 100644
--- a/src/auto-reply/reply/queue/state.ts
+++ b/src/auto-reply/reply/queue/state.ts
@@ -91,26 +91,55 @@ export function refreshQueuedFollowupSession(params: {
   previousSessionId?: string;
   nextSessionId?: string;
   nextSessionFile?: string;
+  nextProvider?: string;
+  nextModel?: string;
+  nextAuthProfileId?: string;
+  nextAuthProfileIdSource?: "auto" | "user";
 }): void {
   const cleaned = params.key.trim();
-  if (!cleaned || !params.previousSessionId || !params.nextSessionId) {
-    return;
-  }
-  if (params.previousSessionId === params.nextSessionId) {
+  if (!cleaned) {
     return;
   }
   const queue = getExistingFollowupQueue(cleaned);
   if (!queue) {
     return;
   }
+  const shouldRewriteSession =
+    Boolean(params.previousSessionId) &&
+    Boolean(params.nextSessionId) &&
+    params.previousSessionId !== params.nextSessionId;
+  const shouldRewriteSelection =
+    typeof params.nextProvider === "string" ||
+    typeof params.nextModel === "string" ||
+    Object.hasOwn(params, "nextAuthProfileId") ||
+    Object.hasOwn(params, "nextAuthProfileIdSource");
+  if (!shouldRewriteSession && !shouldRewriteSelection) {
+    return;
+  }
 
   const rewriteRun = (run?: FollowupRun["run"]) => {
-    if (!run || run.sessionId !== params.previousSessionId) {
+    if (!run) {
       return;
     }
-    run.sessionId = params.nextSessionId!;
-    if (params.nextSessionFile?.trim()) {
-      run.sessionFile = params.nextSessionFile;
+    if (shouldRewriteSession && run.sessionId === params.previousSessionId) {
+      run.sessionId = params.nextSessionId!;
+      if (params.nextSessionFile?.trim()) {
+        run.sessionFile = params.nextSessionFile;
+      }
+    }
+    if (shouldRewriteSelection) {
+      if (typeof params.nextProvider === "string") {
+        run.provider = params.nextProvider;
+      }
+      if (typeof params.nextModel === "string") {
+        run.model = params.nextModel;
+      }
+      if (Object.hasOwn(params, "nextAuthProfileId")) {
+        run.authProfileId = params.nextAuthProfileId?.trim() || undefined;
+      }
+      if (Object.hasOwn(params, "nextAuthProfileIdSource")) {
+        run.authProfileIdSource = run.authProfileId ? params.nextAuthProfileIdSource : undefined;
+      }
     }
   };