fix: unify reply lifecycle across stop, rotation, and restart (#61267) (thanks @dutifulbob)

2026-04-17 04:01:05 +00:00 · 2026-04-05 19:32:27 +02:00
parent bb494ea3ed
commit 3f6840230b
23 changed files with 2263 additions and 375 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes

 - Security: preserve restrictive plugin-only tool allowlists, require owner access for `/allowlist add` and `/allowlist remove`, fail closed when `before_tool_call` hooks crash, block browser SSRF redirect bypasses earlier, and keep non-interactive auth-choice inference scoped to bundled and already-trusted plugins. (#58476, #59836, #59822, #58771, #59120) Thanks @eleqtrizit and @pgondhi987.
+- Auto-reply: unify reply lifecycle ownership across preflight compaction, session rotation, CLI-backed runs, and gateway restart handling so `/stop` and same-session overlap checks target the right active turn and restart-interrupted turns return the restart notice instead of being silently dropped. (#61267) Thanks @dutifulbob.
 - Agents/Claude CLI/security: clear inherited Claude Code config-root and plugin-root env overrides like `CLAUDE_CONFIG_DIR` and `CLAUDE_CODE_PLUGIN_*`, so OpenClaw-launched Claude CLI runs cannot be silently pointed at an alternate Claude config/plugin tree with different hooks, plugins, or auth context. Thanks @vincentkoc.
 - Agents/Claude CLI/security: clear inherited Claude Code provider-routing and managed-auth env overrides, and mark OpenClaw-launched Claude CLI runs as host-managed, so Claude CLI backdoor sessions cannot be silently redirected to proxy, Bedrock, Vertex, Foundry, or parent-managed token contexts. Thanks @vincentkoc.
 - Agents/Claude CLI/security: force host-managed Claude CLI backdoor runs to `--setting-sources user`, even under custom backend arg overrides, so repo-local `.claude` project/local settings, hooks, and plugin discovery do not silently execute inside non-interactive OpenClaw sessions. Thanks @vincentkoc.
--- a/src/agents/cli-runner.spawn.test.ts
+++ b/src/agents/cli-runner.spawn.test.ts
@@ -150,6 +150,70 @@ describe("runCliAgent spawn path", () => {
    expect(input.scopeKey).toContain("thread-123");
  });

+  it("cancels the managed CLI run when the abort signal fires", async () => { // Aborting params.abortSignal must cancel the supervised run and reject with AbortError.
+    const runCliAgent = await setupCliRunnerTestModule();
+    const abortController = new AbortController();
+    let resolveWait!: (value: { // Assigned inside the mocked wait() below; cancel() calls it to settle that promise.
+      reason:
+        | "manual-cancel"
+        | "overall-timeout"
+        | "no-output-timeout"
+        | "spawn-error"
+        | "signal"
+        | "exit";
+      exitCode: number | null;
+      exitSignal: NodeJS.Signals | number | null;
+      durationMs: number;
+      stdout: string;
+      stderr: string;
+      timedOut: boolean;
+      noOutputTimedOut: boolean;
+    }) => void;
+    const cancel = vi.fn((reason?: string) => { // Fake cancel: resolves the pending wait() with an empty-output result.
+      resolveWait({
+        reason: reason === "manual-cancel" ? "manual-cancel" : "signal", // Any other cancel reason is reported as "signal".
+        exitCode: null,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: "",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      });
+    });
+    supervisorSpawnMock.mockResolvedValueOnce({ // The next supervisor spawn yields this fake managed run.
+      runId: "run-supervisor",
+      pid: 1234,
+      startedAtMs: Date.now(),
+      stdin: undefined,
+      wait: vi.fn( // wait() stays pending until cancel() invokes resolveWait.
+        async () =>
+          await new Promise((resolve) => {
+            resolveWait = resolve;
+          }),
+      ),
+      cancel,
+    });
+
+    const runPromise = runCliAgent({ // Not awaited yet, so the abort can be fired while the run is in flight.
+      sessionId: "s1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      prompt: "hi",
+      provider: "codex-cli",
+      model: "gpt-5.4",
+      timeoutMs: 1_000,
+      runId: "run-abort",
+      abortSignal: abortController.signal,
+    });
+
+    await Promise.resolve(); // Yields one microtask before aborting (presumably so the run gets underway first; confirm).
+    abortController.abort();
+
+    await expect(runPromise).rejects.toMatchObject({ name: "AbortError" }); // The abort surfaces as an error named "AbortError".
+    expect(cancel).toHaveBeenCalledWith("manual-cancel"); // The managed run was cancelled with the manual-cancel reason.
+  });
+
  it("streams CLI text deltas from JSONL stdout", async () => {
    const runCliAgent = await setupCliRunnerTestModule();
    const agentEvents: Array<{ stream: string; text?: string; delta?: string }> = [];
--- a/src/agents/cli-runner/execute.ts
+++ b/src/agents/cli-runner/execute.ts
@@ -36,6 +36,12 @@ export function setCliRunnerExecuteTestDeps(overrides: Partial<typeof executeDep
  Object.assign(executeDeps, overrides);
 }

+function createCliAbortError(): Error { // Builds the error thrown when a CLI run is aborted.
+  const error = new Error("CLI run aborted");
+  error.name = "AbortError"; // Tagged by name so callers can match on error.name.
+  return error;
+}
+
 function buildCliLogArgs(params: {
  args: string[];
  systemPromptArg?: string;
@@ -84,6 +90,9 @@ export async function executePreparedCliRun(
  cliSessionIdToUse?: string,
 ): Promise<CliOutput> {
  const params = context.params;
+  if (params.abortSignal?.aborted) {
+    throw createCliAbortError();
+  }
  const backend = context.preparedBackend.backend;
  const { sessionId: resolvedSessionId, isNew } = resolveSessionIdToSend({
    backend,
@@ -226,8 +235,38 @@ export async function executePreparedCliRun(
        input: stdinPayload,
        onStdout: streamingParser ? (chunk: string) => streamingParser.push(chunk) : undefined,
      });
-      const result = await managedRun.wait();
+      const replyBackendHandle = params.replyOperation
+        ? {
+            kind: "cli" as const,
+            cancel: () => {
+              managedRun.cancel("manual-cancel");
+            },
+            isStreaming: () => false,
+          }
+        : undefined;
+      if (replyBackendHandle) {
+        params.replyOperation?.attachBackend(replyBackendHandle);
+      }
+      const abortManagedRun = () => {
+        managedRun.cancel("manual-cancel");
+      };
+      params.abortSignal?.addEventListener("abort", abortManagedRun, { once: true });
+      if (params.abortSignal?.aborted) {
+        abortManagedRun();
+      }
+      let result: Awaited<ReturnType<typeof managedRun.wait>>;
+      try {
+        result = await managedRun.wait();
+      } finally {
+        if (replyBackendHandle) {
+          params.replyOperation?.detachBackend(replyBackendHandle);
+        }
+        params.abortSignal?.removeEventListener("abort", abortManagedRun);
+      }
      streamingParser?.finish();
+      if (params.abortSignal?.aborted && result.reason === "manual-cancel") {
+        throw createCliAbortError();
+      }

      const stdout = result.stdout.trim();
      const stderr = result.stderr.trim();
--- a/src/agents/cli-runner/types.ts
+++ b/src/agents/cli-runner/types.ts
@@ -1,4 +1,5 @@
 import type { ImageContent } from "@mariozechner/pi-ai";
+import type { ReplyOperation } from "../../auto-reply/reply/reply-run-registry.js";
 import type { ThinkLevel } from "../../auto-reply/thinking.js";
 import type { OpenClawConfig } from "../../config/config.js";
 import type { CliSessionBinding } from "../../config/sessions.js";
@@ -32,6 +33,8 @@ export type RunCliAgentParams = {
  imageOrder?: PromptImageOrderEntry[];
  messageProvider?: string;
  agentAccountId?: string;
+  abortSignal?: AbortSignal;
+  replyOperation?: ReplyOperation;
 };

 export type CliPreparedBackend = {
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -19,6 +19,7 @@ export {
  isEmbeddedPiRunActive,
  isEmbeddedPiRunStreaming,
  queueEmbeddedPiMessage,
+  resolveActiveEmbeddedRunSessionId,
  waitForEmbeddedPiRunEnd,
 } from "./pi-embedded-runner/runs.js";
 export { buildEmbeddedSandboxInfo } from "./pi-embedded-runner/sandbox-info.js";
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -120,8 +120,28 @@ export async function runEmbeddedPiAgent(
      : "markdown");
  const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;

-  return enqueueSession(() =>
-    enqueueGlobal(async () => {
+  const throwIfAborted = () => { // Throws when params.abortSignal is already aborted; otherwise does nothing.
+    if (!params.abortSignal?.aborted) { // Covers both a missing signal and a signal that has not fired.
+      return;
+    }
+    const reason = params.abortSignal.reason;
+    if (reason instanceof Error) { // An Error abort reason is re-thrown unchanged.
+      throw reason;
+    }
+    const abortErr = // Otherwise build a generic error, keeping a non-Error reason as `cause`.
+      reason !== undefined
+        ? new Error("Operation aborted", { cause: reason })
+        : new Error("Operation aborted");
+    abortErr.name = "AbortError"; // Tagged by name so it can be recognized as an abort.
+    throw abortErr;
+  };
+
+  throwIfAborted();
+
+  return enqueueSession(() => {
+    throwIfAborted();
+    return enqueueGlobal(async () => {
+      throwIfAborted();
      const started = Date.now();
      const workspaceResolution = resolveRunWorkspaceDir({
        workspaceDir: params.workspaceDir,
@@ -569,6 +589,7 @@ export async function runEmbeddedPiAgent(
            timeoutMs: params.timeoutMs,
            runId: params.runId,
            abortSignal: params.abortSignal,
+            replyOperation: params.replyOperation,
            shouldEmitToolResult: params.shouldEmitToolResult,
            shouldEmitToolOutput: params.shouldEmitToolOutput,
            onPartialReply: params.onPartialReply,
@@ -1503,6 +1524,6 @@ export async function runEmbeddedPiAgent(
          });
        }
      }
-    }),
-  );
+    });
+  });
 }
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -1372,14 +1372,24 @@ export async function runEmbeddedAttempt(
        getCompactionCount,
      } = subscription;

-      const queueHandle: EmbeddedPiQueueHandle = {
+      const queueHandle: EmbeddedPiQueueHandle & {
+        kind: "embedded";
+        cancel: (reason?: "user_abort" | "restart" | "superseded") => void;
+      } = {
+        kind: "embedded",
        queueMessage: async (text: string) => {
          await activeSession.steer(text);
        },
        isStreaming: () => activeSession.isStreaming,
        isCompacting: () => subscription.isCompacting(),
+        cancel: () => {
+          abortRun();
+        },
        abort: abortRun,
      };
+      if (params.replyOperation) {
+        params.replyOperation.attachBackend(queueHandle);
+      }
      setActiveEmbeddedRun(params.sessionId, queueHandle, params.sessionKey);

      let abortWarnTimer: NodeJS.Timeout | undefined;
@@ -1945,6 +1955,9 @@ export async function runEmbeddedAttempt(
            `CRITICAL: unsubscribe failed, possible resource leak: runId=${params.runId} ${String(err)}`,
          );
        }
+        if (params.replyOperation) {
+          params.replyOperation.detachBackend(queueHandle);
+        }
        clearActiveEmbeddedRun(params.sessionId, queueHandle, params.sessionKey);
        params.abortSignal?.removeEventListener?.("abort", onAbort);
      }
--- a/src/agents/pi-embedded-runner/run/params.ts
+++ b/src/agents/pi-embedded-runner/run/params.ts
@@ -1,4 +1,5 @@
 import type { ImageContent } from "@mariozechner/pi-ai";
+import type { ReplyOperation } from "../../../auto-reply/reply/reply-run-registry.js";
 import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js";
 import type { ReplyPayload } from "../../../auto-reply/types.js";
 import type { OpenClawConfig } from "../../../config/config.js";
@@ -108,6 +109,7 @@ export type RunEmbeddedPiAgentParams = {
  timeoutMs: number;
  runId: string;
  abortSignal?: AbortSignal;
+  replyOperation?: ReplyOperation;
  shouldEmitToolResult?: () => boolean;
  shouldEmitToolOutput?: () => boolean;
  onPartialReply?: (payload: { text?: string; mediaUrls?: string[] }) => void | Promise<void>;
--- a/src/agents/pi-embedded-runner/runs.ts
+++ b/src/agents/pi-embedded-runner/runs.ts
@@ -1,3 +1,14 @@
+import {
+  abortActiveReplyRuns,
+  abortReplyRunBySessionId,
+  getActiveReplyRunCount,
+  isReplyRunActiveForSessionId,
+  isReplyRunStreamingForSessionId,
+  listActiveReplyRunSessionIds,
+  queueReplyRunMessage,
+  resolveActiveReplyRunSessionId,
+  waitForReplyRunEndBySessionId,
+} from "../../auto-reply/reply/reply-run-registry.js";
 import {
  diagnosticLogger as diag,
  logMessageQueued,
@@ -5,10 +16,12 @@ import {
 } from "../../logging/diagnostic.js";
 import { resolveGlobalSingleton } from "../../shared/global-singleton.js";

-type EmbeddedPiQueueHandle = {
+export type EmbeddedPiQueueHandle = { // Handle for one active embedded run; now exported at its declaration.
+  kind?: "embedded"; // Optional backend tag; the only allowed value is "embedded".
  queueMessage: (text: string) => Promise<void>;
  isStreaming: () => boolean;
  isCompacting: () => boolean;
+  cancel?: (reason?: "user_abort" | "restart" | "superseded") => void; // Optional cancel hook that accepts an abort reason.
  abort: () => void;
 };

@@ -39,17 +52,47 @@ const EMBEDDED_RUN_STATE_KEY = Symbol.for("openclaw.embeddedRunState");
 const embeddedRunState = resolveGlobalSingleton(EMBEDDED_RUN_STATE_KEY, () => ({
  activeRuns: new Map<string, EmbeddedPiQueueHandle>(),
  snapshots: new Map<string, ActiveEmbeddedRunSnapshot>(),
+  sessionIdsByKey: new Map<string, string>(),
  waiters: new Map<string, Set<EmbeddedRunWaiter>>(),
  modelSwitchRequests: new Map<string, EmbeddedRunModelSwitchRequest>(),
 }));
 const ACTIVE_EMBEDDED_RUNS = embeddedRunState.activeRuns;
 const ACTIVE_EMBEDDED_RUN_SNAPSHOTS = embeddedRunState.snapshots;
+const ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY = embeddedRunState.sessionIdsByKey;
 const EMBEDDED_RUN_WAITERS = embeddedRunState.waiters;
 const EMBEDDED_RUN_MODEL_SWITCH_REQUESTS = embeddedRunState.modelSwitchRequests;

+function setActiveRunSessionKey(sessionKey: string | undefined, sessionId: string): void { // Records sessionKey -> sessionId for an active run.
+  const normalizedSessionKey = sessionKey?.trim(); // Keys are stored trimmed.
+  if (!normalizedSessionKey) { // Missing or blank keys are ignored.
+    return;
+  }
+  ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY.set(normalizedSessionKey, sessionId); // Overwrites any previous mapping for this key.
+}
+
+function clearActiveRunSessionKeys(sessionId: string, sessionKey?: string): void { // Removes session-key mappings that point at sessionId.
+  const normalizedSessionKey = sessionKey?.trim();
+  if (normalizedSessionKey) { // Explicit key: only that one entry is considered.
+    if (ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY.get(normalizedSessionKey) === sessionId) { // Leave the entry alone if it maps to a different sessionId.
+      ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY.delete(normalizedSessionKey);
+    }
+    return; // Other keys are not scanned when a key was supplied.
+  }
+  for (const [key, activeSessionId] of ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY) { // No usable key: drop every key mapped to this sessionId.
+    if (activeSessionId === sessionId) {
+      ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY.delete(key);
+    }
+  }
+}
+
 export function queueEmbeddedPiMessage(sessionId: string, text: string): boolean {
  const handle = ACTIVE_EMBEDDED_RUNS.get(sessionId);
  if (!handle) {
+    const queuedReplyRunMessage = queueReplyRunMessage(sessionId, text);
+    if (queuedReplyRunMessage) {
+      logMessageQueued({ sessionId, source: "pi-embedded-runner" });
+      return true;
+    }
    diag.debug(`queue message failed: sessionId=${sessionId} reason=no_active_run`);
    return false;
  }
@@ -84,6 +127,9 @@ export function abortEmbeddedPiRun(
  if (typeof sessionId === "string" && sessionId.length > 0) {
    const handle = ACTIVE_EMBEDDED_RUNS.get(sessionId);
    if (!handle) {
+      if (abortReplyRunBySessionId(sessionId)) {
+        return true;
+      }
      diag.debug(`abort failed: sessionId=${sessionId} reason=no_active_run`);
      return false;
    }
@@ -112,7 +158,7 @@ export function abortEmbeddedPiRun(
        diag.warn(`abort failed: sessionId=${id} err=${String(err)}`);
      }
    }
-    return aborted;
+    return abortActiveReplyRuns({ mode }) || aborted;
  }

  if (mode === "all") {
@@ -126,14 +172,14 @@ export function abortEmbeddedPiRun(
        diag.warn(`abort failed: sessionId=${id} err=${String(err)}`);
      }
    }
-    return aborted;
+    return abortActiveReplyRuns({ mode }) || aborted;
  }

  return false;
 }

 export function isEmbeddedPiRunActive(sessionId: string): boolean {
-  const active = ACTIVE_EMBEDDED_RUNS.has(sessionId);
+  const active = ACTIVE_EMBEDDED_RUNS.has(sessionId) || isReplyRunActiveForSessionId(sessionId);
  if (active) {
    diag.debug(`run active check: sessionId=${sessionId} active=true`);
  }
@@ -143,13 +189,30 @@ export function isEmbeddedPiRunActive(sessionId: string): boolean {
 export function isEmbeddedPiRunStreaming(sessionId: string): boolean {
  const handle = ACTIVE_EMBEDDED_RUNS.get(sessionId);
  if (!handle) {
-    return false;
+    return isReplyRunStreamingForSessionId(sessionId); // No embedded handle: defer to the reply-run registry instead of returning false.
  }
  return handle.isStreaming();
 }

+export function resolveActiveEmbeddedRunSessionId(sessionKey: string): string | undefined { // Looks up the sessionId recorded for a session key.
+  const normalizedSessionKey = sessionKey.trim(); // Lookups use the trimmed key.
+  if (!normalizedSessionKey) { // A blank key resolves to nothing.
+    return undefined;
+  }
+  return ( // The reply-run registry answer wins; the local key map is the fallback.
+    resolveActiveReplyRunSessionId(normalizedSessionKey) ??
+    ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY.get(normalizedSessionKey)
+  );
+}
+
 export function getActiveEmbeddedRunCount(): number {
-  return ACTIVE_EMBEDDED_RUNS.size;
+  let activeCount = ACTIVE_EMBEDDED_RUNS.size; // Start from the locally tracked embedded runs.
+  for (const sessionId of listActiveReplyRunSessionIds()) { // Add reply-run sessions that have no local embedded handle.
+    if (!ACTIVE_EMBEDDED_RUNS.has(sessionId)) {
+      activeCount += 1;
+    }
+  }
+  return Math.max(activeCount, getActiveReplyRunCount()); // Never report fewer runs than the reply-run registry's own count.
 }

 export function getActiveEmbeddedRunSnapshot(
@@ -210,13 +273,13 @@ export async function waitForActiveEmbeddedRuns(

  const startedAt = Date.now();
  while (true) {
-    if (ACTIVE_EMBEDDED_RUNS.size === 0) {
+    if (getActiveEmbeddedRunCount() === 0) {
      return { drained: true };
    }
    const elapsedMs = Date.now() - startedAt;
    if (elapsedMs >= maxWaitMs) {
      diag.warn(
-        `wait for active embedded runs timed out: activeRuns=${ACTIVE_EMBEDDED_RUNS.size} timeoutMs=${maxWaitMs}`,
+        `wait for active embedded runs timed out: activeRuns=${getActiveEmbeddedRunCount()} timeoutMs=${maxWaitMs}`,
      );
      return { drained: false };
    }
@@ -225,9 +288,12 @@ export async function waitForActiveEmbeddedRuns(
 }

 export function waitForEmbeddedPiRunEnd(sessionId: string, timeoutMs = 15_000): Promise<boolean> {
-  if (!sessionId || !ACTIVE_EMBEDDED_RUNS.has(sessionId)) {
+  if (!sessionId) {
    return Promise.resolve(true);
  }
+  if (!ACTIVE_EMBEDDED_RUNS.has(sessionId)) {
+    return waitForReplyRunEndBySessionId(sessionId, timeoutMs);
+  }
  diag.debug(`waiting for run end: sessionId=${sessionId} timeoutMs=${timeoutMs}`);
  return new Promise((resolve) => {
    const waiters = EMBEDDED_RUN_WAITERS.get(sessionId) ?? new Set();
@@ -278,6 +344,7 @@ export function setActiveEmbeddedRun(
 ) {
  const wasActive = ACTIVE_EMBEDDED_RUNS.has(sessionId);
  ACTIVE_EMBEDDED_RUNS.set(sessionId, handle);
+  setActiveRunSessionKey(sessionKey, sessionId);
  logSessionStateChange({
    sessionId,
    sessionKey,
@@ -308,6 +375,7 @@ export function clearActiveEmbeddedRun(
    ACTIVE_EMBEDDED_RUNS.delete(sessionId);
    ACTIVE_EMBEDDED_RUN_SNAPSHOTS.delete(sessionId);
    EMBEDDED_RUN_MODEL_SWITCH_REQUESTS.delete(sessionId);
+    clearActiveRunSessionKeys(sessionId, sessionKey);
    logSessionStateChange({ sessionId, sessionKey, state: "idle", reason: "run_completed" });
    if (!sessionId.startsWith("probe-")) {
      diag.debug(`run cleared: sessionId=${sessionId} totalActive=${ACTIVE_EMBEDDED_RUNS.size}`);
@@ -329,8 +397,7 @@ export const __testing = {
    EMBEDDED_RUN_WAITERS.clear();
    ACTIVE_EMBEDDED_RUNS.clear();
    ACTIVE_EMBEDDED_RUN_SNAPSHOTS.clear();
+    ACTIVE_EMBEDDED_RUN_SESSION_IDS_BY_KEY.clear();
    EMBEDDED_RUN_MODEL_SWITCH_REQUESTS.clear();
  },
 };
-
-export type { EmbeddedPiQueueHandle };
--- a/src/agents/pi-embedded.runtime.ts
+++ b/src/agents/pi-embedded.runtime.ts
@@ -2,6 +2,8 @@ export {
  abortEmbeddedPiRun,
  isEmbeddedPiRunActive,
  isEmbeddedPiRunStreaming,
+  resolveActiveEmbeddedRunSessionId,
  runEmbeddedPiAgent,
  resolveEmbeddedSessionLane,
+  waitForEmbeddedPiRunEnd,
 } from "./pi-embedded.js";
--- a/src/agents/pi-embedded.ts
+++ b/src/agents/pi-embedded.ts
@@ -10,6 +10,7 @@ export {
  isEmbeddedPiRunActive,
  isEmbeddedPiRunStreaming,
  queueEmbeddedPiMessage,
+  resolveActiveEmbeddedRunSessionId,
  resolveEmbeddedSessionLane,
  runEmbeddedPiAgent,
  waitForEmbeddedPiRunEnd,
--- a/src/auto-reply/reply/abort.ts
+++ b/src/auto-reply/reply/abort.ts
@@ -38,6 +38,7 @@ import {
 } from "./abort-primitives.js";
 import { stripMentions, stripStructuralPrefixes } from "./mentions.js";
 import { clearSessionQueues } from "./queue.js";
+import { replyRunRegistry } from "./reply-run-registry.js";

 export { resolveAbortCutoffFromContext, shouldSkipMessageByAbortCutoff } from "./abort-cutoff.js";
 export {
@@ -187,8 +188,10 @@ export function stopSubagentsForRequester(params: {
        storeCache.set(storePath, store);
      }
      const entry = store[childKey];
-      const sessionId = entry?.sessionId;
-      const aborted = sessionId ? abortDeps.abortEmbeddedPiRun(sessionId) : false;
+      const sessionId = replyRunRegistry.resolveSessionId(childKey) ?? entry?.sessionId;
+      const aborted =
+        (childKey ? replyRunRegistry.abort(childKey) : false) ||
+        (sessionId ? abortDeps.abortEmbeddedPiRun(sessionId) : false);
      const markedTerminated =
        abortDeps.markSubagentRunTerminated({
          runId: run.runId,
@@ -270,8 +273,10 @@ export async function tryFastAbortFromMessage(params: {
        );
      }
    }
-    const sessionId = entry?.sessionId;
-    const aborted = sessionId ? abortDeps.abortEmbeddedPiRun(sessionId) : false;
+    const sessionId = replyRunRegistry.resolveSessionId(resolvedTargetKey) ?? entry?.sessionId;
+    const aborted =
+      replyRunRegistry.abort(resolvedTargetKey) ||
+      (sessionId ? abortDeps.abortEmbeddedPiRun(sessionId) : false);
    const cleared = clearSessionQueues([resolvedTargetKey, sessionId]);
    if (cleared.followupCleared > 0 || cleared.laneCleared > 0) {
      logVerbose(
--- a/src/auto-reply/reply/agent-runner-execution.test.ts
+++ b/src/auto-reply/reply/agent-runner-execution.test.ts
@@ -1,10 +1,12 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import { LiveSessionModelSwitchError } from "../../agents/live-model-switch-error.js";
 import type { SessionEntry } from "../../config/sessions.js";
+import { CommandLaneClearedError, GatewayDrainingError } from "../../process/command-queue.js";
 import type { TemplateContext } from "../templating.js";
 import type { GetReplyOptions } from "../types.js";
 import { MAX_LIVE_SWITCH_RETRIES } from "./agent-runner-execution.js";
 import type { FollowupRun } from "./queue.js";
+import type { ReplyOperation } from "./reply-run-registry.js";
 import type { TypingSignaler } from "./typing-mode.js";

 const state = vi.hoisted(() => ({
@@ -181,6 +183,32 @@ function createFollowupRun(): FollowupRun {
  } as unknown as FollowupRun;
 }

+function createMockReplyOperation(): { // Test helper: builds a ReplyOperation stub and exposes its fail() mock.
+  replyOperation: ReplyOperation;
+  failMock: ReturnType<typeof vi.fn>;
+} {
+  const failMock = vi.fn(); // Returned separately so tests can assert on fail() calls.
+  return {
+    failMock,
+    replyOperation: {
+      key: "main",
+      sessionId: "session",
+      abortSignal: new AbortController().signal, // Fresh signal; this helper never aborts it.
+      resetTriggered: false,
+      phase: "running",
+      result: null, // Individual tests may override this via Object.defineProperty.
+      setPhase: vi.fn(),
+      updateSessionId: vi.fn(),
+      attachBackend: vi.fn(),
+      detachBackend: vi.fn(),
+      complete: vi.fn(),
+      fail: failMock,
+      abortByUser: vi.fn(),
+      abortForRestart: vi.fn(),
+    },
+  };
+}
+
 describe("runAgentTurnWithFallback", () => {
  beforeEach(() => {
    state.runEmbeddedPiAgentMock.mockReset();
@@ -732,6 +760,158 @@ describe("runAgentTurnWithFallback", () => {
    }
  });

+  it("surfaces gateway restart text when fallback exhaustion wraps a drain error", async () => { // A fallback summary that wraps GatewayDrainingError must yield the restart notice.
+    const { replyOperation, failMock } = createMockReplyOperation();
+    state.runWithModelFallbackMock.mockRejectedValueOnce( // The next fallback run rejects with a hand-built FallbackSummaryError.
+      Object.assign(new Error("fallback exhausted"), {
+        name: "FallbackSummaryError",
+        attempts: [
+          {
+            provider: "anthropic",
+            model: "claude",
+            error: new GatewayDrainingError(), // The drain error appears as an attempt error...
+          },
+        ],
+        soonestCooldownExpiry: null,
+        cause: new GatewayDrainingError(), // ...and again as the summary's cause.
+      }),
+    );
+
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun: createFollowupRun(),
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      replyOperation,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+
+    expect(result.kind).toBe("final"); // The turn ends with a final payload.
+    if (result.kind === "final") { // Narrows the result union before reading payload.
+      expect(result.payload.text).toBe(
+        "⚠️ Gateway is restarting. Please wait a few seconds and try again.",
+      );
+    }
+    expect(failMock).toHaveBeenCalledWith("gateway_draining", expect.any(GatewayDrainingError)); // The operation is failed with the drain code and a GatewayDrainingError.
+  });
+
+  it("surfaces gateway restart text when fallback exhaustion wraps a cleared lane error", async () => { // Same scenario as the drain case, but with CommandLaneClearedError.
+    const { replyOperation, failMock } = createMockReplyOperation();
+    state.runWithModelFallbackMock.mockRejectedValueOnce( // The next fallback run rejects with a hand-built FallbackSummaryError.
+      Object.assign(new Error("fallback exhausted"), {
+        name: "FallbackSummaryError",
+        attempts: [
+          {
+            provider: "anthropic",
+            model: "claude",
+            error: new CommandLaneClearedError("session:main"), // The cleared-lane error appears as an attempt error...
+          },
+        ],
+        soonestCooldownExpiry: null,
+        cause: new CommandLaneClearedError("session:main"), // ...and again as the summary's cause.
+      }),
+    );
+
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun: createFollowupRun(),
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      replyOperation,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+
+    expect(result.kind).toBe("final"); // The turn ends with a final payload.
+    if (result.kind === "final") { // Narrows the result union before reading payload.
+      expect(result.payload.text).toBe(
+        "⚠️ Gateway is restarting. Please wait a few seconds and try again.",
+      );
+    }
+    expect(failMock).toHaveBeenCalledWith( // The operation is failed with the cleared-lane code and a CommandLaneClearedError.
+      "command_lane_cleared",
+      expect.any(CommandLaneClearedError),
+    );
+  });
+
+  it("surfaces gateway restart text when the reply operation was aborted for restart", async () => { // An AbortError on an operation already aborted for restart must yield the restart notice.
+    const { replyOperation, failMock } = createMockReplyOperation();
+    Object.defineProperty(replyOperation, "result", { // Simulates an operation already marked aborted_for_restart.
+      value: { kind: "aborted", code: "aborted_for_restart" } as const,
+      configurable: true,
+    });
+    state.runWithModelFallbackMock.mockRejectedValueOnce( // The next fallback run rejects with a plain error named AbortError.
+      Object.assign(new Error("aborted"), { name: "AbortError" }),
+    );
+
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun: createFollowupRun(),
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      replyOperation,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+
+    expect(result.kind).toBe("final"); // The turn ends with a final payload.
+    if (result.kind === "final") { // Narrows the result union before reading payload.
+      expect(result.payload.text).toBe(
+        "⚠️ Gateway is restarting. Please wait a few seconds and try again.",
+      );
+    }
+    expect(failMock).not.toHaveBeenCalled(); // fail() is not called for an operation that is already aborted.
+  });
+
  it("returns a friendly generic error on external chat channels", async () => {
    state.runEmbeddedPiAgentMock.mockRejectedValueOnce(
      new Error("INVALID_ARGUMENT: some other failure"),
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -31,6 +31,7 @@ import {
 } from "../../config/sessions.js";
 import { logVerbose } from "../../globals.js";
 import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js";
+import { CommandLaneClearedError, GatewayDrainingError } from "../../process/command-queue.js";
 import { defaultRuntime } from "../../runtime.js";
 import { sanitizeForLog } from "../../terminal/ansi.js";
 import {
@@ -57,6 +58,7 @@ import { type BlockReplyPipeline } from "./block-reply-pipeline.js";
 import type { FollowupRun } from "./queue.js";
 import { createBlockReplyDeliveryHandler } from "./reply-delivery.js";
 import { createReplyMediaPathNormalizer } from "./reply-media-paths.runtime.js";
+import type { ReplyOperation } from "./reply-run-registry.js";
 import type { TypingSignaler } from "./typing-mode.js";

 // Maximum number of LiveSessionModelSwitchError retries before surfacing a
@@ -408,10 +410,59 @@ function applyOpenAIGptChatReplyGuard(params: {
  }
 }

+function buildRestartLifecycleReplyText(): string { // Returns the fixed gateway-restart notice text.
+  return "⚠️ Gateway is restarting. Please wait a few seconds and try again.";
+}
+
+function resolveRestartLifecycleError(
+  err: unknown,
+): GatewayDrainingError | CommandLaneClearedError | undefined { // Searches err and its nested errors for a drain or cleared-lane error.
+  const pending = [err]; // FIFO work list of candidates still to inspect.
+  const seen = new Set<unknown>(); // Prevents revisiting the same object, e.g. through cyclic causes.
+
+  while (pending.length > 0) {
+    const candidate = pending.shift();
+    if (!candidate || seen.has(candidate)) { // Skip falsy entries and anything already inspected.
+      continue;
+    }
+    seen.add(candidate);
+
+    if (candidate instanceof GatewayDrainingError || candidate instanceof CommandLaneClearedError) { // First match in traversal order is returned.
+      return candidate;
+    }
+
+    if (isFallbackSummaryError(candidate)) { // Fallback summaries carry one error per attempt; queue each of them.
+      for (const attempt of candidate.attempts) {
+        pending.push(attempt.error);
+      }
+    }
+
+    if (candidate instanceof Error && "cause" in candidate) { // Also follow the Error.cause chain.
+      pending.push(candidate.cause);
+    }
+  }
+
+  return undefined; // Nothing restart-related was found.
+}
+
+function isReplyOperationUserAbort(replyOperation?: ReplyOperation): boolean { // True when the operation's result is an abort with code "aborted_by_user".
+  return ( // A missing operation or a null result yields false.
+    replyOperation?.result?.kind === "aborted" && replyOperation.result.code === "aborted_by_user"
+  );
+}
+
+function isReplyOperationRestartAbort(replyOperation?: ReplyOperation): boolean { // True when the operation's result is an abort with code "aborted_for_restart".
+  return ( // A missing operation or a null result yields false.
+    replyOperation?.result?.kind === "aborted" &&
+    replyOperation.result.code === "aborted_for_restart"
+  );
+}
+
 export async function runAgentTurnWithFallback(params: {
  commandBody: string;
  followupRun: FollowupRun;
  sessionCtx: TemplateContext;
+  replyOperation?: ReplyOperation;
  opts?: GetReplyOptions;
  typingSignals: TypingSignaler;
  blockReplyPipeline: BlockReplyPipeline | null;
@@ -695,6 +746,8 @@ export async function runAgentTurnWithFallback(params: {
                  imageOrder: params.opts?.imageOrder,
                  messageProvider: params.followupRun.run.messageProvider,
                  agentAccountId: params.followupRun.run.agentAccountId,
+                  abortSignal: params.replyOperation?.abortSignal ?? params.opts?.abortSignal,
+                  replyOperation: params.replyOperation,
                });
                bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
                  result.meta?.systemPromptReport,
@@ -805,7 +858,8 @@ export async function runAgentTurnWithFallback(params: {
                bootstrapContextRunKind: params.opts?.isHeartbeat ? "heartbeat" : "default",
                images: params.opts?.images,
                imageOrder: params.opts?.imageOrder,
-                abortSignal: params.opts?.abortSignal,
+                abortSignal: params.replyOperation?.abortSignal ?? params.opts?.abortSignal,
+                replyOperation: params.replyOperation,
                blockReplyBreak: params.resolvedBlockStreamingBreak,
                blockReplyChunking: params.blockReplyChunking,
                onPartialReply: async (payload) => {
@@ -1094,6 +1148,7 @@ export async function runAgentTurnWithFallback(params: {
        (await params.resetSessionAfterCompactionFailure(embeddedError.message))
      ) {
        didResetAfterCompactionFailure = true;
+        params.replyOperation?.fail("run_failed", embeddedError);
        return {
          kind: "final",
          payload: {
@@ -1104,6 +1159,7 @@ export async function runAgentTurnWithFallback(params: {
      if (embeddedError?.kind === "role_ordering") {
        const didReset = await params.resetSessionAfterRoleOrderingConflict(embeddedError.message);
        if (didReset) {
+          params.replyOperation?.fail("run_failed", embeddedError);
          return {
            kind: "final",
            payload: {
@@ -1133,6 +1189,7 @@ export async function runAgentTurnWithFallback(params: {
              "Logs: openclaw logs --follow"
            : "⚠️ Agent failed before reply: model switch could not be completed. " +
              "The requested model may be temporarily unavailable. Please try again shortly.";
+          params.replyOperation?.fail("run_failed", err);
          return {
            kind: "final",
            payload: {
@@ -1158,12 +1215,52 @@ export async function runAgentTurnWithFallback(params: {
      const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
      const isTransientHttp = isTransientHttpError(message);

+      if (isReplyOperationRestartAbort(params.replyOperation)) {
+        return {
+          kind: "final",
+          payload: {
+            text: buildRestartLifecycleReplyText(),
+          },
+        };
+      }
+
+      if (isReplyOperationUserAbort(params.replyOperation)) {
+        return {
+          kind: "final",
+          payload: {
+            text: SILENT_REPLY_TOKEN,
+          },
+        };
+      }
+
+      const restartLifecycleError = resolveRestartLifecycleError(err);
+      if (restartLifecycleError instanceof GatewayDrainingError) {
+        params.replyOperation?.fail("gateway_draining", restartLifecycleError);
+        return {
+          kind: "final",
+          payload: {
+            text: buildRestartLifecycleReplyText(),
+          },
+        };
+      }
+
+      if (restartLifecycleError instanceof CommandLaneClearedError) {
+        params.replyOperation?.fail("command_lane_cleared", restartLifecycleError);
+        return {
+          kind: "final",
+          payload: {
+            text: buildRestartLifecycleReplyText(),
+          },
+        };
+      }
+
      if (
        isCompactionFailure &&
        !didResetAfterCompactionFailure &&
        (await params.resetSessionAfterCompactionFailure(message))
      ) {
        didResetAfterCompactionFailure = true;
+        params.replyOperation?.fail("run_failed", err);
        return {
          kind: "final",
          payload: {
@@ -1174,6 +1271,7 @@ export async function runAgentTurnWithFallback(params: {
      if (isRoleOrderingError) {
        const didReset = await params.resetSessionAfterRoleOrderingConflict(message);
        if (didReset) {
+          params.replyOperation?.fail("run_failed", err);
          return {
            kind: "final",
            payload: {
@@ -1220,6 +1318,7 @@ export async function runAgentTurnWithFallback(params: {
          );
        }

+        params.replyOperation?.fail("session_corruption_reset", err);
        return {
          kind: "final",
          payload: {
@@ -1267,6 +1366,7 @@ export async function runAgentTurnWithFallback(params: {
                ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
                : buildExternalRunFailureText(message);

+      params.replyOperation?.fail("run_failed", err);
      return {
        kind: "final",
        payload: {
@@ -1286,6 +1386,7 @@ export async function runAgentTurnWithFallback(params: {
  if (finalEmbeddedError && !hasPayloadText) {
    const errorMsg = finalEmbeddedError.message ?? "";
    if (isContextOverflowError(errorMsg)) {
+      params.replyOperation?.fail("run_failed", finalEmbeddedError);
      return {
        kind: "final",
        payload: {
--- a/src/auto-reply/reply/agent-runner-memory.ts
+++ b/src/auto-reply/reply/agent-runner-memory.ts
@@ -41,6 +41,7 @@ import {
 } from "./memory-flush.js";
 import { readPostCompactionContext } from "./post-compaction-context.js";
 import { refreshQueuedFollowupSession, type FollowupRun } from "./queue.js";
+import type { ReplyOperation } from "./reply-run-registry.js";
 import { incrementCompactionCount } from "./session-updates.js";

 export function estimatePromptTokensForMemoryFlush(prompt?: string): number | undefined {
@@ -310,6 +311,7 @@ export async function runPreflightCompactionIfNeeded(params: {
  sessionKey?: string;
  storePath?: string;
  isHeartbeat: boolean;
+  replyOperation: ReplyOperation;
 }): Promise<SessionEntry | undefined> {
  if (!params.sessionKey) {
    return params.sessionEntry;
@@ -397,6 +399,7 @@ export async function runPreflightCompactionIfNeeded(params: {
      `threshold=${threshold}`,
  );

+  params.replyOperation.setPhase("preflight_compacting");
  const sessionFile = resolveSessionLogPath(
    entry.sessionId,
    entry.sessionFile ? entry : { ...entry, sessionFile: params.followupRun.run.sessionFile },
@@ -424,6 +427,7 @@ export async function runPreflightCompactionIfNeeded(params: {
    currentTokenCount: tokenCountForCompaction,
    senderIsOwner: params.followupRun.run.senderIsOwner,
    ownerNumbers: params.followupRun.run.ownerNumbers,
+    abortSignal: params.replyOperation.abortSignal,
  });

  if (!result?.ok || !result.compacted) {
@@ -463,6 +467,7 @@ export async function runMemoryFlushIfNeeded(params: {
  sessionKey?: string;
  storePath?: string;
  isHeartbeat: boolean;
+  replyOperation: ReplyOperation;
 }): Promise<SessionEntry | undefined> {
  const memoryFlushPlan = resolveMemoryFlushPlan({ cfg: params.cfg });
  if (!memoryFlushPlan) {
@@ -650,6 +655,7 @@ export async function runMemoryFlushIfNeeded(params: {
    `memoryFlush triggered: sessionKey=${params.sessionKey} tokenCount=${tokenCountForFlush ?? "undefined"} threshold=${flushThreshold}`,
  );

+  params.replyOperation.setPhase("memory_flushing");
  let activeSessionEntry = entry ?? params.sessionEntry;
  const activeSessionStore = params.sessionStore;
  let bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
@@ -705,6 +711,8 @@ export async function runMemoryFlushIfNeeded(params: {
          bootstrapPromptWarningSignaturesSeen,
          bootstrapPromptWarningSignature:
            bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1],
+          abortSignal: params.replyOperation.abortSignal,
+          replyOperation: params.replyOperation,
          onAgentEvent: (evt) => {
            if (evt.stream === "compaction") {
              const phase = typeof evt.data.phase === "string" ? evt.data.phase : "";
@@ -741,6 +749,7 @@ export async function runMemoryFlushIfNeeded(params: {
      if (updatedEntry) {
        activeSessionEntry = updatedEntry;
        params.followupRun.run.sessionId = updatedEntry.sessionId;
+        params.replyOperation.updateSessionId(updatedEntry.sessionId);
        if (updatedEntry.sessionFile) {
          params.followupRun.run.sessionFile = updatedEntry.sessionFile;
        }
@@ -771,6 +780,7 @@ export async function runMemoryFlushIfNeeded(params: {
        if (updatedEntry) {
          activeSessionEntry = updatedEntry;
          params.followupRun.run.sessionId = updatedEntry.sessionId;
+          params.replyOperation.updateSessionId(updatedEntry.sessionId);
          if (updatedEntry.sessionFile) {
            params.followupRun.run.sessionFile = updatedEntry.sessionFile;
          }
--- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
+++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
@@ -3,6 +3,12 @@ import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+  abortEmbeddedPiRun,
+  getActiveEmbeddedRunCount,
+  isEmbeddedPiRunActive,
+} from "../../agents/pi-embedded-runner/runs.js";
+import type { OpenClawConfig } from "../../config/config.js";
 import type { SessionEntry } from "../../config/sessions.js";
 import { loadSessionStore, saveSessionStore } from "../../config/sessions.js";
 import { onAgentEvent } from "../../infra/agent-events.js";
@@ -12,7 +18,9 @@ import {
  registerMemoryFlushPlanResolver,
 } from "../../plugins/memory-state.js";
 import type { TemplateContext } from "../templating.js";
+import { __testing as abortTesting, tryFastAbortFromMessage } from "./abort.js";
 import type { FollowupRun, QueueSettings } from "./queue.js";
+import { buildTestCtx } from "./test-ctx.js";
 import { createMockTypingController } from "./test-helpers.js";

 function createCliBackendTestConfig() {
@@ -32,6 +40,22 @@ const runEmbeddedPiAgentMock = vi.fn();
 const runCliAgentMock = vi.fn();
 const runWithModelFallbackMock = vi.fn();
 const runtimeErrorMock = vi.fn();
+const compactState = vi.hoisted(() => ({ // shared state read and written by the pi-embedded.js vi.mock factory
+  compactEmbeddedPiSessionMock: vi.fn(), // the mocked compactEmbeddedPiSession forwards its params here
+  actualCompactEmbeddedPiSession: undefined as
+    | typeof import("../../agents/pi-embedded.js").compactEmbeddedPiSession
+    | undefined, // set to the real implementation when the mock factory runs
+}));
+
+/** Builds a promise together with the resolve/reject callbacks that settle it. */
+function createDeferred<T>() {
+  let resolve!: (value: T) => void;
+  let reject!: (reason?: unknown) => void;
+  const promise = new Promise<T>((onResolve, onReject) => {
+    [resolve, reject] = [onResolve, onReject];
+  });
+  return { promise, resolve, reject };
+}

 vi.mock("../../agents/model-fallback.js", () => ({
  runWithModelFallback: (params: {
@@ -49,8 +73,11 @@ vi.mock("../../agents/pi-embedded.js", async () => {
  const actual = await vi.importActual<typeof import("../../agents/pi-embedded.js")>(
    "../../agents/pi-embedded.js",
  );
+  compactState.actualCompactEmbeddedPiSession = actual.compactEmbeddedPiSession;
  return {
    ...actual,
+    compactEmbeddedPiSession: (params: unknown) =>
+      compactState.compactEmbeddedPiSessionMock(params),
    queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
    runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params),
  };
@@ -451,6 +478,7 @@ describe("runReplyAgent auto-compaction token update", () => {
  type EmbeddedRunParams = {
    prompt?: string;
    extraSystemPrompt?: string;
+    abortSignal?: AbortSignal;
    onAgentEvent?: (evt: {
      stream?: string;
      data?: { phase?: string; willRetry?: boolean; completed?: boolean };
@@ -517,6 +545,330 @@ describe("runReplyAgent auto-compaction token update", () => {
    return { typing, sessionCtx, resolvedQueue, followupRun };
  }

+  it("lets /stop abort a run that is still in preflight compaction", async () => { // holds compaction pending, then sends /stop
+    const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-preflight-stop-"));
+    const storePath = path.join(tmp, "sessions.json");
+    const sessionKey = "main";
+    const sessionFile = "session-relative.jsonl";
+    const workspaceDir = tmp;
+    const transcriptPath = path.join(tmp, sessionFile);
+    const cfg = { session: { store: storePath } } as OpenClawConfig;
+
+    await fs.writeFile( // seed the transcript with a single 320_000-character user message
+      transcriptPath,
+      `${JSON.stringify({
+        message: {
+          role: "user",
+          content: "x".repeat(320_000),
+          timestamp: Date.now(),
+        },
+      })}\n`,
+      "utf-8",
+    );
+
+    const sessionEntry: SessionEntry = {
+      sessionId: "session",
+      updatedAt: Date.now(),
+      sessionFile,
+      totalTokens: 10,
+      totalTokensFresh: false, // NOTE(review): presumably makes the runner estimate tokens from the transcript — confirm
+      compactionCount: 1,
+    };
+
+    await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
+
+    const compactionDeferred = createDeferred<{ // resolved only in the finally block below
+      ok: true;
+      compacted: true;
+      result: {
+        summary: string;
+        firstKeptEntryId: string;
+        tokensBefore: number;
+        tokensAfter: number;
+      };
+    }>();
+
+    compactState.compactEmbeddedPiSessionMock.mockImplementationOnce( // compaction stays pending on the deferred
+      async () => await compactionDeferred.promise,
+    );
+    runEmbeddedPiAgentMock.mockResolvedValueOnce({
+      payloads: [{ text: "ok" }],
+      meta: { agentMeta: { usage: { input: 1, output: 1 } } },
+    });
+
+    abortTesting.setDepsForTests({ // stub the abort module's ACP-session and subagent dependencies
+      getAcpSessionManager: (() =>
+        ({
+          resolveSession: () => ({ kind: "none" }),
+          cancelSession: async () => {},
+        }) as never) as never,
+      getLatestSubagentRunByChildSessionKey: () => null,
+      listSubagentRunsForController: () => [],
+      markSubagentRunTerminated: () => 0,
+    });
+
+    const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
+      storePath,
+      sessionEntry,
+      config: cfg,
+      sessionFile,
+      workspaceDir,
+    });
+
+    const runPromise = runReplyAgent({ // intentionally not awaited here; the run is still pending when /stop is sent
+      commandBody: "hello",
+      followupRun,
+      queueKey: sessionKey,
+      resolvedQueue,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing,
+      sessionCtx,
+      sessionEntry,
+      sessionStore: { [sessionKey]: sessionEntry },
+      sessionKey,
+      storePath,
+      defaultModel: "anthropic/claude-opus-4-6",
+      agentCfgContextTokens: 100_000,
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+
+    try {
+      await vi.waitFor(() => { // wait until the mocked compaction has been invoked
+        expect(compactState.compactEmbeddedPiSessionMock).toHaveBeenCalledOnce();
+      });
+      expect(getActiveEmbeddedRunCount()).toBe(1); // the run counts as active while compaction is pending
+
+      const abortResult = await tryFastAbortFromMessage({
+        ctx: buildTestCtx({
+          Body: "/stop",
+          RawBody: "/stop",
+          CommandBody: "/stop",
+          CommandSource: "text",
+          CommandAuthorized: true,
+          ChatType: "direct",
+          Provider: "whatsapp",
+          Surface: "whatsapp",
+          From: "whatsapp:+15550001111",
+          To: "whatsapp:+15550002222",
+          SessionKey: sessionKey,
+        }),
+        cfg,
+      });
+
+      expect(abortResult).toMatchObject({
+        handled: true,
+        aborted: true,
+      });
+    } finally { // always release the deferred compaction so runPromise can settle, even if an assertion threw
+      compactionDeferred.resolve({
+        ok: true,
+        compacted: true,
+        result: {
+          summary: "compacted",
+          firstKeptEntryId: "first-kept",
+          tokensBefore: 90_000,
+          tokensAfter: 8_000,
+        },
+      });
+      await runPromise;
+    }
+
+    expect(getActiveEmbeddedRunCount()).toBe(0); // no active run remains once the turn has settled
+  });
+
+  it("surfaces the restart notice when gateway shutdown aborts preflight compaction", async () => { // aborts in "compacting" mode
+    const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-preflight-restart-"));
+    const storePath = path.join(tmp, "sessions.json");
+    const sessionKey = "main";
+    const sessionFile = "session-relative.jsonl";
+    const workspaceDir = tmp;
+    const transcriptPath = path.join(tmp, sessionFile);
+    const cfg = { session: { store: storePath } } as OpenClawConfig;
+
+    await fs.writeFile( // seed the transcript with a single 320_000-character user message
+      transcriptPath,
+      `${JSON.stringify({
+        message: {
+          role: "user",
+          content: "x".repeat(320_000),
+          timestamp: Date.now(),
+        },
+      })}\n`,
+      "utf-8",
+    );
+
+    const sessionEntry: SessionEntry = {
+      sessionId: "session",
+      updatedAt: Date.now(),
+      sessionFile,
+      totalTokens: 10,
+      totalTokensFresh: false, // NOTE(review): presumably makes the runner estimate tokens from the transcript — confirm
+      compactionCount: 1,
+    };
+
+    await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
+
+    compactState.compactEmbeddedPiSessionMock.mockImplementationOnce( // never resolves; rejects once abortSignal fires
+      async (params: { abortSignal?: AbortSignal }) =>
+        await new Promise<never>((_, reject) => {
+          const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
+          const onAbort = () => reject(abortError);
+          if (params.abortSignal?.aborted) { // signal already aborted before the listener could be attached
+            onAbort();
+            return;
+          }
+          params.abortSignal?.addEventListener("abort", onAbort, { once: true });
+        }),
+    );
+
+    const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
+      storePath,
+      sessionEntry,
+      config: cfg,
+      sessionFile,
+      workspaceDir,
+    });
+
+    const runPromise = runReplyAgent({ // intentionally not awaited here; the run is aborted while still pending
+      commandBody: "hello",
+      followupRun,
+      queueKey: sessionKey,
+      resolvedQueue,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing,
+      sessionCtx,
+      sessionEntry,
+      sessionStore: { [sessionKey]: sessionEntry },
+      sessionKey,
+      storePath,
+      defaultModel: "anthropic/claude-opus-4-6",
+      agentCfgContextTokens: 100_000,
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+
+    await vi.waitFor(() => { // wait until the mocked compaction has been invoked
+      expect(compactState.compactEmbeddedPiSessionMock).toHaveBeenCalledOnce();
+    });
+    expect(getActiveEmbeddedRunCount()).toBe(1); // the run counts as active while compaction is pending
+
+    expect(abortEmbeddedPiRun(undefined, { mode: "compacting" })).toBe(true); // abort without a session id, mode "compacting"
+
+    await expect(runPromise).resolves.toEqual({ // the run resolves with the restart notice instead of rejecting
+      text: "⚠️ Gateway is restarting. Please wait a few seconds and try again.",
+    });
+    expect(getActiveEmbeddedRunCount()).toBe(0); // no active run remains once the turn has settled
+  });
+
+  it("rebinds the active run to the rotated session id after memory flush", async () => { // flush run reports a new session id
+    registerMemoryFlushPlanResolver(() => ({
+      softThresholdTokens: 1_000,
+      forceFlushTranscriptBytes: Number.MAX_SAFE_INTEGER,
+      reserveTokensFloor: 20_000,
+      prompt: "Pre-compaction memory flush.",
+      systemPrompt: "Flush memory into the configured memory file.",
+      relativePath: "memory/active.md",
+    }));
+
+    runEmbeddedPiAgentMock.mockImplementation(async (params: EmbeddedRunParams) => {
+      if (params.prompt?.includes("Pre-compaction memory flush.")) { // the memory-flush run, matched by its prompt text
+        params.onAgentEvent?.({
+          stream: "compaction",
+          data: { phase: "end", completed: true },
+        });
+        return {
+          payloads: [],
+          meta: {
+            agentMeta: {
+              sessionId: "session-rotated", // the flush run reports a different session id than the seeded "session"
+            },
+          },
+        };
+      }
+
+      await new Promise<never>((_, reject) => { // any other run blocks until its abortSignal fires, then rejects
+        const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
+        const onAbort = () => reject(abortError);
+        if (params.abortSignal?.aborted) { // signal already aborted before the listener could be attached
+          onAbort();
+          return;
+        }
+        params.abortSignal?.addEventListener("abort", onAbort, { once: true });
+      });
+    });
+
+    const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
+      storePath: "/tmp/session-store.json",
+      sessionEntry: {
+        sessionId: "session",
+        updatedAt: Date.now(),
+        totalTokens: 1_000_000,
+        compactionCount: 0,
+      },
+    });
+
+    const runPromise = runReplyAgent({ // intentionally not awaited here; the main run blocks until aborted below
+      commandBody: "hello",
+      followupRun,
+      queueKey: "main",
+      resolvedQueue,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing,
+      sessionCtx,
+      sessionEntry: {
+        sessionId: "session",
+        updatedAt: Date.now(),
+        totalTokens: 1_000_000,
+        compactionCount: 0,
+      },
+      sessionStore: {
+        main: {
+          sessionId: "session",
+          updatedAt: Date.now(),
+          totalTokens: 1_000_000,
+          compactionCount: 0,
+        },
+      },
+      sessionKey: "main",
+      defaultModel: "anthropic/claude-opus-4-6",
+      agentCfgContextTokens: 100_000,
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+
+    await vi.waitFor(() => { // wait until the run is reported active under the rotated id
+      expect(isEmbeddedPiRunActive("session-rotated")).toBe(true);
+    });
+    expect(isEmbeddedPiRunActive("session")).toBe(false); // the original id is no longer reported active
+    expect(abortEmbeddedPiRun("session-rotated")).toBe(true); // aborting by the rotated id reaches the run
+
+    await runPromise;
+
+    expect(isEmbeddedPiRunActive("session-rotated")).toBe(false); // the rotated id is released once the run settles
+  });
+
  it("updates totalTokens after auto-compaction using lastCallUsage", async () => {
    const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-compact-tokens-"));
    const storePath = path.join(tmp, "sessions.json");
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -19,6 +19,7 @@ import { emitAgentEvent } from "../../infra/agent-events.js";
 import { emitDiagnosticEvent, isDiagnosticsEnabled } from "../../infra/diagnostic-events.js";
 import { generateSecureUuid } from "../../infra/secure-random.js";
 import { enqueueSystemEvent } from "../../infra/system-events.js";
+import { CommandLaneClearedError, GatewayDrainingError } from "../../process/command-queue.js";
 import { defaultRuntime } from "../../runtime.js";
 import { estimateUsageCost, resolveModelCostConfig } from "../../utils/usage-format.js";
 import {
@@ -28,6 +29,7 @@ import {
 } from "../fallback-state.js";
 import type { OriginatingChannelType, TemplateContext } from "../templating.js";
 import { resolveResponseUsageMode, type VerboseLevel } from "../thinking.js";
+import { SILENT_REPLY_TOKEN } from "../tokens.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
 import { runAgentTurnWithFallback } from "./agent-runner-execution.js";
 import {
@@ -58,6 +60,12 @@ import {
  type QueueSettings,
 } from "./queue.js";
 import { createReplyMediaPathNormalizer } from "./reply-media-paths.js";
+import {
+  createReplyOperation,
+  ReplyRunAlreadyActiveError,
+  replyRunRegistry,
+  type ReplyOperation,
+} from "./reply-run-registry.js";
 import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-threading.js";
 import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
 import { createTypingSignaler } from "./typing-mode.js";
@@ -96,6 +104,8 @@ export async function runReplyAgent(params: {
  sessionCtx: TemplateContext;
  shouldInjectGroupIntro: boolean;
  typingMode: TypingMode;
+  resetTriggered?: boolean;
+  replyOperation?: ReplyOperation;
 }): Promise<ReplyPayload | ReplyPayload[] | undefined> {
  const {
    commandBody,
@@ -123,6 +133,8 @@ export async function runReplyAgent(params: {
    sessionCtx,
    shouldInjectGroupIntro,
    typingMode,
+    resetTriggered,
+    replyOperation: providedReplyOperation,
  } = params;

  let activeSessionEntry = sessionEntry;
@@ -202,7 +214,10 @@ export async function runReplyAgent(params: {
  };

  if (shouldSteer && isStreaming) {
-    const steered = queueEmbeddedPiMessage(followupRun.run.sessionId, followupRun.prompt);
+    const steerSessionId =
+      (sessionKey ? replyRunRegistry.resolveSessionId(sessionKey) : undefined) ??
+      followupRun.run.sessionId;
+    const steered = queueEmbeddedPiMessage(steerSessionId, followupRun.prompt);
    if (steered && !shouldFollowup) {
      await touchActiveSessionEntry();
      typing.cleanup();
@@ -253,158 +268,185 @@ export async function runReplyAgent(params: {
    return undefined;
  }

-  await typingSignals.signalRunStart();
-
-  activeSessionEntry = await runPreflightCompactionIfNeeded({
-    cfg,
-    followupRun,
-    promptForEstimate: followupRun.prompt,
-    defaultModel,
-    agentCfgContextTokens,
-    sessionEntry: activeSessionEntry,
-    sessionStore: activeSessionStore,
-    sessionKey,
-    storePath,
-    isHeartbeat,
-  });
-
-  activeSessionEntry = await runMemoryFlushIfNeeded({
-    cfg,
-    followupRun,
-    promptForEstimate: followupRun.prompt,
-    sessionCtx,
-    opts,
-    defaultModel,
-    agentCfgContextTokens,
-    resolvedVerboseLevel,
-    sessionEntry: activeSessionEntry,
-    sessionStore: activeSessionStore,
-    sessionKey,
-    storePath,
-    isHeartbeat,
-  });
-
-  const runFollowupTurn = createFollowupRunner({
-    opts,
-    typing,
-    typingMode,
-    sessionEntry: activeSessionEntry,
-    sessionStore: activeSessionStore,
-    sessionKey,
-    storePath,
-    defaultModel,
-    agentCfgContextTokens,
-  });
-
-  let responseUsageLine: string | undefined;
-  type SessionResetOptions = {
-    failureLabel: string;
-    buildLogMessage: (nextSessionId: string) => string;
-    cleanupTranscripts?: boolean;
-  };
-  const resetSession = async ({
-    failureLabel,
-    buildLogMessage,
-    cleanupTranscripts,
-  }: SessionResetOptions): Promise<boolean> => {
-    if (!sessionKey || !activeSessionStore || !storePath) {
-      return false;
-    }
-    const prevEntry = activeSessionStore[sessionKey] ?? activeSessionEntry;
-    if (!prevEntry) {
-      return false;
-    }
-    const prevSessionId = cleanupTranscripts ? prevEntry.sessionId : undefined;
-    const nextSessionId = generateSecureUuid();
-    const nextEntry: SessionEntry = {
-      ...prevEntry,
-      sessionId: nextSessionId,
-      updatedAt: Date.now(),
-      systemSent: false,
-      abortedLastRun: false,
-      modelProvider: undefined,
-      model: undefined,
-      inputTokens: undefined,
-      outputTokens: undefined,
-      totalTokens: undefined,
-      totalTokensFresh: false,
-      estimatedCostUsd: undefined,
-      cacheRead: undefined,
-      cacheWrite: undefined,
-      contextTokens: undefined,
-      systemPromptReport: undefined,
-      fallbackNoticeSelectedModel: undefined,
-      fallbackNoticeActiveModel: undefined,
-      fallbackNoticeReason: undefined,
-    };
-    const agentId = resolveAgentIdFromSessionKey(sessionKey);
-    const nextSessionFile = resolveSessionTranscriptPath(
-      nextSessionId,
-      agentId,
-      sessionCtx.MessageThreadId,
-    );
-    nextEntry.sessionFile = nextSessionFile;
-    activeSessionStore[sessionKey] = nextEntry;
-    try {
-      await updateSessionStore(storePath, (store) => {
-        store[sessionKey] = nextEntry;
+  const replySessionKey = sessionKey ?? followupRun.run.sessionKey;
+  let replyOperation: ReplyOperation;
+  try {
+    replyOperation =
+      providedReplyOperation ??
+      createReplyOperation({
+        sessionId: followupRun.run.sessionId,
+        sessionKey: replySessionKey ?? "",
+        resetTriggered: resetTriggered === true,
+        upstreamAbortSignal: opts?.abortSignal,
      });
-    } catch (err) {
-      defaultRuntime.error(
-        `Failed to persist session reset after ${failureLabel} (${sessionKey}): ${String(err)}`,
-      );
+  } catch (error) {
+    if (error instanceof ReplyRunAlreadyActiveError) {
+      typing.cleanup();
+      return {
+        text: "⚠️ Previous run is still shutting down. Please try again in a moment.",
+      };
    }
-    followupRun.run.sessionId = nextSessionId;
-    followupRun.run.sessionFile = nextSessionFile;
-    refreshQueuedFollowupSession({
-      key: queueKey,
-      previousSessionId: prevEntry.sessionId,
-      nextSessionId,
-      nextSessionFile,
+    throw error;
+  }
+  let runFollowupTurn = queuedRunFollowupTurn;
+
+  try {
+    await typingSignals.signalRunStart();
+
+    activeSessionEntry = await runPreflightCompactionIfNeeded({
+      cfg,
+      followupRun,
+      promptForEstimate: followupRun.prompt,
+      defaultModel,
+      agentCfgContextTokens,
+      sessionEntry: activeSessionEntry,
+      sessionStore: activeSessionStore,
+      sessionKey,
+      storePath,
+      isHeartbeat,
+      replyOperation,
    });
-    activeSessionEntry = nextEntry;
-    activeIsNewSession = true;
-    defaultRuntime.error(buildLogMessage(nextSessionId));
-    if (cleanupTranscripts && prevSessionId) {
-      const transcriptCandidates = new Set<string>();
-      const resolved = resolveSessionFilePath(
-        prevSessionId,
-        prevEntry,
-        resolveSessionFilePathOptions({ agentId, storePath }),
-      );
-      if (resolved) {
-        transcriptCandidates.add(resolved);
+
+    activeSessionEntry = await runMemoryFlushIfNeeded({
+      cfg,
+      followupRun,
+      promptForEstimate: followupRun.prompt,
+      sessionCtx,
+      opts,
+      defaultModel,
+      agentCfgContextTokens,
+      resolvedVerboseLevel,
+      sessionEntry: activeSessionEntry,
+      sessionStore: activeSessionStore,
+      sessionKey,
+      storePath,
+      isHeartbeat,
+      replyOperation,
+    });
+
+    runFollowupTurn = createFollowupRunner({
+      opts,
+      typing,
+      typingMode,
+      sessionEntry: activeSessionEntry,
+      sessionStore: activeSessionStore,
+      sessionKey,
+      storePath,
+      defaultModel,
+      agentCfgContextTokens,
+    });
+
+    let responseUsageLine: string | undefined;
+    type SessionResetOptions = {
+      failureLabel: string;
+      buildLogMessage: (nextSessionId: string) => string;
+      cleanupTranscripts?: boolean;
+    };
+    const resetSession = async ({
+      failureLabel,
+      buildLogMessage,
+      cleanupTranscripts,
+    }: SessionResetOptions): Promise<boolean> => {
+      if (!sessionKey || !activeSessionStore || !storePath) {
+        return false;
      }
-      transcriptCandidates.add(resolveSessionTranscriptPath(prevSessionId, agentId));
-      for (const candidate of transcriptCandidates) {
-        try {
-          fs.unlinkSync(candidate);
-        } catch {
-          // Best-effort cleanup.
+      const prevEntry = activeSessionStore[sessionKey] ?? activeSessionEntry;
+      if (!prevEntry) {
+        return false;
+      }
+      const prevSessionId = cleanupTranscripts ? prevEntry.sessionId : undefined;
+      const nextSessionId = generateSecureUuid();
+      const nextEntry: SessionEntry = {
+        ...prevEntry,
+        sessionId: nextSessionId,
+        updatedAt: Date.now(),
+        systemSent: false,
+        abortedLastRun: false,
+        modelProvider: undefined,
+        model: undefined,
+        inputTokens: undefined,
+        outputTokens: undefined,
+        totalTokens: undefined,
+        totalTokensFresh: false,
+        estimatedCostUsd: undefined,
+        cacheRead: undefined,
+        cacheWrite: undefined,
+        contextTokens: undefined,
+        systemPromptReport: undefined,
+        fallbackNoticeSelectedModel: undefined,
+        fallbackNoticeActiveModel: undefined,
+        fallbackNoticeReason: undefined,
+      };
+      const agentId = resolveAgentIdFromSessionKey(sessionKey);
+      const nextSessionFile = resolveSessionTranscriptPath(
+        nextSessionId,
+        agentId,
+        sessionCtx.MessageThreadId,
+      );
+      nextEntry.sessionFile = nextSessionFile;
+      activeSessionStore[sessionKey] = nextEntry;
+      try {
+        await updateSessionStore(storePath, (store) => {
+          store[sessionKey] = nextEntry;
+        });
+      } catch (err) {
+        defaultRuntime.error(
+          `Failed to persist session reset after ${failureLabel} (${sessionKey}): ${String(err)}`,
+        );
+      }
+      followupRun.run.sessionId = nextSessionId;
+      followupRun.run.sessionFile = nextSessionFile;
+      refreshQueuedFollowupSession({
+        key: queueKey,
+        previousSessionId: prevEntry.sessionId,
+        nextSessionId,
+        nextSessionFile,
+      });
+      activeSessionEntry = nextEntry;
+      activeIsNewSession = true;
+      defaultRuntime.error(buildLogMessage(nextSessionId));
+      if (cleanupTranscripts && prevSessionId) {
+        const transcriptCandidates = new Set<string>();
+        const resolved = resolveSessionFilePath(
+          prevSessionId,
+          prevEntry,
+          resolveSessionFilePathOptions({ agentId, storePath }),
+        );
+        if (resolved) {
+          transcriptCandidates.add(resolved);
+        }
+        transcriptCandidates.add(resolveSessionTranscriptPath(prevSessionId, agentId));
+        for (const candidate of transcriptCandidates) {
+          try {
+            fs.unlinkSync(candidate);
+          } catch {
+            // Best-effort cleanup.
+          }
        }
      }
-    }
-    return true;
-  };
-  const resetSessionAfterCompactionFailure = async (reason: string): Promise<boolean> =>
-    resetSession({
-      failureLabel: "compaction failure",
-      buildLogMessage: (nextSessionId) =>
-        `Auto-compaction failed (${reason}). Restarting session ${sessionKey} -> ${nextSessionId} and retrying.`,
-    });
-  const resetSessionAfterRoleOrderingConflict = async (reason: string): Promise<boolean> =>
-    resetSession({
-      failureLabel: "role ordering conflict",
-      buildLogMessage: (nextSessionId) =>
-        `Role ordering conflict (${reason}). Restarting session ${sessionKey} -> ${nextSessionId}.`,
-      cleanupTranscripts: true,
-    });
-  try {
+      return true;
+    };
+    const resetSessionAfterCompactionFailure = async (reason: string): Promise<boolean> =>
+      resetSession({
+        failureLabel: "compaction failure",
+        buildLogMessage: (nextSessionId) =>
+          `Auto-compaction failed (${reason}). Restarting session ${sessionKey} -> ${nextSessionId} and retrying.`,
+      });
+    const resetSessionAfterRoleOrderingConflict = async (reason: string): Promise<boolean> =>
+      resetSession({
+        failureLabel: "role ordering conflict",
+        buildLogMessage: (nextSessionId) =>
+          `Role ordering conflict (${reason}). Restarting session ${sessionKey} -> ${nextSessionId}.`,
+        cleanupTranscripts: true,
+      });
+
+    replyOperation.setPhase("running");
    const runStartedAt = Date.now();
    const runOutcome = await runAgentTurnWithFallback({
      commandBody,
      followupRun,
      sessionCtx,
+      replyOperation,
      opts,
      typingSignals,
      blockReplyPipeline,
@@ -426,6 +468,9 @@ export async function runReplyAgent(params: {
    });

    if (runOutcome.kind === "final") {
+      if (!replyOperation.result) {
+        replyOperation.fail("run_failed", new Error("reply operation exited with final payload"));
+      }
      return finalizeWithFollowup(runOutcome.payload, queueKey, runFollowupTurn);
    }

@@ -785,11 +830,42 @@ export async function runReplyAgent(params: {
      runFollowupTurn,
    );
  } catch (error) {
+    if (
+      replyOperation.result?.kind === "aborted" &&
+      replyOperation.result.code === "aborted_for_restart"
+    ) {
+      return finalizeWithFollowup(
+        { text: "⚠️ Gateway is restarting. Please wait a few seconds and try again." },
+        queueKey,
+        runFollowupTurn,
+      );
+    }
+    if (replyOperation.result?.kind === "aborted") {
+      return finalizeWithFollowup({ text: SILENT_REPLY_TOKEN }, queueKey, runFollowupTurn);
+    }
+    if (error instanceof GatewayDrainingError) {
+      replyOperation.fail("gateway_draining", error);
+      return finalizeWithFollowup(
+        { text: "⚠️ Gateway is restarting. Please wait a few seconds and try again." },
+        queueKey,
+        runFollowupTurn,
+      );
+    }
+    if (error instanceof CommandLaneClearedError) {
+      replyOperation.fail("command_lane_cleared", error);
+      return finalizeWithFollowup(
+        { text: "⚠️ Gateway is restarting. Please wait a few seconds and try again." },
+        queueKey,
+        runFollowupTurn,
+      );
+    }
+    replyOperation.fail("run_failed", error);
    // Keep the followup queue moving even when an unexpected exception escapes
    // the run path; the caller still receives the original error.
    finalizeWithFollowup(undefined, queueKey, runFollowupTurn);
    throw error;
  } finally {
+    replyOperation.complete();
    blockReplyPipeline?.stop();
    typing.markRunComplete();
    // Safety net: the dispatcher's onIdle callback normally fires
--- a/src/auto-reply/reply/commands-session-abort.ts
+++ b/src/auto-reply/reply/commands-session-abort.ts
@@ -18,6 +18,7 @@ import { rejectUnauthorizedCommand } from "./command-gates.js";
 import { persistAbortTargetEntry } from "./commands-session-store.js";
 import type { CommandHandler } from "./commands-types.js";
 import { clearSessionQueues } from "./queue.js";
+import { replyRunRegistry } from "./reply-run-registry.js";

 type AbortTarget = {
  entry?: SessionEntry;
@@ -34,16 +35,25 @@ function resolveAbortTarget(params: {
  const targetSessionKey = params.ctx.CommandTargetSessionKey?.trim() || params.sessionKey;
  const { entry, key } = resolveSessionEntryForKey(params.sessionStore, targetSessionKey);
  if (entry && key) {
-    return { entry, key, sessionId: entry.sessionId };
+    return {
+      entry,
+      key,
+      sessionId: replyRunRegistry.resolveSessionId(key) ?? entry.sessionId,
+    };
  }
  if (params.sessionEntry && params.sessionKey) {
    return {
      entry: params.sessionEntry,
      key: params.sessionKey,
-      sessionId: params.sessionEntry.sessionId,
+      sessionId:
+        replyRunRegistry.resolveSessionId(params.sessionKey) ?? params.sessionEntry.sessionId,
    };
  }
-  return { entry: undefined, key: targetSessionKey, sessionId: undefined };
+  return {
+    entry: undefined,
+    key: targetSessionKey,
+    sessionId: targetSessionKey ? replyRunRegistry.resolveSessionId(targetSessionKey) : undefined,
+  };
 }

 function resolveAbortCutoffForTarget(params: {
@@ -70,6 +80,9 @@ async function applyAbortTarget(params: {
  abortCutoff?: AbortCutoff;
 }) {
  const { abortTarget } = params;
+  if (abortTarget.key) {
+    replyRunRegistry.abort(abortTarget.key);
+  }
  if (abortTarget.sessionId) {
    abortEmbeddedPiRun(abortTarget.sessionId);
  }
--- a/src/auto-reply/reply/followup-runner.ts
+++ b/src/auto-reply/reply/followup-runner.ts
@@ -34,6 +34,7 @@ import {
  filterMessagingToolMediaDuplicates,
  shouldSuppressMessagingToolReplies,
 } from "./reply-payloads.js";
+import { createReplyOperation } from "./reply-run-registry.js";
 import { resolveReplyToMode } from "./reply-threading.js";
 import { isRoutableChannel, routeReply } from "./route-reply.js";
 import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
@@ -135,6 +136,13 @@ export function createFollowupRunner(params: {
  };

  return async (queued: FollowupRun) => {
+    const replySessionKey = queued.run.sessionKey ?? sessionKey;
+    const replyOperation = createReplyOperation({
+      sessionId: queued.run.sessionId,
+      sessionKey: replySessionKey ?? "",
+      resetTriggered: false,
+      upstreamAbortSignal: opts?.abortSignal,
+    });
    try {
      const runId = crypto.randomUUID();
      const shouldSurfaceToControlUi = isInternalMessageChannel(
@@ -167,10 +175,12 @@ export function createFollowupRunner(params: {
        sessionKey,
        storePath,
        isHeartbeat: opts?.isHeartbeat === true,
+        replyOperation,
      });
      let bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
        activeSessionEntry?.systemPromptReport,
      );
+      replyOperation.setPhase("running");
      try {
        const fallbackResult = await runWithModelFallback({
          cfg: queued.run.config,
@@ -189,6 +199,7 @@ export function createFollowupRunner(params: {
            try {
              const result = await runEmbeddedPiAgent({
                allowGatewaySubagentBinding: true,
+                replyOperation,
                sessionId: queued.run.sessionId,
                sessionKey: queued.run.sessionKey,
                agentId: queued.run.agentId,
@@ -268,6 +279,7 @@ export function createFollowupRunner(params: {
        fallbackModel = fallbackResult.model;
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
+        replyOperation.fail("run_failed", err);
        defaultRuntime.error?.(`Followup agent failed before reply: ${message}`);
        return;
      }
@@ -399,6 +411,7 @@ export function createFollowupRunner(params: {

      await sendFollowupPayloads(finalPayloads, queued);
    } finally {
+      replyOperation.complete();
      // Both signals are required for the typing controller to clean up.
      // The main inbound dispatch path calls markDispatchIdle() from the
      // buffered dispatcher's finally block, but followup turns bypass the
--- a/src/auto-reply/reply/get-reply-run.media-only.test.ts
+++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts
@@ -1,5 +1,6 @@
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import { importFreshModule } from "../../../test/helpers/import-fresh.ts";
+import type { SessionEntry } from "../../config/sessions/types.js";

 vi.mock("../../agents/auth-profiles/session-override.js", () => ({
  resolveSessionAuthProfileOverride: vi.fn().mockResolvedValue(undefined),
@@ -9,7 +10,9 @@ vi.mock("../../agents/pi-embedded.runtime.js", () => ({
  abortEmbeddedPiRun: vi.fn().mockReturnValue(false),
  isEmbeddedPiRunActive: vi.fn().mockReturnValue(false),
  isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false),
+  resolveActiveEmbeddedRunSessionId: vi.fn().mockReturnValue(undefined),
  resolveEmbeddedSessionLane: vi.fn().mockReturnValue("session:session-key"),
+  waitForEmbeddedPiRunEnd: vi.fn().mockResolvedValue(true),
 }));

 vi.mock("../../config/sessions/group.js", () => ({
@@ -100,8 +103,25 @@ let runReplyAgent: typeof import("./agent-runner.runtime.js").runReplyAgent;
 let routeReply: typeof import("./route-reply.runtime.js").routeReply;
 let drainFormattedSystemEvents: typeof import("./session-system-events.js").drainFormattedSystemEvents;
 let resolveTypingMode: typeof import("./typing-mode.js").resolveTypingMode;
+let abortEmbeddedPiRunActual: typeof import("../../agents/pi-embedded-runner/runs.js").abortEmbeddedPiRun;
+let clearActiveEmbeddedRun: typeof import("../../agents/pi-embedded-runner/runs.js").clearActiveEmbeddedRun;
+let createReplyOperation: typeof import("./reply-run-registry.js").createReplyOperation;
+let getActiveReplyRunCount: typeof import("./reply-run-registry.js").getActiveReplyRunCount;
+let isEmbeddedPiRunActiveActual: typeof import("../../agents/pi-embedded-runner/runs.js").isEmbeddedPiRunActive;
+let isEmbeddedPiRunStreamingActual: typeof import("../../agents/pi-embedded-runner/runs.js").isEmbeddedPiRunStreaming;
+let replyRunTesting: typeof import("./reply-run-registry.js").__testing;
+let resolveActiveEmbeddedRunSessionIdActual: typeof import("../../agents/pi-embedded-runner/runs.js").resolveActiveEmbeddedRunSessionId;
+let runsTesting: typeof import("../../agents/pi-embedded-runner/runs.js").__testing;
+let setActiveEmbeddedRun: typeof import("../../agents/pi-embedded-runner/runs.js").setActiveEmbeddedRun;
+let waitForEmbeddedPiRunEndActual: typeof import("../../agents/pi-embedded-runner/runs.js").waitForEmbeddedPiRunEnd;
 let loadScopeCounter = 0;

+function createGatewayDrainingError(): Error { // Test helper: builds a stand-in for the gateway's draining error.
+  const error = new Error("Gateway is draining for restart; new tasks are not accepted");
+  error.name = "GatewayDrainingError"; // Plain Error tagged by name only, so `instanceof GatewayDrainingError` checks will not match it.
+  return error;
+}
+
 async function loadFreshGetReplyRunModuleForTest() {
  ({ runPreparedReply } = await importFreshModule<typeof import("./get-reply-run.js")>(
    import.meta.url,
@@ -191,12 +211,45 @@ describe("runPreparedReply media-only handling", () => {
    ({ routeReply } = await import("./route-reply.runtime.js"));
    ({ drainFormattedSystemEvents } = await import("./session-system-events.js"));
    ({ resolveTypingMode } = await import("./typing-mode.js"));
+    ({
+      __testing: runsTesting,
+      abortEmbeddedPiRun: abortEmbeddedPiRunActual,
+      clearActiveEmbeddedRun,
+      isEmbeddedPiRunActive: isEmbeddedPiRunActiveActual,
+      isEmbeddedPiRunStreaming: isEmbeddedPiRunStreamingActual,
+      resolveActiveEmbeddedRunSessionId: resolveActiveEmbeddedRunSessionIdActual,
+      setActiveEmbeddedRun,
+      waitForEmbeddedPiRunEnd: waitForEmbeddedPiRunEndActual,
+    } = await import("../../agents/pi-embedded-runner/runs.js"));
+    ({
+      __testing: replyRunTesting,
+      createReplyOperation,
+      getActiveReplyRunCount,
+    } = await import("./reply-run-registry.js"));
  });

  beforeEach(async () => {
    storeRuntimeLoads.mockClear();
    updateSessionStore.mockReset();
    vi.clearAllMocks();
+    runsTesting.resetActiveEmbeddedRuns();
+    replyRunTesting.resetReplyRunRegistry();
+    const piRuntime = await import("../../agents/pi-embedded.runtime.js");
+    vi.mocked(piRuntime.abortEmbeddedPiRun).mockImplementation((sessionId, opts) =>
+      abortEmbeddedPiRunActual(sessionId, opts),
+    );
+    vi.mocked(piRuntime.isEmbeddedPiRunActive).mockImplementation((sessionId) =>
+      isEmbeddedPiRunActiveActual(sessionId),
+    );
+    vi.mocked(piRuntime.isEmbeddedPiRunStreaming).mockImplementation((sessionId) =>
+      isEmbeddedPiRunStreamingActual(sessionId),
+    );
+    vi.mocked(piRuntime.resolveActiveEmbeddedRunSessionId).mockImplementation((sessionKey) =>
+      resolveActiveEmbeddedRunSessionIdActual(sessionKey),
+    );
+    vi.mocked(piRuntime.waitForEmbeddedPiRunEnd).mockImplementation((sessionId, timeoutMs) =>
+      waitForEmbeddedPiRunEndActual(sessionId, timeoutMs),
+    );
    await loadFreshGetReplyRunModuleForTest();
  });

@@ -253,87 +306,313 @@ describe("runPreparedReply media-only handling", () => {
    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
  });

-  it("omits auth key labels from /new and /reset confirmation messages", async () => {
+  it("does not send a standalone reset notice for reply-producing /new turns", async () => {
    await runPreparedReply(
      baseParams({
        resetTriggered: true,
      }),
    );

-    const resetNoticeCall = vi.mocked(routeReply).mock.calls[0]?.[0] as
-      | { payload?: { text?: string } }
-      | undefined;
-    expect(resetNoticeCall?.payload?.text).toContain("✅ New session started · model:");
-    expect(resetNoticeCall?.payload?.text).not.toContain("🔑");
-    expect(resetNoticeCall?.payload?.text).not.toContain("api-key");
-    expect(resetNoticeCall?.payload?.text).not.toContain("env:");
+    const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
+    expect(call?.resetTriggered).toBe(true);
+    expect(vi.mocked(routeReply)).not.toHaveBeenCalled();
  });

-  it("routes reset notices through the effective session account when AccountId is omitted", async () => {
-    await runPreparedReply(
-      baseParams({
-        resetTriggered: true,
-        ctx: {
-          Body: "",
-          RawBody: "",
-          CommandBody: "",
-          ThreadHistoryBody: "Earlier message in this thread",
-          OriginatingChannel: "slack",
-          OriginatingTo: "C123",
-          ChatType: "group",
-          AccountId: undefined,
-        },
-        sessionCtx: {
-          Body: "",
-          BodyStripped: "",
-          ThreadHistoryBody: "Earlier message in this thread",
-          MediaPath: "/tmp/input.png",
-          Provider: "slack",
-          ChatType: "group",
-          OriginatingChannel: "slack",
-          OriginatingTo: "C123",
-          AccountId: "work",
-        },
-      }),
-    );
+  it("does not emit a reset notice when /new is attempted during gateway drain", async () => {
+    vi.mocked(runReplyAgent).mockRejectedValueOnce(createGatewayDrainingError());

-    const resetNoticeCall = vi.mocked(routeReply).mock.calls[0]?.[0] as
-      | { accountId?: string }
-      | undefined;
-    expect(resetNoticeCall?.accountId).toBe("work");
-  });
-
-  it("skips reset notice when only webchat fallback routing is available", async () => {
-    await runPreparedReply(
-      baseParams({
-        resetTriggered: true,
-        ctx: {
-          Body: "",
-          RawBody: "",
-          CommandBody: "",
-          ThreadHistoryBody: "Earlier message in this thread",
-          OriginatingChannel: undefined,
-          OriginatingTo: undefined,
-          ChatType: "group",
-        },
-        command: {
-          surface: "webchat",
-          isAuthorizedSender: true,
-          abortKey: "session-key",
-          ownerList: [],
-          senderIsOwner: false,
-          rawBodyNormalized: "",
-          commandBodyNormalized: "",
-          channel: "webchat",
-          from: undefined,
-          to: undefined,
-        } as never,
-      }),
-    );
+    await expect(
+      runPreparedReply(
+        baseParams({
+          resetTriggered: true,
+        }),
+      ),
+    ).rejects.toThrow("Gateway is draining for restart; new tasks are not accepted");

    expect(vi.mocked(routeReply)).not.toHaveBeenCalled();
  });

+  it("does not register a reply operation before auth setup succeeds", async () => {
+    const { resolveSessionAuthProfileOverride } =
+      await import("../../agents/auth-profiles/session-override.js");
+    const sessionId = "reply-operation-auth-failure";
+    const activeBefore = getActiveReplyRunCount(); // Baseline, so the final check is relative rather than assuming zero.
+    vi.mocked(resolveSessionAuthProfileOverride).mockRejectedValueOnce(new Error("auth failed")); // Fail auth setup for this call only.
+
+    await expect(
+      runPreparedReply(
+        baseParams({
+          sessionId,
+        }),
+      ),
+    ).rejects.toThrow("auth failed");
+
+    expect(getActiveReplyRunCount()).toBe(activeBefore); // The failed attempt must not leave an operation registered.
+  });
+  it("waits for the previous active run to clear before registering a new reply operation", async () => {
+    const queueSettings = await import("./queue/settings.js");
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" }); // Interrupt queue mode for this call only.
+    const previousRun = createReplyOperation({ // An earlier run registered under "session-key".
+      sessionId: "session-overlap",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    previousRun.setPhase("running");
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply should stall behind previousRun.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-overlap",
+      }),
+    );
+
+    await Promise.resolve(); // Yield one microtask tick so runPreparedReply can make progress.
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+
+    previousRun.complete(); // Finish the earlier run; the pending reply is expected to proceed.
+
+    await expect(runPromise).resolves.toEqual({ text: "ok" });
+    expect(vi.mocked(runReplyAgent)).toHaveBeenCalledOnce();
+  });
+  it("interrupts embedded-only active runs even without a reply operation", async () => {
+    const queueSettings = await import("./queue/settings.js");
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" }); // Interrupt queue mode for this call only.
+    const embeddedAbort = vi.fn();
+    const embeddedHandle = { // Minimal stand-in for an embedded run handle that reports itself as streaming.
+      queueMessage: vi.fn(async () => {}),
+      isStreaming: () => true,
+      isCompacting: () => false,
+      abort: embeddedAbort,
+    };
+    setActiveEmbeddedRun("session-embedded-only", embeddedHandle, "session-key"); // No createReplyOperation call: only the embedded run exists.
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply should stall behind the embedded run.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-embedded-only",
+      }),
+    );
+
+    await Promise.resolve(); // Yield one microtask tick so runPreparedReply can make progress.
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+    expect(embeddedAbort).toHaveBeenCalledOnce(); // The embedded run's own abort hook must have been invoked.
+
+    clearActiveEmbeddedRun("session-embedded-only", embeddedHandle, "session-key"); // Clearing the run lets the pending reply continue.
+
+    await expect(runPromise).resolves.toEqual({ text: "ok" });
+    expect(vi.mocked(runReplyAgent)).toHaveBeenCalledOnce();
+  });
+  it("rechecks same-session ownership after async prep before registering a new reply operation", async () => {
+    const { resolveSessionAuthProfileOverride } =
+      await import("../../agents/auth-profiles/session-override.js");
+    const queueSettings = await import("./queue/settings.js");
+
+    let resolveAuth!: () => void; // Assigned synchronously by the Promise executor below.
+    const authPromise = new Promise<void>((resolve) => {
+      resolveAuth = resolve;
+    });
+
+    vi.mocked(resolveSessionAuthProfileOverride).mockImplementationOnce( // Hold auth resolution open until resolveAuth() is called.
+      async () => await authPromise.then(() => undefined),
+    );
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" });
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply is parked on the gated auth call.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-auth-race",
+      }),
+    );
+
+    await Promise.resolve(); // Yield one microtask tick so runPreparedReply can make progress.
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+
+    const intruderRun = createReplyOperation({ // A competing run registers while auth is still pending.
+      sessionId: "session-auth-race",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    intruderRun.setPhase("running");
+    resolveAuth();
+
+    await Promise.resolve();
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); // Auth is done, but the intruder still holds the session.
+
+    intruderRun.complete();
+
+    await expect(runPromise).resolves.toEqual({ text: "ok" });
+    expect(vi.mocked(runReplyAgent)).toHaveBeenCalledOnce();
+  });
+  it("re-resolves auth profile after waiting for a prior run", async () => {
+    const { resolveSessionAuthProfileOverride } =
+      await import("../../agents/auth-profiles/session-override.js");
+    const queueSettings = await import("./queue/settings.js");
+    const sessionStore: Record<string, SessionEntry> = {
+      "session-key": {
+        sessionId: "session-auth-profile",
+        sessionFile: "/tmp/session-auth-profile.jsonl",
+        authProfileOverride: "profile-before-wait",
+        authProfileOverrideSource: "auto",
+        updatedAt: 1,
+      },
+    };
+    vi.mocked(resolveSessionAuthProfileOverride).mockImplementation(async ({ sessionEntry }) => { // NOTE(review): persistent, not *Once — confirm it is reset before later tests.
+      return sessionEntry?.authProfileOverride; // Echo the override of whichever entry the caller passes in.
+    });
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" });
+    const previousRun = createReplyOperation({ // An earlier run registered under "session-key".
+      sessionId: "session-auth-profile",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    previousRun.setPhase("running");
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply should stall behind previousRun.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-auth-profile",
+        sessionEntry: sessionStore["session-key"],
+        sessionStore,
+      }),
+    );
+
+    await Promise.resolve();
+    sessionStore["session-key"] = { // Replace the stored entry while the reply is still waiting.
+      ...sessionStore["session-key"],
+      authProfileOverride: "profile-after-wait",
+      authProfileOverrideSource: "auto",
+      updatedAt: 2,
+    };
+    previousRun.complete();
+
+    await expect(runPromise).resolves.toEqual({ text: "ok" });
+    const call = vi.mocked(runReplyAgent).mock.calls.at(-1)?.[0];
+    expect(call?.followupRun.run.authProfileId).toBe("profile-after-wait"); // The run must use the entry written during the wait.
+    expect(vi.mocked(resolveSessionAuthProfileOverride)).toHaveBeenCalledTimes(2); // Presumably once before the wait and once after.
+  });
+  it("re-resolves same-session ownership after session-id rotation during async prep", async () => {
+    const { resolveSessionAuthProfileOverride } =
+      await import("../../agents/auth-profiles/session-override.js");
+    const queueSettings = await import("./queue/settings.js");
+
+    let resolveAuth!: () => void; // Assigned synchronously by the Promise executor below.
+    const authPromise = new Promise<void>((resolve) => {
+      resolveAuth = resolve;
+    });
+    const sessionStore: Record<string, SessionEntry> = {
+      "session-key": {
+        sessionId: "session-before-rotation",
+        sessionFile: "/tmp/session-before-rotation.jsonl",
+        updatedAt: 1,
+      },
+    };
+
+    vi.mocked(resolveSessionAuthProfileOverride).mockImplementationOnce( // Hold auth resolution open until resolveAuth() is called.
+      async () => await authPromise.then(() => undefined),
+    );
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" });
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply is parked on the gated auth call.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-before-rotation",
+        sessionEntry: sessionStore["session-key"],
+        sessionStore,
+      }),
+    );
+
+    await Promise.resolve();
+    const rotatedRun = createReplyOperation({ // Registered under the old id, then rotated below.
+      sessionId: "session-before-rotation",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    rotatedRun.setPhase("running");
+    sessionStore["session-key"] = { // Mirror the rotation in the session store.
+      ...sessionStore["session-key"],
+      sessionId: "session-after-rotation",
+      sessionFile: "/tmp/session-after-rotation.jsonl",
+      updatedAt: 2,
+    };
+    rotatedRun.updateSessionId("session-after-rotation"); // The active run now carries the new session id.
+
+    resolveAuth();
+
+    await Promise.resolve();
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); // Auth is done, but the rotated run still holds the session key.
+
+    rotatedRun.complete();
+
+    await expect(runPromise).resolves.toEqual({ text: "ok" });
+    const call = vi.mocked(runReplyAgent).mock.calls.at(-1)?.[0];
+    expect(call?.followupRun.run.sessionId).toBe("session-after-rotation"); // The reply must run under the rotated id, not the one it was called with.
+  });
+  it("rechecks same-session ownership after wait resolves before calling the runner", async () => {
+    const queueSettings = await import("./queue/settings.js");
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" }); // Interrupt queue mode for this call only.
+    const previousRun = createReplyOperation({ // An earlier run registered under "session-key".
+      sessionId: "session-before-wait",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    previousRun.setPhase("running");
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply should stall behind previousRun.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-before-wait",
+      }),
+    );
+
+    await Promise.resolve(); // Yield one microtask tick so runPreparedReply can make progress.
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+
+    previousRun.complete();
+    const nextRun = createReplyOperation({ // Created synchronously after complete(), so it owns the key before the waiter resumes.
+      sessionId: "session-after-wait",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    nextRun.setPhase("running");
+
+    await expect(runPromise).resolves.toEqual({ // The waiter gives up with a busy notice instead of running.
+      text: "⚠️ Previous run is still shutting down. Please try again in a moment.",
+    });
+    expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled();
+
+    nextRun.complete(); // Release the second run so it is not left active when the test ends.
+  });
+  it("re-drains system events after waiting behind an active run", async () => {
+    const queueSettings = await import("./queue/settings.js");
+    vi.mocked(queueSettings.resolveQueueSettings).mockReturnValueOnce({ mode: "interrupt" }); // Interrupt queue mode for this call only.
+    vi.mocked(drainFormattedSystemEvents)
+      .mockResolvedValueOnce("System: [t] Initial event.") // Returned by the first drain call.
+      .mockResolvedValueOnce("System: [t] Post-compaction context."); // Returned by the second drain call.
+
+    const previousRun = createReplyOperation({ // An earlier run registered under "session-key".
+      sessionId: "session-events-after-wait",
+      sessionKey: "session-key",
+      resetTriggered: false,
+    });
+    previousRun.setPhase("running");
+
+    const runPromise = runPreparedReply( // Deliberately not awaited: the reply should stall behind previousRun.
+      baseParams({
+        isNewSession: false,
+        sessionId: "session-events-after-wait",
+      }),
+    );
+
+    await Promise.resolve(); // Yield one microtask tick so runPreparedReply can make progress.
+    previousRun.complete();
+
+    await expect(runPromise).resolves.toEqual({ text: "ok" });
+    const call = vi.mocked(runReplyAgent).mock.calls.at(-1)?.[0];
+    expect(call?.commandBody).toContain("System: [t] Initial event."); // The first drain's events must not be lost by the rebuild.
+    expect(call?.commandBody).toContain("System: [t] Post-compaction context.");
+    expect(call?.followupRun.prompt).toContain("System: [t] Initial event."); // The followup prompt carries both blocks as well.
+    expect(call?.followupRun.prompt).toContain("System: [t] Post-compaction context.");
+  });
  it("uses inbound origin channel for run messageProvider", async () => {
    await runPreparedReply(
      baseParams({
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -8,6 +8,7 @@ import {
  resolveSessionFilePath,
  resolveSessionFilePathOptions,
 } from "../../config/sessions/paths.js";
+import { resolveSessionStoreEntry } from "../../config/sessions/store.js";
 import type { SessionEntry } from "../../config/sessions/types.js";
 import { logVerbose } from "../../globals.js";
 import { clearCommandLane, getQueueSize } from "../../process/command-queue.js";
@@ -35,8 +36,8 @@ import { buildGroupChatContext, buildGroupIntro } from "./groups.js";
 import { buildInboundMetaSystemPrompt, buildInboundUserContextPrefix } from "./inbound-meta.js";
 import type { createModelSelectionState } from "./model-selection.js";
 import { resolveOriginMessageProvider } from "./origin-routing.js";
+import { resolveActiveRunQueueAction } from "./queue-policy.js";
 import { resolveQueueSettings } from "./queue/settings.js";
-import type { RouteReplyParams } from "./route-reply.js";
 import { buildBareSessionResetPrompt } from "./session-reset-prompt.js";
 import { drainFormattedSystemEvents } from "./session-system-events.js";
 import { resolveTypingMode } from "./typing-mode.js";
@@ -50,7 +51,6 @@ type ExecOverrides = Pick<ExecToolDefaults, "host" | "security" | "ask" | "node"
 let piEmbeddedRuntimePromise: Promise<typeof import("../../agents/pi-embedded.runtime.js")> | null =
  null;
 let agentRunnerRuntimePromise: Promise<typeof import("./agent-runner.runtime.js")> | null = null;
-let routeReplyRuntimePromise: Promise<typeof import("./route-reply.runtime.js")> | null = null;
 let sessionUpdatesRuntimePromise: Promise<typeof import("./session-updates.runtime.js")> | null =
  null;
 let sessionStoreRuntimePromise: Promise<
@@ -67,11 +67,6 @@ function loadAgentRunnerRuntime() {
  return agentRunnerRuntimePromise;
 }

-function loadRouteReplyRuntime() {
-  routeReplyRuntimePromise ??= import("./route-reply.runtime.js");
-  return routeReplyRuntimePromise;
-}
-
 function loadSessionUpdatesRuntime() {
  sessionUpdatesRuntimePromise ??= import("./session-updates.runtime.js");
  return sessionUpdatesRuntimePromise;
@@ -82,77 +77,6 @@ function loadSessionStoreRuntime() {
  return sessionStoreRuntimePromise;
 }

-function buildResetSessionNoticeText(params: {
-  provider: string;
-  model: string;
-  defaultProvider: string;
-  defaultModel: string;
-}): string {
-  const modelLabel = `${params.provider}/${params.model}`;
-  const defaultLabel = `${params.defaultProvider}/${params.defaultModel}`;
-  return modelLabel === defaultLabel
-    ? `✅ New session started · model: ${modelLabel}`
-    : `✅ New session started · model: ${modelLabel} (default: ${defaultLabel})`;
-}
-
-function resolveResetSessionNoticeRoute(params: {
-  ctx: MsgContext;
-  command: ReturnType<typeof buildCommandContext>;
-}): {
-  channel: RouteReplyParams["channel"];
-  to: string;
-} | null {
-  const commandChannel = params.command.channel?.trim().toLowerCase();
-  const fallbackChannel =
-    commandChannel && commandChannel !== "webchat"
-      ? (commandChannel as RouteReplyParams["channel"])
-      : undefined;
-  const channel = params.ctx.OriginatingChannel ?? fallbackChannel;
-  const to = params.ctx.OriginatingTo ?? params.command.from ?? params.command.to;
-  if (!channel || channel === "webchat" || !to) {
-    return null;
-  }
-  return { channel, to };
-}
-
-async function sendResetSessionNotice(params: {
-  ctx: MsgContext;
-  command: ReturnType<typeof buildCommandContext>;
-  sessionKey: string;
-  cfg: OpenClawConfig;
-  accountId: string | undefined;
-  threadId: string | number | undefined;
-  provider: string;
-  model: string;
-  defaultProvider: string;
-  defaultModel: string;
-}): Promise<void> {
-  const route = resolveResetSessionNoticeRoute({
-    ctx: params.ctx,
-    command: params.command,
-  });
-  if (!route) {
-    return;
-  }
-  const { routeReply } = await loadRouteReplyRuntime();
-  await routeReply({
-    payload: {
-      text: buildResetSessionNoticeText({
-        provider: params.provider,
-        model: params.model,
-        defaultProvider: params.defaultProvider,
-        defaultModel: params.defaultModel,
-      }),
-    },
-    channel: route.channel,
-    to: route.to,
-    sessionKey: params.sessionKey,
-    accountId: params.accountId,
-    threadId: params.threadId,
-    cfg: params.cfg,
-  });
-}
-
 type RunPreparedReplyParams = {
  ctx: MsgContext;
  sessionCtx: TemplateContext;
@@ -236,7 +160,6 @@ export async function runPreparedReply(
    perMessageQueueOptions,
    typing,
    opts,
-    defaultProvider,
    defaultModel,
    timeoutMs,
    isNewSession,
@@ -376,20 +299,7 @@ export async function runPreparedReply(
      prefixedBodyBase = parts.slice(1).join(" ").trim();
    }
  }
-  // Drain system events once, then prepend to each path's body independently.
-  // The queue/steer path uses effectiveBaseBody (unstripped, no session hints) to match
-  // main's pre-PR behavior; the immediate-run path uses prefixedBodyBase (post-hints,
-  // post-think-hint-strip) so the run sees the cleaned-up body.
-  const eventsBlock = await drainFormattedSystemEvents({
-    cfg,
-    sessionKey,
-    isMainSession,
-    isNewSession,
-  });
-  const prependEvents = (body: string) => (eventsBlock ? `${eventsBlock}\n\n${body}` : body);
-  const bodyWithEvents = prependEvents(effectiveBaseBody);
-  prefixedBodyBase = prependEvents(prefixedBodyBase);
-  prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext);
+  const prefixedBodyCore = prefixedBodyBase;
  const threadStarterBody = ctx.ThreadStarterBody?.trim();
  const threadHistoryBody = ctx.ThreadHistoryBody?.trim();
  const threadContextNote = threadHistoryBody
@@ -397,6 +307,38 @@ export async function runPreparedReply(
    : threadStarterBody
      ? `[Thread starter - for context]\n${threadStarterBody}`
      : undefined;
+  const drainedSystemEventBlocks: string[] = []; // Event blocks drained so far; kept across calls so a rebuild still includes earlier ones.
+  const rebuildPromptBodies = async (): Promise<{ // Drains system events again and rebuilds both prompt bodies from all blocks collected so far.
+    prefixedCommandBody: string;
+    queuedBody: string;
+  }> => {
+    const eventsBlock = await drainFormattedSystemEvents({
+      cfg,
+      sessionKey,
+      isMainSession,
+      isNewSession,
+    });
+    if (eventsBlock) { // An empty or missing drain result adds nothing.
+      drainedSystemEventBlocks.push(eventsBlock);
+    }
+    const combinedEventsBlock = drainedSystemEventBlocks.join("\n"); // Blocks from successive drains are separated by a single newline.
+    const prependEvents = (body: string) =>
+      combinedEventsBlock ? `${combinedEventsBlock}\n\n${body}` : body;
+    const bodyWithEvents = prependEvents(effectiveBaseBody); // Queued variant: events + effectiveBaseBody, no untrusted context.
+    const prefixedBodyWithEvents = appendUntrustedContext( // Command variant: events + prefixedBodyCore, then untrusted context.
+      prependEvents(prefixedBodyCore),
+      sessionCtx.UntrustedContext,
+    );
+    const prefixedBody = [threadContextNote, prefixedBodyWithEvents].filter(Boolean).join("\n\n"); // Thread context note, when present, goes first.
+    const queueBodyBase = [threadContextNote, bodyWithEvents].filter(Boolean).join("\n\n");
+    const queuedBody = mediaNote // With a media note, the note and the reply hint are placed before the body.
+      ? [mediaNote, mediaReplyHint, queueBodyBase].filter(Boolean).join("\n").trim()
+      : queueBodyBase;
+    const prefixedCommandBody = mediaNote
+      ? [mediaNote, mediaReplyHint, prefixedBody || ""].filter(Boolean).join("\n").trim()
+      : prefixedBody;
+    return { prefixedCommandBody, queuedBody };
+  };
  const skillResult =
    process.env.OPENCLAW_TEST_FAST === "1"
      ? {
@@ -421,14 +363,11 @@ export async function runPreparedReply(
  sessionEntry = skillResult.sessionEntry ?? sessionEntry;
  currentSystemSent = skillResult.systemSent;
  const skillsSnapshot = skillResult.skillsSnapshot;
-  const prefixedBody = [threadContextNote, prefixedBodyBase].filter(Boolean).join("\n\n");
  const mediaNote = buildInboundMediaNote(ctx);
  const mediaReplyHint = mediaNote
    ? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:https://example.com/image.jpg (spaces ok, quote if needed) or a safe relative path like MEDIA:./image.jpg. Avoid absolute paths (MEDIA:/...) and ~ paths — they are blocked for security. Keep caption in the text body."
    : undefined;
-  let prefixedCommandBody = mediaNote
-    ? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim()
-    : prefixedBody;
+  let { prefixedCommandBody, queuedBody } = await rebuildPromptBodies();
  if (!resolvedThinkLevel) {
    resolvedThinkLevel = await modelState.resolveDefaultThinkingLevel();
  }
@@ -453,32 +392,32 @@ export async function runPreparedReply(
      }
    }
  }
-  if (resetTriggered && command.isAuthorizedSender) {
-    await sendResetSessionNotice({
-      ctx,
-      command,
-      sessionKey,
-      cfg,
-      accountId: sessionCtx.AccountId,
-      threadId: ctx.MessageThreadId,
-      provider,
-      model,
-      defaultProvider,
-      defaultModel,
-    });
-  }
  const sessionIdFinal = sessionId ?? crypto.randomUUID();
-  const sessionFile = resolveSessionFilePath(
-    sessionIdFinal,
-    sessionEntry,
-    resolveSessionFilePathOptions({ agentId, storePath }),
-  );
-  // Use bodyWithEvents (events prepended, but no session hints / untrusted context) so
-  // deferred turns receive system events while keeping the same scope as effectiveBaseBody did.
-  const queueBodyBase = [threadContextNote, bodyWithEvents].filter(Boolean).join("\n\n");
-  const queuedBody = mediaNote
-    ? [mediaNote, mediaReplyHint, queueBodyBase].filter(Boolean).join("\n").trim()
-    : queueBodyBase;
+  const sessionFilePathOptions = resolveSessionFilePathOptions({ agentId, storePath });
+  const resolvePreparedSessionState = (): {
+    sessionEntry: SessionEntry | undefined;
+    sessionId: string;
+    sessionFile: string;
+  } => {
+    const latestSessionEntry =
+      sessionStore && sessionKey
+        ? (resolveSessionStoreEntry({
+            store: sessionStore,
+            sessionKey,
+          }).existing ?? sessionEntry)
+        : sessionEntry;
+    const latestSessionId = latestSessionEntry?.sessionId ?? sessionIdFinal;
+    return {
+      sessionEntry: latestSessionEntry,
+      sessionId: latestSessionId,
+      sessionFile: resolveSessionFilePath(
+        latestSessionId,
+        latestSessionEntry,
+        sessionFilePathOptions,
+      ),
+    };
+  };
+  let preparedSessionState = resolvePreparedSessionState();
  const resolvedQueue = resolveQueueSettings({
    cfg,
    channel: sessionCtx.Provider,
@@ -490,34 +429,89 @@ export async function runPreparedReply(
    abortEmbeddedPiRun,
    isEmbeddedPiRunActive,
    isEmbeddedPiRunStreaming,
+    resolveActiveEmbeddedRunSessionId,
    resolveEmbeddedSessionLane,
+    waitForEmbeddedPiRunEnd,
  } = await loadPiEmbeddedRuntime();
  const sessionLaneKey = resolveEmbeddedSessionLane(sessionKey ?? sessionIdFinal);
  const laneSize = getQueueSize(sessionLaneKey);
  if (resolvedQueue.mode === "interrupt" && laneSize > 0) {
    const cleared = clearCommandLane(sessionLaneKey);
-    const aborted = abortEmbeddedPiRun(sessionIdFinal);
+    const activeSessionId = resolveActiveEmbeddedRunSessionId(sessionKey);
+    const aborted = abortEmbeddedPiRun(activeSessionId ?? preparedSessionState.sessionId);
    logVerbose(`Interrupting ${sessionLaneKey} (cleared ${cleared}, aborted=${aborted})`);
  }
-  const queueKey = sessionKey ?? sessionIdFinal;
-  const isActive = isEmbeddedPiRunActive(sessionIdFinal);
-  const isStreaming = isEmbeddedPiRunStreaming(sessionIdFinal);
-  const shouldSteer = resolvedQueue.mode === "steer" || resolvedQueue.mode === "steer-backlog";
-  const shouldFollowup =
-    resolvedQueue.mode === "followup" ||
-    resolvedQueue.mode === "collect" ||
-    resolvedQueue.mode === "steer-backlog";
-  const authProfileId = await resolveSessionAuthProfileOverride({
+  let authProfileId = await resolveSessionAuthProfileOverride({
    cfg,
    provider,
    agentDir,
-    sessionEntry,
+    sessionEntry: preparedSessionState.sessionEntry,
    sessionStore,
    sessionKey,
    storePath,
    isNewSession,
  });
-  const authProfileIdSource = sessionEntry?.authProfileOverrideSource;
+  const { runReplyAgent } = await loadAgentRunnerRuntime();
+  const queueKey = sessionKey ?? sessionIdFinal;
+  preparedSessionState = resolvePreparedSessionState();
+  const resolveActiveQueueSessionId = () =>
+    resolveActiveEmbeddedRunSessionId(sessionKey) ?? preparedSessionState.sessionId;
+  const resolveQueueBusyState = () => {
+    const activeSessionId = resolveActiveQueueSessionId();
+    if (!activeSessionId) {
+      return { activeSessionId: undefined, isActive: false, isStreaming: false };
+    }
+    return {
+      activeSessionId,
+      isActive: isEmbeddedPiRunActive(activeSessionId),
+      isStreaming: isEmbeddedPiRunStreaming(activeSessionId),
+    };
+  };
+  let { activeSessionId, isActive, isStreaming } = resolveQueueBusyState();
+  const shouldSteer = resolvedQueue.mode === "steer" || resolvedQueue.mode === "steer-backlog";
+  const shouldFollowup =
+    resolvedQueue.mode === "followup" ||
+    resolvedQueue.mode === "collect" ||
+    resolvedQueue.mode === "steer-backlog";
+  const activeRunQueueAction = resolveActiveRunQueueAction({
+    isActive,
+    isHeartbeat: opts?.isHeartbeat === true,
+    shouldFollowup,
+    queueMode: resolvedQueue.mode,
+  });
+  if (isActive && activeRunQueueAction === "run-now") {
+    const activeSessionIdBeforeWait = activeSessionId ?? resolveActiveQueueSessionId();
+    if (resolvedQueue.mode === "interrupt" && activeSessionIdBeforeWait) {
+      const aborted = abortEmbeddedPiRun(activeSessionIdBeforeWait);
+      logVerbose(
+        `Interrupting active run for ${sessionKey ?? sessionIdFinal} (aborted=${aborted})`,
+      );
+    }
+    if (activeSessionIdBeforeWait) {
+      await waitForEmbeddedPiRunEnd(activeSessionIdBeforeWait);
+    }
+    preparedSessionState = resolvePreparedSessionState();
+    authProfileId = await resolveSessionAuthProfileOverride({
+      cfg,
+      provider,
+      agentDir,
+      sessionEntry: preparedSessionState.sessionEntry,
+      sessionStore,
+      sessionKey,
+      storePath,
+      isNewSession,
+    });
+    preparedSessionState = resolvePreparedSessionState();
+    ({ prefixedCommandBody, queuedBody } = await rebuildPromptBodies());
+    ({ activeSessionId, isActive, isStreaming } = resolveQueueBusyState());
+    if (isActive) {
+      typing.cleanup();
+      return {
+        text: "⚠️ Previous run is still shutting down. Please try again in a moment.",
+      };
+    }
+  }
+  const authProfileIdSource = preparedSessionState.sessionEntry?.authProfileOverrideSource;
  const followupRun = {
    prompt: queuedBody,
    messageId: sessionCtx.MessageSidFull ?? sessionCtx.MessageSid,
@@ -532,7 +526,7 @@ export async function runPreparedReply(
    run: {
      agentId,
      agentDir,
-      sessionId: sessionIdFinal,
+      sessionId: preparedSessionState.sessionId,
      sessionKey,
      messageProvider: resolveOriginMessageProvider({
        originatingChannel: ctx.OriginatingChannel ?? sessionCtx.OriginatingChannel,
@@ -550,7 +544,7 @@ export async function runPreparedReply(
      senderUsername: sessionCtx.SenderUsername?.trim() || undefined,
      senderE164: sessionCtx.SenderE164?.trim() || undefined,
      senderIsOwner: command.senderIsOwner,
-      sessionFile,
+      sessionFile: preparedSessionState.sessionFile,
      workspaceDir,
      config: cfg,
      skillsSnapshot,
@@ -564,7 +558,7 @@ export async function runPreparedReply(
        provider,
        model,
        agentId,
-        sessionEntry,
+        sessionEntry: preparedSessionState.sessionEntry,
      }).enabled,
      verboseLevel: resolvedVerboseLevel,
      reasoningLevel: resolvedReasoningLevel,
@@ -590,7 +584,6 @@ export async function runPreparedReply(
    },
  };

-  const { runReplyAgent } = await loadAgentRunnerRuntime();
  return runReplyAgent({
    commandBody: prefixedCommandBody,
    followupRun,
@@ -599,11 +592,16 @@ export async function runPreparedReply(
    shouldSteer,
    shouldFollowup,
    isActive,
-    isRunActive: () => isEmbeddedPiRunActive(sessionIdFinal),
+    isRunActive: () => {
+      const latestSessionState = resolvePreparedSessionState();
+      const latestActiveSessionId =
+        resolveActiveEmbeddedRunSessionId(sessionKey) ?? latestSessionState.sessionId;
+      return isEmbeddedPiRunActive(latestActiveSessionId);
+    },
    isStreaming,
    opts,
    typing,
-    sessionEntry,
+    sessionEntry: preparedSessionState.sessionEntry,
    sessionStore,
    sessionKey,
    storePath,
@@ -617,5 +615,6 @@ export async function runPreparedReply(
    sessionCtx,
    shouldInjectGroupIntro,
    typingMode,
+    resetTriggered,
  });
 }
--- a/src/auto-reply/reply/reply-run-registry.test.ts
+++ b/src/auto-reply/reply/reply-run-registry.test.ts
@@ -0,0 +1,110 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import {
+  __testing,
+  abortActiveReplyRuns,
+  createReplyOperation,
+  isReplyRunActiveForSessionId,
+  queueReplyRunMessage,
+  replyRunRegistry,
+  resolveActiveReplyRunSessionId,
+  waitForReplyRunEndBySessionId,
+} from "./reply-run-registry.js";
+
+// Behavioural tests for the reply-run registry, driven only through its exported API.
+describe("reply run registry", () => {
+  // Clear every registry map and restore mocks so state does not leak between tests.
+  afterEach(() => {
+    __testing.resetReplyRunRegistry();
+    vi.restoreAllMocks();
+  });
+
+  it("keeps ownership stable by sessionKey while sessionId rotates", async () => {
+    vi.useFakeTimers();
+    try {
+      const operation = createReplyOperation({
+        sessionKey: "agent:main:main",
+        sessionId: "session-old",
+        resetTriggered: false,
+      });
+
+      // Start waiting on the original session id before it is rotated away.
+      const oldWaitPromise = waitForReplyRunEndBySessionId("session-old", 1_000);
+
+      operation.updateSessionId("session-new");
+
+      // The session key still owns the run; only the id-based lookups move to the new id.
+      expect(replyRunRegistry.isActive("agent:main:main")).toBe(true);
+      expect(resolveActiveReplyRunSessionId("agent:main:main")).toBe("session-new");
+      expect(isReplyRunActiveForSessionId("session-old")).toBe(false);
+      expect(isReplyRunActiveForSessionId("session-new")).toBe(true);
+
+      // The waiter registered under the old id must remain pending while the run is active.
+      let settled = false;
+      void oldWaitPromise.then(() => {
+        settled = true;
+      });
+      await vi.advanceTimersByTimeAsync(100);
+      expect(settled).toBe(false);
+
+      operation.complete();
+
+      // Completion resolves the old-id waiter with `true` (ended) rather than timing out.
+      await expect(oldWaitPromise).resolves.toBe(true);
+    } finally {
+      // Flush any timers still pending before switching back to real timers.
+      await vi.runOnlyPendingTimersAsync();
+      vi.useRealTimers();
+    }
+  });
+
+  it("clears queued operations immediately on user abort", () => {
+    // A freshly created operation is in the "queued" phase; no phase change is made here.
+    const operation = createReplyOperation({
+      sessionKey: "agent:main:main",
+      sessionId: "session-queued",
+      resetTriggered: false,
+    });
+
+    expect(replyRunRegistry.isActive("agent:main:main")).toBe(true);
+
+    operation.abortByUser();
+
+    expect(operation.result).toEqual({ kind: "aborted", code: "aborted_by_user" });
+    expect(replyRunRegistry.isActive("agent:main:main")).toBe(false);
+  });
+
+  it("queues messages only through the active running backend", async () => {
+    const queueMessage = vi.fn(async () => {});
+    const operation = createReplyOperation({
+      sessionKey: "agent:main:main",
+      sessionId: "session-running",
+      resetTriggered: false,
+    });
+
+    operation.attachBackend({
+      kind: "embedded",
+      cancel: vi.fn(),
+      isStreaming: () => true,
+      queueMessage,
+    });
+
+    // The backend already reports streaming, but the phase is not "running" yet, so queueing is refused.
+    expect(queueReplyRunMessage("session-running", "before running")).toBe(false);
+
+    operation.setPhase("running");
+
+    expect(queueReplyRunMessage("session-running", "hello")).toBe(true);
+    expect(queueMessage).toHaveBeenCalledWith("hello");
+  });
+
+  it("aborts compacting runs through the registry compatibility helper", () => {
+    const compactingOperation = createReplyOperation({
+      sessionKey: "agent:main:main",
+      sessionId: "session-compacting",
+      resetTriggered: false,
+    });
+    compactingOperation.setPhase("preflight_compacting");
+
+    // Second operation is "running" with no backend attached, so it does not count as compacting.
+    const runningOperation = createReplyOperation({
+      sessionKey: "agent:main:other",
+      sessionId: "session-running",
+      resetTriggered: false,
+    });
+    runningOperation.setPhase("running");
+
+    // Only the compacting operation is aborted; the running one keeps a null result.
+    expect(abortActiveReplyRuns({ mode: "compacting" })).toBe(true);
+    expect(compactingOperation.result).toEqual({ kind: "aborted", code: "aborted_for_restart" });
+    expect(runningOperation.result).toBeNull();
+  });
+});
--- a/src/auto-reply/reply/reply-run-registry.ts
+++ b/src/auto-reply/reply/reply-run-registry.ts
@@ -0,0 +1,536 @@
+import { resolveGlobalSingleton } from "../../shared/global-singleton.js";
+
+/** Identifier an operation is registered under; operations expose their normalized session key as this value. */
+export type ReplyRunKey = string;
+
+/** Kind tag carried by a backend handle attached to a reply operation. */
+export type ReplyBackendKind = "embedded" | "cli";
+
+/**
+ * Reason handed to a backend's `cancel`.
+ * "user_abort" and "restart" are sent by the operation's abortByUser / abortForRestart;
+ * "superseded" is sent when a backend is attached to an operation that has already
+ * finished without an abort result, or whose abort signal has already fired.
+ */
+export type ReplyBackendCancelReason = "user_abort" | "restart" | "superseded";
+
+/**
+ * Callbacks a backend exposes to the reply operation it is attached to.
+ * The registry only ever talks to a backend through this handle.
+ */
+export type ReplyBackendHandle = {
+  readonly kind: ReplyBackendKind;
+  /** Invoked when the owning operation is aborted, or immediately on attach if it is already finished/aborted. */
+  cancel(reason?: ReplyBackendCancelReason): void;
+  /** Polled by the registry's streaming checks and before a message is queued. */
+  isStreaming(): boolean;
+  /** Optional: backends without it cannot receive queued messages. The returned promise is not awaited by the registry. */
+  queueMessage?: (text: string) => Promise<void>;
+  /**
+   * Compatibility-only hook so legacy "abort compacting runs" paths can still
+   * find embedded runs that are compacting during the main run phase.
+   */
+  isCompacting?: () => boolean;
+};
+
+/**
+ * Lifecycle phase of a reply operation.
+ * Operations start in "queued"; the first four values can be set through `setPhase`,
+ * while "completed", "failed" and "aborted" are only entered via complete / fail / abort*.
+ */
+export type ReplyOperationPhase =
+  | "queued"
+  | "preflight_compacting"
+  | "memory_flushing"
+  | "running"
+  | "completed"
+  | "failed"
+  | "aborted";
+
+/**
+ * Codes that may appear on a "failed" result.
+ * Note that `ReplyOperation.fail` excludes "aborted_by_user" from its accepted codes.
+ */
+export type ReplyOperationFailureCode =
+  | "gateway_draining"
+  | "command_lane_cleared"
+  | "aborted_by_user"
+  | "session_corruption_reset"
+  | "run_failed";
+
+/** Codes recorded on an "aborted" result: set by abortByUser and abortForRestart respectively. */
+export type ReplyOperationAbortCode = "aborted_by_user" | "aborted_for_restart";
+
+/** Terminal outcome of a reply operation, discriminated by `kind`. */
+export type ReplyOperationResult =
+  | { kind: "completed" }
+  | { kind: "failed"; code: ReplyOperationFailureCode; cause?: unknown }
+  | { kind: "aborted"; code: ReplyOperationAbortCode };
+
+/**
+ * Handle for one registered reply turn, as produced by `createReplyOperation`.
+ * The readonly fields are live views of state that the methods below mutate.
+ */
+export type ReplyOperation = {
+  /** Normalized session key the operation is registered under. */
+  readonly key: ReplyRunKey;
+  /** Current session id; changes when `updateSessionId` rebinds the operation. */
+  readonly sessionId: string;
+  /** Fires on abortByUser / abortForRestart, or when the upstream signal supplied at creation aborts. */
+  readonly abortSignal: AbortSignal;
+  /** The `resetTriggered` flag supplied at creation. */
+  readonly resetTriggered: boolean;
+  readonly phase: ReplyOperationPhase;
+  /** Null until the operation completes, fails, or is aborted with a code. */
+  readonly result: ReplyOperationResult | null;
+  /** Moves to a non-terminal phase; ignored once a result has been recorded. */
+  setPhase(next: "queued" | "preflight_compacting" | "memory_flushing" | "running"): void;
+  /** Rebinds the operation to a new session id; throws if that id belongs to another active session key. */
+  updateSessionId(nextSessionId: string): void;
+  /** Attaches a backend, or cancels it straight away if the operation already has a result. */
+  attachBackend(handle: ReplyBackendHandle): void;
+  /** Detaches the backend, but only if `handle` is the one currently attached. */
+  detachBackend(handle: ReplyBackendHandle): void;
+  /** Records a "completed" result (if none yet) and removes the operation from the registry. */
+  complete(): void;
+  /** Records a "failed" result (if none yet) and removes the operation from the registry. */
+  fail(code: Exclude<ReplyOperationFailureCode, "aborted_by_user">, cause?: unknown): void;
+  /** Aborts with code "aborted_by_user"; a still-queued operation is also unregistered immediately. */
+  abortByUser(): void;
+  /** Aborts with code "aborted_for_restart"; a still-queued operation is also unregistered immediately. */
+  abortForRestart(): void;
+};
+
+/** Session-key-oriented view over the registered reply operations. */
+export type ReplyRunRegistry = {
+  /** Creates and registers an operation (delegates to `createReplyOperation`). */
+  begin(params: {
+    sessionKey: string;
+    sessionId: string;
+    resetTriggered: boolean;
+    upstreamAbortSignal?: AbortSignal;
+  }): ReplyOperation;
+  /** Returns the registered operation for the key, if any. */
+  get(sessionKey: string): ReplyOperation | undefined;
+  /** True while an operation is registered for the key, regardless of its phase. */
+  isActive(sessionKey: string): boolean;
+  /** True only when the operation is in the "running" phase and its attached backend reports streaming. */
+  isStreaming(sessionKey: string): boolean;
+  /** Aborts the key's operation as a user abort; returns false when nothing is registered. */
+  abort(sessionKey: string): boolean;
+  /** Resolves `true` once the key has no registered operation, or `false` when the timeout elapses first. */
+  waitForIdle(sessionKey: string, timeoutMs?: number): Promise<boolean>;
+  /** Current session id of the key's registered operation, if any. */
+  resolveSessionId(sessionKey: string): string | undefined;
+};
+
+/** One pending `waitForIdle` call: its promise resolver and the timeout timer guarding it. */
+type ReplyRunWaiter = {
+  resolve: (ended: boolean) => void;
+  timer: NodeJS.Timeout;
+};
+
+/** Index maps backing the registry. */
+type ReplyRunState = {
+  /** Session key -> registered operation. */
+  activeRunsByKey: Map<string, ReplyOperation>;
+  /** Session key -> the operation's current session id. */
+  activeSessionIdsByKey: Map<string, string>;
+  /** Current session id -> owning session key. */
+  activeKeysBySessionId: Map<string, string>;
+  /** Every session id an operation has had (current and rotated-away) -> session key, used for wait-by-session-id. */
+  waitKeysBySessionId: Map<string, string>;
+  /** Session key -> waiters that have not yet been resolved or timed out. */
+  waitersByKey: Map<string, Set<ReplyRunWaiter>>;
+};
+
+// Registered symbol used as the lookup key for the shared registry state.
+const REPLY_RUN_STATE_KEY = Symbol.for("openclaw.replyRunRegistry");
+
+// Registry state, created empty by the factory below.
+// NOTE(review): presumably resolveGlobalSingleton returns one shared instance per key
+// across module copies - confirm against shared/global-singleton.
+const replyRunState = resolveGlobalSingleton<ReplyRunState>(REPLY_RUN_STATE_KEY, () => ({
+  activeRunsByKey: new Map<string, ReplyOperation>(),
+  activeSessionIdsByKey: new Map<string, string>(),
+  activeKeysBySessionId: new Map<string, string>(),
+  waitKeysBySessionId: new Map<string, string>(),
+  waitersByKey: new Map<string, Set<ReplyRunWaiter>>(),
+}));
+
+/** Raised when a reply operation is requested for a session key that already has one registered. */
+export class ReplyRunAlreadyActiveError extends Error {
+  constructor(sessionKey: string) {
+    const message = `Reply run already active for ${sessionKey}`;
+    super(message);
+    this.name = "ReplyRunAlreadyActiveError";
+  }
+}
+
+/** Trims a session key; yields undefined for missing, empty, or whitespace-only input. */
+function normalizeSessionKey(sessionKey: string | undefined): string | undefined {
+  const trimmed = sessionKey == null ? "" : sessionKey.trim();
+  return trimmed.length > 0 ? trimmed : undefined;
+}
+
+/** Trims a session id; yields undefined for missing, empty, or whitespace-only input. */
+function normalizeSessionId(sessionId: string | undefined): string | undefined {
+  if (sessionId == null) {
+    return undefined;
+  }
+  const trimmed = sessionId.trim();
+  return trimmed === "" ? undefined : trimmed;
+}
+
+/** Builds the abort reason used for user-initiated aborts: an Error whose `name` is "AbortError". */
+function createUserAbortError(): Error {
+  return Object.assign(new Error("Reply operation aborted by user"), { name: "AbortError" });
+}
+
+/** Records that waits addressed to `sessionId` should be routed to `sessionKey`. */
+function registerWaitSessionId(sessionKey: string, sessionId: string): void {
+  const { waitKeysBySessionId } = replyRunState;
+  waitKeysBySessionId.set(sessionId, sessionKey);
+}
+
+/** Drops every wait-routing entry (current and rotated-away session ids) that points at `sessionKey`. */
+function clearWaitSessionIds(sessionKey: string): void {
+  const { waitKeysBySessionId } = replyRunState;
+  // Collect first, then delete, so the map is not mutated while it is being scanned.
+  const staleSessionIds = [...waitKeysBySessionId]
+    .filter(([, mappedKey]) => mappedKey === sessionKey)
+    .map(([sessionId]) => sessionId);
+  for (const sessionId of staleSessionIds) {
+    waitKeysBySessionId.delete(sessionId);
+  }
+}
+
+/** Resolves every pending waiter for `sessionKey` with `true` and cancels their timeout timers. */
+function notifyReplyRunEnded(sessionKey: string): void {
+  const { waitersByKey } = replyRunState;
+  const pending = waitersByKey.get(sessionKey);
+  if (pending === undefined || pending.size === 0) {
+    return;
+  }
+  // Unregister the set before resolving so the key starts clean for any later waiters.
+  waitersByKey.delete(sessionKey);
+  pending.forEach((waiter) => {
+    clearTimeout(waiter.timer);
+    waiter.resolve(true);
+  });
+}
+
+/**
+ * Looks up the operation whose *current* session id is `sessionId`.
+ * Rotated-away ids are not matched here; they only exist in the wait-routing map.
+ */
+function resolveReplyRunForCurrentSessionId(sessionId: string): ReplyOperation | undefined {
+  const id = normalizeSessionId(sessionId);
+  const owningKey = id ? replyRunState.activeKeysBySessionId.get(id) : undefined;
+  return owningKey ? replyRunState.activeRunsByKey.get(owningKey) : undefined;
+}
+
+/**
+ * Maps a session id to the session key to wait on, preferring the active mapping and
+ * falling back to the wait-routing map (which also covers rotated-away ids).
+ */
+function resolveReplyRunWaitKey(sessionId: string): string | undefined {
+  const id = normalizeSessionId(sessionId);
+  if (id === undefined) {
+    return undefined;
+  }
+  const activeKey = replyRunState.activeKeysBySessionId.get(id);
+  if (activeKey != null) {
+    return activeKey;
+  }
+  return replyRunState.waitKeysBySessionId.get(id);
+}
+
+/**
+ * Reports whether an operation counts as "compacting": always in the preflight-compaction
+ * and memory-flush phases, and during "running" only if the attached backend says so.
+ */
+function isReplyRunCompacting(operation: ReplyOperation): boolean {
+  switch (operation.phase) {
+    case "preflight_compacting":
+    case "memory_flushing":
+      return true;
+    case "running":
+      // Backends without the optional hook (or no backend at all) count as not compacting.
+      return getAttachedBackend(operation)?.isCompacting?.() ?? false;
+    default:
+      return false;
+  }
+}
+
+// Backend handles are kept outside the operation object, weakly keyed by the operation itself.
+const attachedBackendByOperation = new WeakMap<ReplyOperation, ReplyBackendHandle>();
+
+/** Returns the backend currently attached to `operation`, or undefined when none is attached. */
+function getAttachedBackend(operation: ReplyOperation): ReplyBackendHandle | undefined {
+  const handle = attachedBackendByOperation.get(operation);
+  return handle;
+}
+
+/**
+ * Removes every registry entry belonging to a finished operation and wakes its waiters.
+ *
+ * @param params.sessionKey Normalized session key the operation was registered under.
+ * @param params.sessionId  The operation's current session id at the time it ended.
+ */
+function clearReplyRunState(params: { sessionKey: string; sessionId: string }): void {
+  const { sessionKey, sessionId } = params;
+  replyRunState.activeRunsByKey.delete(sessionKey);
+  // Once the key has no run, its key -> session-id entry is stale whatever id it holds,
+  // so it is removed unconditionally (no need to compare against `sessionId` first).
+  replyRunState.activeSessionIdsByKey.delete(sessionKey);
+  // The reverse entry is only removed while it still points at this key, so a session id
+  // that has meanwhile been bound to another key is left alone.
+  if (replyRunState.activeKeysBySessionId.get(sessionId) === sessionKey) {
+    replyRunState.activeKeysBySessionId.delete(sessionId);
+  }
+  clearWaitSessionIds(sessionKey);
+  // Waiters are notified last, after all lookups already report the key as idle.
+  notifyReplyRunEnded(sessionKey);
+}
+
+/**
+ * Creates a reply operation and registers it under its session key.
+ *
+ * @param params.sessionKey Session key that owns the operation; trimmed before use.
+ * @param params.sessionId Initial session id; trimmed before use.
+ * @param params.resetTriggered Flag exposed unchanged on the returned operation.
+ * @param params.upstreamAbortSignal Optional signal whose abort is mirrored onto the operation's own signal.
+ * @returns The registered operation, starting in the "queued" phase with a null result.
+ * @throws Error when the session key or session id is empty after trimming.
+ * @throws ReplyRunAlreadyActiveError when the session key already has a registered operation.
+ */
+export function createReplyOperation(params: {
+  sessionKey: string;
+  sessionId: string;
+  resetTriggered: boolean;
+  upstreamAbortSignal?: AbortSignal;
+}): ReplyOperation {
+  const sessionKey = normalizeSessionKey(params.sessionKey);
+  const sessionId = normalizeSessionId(params.sessionId);
+  if (!sessionKey) {
+    throw new Error("Reply operations require a canonical sessionKey");
+  }
+  if (!sessionId) {
+    throw new Error("Reply operations require a sessionId");
+  }
+  if (replyRunState.activeRunsByKey.has(sessionKey)) {
+    throw new ReplyRunAlreadyActiveError(sessionKey);
+  }
+
+  // Mutable state captured by the operation object's getters and methods below.
+  const controller = new AbortController();
+  let currentSessionId = sessionId;
+  let phase: ReplyOperationPhase = "queued";
+  let result: ReplyOperationResult | null = null;
+  let stateCleared = false;
+
+  // Unregisters the operation at most once, using whatever session id is current at that time.
+  const clearState = () => {
+    if (stateCleared) {
+      return;
+    }
+    stateCleared = true;
+    clearReplyRunState({
+      sessionKey,
+      sessionId: currentSessionId,
+    });
+  };
+
+  // Aborts the operation's own signal; a second abort is a no-op so the first reason is kept.
+  const abortInternally = (reason?: unknown) => {
+    if (!controller.signal.aborted) {
+      controller.abort(reason);
+    }
+  };
+
+  // Shared abort path: record the aborted result (first result wins), fire the signal,
+  // then tell the attached backend why it is being cancelled.
+  // NOTE(review): `phase` is set to "aborted" even when a different result (e.g. "completed")
+  // was already recorded, so phase and result can disagree after a late abort - confirm intended.
+  const abortWithReason = (
+    reason: ReplyBackendCancelReason,
+    abortReason: unknown,
+    opts?: { abortedCode?: ReplyOperationAbortCode },
+  ) => {
+    if (opts?.abortedCode && !result) {
+      result = { kind: "aborted", code: opts.abortedCode };
+    }
+    phase = "aborted";
+    abortInternally(abortReason);
+    getAttachedBackend(operation)?.cancel(reason);
+  };
+
+  // Mirror the upstream signal onto the operation's signal. This path only fires the signal:
+  // it does not record a result, change the phase, or cancel the backend directly.
+  if (params.upstreamAbortSignal) {
+    if (params.upstreamAbortSignal.aborted) {
+      abortInternally(params.upstreamAbortSignal.reason);
+    } else {
+      params.upstreamAbortSignal.addEventListener(
+        "abort",
+        () => {
+          abortInternally(params.upstreamAbortSignal?.reason);
+        },
+        { once: true },
+      );
+    }
+  }
+
+  const operation: ReplyOperation = {
+    get key() {
+      return sessionKey;
+    },
+    get sessionId() {
+      return currentSessionId;
+    },
+    get abortSignal() {
+      return controller.signal;
+    },
+    get resetTriggered() {
+      return params.resetTriggered;
+    },
+    get phase() {
+      return phase;
+    },
+    get result() {
+      return result;
+    },
+    setPhase(next) {
+      // Phase changes are ignored once a terminal result exists.
+      if (result) {
+        return;
+      }
+      phase = next;
+    },
+    updateSessionId(nextSessionId) {
+      if (result) {
+        return;
+      }
+      const normalizedNextSessionId = normalizeSessionId(nextSessionId);
+      if (!normalizedNextSessionId || normalizedNextSessionId === currentSessionId) {
+        return;
+      }
+      // Refuse to take over a session id that is currently bound to a different session key.
+      if (
+        replyRunState.activeKeysBySessionId.has(normalizedNextSessionId) &&
+        replyRunState.activeKeysBySessionId.get(normalizedNextSessionId) !== sessionKey
+      ) {
+        throw new Error(
+          `Cannot rebind reply operation ${sessionKey} to active session ${normalizedNextSessionId}`,
+        );
+      }
+      // The old id stops being "active" but stays in the wait-routing map, so callers
+      // already waiting on it are still resolved when this operation ends.
+      replyRunState.activeKeysBySessionId.delete(currentSessionId);
+      registerWaitSessionId(sessionKey, currentSessionId);
+      currentSessionId = normalizedNextSessionId;
+      replyRunState.activeSessionIdsByKey.set(sessionKey, currentSessionId);
+      replyRunState.activeKeysBySessionId.set(currentSessionId, sessionKey);
+      registerWaitSessionId(sessionKey, currentSessionId);
+    },
+    attachBackend(handle) {
+      // A backend arriving after the operation finished is cancelled instead of attached;
+      // the cancel reason mirrors the recorded abort code, or "superseded" otherwise.
+      if (result) {
+        handle.cancel(
+          result.kind === "aborted"
+            ? result.code === "aborted_for_restart"
+              ? "restart"
+              : "user_abort"
+            : "superseded",
+        );
+        return;
+      }
+      attachedBackendByOperation.set(operation, handle);
+      // Signal already aborted without a result (e.g. via the upstream signal): attach, then cancel.
+      if (controller.signal.aborted) {
+        handle.cancel("superseded");
+      }
+    },
+    detachBackend(handle) {
+      // Only the currently attached handle may detach itself.
+      if (getAttachedBackend(operation) === handle) {
+        attachedBackendByOperation.delete(operation);
+      }
+    },
+    complete() {
+      // An earlier result (e.g. an abort) is preserved; the registry entry is cleared either way.
+      if (!result) {
+        result = { kind: "completed" };
+        phase = "completed";
+      }
+      clearState();
+    },
+    fail(code, cause) {
+      // An earlier result is preserved; the registry entry is cleared either way.
+      if (!result) {
+        result = { kind: "failed", code, cause };
+        phase = "failed";
+      }
+      clearState();
+    },
+    abortByUser() {
+      const phaseBeforeAbort = phase;
+      abortWithReason("user_abort", createUserAbortError(), {
+        abortedCode: "aborted_by_user",
+      });
+      // Only an operation that was still queued is unregistered here; in any other phase
+      // the entry stays until complete() or fail() is called.
+      if (phaseBeforeAbort === "queued") {
+        clearState();
+      }
+    },
+    abortForRestart() {
+      const phaseBeforeAbort = phase;
+      abortWithReason("restart", new Error("Reply operation aborted for restart"), {
+        abortedCode: "aborted_for_restart",
+      });
+      // Same rule as abortByUser: only queued operations are unregistered immediately.
+      if (phaseBeforeAbort === "queued") {
+        clearState();
+      }
+    },
+  };
+
+  // Register the operation in every index, including the wait-routing map for its initial id.
+  replyRunState.activeRunsByKey.set(sessionKey, operation);
+  replyRunState.activeSessionIdsByKey.set(sessionKey, currentSessionId);
+  replyRunState.activeKeysBySessionId.set(currentSessionId, sessionKey);
+  registerWaitSessionId(sessionKey, currentSessionId);
+
+  return operation;
+}
+
+/**
+ * Session-key-oriented facade over the shared reply-run state.
+ *
+ * Methods refer to `replyRunRegistry` by name rather than through `this`, so they keep
+ * working when destructured or passed around as bare callbacks.
+ */
+export const replyRunRegistry: ReplyRunRegistry = {
+  /** Creates and registers a new operation; throws if the session key already has one. */
+  begin(params) {
+    return createReplyOperation(params);
+  },
+  /** Returns the operation registered for the (trimmed) session key, if any. */
+  get(sessionKey) {
+    const normalizedSessionKey = normalizeSessionKey(sessionKey);
+    if (!normalizedSessionKey) {
+      return undefined;
+    }
+    return replyRunState.activeRunsByKey.get(normalizedSessionKey);
+  },
+  /** True while an operation is registered for the session key, in any phase. */
+  isActive(sessionKey) {
+    const normalizedSessionKey = normalizeSessionKey(sessionKey);
+    if (!normalizedSessionKey) {
+      return false;
+    }
+    return replyRunState.activeRunsByKey.has(normalizedSessionKey);
+  },
+  /** True only when the operation is "running" and its attached backend reports streaming. */
+  isStreaming(sessionKey) {
+    const operation = replyRunRegistry.get(sessionKey);
+    if (!operation || operation.phase !== "running") {
+      return false;
+    }
+    return getAttachedBackend(operation)?.isStreaming() ?? false;
+  },
+  /** Aborts the session key's operation as a user abort; false when nothing is registered. */
+  abort(sessionKey) {
+    const operation = replyRunRegistry.get(sessionKey);
+    if (!operation) {
+      return false;
+    }
+    operation.abortByUser();
+    return true;
+  },
+  /**
+   * Waits until the session key has no registered operation.
+   *
+   * @returns `true` immediately when the key is already idle (or empty), `true` when the
+   *   operation ends in time, and `false` when the timeout (at least 100 ms) elapses first.
+   */
+  waitForIdle(sessionKey, timeoutMs = 15_000) {
+    const normalizedSessionKey = normalizeSessionKey(sessionKey);
+    if (!normalizedSessionKey || !replyRunState.activeRunsByKey.has(normalizedSessionKey)) {
+      return Promise.resolve(true);
+    }
+    // The executor runs synchronously right after the check above, so the run is still
+    // registered while the waiter is added and no second idle check is needed.
+    return new Promise<boolean>((resolve) => {
+      const waiters =
+        replyRunState.waitersByKey.get(normalizedSessionKey) ?? new Set<ReplyRunWaiter>();
+      const waiter: ReplyRunWaiter = {
+        resolve,
+        timer: setTimeout(
+          () => {
+            // Timed out: remove this waiter and drop the set once it is empty.
+            waiters.delete(waiter);
+            if (waiters.size === 0) {
+              replyRunState.waitersByKey.delete(normalizedSessionKey);
+            }
+            resolve(false);
+          },
+          Math.max(100, timeoutMs),
+        ),
+      };
+      waiters.add(waiter);
+      replyRunState.waitersByKey.set(normalizedSessionKey, waiters);
+    });
+  },
+  /** Current session id of the session key's registered operation, if any. */
+  resolveSessionId(sessionKey) {
+    const normalizedSessionKey = normalizeSessionKey(sessionKey);
+    if (!normalizedSessionKey) {
+      return undefined;
+    }
+    return replyRunState.activeSessionIdsByKey.get(normalizedSessionKey);
+  },
+};
+
+/** Function-style shortcut for `replyRunRegistry.resolveSessionId`. */
+export function resolveActiveReplyRunSessionId(sessionKey: string): string | undefined {
+  const activeSessionId = replyRunRegistry.resolveSessionId(sessionKey);
+  return activeSessionId;
+}
+
+/** True when some registered operation currently uses `sessionId` (rotated-away ids do not count). */
+export function isReplyRunActiveForSessionId(sessionId: string): boolean {
+  const operation = resolveReplyRunForCurrentSessionId(sessionId);
+  return operation !== undefined;
+}
+
+/** True when the operation bound to `sessionId` is "running" and its backend reports streaming. */
+export function isReplyRunStreamingForSessionId(sessionId: string): boolean {
+  const operation = resolveReplyRunForCurrentSessionId(sessionId);
+  if (operation === undefined || operation.phase !== "running") {
+    return false;
+  }
+  const backend = getAttachedBackend(operation);
+  return backend?.isStreaming() ?? false;
+}
+
+/**
+ * Hands `text` to the backend of the operation bound to `sessionId`.
+ * Succeeds only when the operation is "running", the backend supports queueing, and it is
+ * currently streaming. The backend's promise is fired without being awaited.
+ *
+ * @returns true if the message was handed to the backend, false otherwise.
+ */
+export function queueReplyRunMessage(sessionId: string, text: string): boolean {
+  const operation = resolveReplyRunForCurrentSessionId(sessionId);
+  if (!operation || operation.phase !== "running") {
+    return false;
+  }
+  const backend = getAttachedBackend(operation);
+  if (!backend?.queueMessage || !backend.isStreaming()) {
+    return false;
+  }
+  void backend.queueMessage(text);
+  return true;
+}
+
+/** User-aborts the operation currently bound to `sessionId`; returns whether one was found. */
+export function abortReplyRunBySessionId(sessionId: string): boolean {
+  const operation = resolveReplyRunForCurrentSessionId(sessionId);
+  operation?.abortByUser();
+  return operation !== undefined;
+}
+
+/**
+ * Waits for the operation associated with `sessionId` (current or rotated-away) to end.
+ * Resolves `true` right away when the id is not associated with any operation.
+ */
+export function waitForReplyRunEndBySessionId(
+  sessionId: string,
+  timeoutMs = 15_000,
+): Promise<boolean> {
+  const waitKey = resolveReplyRunWaitKey(sessionId);
+  return waitKey ? replyRunRegistry.waitForIdle(waitKey, timeoutMs) : Promise.resolve(true);
+}
+
+/**
+ * Aborts registered operations for a restart.
+ *
+ * @param opts.mode "all" aborts every operation; "compacting" only those that count as compacting.
+ * @returns true if at least one operation was aborted.
+ */
+export function abortActiveReplyRuns(opts: { mode: "all" | "compacting" }): boolean {
+  const compactingOnly = opts.mode === "compacting";
+  let abortedAny = false;
+  // Iterate the live map: aborting a queued operation removes its entry during the walk.
+  replyRunState.activeRunsByKey.forEach((operation) => {
+    if (compactingOnly && !isReplyRunCompacting(operation)) {
+      return;
+    }
+    operation.abortForRestart();
+    abortedAny = true;
+  });
+  return abortedAny;
+}
+
+/** Number of operations currently registered. */
+export function getActiveReplyRunCount(): number {
+  const { activeRunsByKey } = replyRunState;
+  return activeRunsByKey.size;
+}
+
+/** Snapshot of the current session ids of all registered operations. */
+export function listActiveReplyRunSessionIds(): string[] {
+  return Array.from(replyRunState.activeSessionIdsByKey.values());
+}
+
+/** Test-only helpers. */
+export const __testing = {
+  /** Empties every registry map and resolves all pending waiters with `false`. */
+  resetReplyRunRegistry(): void {
+    const {
+      activeRunsByKey,
+      activeSessionIdsByKey,
+      activeKeysBySessionId,
+      waitKeysBySessionId,
+      waitersByKey,
+    } = replyRunState;
+    activeRunsByKey.clear();
+    activeSessionIdsByKey.clear();
+    activeKeysBySessionId.clear();
+    waitKeysBySessionId.clear();
+    // Release anyone still waiting before the waiter index itself is dropped.
+    waitersByKey.forEach((waiters) => {
+      waiters.forEach((waiter) => {
+        clearTimeout(waiter.timer);
+        waiter.resolve(false);
+      });
+    });
+    waitersByKey.clear();
+  },
+};