diff --git a/CHANGELOG.md b/CHANGELOG.md index 421874b4177..402aff74222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ Docs: https://docs.openclaw.ai - Gateway/install: add a validated `--wrapper`/`OPENCLAW_WRAPPER` service install path that persists executable LaunchAgent/systemd wrappers across forced reinstalls, updates, and doctor repairs instead of falling back to raw node/bun `ProgramArguments`. Fixes #69400. (#72445) Thanks @willtmc. - Plugins: fail plugin registration when loader-owned acceptance gates reject missing hook names or memory-only capability registration from non-memory plugins, surfacing the issue through plugin status and doctor instead of silently dropping the registration. Fixes #72459. Thanks @1fanwang and @amknight. - macOS Gateway: write launchd services with a state-dir `WorkingDirectory`, use a durable state-dir temp path instead of freezing macOS session `TMPDIR`, create that temp directory before bootstrap, and label abort-shaped launchd exits as `SIGABRT/abort` in status output. Fixes #53679 and #70223; refs #71848. Thanks @dlturock, @stammi922, and @palladius. +- Control UI/update: make `Update now` require a real gateway process replacement, report skipped/error update outcomes with stable reasons, and verify the running gateway version after restart so global installs cannot silently keep old code in memory. Fixes #62492; addresses #64892 and #63562. Thanks @IAMSamuelRodda. - Exec approvals: accept runtime-owned `source: "allow-always"` and `commandText` allowlist metadata in gateway and node approval-set payloads so Control UI round-trips no longer fail with `unexpected property 'source'`. Fixes #60000; carries forward #60064. Thanks @sd1471123, @sharkqwy, and @luoyanglang. - Exec/node: skip approval-plan preparation for full-trust `host=node` runs so interpreter and script commands no longer fail with `SYSTEM_RUN_DENIED: approval cannot safely bind` when effective policy is `security=full` and `ask=off`. Fixes #48457 and duplicate #69251. Thanks @ajtran303, @jaserNo1, @Blakeshannon, @lesliefag, and @AvIsBeastMC. - Exec/node: synthesize a local approval plan when a paired node advertises `system.run` without `system.run.prepare`, unblocking approval-required `host=node` exec on current macOS companion nodes while preserving remote prepare for node hosts that support it. Fixes #37591 and duplicate #66839; carries forward #69725. Thanks @soloclz. diff --git a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift index cf770029afb..f8224dbb257 100644 --- a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift @@ -4487,6 +4487,8 @@ public struct ChatEvent: Codable, Sendable { } } +public struct UpdateStatusParams: Codable, Sendable {} + public struct UpdateRunParams: Codable, Sendable { public let sessionkey: String? public let deliverycontext: [String: AnyCodable]? diff --git a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift index cf770029afb..f8224dbb257 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift @@ -4487,6 +4487,8 @@ public struct ChatEvent: Codable, Sendable { } } +public struct UpdateStatusParams: Codable, Sendable {} + public struct UpdateRunParams: Codable, Sendable { public let sessionkey: String? public let deliverycontext: [String: AnyCodable]? diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 89c15a41233..f967031a6ed 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -579,6 +579,7 @@ For tooling that writes config over the gateway API, prefer this flow: deletes, arrays replace) - `config.apply` only when you intend to replace the entire config - `update.run` for explicit self-update plus restart +- `update.status` to inspect the latest update restart sentinel and verify the running version after a restart Agents should treat `config.schema.lookup` as the first stop for exact field-level docs and constraints. Use [Configuration reference](/gateway/configuration-reference) @@ -589,6 +590,8 @@ subsystem references. Control-plane writes (`config.apply`, `config.patch`, `update.run`) are rate-limited to 3 requests per 60 seconds per `deviceId+clientIp`. Restart requests coalesce and then enforce a 30-second cooldown between restart cycles. +`update.status` is read-only but admin-scoped because the restart sentinel can +include update step summaries and command output tails. Example partial patch: diff --git a/docs/gateway/protocol.md b/docs/gateway/protocol.md index 12276812ed2..677c626ac58 100644 --- a/docs/gateway/protocol.md +++ b/docs/gateway/protocol.md @@ -330,6 +330,7 @@ enumeration of `src/gateway/server-methods/*.ts`. - `config.schema` returns the live config schema payload used by Control UI and CLI tooling: schema, `uiHints`, version, and generation metadata, including plugin + channel schema metadata when the runtime can load it. The schema includes field `title` / `description` metadata derived from the same labels and help text used by the UI, including nested object, wildcard, array-item, and `anyOf` / `oneOf` / `allOf` composition branches when matching field documentation exists. - `config.schema.lookup` returns a path-scoped lookup payload for one config path: normalized path, a shallow schema node, matched hint + `hintPath`, and immediate child summaries for UI/CLI drill-down. Lookup schema nodes keep the user-facing docs and common validation fields (`title`, `description`, `type`, `enum`, `const`, `format`, `pattern`, numeric/string/array/object bounds, and flags like `additionalProperties`, `deprecated`, `readOnly`, `writeOnly`). Child summaries expose `key`, normalized `path`, `type`, `required`, `hasChildren`, plus the matched `hint` / `hintPath`. - `update.run` runs the gateway update flow and schedules a restart only when the update itself succeeded. + - `update.status` returns the latest cached update restart sentinel, including the post-restart running version when available. - `wizard.start`, `wizard.next`, `wizard.status`, and `wizard.cancel` expose the onboarding wizard over WS RPC. diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md index 5032f8d763d..d8f9d5a2e9d 100644 --- a/docs/web/control-ui.md +++ b/docs/web/control-ui.md @@ -115,7 +115,7 @@ The Control UI can localize itself on first load based on your browser locale. T - Debug: status/health/models snapshots + event log + manual RPC calls (`status`, `health`, `models.list`). - Logs: live tail of gateway file logs with filter/export (`logs.tail`). - - Update: run a package/git update + restart (`update.run`) with a restart report. + - Update: run a package/git update + restart (`update.run`) with a restart report, then poll `update.status` after reconnect to verify the running gateway version. - For isolated jobs, delivery defaults to announce summary. You can switch to none if you want internal-only runs. diff --git a/src/cli/gateway-cli/run-loop.test.ts b/src/cli/gateway-cli/run-loop.test.ts index 7771bbfdc71..fff72a91fd4 100644 --- a/src/cli/gateway-cli/run-loop.test.ts +++ b/src/cli/gateway-cli/run-loop.test.ts @@ -9,6 +9,7 @@ const consumeGatewaySigusr1RestartAuthorization = vi.fn(() => true); const consumeGatewayRestartIntentSync = vi.fn(() => false); const isGatewaySigusr1RestartExternallyAllowed = vi.fn(() => false); const markGatewaySigusr1RestartHandled = vi.fn(); +const peekGatewaySigusr1RestartReason = vi.fn<() => string | undefined>(() => undefined); const scheduleGatewaySigusr1Restart = vi.fn((_opts?: { delayMs?: number; reason?: string }) => ({ ok: true, pid: process.pid, @@ -30,6 +31,17 @@ const waitForBundledRuntimeDepsInstallIdle = vi.fn(async (_timeoutMs?: number) = const restartGatewayProcessWithFreshPid = vi.fn< () => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string } >(() => ({ mode: "disabled" })); +const respawnGatewayProcessForUpdate = vi.fn< + () => { + mode: "spawned" | "supervised" | "disabled" | "failed"; + pid?: number; + detail?: string; + child?: { kill: () => void }; + } +>(() => ({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" })); +const markUpdateRestartSentinelFailure = vi.fn<(reason: string) => Promise>( + async (_reason: string) => null, +); const abortEmbeddedPiRun = vi.fn( (_sessionId?: string, _opts?: { mode?: "all" | "compacting" }) => false, ); @@ -58,14 +70,20 @@ vi.mock("../../infra/restart.js", () => ({ consumeGatewayRestartIntentSync: () => consumeGatewayRestartIntentSync(), isGatewaySigusr1RestartExternallyAllowed: () => isGatewaySigusr1RestartExternallyAllowed(), markGatewaySigusr1RestartHandled: () => markGatewaySigusr1RestartHandled(), + peekGatewaySigusr1RestartReason: () => peekGatewaySigusr1RestartReason(), scheduleGatewaySigusr1Restart: (opts?: { delayMs?: number; reason?: string }) => scheduleGatewaySigusr1Restart(opts), })); vi.mock("../../infra/process-respawn.js", () => ({ + respawnGatewayProcessForUpdate: () => respawnGatewayProcessForUpdate(), restartGatewayProcessWithFreshPid: () => restartGatewayProcessWithFreshPid(), })); +vi.mock("../../infra/restart-sentinel.js", () => ({ + markUpdateRestartSentinelFailure: (reason: string) => markUpdateRestartSentinelFailure(reason), +})); + vi.mock("../../process/command-queue.js", () => ({ getActiveTaskCount: () => getActiveTaskCount(), markGatewayDraining: () => markGatewayDraining(), @@ -195,6 +213,8 @@ async function runLoopWithStart(params: { start: ReturnType; runtime: LoopRuntime; lockPort?: number; + healthHost?: string; + waitForHealthyChild?: (port: number, pid?: number, host?: string) => Promise; }) { vi.resetModules(); const { runGatewayLoop } = await import("./run-loop.js"); @@ -202,6 +222,8 @@ async function runLoopWithStart(params: { start: params.start as unknown as Parameters[0]["start"], runtime: params.runtime, lockPort: params.lockPort, + healthHost: params.healthHost, + waitForHealthyChild: params.waitForHealthyChild, }); return { loopPromise }; } @@ -292,6 +314,12 @@ describe("runGatewayLoop", () => { }, }, }); + peekGatewaySigusr1RestartReason.mockReturnValue(undefined); + respawnGatewayProcessForUpdate.mockReturnValue({ + mode: "disabled", + detail: "OPENCLAW_NO_RESPAWN", + }); + markUpdateRestartSentinelFailure.mockClear(); await withIsolatedSignals(async ({ captureSignal }) => { getActiveTaskCount.mockReturnValueOnce(2).mockReturnValueOnce(0); @@ -453,6 +481,7 @@ describe("runGatewayLoop", () => { it("releases the lock before exiting on spawned restart", async () => { vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue(undefined); await withIsolatedSignals(async ({ captureSignal }) => { const lockRelease = vi.fn(async () => {}); @@ -484,6 +513,7 @@ describe("runGatewayLoop", () => { it("waits briefly before exiting on launchd supervised restart", async () => { vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue(undefined); try { setPlatform("darwin"); process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway"; @@ -511,6 +541,7 @@ describe("runGatewayLoop", () => { it("forwards lockPort to initial and restart lock acquisitions", async () => { vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue(undefined); await withIsolatedSignals(async ({ captureSignal }) => { const closeFirst = vi.fn(async () => {}); @@ -549,6 +580,7 @@ describe("runGatewayLoop", () => { it("exits when lock reacquire fails during in-process restart fallback", async () => { vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue(undefined); await withIsolatedSignals(async ({ captureSignal }) => { const lockRelease = vi.fn(async () => {}); @@ -574,6 +606,103 @@ describe("runGatewayLoop", () => { ); }); }); + + it("hard-respawns update restarts and exits only after the replacement becomes healthy", async () => { + vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); + respawnGatewayProcessForUpdate.mockReturnValueOnce({ + mode: "spawned", + pid: 7777, + child: { kill: vi.fn() }, + }); + + await withIsolatedSignals(async ({ captureSignal }) => { + const waitForHealthyChild = vi.fn(async () => true); + const close = vi.fn(async () => {}); + const { start, started } = createSignaledStart(close); + const { runtime, exited } = createRuntimeWithExitSignal(); + await runLoopWithStart({ start, runtime, lockPort: 18789, waitForHealthyChild }); + await waitForStart(started); + const sigusr1 = captureSignal("SIGUSR1"); + + sigusr1(); + + await expect(exited).resolves.toBe(0); + expect(waitForHealthyChild).toHaveBeenCalledWith(18789, 7777, "127.0.0.1"); + expect(respawnGatewayProcessForUpdate).toHaveBeenCalledTimes(1); + expect(start).toHaveBeenCalledTimes(1); + expect(markUpdateRestartSentinelFailure).not.toHaveBeenCalled(); + }); + }); + + it("probes the configured gateway host for update respawn health", async () => { + vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); + respawnGatewayProcessForUpdate.mockReturnValueOnce({ + mode: "spawned", + pid: 7778, + child: { kill: vi.fn() }, + }); + + await withIsolatedSignals(async ({ captureSignal }) => { + const waitForHealthyChild = vi.fn(async () => true); + const close = vi.fn(async () => {}); + const { start, started } = createSignaledStart(close); + const { runtime, exited } = createRuntimeWithExitSignal(); + await runLoopWithStart({ + start, + runtime, + lockPort: 18789, + healthHost: "10.0.0.25", + waitForHealthyChild, + }); + await waitForStart(started); + const sigusr1 = captureSignal("SIGUSR1"); + + sigusr1(); + + await expect(exited).resolves.toBe(0); + expect(waitForHealthyChild).toHaveBeenCalledWith(18789, 7778, "10.0.0.25"); + }); + }); + + it("marks update respawn failures and falls back to in-process restart", async () => { + vi.clearAllMocks(); + peekGatewaySigusr1RestartReason.mockReturnValue("update.run"); + const kill = vi.fn(); + respawnGatewayProcessForUpdate.mockReturnValueOnce({ + mode: "spawned", + pid: 8888, + child: { kill }, + }); + + await withIsolatedSignals(async ({ captureSignal }) => { + const waitForHealthyChild = vi.fn(async () => false); + const closeFirst = vi.fn(async () => {}); + const closeSecond = vi.fn(async () => {}); + const { runtime, exited } = createRuntimeWithExitSignal(); + const start = vi + .fn() + .mockResolvedValueOnce({ close: closeFirst }) + .mockResolvedValueOnce({ close: closeSecond }); + + await runLoopWithStart({ start, runtime, lockPort: 18789, waitForHealthyChild }); + await new Promise((resolve) => setImmediate(resolve)); + const sigusr1 = captureSignal("SIGUSR1"); + const sigterm = captureSignal("SIGTERM"); + + sigusr1(); + await new Promise((resolve) => setImmediate(resolve)); + + expect(waitForHealthyChild).toHaveBeenCalledWith(18789, 8888, "127.0.0.1"); + expect(kill).toHaveBeenCalledTimes(1); + expect(markUpdateRestartSentinelFailure).toHaveBeenCalledWith("restart-unhealthy"); + expect(start).toHaveBeenCalledTimes(2); + + sigterm(); + await expect(exited).resolves.toBe(0); + }); + }); }); describe("gateway discover routing helpers", () => { diff --git a/src/cli/gateway-cli/run-loop.ts b/src/cli/gateway-cli/run-loop.ts index a00cc9efab7..f42e3199043 100644 --- a/src/cli/gateway-cli/run-loop.ts +++ b/src/cli/gateway-cli/run-loop.ts @@ -1,3 +1,4 @@ +import net from "node:net"; import { abortEmbeddedPiRun, getActiveEmbeddedRunCount, @@ -7,12 +8,17 @@ import { loadConfig } from "../../config/config.js"; import type { startGatewayServer } from "../../gateway/server.js"; import { formatErrorMessage } from "../../infra/errors.js"; import { acquireGatewayLock } from "../../infra/gateway-lock.js"; -import { restartGatewayProcessWithFreshPid } from "../../infra/process-respawn.js"; +import { + respawnGatewayProcessForUpdate, + restartGatewayProcessWithFreshPid, +} from "../../infra/process-respawn.js"; +import { markUpdateRestartSentinelFailure } from "../../infra/restart-sentinel.js"; import { consumeGatewaySigusr1RestartAuthorization, consumeGatewayRestartIntentSync, isGatewaySigusr1RestartExternallyAllowed, markGatewaySigusr1RestartHandled, + peekGatewaySigusr1RestartReason, scheduleGatewaySigusr1Restart, } from "../../infra/restart.js"; import { detectRespawnSupervisor } from "../../infra/supervisor-markers.js"; @@ -35,22 +41,67 @@ const gatewayLog = createSubsystemLogger("gateway"); const LAUNCHD_SUPERVISED_RESTART_EXIT_DELAY_MS = 1500; const DEFAULT_RESTART_DRAIN_TIMEOUT_MS = 300_000; const RESTART_DRAIN_STILL_PENDING_WARN_MS = 30_000; +const UPDATE_RESPAWN_HEALTH_TIMEOUT_MS = 10_000; +const UPDATE_RESPAWN_HEALTH_POLL_MS = 200; type GatewayRunSignalAction = "stop" | "restart"; type RestartDrainTimeoutMs = number | undefined; +async function waitForGatewayPortReady(host: string, port: number): Promise { + return await new Promise((resolve) => { + const socket = net.createConnection({ host, port }); + let settled = false; + const finish = (value: boolean) => { + if (settled) { + return; + } + settled = true; + clearTimeout(timer); + socket.removeAllListeners(); + socket.destroy(); + resolve(value); + }; + const timer = setTimeout(() => { + finish(false); + }, UPDATE_RESPAWN_HEALTH_POLL_MS); + socket.once("connect", () => finish(true)); + socket.once("error", () => finish(false)); + }); +} + +async function waitForHealthyGatewayChild( + port: number, + _pid?: number, + host = "127.0.0.1", + timeoutMs = UPDATE_RESPAWN_HEALTH_TIMEOUT_MS, +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (await waitForGatewayPortReady(host, port)) { + return true; + } + await new Promise((resolve) => { + setTimeout(resolve, UPDATE_RESPAWN_HEALTH_POLL_MS); + }); + } + return false; +} + export async function runGatewayLoop(params: { start: (params?: { startupStartedAt?: number; }) => Promise>>; runtime: RuntimeEnv; lockPort?: number; + healthHost?: string; + waitForHealthyChild?: (port: number, pid?: number, host?: string) => Promise; }) { let startupStartedAt = Date.now(); let lock = await acquireGatewayLock({ port: params.lockPort }); let server: Awaited> | null = null; let shuttingDown = false; let restartResolver: (() => void) | null = null; + const waitForHealthyChild = params.waitForHealthyChild ?? waitForHealthyGatewayChild; const cleanupSignals = () => { process.removeListener("SIGTERM", onSigterm); @@ -86,8 +137,73 @@ export async function runGatewayLoop(params: { return false; } }; - const handleRestartAfterServerClose = async () => { + const handleRestartAfterServerClose = async (restartReason?: string) => { const hadLock = await releaseLockIfHeld(); + const isUpdateRestart = restartReason === "update.run"; + + if (isUpdateRestart) { + const respawn = respawnGatewayProcessForUpdate(); + if (respawn.mode === "spawned") { + const port = params.lockPort; + const healthy = + typeof port === "number" + ? await waitForHealthyChild(port, respawn.pid, params.healthHost ?? "127.0.0.1") + : false; + if (healthy) { + gatewayLog.info( + `restart mode: update process respawn (spawned pid ${respawn.pid ?? "unknown"})`, + ); + exitProcess(0); + return; + } + gatewayLog.warn( + `update respawn child did not become healthy (${respawn.pid ?? "unknown"}); falling back to in-process restart`, + ); + try { + respawn.child?.kill(); + } catch { + // Best-effort; parent fallback keeps the gateway reachable for recovery. + } + await markUpdateRestartSentinelFailure("restart-unhealthy").catch((err) => { + gatewayLog.warn(`failed to mark update restart sentinel unhealthy: ${String(err)}`); + }); + if (hadLock && !(await reacquireLockForInProcessRestart())) { + return; + } + shuttingDown = false; + restartResolver?.(); + return; + } + if (respawn.mode === "supervised") { + gatewayLog.info("restart mode: update process respawn (supervisor restart)"); + if (detectRespawnSupervisor(process.env, process.platform) === "launchd") { + await new Promise((resolve) => { + setTimeout(resolve, LAUNCHD_SUPERVISED_RESTART_EXIT_DELAY_MS); + }); + } + exitProcess(0); + return; + } + if (respawn.mode === "failed") { + gatewayLog.warn( + `update respawn failed (${respawn.detail ?? "unknown error"}); falling back to in-process restart`, + ); + await markUpdateRestartSentinelFailure("restart-unhealthy").catch((err) => { + gatewayLog.warn(`failed to mark update restart sentinel unhealthy: ${String(err)}`); + }); + } else { + gatewayLog.info( + `restart mode: in-process restart (${respawn.detail ?? "OPENCLAW_NO_RESPAWN"})`, + ); + } + if (hadLock && !(await reacquireLockForInProcessRestart())) { + return; + } + shuttingDown = false; + restartResolver?.(); + return; + } + // Release the lock BEFORE spawning so the child can acquire it immediately. const respawn = restartGatewayProcessWithFreshPid(); if (respawn.mode === "spawned" || respawn.mode === "supervised") { @@ -143,7 +259,7 @@ export async function runGatewayLoop(params: { } }; - const request = (action: GatewayRunSignalAction, signal: string) => { + const request = (action: GatewayRunSignalAction, signal: string, restartReason?: string) => { if (shuttingDown) { gatewayLog.info(`received ${signal} during shutdown; ignoring`); return; @@ -257,7 +373,7 @@ export async function runGatewayLoop(params: { clearForceExitTimer(); server = null; if (isRestart) { - await handleRestartAfterServerClose(); + await handleRestartAfterServerClose(restartReason); } else { await handleStopAfterServerClose(); } @@ -292,8 +408,9 @@ export async function runGatewayLoop(params: { scheduleGatewaySigusr1Restart({ delayMs: 0, reason: "SIGUSR1" }); return; } + const restartReason = peekGatewaySigusr1RestartReason(); markGatewaySigusr1RestartHandled(); - request("restart", "SIGUSR1"); + request("restart", "SIGUSR1", restartReason); }; process.on("SIGTERM", onSigterm); diff --git a/src/cli/gateway-cli/run.ts b/src/cli/gateway-cli/run.ts index 877b7ba06c4..82c63e47c19 100644 --- a/src/cli/gateway-cli/run.ts +++ b/src/cli/gateway-cli/run.ts @@ -24,7 +24,11 @@ import { import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { hasConfiguredSecretInput } from "../../config/types.secrets.js"; import { resolveGatewayAuth } from "../../gateway/auth.js"; -import { defaultGatewayBindMode, isContainerEnvironment } from "../../gateway/net.js"; +import { + defaultGatewayBindMode, + isContainerEnvironment, + resolveGatewayBindHost, +} from "../../gateway/net.js"; import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js"; import { setGatewayWsLogStyle } from "../../gateway/ws-logging.js"; import { setVerbose } from "../../globals.js"; @@ -680,6 +684,7 @@ async function runGatewayCommand(opts: GatewayRunOpts) { await runGatewayLoop({ runtime: defaultRuntime, lockPort: port, + healthHost: await resolveGatewayBindHost(bind, cfg.gateway?.customBindHost), start: async ({ startupStartedAt } = {}) => await startGatewayServer(port, { bind, diff --git a/src/cli/update-cli.test.ts b/src/cli/update-cli.test.ts index 45657e90470..3945b0a911d 100644 --- a/src/cli/update-cli.test.ts +++ b/src/cli/update-cli.test.ts @@ -1078,6 +1078,30 @@ describe("update-cli", () => { ).toContain("Low disk space near"); }); + it("refuses package updates from inside the gateway service process", async () => { + mockPackageInstallStatus(createCaseDir("openclaw-update")); + + await withEnvAsync( + { + OPENCLAW_SERVICE_MARKER: "openclaw", + OPENCLAW_SERVICE_KIND: "gateway", + }, + async () => { + await updateCommand({ yes: true }); + }, + ); + + expect(defaultRuntime.error).toHaveBeenCalledWith( + expect.stringContaining("Package updates cannot run from inside the gateway service process."), + ); + expect(defaultRuntime.exit).toHaveBeenCalledWith(1); + expect(runGatewayUpdate).not.toHaveBeenCalled(); + expect(runCommandWithTimeout).not.toHaveBeenCalledWith( + ["npm", "i", "-g", "openclaw@latest", "--no-fund", "--no-audit", "--loglevel=error"], + expect.any(Object), + ); + }); + it("blocks package updates when the target requires a newer Node runtime", async () => { mockPackageInstallStatus(createCaseDir("openclaw-update")); vi.mocked(fetchNpmPackageTargetStatus).mockResolvedValue({ diff --git a/src/cli/update-cli/update-command.ts b/src/cli/update-cli/update-command.ts index 54937619ae2..0d1c10d0b0c 100644 --- a/src/cli/update-cli/update-command.ts +++ b/src/cli/update-cli/update-command.ts @@ -15,6 +15,7 @@ import { } from "../../config/config.js"; import { formatConfigIssueLines } from "../../config/issue-format.js"; import { asResolvedSourceConfig, asRuntimeConfig } from "../../config/materialize.js"; +import { GATEWAY_SERVICE_KIND, GATEWAY_SERVICE_MARKER } from "../../daemon/constants.js"; import { resolveGatewayInstallEntrypoint } from "../../daemon/gateway-entrypoint.js"; import { resolveGatewayRestartLogPath } from "../../daemon/restart-logs.js"; import { readGatewayServiceState, resolveGatewayService } from "../../daemon/service.js"; @@ -151,6 +152,16 @@ export function shouldUseLegacyProcessRestartAfterUpdate(params: { return !isPackageManagerUpdateMode(params.updateMode); } +function isRunningInsideGatewayService( + env: Record = process.env, +): boolean { + if (env.OPENCLAW_SERVICE_MARKER?.trim() !== GATEWAY_SERVICE_MARKER) { + return false; + } + const serviceKind = env.OPENCLAW_SERVICE_KIND?.trim(); + return !serviceKind || serviceKind === GATEWAY_SERVICE_KIND; +} + function formatCommandFailure(stdout: string, stderr: string): string { const detail = (stderr || stdout).trim(); if (!detail) { @@ -1309,6 +1320,18 @@ export async function updateCommand(opts: UpdateCommandOptions): Promise { return; } + if (updateInstallKind === "package" && isRunningInsideGatewayService()) { + defaultRuntime.error( + [ + "Package updates cannot run from inside the gateway service process.", + "That path replaces the active OpenClaw dist tree while the live gateway may still lazy-load old chunks.", + `Run \`${replaceCliName(formatCliCommand("openclaw update"), CLI_NAME)}\` from a shell outside the gateway service, or stop the gateway service first and then update.`, + ].join("\n"), + ); + defaultRuntime.exit(1); + return; + } + if (downgradeRisk && !opts.yes) { if (!process.stdin.isTTY || opts.json) { defaultRuntime.error( diff --git a/src/gateway/method-scopes.test.ts b/src/gateway/method-scopes.test.ts index f64f6d94cca..76fd2b2ff9c 100644 --- a/src/gateway/method-scopes.test.ts +++ b/src/gateway/method-scopes.test.ts @@ -38,6 +38,7 @@ describe("method scope resolution", () => { ["diagnostics.stability", ["operator.read"]], ["node.pair.approve", ["operator.pairing"]], ["poll", ["operator.write"]], + ["update.status", ["operator.admin"]], ["config.patch", ["operator.admin"]], ["nativeHook.invoke", ["operator.admin"]], ["wizard.start", ["operator.admin"]], diff --git a/src/gateway/method-scopes.ts b/src/gateway/method-scopes.ts index 54b8b1e330e..3d31db631f7 100644 --- a/src/gateway/method-scopes.ts +++ b/src/gateway/method-scopes.ts @@ -183,6 +183,7 @@ const METHOD_SCOPE_GROUPS: Record = { "set-heartbeats", "system-event", "agents.files.set", + "update.status", ], [TALK_SECRETS_SCOPE]: [], }; diff --git a/src/gateway/protocol/index.ts b/src/gateway/protocol/index.ts index 23ccb80337a..ace83ed58ef 100644 --- a/src/gateway/protocol/index.ts +++ b/src/gateway/protocol/index.ts @@ -93,6 +93,8 @@ import { ConfigSchemaResponseSchema, type ConfigSetParams, ConfigSetParamsSchema, + type UpdateStatusParams, + UpdateStatusParamsSchema, type ConnectParams, ConnectParamsSchema, type CronAddParams, @@ -536,6 +538,8 @@ export const validateChatSendParams = ajv.compile(ChatSendParamsSchema); export const validateChatAbortParams = ajv.compile(ChatAbortParamsSchema); export const validateChatInjectParams = ajv.compile(ChatInjectParamsSchema); export const validateChatEvent = ajv.compile(ChatEventSchema); +export const validateUpdateStatusParams = + ajv.compile(UpdateStatusParamsSchema); export const validateUpdateRunParams = ajv.compile(UpdateRunParamsSchema); export const validateWebLoginStartParams = ajv.compile(WebLoginStartParamsSchema); @@ -638,6 +642,7 @@ export { ConfigSchemaLookupParamsSchema, ConfigSchemaResponseSchema, ConfigSchemaLookupResultSchema, + UpdateStatusParamsSchema, WizardStartParamsSchema, WizardNextParamsSchema, WizardCancelParamsSchema, @@ -838,6 +843,7 @@ export type { WebPushSubscribeParams, WebPushUnsubscribeParams, WebPushTestParams, + UpdateStatusParams, UpdateRunParams, ChatInjectParams, }; diff --git a/src/gateway/protocol/schema/config.ts b/src/gateway/protocol/schema/config.ts index 8c61c1097c7..7e879757532 100644 --- a/src/gateway/protocol/schema/config.ts +++ b/src/gateway/protocol/schema/config.ts @@ -51,6 +51,8 @@ export const ConfigSchemaLookupParamsSchema = Type.Object( { additionalProperties: false }, ); +export const UpdateStatusParamsSchema = Type.Object({}, { additionalProperties: false }); + export const UpdateRunParamsSchema = Type.Object( { sessionKey: Type.Optional(Type.String()), diff --git a/src/gateway/protocol/schema/protocol-schemas.ts b/src/gateway/protocol/schema/protocol-schemas.ts index c3972fbf764..32d1e4d15f1 100644 --- a/src/gateway/protocol/schema/protocol-schemas.ts +++ b/src/gateway/protocol/schema/protocol-schemas.ts @@ -78,6 +78,7 @@ import { ConfigSchemaParamsSchema, ConfigSchemaResponseSchema, ConfigSetParamsSchema, + UpdateStatusParamsSchema, UpdateRunParamsSchema, } from "./config.js"; import { @@ -365,6 +366,7 @@ export const ProtocolSchemas = { ChatAbortParams: ChatAbortParamsSchema, ChatInjectParams: ChatInjectParamsSchema, ChatEvent: ChatEventSchema, + UpdateStatusParams: UpdateStatusParamsSchema, UpdateRunParams: UpdateRunParamsSchema, TickEvent: TickEventSchema, ShutdownEvent: ShutdownEventSchema, diff --git a/src/gateway/protocol/schema/types.ts b/src/gateway/protocol/schema/types.ts index fb4da4202b7..c25aa46a05b 100644 --- a/src/gateway/protocol/schema/types.ts +++ b/src/gateway/protocol/schema/types.ts @@ -69,6 +69,7 @@ export type ConfigSchemaParams = SchemaType<"ConfigSchemaParams">; export type ConfigSchemaLookupParams = SchemaType<"ConfigSchemaLookupParams">; export type ConfigSchemaResponse = SchemaType<"ConfigSchemaResponse">; export type ConfigSchemaLookupResult = SchemaType<"ConfigSchemaLookupResult">; +export type UpdateStatusParams = SchemaType<"UpdateStatusParams">; export type WizardStartParams = SchemaType<"WizardStartParams">; export type WizardNextParams = SchemaType<"WizardNextParams">; export type WizardCancelParams = SchemaType<"WizardCancelParams">; diff --git a/src/gateway/server-methods-list.ts b/src/gateway/server-methods-list.ts index 4cf4c605228..142de04f33e 100644 --- a/src/gateway/server-methods-list.ts +++ b/src/gateway/server-methods-list.ts @@ -71,6 +71,7 @@ const BASE_METHODS = [ "skills.bins", "skills.install", "skills.update", + "update.status", "update.run", "voicewake.get", "voicewake.set", diff --git a/src/gateway/server-methods/update.test.ts b/src/gateway/server-methods/update.test.ts index 00a2c4bad5c..5139bc2fa1b 100644 --- a/src/gateway/server-methods/update.test.ts +++ b/src/gateway/server-methods/update.test.ts @@ -1,11 +1,21 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import type { RestartSentinelPayload } from "../../infra/restart-sentinel.js"; -import type { UpdateRunResult } from "../../infra/update-runner.js"; +import type { UpdateInstallSurface, UpdateRunResult } from "../../infra/update-runner.js"; // Capture the sentinel payload written during update.run let capturedPayload: RestartSentinelPayload | undefined; const runGatewayUpdateMock = vi.fn<() => Promise>(); +const resolveUpdateInstallSurfaceMock = vi.fn<() => Promise>(async () => ({ + kind: "git", + mode: "git", + root: "/tmp/openclaw", + packageRoot: "/tmp/openclaw", +})); +const getLatestUpdateRestartSentinelMock = vi.fn<() => RestartSentinelPayload | null>(() => null); +const isRestartEnabledMock = vi.fn(() => true); +const readPackageVersionMock = vi.fn(async () => "1.0.0"); +const detectRespawnSupervisorMock = vi.fn(() => null); const scheduleGatewaySigusr1RestartMock = vi.fn(() => ({ scheduled: true })); @@ -13,6 +23,10 @@ vi.mock("../../config/config.js", () => ({ loadConfig: () => ({ update: {} }), })); +vi.mock("../../config/commands.flags.js", () => ({ + isRestartEnabled: isRestartEnabledMock, +})); + vi.mock("../../config/sessions.js", () => ({ extractDeliveryInfo: (sessionKey: string | undefined) => { if (!sessionKey) { @@ -57,18 +71,33 @@ vi.mock("../../infra/restart.js", () => ({ scheduleGatewaySigusr1Restart: scheduleGatewaySigusr1RestartMock, })); +vi.mock("../../infra/package-json.js", () => ({ + readPackageVersion: readPackageVersionMock, +})); + +vi.mock("../../infra/supervisor-markers.js", () => ({ + detectRespawnSupervisor: detectRespawnSupervisorMock, +})); + vi.mock("../../infra/update-channels.js", () => ({ normalizeUpdateChannel: () => undefined, })); vi.mock("../../infra/update-runner.js", () => ({ + resolveUpdateInstallSurface: resolveUpdateInstallSurfaceMock, runGatewayUpdate: runGatewayUpdateMock, })); vi.mock("../protocol/index.js", () => ({ + validateUpdateStatusParams: () => true, validateUpdateRunParams: () => true, })); +vi.mock("../server-restart-sentinel.js", () => ({ + getLatestUpdateRestartSentinel: getLatestUpdateRestartSentinelMock, + recordLatestUpdateRestartSentinel: vi.fn(), +})); + vi.mock("./restart-request.js", () => ({ parseRestartRequestParams: (params: Record) => ({ sessionKey: params.sessionKey, @@ -83,13 +112,28 @@ vi.mock("./validation.js", () => ({ beforeEach(() => { capturedPayload = undefined; + isRestartEnabledMock.mockReset(); + isRestartEnabledMock.mockReturnValue(true); + readPackageVersionMock.mockClear(); + readPackageVersionMock.mockResolvedValue("1.0.0"); + detectRespawnSupervisorMock.mockReset(); + detectRespawnSupervisorMock.mockReturnValue(null); runGatewayUpdateMock.mockClear(); runGatewayUpdateMock.mockResolvedValue({ status: "ok", mode: "npm", + after: { version: "2.0.0" }, steps: [], durationMs: 100, }); + resolveUpdateInstallSurfaceMock.mockClear(); + resolveUpdateInstallSurfaceMock.mockResolvedValue({ + kind: "git", + mode: "git", + root: "/tmp/openclaw", + packageRoot: "/tmp/openclaw", + }); + getLatestUpdateRestartSentinelMock.mockClear(); scheduleGatewaySigusr1RestartMock.mockClear(); scheduleGatewaySigusr1RestartMock.mockReturnValue({ scheduled: true }); }); @@ -199,4 +243,94 @@ describe("update.run restart scheduling", () => { expect(payload?.restart).toBeNull(); expect(capturedPayload?.continuation).toBeUndefined(); }); + + it.each([ + { status: "skipped" as const, reason: "dirty" }, + { status: "skipped" as const, reason: "not-git-install" }, + { status: "skipped" as const, reason: "restart-disabled" }, + { status: "error" as const, reason: "deps-install-failed" }, + { status: "error" as const, reason: "build-failed" }, + { status: "error" as const, reason: "global-install-failed" }, + ])("returns ok=false for $status:$reason", async ({ status, reason }) => { + runGatewayUpdateMock.mockResolvedValueOnce({ + status, + mode: "git", + reason, + steps: [], + durationMs: 100, + }); + + let payload: { ok: boolean; result?: { status?: string; reason?: string } } | undefined; + + await invokeUpdateRun({}, (_ok: boolean, response: unknown) => { + payload = response as typeof payload; + }); + + expect(payload?.ok).toBe(false); + expect(payload?.result).toEqual( + expect.objectContaining({ + status, + reason, + }), + ); + }); + + it("blocks unmanaged global installs before package mutation when restart is unavailable", async () => { + isRestartEnabledMock.mockReturnValue(false); + detectRespawnSupervisorMock.mockReturnValue(null); + resolveUpdateInstallSurfaceMock.mockResolvedValueOnce({ + kind: "global", + mode: "npm", + root: "/tmp/openclaw-global", + packageRoot: "/tmp/openclaw-global", + }); + + let payload: + | { ok: boolean; result?: { status?: string; reason?: string; mode?: string } } + | undefined; + + await invokeUpdateRun({}, (_ok: boolean, response: unknown) => { + payload = response as typeof payload; + }); + + expect(runGatewayUpdateMock).not.toHaveBeenCalled(); + expect(scheduleGatewaySigusr1RestartMock).not.toHaveBeenCalled(); + expect(payload).toEqual( + expect.objectContaining({ + ok: false, + result: expect.objectContaining({ + status: "skipped", + reason: "restart-unavailable", + mode: "npm", + }), + }), + ); + }); +}); + +describe("update.status", () => { + it("returns the latest cached update sentinel", async () => { + getLatestUpdateRestartSentinelMock.mockReturnValueOnce({ + kind: "update", + status: "ok", + ts: 1, + stats: { + after: { version: "2.0.0" }, + }, + }); + const { updateHandlers } = await import("./update.js"); + const respond = vi.fn(); + + await updateHandlers["update.status"]({ + params: {}, + respond, + } as never); + + expect(respond).toHaveBeenCalledWith(true, { + sentinel: expect.objectContaining({ + kind: "update", + status: "ok", + }), + }); + }); }); diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index 47f4f0fe8d9..338e93625ed 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -1,21 +1,36 @@ +import { isRestartEnabled } from "../../config/commands.flags.js"; import { loadConfig } from "../../config/config.js"; import { extractDeliveryInfo } from "../../config/sessions.js"; import { resolveOpenClawPackageRoot } from "../../infra/openclaw-root.js"; +import { readPackageVersion } from "../../infra/package-json.js"; import { formatDoctorNonInteractiveHint, type RestartSentinelPayload, writeRestartSentinel, } from "../../infra/restart-sentinel.js"; import { scheduleGatewaySigusr1Restart } from "../../infra/restart.js"; +import { detectRespawnSupervisor } from "../../infra/supervisor-markers.js"; import { normalizeUpdateChannel } from "../../infra/update-channels.js"; -import { runGatewayUpdate } from "../../infra/update-runner.js"; +import { resolveUpdateInstallSurface, runGatewayUpdate } from "../../infra/update-runner.js"; import { formatControlPlaneActor, resolveControlPlaneActor } from "../control-plane-audit.js"; -import { validateUpdateRunParams } from "../protocol/index.js"; +import { validateUpdateRunParams, validateUpdateStatusParams } from "../protocol/index.js"; +import { + getLatestUpdateRestartSentinel, + recordLatestUpdateRestartSentinel, +} from "../server-restart-sentinel.js"; import { parseRestartRequestParams } from "./restart-request.js"; import type { GatewayRequestHandlers } from "./types.js"; import { assertValidParams } from "./validation.js"; export const updateHandlers: GatewayRequestHandlers = { + "update.status": async ({ params, respond }) => { + if (!assertValidParams(params, validateUpdateStatusParams, "update.status", respond)) { + return; + } + respond(true, { + sentinel: getLatestUpdateRestartSentinel(), + }); + }, "update.run": async ({ params, respond, client, context }) => { if (!assertValidParams(params, validateUpdateRunParams, "update.run", respond)) { return; @@ -48,17 +63,38 @@ export const updateHandlers: GatewayRequestHandlers = { argv1: process.argv[1], cwd: process.cwd(), })) ?? process.cwd(); - result = await runGatewayUpdate({ + const installSurface = await resolveUpdateInstallSurface({ timeoutMs, cwd: root, argv1: process.argv[1], - channel: configChannel ?? undefined, }); - } catch (err) { + const supervisor = detectRespawnSupervisor(process.env, process.platform); + if (!isRestartEnabled(config) && !supervisor) { + const beforeVersion = installSurface.root + ? await readPackageVersion(installSurface.root) + : null; + result = { + status: "skipped", + mode: installSurface.mode, + ...(installSurface.root ? { root: installSurface.root } : {}), + reason: installSurface.kind === "global" ? "restart-unavailable" : "restart-disabled", + ...(beforeVersion ? { before: { version: beforeVersion } } : {}), + steps: [], + durationMs: 0, + }; + } else { + result = await runGatewayUpdate({ + timeoutMs, + cwd: root, + argv1: process.argv[1], + channel: configChannel ?? undefined, + }); + } + } catch { result = { status: "error", mode: "unknown", - reason: String(err), + reason: "unexpected-error", steps: [], durationMs: 0, }; @@ -97,6 +133,7 @@ export const updateHandlers: GatewayRequestHandlers = { let sentinelPath: string | null = null; try { sentinelPath = await writeRestartSentinel(payload); + recordLatestUpdateRestartSentinel(payload); } catch { sentinelPath = null; } @@ -129,7 +166,7 @@ export const updateHandlers: GatewayRequestHandlers = { respond( true, { - ok: result.status !== "error", + ok: result.status === "ok", result, restart, sentinel: { diff --git a/src/gateway/server-restart-sentinel.ts b/src/gateway/server-restart-sentinel.ts index 446b3eaa415..2fd1f432519 100644 --- a/src/gateway/server-restart-sentinel.ts +++ b/src/gateway/server-restart-sentinel.ts @@ -14,10 +14,12 @@ import { ackDelivery, enqueueDelivery, failDelivery } from "../infra/outbound/de import { buildOutboundSessionContext } from "../infra/outbound/session-context.js"; import { resolveOutboundTarget } from "../infra/outbound/targets.js"; import { + finalizeUpdateRestartSentinelRunningVersion, formatRestartSentinelMessage, readRestartSentinel, removeRestartSentinelFile, type RestartSentinelContinuation, + type RestartSentinelPayload, resolveRestartSentinelPath, summarizeRestartSentinel, } from "../infra/restart-sentinel.js"; @@ -45,6 +47,16 @@ import { runStartupTasks, type StartupTask } from "./startup-tasks.js"; const log = createSubsystemLogger("gateway/restart-sentinel"); const OUTBOUND_RETRY_DELAY_MS = 1_000; const OUTBOUND_MAX_ATTEMPTS = 45; +let latestUpdateRestartSentinel: RestartSentinelPayload | null = null; + +function cloneRestartSentinelPayload( + payload: RestartSentinelPayload | null, +): RestartSentinelPayload | null { + if (!payload) { + return null; + } + return JSON.parse(JSON.stringify(payload)) as RestartSentinelPayload; +} function hasRoutableDeliveryContext(context?: { channel?: string; @@ -562,3 +574,20 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { export function shouldWakeFromRestartSentinel() { return !process.env.VITEST && process.env.NODE_ENV !== "test"; } + +export async function refreshLatestUpdateRestartSentinel(): Promise { + const finalized = await finalizeUpdateRestartSentinelRunningVersion(); + const sentinel = finalized ?? (await readRestartSentinel()); + if (sentinel?.payload.kind === "update") { + latestUpdateRestartSentinel = cloneRestartSentinelPayload(sentinel.payload); + } + return cloneRestartSentinelPayload(latestUpdateRestartSentinel); +} + +export function getLatestUpdateRestartSentinel(): RestartSentinelPayload | null { + return cloneRestartSentinelPayload(latestUpdateRestartSentinel); +} + +export function recordLatestUpdateRestartSentinel(payload: RestartSentinelPayload): void { + latestUpdateRestartSentinel = cloneRestartSentinelPayload(payload); +} diff --git a/src/gateway/server-startup-post-attach.test.ts b/src/gateway/server-startup-post-attach.test.ts index 268a2ccf685..6b15358ac24 100644 --- a/src/gateway/server-startup-post-attach.test.ts +++ b/src/gateway/server-startup-post-attach.test.ts @@ -20,6 +20,7 @@ const hoisted = vi.hoisted(() => { const scheduleSubagentOrphanRecovery = vi.fn(); const shouldWakeFromRestartSentinel = vi.fn(() => false); const scheduleRestartSentinelWake = vi.fn(); + const refreshLatestUpdateRestartSentinel = vi.fn(async () => null); const getAcpRuntimeBackend = vi.fn<(id?: string) => unknown>(() => null); const reconcilePendingSessionIdentities = vi.fn(async () => ({ checked: 0, @@ -42,6 +43,7 @@ const hoisted = vi.hoisted(() => { scheduleSubagentOrphanRecovery, shouldWakeFromRestartSentinel, scheduleRestartSentinelWake, + refreshLatestUpdateRestartSentinel, getAcpRuntimeBackend, reconcilePendingSessionIdentities, }; @@ -104,6 +106,7 @@ vi.mock("../acp/runtime/registry.js", () => ({ })); vi.mock("./server-restart-sentinel.js", () => ({ + refreshLatestUpdateRestartSentinel: hoisted.refreshLatestUpdateRestartSentinel, scheduleRestartSentinelWake: hoisted.scheduleRestartSentinelWake, shouldWakeFromRestartSentinel: hoisted.shouldWakeFromRestartSentinel, })); @@ -403,6 +406,7 @@ function createPostAttachRuntimeDeps( return { getGlobalHookRunner: vi.fn(() => null), logGatewayStartup: hoisted.logGatewayStartup, + refreshLatestUpdateRestartSentinel: hoisted.refreshLatestUpdateRestartSentinel, scheduleGatewayUpdateCheck: hoisted.scheduleGatewayUpdateCheck, startGatewaySidecars: vi.fn(async () => ({ pluginServices: null })), startGatewayTailscaleExposure: hoisted.startGatewayTailscaleExposure, diff --git a/src/gateway/server-startup-post-attach.ts b/src/gateway/server-startup-post-attach.ts index 739237e9de6..9e9069227b2 100644 --- a/src/gateway/server-startup-post-attach.ts +++ b/src/gateway/server-startup-post-attach.ts @@ -13,6 +13,7 @@ import { GATEWAY_EVENT_UPDATE_AVAILABLE, type GatewayUpdateAvailableEventPayload, } from "./events.js"; +import type { refreshLatestUpdateRestartSentinel } from "./server-restart-sentinel.js"; import type { logGatewayStartup } from "./server-startup-log.js"; import { STARTUP_UNAVAILABLE_GATEWAY_METHODS } from "./server-startup-unavailable-methods.js"; import type { startGatewayTailscaleExposure } from "./server-tailscale.js"; @@ -407,6 +408,9 @@ export async function startGatewaySidecars(params: { type GatewayPostAttachRuntimeDeps = { getGlobalHookRunner: () => Awaitable>; logGatewayStartup: (params: Parameters[0]) => Awaitable; + refreshLatestUpdateRestartSentinel: () => Awaitable< + ReturnType + >; scheduleGatewayUpdateCheck: ( ...args: Parameters ) => Awaitable>; @@ -421,6 +425,8 @@ const defaultGatewayPostAttachRuntimeDeps: GatewayPostAttachRuntimeDeps = { (await import("../plugins/hook-runner-global.js")).getGlobalHookRunner(), logGatewayStartup: async (params) => (await import("./server-startup-log.js")).logGatewayStartup(params), + refreshLatestUpdateRestartSentinel: async () => + (await import("./server-restart-sentinel.js")).refreshLatestUpdateRestartSentinel(), scheduleGatewayUpdateCheck: async (...args) => (await import("../infra/update-startup.js")).scheduleGatewayUpdateCheck(...args), startGatewaySidecars, @@ -471,6 +477,14 @@ export async function startGatewayPostAttachRuntime( }, runtimeDeps: GatewayPostAttachRuntimeDeps = defaultGatewayPostAttachRuntimeDeps, ) { + await measureStartup(params.startupTrace, "post-attach.update-sentinel", async () => { + try { + await runtimeDeps.refreshLatestUpdateRestartSentinel(); + } catch (err) { + params.log.warn(`restart sentinel refresh failed: ${String(err)}`); + } + }); + await measureStartup(params.startupTrace, "post-attach.log", () => runtimeDeps.logGatewayStartup({ cfg: params.cfgAtStart, diff --git a/src/gateway/server.roles-allowlist-update.test.ts b/src/gateway/server.roles-allowlist-update.test.ts index a0c6d6bcf11..72a70d62946 100644 --- a/src/gateway/server.roles-allowlist-update.test.ts +++ b/src/gateway/server.roles-allowlist-update.test.ts @@ -12,6 +12,12 @@ import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-cha import type { GatewayClient } from "./client.js"; vi.mock("../infra/update-runner.js", () => ({ + resolveUpdateInstallSurface: vi.fn(async () => ({ + kind: "git", + mode: "git", + root: "/repo", + packageRoot: "/repo", + })), runGatewayUpdate: vi.fn(async () => ({ status: "ok", mode: "git", diff --git a/src/infra/infra-runtime.test.ts b/src/infra/infra-runtime.test.ts index 8d015ed6fba..704835e3dba 100644 --- a/src/infra/infra-runtime.test.ts +++ b/src/infra/infra-runtime.test.ts @@ -12,6 +12,7 @@ import { emitGatewayRestart, isGatewaySigusr1RestartExternallyAllowed, markGatewaySigusr1RestartHandled, + peekGatewaySigusr1RestartReason, scheduleGatewaySigusr1Restart, setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck, @@ -101,6 +102,24 @@ describe("infra runtime", () => { } }); + it("preserves update restart reason when a scheduled restart coalesces", async () => { + const handler = () => {}; + process.on("SIGUSR1", handler); + try { + const first = scheduleGatewaySigusr1Restart({ delayMs: 1_000, reason: "config.patch" }); + const second = scheduleGatewaySigusr1Restart({ delayMs: 1_000, reason: "update.run" }); + + expect(first.coalesced).toBe(false); + expect(second.coalesced).toBe(true); + + await vi.advanceTimersByTimeAsync(1_000); + + expect(peekGatewaySigusr1RestartReason()).toBe("update.run"); + } finally { + process.removeListener("SIGUSR1", handler); + } + }); + it("runs restart preparation only when the scheduled restart emits", async () => { const beforeEmit = vi.fn(async () => {}); const emitSpy = vi.spyOn(process, "emit"); diff --git a/src/infra/process-respawn.test.ts b/src/infra/process-respawn.test.ts index e1e810180db..2a61a673b96 100644 --- a/src/infra/process-respawn.test.ts +++ b/src/infra/process-respawn.test.ts @@ -18,7 +18,10 @@ vi.mock("./restart.js", () => ({ triggerOpenClawRestart: (...args: unknown[]) => triggerOpenClawRestartMock(...args), })); -import { restartGatewayProcessWithFreshPid } from "./process-respawn.js"; +import { + respawnGatewayProcessForUpdate, + restartGatewayProcessWithFreshPid, +} from "./process-respawn.js"; const originalArgv = [...process.argv]; const originalExecArgv = [...process.execArgv]; @@ -231,3 +234,42 @@ describe("restartGatewayProcessWithFreshPid", () => { expect(result.detail).toContain("spawn failed"); }); }); + +describe("respawnGatewayProcessForUpdate", () => { + it("keeps OPENCLAW_NO_RESPAWN semantics for update restarts", () => { + clearSupervisorHints(); + process.env.OPENCLAW_NO_RESPAWN = "1"; + + const result = respawnGatewayProcessForUpdate(); + + expect(result).toEqual({ mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" }); + expect(spawnMock).not.toHaveBeenCalled(); + }); + + it("allows detached respawn on unmanaged Windows during updates", () => { + clearSupervisorHints(); + setPlatform("win32"); + process.execArgv = []; + process.argv = [ + "C:\\Program Files\\node.exe", + "C:\\openclaw\\dist\\index.js", + "gateway", + "run", + ]; + spawnMock.mockReturnValue({ pid: 5151, unref: vi.fn(), kill: vi.fn() }); + + const result = respawnGatewayProcessForUpdate(); + + expect(result.mode).toBe("spawned"); + expect(result.pid).toBe(5151); + expect(spawnMock).toHaveBeenCalledWith( + process.execPath, + ["C:\\openclaw\\dist\\index.js", "gateway", "run"], + expect.objectContaining({ + detached: true, + env: process.env, + stdio: "inherit", + }), + ); + }); +}); diff --git a/src/infra/process-respawn.ts b/src/infra/process-respawn.ts index 9fac4214600..8a70660d3da 100644 --- a/src/infra/process-respawn.ts +++ b/src/infra/process-respawn.ts @@ -1,4 +1,4 @@ -import { spawn } from "node:child_process"; +import { spawn, type ChildProcess } from "node:child_process"; import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js"; import { formatErrorMessage } from "./errors.js"; import { triggerOpenClawRestart } from "./restart.js"; @@ -12,11 +12,26 @@ export type GatewayRespawnResult = { detail?: string; }; +export type GatewayUpdateRespawnResult = GatewayRespawnResult & { + child?: ChildProcess; +}; + function isTruthy(value: string | undefined): boolean { const normalized = normalizeOptionalLowercaseString(value); return normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on"; } +function spawnDetachedGatewayProcess(): { child: ChildProcess; pid?: number } { + const args = [...process.execArgv, ...process.argv.slice(1)]; + const child = spawn(process.execPath, args, { + env: process.env, + detached: true, + stdio: "inherit", + }); + child.unref(); + return { child, pid: child.pid ?? undefined }; +} + /** * Attempt to restart this process with a fresh PID. * - supervised environments (launchd/systemd/schtasks): caller should exit and let supervisor restart @@ -53,16 +68,46 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult { } try { - const args = [...process.execArgv, ...process.argv.slice(1)]; - const child = spawn(process.execPath, args, { - env: process.env, - detached: true, - stdio: "inherit", - }); - child.unref(); - return { mode: "spawned", pid: child.pid ?? undefined }; + const { pid } = spawnDetachedGatewayProcess(); + return { mode: "spawned", pid }; } catch (err) { const detail = formatErrorMessage(err); return { mode: "failed", detail }; } } + +/** + * Update restarts must replace the OS process so the new code runs from a + * fresh module graph after package files have changed on disk. + * + * Unlike the generic restart path, update mode allows detached respawn on + * unmanaged Windows installs because there is no safe in-process fallback once + * the installed package contents have been replaced. + */ +export function respawnGatewayProcessForUpdate(): GatewayUpdateRespawnResult { + if (isTruthy(process.env.OPENCLAW_NO_RESPAWN)) { + return { mode: "disabled", detail: "OPENCLAW_NO_RESPAWN" }; + } + const supervisor = detectRespawnSupervisor(process.env); + if (supervisor) { + if (supervisor === "schtasks") { + const restart = triggerOpenClawRestart(); + if (!restart.ok) { + return { + mode: "failed", + detail: restart.detail ?? `${restart.method} restart failed`, + }; + } + } + return { mode: "supervised" }; + } + try { + const { child, pid } = spawnDetachedGatewayProcess(); + return { mode: "spawned", pid, child }; + } catch (err) { + return { + mode: "failed", + detail: formatErrorMessage(err), + }; + } +} diff --git a/src/infra/restart-sentinel.test.ts b/src/infra/restart-sentinel.test.ts index ff06d701290..15338547566 100644 --- a/src/infra/restart-sentinel.test.ts +++ b/src/infra/restart-sentinel.test.ts @@ -7,8 +7,10 @@ import { DEFAULT_RESTART_SUCCESS_CONTINUATION_MESSAGE, buildRestartSuccessContinuation, consumeRestartSentinel, + finalizeUpdateRestartSentinelRunningVersion, formatDoctorNonInteractiveHint, formatRestartSentinelMessage, + markUpdateRestartSentinelFailure, readRestartSentinel, resolveRestartSentinelPath, summarizeRestartSentinel, @@ -184,6 +186,55 @@ describe("restart sentinel", () => { expect(trimLogTail("hello\n")).toBe("hello"); expect(trimLogTail(undefined)).toBeNull(); }); + + it("writes the running version back to update sentinels on startup", async () => { + await withRestartSentinelStateDir(async () => { + await writeRestartSentinel({ + kind: "update", + status: "ok", + ts: Date.now(), + stats: { + after: { version: "expected-version" }, + }, + }); + + await finalizeUpdateRestartSentinelRunningVersion("actual-version"); + + await expect(readRestartSentinel()).resolves.toMatchObject({ + payload: { + kind: "update", + stats: { + after: { + version: "actual-version", + }, + }, + }, + }); + }); + }); + + it("marks update restart failures with a stable reason", async () => { + await withRestartSentinelStateDir(async () => { + await writeRestartSentinel({ + kind: "update", + status: "ok", + ts: Date.now(), + stats: {}, + }); + + await markUpdateRestartSentinelFailure("restart-unhealthy"); + + await expect(readRestartSentinel()).resolves.toMatchObject({ + payload: { + kind: "update", + status: "error", + stats: { + reason: "restart-unhealthy", + }, + }, + }); + }); + }); }); describe("restart success continuation", () => { diff --git a/src/infra/restart-sentinel.ts b/src/infra/restart-sentinel.ts index 3dfd9088ffa..879d55658ec 100644 --- a/src/infra/restart-sentinel.ts +++ b/src/infra/restart-sentinel.ts @@ -2,6 +2,7 @@ import fs from "node:fs/promises"; import path from "node:path"; import { formatCliCommand } from "../cli/command-format.js"; import { resolveStateDir } from "../config/paths.js"; +import { resolveRuntimeServiceVersion } from "../version.js"; import { writeJsonAtomic } from "./json-files.js"; export type RestartSentinelLog = { @@ -87,6 +88,70 @@ export async function writeRestartSentinel( return filePath; } +function isPlainRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +function cloneRestartSentinelPayload(payload: RestartSentinelPayload): RestartSentinelPayload { + return JSON.parse(JSON.stringify(payload)) as RestartSentinelPayload; +} + +export async function rewriteRestartSentinel( + rewrite: (payload: RestartSentinelPayload) => RestartSentinelPayload | null, + env: NodeJS.ProcessEnv = process.env, +): Promise { + const current = await readRestartSentinel(env); + if (!current) { + return null; + } + const nextPayload = rewrite(cloneRestartSentinelPayload(current.payload)); + if (!nextPayload) { + return null; + } + await writeRestartSentinel(nextPayload, env); + return { + version: 1, + payload: nextPayload, + }; +} + +export async function finalizeUpdateRestartSentinelRunningVersion( + version = resolveRuntimeServiceVersion(process.env), + env: NodeJS.ProcessEnv = process.env, +): Promise { + return await rewriteRestartSentinel((payload) => { + if (payload.kind !== "update") { + return null; + } + const stats = payload.stats ? { ...payload.stats } : {}; + const after = isPlainRecord(stats.after) ? { ...stats.after } : {}; + after.version = version; + stats.after = after; + return { + ...payload, + stats, + }; + }, env); +} + +export async function markUpdateRestartSentinelFailure( + reason: string, + env: NodeJS.ProcessEnv = process.env, +): Promise { + return await rewriteRestartSentinel((payload) => { + if (payload.kind !== "update") { + return null; + } + const stats = payload.stats ? { ...payload.stats } : {}; + stats.reason = reason; + return { + ...payload, + status: "error", + stats, + }; + }, env); +} + export async function removeRestartSentinelFile(filePath: string | null | undefined) { if (!filePath) { return; diff --git a/src/infra/restart.ts b/src/infra/restart.ts index f524fa30167..3bfed4024b1 100644 --- a/src/infra/restart.ts +++ b/src/infra/restart.ts @@ -37,6 +37,7 @@ let preRestartCheck: (() => number) | null = null; let restartCycleToken = 0; let emittedRestartToken = 0; let consumedRestartToken = 0; +let emittedRestartReason: string | undefined; let lastRestartEmittedAt = 0; let pendingRestartTimer: ReturnType | null = null; let pendingRestartDueAt = 0; @@ -45,6 +46,10 @@ let pendingRestartEmitHooks: RestartEmitHooks | undefined; let pendingRestartPreparing = false; const activeDeferralPolls = new Set>(); +function shouldPreferRestartReason(next?: string, current?: string): boolean { + return next === "update.run" && current !== "update.run"; +} + function hasUnconsumedRestartSignal(): boolean { return emittedRestartToken > consumedRestartToken; } @@ -241,7 +246,7 @@ export function setPreRestartDeferralCheck(fn: () => number): void { * Both scheduleGatewaySigusr1Restart and the config watcher should use this * to ensure only one restart fires. */ -export function emitGatewayRestart(): boolean { +export function emitGatewayRestart(reasonOverride?: string): boolean { if (hasUnconsumedRestartSignal()) { clearActiveDeferralPolls(); clearPendingScheduledRestart(); @@ -251,6 +256,7 @@ export function emitGatewayRestart(): boolean { clearPendingScheduledRestart(); const cycleToken = ++restartCycleToken; emittedRestartToken = cycleToken; + emittedRestartReason = reasonOverride ?? pendingRestartReason; authorizeGatewaySigusr1Restart(); try { if (process.listenerCount("SIGUSR1") > 0) { @@ -261,6 +267,7 @@ export function emitGatewayRestart(): boolean { } catch { // Roll back the cycle marker so future restart requests can still proceed. emittedRestartToken = consumedRestartToken; + emittedRestartReason = undefined; return false; } lastRestartEmittedAt = Date.now(); @@ -307,6 +314,10 @@ export function consumeGatewaySigusr1RestartAuthorization(): boolean { return true; } +export function peekGatewaySigusr1RestartReason(): string | undefined { + return hasUnconsumedRestartSignal() ? emittedRestartReason : undefined; +} + /** * Mark the currently emitted SIGUSR1 restart cycle as consumed by the run loop. * This explicitly advances the cycle state instead of resetting emit guards inside @@ -315,6 +326,7 @@ export function consumeGatewaySigusr1RestartAuthorization(): boolean { export function markGatewaySigusr1RestartHandled(): void { if (hasUnconsumedRestartSignal()) { consumedRestartToken = emittedRestartToken; + emittedRestartReason = undefined; } } @@ -337,7 +349,10 @@ function updatePendingRestartEmitHooks(hooks?: RestartEmitHooks): void { } } -async function emitPreparedGatewayRestart(hooks?: RestartEmitHooks): Promise { +async function emitPreparedGatewayRestart( + hooks?: RestartEmitHooks, + reasonOverride?: string, +): Promise { let nextHooks = hooks ?? pendingRestartEmitHooks; if (!hooks) { pendingRestartEmitHooks = undefined; @@ -363,7 +378,7 @@ async function emitPreparedGatewayRestart(hooks?: RestartEmitHooks): Promise undefined); } @@ -380,6 +395,7 @@ export function deferGatewayRestartUntilIdle(opts: { emitHooks?: RestartEmitHooks; pollMs?: number; maxWaitMs?: number; + reason?: string; }): void { const pollMsRaw = opts.pollMs ?? DEFAULT_DEFERRAL_POLL_MS; const pollMs = Math.max(10, Math.floor(pollMsRaw)); @@ -393,12 +409,12 @@ export function deferGatewayRestartUntilIdle(opts: { pending = opts.getPendingCount(); } catch (err) { opts.hooks?.onCheckError?.(err); - void emitPreparedGatewayRestart(opts.emitHooks); + void emitPreparedGatewayRestart(opts.emitHooks, opts.reason); return; } if (pending <= 0) { opts.hooks?.onReady?.(); - void emitPreparedGatewayRestart(opts.emitHooks); + void emitPreparedGatewayRestart(opts.emitHooks, opts.reason); return; } @@ -413,14 +429,14 @@ export function deferGatewayRestartUntilIdle(opts: { clearInterval(poll); activeDeferralPolls.delete(poll); opts.hooks?.onCheckError?.(err); - void emitPreparedGatewayRestart(opts.emitHooks); + void emitPreparedGatewayRestart(opts.emitHooks, opts.reason); return; } if (current <= 0) { clearInterval(poll); activeDeferralPolls.delete(poll); opts.hooks?.onReady?.(); - void emitPreparedGatewayRestart(opts.emitHooks); + void emitPreparedGatewayRestart(opts.emitHooks, opts.reason); return; } const elapsedMs = Date.now() - startedAt; @@ -432,7 +448,7 @@ export function deferGatewayRestartUntilIdle(opts: { clearInterval(poll); activeDeferralPolls.delete(poll); opts.hooks?.onTimeout?.(current, elapsedMs); - void emitPreparedGatewayRestart(opts.emitHooks); + void emitPreparedGatewayRestart(opts.emitHooks, opts.reason); } }, pollMs); activeDeferralPolls.add(poll); @@ -622,6 +638,9 @@ export function scheduleGatewaySigusr1Restart(opts?: { const requestedDueAt = nowMs + delayMs + cooldownMsApplied; if (hasUnconsumedRestartSignal()) { + if (shouldPreferRestartReason(reason, emittedRestartReason)) { + emittedRestartReason = reason; + } restartLog.warn( `restart request coalesced (already in-flight) reason=${reason ?? "unspecified"} ${formatRestartAudit(opts?.audit)}`, ); @@ -646,6 +665,9 @@ export function scheduleGatewaySigusr1Restart(opts?: { ); clearPendingScheduledRestart(); } else { + if (shouldPreferRestartReason(reason, pendingRestartReason)) { + pendingRestartReason = reason; + } restartLog.warn( `restart request coalesced (already scheduled) reason=${reason ?? "unspecified"} pendingReason=${pendingRestartReason ?? "unspecified"} delayMs=${remainingMs} ${formatRestartAudit(opts?.audit)}`, ); @@ -668,19 +690,21 @@ export function scheduleGatewaySigusr1Restart(opts?: { pendingRestartEmitHooks = opts?.emitHooks; pendingRestartTimer = setTimeout( () => { + const scheduledReason = pendingRestartReason; pendingRestartTimer = null; pendingRestartDueAt = 0; pendingRestartReason = undefined; pendingRestartPreparing = true; const pendingCheck = preRestartCheck; if (!pendingCheck) { - void emitPreparedGatewayRestart(); + void emitPreparedGatewayRestart(undefined, scheduledReason); return; } const cfg = getRuntimeConfig(); deferGatewayRestartUntilIdle({ getPendingCount: pendingCheck, maxWaitMs: cfg.gateway?.reload?.deferralTimeoutMs, + reason: scheduledReason, }); }, Math.max(0, requestedDueAt - nowMs), @@ -706,6 +730,7 @@ export const __testing = { restartCycleToken = 0; emittedRestartToken = 0; consumedRestartToken = 0; + emittedRestartReason = undefined; lastRestartEmittedAt = 0; clearActiveDeferralPolls(); clearPendingScheduledRestart(); diff --git a/src/infra/update-runner.test.ts b/src/infra/update-runner.test.ts index 4a3d5cb6ec8..66e2f67a5ba 100644 --- a/src/infra/update-runner.test.ts +++ b/src/infra/update-runner.test.ts @@ -1509,7 +1509,7 @@ describe("runGatewayUpdate", () => { }); expect(result.status).toBe("error"); - expect(result.reason).toBe("global install verify"); + expect(result.reason).toBe("global-install-failed"); expect(result.after?.version).toBe("2.0.0"); expect(result.steps.at(-1)?.stderrTail).toContain( "expected installed version 2026.3.23-2, found 2.0.0", @@ -1539,7 +1539,7 @@ describe("runGatewayUpdate", () => { const result = await runWithCommand(runCommand, { cwd: pkgRoot }); expect(result.status).toBe("error"); - expect(result.reason).toBe("global install verify"); + expect(result.reason).toBe("global-install-failed"); expect(result.steps.at(-1)?.stderrTail).toContain( `missing packaged dist file ${WHATSAPP_LIGHT_RUNTIME_API}`, ); diff --git a/src/infra/update-runner.ts b/src/infra/update-runner.ts index 2623382b8b9..d210b4abaf8 100644 --- a/src/infra/update-runner.ts +++ b/src/infra/update-runner.ts @@ -26,6 +26,7 @@ import { detectGlobalInstallManagerForRoot, resolveGlobalInstallTarget, resolveGlobalInstallSpec, + type GlobalInstallManager, } from "./update-global.js"; import { managerInstallIgnoreScriptsArgs, @@ -123,6 +124,32 @@ type UpdateRunnerOptions = { progress?: UpdateStepProgress; }; +export type UpdateInstallSurface = + | { + kind: "git"; + mode: "git"; + root: string; + packageRoot: string; + } + | { + kind: "global"; + mode: GlobalInstallManager; + root: string; + packageRoot: string; + } + | { + kind: "package-root"; + mode: "unknown"; + root: string; + packageRoot: string; + } + | { + kind: "missing"; + mode: "unknown"; + root?: string; + packageRoot?: undefined; + }; + function mapManagerResolutionFailure( reason: UpdatePackageManagerFailureReason, ): UpdateRunResult["reason"] { @@ -132,7 +159,6 @@ function mapManagerResolutionFailure( const DEFAULT_TIMEOUT_MS = 20 * 60_000; const MAX_LOG_CHARS = 8000; const PREFLIGHT_MAX_COMMITS = 10; -const START_DIRS = ["cwd", "argv1", "process"]; const DEFAULT_PACKAGE_NAME = "openclaw"; const CORE_PACKAGE_NAMES = new Set([DEFAULT_PACKAGE_NAME]); const PREFLIGHT_TEMP_PREFIX = @@ -535,18 +561,98 @@ function shouldRunDevPreflightLint(): boolean { return process.platform !== "win32"; } -export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise { - const startedAt = Date.now(); +function normalizeFallbackFailureReason(stepName: string): NonNullable { + switch (stepName) { + case "global update": + case "global update (omit optional)": + case "global install verify": + return "global-install-failed"; + case "openclaw doctor": + return "doctor-failed"; + case "ui:build (post-doctor repair)": + return "ui-build-failed"; + default: + return "unexpected-error"; + } +} + +async function buildUpdateCommandRunner( + runCommand?: CommandRunner, +): Promise<{ defaultCommandEnv: NodeJS.ProcessEnv | undefined; runCommand: CommandRunner }> { const defaultCommandEnv = await createGlobalInstallEnv(); - const runCommand = - opts.runCommand ?? - (async (argv, options) => { + if (runCommand) { + return { + defaultCommandEnv, + runCommand, + }; + } + return { + defaultCommandEnv, + runCommand: async (argv, options) => { const res = await runCommandWithTimeout(argv, { ...options, env: mergeCommandEnvironments(defaultCommandEnv, options.env), }); return { stdout: res.stdout, stderr: res.stderr, code: res.code }; - }); + }, + }; +} + +export async function resolveUpdateInstallSurface( + opts: Pick = {}, +): Promise { + const { runCommand } = await buildUpdateCommandRunner(opts.runCommand); + const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const candidates = buildStartDirs(opts); + const pkgRoot = await findPackageRoot(candidates); + + let gitRoot = await resolveGitRoot(runCommand, candidates, timeoutMs); + if (gitRoot && pkgRoot && path.resolve(gitRoot) !== path.resolve(pkgRoot)) { + gitRoot = null; + } + if (gitRoot && !pkgRoot) { + return { + kind: "missing", + mode: "unknown", + root: gitRoot, + }; + } + if (gitRoot && pkgRoot && path.resolve(gitRoot) === path.resolve(pkgRoot)) { + return { + kind: "git", + mode: "git", + root: gitRoot, + packageRoot: pkgRoot, + }; + } + if (!pkgRoot) { + return { + kind: "missing", + mode: "unknown", + }; + } + + const globalManager = await detectGlobalInstallManagerForRoot(runCommand, pkgRoot, timeoutMs); + if (globalManager) { + return { + kind: "global", + mode: globalManager, + root: pkgRoot, + packageRoot: pkgRoot, + }; + } + + return { + kind: "package-root", + mode: "unknown", + root: pkgRoot, + packageRoot: pkgRoot, + }; +} + +export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise { + const startedAt = Date.now(); + const { defaultCommandEnv, runCommand } = await buildUpdateCommandRunner(opts.runCommand); const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS; const progress = opts.progress; const steps: UpdateStepResult[] = []; @@ -1187,6 +1293,17 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< step("openclaw doctor", doctorArgv, gitRoot, { OPENCLAW_UPDATE_IN_PROGRESS: "1" }), ); steps.push(doctorStep); + if (doctorStep.exitCode !== 0) { + return { + status: "error", + mode: "git", + root: gitRoot, + reason: "doctor-failed", + before: { sha: beforeSha, version: beforeVersion }, + steps, + durationMs: Date.now() - startedAt, + }; + } const uiIndexHealth = await resolveControlUiDistIndexHealth({ root: gitRoot }); if (!uiIndexHealth.exists) { @@ -1213,7 +1330,7 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< status: "error", mode: "git", root: gitRoot, - reason: repairStep.name, + reason: "ui-build-failed", before: { sha: beforeSha, version: beforeVersion }, steps, durationMs: Date.now() - startedAt, @@ -1255,7 +1372,7 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< status: failedStep ? "error" : "ok", mode: "git", root: gitRoot, - reason: failedStep ? failedStep.name : undefined, + reason: failedStep ? normalizeFallbackFailureReason(failedStep.name) : undefined, before: { sha: beforeSha, version: beforeVersion }, after: { sha: afterShaStep.stdoutTail?.trim() ?? null, @@ -1273,7 +1390,7 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< return { status: "error", mode: "unknown", - reason: `no root (${START_DIRS.join(",")})`, + reason: "not-openclaw-root", steps: [], durationMs: Date.now() - startedAt, }; @@ -1324,7 +1441,9 @@ export async function runGatewayUpdate(opts: UpdateRunnerOptions = {}): Promise< status: packageUpdate.failedStep ? "error" : "ok", mode: globalManager, root: packageUpdate.verifiedPackageRoot ?? pkgRoot, - reason: packageUpdate.failedStep ? packageUpdate.failedStep.name : undefined, + reason: packageUpdate.failedStep + ? normalizeFallbackFailureReason(packageUpdate.failedStep.name) + : undefined, before: { version: beforeVersion }, after: { version: packageUpdate.afterVersion }, steps: packageUpdate.steps, diff --git a/ui/src/ui/app-gateway.node.test.ts b/ui/src/ui/app-gateway.node.test.ts index 0e774e24f2a..54a6989018b 100644 --- a/ui/src/ui/app-gateway.node.test.ts +++ b/ui/src/ui/app-gateway.node.test.ts @@ -43,6 +43,9 @@ vi.mock("./gateway.ts", async (importOriginal) => { readonly start = vi.fn(); readonly stop = vi.fn(); readonly request = vi.fn(async (method: string) => { + if (method === "update.status") { + return { sentinel: null }; + } if (method === "models.authStatus") { return { ts: 0, providers: [] }; } @@ -154,6 +157,8 @@ function createHost(): TestGatewayHost { assistantAgentId: null, localMediaPreviewRoots: [], serverVersion: null, + pendingUpdateExpectedVersion: null, + updateStatusBanner: null, sessionKey: "main", chatMessages: [], chatQueue: [], @@ -283,6 +288,117 @@ describe("connectGateway", () => { }); }); + it("clears pending update verification when the restarted version matches", async () => { + const host = createHost(); + host.pendingUpdateExpectedVersion = "2.0.0"; + + connectGateway(host); + const client = gatewayClientInstances[0]; + expect(client).toBeDefined(); + client.request.mockImplementation(async (method: string) => { + if (method === "update.status") { + return { + sentinel: { + kind: "update", + status: "ok", + stats: { + after: { version: "2.0.0" }, + }, + }, + }; + } + return {}; + }); + + client.emitHello({ + type: "hello-ok", + protocol: 3, + server: { version: "2.0.0" }, + snapshot: {}, + }); + + await vi.waitFor(() => { + expect(host.pendingUpdateExpectedVersion).toBeNull(); + }); + expect(host.updateStatusBanner).toBeNull(); + }); + + it("shows a hard error when the restarted version does not match the expected update", async () => { + const host = createHost(); + host.pendingUpdateExpectedVersion = "2.0.0"; + + connectGateway(host); + const client = gatewayClientInstances[0]; + expect(client).toBeDefined(); + client.request.mockImplementation(async (method: string) => { + if (method === "update.status") { + return { + sentinel: { + kind: "update", + status: "ok", + stats: { + after: { version: "1.0.0" }, + }, + }, + }; + } + return {}; + }); + + client.emitHello({ + type: "hello-ok", + protocol: 3, + server: { version: "1.0.0" }, + snapshot: {}, + }); + + await vi.waitFor(() => { + expect(host.pendingUpdateExpectedVersion).toBeNull(); + expect(host.updateStatusBanner?.text).toContain( + "Update installed but running version did not change", + ); + }); + }); + + it("surfaces post-restart sentinel failures after reconnect", async () => { + const host = createHost(); + host.pendingUpdateExpectedVersion = "2.0.0"; + + connectGateway(host); + const client = gatewayClientInstances[0]; + expect(client).toBeDefined(); + client.request.mockImplementation(async (method: string) => { + if (method === "update.status") { + return { + sentinel: { + kind: "update", + status: "error", + stats: { + reason: "restart-unhealthy", + after: { version: "1.0.0" }, + }, + }, + }; + } + return {}; + }); + + client.emitHello({ + type: "hello-ok", + protocol: 3, + server: { version: "1.0.0" }, + snapshot: {}, + }); + + await vi.waitFor(() => { + expect(host.pendingUpdateExpectedVersion).toBeNull(); + expect(host.updateStatusBanner).toEqual({ + tone: "danger", + text: "Update error: restart-unhealthy. The replacement process never became healthy and the previous process stayed up.", + }); + }); + }); + it("ignores stale client onClose callbacks after reconnect", () => { const host = createHost(); diff --git a/ui/src/ui/app-gateway.ts b/ui/src/ui/app-gateway.ts index a80c3e1b2b5..65214fc79b2 100644 --- a/ui/src/ui/app-gateway.ts +++ b/ui/src/ui/app-gateway.ts @@ -97,6 +97,8 @@ type GatewayHost = { assistantAvatar: string | null; assistantAgentId: string | null; serverVersion: string | null; + pendingUpdateExpectedVersion: string | null; + updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null; sessionKey: string; chatRunId: string | null; pendingAbort?: { runId: string; sessionKey: string } | null; @@ -157,6 +159,94 @@ type ConnectGatewayOptions = { reason?: "initial" | "seq-gap"; }; +type UpdateRestartStatusResponse = { + sentinel?: { + kind?: string; + status?: string; + stats?: { + reason?: string | null; + after?: { version?: string | null } | null; + } | null; + } | null; +}; + +function resolveUpdateVerificationBanner(params: { + expectedVersion: string; + actualVersion: string | null; +}): { tone: "danger"; text: string } { + const actualSuffix = params.actualVersion + ? ` Expected v${params.expectedVersion}, running v${params.actualVersion}.` + : ""; + return { + tone: "danger", + text: `Update installed but running version did not change — restart may have been blocked.${actualSuffix}`, + }; +} + +function resolvePostRestartUpdateBanner(reason: string | null | undefined): { + tone: "danger"; + text: string; +} { + const normalizedReason = reason?.trim() || "restart-unhealthy"; + const guidance = + normalizedReason === "restart-unhealthy" + ? "The replacement process never became healthy and the previous process stayed up." + : "Check the gateway logs for the replacement failure."; + return { + tone: "danger", + text: `Update error: ${normalizedReason}. ${guidance}`, + }; +} + +async function verifyPendingUpdateVersion( + host: GatewayHost, + client: GatewayBrowserClient, +): Promise { + const expectedVersion = host.pendingUpdateExpectedVersion?.trim(); + if (!expectedVersion) { + return; + } + const deadline = Date.now() + 10_000; + while (host.client === client && host.connected && Date.now() < deadline) { + let response: UpdateRestartStatusResponse | null = null; + try { + response = await client.request("update.status", {}); + } catch { + response = null; + } + const sentinel = response?.sentinel; + const actualVersion = sentinel?.stats?.after?.version?.trim() || null; + if (sentinel?.kind === "update" && actualVersion) { + host.pendingUpdateExpectedVersion = null; + if (sentinel.status && sentinel.status !== "ok") { + host.updateStatusBanner = resolvePostRestartUpdateBanner(sentinel.stats?.reason ?? null); + return; + } + if (actualVersion !== expectedVersion) { + host.updateStatusBanner = resolveUpdateVerificationBanner({ + expectedVersion, + actualVersion, + }); + } + return; + } + await new Promise((resolve) => { + setTimeout(resolve, 250); + }); + } + if (host.client !== client || !host.connected) { + return; + } + const currentVersion = host.hello?.server?.version?.trim() || null; + host.pendingUpdateExpectedVersion = null; + if (currentVersion !== expectedVersion) { + host.updateStatusBanner = resolveUpdateVerificationBanner({ + expectedVersion, + actualVersion: currentVersion, + }); + } +} + export function resolveControlUiClientVersion(params: { gatewayUrl: string; serverVersion: string | null; @@ -344,6 +434,7 @@ export function connectGateway(host: GatewayHost, options?: ConnectGatewayOption void refreshActiveTab(host as unknown as Parameters[0]); // Re-run push reconciliation now that the gateway client is available. void host.reconcileWebPushState?.(); + void verifyPendingUpdateVersion(host, client); }, onClose: ({ code, reason, error }) => { if (host.client !== client) { diff --git a/ui/src/ui/app-render.ts b/ui/src/ui/app-render.ts index b8af6974b24..0fee5fa84d4 100644 --- a/ui/src/ui/app-render.ts +++ b/ui/src/ui/app-render.ts @@ -1482,6 +1482,11 @@ export function renderApp(state: AppViewState) {
+ ${state.updateStatusBanner + ? html`` + : nothing} ${state.updateAvailable && state.updateAvailable.latestVersion !== state.updateAvailable.currentVersion && !isUpdateBannerDismissed(state.updateAvailable) diff --git a/ui/src/ui/app-settings.ts b/ui/src/ui/app-settings.ts index c1e990ef01b..c647524519a 100644 --- a/ui/src/ui/app-settings.ts +++ b/ui/src/ui/app-settings.ts @@ -744,7 +744,7 @@ function buildAttentionItems(host: SettingsAppHost) { // Use the same predicate as the Overview card so the two stay in sync. // Without this, a `missing` provider shows up on the card but never // produces the re-auth attention callout. - const monitored = modelAuth.providers.filter(isMonitoredAuthProvider); + const monitored = (modelAuth.providers ?? []).filter(isMonitoredAuthProvider); const expiredProviders = monitored.filter( (p) => p.status === "expired" || p.status === "missing", ); diff --git a/ui/src/ui/app-view-state.ts b/ui/src/ui/app-view-state.ts index ca7d09b05bf..af9d79ae47d 100644 --- a/ui/src/ui/app-view-state.ts +++ b/ui/src/ui/app-view-state.ts @@ -176,6 +176,8 @@ export type AppViewState = { configSearchQuery: string; configActiveSection: string | null; configActiveSubsection: string | null; + pendingUpdateExpectedVersion: string | null; + updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null; communicationsFormMode: "form" | "raw"; communicationsSearchQuery: string; communicationsActiveSection: string | null; diff --git a/ui/src/ui/app.ts b/ui/src/ui/app.ts index a0cb27fbe27..9e96a408d1e 100644 --- a/ui/src/ui/app.ts +++ b/ui/src/ui/app.ts @@ -284,6 +284,8 @@ export class OpenClawApp extends LitElement { @state() configSearchQuery = ""; @state() configActiveSection: string | null = null; @state() configActiveSubsection: string | null = null; + @state() pendingUpdateExpectedVersion: string | null = null; + @state() updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null = null; @state() communicationsFormMode: "form" | "raw" = "form"; @state() communicationsSearchQuery = ""; @state() communicationsActiveSection: string | null = null; diff --git a/ui/src/ui/controllers/agents.test.ts b/ui/src/ui/controllers/agents.test.ts index 6f926b2db4b..ea5274127e4 100644 --- a/ui/src/ui/controllers/agents.test.ts +++ b/ui/src/ui/controllers/agents.test.ts @@ -73,6 +73,8 @@ function createSaveState(): { configSearchQuery: "", configActiveSection: null, configActiveSubsection: null, + pendingUpdateExpectedVersion: null, + updateStatusBanner: null, lastError: null, }, request, diff --git a/ui/src/ui/controllers/config.test.ts b/ui/src/ui/controllers/config.test.ts index ae19523ecd1..1ccd11b5984 100644 --- a/ui/src/ui/controllers/config.test.ts +++ b/ui/src/ui/controllers/config.test.ts @@ -37,6 +37,8 @@ function createState(): ConfigState { configValid: null, connected: false, lastError: null, + pendingUpdateExpectedVersion: null, + updateStatusBanner: null, updateRunning: false, }; } @@ -554,6 +556,44 @@ describe("runUpdate", () => { await runUpdate(state); - expect(state.lastError).toBe("Update error: network unavailable"); + expect(state.updateStatusBanner).toEqual({ + tone: "danger", + text: "Update error: network unavailable. See the gateway logs for the exact failure and retry once the cause is fixed.", + }); + }); + + it("surfaces skipped updates with actionable guidance", async () => { + const request = vi.fn().mockResolvedValue({ + ok: false, + result: { status: "skipped", reason: "dirty" }, + }); + const state = createState(); + state.connected = true; + state.client = { request } as unknown as ConfigState["client"]; + + await runUpdate(state); + + expect(state.updateStatusBanner).toEqual({ + tone: "warn", + text: "Update skipped: dirty. Commit or stash changes, then retry.", + }); + }); + + it("stores the expected post-update version when update.run succeeds", async () => { + const request = vi.fn().mockResolvedValue({ + ok: true, + result: { + status: "ok", + after: { version: "2.0.0" }, + }, + }); + const state = createState(); + state.connected = true; + state.client = { request } as unknown as ConfigState["client"]; + + await runUpdate(state); + + expect(state.pendingUpdateExpectedVersion).toBe("2.0.0"); + expect(state.updateStatusBanner).toBeNull(); }); }); diff --git a/ui/src/ui/controllers/config.ts b/ui/src/ui/controllers/config.ts index 35e6c49a46f..ae8d6ba8291 100644 --- a/ui/src/ui/controllers/config.ts +++ b/ui/src/ui/controllers/config.ts @@ -34,6 +34,8 @@ export type ConfigState = { configSearchQuery: string; configActiveSection: string | null; configActiveSubsection: string | null; + pendingUpdateExpectedVersion: string | null; + updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null; lastError: string | null; }; @@ -138,6 +140,39 @@ function serializeFormForSubmit(state: ConfigState): string { type ConfigSubmitMethod = "config.set" | "config.apply"; type ConfigSubmitBusyKey = "configSaving" | "configApplying"; +function resolveUpdateStatusBanner(params: { status?: string; reason?: string }): { + tone: "danger" | "warn" | "info"; + text: string; +} { + const status = (params.status ?? "error").trim() || "error"; + const reason = (params.reason ?? "unexpected-error").trim() || "unexpected-error"; + const tone = status === "skipped" ? "warn" : "danger"; + const guidance = + { + dirty: "Commit or stash changes, then retry.", + "no-upstream": "Set an upstream branch, then retry.", + "not-git-install": + "Not a git checkout. Run `openclaw update` from the CLI for a global reinstall.", + "not-openclaw-root": + "Run the update from an OpenClaw checkout or use the CLI global reinstall path.", + "deps-install-failed": "Dependency install failed. Fix the install error and retry.", + "build-failed": "Build failed. Fix the build error and retry.", + "ui-build-failed": "The control UI rebuild failed. Fix the UI build error and retry.", + "global-install-failed": + "The global package install did not verify on disk. Retry or reinstall from the CLI.", + "restart-disabled": "The update was not applied because gateway restarts are disabled. Enable restarts in config, then retry — or run `openclaw update` from the CLI.", + "restart-unavailable": + "This global install cannot be safely replaced while restarts are disabled and no supervisor is present.", + "restart-unhealthy": + "The replacement process never became healthy. The previous process stayed up so you can recover.", + "doctor-failed": "Doctor repair failed. Run `openclaw doctor --non-interactive` and retry.", + }[reason] ?? "See the gateway logs for the exact failure and retry once the cause is fixed."; + return { + tone, + text: `Update ${status}: ${reason}. ${guidance}`, + }; +} + async function submitConfigChange( state: ConfigState, method: ConfigSubmitMethod, @@ -193,20 +228,27 @@ export async function runUpdate(state: ConfigState) { } state.updateRunning = true; state.lastError = null; + state.updateStatusBanner = null; try { const res = await state.client.request<{ ok?: boolean; - result?: { status?: string; reason?: string }; + result?: { status?: string; reason?: string; after?: { version?: string | null } }; }>("update.run", { sessionKey: state.applySessionKey, }); - if (res && res.ok === false) { - const status = res.result?.status ?? "error"; - const reason = res.result?.reason ?? "Update failed."; - state.lastError = `Update ${status}: ${reason}`; + const status = res.result?.status ?? (res.ok === true ? "ok" : "error"); + if (status === "ok" && res.ok === true) { + state.pendingUpdateExpectedVersion = res.result?.after?.version ?? null; + return; } + state.pendingUpdateExpectedVersion = null; + state.updateStatusBanner = resolveUpdateStatusBanner({ + status, + reason: res.result?.reason, + }); } catch (err) { state.lastError = String(err); + state.pendingUpdateExpectedVersion = null; } finally { state.updateRunning = false; }