From be8a3617d92e2c557cbe42c64c896a13a32b1f51 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 06:35:25 +0100 Subject: [PATCH] fix: verify updated gateway version after package restart --- CHANGELOG.md | 2 +- docs/cli/update.md | 2 +- src/cli/daemon-cli/restart-health.test.ts | 32 +++++++ src/cli/daemon-cli/restart-health.ts | 4 + src/cli/update-cli.test.ts | 46 +++++++++ src/cli/update-cli/update-command.ts | 108 +++++++++++++--------- 6 files changed, 146 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab3bcb00e8f..ca514501e9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -101,7 +101,7 @@ Docs: https://docs.openclaw.ai start browser-capable CLI node services through the restored `openclaw node start` command, and show an actionable browser-control error when the local control service is missing. Fixes #66637. -- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis. +- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, including fallback restarts and JSON mode, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis. - Plugins/runtime deps: surface activated plugin load failures in health and fail package-update restart verification or doctor repair when bundled runtime deps still cannot load, avoiding false-success repairs. (#71883) Thanks @Solvely-Colin. - Gateway/Linux: include fnm `aliases/default/bin` in generated service PATHs and let doctor accept either modern fnm aliases or the legacy `current/bin` symlink, avoiding false PATH repair prompts. Fixes #68169. Thanks @richard-scott. - Installer/Linux: run apt installs with noninteractive dpkg and needrestart settings so fresh Ubuntu 24.04 `curl | bash` installs do not hang while installing Node.js, Git, or build tools. Fixes #41146. Thanks @iht76, @alexcarv318, @cs3gallery, @firofame, and @cgdusek. diff --git a/docs/cli/update.md b/docs/cli/update.md index 430f832cdf4..1c998ea1d19 100644 --- a/docs/cli/update.md +++ b/docs/cli/update.md @@ -32,7 +32,7 @@ openclaw --update ## Options -- `--no-restart`: skip restarting the Gateway service after a successful update. +- `--no-restart`: skip restarting the Gateway service after a successful update. Package-manager updates that do restart the Gateway verify the restarted service reports the expected updated version before the command succeeds. - `--channel `: set the update channel (git + npm; persisted in config). - `--tag `: override the package target for this update only. For package installs, `main` maps to `github:openclaw/openclaw#main`. - `--dry-run`: preview planned update actions (channel/tag/target/restart flow) without writing config, installing, syncing plugins, or restarting. diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts index ea17602d19b..8077a2877f5 100644 --- a/src/cli/daemon-cli/restart-health.test.ts +++ b/src/cli/daemon-cli/restart-health.test.ts @@ -305,6 +305,38 @@ describe("inspectGatewayRestart", () => { expect(snapshot.versionMismatch).toBeUndefined(); }); + it("stops waiting once the restarted gateway reports the wrong version", async () => { + probeGateway.mockResolvedValue({ + ok: true, + close: null, + server: { version: "2026.4.23", connId: "old" }, + }); + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }], + hints: [], + }); + + const { waitForGatewayHealthyRestart } = await import("./restart-health.js"); + const snapshot = await waitForGatewayHealthyRestart({ + service: makeGatewayService({ status: "running", pid: 8000 }), + port: 18789, + expectedVersion: "2026.4.24", + }); + + expect(snapshot).toMatchObject({ + healthy: false, + waitOutcome: "version-mismatch", + elapsedMs: 0, + versionMismatch: { + expected: "2026.4.24", + actual: "2026.4.23", + }, + }); + expect(sleep).not.toHaveBeenCalled(); + }); + it("marks matching-version restarts unhealthy when activated plugins failed to load", async () => { probeGateway.mockResolvedValue({ ok: true, diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts index 83a08d3b317..1a01a1ef20a 100644 --- a/src/cli/daemon-cli/restart-health.ts +++ b/src/cli/daemon-cli/restart-health.ts @@ -26,6 +26,7 @@ const WINDOWS_STOPPED_FREE_EARLY_EXIT_GRACE_MS = 90_000; export type GatewayRestartWaitOutcome = | "healthy" | "plugin-errors" + | "version-mismatch" | "stale-pids" | "stopped-free" | "timeout"; @@ -414,6 +415,9 @@ export async function waitForGatewayHealthyRestart(params: { if (snapshot.activatedPluginErrors?.length) { return withWaitContext(snapshot, "plugin-errors", attempt * delayMs); } + if (snapshot.versionMismatch) { + return withWaitContext(snapshot, "version-mismatch", attempt * delayMs); + } if (snapshot.staleGatewayPids.length > 0 && snapshot.runtime.status !== "running") { return withWaitContext(snapshot, "stale-pids", attempt * delayMs); } diff --git a/src/cli/update-cli.test.ts b/src/cli/update-cli.test.ts index 7c0b6b9575f..44ae6434101 100644 --- a/src/cli/update-cli.test.ts +++ b/src/cli/update-cli.test.ts @@ -1645,6 +1645,52 @@ describe("update-cli", () => { expect(defaultRuntime.exit).toHaveBeenCalledWith(1); }); + it("fails a JSON package update when fallback restart leaves the old gateway running", async () => { + setupUpdatedRootRefresh({ + gatewayUpdateImpl: async () => + makeOkUpdateResult({ + mode: "npm", + root: createCaseDir("openclaw-updated-root"), + before: { version: "2026.4.23" }, + after: { version: "2026.4.24" }, + }), + }); + prepareRestartScript.mockResolvedValue(null); + probeGateway.mockResolvedValue({ + ok: true, + close: null, + server: { + version: "2026.4.23", + connId: "old-gateway", + }, + auth: { role: "operator", scopes: ["operator.read"], capability: "read_only" }, + health: null, + status: null, + presence: null, + configSnapshot: null, + connectLatencyMs: 1, + error: null, + url: "ws://127.0.0.1:18789", + }); + + await updateCommand({ yes: true, json: true }); + + expect(runRestartScript).not.toHaveBeenCalled(); + expect(runDaemonRestart).toHaveBeenCalled(); + expect(probeGateway).toHaveBeenCalledWith(expect.objectContaining({ includeDetails: true })); + expect(defaultRuntime.exit).toHaveBeenCalledWith(1); + expect(defaultRuntime.writeJson).not.toHaveBeenCalled(); + expect( + vi + .mocked(defaultRuntime.error) + .mock.calls.map((call) => String(call[0])) + .join("\n"), + ).toContain( + "Gateway version mismatch: expected 2026.4.24, running gateway reported 2026.4.23.", + ); + expect(doctorCommand).not.toHaveBeenCalled(); + }); + it("fails a package update when the restarted gateway reports activated plugin load errors", async () => { setupUpdatedRootRefresh({ gatewayUpdateImpl: async () => diff --git a/src/cli/update-cli/update-command.ts b/src/cli/update-cli/update-command.ts index 238419a391e..5f2e7b51a60 100644 --- a/src/cli/update-cli/update-command.ts +++ b/src/cli/update-cli/update-command.ts @@ -750,6 +750,52 @@ async function maybeRestartService(params: { restartScriptPath?: string | null; invocationCwd?: string; }): Promise { + const verifyRestartedGateway = async (expectedGatewayVersion: string | undefined) => { + const service = resolveGatewayService(); + let health = await waitForGatewayHealthyRestart({ + service, + port: params.gatewayPort, + expectedVersion: expectedGatewayVersion, + }); + if (!health.healthy && health.staleGatewayPids.length > 0) { + if (!params.opts.json) { + defaultRuntime.log( + theme.warn( + `Found stale gateway process(es) after restart: ${health.staleGatewayPids.join(", ")}. Cleaning up...`, + ), + ); + } + await terminateStaleGatewayPids(health.staleGatewayPids); + await runDaemonRestart(); + health = await waitForGatewayHealthyRestart({ + service, + port: params.gatewayPort, + expectedVersion: expectedGatewayVersion, + }); + } + + if (health.healthy) { + return true; + } + + const diagnosticLines = [ + "Gateway did not become healthy after restart.", + ...renderRestartDiagnostics(health), + `Restart log: ${resolveGatewayRestartLogPath(process.env)}`, + `Run \`${replaceCliName(formatCliCommand("openclaw gateway status --deep"), CLI_NAME)}\` for details.`, + ]; + if (params.opts.json) { + defaultRuntime.error(diagnosticLines.join("\n")); + } else { + defaultRuntime.log(theme.warn(diagnosticLines[0] ?? "Gateway did not become healthy.")); + for (const line of diagnosticLines.slice(1)) { + defaultRuntime.log(theme.muted(line)); + } + } + + return !(health.versionMismatch || health.activatedPluginErrors?.length); + }; + if (params.shouldRestart) { if (!params.opts.json) { defaultRuntime.log(""); @@ -791,6 +837,22 @@ async function maybeRestartService(params: { restarted = await runDaemonRestart(); } + const shouldVerifyRestart = + restartInitiated || (restarted && expectedGatewayVersion !== undefined); + if (shouldVerifyRestart) { + const restartHealthy = await verifyRestartedGateway(expectedGatewayVersion); + if (!restartHealthy) { + if (!params.opts.json) { + defaultRuntime.log(""); + } + return false; + } + if (!params.opts.json && restartInitiated) { + defaultRuntime.log(theme.success("Daemon restart completed.")); + defaultRuntime.log(""); + } + } + if (!params.opts.json && restarted) { defaultRuntime.log(theme.success("Daemon restarted successfully.")); defaultRuntime.log(""); @@ -807,52 +869,6 @@ async function maybeRestartService(params: { delete process.env.OPENCLAW_UPDATE_IN_PROGRESS; } } - - if (!params.opts.json && restartInitiated) { - const service = resolveGatewayService(); - let health = await waitForGatewayHealthyRestart({ - service, - port: params.gatewayPort, - expectedVersion: expectedGatewayVersion, - }); - if (!health.healthy && health.staleGatewayPids.length > 0) { - if (!params.opts.json) { - defaultRuntime.log( - theme.warn( - `Found stale gateway process(es) after restart: ${health.staleGatewayPids.join(", ")}. Cleaning up...`, - ), - ); - } - await terminateStaleGatewayPids(health.staleGatewayPids); - await runDaemonRestart(); - health = await waitForGatewayHealthyRestart({ - service, - port: params.gatewayPort, - expectedVersion: expectedGatewayVersion, - }); - } - - if (health.healthy) { - defaultRuntime.log(theme.success("Daemon restart completed.")); - } else { - defaultRuntime.log(theme.warn("Gateway did not become healthy after restart.")); - for (const line of renderRestartDiagnostics(health)) { - defaultRuntime.log(theme.muted(line)); - } - defaultRuntime.log( - theme.muted(`Restart log: ${resolveGatewayRestartLogPath(process.env)}`), - ); - defaultRuntime.log( - theme.muted( - `Run \`${replaceCliName(formatCliCommand("openclaw gateway status --deep"), CLI_NAME)}\` for details.`, - ), - ); - } - defaultRuntime.log(""); - if (!health.healthy && (health.versionMismatch || health.activatedPluginErrors?.length)) { - return false; - } - } } catch (err) { if (!params.opts.json) { defaultRuntime.log(theme.warn(`Daemon restart failed: ${String(err)}`));