From 427e485f76825983bfaf5462962dc46e8bc14e9a Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 05:34:36 +0100 Subject: [PATCH] fix(update): verify restarted gateway version --- CHANGELOG.md | 1 + src/cli/daemon-cli/restart-health.test.ts | 57 +++++++++++ src/cli/daemon-cli/restart-health.ts | 109 +++++++++++++++++---- src/cli/update-cli.test.ts | 54 ++++++++++ src/cli/update-cli/update-command.ts | 23 ++++- src/commands/gateway-status.test.ts | 8 ++ src/commands/gateway-status/output.test.ts | 4 + src/gateway/probe.test.ts | 9 ++ src/gateway/probe.ts | 19 ++++ 9 files changed, 262 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85476578466..8d4d24d0f4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ Docs: https://docs.openclaw.ai start browser-capable CLI node services through the restored `openclaw node start` command, and show an actionable browser-control error when the local control service is missing. Fixes #66637. +- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis. - Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo. - Channels/status: keep read-only channel lists on manifest and package metadata by default, loading setup runtime only for explicit fallback callers. Thanks @shakkernerd. - Plugins/onboarding: defer onboarding install-record index writes until the guarded config commit so setup failures cannot leave the plugin index ahead of `openclaw.json`. Thanks @shakkernerd. diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts index 9f0dbf17f82..0dbaa97ad8f 100644 --- a/src/cli/daemon-cli/restart-health.test.ts +++ b/src/cli/daemon-cli/restart-health.test.ts @@ -40,6 +40,7 @@ function makeGatewayService( async function inspectGatewayRestartWithSnapshot(params: { runtime: { status: "running"; pid: number } | { status: "stopped" }; portUsage: PortUsage; + expectedVersion?: string; includeUnknownListenersAsStale?: boolean; }) { const service = makeGatewayService(params.runtime); @@ -48,6 +49,7 @@ async function inspectGatewayRestartWithSnapshot(params: { return inspectGatewayRestart({ service, port: 18789, + ...(params.expectedVersion === undefined ? {} : { expectedVersion: params.expectedVersion }), ...(params.includeUnknownListenersAsStale === undefined ? {} : { includeUnknownListenersAsStale: params.includeUnknownListenersAsStale }), @@ -248,6 +250,61 @@ describe("inspectGatewayRestart", () => { expect(snapshot.healthy).toBe(true); }); + it("requires the expected gateway version when provided", async () => { + probeGateway.mockResolvedValue({ + ok: true, + close: null, + server: { version: "2026.4.23", connId: "old" }, + }); + + const snapshot = await inspectGatewayRestartWithSnapshot({ + runtime: { status: "running", pid: 8000 }, + expectedVersion: "2026.4.24", + portUsage: { + port: 18789, + status: "busy", + listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }], + hints: [], + }, + }); + + expect(snapshot).toMatchObject({ + healthy: false, + gatewayVersion: "2026.4.23", + expectedVersion: "2026.4.24", + versionMismatch: { + expected: "2026.4.24", + actual: "2026.4.23", + }, + }); + }); + + it("accepts the restarted gateway when the expected version matches", async () => { + probeGateway.mockResolvedValue({ + ok: true, + close: null, + server: { version: "2026.4.24", connId: "new" }, + }); + + const snapshot = await inspectGatewayRestartWithSnapshot({ + runtime: { status: "running", pid: 8000 }, + expectedVersion: "2026.4.24", + portUsage: { + port: 18789, + status: "busy", + listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }], + hints: [], + }, + }); + + expect(snapshot).toMatchObject({ + healthy: true, + gatewayVersion: "2026.4.24", + expectedVersion: "2026.4.24", + }); + expect(snapshot.versionMismatch).toBeUndefined(); + }); + it("treats busy ports with unavailable listener details as healthy when runtime is running", async () => { const service = { readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })), diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts index 4c9a7ddbf1e..f80604427c7 100644 --- a/src/cli/daemon-cli/restart-health.ts +++ b/src/cli/daemon-cli/restart-health.ts @@ -29,6 +29,12 @@ export type GatewayRestartSnapshot = { portUsage: PortUsage; healthy: boolean; staleGatewayPids: number[]; + gatewayVersion?: string | null; + expectedVersion?: string; + versionMismatch?: { + expected: string; + actual: string | null; + }; waitOutcome?: GatewayRestartWaitOutcome; elapsedMs?: number; }; @@ -38,6 +44,11 @@ export type GatewayPortHealthSnapshot = { healthy: boolean; }; +type GatewayReachability = { + reachable: boolean; + gatewayVersion: string | null; +}; + function hasListenerAttributionGap(portUsage: PortUsage): boolean { if (portUsage.status !== "busy" || portUsage.listeners.length > 0) { return false; @@ -69,7 +80,28 @@ function looksLikeAuthClose(code: number | undefined, reason: string | undefined ); } -async function confirmGatewayReachable(port: number): Promise { +function applyExpectedVersion( + snapshot: GatewayRestartSnapshot, + expectedVersion: string | undefined, +): GatewayRestartSnapshot { + if (!expectedVersion) { + return snapshot; + } + if (snapshot.gatewayVersion === expectedVersion) { + return { ...snapshot, expectedVersion }; + } + return { + ...snapshot, + healthy: false, + expectedVersion, + versionMismatch: { + expected: expectedVersion, + actual: snapshot.gatewayVersion ?? null, + }, + }; +} + +async function confirmGatewayReachable(port: number): Promise { const token = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_TOKEN); const password = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_PASSWORD); const probe = await probeGateway({ @@ -78,7 +110,10 @@ async function confirmGatewayReachable(port: number): Promise { timeoutMs: 3_000, includeDetails: false, }); - return probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason); + return { + reachable: probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason), + gatewayVersion: probe.server?.version ?? null, + }; } async function inspectGatewayPortHealth(port: number): Promise { @@ -98,7 +133,7 @@ async function inspectGatewayPortHealth(port: number): Promise { const env = params.env ?? process.env; + const expectedVersion = normalizeOptionalString(params.expectedVersion); + let reachability: GatewayReachability | null = null; + const loadReachability = async () => { + reachability ??= await confirmGatewayReachable(params.port); + return reachability; + }; let runtime: GatewayServiceRuntime = { status: "unknown" }; try { runtime = await params.service.readRuntime(env); @@ -136,14 +178,18 @@ export async function inspectGatewayRestart(params: { if (portUsage.status === "busy" && runtime.status !== "running") { try { - const reachable = await confirmGatewayReachable(params.port); - if (reachable) { - return { - runtime, - portUsage, - healthy: true, - staleGatewayPids: [], - }; + const reachable = await loadReachability(); + if (reachable.reachable) { + return applyExpectedVersion( + { + runtime, + portUsage, + healthy: true, + staleGatewayPids: [], + gatewayVersion: reachable.gatewayVersion, + }, + expectedVersion, + ); } } catch { // Probe is best-effort; keep the ownership-based diagnostics. @@ -176,9 +222,21 @@ export async function inspectGatewayRestart(params: { ) || listenerAttributionGap : gatewayListeners.length > 0 || listenerAttributionGap; let healthy = running && ownsPort; - if (!healthy && running && portUsage.status === "busy") { + let gatewayVersion: string | null | undefined; + if (expectedVersion && healthy && portUsage.status === "busy") { try { - healthy = await confirmGatewayReachable(params.port); + const reachable = await loadReachability(); + healthy = reachable.reachable; + gatewayVersion = reachable.gatewayVersion; + } catch { + healthy = false; + } + } + if (!healthy && running && portUsage.status === "busy" && !expectedVersion) { + try { + const reachable = await loadReachability(); + healthy = reachable.reachable; + gatewayVersion = reachable.gatewayVersion; } catch { // best-effort probe } @@ -203,12 +261,16 @@ export async function inspectGatewayRestart(params: { ]), ); - return { - runtime, - portUsage, - healthy, - staleGatewayPids, - }; + return applyExpectedVersion( + { + runtime, + portUsage, + healthy, + staleGatewayPids, + ...(gatewayVersion !== undefined ? { gatewayVersion } : {}), + }, + expectedVersion, + ); } function shouldEarlyExitStoppedFree( @@ -243,6 +305,7 @@ export async function waitForGatewayHealthyRestart(params: { attempts?: number; delayMs?: number; env?: NodeJS.ProcessEnv; + expectedVersion?: string | null; includeUnknownListenersAsStale?: boolean; }): Promise { const attempts = params.attempts ?? DEFAULT_RESTART_HEALTH_ATTEMPTS; @@ -252,6 +315,7 @@ export async function waitForGatewayHealthyRestart(params: { service: params.service, port: params.port, env: params.env, + expectedVersion: params.expectedVersion, includeUnknownListenersAsStale: params.includeUnknownListenersAsStale, }); @@ -282,6 +346,7 @@ export async function waitForGatewayHealthyRestart(params: { service: params.service, port: params.port, env: params.env, + expectedVersion: params.expectedVersion, includeUnknownListenersAsStale: params.includeUnknownListenersAsStale, }); } @@ -328,6 +393,12 @@ function renderPortUsageDiagnostics(snapshot: GatewayPortHealthSnapshot): string export function renderRestartDiagnostics(snapshot: GatewayRestartSnapshot): string[] { const lines: string[] = []; + if (snapshot.versionMismatch) { + const actual = snapshot.versionMismatch.actual ?? "unavailable"; + lines.push( + `Gateway version mismatch: expected ${snapshot.versionMismatch.expected}, running gateway reported ${actual}.`, + ); + } const runtimeSummary = [ snapshot.runtime.status ? `status=${snapshot.runtime.status}` : null, snapshot.runtime.state ? `state=${snapshot.runtime.state}` : null, diff --git a/src/cli/update-cli.test.ts b/src/cli/update-cli.test.ts index eb9361b7428..10effcbfe24 100644 --- a/src/cli/update-cli.test.ts +++ b/src/cli/update-cli.test.ts @@ -28,6 +28,7 @@ const serviceReadRuntime = vi.fn(); const inspectPortUsage = vi.fn(); const classifyPortListener = vi.fn(); const formatPortDiagnostics = vi.fn(); +const probeGateway = vi.fn(); const pathExists = vi.fn(); const syncPluginsForUpdateChannel = vi.fn(); const updateNpmInstalledPlugins = vi.fn(); @@ -174,6 +175,10 @@ vi.mock("../infra/ports.js", () => ({ formatPortDiagnostics: (...args: unknown[]) => formatPortDiagnostics(...args), })); +vi.mock("../gateway/probe.js", () => ({ + probeGateway: (...args: unknown[]) => probeGateway(...args), +})); + vi.mock("./update-cli/restart-helper.js", () => ({ prepareRestartScript: (...args: unknown[]) => prepareRestartScript(...args), runRestartScript: (...args: unknown[]) => runRestartScript(...args), @@ -446,6 +451,22 @@ describe("update-cli", () => { }); classifyPortListener.mockReturnValue("gateway"); formatPortDiagnostics.mockReturnValue(["Port 18789 is already in use."]); + probeGateway.mockResolvedValue({ + ok: true, + close: null, + server: { + version: "1.0.0", + connId: "conn-test", + }, + auth: { role: "operator", scopes: ["operator.read"], capability: "read_only" }, + health: null, + status: null, + presence: null, + configSnapshot: null, + connectLatencyMs: 1, + error: null, + url: "ws://127.0.0.1:18789", + }); pathExists.mockResolvedValue(false); syncPluginsForUpdateChannel.mockResolvedValue({ changed: false, @@ -521,6 +542,22 @@ describe("update-cli", () => { tag: "latest", version: "2026.4.10", }); + probeGateway.mockResolvedValue({ + ok: true, + close: null, + server: { + version: "2026.4.10", + connId: "downgraded-gateway", + }, + auth: { role: "operator", scopes: ["operator.read"], capability: "read_only" }, + health: null, + status: null, + presence: null, + configSnapshot: null, + connectLatencyMs: 1, + error: null, + url: "ws://127.0.0.1:18789", + }); await updateCommand({ yes: true, tag: "2026.4.10" }); @@ -528,6 +565,7 @@ describe("update-cli", () => { expect(syncPluginsForUpdateChannel).toHaveBeenCalled(); expect(updateNpmInstalledPlugins).toHaveBeenCalled(); expect(runDaemonInstall).toHaveBeenCalled(); + expect(probeGateway).toHaveBeenCalled(); expect(defaultRuntime.exit).not.toHaveBeenCalledWith(1); }); @@ -1591,6 +1629,22 @@ describe("update-cli", () => { }, ] as const)("updateCommand service refresh behavior: $name", runUpdateCliScenario); + it("fails a package update when service env refresh cannot complete", async () => { + const tempDir = createCaseDir("openclaw-update"); + mockPackageInstallStatus(tempDir); + serviceLoaded.mockResolvedValue(true); + vi.mocked(runDaemonInstall).mockRejectedValueOnce(new Error("refresh failed")); + + await updateCommand({ yes: true }); + + expect(runDaemonInstall).toHaveBeenCalledWith({ + force: true, + json: undefined, + }); + expect(runRestartScript).not.toHaveBeenCalled(); + expect(defaultRuntime.exit).toHaveBeenCalledWith(1); + }); + it.each([ { name: "updateCommand refreshes service env from updated install root when available", diff --git a/src/cli/update-cli/update-command.ts b/src/cli/update-cli/update-command.ts index a4487b7a03a..a6edbbc96b2 100644 --- a/src/cli/update-cli/update-command.ts +++ b/src/cli/update-cli/update-command.ts @@ -50,6 +50,7 @@ import { import { syncPluginsForUpdateChannel, updateNpmInstalledPlugins } from "../../plugins/update.js"; import { runCommandWithTimeout } from "../../process/exec.js"; import { defaultRuntime } from "../../runtime.js"; +import { normalizeOptionalString } from "../../shared/string-coerce.js"; import { stylePromptMessage } from "../../terminal/prompt-style.js"; import { theme } from "../../terminal/theme.js"; import { pathExists } from "../../utils.js"; @@ -748,7 +749,7 @@ async function maybeRestartService(params: { gatewayPort: number; restartScriptPath?: string | null; invocationCwd?: string; -}): Promise { +}): Promise { if (params.shouldRestart) { if (!params.opts.json) { defaultRuntime.log(""); @@ -756,6 +757,9 @@ async function maybeRestartService(params: { } try { + const expectedGatewayVersion = isPackageManagerUpdateMode(params.result.mode) + ? normalizeOptionalString(params.result.after?.version) + : undefined; let restarted = false; let restartInitiated = false; if (params.refreshServiceEnv) { @@ -775,6 +779,9 @@ async function maybeRestartService(params: { } else { defaultRuntime.log(theme.warn(message)); } + if (isPackageManagerUpdateMode(params.result.mode)) { + return false; + } } } if (params.restartScriptPath) { @@ -806,6 +813,7 @@ async function maybeRestartService(params: { let health = await waitForGatewayHealthyRestart({ service, port: params.gatewayPort, + expectedVersion: expectedGatewayVersion, }); if (!health.healthy && health.staleGatewayPids.length > 0) { if (!params.opts.json) { @@ -820,6 +828,7 @@ async function maybeRestartService(params: { health = await waitForGatewayHealthyRestart({ service, port: params.gatewayPort, + expectedVersion: expectedGatewayVersion, }); } @@ -840,6 +849,9 @@ async function maybeRestartService(params: { ); } defaultRuntime.log(""); + if (!health.healthy && health.versionMismatch) { + return false; + } } } catch (err) { if (!params.opts.json) { @@ -851,7 +863,7 @@ async function maybeRestartService(params: { ); } } - return; + return true; } if (!params.opts.json) { @@ -870,6 +882,7 @@ async function maybeRestartService(params: { ); } } + return true; } async function runPostCorePluginUpdate(params: { @@ -1423,7 +1436,7 @@ export async function updateCommand(opts: UpdateCommandOptions): Promise { skipPrompt: Boolean(opts.yes), }); - await maybeRestartService({ + const restartOk = await maybeRestartService({ shouldRestart, result, opts, @@ -1432,6 +1445,10 @@ export async function updateCommand(opts: UpdateCommandOptions): Promise { restartScriptPath, invocationCwd, }); + if (!restartOk) { + defaultRuntime.exit(1); + return; + } } if (!opts.json) { diff --git a/src/commands/gateway-status.test.ts b/src/commands/gateway-status.test.ts index 94bbfa61f92..ebe9f9b38a6 100644 --- a/src/commands/gateway-status.test.ts +++ b/src/commands/gateway-status.test.ts @@ -60,6 +60,10 @@ const mocks = vi.hoisted(() => { scopes: ["operator.read"], capability: "read_only", }, + server: { + version: "2026.4.24", + connId: "local", + }, health: { ok: true }, status: { linkChannel: { @@ -103,6 +107,10 @@ const mocks = vi.hoisted(() => { scopes: ["operator.admin"], capability: "admin_capable", }, + server: { + version: "2026.4.24", + connId: "remote", + }, health: { ok: true }, status: { linkChannel: { diff --git a/src/commands/gateway-status/output.test.ts b/src/commands/gateway-status/output.test.ts index 181b99ab6a3..27f2e8a8875 100644 --- a/src/commands/gateway-status/output.test.ts +++ b/src/commands/gateway-status/output.test.ts @@ -49,6 +49,10 @@ function createProbe( scopes: capability === "admin_capable" ? ["operator.admin"] : ["operator.read"], capability, }, + server: { + version: "2026.4.24", + connId: "conn-test", + }, health: null, status: null, presence: null, diff --git a/src/gateway/probe.test.ts b/src/gateway/probe.test.ts index 873e0d45989..9ba82e5cdae 100644 --- a/src/gateway/probe.test.ts +++ b/src/gateway/probe.test.ts @@ -9,6 +9,10 @@ const gatewayClientState = vi.hoisted(() => ({ role: "operator", scopes: ["operator.read"], } as { role?: string; scopes?: string[] } | undefined, + helloServer: { + version: "2026.4.24", + connId: "conn-test", + }, connectError: "scope upgrade pending approval (requestId: req-123)", connectErrorDetails: { code: "PAIRING_REQUIRED", @@ -76,6 +80,7 @@ class MockGatewayClient { if (typeof onHelloOk === "function") { await onHelloOk({ type: "hello-ok", + server: gatewayClientState.helloServer, auth: gatewayClientState.helloAuth, }); } @@ -169,6 +174,10 @@ describe("probeGateway", () => { scopes: ["operator.read"], capability: "read_only", }); + expect(result.server).toEqual({ + version: "2026.4.24", + connId: "conn-test", + }); }); it("keeps device identity enabled for remote probes", async () => { diff --git a/src/gateway/probe.ts b/src/gateway/probe.ts index 07b62b0437b..574f58f931b 100644 --- a/src/gateway/probe.ts +++ b/src/gateway/probe.ts @@ -33,6 +33,11 @@ export type GatewayProbeAuthSummary = { capability: GatewayProbeCapability; }; +export type GatewayProbeServerSummary = { + version: string | null; + connId: string | null; +}; + export type GatewayProbeResult = { ok: boolean; url: string; @@ -41,6 +46,7 @@ export type GatewayProbeResult = { connectErrorDetails?: unknown; close: GatewayProbeClose | null; auth: GatewayProbeAuthSummary; + server?: GatewayProbeServerSummary; health: unknown; status: unknown; presence: SystemPresence[] | null; @@ -70,6 +76,13 @@ function emptyProbeAuth(): GatewayProbeAuthSummary { }; } +function emptyProbeServer(): GatewayProbeServerSummary { + return { + version: null, + connId: null, + }; +} + function resolveProbeAuthSummary(params: { role?: string | null; scopes?: string[]; @@ -143,6 +156,7 @@ export async function probeGateway(opts: { let connectErrorDetails: unknown = null; let close: GatewayProbeClose | null = null; let auth = emptyProbeAuth(); + let server = emptyProbeServer(); let authMetadataPresent = false; const detailLevel = opts.includeDetails === false ? "none" : (opts.detailLevel ?? "full"); @@ -235,6 +249,7 @@ export async function probeGateway(opts: { verifiedRead: params.verifiedRead, connectLatencyMs, }), + server, health: params.health, status: params.status, presence: params.presence, @@ -273,6 +288,10 @@ export async function probeGateway(opts: { onHelloOk: async (hello) => { connectLatencyMs = Date.now() - startedAt; authMetadataPresent = typeof hello?.auth === "object" && hello.auth !== null; + server = { + version: typeof hello?.server?.version === "string" ? hello.server.version : null, + connId: typeof hello?.server?.connId === "string" ? hello.server.connId : null, + }; auth = resolveProbeAuthSummary({ role: typeof hello?.auth?.role === "string" ? hello.auth.role : null, scopes: Array.isArray(hello?.auth?.scopes)