fix: verify updated gateway version after package restart

This commit is contained in:
Peter Steinberger
2026-04-26 06:35:25 +01:00
parent 142577d9b2
commit be8a3617d9
6 changed files with 146 additions and 48 deletions

View File

@@ -101,7 +101,7 @@ Docs: https://docs.openclaw.ai
start browser-capable CLI node services through the restored
`openclaw node start` command, and show an actionable browser-control error
when the local control service is missing. Fixes #66637.
- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis.
- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, including fallback restarts and JSON mode, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis.
- Plugins/runtime deps: surface activated plugin load failures in health and fail package-update restart verification or doctor repair when bundled runtime deps still cannot load, avoiding false-success repairs. (#71883) Thanks @Solvely-Colin.
- Gateway/Linux: include fnm `aliases/default/bin` in generated service PATHs and let doctor accept either modern fnm aliases or the legacy `current/bin` symlink, avoiding false PATH repair prompts. Fixes #68169. Thanks @richard-scott.
- Installer/Linux: run apt installs with noninteractive dpkg and needrestart settings so fresh Ubuntu 24.04 `curl | bash` installs do not hang while installing Node.js, Git, or build tools. Fixes #41146. Thanks @iht76, @alexcarv318, @cs3gallery, @firofame, and @cgdusek.

View File

@@ -32,7 +32,7 @@ openclaw --update
## Options
- `--no-restart`: skip restarting the Gateway service after a successful update.
- `--no-restart`: skip restarting the Gateway service after a successful update. When a package-manager update does restart the Gateway, the command verifies that the restarted service reports the expected updated version before it succeeds.
- `--channel <stable|beta|dev>`: set the update channel (git + npm; persisted in config).
- `--tag <dist-tag|version|spec>`: override the package target for this update only. For package installs, `main` maps to `github:openclaw/openclaw#main`.
- `--dry-run`: preview planned update actions (channel/tag/target/restart flow) without writing config, installing, syncing plugins, or restarting.

View File

@@ -305,6 +305,38 @@ describe("inspectGatewayRestart", () => {
expect(snapshot.versionMismatch).toBeUndefined();
});
// Verifies that waitForGatewayHealthyRestart returns a "version-mismatch" outcome
// on the first attempt (elapsedMs 0, no sleep) when the probed gateway reports a
// version different from the expected one.
it("stops waiting once the restarted gateway reports the wrong version", async () => {
// The probe succeeds, but the reported server version is not the expected 2026.4.24.
probeGateway.mockResolvedValue({
ok: true,
close: null,
server: { version: "2026.4.23", connId: "old" },
});
// The port is busy with a listener whose pid (8000) matches the pid given to the service below.
inspectPortUsage.mockResolvedValue({
port: 18789,
status: "busy",
listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }],
hints: [],
});
const { waitForGatewayHealthyRestart } = await import("./restart-health.js");
const snapshot = await waitForGatewayHealthyRestart({
service: makeGatewayService({ status: "running", pid: 8000 }),
port: 18789,
expectedVersion: "2026.4.24",
});
// The snapshot must be unhealthy and carry both the expected and the actual version.
expect(snapshot).toMatchObject({
healthy: false,
waitOutcome: "version-mismatch",
elapsedMs: 0,
versionMismatch: {
expected: "2026.4.24",
actual: "2026.4.23",
},
});
// No retry delay should have been taken before returning.
expect(sleep).not.toHaveBeenCalled();
});
it("marks matching-version restarts unhealthy when activated plugins failed to load", async () => {
probeGateway.mockResolvedValue({
ok: true,

View File

@@ -26,6 +26,7 @@ const WINDOWS_STOPPED_FREE_EARLY_EXIT_GRACE_MS = 90_000;
/**
 * Outcome label attached to a restart-health snapshot (surfaced as `waitOutcome`)
 * by `waitForGatewayHealthyRestart`.
 *
 * As visible in the wait loop:
 * - `"plugin-errors"`: the snapshot lists activated plugin errors.
 * - `"version-mismatch"`: the snapshot has a `versionMismatch` entry.
 * - `"stale-pids"`: stale gateway pids remain while the runtime status is not `"running"`.
 *
 * NOTE(review): the exact conditions for `"healthy"`, `"stopped-free"` and
 * `"timeout"` are not shown next to this declaration — confirm against the loop body.
 */
export type GatewayRestartWaitOutcome =
| "healthy"
| "plugin-errors"
| "version-mismatch"
| "stale-pids"
| "stopped-free"
| "timeout";
@@ -414,6 +415,9 @@ export async function waitForGatewayHealthyRestart(params: {
if (snapshot.activatedPluginErrors?.length) {
return withWaitContext(snapshot, "plugin-errors", attempt * delayMs);
}
if (snapshot.versionMismatch) {
return withWaitContext(snapshot, "version-mismatch", attempt * delayMs);
}
if (snapshot.staleGatewayPids.length > 0 && snapshot.runtime.status !== "running") {
return withWaitContext(snapshot, "stale-pids", attempt * delayMs);
}

View File

@@ -1645,6 +1645,52 @@ describe("update-cli", () => {
expect(defaultRuntime.exit).toHaveBeenCalledWith(1);
});
// Verifies that a JSON-mode package update exits with code 1 and reports a version
// mismatch when no restart script is available, the daemon restart path is used,
// and the probed gateway still reports the pre-update version.
it("fails a JSON package update when fallback restart leaves the old gateway running", async () => {
// Simulated npm update from 2026.4.23 to 2026.4.24.
setupUpdatedRootRefresh({
gatewayUpdateImpl: async () =>
makeOkUpdateResult({
mode: "npm",
root: createCaseDir("openclaw-updated-root"),
before: { version: "2026.4.23" },
after: { version: "2026.4.24" },
}),
});
// No restart script is prepared; the assertions below expect runDaemonRestart to be used instead.
prepareRestartScript.mockResolvedValue(null);
// The gateway probe succeeds but reports 2026.4.23, not the updated 2026.4.24.
probeGateway.mockResolvedValue({
ok: true,
close: null,
server: {
version: "2026.4.23",
connId: "old-gateway",
},
auth: { role: "operator", scopes: ["operator.read"], capability: "read_only" },
health: null,
status: null,
presence: null,
configSnapshot: null,
connectLatencyMs: 1,
error: null,
url: "ws://127.0.0.1:18789",
});
await updateCommand({ yes: true, json: true });
expect(runRestartScript).not.toHaveBeenCalled();
expect(runDaemonRestart).toHaveBeenCalled();
// The version check needs a detailed probe.
expect(probeGateway).toHaveBeenCalledWith(expect.objectContaining({ includeDetails: true }));
// The failure is reported via exit code and the error channel; no JSON result is written.
expect(defaultRuntime.exit).toHaveBeenCalledWith(1);
expect(defaultRuntime.writeJson).not.toHaveBeenCalled();
expect(
vi
.mocked(defaultRuntime.error)
.mock.calls.map((call) => String(call[0]))
.join("\n"),
).toContain(
"Gateway version mismatch: expected 2026.4.24, running gateway reported 2026.4.23.",
);
// The doctor command must not run after a failed restart verification.
expect(doctorCommand).not.toHaveBeenCalled();
});
it("fails a package update when the restarted gateway reports activated plugin load errors", async () => {
setupUpdatedRootRefresh({
gatewayUpdateImpl: async () =>

View File

@@ -750,6 +750,52 @@ async function maybeRestartService(params: {
restartScriptPath?: string | null;
invocationCwd?: string;
}): Promise<boolean> {
/**
 * Waits for the restarted gateway to report healthy and prints diagnostics if it does not.
 *
 * If the first wait ends unhealthy with stale gateway pids, those pids are terminated,
 * the daemon is restarted once more, and the wait is repeated.
 *
 * @param expectedGatewayVersion Version forwarded to the health wait as `expectedVersion`;
 *   may be `undefined`.
 * @returns `true` when the gateway is healthy, and also when it is unhealthy for a reason
 *   other than a version mismatch or activated plugin errors (diagnostics are still
 *   emitted in that case); `false` only for those two conditions.
 */
const verifyRestartedGateway = async (expectedGatewayVersion: string | undefined) => {
  const gatewayService = resolveGatewayService();

  // Single place that describes how restart health is polled; used for both attempts.
  const pollRestartHealth = () =>
    waitForGatewayHealthyRestart({
      service: gatewayService,
      port: params.gatewayPort,
      expectedVersion: expectedGatewayVersion,
    });

  let snapshot = await pollRestartHealth();

  if (!snapshot.healthy && snapshot.staleGatewayPids.length > 0) {
    const stalePids = snapshot.staleGatewayPids;
    if (!params.opts.json) {
      const warning = `Found stale gateway process(es) after restart: ${stalePids.join(", ")}. Cleaning up...`;
      defaultRuntime.log(theme.warn(warning));
    }
    await terminateStaleGatewayPids(stalePids);
    await runDaemonRestart();
    snapshot = await pollRestartHealth();
  }

  if (snapshot.healthy) {
    return true;
  }

  const diagnosticLines = [
    "Gateway did not become healthy after restart.",
    ...renderRestartDiagnostics(snapshot),
    `Restart log: ${resolveGatewayRestartLogPath(process.env)}`,
    `Run \`${replaceCliName(formatCliCommand("openclaw gateway status --deep"), CLI_NAME)}\` for details.`,
  ];

  if (params.opts.json) {
    // JSON mode sends the whole report to the error channel as one message.
    defaultRuntime.error(diagnosticLines.join("\n"));
  } else {
    // Human mode: first line as a warning, the remaining lines muted.
    const [headline = "Gateway did not become healthy.", ...detailLines] = diagnosticLines;
    defaultRuntime.log(theme.warn(headline));
    detailLines.forEach((line) => {
      defaultRuntime.log(theme.muted(line));
    });
  }

  // Only a version mismatch or activated plugin errors fail the verification.
  const hasVersionMismatch = Boolean(snapshot.versionMismatch);
  const hasPluginErrors = Boolean(snapshot.activatedPluginErrors?.length);
  return !(hasVersionMismatch || hasPluginErrors);
};
if (params.shouldRestart) {
if (!params.opts.json) {
defaultRuntime.log("");
@@ -791,6 +837,22 @@ async function maybeRestartService(params: {
restarted = await runDaemonRestart();
}
const shouldVerifyRestart =
restartInitiated || (restarted && expectedGatewayVersion !== undefined);
if (shouldVerifyRestart) {
const restartHealthy = await verifyRestartedGateway(expectedGatewayVersion);
if (!restartHealthy) {
if (!params.opts.json) {
defaultRuntime.log("");
}
return false;
}
if (!params.opts.json && restartInitiated) {
defaultRuntime.log(theme.success("Daemon restart completed."));
defaultRuntime.log("");
}
}
if (!params.opts.json && restarted) {
defaultRuntime.log(theme.success("Daemon restarted successfully."));
defaultRuntime.log("");
@@ -807,52 +869,6 @@ async function maybeRestartService(params: {
delete process.env.OPENCLAW_UPDATE_IN_PROGRESS;
}
}
if (!params.opts.json && restartInitiated) {
const service = resolveGatewayService();
let health = await waitForGatewayHealthyRestart({
service,
port: params.gatewayPort,
expectedVersion: expectedGatewayVersion,
});
if (!health.healthy && health.staleGatewayPids.length > 0) {
if (!params.opts.json) {
defaultRuntime.log(
theme.warn(
`Found stale gateway process(es) after restart: ${health.staleGatewayPids.join(", ")}. Cleaning up...`,
),
);
}
await terminateStaleGatewayPids(health.staleGatewayPids);
await runDaemonRestart();
health = await waitForGatewayHealthyRestart({
service,
port: params.gatewayPort,
expectedVersion: expectedGatewayVersion,
});
}
if (health.healthy) {
defaultRuntime.log(theme.success("Daemon restart completed."));
} else {
defaultRuntime.log(theme.warn("Gateway did not become healthy after restart."));
for (const line of renderRestartDiagnostics(health)) {
defaultRuntime.log(theme.muted(line));
}
defaultRuntime.log(
theme.muted(`Restart log: ${resolveGatewayRestartLogPath(process.env)}`),
);
defaultRuntime.log(
theme.muted(
`Run \`${replaceCliName(formatCliCommand("openclaw gateway status --deep"), CLI_NAME)}\` for details.`,
),
);
}
defaultRuntime.log("");
if (!health.healthy && (health.versionMismatch || health.activatedPluginErrors?.length)) {
return false;
}
}
} catch (err) {
if (!params.opts.json) {
defaultRuntime.log(theme.warn(`Daemon restart failed: ${String(err)}`));