diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d75edbc6e2..4ed0fd5b0e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai - Channels: keep Matrix and Mattermost bundled in the core package instead of advertising external npm installs before those channels are cut over. Thanks @vincentkoc. - Bonjour: disable LAN mDNS advertising after a repeated stuck-announcing recovery instead of repeatedly restarting ciao and saturating the Gateway event loop. - Channels/setup: label installable channel picker hints as remote npm installs and hide remote install hints for bundled plugins that already ship with OpenClaw. +- CLI/update: refuse package updates launched from the active gateway process tree before stopping the managed Gateway service, avoiding self-terminated in-lane updates that leave old Gateway code running. Fixes #75691. (#75819) Thanks @ai-hpc. - CLI/plugins: stop treating the non-plugin `auth` command root as a bundled plugin id, so restrictive `plugins.allow` configs no longer tell users to add stale `auth` plugin entries. - Doctor/plugins: update configured plugin installs whose stale manifests still declare channels without `channelConfigs`, so beta upgrades repair old Discord-style package payloads during `doctor --fix`. - Doctor/plugins: repair configured external plugin installs whose persisted install record points at a missing package directory, so upgrades reconcile phantom npm metadata before plugin runtime validation. Thanks @vincentkoc. diff --git a/src/cli/update-cli.test.ts b/src/cli/update-cli.test.ts index 2933751dc6c..dea383f319e 100644 --- a/src/cli/update-cli.test.ts +++ b/src/cli/update-cli.test.ts @@ -29,6 +29,7 @@ const runRestartScript = vi.fn(); const mockedRunDaemonInstall = vi.fn(); const serviceReadCommand = vi.fn(); const serviceReadRuntime = vi.fn(); +const mockGetSelfAndAncestorPidsSync = vi.fn(() => new Set([process.pid])); const inspectPortUsage = vi.fn(); const classifyPortListener = vi.fn(); const formatPortDiagnostics = vi.fn(); @@ -128,6 +129,10 @@ vi.mock("../infra/runtime-guard.js", () => ({ }, })); +vi.mock("../infra/restart-stale-pids.js", () => ({ + getSelfAndAncestorPidsSync: () => mockGetSelfAndAncestorPidsSync(), +})); + vi.mock("node:child_process", async () => { const actual = await vi.importActual("node:child_process"); return { @@ -498,6 +503,7 @@ describe("update-cli", () => { pid: 4242, state: "running", }); + mockGetSelfAndAncestorPidsSync.mockReturnValue(new Set([process.pid])); prepareRestartScript.mockResolvedValue("/tmp/openclaw-restart-test.sh"); runRestartScript.mockResolvedValue(undefined); inspectPortUsage.mockResolvedValue({ @@ -1425,6 +1431,26 @@ describe("update-cli", () => { ); }); + it("refuses package updates from inside the active gateway process tree", async () => { + mockPackageInstallStatus(createCaseDir("openclaw-update")); + serviceLoaded.mockResolvedValue(true); + mockGetSelfAndAncestorPidsSync.mockReturnValue(new Set([process.pid, 4242])); + + await updateCommand({ yes: true }); + + const errors = vi.mocked(defaultRuntime.error).mock.calls.map((call) => String(call[0])); + expect(errors.join("\n")).toContain( + "openclaw update detected it is running inside the gateway process tree.", + ); + expect(errors.join("\n")).toContain("Gateway PID 4242 is an ancestor"); + expect(defaultRuntime.exit).toHaveBeenCalledWith(1); + expect(serviceStop).not.toHaveBeenCalled(); + expect(runCommandWithTimeout).not.toHaveBeenCalledWith( + ["npm", "i", "-g", "openclaw@latest", "--no-fund", "--no-audit", "--loglevel=error"], + expect.any(Object), + ); + }); + it("blocks package updates when the target requires a newer Node runtime", async () => { mockPackageInstallStatus(createCaseDir("openclaw-update")); vi.mocked(fetchNpmPackageTargetStatus).mockResolvedValue({ diff --git a/src/cli/update-cli/update-command.ts b/src/cli/update-cli/update-command.ts index 9cbdb2e16f3..66324707c75 100644 --- a/src/cli/update-cli/update-command.ts +++ b/src/cli/update-cli/update-command.ts @@ -24,6 +24,7 @@ import { resolveGatewayRestartLogPath } from "../../daemon/restart-logs.js"; import { readGatewayServiceState, resolveGatewayService } from "../../daemon/service.js"; import { createLowDiskSpaceWarning } from "../../infra/disk-space.js"; import { runGlobalPackageUpdateSteps } from "../../infra/package-update-steps.js"; +import { getSelfAndAncestorPidsSync } from "../../infra/restart-stale-pids.js"; import { nodeVersionSatisfiesEngine } from "../../infra/runtime-guard.js"; import { channelToNpmTag, @@ -236,9 +237,30 @@ type PrePackageServiceStop = { inspected: boolean; runtimeInspected: boolean; running: boolean; + blockMessage?: string; serviceEnv?: NodeJS.ProcessEnv; }; +function formatGatewayAncestryBlockMessage(pid: number): string { + return `openclaw update detected it is running inside the gateway process tree. +Gateway PID ${pid} is an ancestor of this process, so this updater cannot safely stop or restart the gateway that owns it. +Run \`${replaceCliName(formatCliCommand("openclaw update"), CLI_NAME)}\` from a shell outside the gateway service, or stop the gateway service first and then update.`; +} + +function isGatewayAncestorPid(pid: unknown): pid is number { + return typeof pid === "number" && pid > 0 && getSelfAndAncestorPidsSync().has(pid); +} + +function gatewayAncestryBlockMessage(pid: unknown): string | undefined { + return isGatewayAncestorPid(pid) ? formatGatewayAncestryBlockMessage(pid) : undefined; +} + +function gatewayRuntimeAncestryBlockMessage( + runtime: { pid?: unknown } | null | undefined, +): string | undefined { + return gatewayAncestryBlockMessage(runtime?.pid); +} + async function maybeStopManagedServiceBeforePackageUpdate(params: { shouldRestart: boolean; jsonMode: boolean; @@ -301,6 +323,18 @@ async function maybeStopManagedServiceBeforePackageUpdate(params: { }; } + const blockMessage = gatewayRuntimeAncestryBlockMessage(serviceState.runtime); + if (blockMessage) { + return { + stopped: false, + inspected: true, + runtimeInspected: true, + running: true, + blockMessage, + serviceEnv: serviceState.env, + }; + } + if (!params.jsonMode) { defaultRuntime.log(theme.muted("Stopping managed gateway service before package update...")); } @@ -1817,6 +1851,13 @@ export async function updateCommand(opts: UpdateCommandOptions): Promise { return; } + if (prePackageServiceStop?.blockMessage) { + stop(); + defaultRuntime.error(prePackageServiceStop.blockMessage); + defaultRuntime.exit(1); + return; + } + if (shouldBlockPackageUpdateFromGatewayServiceEnv({ prePackageServiceStop })) { stop(); defaultRuntime.error( diff --git a/src/infra/restart-stale-pids.ts b/src/infra/restart-stale-pids.ts index 2d4c8b7e882..16b84bb740f 100644 --- a/src/infra/restart-stale-pids.ts +++ b/src/infra/restart-stale-pids.ts @@ -139,7 +139,7 @@ function readParentPidFromProc(pid: number): number | null { * `node:fs` to inject `/proc//status` payloads) — there is no * reachable override for runtime callers to mutate. */ -function getSelfAndAncestorPidsSync(): Set { +export function getSelfAndAncestorPidsSync(): Set { const pids = new Set([process.pid]); const immediateParent = getParentPid(); if (!Number.isFinite(immediateParent) || immediateParent <= 0) {