From 90d385cb932820b9d5d35119afa41c90ee19bba2 Mon Sep 17 00:00:00 2001 From: masatohoshino Date: Sun, 7 Jun 2026 23:31:05 +0900 Subject: [PATCH] fix(update): resume official plugin convergence after gateway git update The gateway update.run RPC updated git/source installs via runGatewayUpdate but, unlike the openclaw update CLI, never resumed the post-core plugin convergence that runGatewayUpdate's doctor pass defers. As a result a git/source core update would restart on the new core with official managed plugins still pinned to versions built against removed core APIs. Spawn the rebuilt binary's update finalize entrypoint after a successful git update so official plugins reconcile to a host-compatible version, and block the restart if convergence fails (mirroring the CLI). --- src/gateway/server-methods/update.test.ts | 93 ++++++++++ src/gateway/server-methods/update.ts | 18 ++ src/infra/update-post-core-finalize.test.ts | 164 +++++++++++++++++ src/infra/update-post-core-finalize.ts | 186 ++++++++++++++++++++ 4 files changed, 461 insertions(+) create mode 100644 src/infra/update-post-core-finalize.test.ts create mode 100644 src/infra/update-post-core-finalize.ts diff --git a/src/gateway/server-methods/update.test.ts b/src/gateway/server-methods/update.test.ts index 06029414f9a..8f3d4f08b42 100644 --- a/src/gateway/server-methods/update.test.ts +++ b/src/gateway/server-methods/update.test.ts @@ -33,6 +33,15 @@ const startManagedServiceUpdateHandoffMock = vi.fn(async () => ({ const scheduleGatewaySigusr1RestartMock = vi.fn(() => ({ scheduled: true })); +type PostCoreFinalizeOutcome = Awaited< + ReturnType< + typeof import("../../infra/update-post-core-finalize.js").runPostCoreFinalizeAfterGatewayUpdate + > +>; +const runPostCoreFinalizeAfterGatewayUpdateMock = vi.fn<() => Promise>( + async () => ({ status: "skipped", reason: "not-git-update" }), +); + type UpdateRunPayload = { ok: boolean; result?: { status?: string; reason?: string; mode?: string }; @@ -110,6 
+119,18 @@ vi.mock("../../infra/update-runner.js", () => ({ runGatewayUpdate: runGatewayUpdateMock, })); +// Keep the real `foldPostCoreFinalizeIntoResult` so the restart-gate behavior on +// finalize failure is exercised; only stub the subprocess-spawning finalizer. +vi.mock("../../infra/update-post-core-finalize.js", async () => { + const actual = await vi.importActual( + "../../infra/update-post-core-finalize.js", + ); + return { + ...actual, + runPostCoreFinalizeAfterGatewayUpdate: runPostCoreFinalizeAfterGatewayUpdateMock, + }; +}); + vi.mock("../../../packages/gateway-protocol/src/index.js", () => ({ validateUpdateStatusParams: () => true, validateUpdateRunParams: () => true, @@ -182,6 +203,11 @@ beforeEach(() => { startManagedServiceUpdateHandoffMock.mockClear(); scheduleGatewaySigusr1RestartMock.mockClear(); scheduleGatewaySigusr1RestartMock.mockReturnValue({ scheduled: true }); + runPostCoreFinalizeAfterGatewayUpdateMock.mockClear(); + runPostCoreFinalizeAfterGatewayUpdateMock.mockResolvedValue({ + status: "skipped", + reason: "not-git-update", + }); }); async function invokeUpdateRun( @@ -663,6 +689,73 @@ describe("update.run restart scheduling", () => { }); }); +describe("update.run post-core plugin finalize", () => { + function mockGitOkUpdate(root: string) { + runGatewayUpdateMock.mockResolvedValueOnce({ + status: "ok", + mode: "git", + root, + after: { version: "2026.6.1" }, + steps: [], + durationMs: 100, + }); + mockGitInstallSurface(root); + } + + it("resumes official plugin convergence after a git/source core update", async () => { + runPostCoreFinalizeAfterGatewayUpdateMock.mockResolvedValueOnce({ + status: "ok", + entrypoint: "/tmp/openclaw-git/dist/index.mjs", + }); + mockGitOkUpdate("/tmp/openclaw-git"); + + const payload = await captureUpdateRunPayload(); + + expect(runPostCoreFinalizeAfterGatewayUpdateMock).toHaveBeenCalledTimes(1); + const [finalizeParams] = firstMockCall( + runPostCoreFinalizeAfterGatewayUpdateMock, + "post-core finalize", 
+ ) as [{ result?: UpdateRunResult }]; + expect(finalizeParams.result?.mode).toBe("git"); + expect(finalizeParams.result?.status).toBe("ok"); + // Convergence succeeded, so the gateway is allowed to restart onto the new core. + expect(scheduleGatewaySigusr1RestartMock).toHaveBeenCalledTimes(1); + expect(payload?.ok).toBe(true); + expect(payload?.result?.status).toBe("ok"); + }); + + it("blocks the restart when post-core plugin finalize fails", async () => { + runPostCoreFinalizeAfterGatewayUpdateMock.mockResolvedValueOnce({ + status: "error", + reason: "nonzero-exit", + entrypoint: "/tmp/openclaw-git/dist/index.mjs", + exitCode: 1, + message: "convergence failed", + }); + mockGitOkUpdate("/tmp/openclaw-git"); + + const payload = await captureUpdateRunPayload(); + + // Restarting onto the new core with unreconciled plugins is the bug we avoid. + expect(scheduleGatewaySigusr1RestartMock).not.toHaveBeenCalled(); + expect(payload?.ok).toBe(false); + expect(payload?.result?.status).toBe("error"); + expect(payload?.result?.reason).toBe("post-core-plugin-finalize-failed"); + expect(readCapturedPayload().status).toBe("error"); + }); + + it("does not run finalize on the managed-service handoff path", async () => { + detectRespawnSupervisorMock.mockReturnValueOnce("launchd"); + mockGlobalInstallSurface(); + + await captureUpdateRunPayload(); + + expect(runGatewayUpdateMock).not.toHaveBeenCalled(); + expect(runPostCoreFinalizeAfterGatewayUpdateMock).not.toHaveBeenCalled(); + expect(startManagedServiceUpdateHandoffMock).toHaveBeenCalledTimes(1); + }); +}); + describe("update.status", () => { it("refreshes the latest update sentinel before responding", async () => { getLatestUpdateRestartSentinelMock.mockReturnValueOnce({ diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index b9efc1b8daa..9b6910407ff 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -21,6 +21,10 @@ import { 
formatManagedServiceUpdateCommand, startManagedServiceUpdateHandoff, } from "../../infra/update-managed-service-handoff.js"; +import { + foldPostCoreFinalizeIntoResult, + runPostCoreFinalizeAfterGatewayUpdate, +} from "../../infra/update-post-core-finalize.js"; import { buildUpdateRestartSentinelPayload, type UpdateRestartSentinelMeta, @@ -277,6 +281,20 @@ export const updateHandlers: GatewayRequestHandlers = { argv1: process.argv[1], channel: configChannel ?? undefined, }); + // The CLI `openclaw update` resumes post-core plugin convergence after a + // git/source core update; the RPC path did not, leaving official managed + // plugins stale on the new core. Run the finalizer here to match. + const finalizeOutcome = await runPostCoreFinalizeAfterGatewayUpdate({ + result, + channel: configChannel ?? undefined, + ...(timeoutMs === undefined ? {} : { timeoutMs }), + }); + if (finalizeOutcome.status === "error") { + context?.logGateway?.warn( + `update.run post-core plugin finalize failed ${formatControlPlaneActor(actor)} reason=${finalizeOutcome.reason}`, + ); + } + result = foldPostCoreFinalizeIntoResult(result, finalizeOutcome); } } catch { result = { diff --git a/src/infra/update-post-core-finalize.test.ts b/src/infra/update-post-core-finalize.test.ts new file mode 100644 index 00000000000..b842671adda --- /dev/null +++ b/src/infra/update-post-core-finalize.test.ts @@ -0,0 +1,164 @@ +import { describe, expect, it, vi } from "vitest"; +import { + foldPostCoreFinalizeIntoResult, + type PostCoreFinalizeSpawner, + runPostCoreFinalizeAfterGatewayUpdate, +} from "./update-post-core-finalize.js"; +import type { UpdateRunResult } from "./update-runner.js"; + +function gitOkResult(overrides: Partial = {}): UpdateRunResult { + return { + status: "ok", + mode: "git", + root: "/srv/openclaw", + before: { sha: "aaa", version: "2026.5.3" }, + after: { sha: "bbb", version: "2026.6.1" }, + steps: [], + durationMs: 10, + ...overrides, + }; +} + +const ENTRYPOINT = 
"/srv/openclaw/dist/index.mjs"; +const resolveEntrypointOk = async () => ENTRYPOINT; + +describe("runPostCoreFinalizeAfterGatewayUpdate", () => { + it("skips non-git update modes", async () => { + const spawnFinalize = vi.fn(); + for (const result of [ + gitOkResult({ mode: "pnpm" }), + gitOkResult({ status: "error" }), + gitOkResult({ status: "skipped" }), + gitOkResult({ root: undefined }), + ]) { + const outcome = await runPostCoreFinalizeAfterGatewayUpdate({ + result, + resolveEntrypoint: resolveEntrypointOk, + spawnFinalize, + }); + expect(outcome).toEqual({ status: "skipped", reason: "not-git-update" }); + } + expect(spawnFinalize).not.toHaveBeenCalled(); + }); + + it("skips when no built entrypoint is found", async () => { + const spawnFinalize = vi.fn(); + const outcome = await runPostCoreFinalizeAfterGatewayUpdate({ + result: gitOkResult(), + resolveEntrypoint: async () => undefined, + spawnFinalize, + }); + expect(outcome).toEqual({ status: "skipped", reason: "entrypoint-missing" }); + expect(spawnFinalize).not.toHaveBeenCalled(); + }); + + it("spawns `update finalize` against the rebuilt binary and reports success", async () => { + const spawnFinalize = vi.fn(async () => ({ code: 0 })); + const outcome = await runPostCoreFinalizeAfterGatewayUpdate({ + result: gitOkResult(), + channel: "stable", + timeoutMs: 120_000, + resolveEntrypoint: resolveEntrypointOk, + spawnFinalize, + }); + expect(outcome).toEqual({ status: "ok", entrypoint: ENTRYPOINT }); + expect(spawnFinalize).toHaveBeenCalledTimes(1); + const call = spawnFinalize.mock.calls[0]![0]; + // Reconcile runs through the designed finalizer; never restarts (RPC owns restart). + expect(call.argv).toEqual([ + expect.any(String), + ENTRYPOINT, + "update", + "finalize", + "--json", + "--yes", + "--no-restart", + "--channel", + "stable", + "--timeout", + "120", + ]); + // Host-compat resolution is pinned to the just-installed core version. 
+ expect(call.env.OPENCLAW_COMPATIBILITY_HOST_VERSION).toBe("2026.6.1"); + }); + + it("omits channel/timeout flags when not provided", async () => { + const spawnFinalize = vi.fn(async () => ({ code: 0 })); + await runPostCoreFinalizeAfterGatewayUpdate({ + result: gitOkResult(), + resolveEntrypoint: resolveEntrypointOk, + spawnFinalize, + }); + const argv = spawnFinalize.mock.calls[0]![0].argv; + expect(argv).not.toContain("--channel"); + expect(argv).not.toContain("--timeout"); + }); + + it("reports error on a non-zero finalize exit", async () => { + const spawnFinalize = vi.fn(async () => ({ + code: 1, + stderr: "convergence failed", + })); + const outcome = await runPostCoreFinalizeAfterGatewayUpdate({ + result: gitOkResult(), + resolveEntrypoint: resolveEntrypointOk, + spawnFinalize, + }); + expect(outcome).toEqual({ + status: "error", + reason: "nonzero-exit", + entrypoint: ENTRYPOINT, + exitCode: 1, + message: "convergence failed", + }); + }); + + it("reports error when the finalize spawn throws", async () => { + const spawnFinalize = vi.fn(async () => { + throw new Error("ENOENT"); + }); + const outcome = await runPostCoreFinalizeAfterGatewayUpdate({ + result: gitOkResult(), + resolveEntrypoint: resolveEntrypointOk, + spawnFinalize, + }); + expect(outcome).toEqual({ + status: "error", + reason: "spawn-failed", + entrypoint: ENTRYPOINT, + message: "ENOENT", + }); + }); +}); + +describe("foldPostCoreFinalizeIntoResult", () => { + it("leaves the result unchanged for ok/skipped outcomes", () => { + const result = gitOkResult(); + expect(foldPostCoreFinalizeIntoResult(result, { status: "ok", entrypoint: ENTRYPOINT })).toBe( + result, + ); + expect( + foldPostCoreFinalizeIntoResult(result, { status: "skipped", reason: "not-git-update" }), + ).toBe(result); + }); + + it("flips status to error so the RPC restart gate is skipped", () => { + const result = gitOkResult(); + const folded = foldPostCoreFinalizeIntoResult(result, { + status: "error", + reason: 
"nonzero-exit", + entrypoint: ENTRYPOINT, + exitCode: 2, + message: "boom", + }); + expect(folded.status).toBe("error"); + expect(folded.reason).toBe("post-core-plugin-finalize-failed"); + expect(folded.steps.at(-1)).toMatchObject({ + name: "post-core plugin finalize", + exitCode: 2, + stderrTail: "boom", + }); + // Core update metadata is preserved for the sentinel. + expect(folded.after).toEqual(result.after); + }); +}); diff --git a/src/infra/update-post-core-finalize.ts b/src/infra/update-post-core-finalize.ts new file mode 100644 index 00000000000..b3dc5348ab7 --- /dev/null +++ b/src/infra/update-post-core-finalize.ts @@ -0,0 +1,186 @@ +// Resume post-core plugin convergence after a gateway control-plane git/source +// update. +// +// `runGatewayUpdate` (git mode) runs `openclaw doctor --fix` with +// `OPENCLAW_UPDATE_PARENT_SUPPORTS_DOCTOR_CONFIG_WRITE=1`, which makes the doctor +// pass DEFER configured-plugin repair to a later convergence step (see +// `shouldDeferConfiguredPluginInstallRepair`). The `openclaw update` CLI resumes +// that deferred work in a fresh post-core process; the gateway `update.run` RPC +// did not, so a git/source core update would restart on the new core with stale +// official plugins still pinned to versions built against removed core APIs. +// +// This helper closes that CLI/RPC asymmetry by spawning the freshly-built +// binary's hidden `openclaw update finalize` entrypoint — the designed +// "external core runtime change" finalizer that runs doctor plus +// `updatePluginsAfterCoreUpdate` (which calls +// `updateNpmInstalledPlugins({ syncOfficialPluginInstalls: true, disableOnFailure: true })` +// and `runPostCorePluginConvergence`). Finalization never restarts, so the RPC +// handler keeps ownership of the gateway restart. 
import path from "node:path";
import { resolveGatewayInstallEntrypoint } from "../daemon/gateway-entrypoint.js";
import { runCommandWithTimeout } from "../process/exec.js";
import { resolveStableNodePath } from "./stable-node-path.js";
import type { UpdateChannel } from "./update-channels.js";
import type { UpdateRunResult } from "./update-runner.js";

/** Spawn timeout used when the caller supplies none: 30 minutes, in milliseconds. */
const DEFAULT_FINALIZE_TIMEOUT_MS = 30 * 60_000;

/**
 * Result of attempting post-core plugin finalization.
 *
 * - `skipped`: nothing was spawned (not a successful git update, or no built
 *   entrypoint was found).
 * - `ok`: the finalizer process exited with code 0.
 * - `error`: the finalizer exited with a non-zero/absent code (`nonzero-exit`)
 *   or could not be run at all (`spawn-failed`).
 */
export type PostCoreFinalizeOutcome =
  | { status: "skipped"; reason: "not-git-update" | "entrypoint-missing" }
  | { status: "ok"; entrypoint: string }
  | {
      status: "error";
      reason: "nonzero-exit" | "spawn-failed";
      entrypoint: string;
      exitCode?: number;
      message?: string;
    };

/** Minimal view of a finished finalizer process: exit code plus optional stderr. */
type FinalizeSpawnResult = { code: number | null; stderr?: string };

/**
 * Injectable process runner for the finalizer. Receives the full argv
 * (executable first), working directory, timeout in milliseconds, and the
 * environment to run with.
 */
export type PostCoreFinalizeSpawner = (params: {
  argv: string[];
  cwd: string;
  timeoutMs: number;
  env: NodeJS.ProcessEnv;
}) => Promise<FinalizeSpawnResult>;

/**
 * Default spawner: delegates to `runCommandWithTimeout` and forwards stderr
 * only when it is non-empty, so callers can test `stderr` for presence.
 */
const defaultFinalizeSpawner: PostCoreFinalizeSpawner = async ({ argv, cwd, timeoutMs, env }) => {
  const res = await runCommandWithTimeout(argv, { cwd, timeoutMs, env });
  return { code: res.code, ...(res.stderr ? { stderr: res.stderr } : {}) };
};

// Only git/source updates routed through `runGatewayUpdate` defer-and-drop
// plugin convergence. Package-manager/global installs already converge because
// the RPC routes them through `startManagedServiceUpdateHandoff`, which
// re-enters the full `openclaw update` CLI.
/**
 * Type guard for update results that require post-core plugin finalization:
 * a successful (`status === "ok"`) git-mode update with a non-empty `root`.
 */
function isGitUpdateNeedingFinalize(
  result: UpdateRunResult,
): result is UpdateRunResult & { root: string } {
  return (
    result.status === "ok" &&
    result.mode === "git" &&
    typeof result.root === "string" &&
    result.root.length > 0
  );
}

/**
 * Build the argv for `<node> <entrypoint> update finalize`.
 *
 * Always passes `--json --yes --no-restart`; appends `--channel` when a channel
 * is given and `--timeout` (whole seconds, rounded up, minimum 1) when
 * `timeoutMs` is a finite number.
 */
function buildFinalizeArgv(params: {
  nodePath: string;
  entrypoint: string;
  channel?: UpdateChannel;
  timeoutMs?: number;
}): string[] {
  const argv = [
    params.nodePath,
    params.entrypoint,
    "update",
    "finalize",
    "--json",
    "--yes",
    "--no-restart",
  ];
  if (params.channel) {
    argv.push("--channel", params.channel);
  }
  if (typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs)) {
    // `update finalize --timeout` is per-step seconds.
    argv.push("--timeout", String(Math.max(1, Math.ceil(params.timeoutMs / 1000))));
  }
  return argv;
}

/**
 * Run the rebuilt binary's `update finalize` entrypoint after a successful
 * git/source gateway update so official plugins reconcile against the new core.
 *
 * @param params.result - Result of the core update; anything other than a
 *   successful git-mode update with a root is skipped (`not-git-update`).
 * @param params.channel - Optional update channel forwarded as `--channel`.
 * @param params.timeoutMs - Optional timeout; used both as the spawn timeout
 *   and (converted to seconds) as `--timeout`. Non-finite values are ignored.
 * @param params.resolveEntrypoint - Override for locating the built entrypoint
 *   under `result.root`; a falsy result skips with `entrypoint-missing`. A
 *   rejection from this resolver propagates to the caller.
 * @param params.resolveNodePath - Override for resolving the node executable
 *   from `process.execPath`; defaults to `resolveStableNodePath`.
 * @param params.spawnFinalize - Override for the process runner.
 * @param params.env - Base environment; defaults to `process.env`.
 * @returns The finalize outcome. Once an entrypoint is known, failures are
 *   reported as `status: "error"` outcomes rather than thrown.
 */
export async function runPostCoreFinalizeAfterGatewayUpdate(params: {
  result: UpdateRunResult;
  channel?: UpdateChannel;
  timeoutMs?: number;
  resolveEntrypoint?: (root: string) => Promise<string | null | undefined>;
  resolveNodePath?: (execPath: string) => string | Promise<string>;
  spawnFinalize?: PostCoreFinalizeSpawner;
  env?: NodeJS.ProcessEnv;
}): Promise<PostCoreFinalizeOutcome> {
  const { result } = params;
  if (!isGitUpdateNeedingFinalize(result)) {
    return { status: "skipped", reason: "not-git-update" };
  }
  const resolveEntrypoint = params.resolveEntrypoint ?? resolveGatewayInstallEntrypoint;
  const entrypoint = await resolveEntrypoint(result.root);
  if (!entrypoint) {
    return { status: "skipped", reason: "entrypoint-missing" };
  }

  const spawnFinalize = params.spawnFinalize ?? defaultFinalizeSpawner;
  const resolveNodePath = params.resolveNodePath ?? resolveStableNodePath;
  const timeoutMs =
    typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs)
      ? params.timeoutMs
      : undefined;
  // Pin the finalizer's host-compat resolution to the just-installed core
  // version so plugins reconcile against the new core, not the running process.
  const compatHostVersion = result.after?.version ?? undefined;
  const baseEnv = params.env ?? process.env;
  const env: NodeJS.ProcessEnv = compatHostVersion
    ? { ...baseEnv, OPENCLAW_COMPATIBILITY_HOST_VERSION: compatHostVersion }
    : { ...baseEnv };

  try {
    // Resolved inside the try so a failure here is reported as a finalize
    // error (keeping the entrypoint for diagnostics) instead of escaping as an
    // exception the caller can only treat generically.
    const nodePath = await resolveNodePath(process.execPath);
    const argv = buildFinalizeArgv({
      nodePath,
      entrypoint,
      ...(params.channel ? { channel: params.channel } : {}),
      ...(timeoutMs === undefined ? {} : { timeoutMs }),
    });
    const spawnResult = await spawnFinalize({
      argv,
      cwd: path.dirname(entrypoint),
      timeoutMs: timeoutMs ?? DEFAULT_FINALIZE_TIMEOUT_MS,
      env,
    });
    if (spawnResult.code === 0) {
      return { status: "ok", entrypoint };
    }
    const hasExitCode = typeof spawnResult.code === "number";
    // A finalizer that ends with no exit code and no stderr would otherwise
    // yield an error with nothing to log; say so explicitly.
    const message = spawnResult.stderr
      ? spawnResult.stderr
      : hasExitCode
        ? undefined
        : "update finalize ended without an exit code";
    return {
      status: "error",
      reason: "nonzero-exit",
      entrypoint,
      ...(typeof spawnResult.code === "number" ? { exitCode: spawnResult.code } : {}),
      ...(message ? { message } : {}),
    };
  } catch (err) {
    return {
      status: "error",
      reason: "spawn-failed",
      entrypoint,
      message: err instanceof Error ? err.message : String(err),
    };
  }
}

/**
 * Fold a finalize failure into the update result so the RPC handler's existing
 * `result.status === "ok"` restart gate skips the restart: restarting on the
 * new core after convergence failed would load the stale plugins we just
 * failed to reconcile. Mirrors the CLI, which exits non-zero before restarting
 * on post-core convergence failure.
 *
 * @param result - The core update result.
 * @param outcome - The finalize outcome to merge in.
 * @returns `result` itself (same reference) for `ok`/`skipped` outcomes;
 *   otherwise a copy with `status: "error"`, reason
 *   `post-core-plugin-finalize-failed`, and an extra trailing step describing
 *   the failure. Other fields of `result` are preserved.
 */
export function foldPostCoreFinalizeIntoResult(
  result: UpdateRunResult,
  outcome: PostCoreFinalizeOutcome,
): UpdateRunResult {
  if (outcome.status !== "error") {
    return result;
  }
  return {
    ...result,
    status: "error",
    reason: "post-core-plugin-finalize-failed",
    steps: [
      ...result.steps,
      {
        name: "post-core plugin finalize",
        command: "openclaw update finalize",
        cwd: result.root ?? process.cwd(),
        // The outcome carries no timing information, so no duration is recorded.
        durationMs: 0,
        // Spawn failures have no exit code; report them as a generic 1.
        exitCode: outcome.reason === "nonzero-exit" ? (outcome.exitCode ?? 1) : 1,
        ...(outcome.message ? { stderrTail: outcome.message } : {}),
      },
    ],
  };
}