fix(update): resume official plugin convergence after gateway git update

The gateway update.run RPC updated git/source installs via runGatewayUpdate
but, unlike the openclaw update CLI, never resumed the post-core plugin
convergence that runGatewayUpdate's doctor pass defers. As a result, after a
git/source core update the gateway would restart on the new core with official
managed plugins still pinned to versions built against removed core APIs.

Spawn the rebuilt binary's update finalize entrypoint after a successful
git update so official plugins reconcile to a host-compatible version, and
block the restart if convergence fails (mirroring the CLI).
This commit is contained in:
masatohoshino
2026-06-07 23:31:05 +09:00
committed by Vincent Koc
parent 2ac2b021cf
commit 90d385cb93
4 changed files with 461 additions and 0 deletions

View File

@@ -33,6 +33,15 @@ const startManagedServiceUpdateHandoffMock = vi.fn(async () => ({
// Mock for the gateway SIGUSR1 restart scheduler; its default implementation
// reports the restart as scheduled.
const scheduleGatewaySigusr1RestartMock = vi.fn(() => ({ scheduled: true }));
// Outcome type derived from the real finalizer's return type, so the mock below
// cannot drift from the module's declared contract.
type PostCoreFinalizeOutcome = Awaited<
ReturnType<
typeof import("../../infra/update-post-core-finalize.js").runPostCoreFinalizeAfterGatewayUpdate
>
>;
// Stand-in for the post-core finalizer. Its default implementation resolves to
// the "skipped" / "not-git-update" outcome.
const runPostCoreFinalizeAfterGatewayUpdateMock = vi.fn<() => Promise<PostCoreFinalizeOutcome>>(
async () => ({ status: "skipped", reason: "not-git-update" }),
);
type UpdateRunPayload = {
ok: boolean;
result?: { status?: string; reason?: string; mode?: string };
@@ -110,6 +119,18 @@ vi.mock("../../infra/update-runner.js", () => ({
runGatewayUpdate: runGatewayUpdateMock,
}));
// Replace only the subprocess-spawning finalizer. Every other export of the
// module — notably `foldPostCoreFinalizeIntoResult` — stays real so that a
// finalize failure still exercises the restart-gate behavior in these tests.
vi.mock("../../infra/update-post-core-finalize.js", async () => {
  const realModule = await vi.importActual<
    typeof import("../../infra/update-post-core-finalize.js")
  >("../../infra/update-post-core-finalize.js");
  const overrides = {
    runPostCoreFinalizeAfterGatewayUpdate: runPostCoreFinalizeAfterGatewayUpdateMock,
  };
  return { ...realModule, ...overrides };
});
vi.mock("../../../packages/gateway-protocol/src/index.js", () => ({
validateUpdateStatusParams: () => true,
validateUpdateRunParams: () => true,
@@ -182,6 +203,11 @@ beforeEach(() => {
startManagedServiceUpdateHandoffMock.mockClear();
scheduleGatewaySigusr1RestartMock.mockClear();
scheduleGatewaySigusr1RestartMock.mockReturnValue({ scheduled: true });
runPostCoreFinalizeAfterGatewayUpdateMock.mockClear();
runPostCoreFinalizeAfterGatewayUpdateMock.mockResolvedValue({
status: "skipped",
reason: "not-git-update",
});
});
async function invokeUpdateRun(
@@ -663,6 +689,73 @@ describe("update.run restart scheduling", () => {
});
});
// Handler-level tests for the post-core plugin finalize step of `update.run`:
// the finalizer is invoked after a successful git update, a finalize failure
// blocks the restart, and the managed-service handoff path never runs it.
describe("update.run post-core plugin finalize", () => {
// Queue a single successful git-mode core update result rooted at `root`, then
// call `mockGitInstallSurface(root)` (helper defined outside this view —
// presumably makes the install look like a git checkout; confirm there).
function mockGitOkUpdate(root: string) {
runGatewayUpdateMock.mockResolvedValueOnce({
status: "ok",
mode: "git",
root,
after: { version: "2026.6.1" },
steps: [],
durationMs: 100,
});
mockGitInstallSurface(root);
}
it("resumes official plugin convergence after a git/source core update", async () => {
// Finalizer reports success for this one call only.
runPostCoreFinalizeAfterGatewayUpdateMock.mockResolvedValueOnce({
status: "ok",
entrypoint: "/tmp/openclaw-git/dist/index.mjs",
});
mockGitOkUpdate("/tmp/openclaw-git");
const payload = await captureUpdateRunPayload();
expect(runPostCoreFinalizeAfterGatewayUpdateMock).toHaveBeenCalledTimes(1);
// The finalizer must receive the core update result (git mode, ok status).
const [finalizeParams] = firstMockCall(
runPostCoreFinalizeAfterGatewayUpdateMock,
"post-core finalize",
) as [{ result?: UpdateRunResult }];
expect(finalizeParams.result?.mode).toBe("git");
expect(finalizeParams.result?.status).toBe("ok");
// Convergence succeeded, so the gateway is allowed to restart onto the new core.
expect(scheduleGatewaySigusr1RestartMock).toHaveBeenCalledTimes(1);
expect(payload?.ok).toBe(true);
expect(payload?.result?.status).toBe("ok");
});
it("blocks the restart when post-core plugin finalize fails", async () => {
// Finalizer reports a non-zero exit; the real fold helper (left unmocked)
// turns this into an error result.
runPostCoreFinalizeAfterGatewayUpdateMock.mockResolvedValueOnce({
status: "error",
reason: "nonzero-exit",
entrypoint: "/tmp/openclaw-git/dist/index.mjs",
exitCode: 1,
message: "convergence failed",
});
mockGitOkUpdate("/tmp/openclaw-git");
const payload = await captureUpdateRunPayload();
// Restarting onto the new core with unreconciled plugins is the bug we avoid.
expect(scheduleGatewaySigusr1RestartMock).not.toHaveBeenCalled();
expect(payload?.ok).toBe(false);
expect(payload?.result?.status).toBe("error");
expect(payload?.result?.reason).toBe("post-core-plugin-finalize-failed");
// `readCapturedPayload` is defined outside this view; the value it returns
// must also carry the error status.
expect(readCapturedPayload().status).toBe("error");
});
it("does not run finalize on the managed-service handoff path", async () => {
// A detected "launchd" supervisor plus a global install surface sends the
// request down the managed-service handoff path, as asserted below.
detectRespawnSupervisorMock.mockReturnValueOnce("launchd");
mockGlobalInstallSurface();
await captureUpdateRunPayload();
// Neither the in-process updater nor the finalizer runs; only the handoff does.
expect(runGatewayUpdateMock).not.toHaveBeenCalled();
expect(runPostCoreFinalizeAfterGatewayUpdateMock).not.toHaveBeenCalled();
expect(startManagedServiceUpdateHandoffMock).toHaveBeenCalledTimes(1);
});
});
describe("update.status", () => {
it("refreshes the latest update sentinel before responding", async () => {
getLatestUpdateRestartSentinelMock.mockReturnValueOnce({

View File

@@ -21,6 +21,10 @@ import {
formatManagedServiceUpdateCommand,
startManagedServiceUpdateHandoff,
} from "../../infra/update-managed-service-handoff.js";
import {
foldPostCoreFinalizeIntoResult,
runPostCoreFinalizeAfterGatewayUpdate,
} from "../../infra/update-post-core-finalize.js";
import {
buildUpdateRestartSentinelPayload,
type UpdateRestartSentinelMeta,
@@ -277,6 +281,20 @@ export const updateHandlers: GatewayRequestHandlers = {
argv1: process.argv[1],
channel: configChannel ?? undefined,
});
// The CLI `openclaw update` resumes post-core plugin convergence after a
// git/source core update; the RPC path did not, leaving official managed
// plugins stale on the new core. Run the finalizer here to match.
const finalizeOutcome = await runPostCoreFinalizeAfterGatewayUpdate({
result,
channel: configChannel ?? undefined,
...(timeoutMs === undefined ? {} : { timeoutMs }),
});
if (finalizeOutcome.status === "error") {
context?.logGateway?.warn(
`update.run post-core plugin finalize failed ${formatControlPlaneActor(actor)} reason=${finalizeOutcome.reason}`,
);
}
result = foldPostCoreFinalizeIntoResult(result, finalizeOutcome);
}
} catch {
result = {

View File

@@ -0,0 +1,164 @@
import { describe, expect, it, vi } from "vitest";
import {
foldPostCoreFinalizeIntoResult,
type PostCoreFinalizeSpawner,
runPostCoreFinalizeAfterGatewayUpdate,
} from "./update-post-core-finalize.js";
import type { UpdateRunResult } from "./update-runner.js";
/**
 * Build an update-result fixture describing a successful git-mode core update.
 *
 * @param overrides - Fields that replace the fixture defaults. A key that is
 *   present with value `undefined` still overrides the default.
 * @returns A fresh result object on every call.
 */
function gitOkResult(overrides: Partial<UpdateRunResult> = {}): UpdateRunResult {
  const baseline: UpdateRunResult = {
    status: "ok",
    mode: "git",
    root: "/srv/openclaw",
    before: { sha: "aaa", version: "2026.5.3" },
    after: { sha: "bbb", version: "2026.6.1" },
    steps: [],
    durationMs: 10,
  };
  return { ...baseline, ...overrides };
}
/** Path the tests treat as the rebuilt binary's entrypoint. */
const ENTRYPOINT = "/srv/openclaw/dist/index.mjs";
/** Entrypoint resolver stub that always resolves to {@link ENTRYPOINT}. */
const resolveEntrypointOk = (): Promise<string> => Promise.resolve(ENTRYPOINT);
// Unit tests for the finalizer itself. The entrypoint resolver and the process
// spawner are injected, so no test here launches a real subprocess.
describe("runPostCoreFinalizeAfterGatewayUpdate", () => {
it("skips non-git update modes", async () => {
const spawnFinalize = vi.fn<PostCoreFinalizeSpawner>();
// Besides a non-git mode, this table also covers non-ok statuses and a missing
// root: every case is expected to report the same "not-git-update" skip.
for (const result of [
gitOkResult({ mode: "pnpm" }),
gitOkResult({ status: "error" }),
gitOkResult({ status: "skipped" }),
gitOkResult({ root: undefined }),
]) {
const outcome = await runPostCoreFinalizeAfterGatewayUpdate({
result,
resolveEntrypoint: resolveEntrypointOk,
spawnFinalize,
});
expect(outcome).toEqual({ status: "skipped", reason: "not-git-update" });
}
expect(spawnFinalize).not.toHaveBeenCalled();
});
it("skips when no built entrypoint is found", async () => {
const spawnFinalize = vi.fn<PostCoreFinalizeSpawner>();
const outcome = await runPostCoreFinalizeAfterGatewayUpdate({
result: gitOkResult(),
// Resolver finds nothing, so there is no binary to spawn.
resolveEntrypoint: async () => undefined,
spawnFinalize,
});
expect(outcome).toEqual({ status: "skipped", reason: "entrypoint-missing" });
expect(spawnFinalize).not.toHaveBeenCalled();
});
it("spawns `update finalize` against the rebuilt binary and reports success", async () => {
const spawnFinalize = vi.fn<PostCoreFinalizeSpawner>(async () => ({ code: 0 }));
const outcome = await runPostCoreFinalizeAfterGatewayUpdate({
result: gitOkResult(),
channel: "stable",
timeoutMs: 120_000,
resolveEntrypoint: resolveEntrypointOk,
spawnFinalize,
});
expect(outcome).toEqual({ status: "ok", entrypoint: ENTRYPOINT });
expect(spawnFinalize).toHaveBeenCalledTimes(1);
const call = spawnFinalize.mock.calls[0]![0];
// Reconcile runs through the designed finalizer; never restarts (RPC owns restart).
// argv[0] is the resolved node binary, so only its type is checked; the
// 120_000 ms timeout is expected to arrive as "120" (seconds).
expect(call.argv).toEqual([
expect.any(String),
ENTRYPOINT,
"update",
"finalize",
"--json",
"--yes",
"--no-restart",
"--channel",
"stable",
"--timeout",
"120",
]);
// Host-compat resolution is pinned to the just-installed core version.
expect(call.env.OPENCLAW_COMPATIBILITY_HOST_VERSION).toBe("2026.6.1");
});
it("omits channel/timeout flags when not provided", async () => {
const spawnFinalize = vi.fn<PostCoreFinalizeSpawner>(async () => ({ code: 0 }));
await runPostCoreFinalizeAfterGatewayUpdate({
result: gitOkResult(),
resolveEntrypoint: resolveEntrypointOk,
spawnFinalize,
});
const argv = spawnFinalize.mock.calls[0]![0].argv;
expect(argv).not.toContain("--channel");
expect(argv).not.toContain("--timeout");
});
it("reports error on a non-zero finalize exit", async () => {
// The spawner completes but with exit code 1 and stderr text.
const spawnFinalize = vi.fn<PostCoreFinalizeSpawner>(async () => ({
code: 1,
stderr: "convergence failed",
}));
const outcome = await runPostCoreFinalizeAfterGatewayUpdate({
result: gitOkResult(),
resolveEntrypoint: resolveEntrypointOk,
spawnFinalize,
});
// stderr is surfaced as `message`, the exit code as `exitCode`.
expect(outcome).toEqual({
status: "error",
reason: "nonzero-exit",
entrypoint: ENTRYPOINT,
exitCode: 1,
message: "convergence failed",
});
});
it("reports error when the finalize spawn throws", async () => {
// The spawner rejects instead of returning an exit code.
const spawnFinalize = vi.fn<PostCoreFinalizeSpawner>(async () => {
throw new Error("ENOENT");
});
const outcome = await runPostCoreFinalizeAfterGatewayUpdate({
result: gitOkResult(),
resolveEntrypoint: resolveEntrypointOk,
spawnFinalize,
});
// The thrown error's message is reported; no exitCode is present.
expect(outcome).toEqual({
status: "error",
reason: "spawn-failed",
entrypoint: ENTRYPOINT,
message: "ENOENT",
});
});
});
// Unit tests for folding a finalize outcome into the core update result.
describe("foldPostCoreFinalizeIntoResult", () => {
  type FoldOutcome = Parameters<typeof foldPostCoreFinalizeIntoResult>[1];

  it("leaves the result unchanged for ok/skipped outcomes", () => {
    const untouched = gitOkResult();
    const okOutcome: FoldOutcome = { status: "ok", entrypoint: ENTRYPOINT };
    const skippedOutcome: FoldOutcome = { status: "skipped", reason: "not-git-update" };

    // Identity, not just equality: the very same object must come back.
    expect(foldPostCoreFinalizeIntoResult(untouched, okOutcome)).toBe(untouched);
    expect(foldPostCoreFinalizeIntoResult(untouched, skippedOutcome)).toBe(untouched);
  });

  it("flips status to error so the RPC restart gate is skipped", () => {
    const coreResult = gitOkResult();
    const failure: FoldOutcome = {
      status: "error",
      reason: "nonzero-exit",
      entrypoint: ENTRYPOINT,
      exitCode: 2,
      message: "boom",
    };

    const folded = foldPostCoreFinalizeIntoResult(coreResult, failure);

    expect(folded.status).toBe("error");
    expect(folded.reason).toBe("post-core-plugin-finalize-failed");
    // The failure is recorded as the last step of the folded result.
    const appendedStep = folded.steps.at(-1);
    expect(appendedStep).toMatchObject({
      name: "post-core plugin finalize",
      exitCode: 2,
      stderrTail: "boom",
    });
    // Core update metadata is preserved for the sentinel.
    expect(folded.after).toEqual(coreResult.after);
  });
});

View File

@@ -0,0 +1,186 @@
// Resume post-core plugin convergence after a gateway control-plane git/source
// update.
//
// `runGatewayUpdate` (git mode) runs `openclaw doctor --fix` with
// `OPENCLAW_UPDATE_PARENT_SUPPORTS_DOCTOR_CONFIG_WRITE=1`, which makes the doctor
// pass DEFER configured-plugin repair to a later convergence step (see
// `shouldDeferConfiguredPluginInstallRepair`). The `openclaw update` CLI resumes
// that deferred work in a fresh post-core process; the gateway `update.run` RPC
// did not, so a git/source core update would restart on the new core with stale
// official plugins still pinned to versions built against removed core APIs.
//
// This helper closes that CLI/RPC asymmetry by spawning the freshly-built
// binary's hidden `openclaw update finalize` entrypoint — the designed
// "external core runtime change" finalizer that runs doctor plus
// `updatePluginsAfterCoreUpdate` (which calls
// `updateNpmInstalledPlugins({ syncOfficialPluginInstalls: true, disableOnFailure: true })`
// and `runPostCorePluginConvergence`). Finalization never restarts, so the RPC
// handler keeps ownership of the gateway restart.
import path from "node:path";
import { resolveGatewayInstallEntrypoint } from "../daemon/gateway-entrypoint.js";
import { runCommandWithTimeout } from "../process/exec.js";
import { resolveStableNodePath } from "./stable-node-path.js";
import type { UpdateChannel } from "./update-channels.js";
import type { UpdateRunResult } from "./update-runner.js";
/** Subprocess timeout (30 minutes) applied when the caller supplies none. */
const DEFAULT_FINALIZE_TIMEOUT_MS = 30 * 60 * 1000;

/**
 * Result of attempting post-core plugin finalization.
 *
 * - `ok`: the finalize subprocess exited with code 0.
 * - `skipped`: nothing was spawned, either because the update result did not
 *   qualify or because no entrypoint was resolved.
 * - `error`: the subprocess exited non-zero (`exitCode`/`message` are present
 *   when available) or could not be run at all.
 */
export type PostCoreFinalizeOutcome =
  | { status: "ok"; entrypoint: string }
  | { status: "skipped"; reason: "entrypoint-missing" | "not-git-update" }
  | {
      status: "error";
      entrypoint: string;
      reason: "spawn-failed" | "nonzero-exit";
      message?: string;
      exitCode?: number;
    };

/** Minimal view of a finished finalize subprocess: exit code and optional stderr text. */
type FinalizeSpawnResult = {
  code: number | null;
  stderr?: string;
};

/** Injectable function that runs the finalize command; tests substitute a fake for it. */
export type PostCoreFinalizeSpawner = (params: {
  argv: string[];
  cwd: string;
  timeoutMs: number;
  env: NodeJS.ProcessEnv;
}) => Promise<FinalizeSpawnResult>;
/**
 * Production spawner: runs the finalize command through `runCommandWithTimeout`
 * and reports only its exit code plus stderr when stderr is non-empty.
 */
const defaultFinalizeSpawner: PostCoreFinalizeSpawner = async (request) => {
  const { argv, cwd, timeoutMs, env } = request;
  const completed = await runCommandWithTimeout(argv, { cwd, timeoutMs, env });
  const stderr = completed.stderr;
  // Leave the `stderr` key out entirely when there is no stderr text.
  return stderr ? { code: completed.code, stderr } : { code: completed.code };
};
// Plugin convergence is deferred-and-dropped only for git/source updates that
// went through `runGatewayUpdate`. Package-manager/global installs already
// converge: the RPC hands them to `startManagedServiceUpdateHandoff`, which
// re-enters the full `openclaw update` CLI.
/**
 * Type guard: true only for a successful (`status === "ok"`) git-mode update
 * result that carries a non-empty `root` path.
 */
function isGitUpdateNeedingFinalize(
  result: UpdateRunResult,
): result is UpdateRunResult & { root: string } {
  if (result.status !== "ok" || result.mode !== "git") {
    return false;
  }
  const { root } = result;
  return typeof root === "string" && root.length > 0;
}
/**
 * Assemble the command line for the `update finalize` entrypoint.
 *
 * The fixed prefix is `<nodePath> <entrypoint> update finalize --json --yes
 * --no-restart`. `--channel <channel>` follows when a channel is given, then
 * `--timeout <seconds>` when `timeoutMs` is a finite number.
 *
 * @param params.nodePath - Node binary placed at argv[0].
 * @param params.entrypoint - Script path placed at argv[1].
 * @param params.channel - Optional update channel to forward.
 * @param params.timeoutMs - Optional timeout in milliseconds, converted to
 *   whole seconds (rounded up, never below 1).
 * @returns The complete argv array.
 */
function buildFinalizeArgv(params: {
  nodePath: string;
  entrypoint: string;
  channel?: UpdateChannel;
  timeoutMs?: number;
}): string[] {
  const { nodePath, entrypoint, channel, timeoutMs } = params;
  const channelFlags = channel ? ["--channel", channel] : [];
  // `update finalize --timeout` is per-step seconds.
  const timeoutFlags =
    typeof timeoutMs === "number" && Number.isFinite(timeoutMs)
      ? ["--timeout", String(Math.max(1, Math.ceil(timeoutMs / 1000)))]
      : [];
  return [
    nodePath,
    entrypoint,
    "update",
    "finalize",
    "--json",
    "--yes",
    "--no-restart",
    ...channelFlags,
    ...timeoutFlags,
  ];
}
/**
 * Resume the plugin convergence that a gateway git/source core update deferred
 * by spawning the rebuilt binary's `update finalize` entrypoint.
 *
 * Nothing is spawned (outcome `skipped`) when `result` is not a successful
 * git-mode update with a root, or when no entrypoint is resolved for that
 * root. Otherwise the finalize command runs with the entrypoint's directory as
 * its working directory.
 *
 * @param params.result - Result of the core update that just ran.
 * @param params.channel - Optional update channel forwarded as `--channel`.
 * @param params.timeoutMs - Optional timeout. A finite value is rounded up to
 *   whole seconds (minimum 1 s); that same rounded value is forwarded as
 *   `--timeout` and used as the subprocess timeout. When absent or non-finite,
 *   no `--timeout` flag is sent and the 30-minute default bounds the process.
 * @param params.resolveEntrypoint - Override for locating the built entrypoint
 *   under the update root (defaults to `resolveGatewayInstallEntrypoint`).
 * @param params.spawnFinalize - Override for running the command (defaults to
 *   the `runCommandWithTimeout`-backed spawner).
 * @param params.env - Base environment for the child (defaults to `process.env`).
 * @returns `ok` on exit code 0; `error`/`nonzero-exit` on any other exit;
 *   `error`/`spawn-failed` when resolving the node binary or running the
 *   command throws. A rejection from `resolveEntrypoint` is not converted and
 *   still propagates to the caller.
 */
export async function runPostCoreFinalizeAfterGatewayUpdate(params: {
  result: UpdateRunResult;
  channel?: UpdateChannel;
  timeoutMs?: number;
  resolveEntrypoint?: (root: string) => Promise<string | undefined>;
  spawnFinalize?: PostCoreFinalizeSpawner;
  env?: NodeJS.ProcessEnv;
}): Promise<PostCoreFinalizeOutcome> {
  const { result } = params;
  if (!isGitUpdateNeedingFinalize(result)) {
    return { status: "skipped", reason: "not-git-update" };
  }

  const resolveEntrypoint = params.resolveEntrypoint ?? resolveGatewayInstallEntrypoint;
  const entrypoint = await resolveEntrypoint(result.root);
  if (!entrypoint) {
    return { status: "skipped", reason: "entrypoint-missing" };
  }

  const spawnFinalize = params.spawnFinalize ?? defaultFinalizeSpawner;

  // Normalize the timeout once. The child receives it as whole seconds (rounded
  // up, minimum 1), so the parent must wait at least that long; passing the raw
  // value would kill the child before the budget it was told it has for
  // sub-second, zero, negative, or non-multiple-of-1000 inputs.
  // NOTE(review): `--timeout` is a per-step budget while this value bounds the
  // whole subprocess; confirm a multi-step finalize cannot legitimately exceed it.
  const timeoutMs =
    typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs)
      ? Math.max(1, Math.ceil(params.timeoutMs / 1000)) * 1000
      : undefined;

  // Pin the finalizer's host-compat resolution to the just-installed core
  // version so plugins reconcile against the new core, not the running process.
  const compatHostVersion = result.after?.version;
  const baseEnv = params.env ?? process.env;
  const env: NodeJS.ProcessEnv = compatHostVersion
    ? { ...baseEnv, OPENCLAW_COMPATIBILITY_HOST_VERSION: compatHostVersion }
    : { ...baseEnv };

  try {
    // Resolved inside the try so a failure to locate the node binary is
    // reported as a structured `spawn-failed` outcome instead of a rejection.
    const nodePath = await resolveStableNodePath(process.execPath);
    const argv = buildFinalizeArgv({
      nodePath,
      entrypoint,
      ...(params.channel ? { channel: params.channel } : {}),
      ...(timeoutMs === undefined ? {} : { timeoutMs }),
    });
    const spawnResult = await spawnFinalize({
      argv,
      cwd: path.dirname(entrypoint),
      timeoutMs: timeoutMs ?? DEFAULT_FINALIZE_TIMEOUT_MS,
      env,
    });
    if (spawnResult.code === 0) {
      return { status: "ok", entrypoint };
    }
    return {
      status: "error",
      reason: "nonzero-exit",
      entrypoint,
      // `code` may be null; in that case no exitCode is reported.
      ...(typeof spawnResult.code === "number" ? { exitCode: spawnResult.code } : {}),
      ...(spawnResult.stderr ? { message: spawnResult.stderr } : {}),
    };
  } catch (err) {
    return {
      status: "error",
      reason: "spawn-failed",
      entrypoint,
      message: err instanceof Error ? err.message : String(err),
    };
  }
}
// Fold a finalize failure into the update result so the RPC handler's existing
// `result.status === "ok"` restart gate skips the restart: restarting on the new
// core after convergence failed would load the stale plugins we just failed to
// reconcile. Mirrors the CLI, which exits non-zero before restarting on
// post-core convergence failure.

/**
 * Maximum number of trailing characters of the finalize message copied into the
 * failed step's `stderrTail`.
 * NOTE(review): align with the limit other update steps use, if one exists.
 */
const FINALIZE_STDERR_TAIL_MAX_CHARS = 8000;

/**
 * Merge a finalize outcome into the core update result.
 *
 * @param result - Result of the core update.
 * @param outcome - Outcome of the post-core finalize attempt.
 * @returns `result` itself (same reference) for `ok`/`skipped` outcomes. For
 *   an `error` outcome, a copy with `status: "error"`, reason
 *   `"post-core-plugin-finalize-failed"`, and one extra step describing the
 *   failure; all other fields of `result` are carried over unchanged.
 */
export function foldPostCoreFinalizeIntoResult(
  result: UpdateRunResult,
  outcome: PostCoreFinalizeOutcome,
): UpdateRunResult {
  if (outcome.status !== "error") {
    return result;
  }
  // Keep only the end of the message: the field is a *tail*, and the message
  // can be the subprocess's entire stderr.
  const stderrTail = outcome.message?.slice(-FINALIZE_STDERR_TAIL_MAX_CHARS);
  return {
    ...result,
    status: "error",
    reason: "post-core-plugin-finalize-failed",
    steps: [
      ...result.steps,
      {
        name: "post-core plugin finalize",
        command: "openclaw update finalize",
        cwd: result.root ?? process.cwd(),
        // The outcome carries no timing information, so 0 is recorded.
        durationMs: 0,
        // Spawn failures and exits without a numeric code are reported as 1.
        exitCode: outcome.reason === "nonzero-exit" ? (outcome.exitCode ?? 1) : 1,
        ...(stderrTail ? { stderrTail } : {}),
      },
    ],
  };
}