fix: fail updates on activated plugin load errors

This commit is contained in:
Peter Steinberger
2026-04-26 05:57:26 +01:00
parent ad5c00b8e0
commit 73e2151107
10 changed files with 407 additions and 25 deletions

View File

@@ -80,6 +80,7 @@ Docs: https://docs.openclaw.ai
`openclaw node start` command, and show an actionable browser-control error
when the local control service is missing. Fixes #66637.
- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis.
- Plugins/runtime deps: surface activated plugin load failures in health and fail package-update restart verification or doctor repair when bundled runtime deps still cannot load, avoiding false-success repairs. (#71883) Thanks @Solvely-Colin.
- WhatsApp: remove ack reactions after a visible reply when `messages.removeAckAfterReply` is enabled, matching other reaction-capable channels. Fixes #26183. Thanks @MrUnforsaken.
- Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo.
- Channels/status: keep read-only channel lists on manifest and package metadata by default, loading setup runtime only for explicit fallback callers. Thanks @shakkernerd.

View File

@@ -305,6 +305,107 @@ describe("inspectGatewayRestart", () => {
expect(snapshot.versionMismatch).toBeUndefined();
});
// Regression: a gateway that reports the expected version must still be
// treated as unhealthy when an activated plugin failed to load.
it("marks matching-version restarts unhealthy when activated plugins failed to load", async () => {
// The probe succeeds and reports the expected version, but its health payload
// carries one activated failure (telegram) and one non-activated failure (optional).
probeGateway.mockResolvedValue({
ok: true,
close: null,
server: { version: "2026.4.24", connId: "new" },
health: {
ok: true,
plugins: {
errors: [
{
id: "telegram",
origin: "bundled",
activated: true,
error: "failed to install bundled runtime deps: ENOSPC",
},
{
id: "optional",
origin: "workspace",
activated: false,
error: "disabled plugin ignored",
},
],
},
},
});
const snapshot = await inspectGatewayRestartWithSnapshot({
runtime: { status: "running", pid: 8000 },
expectedVersion: "2026.4.24",
portUsage: {
port: 18789,
status: "busy",
listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }],
hints: [],
},
});
// Only the activated entry is expected on the snapshot, and its presence
// flips `healthy` to false even though the versions match.
expect(snapshot).toMatchObject({
healthy: false,
gatewayVersion: "2026.4.24",
expectedVersion: "2026.4.24",
activatedPluginErrors: [
{
id: "telegram",
origin: "bundled",
activated: true,
error: "failed to install bundled runtime deps: ENOSPC",
},
],
});
// Plugin errors must not be reported as a version mismatch.
expect(snapshot.versionMismatch).toBeUndefined();
// The plugin errors live in the probe's health details, so the probe has to request them.
expect(probeGateway).toHaveBeenCalledWith(expect.objectContaining({ includeDetails: true }));
const { renderRestartDiagnostics } = await import("./restart-health.js");
// Diagnostics list each activated failure as "- <id>: <error>" under a heading line.
expect(renderRestartDiagnostics(snapshot).join("\n")).toContain(
"Activated plugin load errors:\n- telegram: failed to install bundled runtime deps: ENOSPC",
);
});
// The wait loop should give up on the first attempt with the "plugin-errors"
// outcome instead of sleeping and retrying until the timeout.
it("stops waiting once the expected-version gateway reports activated plugin errors", async () => {
// Gateway is reachable on the expected version but reports an activated plugin load failure.
probeGateway.mockResolvedValue({
ok: true,
close: null,
server: { version: "2026.4.24", connId: "new" },
health: {
ok: true,
plugins: {
errors: [
{
id: "telegram",
origin: "bundled",
activated: true,
error: "failed to install bundled runtime deps: ENOSPC",
},
],
},
},
});
// The port is held by the same pid the service runtime reports below.
inspectPortUsage.mockResolvedValue({
port: 18789,
status: "busy",
listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }],
hints: [],
});
const { waitForGatewayHealthyRestart } = await import("./restart-health.js");
const snapshot = await waitForGatewayHealthyRestart({
service: makeGatewayService({ status: "running", pid: 8000 }),
port: 18789,
expectedVersion: "2026.4.24",
});
// elapsedMs of 0 means the loop returned on its first attempt.
expect(snapshot).toMatchObject({
healthy: false,
waitOutcome: "plugin-errors",
elapsedMs: 0,
activatedPluginErrors: [expect.objectContaining({ id: "telegram" })],
});
// No retry delay should have been taken before returning.
expect(sleep).not.toHaveBeenCalled();
});
it("treats busy ports with unavailable listener details as healthy when runtime is running", async () => {
const service = {
readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })),

View File

@@ -1,3 +1,4 @@
import type { PluginHealthErrorSummary } from "../../commands/health.types.js";
import type { GatewayServiceRuntime } from "../../daemon/service-runtime.js";
import type { GatewayService } from "../../daemon/service.js";
import { probeGateway } from "../../gateway/probe.js";
@@ -22,7 +23,12 @@ export const DEFAULT_RESTART_HEALTH_ATTEMPTS = Math.ceil(
const STOPPED_FREE_EARLY_EXIT_GRACE_MS = 10_000;
const WINDOWS_STOPPED_FREE_EARLY_EXIT_GRACE_MS = 90_000;
export type GatewayRestartWaitOutcome = "healthy" | "stale-pids" | "stopped-free" | "timeout";
/**
 * Label recording why the gateway restart wait ended.
 *
 * - "healthy": the inspected snapshot was healthy.
 * - "plugin-errors": the snapshot was unhealthy and carried activated plugin load errors.
 * - "stale-pids": stale gateway pids were found while the service runtime was not running.
 */
export type GatewayRestartWaitOutcome =
| "healthy"
| "plugin-errors"
| "stale-pids"
| "stopped-free"
| "timeout";
export type GatewayRestartSnapshot = {
runtime: GatewayServiceRuntime;
@@ -30,6 +36,7 @@ export type GatewayRestartSnapshot = {
healthy: boolean;
staleGatewayPids: number[];
gatewayVersion?: string | null;
activatedPluginErrors?: PluginHealthErrorSummary[];
expectedVersion?: string;
versionMismatch?: {
expected: string;
@@ -47,6 +54,7 @@ export type GatewayPortHealthSnapshot = {
type GatewayReachability = {
reachable: boolean;
gatewayVersion: string | null;
activatedPluginErrors: PluginHealthErrorSummary[];
};
function hasListenerAttributionGap(portUsage: PortUsage): boolean {
@@ -101,18 +109,73 @@ function applyExpectedVersion(
};
}
async function confirmGatewayReachable(port: number): Promise<GatewayReachability> {
/**
 * Pulls activated plugin load errors out of an untyped gateway health payload.
 *
 * The payload is validated structurally at every level (`health.plugins.errors`
 * must be an array); anything malformed yields an empty list rather than throwing.
 * An entry is kept only when `activated` is exactly `true` and both `id` and
 * `error` are strings. A missing or non-string `origin` becomes "unknown", and
 * the optional activation/failure fields are copied only when they are strings.
 *
 * @param health - Health payload as received from the gateway probe.
 * @returns Normalized summaries for activated plugins, in payload order.
 */
function readActivatedPluginErrors(health: unknown): PluginHealthErrorSummary[] {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  if (!isRecord(health)) {
    return [];
  }
  const pluginSection = health.plugins;
  if (!isRecord(pluginSection)) {
    return [];
  }
  const rawErrors = pluginSection.errors;
  if (!Array.isArray(rawErrors)) {
    return [];
  }

  const summaries: PluginHealthErrorSummary[] = [];
  for (const raw of rawErrors) {
    if (!isRecord(raw)) {
      continue;
    }
    const { id, origin, activated, error, activationSource, activationReason, failurePhase } = raw;
    // Only failures of plugins that were actually activated are reported.
    if (activated !== true || typeof id !== "string" || typeof error !== "string") {
      continue;
    }
    const summary: PluginHealthErrorSummary = {
      id,
      origin: typeof origin === "string" ? origin : "unknown",
      activated: true,
      error,
    };
    if (typeof activationSource === "string") {
      summary.activationSource = activationSource;
    }
    if (typeof activationReason === "string") {
      summary.activationReason = activationReason;
    }
    if (typeof failurePhase === "string") {
      summary.failurePhase = failurePhase;
    }
    summaries.push(summary);
  }
  return summaries;
}
/**
 * Forces a restart snapshot to unhealthy when it carries activated plugin errors.
 *
 * @param snapshot - Snapshot to check.
 * @returns The same snapshot object when it has no activated plugin errors;
 *   otherwise a shallow copy with `healthy` set to `false`.
 */
function applyActivatedPluginErrors(snapshot: GatewayRestartSnapshot): GatewayRestartSnapshot {
  const errorCount = snapshot.activatedPluginErrors?.length ?? 0;
  return errorCount > 0 ? { ...snapshot, healthy: false } : snapshot;
}
/**
 * Probes the gateway WebSocket endpoint on 127.0.0.1 and summarizes the result.
 *
 * Credentials come from the OPENCLAW_GATEWAY_TOKEN and OPENCLAW_GATEWAY_PASSWORD
 * environment variables (after normalizeOptionalString); when neither yields a
 * value the probe is sent without auth. The probe uses a 3 second timeout.
 *
 * @param params.port - Local port the gateway is expected to listen on.
 * @param params.includeHealthDetails - Health details are requested from the
 *   probe only when this is exactly `true`.
 * @returns `reachable` when the probe succeeded or its close code/reason is
 *   accepted by looksLikeAuthClose, the reported server version (or `null`),
 *   and the activated plugin errors read from the probe's health payload.
 */
async function confirmGatewayReachable(params: {
port: number;
includeHealthDetails?: boolean;
}): Promise<GatewayReachability> {
const token = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_TOKEN);
const password = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_PASSWORD);
const probe = await probeGateway({
url: `ws://127.0.0.1:${port}`,
url: `ws://127.0.0.1:${params.port}`,
auth: token || password ? { token, password } : undefined,
timeoutMs: 3_000,
includeDetails: false,
includeDetails: params.includeHealthDetails === true,
});
return {
reachable: probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason),
gatewayVersion: probe.server?.version ?? null,
activatedPluginErrors: readActivatedPluginErrors(probe.health),
};
}
@@ -133,7 +196,7 @@ async function inspectGatewayPortHealth(port: number): Promise<GatewayPortHealth
let healthy = false;
if (portUsage.status === "busy") {
try {
healthy = (await confirmGatewayReachable(port)).reachable;
healthy = (await confirmGatewayReachable({ port })).reachable;
} catch {
// best-effort probe
}
@@ -152,8 +215,15 @@ export async function inspectGatewayRestart(params: {
const env = params.env ?? process.env;
const expectedVersion = normalizeOptionalString(params.expectedVersion);
let reachability: GatewayReachability | null = null;
let activatedPluginErrors: PluginHealthErrorSummary[] = [];
const loadReachability = async () => {
reachability ??= await confirmGatewayReachable(params.port);
if (!reachability) {
reachability = await confirmGatewayReachable({
port: params.port,
includeHealthDetails: Boolean(expectedVersion),
});
activatedPluginErrors = reachability.activatedPluginErrors;
}
return reachability;
};
let runtime: GatewayServiceRuntime = { status: "unknown" };
@@ -180,15 +250,20 @@ export async function inspectGatewayRestart(params: {
try {
const reachable = await loadReachability();
if (reachable.reachable) {
return applyExpectedVersion(
{
runtime,
portUsage,
healthy: true,
staleGatewayPids: [],
gatewayVersion: reachable.gatewayVersion,
},
expectedVersion,
return applyActivatedPluginErrors(
applyExpectedVersion(
{
runtime,
portUsage,
healthy: true,
staleGatewayPids: [],
gatewayVersion: reachable.gatewayVersion,
...(reachable.activatedPluginErrors.length > 0
? { activatedPluginErrors: reachable.activatedPluginErrors }
: {}),
},
expectedVersion,
),
);
}
} catch {
@@ -228,6 +303,9 @@ export async function inspectGatewayRestart(params: {
const reachable = await loadReachability();
healthy = reachable.reachable;
gatewayVersion = reachable.gatewayVersion;
if (reachable.activatedPluginErrors.length > 0) {
healthy = false;
}
} catch {
healthy = false;
}
@@ -261,15 +339,18 @@ export async function inspectGatewayRestart(params: {
]),
);
return applyExpectedVersion(
{
runtime,
portUsage,
healthy,
staleGatewayPids,
...(gatewayVersion !== undefined ? { gatewayVersion } : {}),
},
expectedVersion,
return applyActivatedPluginErrors(
applyExpectedVersion(
{
runtime,
portUsage,
healthy,
staleGatewayPids,
...(gatewayVersion !== undefined ? { gatewayVersion } : {}),
...(activatedPluginErrors.length ? { activatedPluginErrors } : {}),
},
expectedVersion,
),
);
}
@@ -330,6 +411,9 @@ export async function waitForGatewayHealthyRestart(params: {
if (snapshot.healthy) {
return withWaitContext(snapshot, "healthy", attempt * delayMs);
}
if (snapshot.activatedPluginErrors?.length) {
return withWaitContext(snapshot, "plugin-errors", attempt * delayMs);
}
if (snapshot.staleGatewayPids.length > 0 && snapshot.runtime.status !== "running") {
return withWaitContext(snapshot, "stale-pids", attempt * delayMs);
}
@@ -399,6 +483,12 @@ export function renderRestartDiagnostics(snapshot: GatewayRestartSnapshot): stri
`Gateway version mismatch: expected ${snapshot.versionMismatch.expected}, running gateway reported ${actual}.`,
);
}
if (snapshot.activatedPluginErrors?.length) {
lines.push("Activated plugin load errors:");
for (const plugin of snapshot.activatedPluginErrors) {
lines.push(`- ${plugin.id}: ${plugin.error}`);
}
}
const runtimeSummary = [
snapshot.runtime.status ? `status=${snapshot.runtime.status}` : null,
snapshot.runtime.state ? `state=${snapshot.runtime.state}` : null,

View File

@@ -1645,6 +1645,60 @@ describe("update-cli", () => {
expect(defaultRuntime.exit).toHaveBeenCalledWith(1);
});
// End-to-end check of the update command: the restarted gateway reports the
// new version, but an activated plugin failed to load, so the update must fail.
it("fails a package update when the restarted gateway reports activated plugin load errors", async () => {
// Simulate a successful npm package update from 2026.4.23 to 2026.4.24.
setupUpdatedRootRefresh({
gatewayUpdateImpl: async () =>
makeOkUpdateResult({
mode: "npm",
root: createCaseDir("openclaw-updated-root"),
before: { version: "2026.4.23" },
after: { version: "2026.4.24" },
}),
});
readPackageVersion.mockResolvedValue("2026.4.24");
serviceLoaded.mockResolvedValue(true);
// The post-restart probe matches the installed version but reports an activated plugin failure.
probeGateway.mockResolvedValue({
ok: true,
close: null,
server: {
version: "2026.4.24",
connId: "updated-gateway",
},
auth: { role: "operator", scopes: ["operator.read"], capability: "read_only" },
health: {
ok: true,
plugins: {
errors: [
{
id: "telegram",
origin: "bundled",
activated: true,
error: "failed to install bundled runtime deps: ENOSPC",
},
],
},
},
status: null,
presence: null,
configSnapshot: null,
connectLatencyMs: 1,
error: null,
url: "ws://127.0.0.1:18789",
});
await updateCommand({ yes: true });
// The restart still runs; the failure is detected during post-restart verification.
expect(runRestartScript).toHaveBeenCalled();
expect(probeGateway).toHaveBeenCalledWith(expect.objectContaining({ includeDetails: true }));
// The command exits non-zero and logs the failing plugin.
expect(defaultRuntime.exit).toHaveBeenCalledWith(1);
expect(
vi
.mocked(defaultRuntime.log)
.mock.calls.map((call) => String(call[0]))
.join("\n"),
).toContain("- telegram: failed to install bundled runtime deps: ENOSPC");
});
it.each([
{
name: "updateCommand refreshes service env from updated install root when available",

View File

@@ -849,7 +849,7 @@ async function maybeRestartService(params: {
);
}
defaultRuntime.log("");
if (!health.healthy && health.versionMismatch) {
if (!health.healthy && (health.versionMismatch || health.activatedPluginErrors?.length)) {
return false;
}
}

View File

@@ -317,6 +317,32 @@ describe("doctor bundled plugin runtime deps", () => {
expect(readRetainedRuntimeDepsManifest(installRoot)).toEqual(["grammy@1.37.0"]);
});
// A failed dependency install must surface to the caller as a rejection
// (after being reported through runtime.error) rather than being swallowed.
it("throws when bundled runtime dependency repair fails", async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-doctor-bundled-"));
// Collects everything the helper reports through runtime.error.
const errors: string[] = [];
writeJson(path.join(root, "package.json"), { name: "openclaw" });
// Bundled telegram plugin declaring a grammy runtime dependency, enabled in the config below.
writeBundledChannelPlugin(root, "telegram", { grammy: "1.37.0" });
await expect(
maybeRepairBundledPluginRuntimeDeps({
runtime: { error: (message: string) => errors.push(message) } as never,
prompter: createNonInteractivePrompter(),
packageRoot: root,
config: {
plugins: { enabled: true },
channels: { telegram: { enabled: true } },
},
// Injected installer that always fails, standing in for a real install error.
installDeps: () => {
throw new Error("ENOSPC");
},
}),
).rejects.toThrow("ENOSPC");
// The failure is also reported with the stringified error.
expect(errors.join("\n")).toContain(
"Failed to install bundled plugin runtime deps: Error: ENOSPC",
);
});
it("repairs Feishu runtime deps from preserved source config", async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-doctor-bundled-"));
writeJson(path.join(root, "package.json"), { name: "openclaw" });

View File

@@ -96,5 +96,6 @@ export async function maybeRepairBundledPluginRuntimeDeps(params: {
note(`Installed bundled plugin deps: ${result.installSpecs.join(", ")}`, "Bundled plugins");
} catch (error) {
params.runtime.error(`Failed to install bundled plugin runtime deps: ${String(error)}`);
throw error instanceof Error ? error : new Error(String(error));
}
}

View File

@@ -3,6 +3,7 @@ import os from "node:os";
import path from "node:path";
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { ChannelPlugin } from "../channels/plugins/types.js";
import { createPluginRecord } from "../plugins/status.test-helpers.js";
import type { HealthSummary } from "./health.js";
let testConfig: Record<string, unknown> = {};
@@ -317,6 +318,57 @@ describe("getHealthSnapshot", () => {
vi.unstubAllEnvs();
});
// The health snapshot should list loaded plugin ids and every errored plugin
// (activated or not), each sorted by id.
it("includes active plugin load errors in the health snapshot", async () => {
testConfig = { session: { store: "/tmp/x" } };
testStore = {};
// Registry with one loaded plugin, one activated plugin that failed to load,
// and one non-activated plugin that also failed.
setActivePluginRegistry({
...createTestRegistry([]),
plugins: [
createPluginRecord({ id: "telegram", origin: "bundled", status: "loaded" }),
createPluginRecord({
id: "whatsapp",
origin: "bundled",
status: "error",
activated: true,
activationSource: "explicit",
activationReason: "bundled-channel-enabled-in-config",
failurePhase: "load",
error: "failed to install bundled runtime deps: ENOSPC",
}),
createPluginRecord({
id: "optional-broken",
origin: "workspace",
enabled: false,
activated: false,
status: "error",
error: "disabled plugin ignored",
}),
],
});
const snap = await getHealthSnapshot({ timeoutMs: 10, probe: false });
expect(snap.plugins?.loaded).toEqual(["telegram"]);
// "optional-broken" sorts before "whatsapp"; the `activated` flag is what
// distinguishes the two entries.
// NOTE(review): activationSource "disabled" is not set above; presumably
// createPluginRecord derives it from enabled: false. Confirm against the helper.
expect(snap.plugins?.errors).toEqual([
{
id: "optional-broken",
origin: "workspace",
activated: false,
activationSource: "disabled",
error: "disabled plugin ignored",
},
{
id: "whatsapp",
origin: "bundled",
activated: true,
activationSource: "explicit",
activationReason: "bundled-channel-enabled-in-config",
failurePhase: "load",
error: "failed to install bundled runtime deps: ENOSPC",
},
]);
});
it("skips telegram probe when not configured", async () => {
testConfig = { session: { store: "/tmp/x" } };
testStore = {

View File

@@ -12,6 +12,7 @@ import { info } from "../globals.js";
import { isTruthyEnvValue } from "../infra/env.js";
import { formatErrorMessage } from "../infra/errors.js";
import { resolveHeartbeatSummaryForAgent } from "../infra/heartbeat-summary.js";
import { getActivePluginRegistry } from "../plugins/runtime.js";
import { buildChannelAccountBindings, resolvePreferredAccountId } from "../routing/bindings.js";
import { normalizeAgentId } from "../routing/session-key.js";
import { type RuntimeEnv, writeRuntimeJson } from "../runtime.js";
@@ -24,6 +25,8 @@ import type {
ChannelAccountHealthSummary,
ChannelHealthSummary,
HealthSummary,
PluginHealthErrorSummary,
PluginHealthSummary,
} from "./health.types.js";
import { logGatewayConnectionDetails } from "./status.gateway-connection.js";
export { formatHealthChannelLines } from "./health-format.js";
@@ -134,6 +137,42 @@ const buildSessionSummary = async (storePath: string) => {
} satisfies HealthSummary["sessions"];
};
/**
 * Summarizes the active plugin registry for inclusion in the health snapshot.
 *
 * Plugins with status "loaded" contribute their id; plugins with status "error"
 * contribute an error summary. Plugins in any other status are ignored.
 *
 * @returns `undefined` when there is no active registry or when it has neither
 *   loaded nor errored plugins; otherwise the loaded ids and error summaries,
 *   each sorted by id using `localeCompare`.
 */
function buildPluginHealthSummary(): PluginHealthSummary | undefined {
  const activeRegistry = getActivePluginRegistry();
  if (!activeRegistry) {
    return undefined;
  }

  const loadedIds: string[] = [];
  const loadErrors: PluginHealthErrorSummary[] = [];
  for (const record of activeRegistry.plugins) {
    if (record.status === "loaded") {
      loadedIds.push(record.id);
      continue;
    }
    if (record.status !== "error") {
      continue;
    }
    const summary: PluginHealthErrorSummary = {
      id: record.id,
      origin: record.origin,
      // Anything other than an explicit `true` is reported as not activated.
      activated: record.activated === true,
      error: record.error ?? "unknown plugin load error",
    };
    // Optional context is copied only when present so absent fields stay off the payload.
    if (record.activationSource) {
      summary.activationSource = record.activationSource;
    }
    if (record.activationReason) {
      summary.activationReason = record.activationReason;
    }
    if (record.failurePhase) {
      summary.failurePhase = record.failurePhase;
    }
    loadErrors.push(summary);
  }

  if (loadedIds.length === 0 && loadErrors.length === 0) {
    return undefined;
  }
  return {
    loaded: loadedIds.toSorted((a, b) => a.localeCompare(b)),
    errors: loadErrors.toSorted((a, b) => a.id.localeCompare(b.id)),
  };
}
async function inspectHealthAccount(plugin: ChannelPlugin, cfg: OpenClawConfig, accountId: string) {
return (
plugin.config.inspectAccount?.(cfg, accountId) ??
@@ -375,10 +414,12 @@ export async function getHealthSnapshot(params?: {
}
}
const pluginHealth = buildPluginHealthSummary();
const summary: HealthSummary = {
ok: true,
ts: Date.now(),
durationMs: Date.now() - start,
...(pluginHealth ? { plugins: pluginHealth } : {}),
channels,
channelOrder,
channelLabels,

View File

@@ -20,10 +20,26 @@ export type AgentHealthSummary = {
sessions: HealthSummary["sessions"];
};
/** One plugin that ended up in the "error" state, as reported in the health snapshot. */
export type PluginHealthErrorSummary = {
/** Plugin id. */
id: string;
/** Where the plugin came from (tests use "bundled" and "workspace"). */
origin: string;
/** True only when the registry record was explicitly marked as activated. */
activated: boolean;
/** Optional activation context copied from the registry record when present. */
activationSource?: string;
activationReason?: string;
/** Optional phase in which the failure occurred (tests use "load"). */
failurePhase?: string;
/** Error message for the failure. */
error: string;
};
/** Plugin section of the health snapshot. */
export type PluginHealthSummary = {
/** Ids of plugins whose status is "loaded". */
loaded: string[];
/** Plugins whose status is "error", including ones that were not activated. */
errors: PluginHealthErrorSummary[];
};
export type HealthSummary = {
ok: true;
ts: number;
durationMs: number;
plugins?: PluginHealthSummary;
channels: Record<string, ChannelHealthSummary>;
channelOrder: string[];
channelLabels: Record<string, string>;