Fail package update on unhealthy restart (#72422)

This commit is contained in:
Tak Hoffman
2026-04-26 18:38:23 -05:00
committed by GitHub
parent 998e37fcb3
commit 560ddd2f9b
3 changed files with 129 additions and 24 deletions

View File

@@ -438,6 +438,44 @@ describe("inspectGatewayRestart", () => {
expect(sleep).not.toHaveBeenCalled();
});
// A gateway that already reports the expected version but carries a failing
// channel probe must end the wait immediately with the "channel-errors"
// outcome: no sleep between attempts and zero elapsed time.
it("stops waiting once the expected-version gateway reports channel probe errors", async () => {
  const targetVersion = "2026.4.24";
  const gatewayPort = 18789;
  const gatewayPid = 8000;
  const abortedProbeMessage = "This operation was aborted";

  // The probe itself succeeds and reports the expected version; only the
  // telegram channel's health probe is failing.
  probeGateway.mockResolvedValue({
    ok: true,
    close: null,
    server: { version: targetVersion, connId: "new" },
    health: {
      ok: true,
      channels: {
        telegram: {
          configured: true,
          probe: { ok: false, error: abortedProbeMessage },
        },
      },
    },
  });
  // The port is held by the same pid the service runtime reports.
  inspectPortUsage.mockResolvedValue({
    port: gatewayPort,
    status: "busy",
    listeners: [{ pid: gatewayPid, commandLine: "openclaw-gateway" }],
    hints: [],
  });

  const restartHealth = await import("./restart-health.js");
  const outcome = await restartHealth.waitForGatewayHealthyRestart({
    service: makeGatewayService({ status: "running", pid: gatewayPid }),
    port: gatewayPort,
    expectedVersion: targetVersion,
  });

  expect(outcome).toMatchObject({
    healthy: false,
    waitOutcome: "channel-errors",
    elapsedMs: 0,
    channelProbeErrors: [{ id: "telegram", error: abortedProbeMessage }],
  });
  expect(sleep).not.toHaveBeenCalled();
});
it("treats busy ports with unavailable listener details as healthy when runtime is running", async () => {
const service = {
readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })),

View File

@@ -26,6 +26,7 @@ const WINDOWS_STOPPED_FREE_EARLY_EXIT_GRACE_MS = 90_000;
export type GatewayRestartWaitOutcome =
| "healthy"
| "plugin-errors"
| "channel-errors"
| "version-mismatch"
| "stale-pids"
| "stopped-free"
@@ -38,6 +39,7 @@ export type GatewayRestartSnapshot = {
staleGatewayPids: number[];
gatewayVersion?: string | null;
activatedPluginErrors?: PluginHealthErrorSummary[];
channelProbeErrors?: Array<{ id: string; error: string }>;
expectedVersion?: string;
versionMismatch?: {
expected: string;
@@ -56,6 +58,7 @@ type GatewayReachability = {
reachable: boolean;
gatewayVersion: string | null;
activatedPluginErrors: PluginHealthErrorSummary[];
channelProbeErrors: Array<{ id: string; error: string }>;
};
function hasListenerAttributionGap(portUsage: PortUsage): boolean {
@@ -154,6 +157,36 @@ function readActivatedPluginErrors(health: unknown): PluginHealthErrorSummary[]
});
}
/**
 * Extracts failed channel probes from a gateway health payload.
 *
 * The payload is untyped, so every level is validated before it is read. A
 * channel contributes an entry only when its `probe.ok` is exactly `false`;
 * channels with a missing, malformed, or passing probe are ignored.
 *
 * @param health - Raw health payload; any non-object value yields no errors.
 * @returns One `{ id, error }` entry per failing channel, in the key order of
 *   `health.channels`. `error` is the probe's own message when it is a
 *   non-blank string, otherwise the fallback text "probe failed".
 */
function readChannelProbeErrors(health: unknown): Array<{ id: string; error: string }> {
  const isObjectLike = (value: unknown): value is Record<string, unknown> =>
    Boolean(value) && typeof value === "object";

  if (!isObjectLike(health)) {
    return [];
  }
  const channelMap = health.channels;
  // An array is object-like but is not a channel-id -> summary map.
  if (!isObjectLike(channelMap) || Array.isArray(channelMap)) {
    return [];
  }

  const failures: Array<{ id: string; error: string }> = [];
  for (const [channelId, channelSummary] of Object.entries(channelMap)) {
    const probeResult = isObjectLike(channelSummary) ? channelSummary.probe : undefined;
    // Only an explicit `ok: false` counts as a failure; an absent flag does not.
    if (!isObjectLike(probeResult) || probeResult.ok !== false) {
      continue;
    }
    const detail = probeResult.error;
    failures.push({
      id: channelId,
      error: typeof detail === "string" && detail.trim() !== "" ? detail : "probe failed",
    });
  }
  return failures;
}
function applyActivatedPluginErrors(snapshot: GatewayRestartSnapshot): GatewayRestartSnapshot {
if (!snapshot.activatedPluginErrors?.length) {
return snapshot;
@@ -161,6 +194,13 @@ function applyActivatedPluginErrors(snapshot: GatewayRestartSnapshot): GatewayRe
return { ...snapshot, healthy: false };
}
/**
 * Marks a snapshot unhealthy when it carries at least one channel probe error.
 *
 * @param snapshot - Snapshot to evaluate; it is never mutated.
 * @returns The same snapshot object when `channelProbeErrors` is absent or
 *   empty, otherwise a shallow copy with `healthy` set to `false`.
 */
function applyChannelProbeErrors(snapshot: GatewayRestartSnapshot): GatewayRestartSnapshot {
  const failingChannelCount = snapshot.channelProbeErrors?.length ?? 0;
  return failingChannelCount > 0 ? { ...snapshot, healthy: false } : snapshot;
}
async function confirmGatewayReachable(params: {
port: number;
includeHealthDetails?: boolean;
@@ -177,6 +217,7 @@ async function confirmGatewayReachable(params: {
reachable: probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason),
gatewayVersion: probe.server?.version ?? null,
activatedPluginErrors: readActivatedPluginErrors(probe.health),
channelProbeErrors: readChannelProbeErrors(probe.health),
};
}
@@ -217,6 +258,7 @@ export async function inspectGatewayRestart(params: {
const expectedVersion = normalizeOptionalString(params.expectedVersion);
let reachability: GatewayReachability | null = null;
let activatedPluginErrors: PluginHealthErrorSummary[] = [];
let channelProbeErrors: Array<{ id: string; error: string }> = [];
const loadReachability = async () => {
if (!reachability) {
reachability = await confirmGatewayReachable({
@@ -224,6 +266,7 @@ export async function inspectGatewayRestart(params: {
includeHealthDetails: Boolean(expectedVersion),
});
activatedPluginErrors = reachability.activatedPluginErrors;
channelProbeErrors = reachability.channelProbeErrors;
}
return reachability;
};
@@ -251,19 +294,24 @@ export async function inspectGatewayRestart(params: {
try {
const reachable = await loadReachability();
if (reachable.reachable) {
return applyActivatedPluginErrors(
applyExpectedVersion(
{
runtime,
portUsage,
healthy: true,
staleGatewayPids: [],
gatewayVersion: reachable.gatewayVersion,
...(reachable.activatedPluginErrors.length > 0
? { activatedPluginErrors: reachable.activatedPluginErrors }
: {}),
},
expectedVersion,
return applyChannelProbeErrors(
applyActivatedPluginErrors(
applyExpectedVersion(
{
runtime,
portUsage,
healthy: true,
staleGatewayPids: [],
gatewayVersion: reachable.gatewayVersion,
...(reachable.activatedPluginErrors.length > 0
? { activatedPluginErrors: reachable.activatedPluginErrors }
: {}),
...(reachable.channelProbeErrors.length > 0
? { channelProbeErrors: reachable.channelProbeErrors }
: {}),
},
expectedVersion,
),
),
);
}
@@ -307,6 +355,9 @@ export async function inspectGatewayRestart(params: {
if (reachable.activatedPluginErrors.length > 0) {
healthy = false;
}
if (reachable.channelProbeErrors.length > 0) {
healthy = false;
}
} catch {
healthy = false;
}
@@ -340,17 +391,20 @@ export async function inspectGatewayRestart(params: {
]),
);
return applyActivatedPluginErrors(
applyExpectedVersion(
{
runtime,
portUsage,
healthy,
staleGatewayPids,
...(gatewayVersion !== undefined ? { gatewayVersion } : {}),
...(activatedPluginErrors.length ? { activatedPluginErrors } : {}),
},
expectedVersion,
return applyChannelProbeErrors(
applyActivatedPluginErrors(
applyExpectedVersion(
{
runtime,
portUsage,
healthy,
staleGatewayPids,
...(gatewayVersion !== undefined ? { gatewayVersion } : {}),
...(activatedPluginErrors.length ? { activatedPluginErrors } : {}),
...(channelProbeErrors.length ? { channelProbeErrors } : {}),
},
expectedVersion,
),
),
);
}
@@ -415,6 +469,9 @@ export async function waitForGatewayHealthyRestart(params: {
if (snapshot.activatedPluginErrors?.length) {
return withWaitContext(snapshot, "plugin-errors", attempt * delayMs);
}
if (snapshot.channelProbeErrors?.length) {
return withWaitContext(snapshot, "channel-errors", attempt * delayMs);
}
if (snapshot.versionMismatch) {
return withWaitContext(snapshot, "version-mismatch", attempt * delayMs);
}
@@ -493,6 +550,12 @@ export function renderRestartDiagnostics(snapshot: GatewayRestartSnapshot): stri
lines.push(`- ${plugin.id}: ${plugin.error}`);
}
}
if (snapshot.channelProbeErrors?.length) {
lines.push("Channel health probe errors:");
for (const channel of snapshot.channelProbeErrors) {
lines.push(`- ${channel.id}: ${channel.error}`);
}
}
const runtimeSummary = [
snapshot.runtime.status ? `status=${snapshot.runtime.status}` : null,
snapshot.runtime.state ? `state=${snapshot.runtime.state}` : null,

View File

@@ -856,6 +856,10 @@ async function maybeRestartService(params: {
}
}
if (isPackageManagerUpdateMode(params.result.mode)) {
return false;
}
return !(health.versionMismatch || health.activatedPluginErrors?.length);
};