fix(gateway/cli): relax local backend self-pairing and harden launchd restarts (#46290)

Signed-off-by: sallyom <somalley@redhat.com>
This commit is contained in:
Sally O'Malley
2026-03-14 14:27:52 -04:00
committed by GitHub
parent ac29edf6c3
commit 8db6fcca77
10 changed files with 347 additions and 5 deletions

View File

@@ -182,7 +182,7 @@ export async function inspectGatewayRestart(params: {
return true;
}
if (runtimePid == null) {
return true;
return false;
}
return !listenerOwnedByRuntimePid({ listener, runtimePid });
})

View File

@@ -1,5 +1,6 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { captureEnv } from "../../test-utils/env.js";
import type { GatewayRestartSnapshot } from "./restart-health.js";
const callGatewayStatusProbe = vi.fn(async (_opts?: unknown) => ({ ok: true as const }));
const loadGatewayTlsRuntime = vi.fn(async (_cfg?: unknown) => ({
@@ -18,6 +19,14 @@ const readLastGatewayErrorLine = vi.fn(async (_env?: NodeJS.ProcessEnv) => null)
const auditGatewayServiceConfig = vi.fn(async (_opts?: unknown) => undefined);
const serviceIsLoaded = vi.fn(async (_opts?: unknown) => true);
const serviceReadRuntime = vi.fn(async (_env?: NodeJS.ProcessEnv) => ({ status: "running" }));
// Mock used in place of inspectGatewayRestart from ./restart-health.js.
// By default it resolves to a healthy snapshot with no stale gateway pids;
// individual tests override a single call with mockResolvedValueOnce.
const inspectGatewayRestart = vi.fn<(opts?: unknown) => Promise<GatewayRestartSnapshot>>(
async (_opts?: unknown) => ({
runtime: { status: "running", pid: 1234 },
portUsage: { port: 19001, status: "busy", listeners: [], hints: [] },
healthy: true,
staleGatewayPids: [],
}),
);
const serviceReadCommand = vi.fn<
(env?: NodeJS.ProcessEnv) => Promise<{
programArguments: string[];
@@ -117,6 +126,10 @@ vi.mock("./probe.js", () => ({
probeGatewayStatus: (opts: unknown) => callGatewayStatusProbe(opts),
}));
// Replace the restart-health module so tests control the restart snapshot.
// NOTE(review): the arrow wrapper presumably defers the lookup of the
// inspectGatewayRestart mock until call time, since vi.mock factories are
// hoisted above ordinary declarations — confirm against vitest hoisting rules.
vi.mock("./restart-health.js", () => ({
inspectGatewayRestart: (opts: unknown) => inspectGatewayRestart(opts),
}));
const { gatherDaemonStatus } = await import("./status.gather.js");
describe("gatherDaemonStatus", () => {
@@ -139,6 +152,7 @@ describe("gatherDaemonStatus", () => {
delete process.env.DAEMON_GATEWAY_PASSWORD;
callGatewayStatusProbe.mockClear();
loadGatewayTlsRuntime.mockClear();
inspectGatewayRestart.mockClear();
daemonLoadedConfig = {
gateway: {
bind: "lan",
@@ -362,4 +376,34 @@ describe("gatherDaemonStatus", () => {
expect(callGatewayStatusProbe).not.toHaveBeenCalled();
expect(status.rpc).toBeUndefined();
});
it("surfaces stale gateway listener pids from restart health inspection", async () => {
  // The service runtime reports one pid while a different pid holds the port.
  const runtimePid = 8000;
  const stalePid = 9000;
  const gatewayPort = 19001;

  inspectGatewayRestart.mockResolvedValueOnce({
    runtime: { status: "running", pid: runtimePid },
    portUsage: {
      port: gatewayPort,
      status: "busy",
      listeners: [{ pid: stalePid, ppid: 8999, commandLine: "openclaw-gateway" }],
      hints: [],
    },
    healthy: false,
    staleGatewayPids: [stalePid],
  });

  const status = await gatherDaemonStatus({ rpc: {}, probe: true, deep: false });

  // The inspection must target the daemon port ...
  expect(inspectGatewayRestart).toHaveBeenCalledWith(
    expect.objectContaining({ port: gatewayPort }),
  );
  // ... and only the health summary fields are surfaced on the status.
  expect(status.health).toEqual({
    healthy: false,
    staleGatewayPids: [stalePid],
  });
});
});

View File

@@ -29,6 +29,7 @@ import {
import { pickPrimaryTailnetIPv4 } from "../../infra/tailnet.js";
import { loadGatewayTlsRuntime } from "../../infra/tls/gateway.js";
import { probeGatewayStatus } from "./probe.js";
import { inspectGatewayRestart } from "./restart-health.js";
import { normalizeListenerAddress, parsePortFromArgs, pickProbeHostForBind } from "./shared.js";
import type { GatewayRpcOpts } from "./types.js";
@@ -112,6 +113,10 @@ export type DaemonStatus = {
error?: string;
url?: string;
};
health?: {
healthy: boolean;
staleGatewayPids: number[];
};
extraServices: Array<{ label: string; detail: string; scope: string }>;
};
@@ -331,6 +336,14 @@ export async function gatherDaemonStatus(
configPath: daemonConfigSummary.path,
})
: undefined;
// Restart-health inspection runs only when probing was requested and the
// service is loaded. A rejected inspection is swallowed and yields `undefined`,
// in which case no `health` section is added to the returned status.
const health =
opts.probe && loaded
? await inspectGatewayRestart({
service,
port: daemonPort,
env: serviceEnv,
}).catch(() => undefined)
: undefined;
let lastError: string | undefined;
if (loaded && runtime?.status === "running" && portStatus && portStatus.status !== "busy") {
@@ -357,6 +370,14 @@ export async function gatherDaemonStatus(
...(portCliStatus ? { portCli: portCliStatus } : {}),
lastError,
...(rpc ? { rpc: { ...rpc, url: gateway.probeUrl } } : {}),
...(health
? {
health: {
healthy: health.healthy,
staleGatewayPids: health.staleGatewayPids,
},
}
: {}),
extraServices,
};
}

View File

@@ -0,0 +1,116 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Mock runtime sink with log/error spies. It is created through vi.hoisted so
// that the vi.mock factory below can reference it.
const runtime = vi.hoisted(() => ({
log: vi.fn<(line: string) => void>(),
error: vi.fn<(line: string) => void>(),
}));
// Expose the mock sink as defaultRuntime so lines written through it can be asserted on.
vi.mock("../../runtime.js", () => ({
defaultRuntime: runtime,
}));
// colorize becomes a pass-through that returns the text unchanged.
vi.mock("../../terminal/theme.js", () => ({
colorize: (_rich: boolean, _theme: unknown, text: string) => text,
}));
// Fixed control UI link.
vi.mock("../../commands/onboard-helpers.js", () => ({
resolveControlUiLinks: () => ({ httpUrl: "http://127.0.0.1:18789" }),
}));
// No service cleanup hints.
vi.mock("../../daemon/inspect.js", () => ({
renderGatewayServiceCleanupHints: () => [],
}));
// Fixed gateway log paths.
vi.mock("../../daemon/launchd.js", () => ({
resolveGatewayLogPaths: () => ({
stdoutPath: "/tmp/gateway.out.log",
stderrPath: "/tmp/gateway.err.log",
}),
}));
// systemd is always reported as available, with no hints.
vi.mock("../../daemon/systemd-hints.js", () => ({
isSystemdUnavailableDetail: () => false,
renderSystemdUnavailableHints: () => [],
}));
// Never treated as a WSL environment.
vi.mock("../../infra/wsl.js", () => ({
isWSLEnv: () => false,
}));
// Fixed logger file path.
vi.mock("../../logging.js", () => ({
getResolvedLoggerSettings: () => ({ file: "/tmp/openclaw.log" }),
}));
// Text style helpers return their input unchanged; the remaining helpers
// return fixed or empty values.
vi.mock("./shared.js", () => ({
createCliStatusTextStyles: () => ({
rich: false,
label: (text: string) => text,
accent: (text: string) => text,
infoText: (text: string) => text,
okText: (text: string) => text,
warnText: (text: string) => text,
errorText: (text: string) => text,
}),
filterDaemonEnv: () => ({}),
formatRuntimeStatus: () => "running (pid 8000)",
resolveRuntimeStatusColor: () => "",
renderRuntimeHints: () => [],
safeDaemonEnv: () => [],
}));
// No port diagnostics, and a single fixed listening address.
vi.mock("./status.gather.js", () => ({
renderPortDiagnosticsForCli: () => [],
resolvePortListeningAddresses: () => ["127.0.0.1:18789"],
}));
// Loaded via dynamic import after all mocks above have been declared.
const { printDaemonStatus } = await import("./status.print.js");
describe("printDaemonStatus", () => {
  // Start every test with pristine runtime spies.
  beforeEach(() => {
    for (const sink of [runtime.log, runtime.error]) {
      sink.mockReset();
    }
  });

  it("prints stale gateway pid guidance when runtime does not own the listener", () => {
    // Runtime pid 8000 is running, but the port is held by pid 9000.
    printDaemonStatus(
      {
        service: {
          label: "LaunchAgent",
          loaded: true,
          loadedText: "loaded",
          notLoadedText: "not loaded",
          runtime: { status: "running", pid: 8000 },
        },
        gateway: {
          bindMode: "loopback",
          bindHost: "127.0.0.1",
          port: 18789,
          portSource: "env/config",
          probeUrl: "ws://127.0.0.1:18789",
        },
        port: {
          port: 18789,
          status: "busy",
          listeners: [{ pid: 9000, ppid: 8999, address: "127.0.0.1:18789" }],
          hints: [],
        },
        rpc: {
          ok: false,
          error: "gateway closed (1006 abnormal closure (no close frame))",
          url: "ws://127.0.0.1:18789",
        },
        health: {
          healthy: false,
          staleGatewayPids: [9000],
        },
        extraServices: [],
      },
      { json: false },
    );

    // Both the diagnosis and the suggested fix must be written to the error stream.
    const expectedFragments = [
      "Gateway runtime PID does not own the listening port",
      "openclaw gateway restart",
    ];
    for (const fragment of expectedFragments) {
      expect(runtime.error).toHaveBeenCalledWith(expect.stringContaining(fragment));
    }
  });
});

View File

@@ -194,6 +194,25 @@ export function printDaemonStatus(status: DaemonStatus, opts: { json: boolean })
spacer();
}
// Warn when the health section lists stale gateway pids while the service
// runtime is reported as running with a numeric pid: print the pids that are
// listening and suggest a restart followed by a deep status re-check.
if (
status.health &&
status.health.staleGatewayPids.length > 0 &&
service.runtime?.status === "running" &&
typeof service.runtime.pid === "number"
) {
defaultRuntime.error(
errorText(
`Gateway runtime PID does not own the listening port. Other gateway process(es) are listening: ${status.health.staleGatewayPids.join(", ")}`,
),
);
defaultRuntime.error(
errorText(
`Fix: run ${formatCliCommand("openclaw gateway restart")} and re-check with ${formatCliCommand("openclaw gateway status --deep")}.`,
),
);
spacer();
}
const systemdUnavailable =
process.platform === "linux" && isSystemdUnavailableDetail(service.runtime?.detail);
if (systemdUnavailable) {