mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-03 02:40:21 +00:00
fix: harden windows gateway lifecycle
This commit is contained in:
@@ -1,8 +1,5 @@
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const mockReadFileSync = vi.hoisted(() => vi.fn());
|
||||
const mockSpawnSync = vi.hoisted(() => vi.fn());
|
||||
|
||||
type RestartHealthSnapshot = {
|
||||
healthy: boolean;
|
||||
staleGatewayPids: number[];
|
||||
@@ -35,7 +32,9 @@ const terminateStaleGatewayPids = vi.fn();
|
||||
const renderGatewayPortHealthDiagnostics = vi.fn(() => ["diag: unhealthy port"]);
|
||||
const renderRestartDiagnostics = vi.fn(() => ["diag: unhealthy runtime"]);
|
||||
const resolveGatewayPort = vi.fn(() => 18789);
|
||||
const findGatewayPidsOnPortSync = vi.fn<(port: number) => number[]>(() => []);
|
||||
const findVerifiedGatewayListenerPidsOnPortSync = vi.fn<(port: number) => number[]>(() => []);
|
||||
const signalVerifiedGatewayPidSync = vi.fn<(pid: number, signal: "SIGTERM" | "SIGUSR1") => void>();
|
||||
const formatGatewayPidList = vi.fn<(pids: number[]) => string>((pids) => pids.join(", "));
|
||||
const probeGateway = vi.fn<
|
||||
(opts: {
|
||||
url: string;
|
||||
@@ -49,24 +48,18 @@ const probeGateway = vi.fn<
|
||||
const isRestartEnabled = vi.fn<(config?: { commands?: unknown }) => boolean>(() => true);
|
||||
const loadConfig = vi.fn(() => ({}));
|
||||
|
||||
vi.mock("node:fs", () => ({
|
||||
default: {
|
||||
readFileSync: (...args: unknown[]) => mockReadFileSync(...args),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("node:child_process", () => ({
|
||||
spawnSync: (...args: unknown[]) => mockSpawnSync(...args),
|
||||
}));
|
||||
|
||||
vi.mock("../../config/config.js", () => ({
|
||||
loadConfig: () => loadConfig(),
|
||||
readBestEffortConfig: async () => loadConfig(),
|
||||
resolveGatewayPort,
|
||||
}));
|
||||
|
||||
// Route the gateway-process helpers through the module-level spies so each
// test can script which listener PIDs are reported and observe the signals
// that would have been sent, without touching real processes.
vi.mock("../../infra/gateway-processes.js", () => ({
  findVerifiedGatewayListenerPidsOnPortSync: (port: number) =>
    findVerifiedGatewayListenerPidsOnPortSync(port),
  signalVerifiedGatewayPidSync: (pid: number, signal: "SIGTERM" | "SIGUSR1") =>
    signalVerifiedGatewayPidSync(pid, signal),
  formatGatewayPidList: (pids: number[]) => formatGatewayPidList(pids),
}));
|
||||
|
||||
vi.mock("../../gateway/probe.js", () => ({
|
||||
@@ -121,12 +114,12 @@ describe("runDaemonRestart health checks", () => {
|
||||
renderGatewayPortHealthDiagnostics.mockReset();
|
||||
renderRestartDiagnostics.mockReset();
|
||||
resolveGatewayPort.mockReset();
|
||||
findGatewayPidsOnPortSync.mockReset();
|
||||
findVerifiedGatewayListenerPidsOnPortSync.mockReset();
|
||||
signalVerifiedGatewayPidSync.mockReset();
|
||||
formatGatewayPidList.mockReset();
|
||||
probeGateway.mockReset();
|
||||
isRestartEnabled.mockReset();
|
||||
loadConfig.mockReset();
|
||||
mockReadFileSync.mockReset();
|
||||
mockSpawnSync.mockReset();
|
||||
|
||||
service.readCommand.mockResolvedValue({
|
||||
programArguments: ["openclaw", "gateway", "--port", "18789"],
|
||||
@@ -158,23 +151,8 @@ describe("runDaemonRestart health checks", () => {
|
||||
configSnapshot: { commands: { restart: true } },
|
||||
});
|
||||
isRestartEnabled.mockReturnValue(true);
|
||||
mockReadFileSync.mockImplementation((path: string) => {
|
||||
const match = path.match(/\/proc\/(\d+)\/cmdline$/);
|
||||
if (!match) {
|
||||
throw new Error(`unexpected path ${path}`);
|
||||
}
|
||||
const pid = Number.parseInt(match[1] ?? "", 10);
|
||||
if ([4200, 4300].includes(pid)) {
|
||||
return ["openclaw", "gateway", "--port", "18789", ""].join("\0");
|
||||
}
|
||||
throw new Error(`unknown pid ${pid}`);
|
||||
});
|
||||
mockSpawnSync.mockReturnValue({
|
||||
error: null,
|
||||
status: 0,
|
||||
stdout: "openclaw gateway --port 18789",
|
||||
stderr: "",
|
||||
});
|
||||
signalVerifiedGatewayPidSync.mockImplementation(() => {});
|
||||
formatGatewayPidList.mockImplementation((pids) => pids.join(", "));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -242,38 +220,20 @@ describe("runDaemonRestart health checks", () => {
|
||||
});
|
||||
|
||||
// Stop path without a loaded service: every distinct listener PID reported by
// the mocked lookup must receive SIGTERM through the mocked signalling helper.
it("signals an unmanaged gateway process on stop", async () => {
  // The duplicate 4200 entry is deliberate; the assertions below only require
  // that both distinct PIDs were signalled.
  findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200, 4200, 4300]);
  // Simulate "service not loaded" so the unmanaged fallback callback runs.
  runServiceStop.mockImplementation(async (params: { onNotLoaded?: () => Promise<unknown> }) => {
    await params.onNotLoaded?.();
  });

  await runDaemonStop({ json: true });

  expect(findVerifiedGatewayListenerPidsOnPortSync).toHaveBeenCalledWith(18789);
  expect(signalVerifiedGatewayPidSync).toHaveBeenCalledWith(4200, "SIGTERM");
  expect(signalVerifiedGatewayPidSync).toHaveBeenCalledWith(4300, "SIGTERM");
});
|
||||
|
||||
it("signals a single unmanaged gateway process on restart", async () => {
|
||||
vi.spyOn(process, "platform", "get").mockReturnValue("win32");
|
||||
const killSpy = vi.spyOn(process, "kill").mockImplementation(() => true);
|
||||
findGatewayPidsOnPortSync.mockReturnValue([4200]);
|
||||
mockSpawnSync.mockReturnValue({
|
||||
error: null,
|
||||
status: 0,
|
||||
stdout:
|
||||
'CommandLine="C:\\\\Program Files\\\\OpenClaw\\\\openclaw.exe" gateway --port 18789\r\n',
|
||||
stderr: "",
|
||||
});
|
||||
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200]);
|
||||
runServiceRestart.mockImplementation(
|
||||
async (params: RestartParams & { onNotLoaded?: () => Promise<unknown> }) => {
|
||||
await params.onNotLoaded?.();
|
||||
@@ -291,8 +251,8 @@ describe("runDaemonRestart health checks", () => {
|
||||
|
||||
await runDaemonRestart({ json: true });
|
||||
|
||||
expect(findGatewayPidsOnPortSync).toHaveBeenCalledWith(18789);
|
||||
expect(killSpy).toHaveBeenCalledWith(4200, "SIGUSR1");
|
||||
expect(findVerifiedGatewayListenerPidsOnPortSync).toHaveBeenCalledWith(18789);
|
||||
expect(signalVerifiedGatewayPidSync).toHaveBeenCalledWith(4200, "SIGUSR1");
|
||||
expect(probeGateway).toHaveBeenCalledTimes(1);
|
||||
expect(waitForGatewayHealthyListener).toHaveBeenCalledTimes(1);
|
||||
expect(waitForGatewayHealthyRestart).not.toHaveBeenCalled();
|
||||
@@ -301,15 +261,7 @@ describe("runDaemonRestart health checks", () => {
|
||||
});
|
||||
|
||||
it("fails unmanaged restart when multiple gateway listeners are present", async () => {
|
||||
vi.spyOn(process, "platform", "get").mockReturnValue("win32");
|
||||
findGatewayPidsOnPortSync.mockReturnValue([4200, 4300]);
|
||||
mockSpawnSync.mockReturnValue({
|
||||
error: null,
|
||||
status: 0,
|
||||
stdout:
|
||||
'CommandLine="C:\\\\Program Files\\\\OpenClaw\\\\openclaw.exe" gateway --port 18789\r\n',
|
||||
stderr: "",
|
||||
});
|
||||
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200, 4300]);
|
||||
runServiceRestart.mockImplementation(
|
||||
async (params: RestartParams & { onNotLoaded?: () => Promise<unknown> }) => {
|
||||
await params.onNotLoaded?.();
|
||||
@@ -323,7 +275,7 @@ describe("runDaemonRestart health checks", () => {
|
||||
});
|
||||
|
||||
it("fails unmanaged restart when the running gateway has commands.restart disabled", async () => {
|
||||
findGatewayPidsOnPortSync.mockReturnValue([4200]);
|
||||
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200]);
|
||||
probeGateway.mockResolvedValue({
|
||||
ok: true,
|
||||
configSnapshot: { commands: { restart: false } },
|
||||
@@ -342,21 +294,13 @@ describe("runDaemonRestart health checks", () => {
|
||||
});
|
||||
|
||||
// Stop path without a loaded service: when the mocked lookup reports no
// listener PIDs, no signal may be sent at all.
it("skips unmanaged signaling for pids that are not live gateway processes", async () => {
  findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([]);
  // Simulate "service not loaded" so the unmanaged fallback callback runs.
  runServiceStop.mockImplementation(async (params: { onNotLoaded?: () => Promise<unknown> }) => {
    await params.onNotLoaded?.();
  });

  await runDaemonStop({ json: true });

  expect(signalVerifiedGatewayPidSync).not.toHaveBeenCalled();
});
|
||||
});
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import fsSync from "node:fs";
|
||||
import { isRestartEnabled } from "../../config/commands.js";
|
||||
import { readBestEffortConfig, resolveGatewayPort } from "../../config/config.js";
|
||||
import { parseCmdScriptCommandLine } from "../../daemon/cmd-argv.js";
|
||||
import { resolveGatewayService } from "../../daemon/service.js";
|
||||
import { probeGateway } from "../../gateway/probe.js";
|
||||
import { isGatewayArgv, parseProcCmdline } from "../../infra/gateway-process-argv.js";
|
||||
import { findGatewayPidsOnPortSync } from "../../infra/restart.js";
|
||||
import {
|
||||
findVerifiedGatewayListenerPidsOnPortSync,
|
||||
formatGatewayPidList,
|
||||
signalVerifiedGatewayPidSync,
|
||||
} from "../../infra/gateway-processes.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { theme } from "../../terminal/theme.js";
|
||||
import { formatCliCommand } from "../command-format.js";
|
||||
@@ -43,85 +43,12 @@ async function resolveGatewayLifecyclePort(service = resolveGatewayService()) {
|
||||
return portFromArgs ?? resolveGatewayPort(await readBestEffortConfig(), mergedEnv);
|
||||
}
|
||||
|
||||
/**
 * Extracts the command-line text from the textual output of a
 * `CommandLine` query (the caller feeds it `wmic ... get CommandLine /value`).
 *
 * Blank lines are ignored and every line is trimmed. The first line that
 * starts with `CommandLine=` (case-insensitive) decides the result: its value,
 * or `null` when the value is empty. If no such line exists, the first line
 * that is not the bare `CommandLine` header is returned.
 *
 * @param raw - Raw stdout text to scan.
 * @returns The command-line string, or `null` when none can be found.
 */
function extractWindowsCommandLine(raw: string): string | null {
  const keyPrefix = "commandline=";

  // Normalise: split on LF / CRLF, trim, and drop empty entries.
  const entries: string[] = [];
  for (const piece of raw.split(/\r?\n/)) {
    const trimmed = piece.trim();
    if (trimmed) {
      entries.push(trimmed);
    }
  }

  // Key/value form: the first `CommandLine=` line wins, even when it is empty.
  const keyed = entries.find((entry) => entry.toLowerCase().startsWith(keyPrefix));
  if (keyed !== undefined) {
    const value = keyed.slice(keyPrefix.length).trim();
    return value === "" ? null : value;
  }

  // Fallback form: skip the bare header and take the first remaining line.
  for (const entry of entries) {
    if (entry.toLowerCase() !== "commandline") {
      return entry;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Synchronously reads the argument vector of the process with the given PID,
 * using a platform-specific source:
 *
 * - linux: `/proc/<pid>/cmdline`, parsed by `parseProcCmdline`
 * - darwin: `ps -o command= -p <pid>`, split on whitespace
 * - win32: `wmic process where ProcessId=<pid> get CommandLine /value`,
 *   parsed by `parseCmdScriptCommandLine`
 *
 * @param pid - Process id to inspect.
 * @returns The arguments, or `null` when the platform is not handled, the
 *          lookup fails, or the command text is empty.
 */
function readGatewayProcessArgsSync(pid: number): string[] | null {
  switch (process.platform) {
    case "linux": {
      try {
        const rawCmdline = fsSync.readFileSync(`/proc/${pid}/cmdline`, "utf8");
        return parseProcCmdline(rawCmdline);
      } catch {
        // Any read or parse failure is reported as "unknown".
        return null;
      }
    }
    case "darwin": {
      const psResult = spawnSync("ps", ["-o", "command=", "-p", String(pid)], {
        encoding: "utf8",
        timeout: 1000,
      });
      const psFailed = Boolean(psResult.error) || psResult.status !== 0;
      if (psFailed) {
        return null;
      }
      const commandText = psResult.stdout.trim();
      if (!commandText) {
        return null;
      }
      return commandText.split(/\s+/);
    }
    case "win32": {
      const wmicArgs = ["process", "where", `ProcessId=${pid}`, "get", "CommandLine", "/value"];
      const wmicResult = spawnSync("wmic", wmicArgs, {
        encoding: "utf8",
        timeout: 1000,
      });
      const wmicFailed = Boolean(wmicResult.error) || wmicResult.status !== 0;
      if (wmicFailed) {
        return null;
      }
      const commandText = extractWindowsCommandLine(wmicResult.stdout);
      if (!commandText) {
        return null;
      }
      return parseCmdScriptCommandLine(commandText);
    }
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Lists the distinct PIDs listening on `port` whose command line is accepted
 * by `isGatewayArgv`.
 *
 * @param port - Port to look up via `findGatewayPidsOnPortSync`.
 * @returns De-duplicated, finite, positive PIDs that pass the argv check,
 *          in the order they were first reported.
 */
function resolveGatewayListenerPids(port: number): number[] {
  const accepted: number[] = [];
  // A Set removes duplicate PIDs while preserving first-seen order.
  for (const candidate of new Set(findGatewayPidsOnPortSync(port))) {
    if (!Number.isFinite(candidate) || candidate <= 0) {
      continue;
    }
    const argv = readGatewayProcessArgsSync(candidate);
    if (argv != null && isGatewayArgv(argv, { allowGatewayBinary: true })) {
      accepted.push(candidate);
    }
  }
  return accepted;
}
|
||||
|
||||
/**
 * Resolves the gateway port from the best-effort config plus `process.env`.
 * If loading the config or resolving with it rejects, the port is resolved
 * from `process.env` alone.
 *
 * @returns A promise for the resolved port number.
 */
function resolveGatewayPortFallback(): Promise<number> {
  const pendingConfig = readBestEffortConfig();
  const fromConfig = pendingConfig.then((loaded) => resolveGatewayPort(loaded, process.env));
  // The catch also covers a throw from the config-based resolution above.
  return fromConfig.catch(() => resolveGatewayPort(undefined, process.env));
}
|
||||
|
||||
/**
 * Sends `signal` to `pid`, but only after re-reading the process arguments
 * and confirming `isGatewayArgv` accepts them.
 *
 * @param pid - Target process id.
 * @param signal - Signal to deliver.
 * @throws Error when the arguments cannot be read or are not accepted as a
 *         gateway command line.
 */
function signalGatewayPid(pid: number, signal: "SIGTERM" | "SIGUSR1") {
  const argv = readGatewayProcessArgsSync(pid);
  const looksLikeGateway = argv != null && isGatewayArgv(argv, { allowGatewayBinary: true });
  if (looksLikeGateway) {
    process.kill(pid, signal);
    return;
  }
  throw new Error(`refusing to signal non-gateway process pid ${pid}`);
}
|
||||
|
||||
/**
 * Renders a list of PIDs as a comma-and-space separated string.
 *
 * @param pids - PIDs to render.
 * @returns The joined text; an empty string for an empty list.
 */
function formatGatewayPidList(pids: number[]): string {
  const separator = ", ";
  const rendered = pids.join(separator);
  return rendered;
}
|
||||
|
||||
async function assertUnmanagedGatewayRestartEnabled(port: number): Promise<void> {
|
||||
const probe = await probeGateway({
|
||||
url: `ws://127.0.0.1:${port}`,
|
||||
@@ -143,7 +70,7 @@ async function assertUnmanagedGatewayRestartEnabled(port: number): Promise<void>
|
||||
}
|
||||
|
||||
/**
 * Returns the PIDs that `findVerifiedGatewayListenerPidsOnPortSync` reports
 * for `port`, keeping only finite, strictly positive values.
 *
 * @param port - Port whose listener PIDs should be looked up.
 * @returns The filtered PID list (possibly empty).
 */
function resolveVerifiedGatewayListenerPids(port: number): number[] {
  return findVerifiedGatewayListenerPidsOnPortSync(port).filter(
    // Defensive guard against NaN / non-positive entries from the lookup.
    (pid): pid is number => Number.isFinite(pid) && pid > 0,
  );
}
|
||||
@@ -154,7 +81,7 @@ async function stopGatewayWithoutServiceManager(port: number) {
|
||||
return null;
|
||||
}
|
||||
for (const pid of pids) {
|
||||
signalGatewayPid(pid, "SIGTERM");
|
||||
signalVerifiedGatewayPidSync(pid, "SIGTERM");
|
||||
}
|
||||
return {
|
||||
result: "stopped" as const,
|
||||
@@ -173,7 +100,7 @@ async function restartGatewayWithoutServiceManager(port: number) {
|
||||
`multiple gateway processes are listening on port ${port}: ${formatGatewayPidList(pids)}; use "openclaw gateway status --deep" before retrying restart`,
|
||||
);
|
||||
}
|
||||
signalGatewayPid(pids[0], "SIGUSR1");
|
||||
signalVerifiedGatewayPidSync(pids[0], "SIGUSR1");
|
||||
return {
|
||||
result: "restarted" as const,
|
||||
message: `Gateway restart signal sent to unmanaged process on port ${port}: ${pids[0]}.`,
|
||||
|
||||
@@ -190,6 +190,32 @@ describe("inspectGatewayRestart", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Runtime reports "stopped" while the port is busy with a gateway listener and
// the probe succeeds: the snapshot must be healthy with no stale PIDs.
it("treats a busy port as healthy when runtime status lags but the probe succeeds", async () => {
  // NOTE(review): the platform override is not restored inside this test —
  // confirm a shared hook resets it.
  Object.defineProperty(process, "platform", { value: "win32", configurable: true });

  const gatewayPort = 18789;
  const readRuntime = vi.fn(async () => ({ status: "stopped" }));
  const service = { readRuntime } as unknown as GatewayService;

  probeGateway.mockResolvedValue({ ok: true, close: null });
  classifyPortListener.mockReturnValue("gateway");
  inspectPortUsage.mockResolvedValue({
    port: 18789,
    status: "busy",
    listeners: [{ pid: 9100, commandLine: "openclaw-gateway" }],
    hints: [],
  });

  const restartHealth = await import("./restart-health.js");
  const result = await restartHealth.inspectGatewayRestart({ service, port: gatewayPort });

  expect(result.healthy).toBe(true);
  expect(result.staleGatewayPids).toEqual([]);
});
|
||||
|
||||
it("treats auth-closed probe as healthy gateway reachability", async () => {
|
||||
const snapshot = await inspectAmbiguousOwnershipWithProbe({
|
||||
ok: false,
|
||||
|
||||
@@ -65,7 +65,8 @@ async function confirmGatewayReachable(port: number): Promise<boolean> {
|
||||
const probe = await probeGateway({
|
||||
url: `ws://127.0.0.1:${port}`,
|
||||
auth: token || password ? { token, password } : undefined,
|
||||
timeoutMs: 1_000,
|
||||
timeoutMs: 3_000,
|
||||
includeDetails: false,
|
||||
});
|
||||
return probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason);
|
||||
}
|
||||
@@ -123,6 +124,22 @@ export async function inspectGatewayRestart(params: {
|
||||
};
|
||||
}
|
||||
|
||||
if (portUsage.status === "busy" && runtime.status !== "running") {
|
||||
try {
|
||||
const reachable = await confirmGatewayReachable(params.port);
|
||||
if (reachable) {
|
||||
return {
|
||||
runtime,
|
||||
portUsage,
|
||||
healthy: true,
|
||||
staleGatewayPids: [],
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// Probe is best-effort; keep the ownership-based diagnostics.
|
||||
}
|
||||
}
|
||||
|
||||
const gatewayListeners =
|
||||
portUsage.status === "busy"
|
||||
? portUsage.listeners.filter(
|
||||
|
||||
Reference in New Issue
Block a user