mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-18 12:41:12 +00:00
fix: windows self-restart stale gateway cleanup (#60480) (thanks @arifahmedjoy)
* fix: implement Windows stale gateway process cleanup before restart findGatewayPidsOnPortSync() returned [] immediately on Windows, causing cleanStaleGatewayProcessesSync() to skip killing old gateway processes during self-restart (triggerOpenClawRestart -> schtasks path). This led to an infinite retry loop: 'gateway already running under schtasks; waiting 5000ms before retrying startup'. Changes: - Extract Windows port/process helpers into shared windows-port-pids.ts to break the circular import between restart-stale-pids.ts and gateway-processes.ts, with configurable timeoutMs for poll compliance - findGatewayPidsOnPortSync: discover + verify Windows gateway PIDs via readWindowsListeningPidsOnPortSync + readWindowsProcessArgsSync - pollPortOnceWindows: use short POLL_SPAWN_TIMEOUT_MS (400ms) so a single slow PowerShell call cannot exceed the 2s polling budget - terminateStaleProcessesSync: add terminateStaleProcessesWindows using taskkill.exe (graceful /T first, then /F force-kill) Fixes the Windows gateway restart infinite loop caused by the schtasks supervisor detecting a port conflict it cannot resolve. * fix: tighten windows stale gateway cleanup * fix: preserve windows restart probe failures * refactor: unify windows gateway pid verification * fix: preserve windows argv probe failures * fix: windows self-restart stale gateway cleanup (#60480) (thanks @arifahmedjoy) --------- Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
committed by
GitHub
parent
ff6fd18629
commit
63fcc52520
@@ -181,6 +181,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Plugin SDK/context engines: export the missing context-engine result and subagent lifecycle types from `openclaw/plugin-sdk` so context engine plugins can type `ContextEngine` implementations without local workarounds. (#61251) Thanks @DaevMithran.
|
||||
- Tasks/maintenance: reconcile stale cron and chat-backed CLI task rows against live cron-job and agent-run ownership instead of treating any persisted session key as proof that the task is still running. (#60310) Thanks @lml2468.
|
||||
- Update/npm: prefer the npm binary that owns the installed global OpenClaw prefix so mixed Homebrew-plus-nvm setups update the right install. (#60153) Thanks @jayeshp19.
|
||||
- Windows/restart: clean up stale gateway listeners before Windows self-restart and treat listener and argv probe failures as inconclusive, so scheduled-task relaunch no longer falls into an `EADDRINUSE` retry loop. (#60480) Thanks @arifahmedjoy.
|
||||
|
||||
## 2026.4.2
|
||||
|
||||
|
||||
@@ -1,114 +1,11 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import fsSync from "node:fs";
|
||||
import { parseCmdScriptCommandLine } from "../daemon/cmd-argv.js";
|
||||
import { isGatewayArgv, parseProcCmdline } from "./gateway-process-argv.js";
|
||||
import { findGatewayPidsOnPortSync as findUnixGatewayPidsOnPortSync } from "./restart-stale-pids.js";
|
||||
|
||||
const WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS = 5_000;
|
||||
|
||||
function extractWindowsCommandLine(raw: string): string | null {
|
||||
const lines = raw
|
||||
.split(/\r?\n/)
|
||||
.map((line) => line.trim())
|
||||
.filter(Boolean);
|
||||
for (const line of lines) {
|
||||
if (!line.toLowerCase().startsWith("commandline=")) {
|
||||
continue;
|
||||
}
|
||||
const value = line.slice("commandline=".length).trim();
|
||||
return value || null;
|
||||
}
|
||||
return lines.find((line) => line.toLowerCase() !== "commandline") ?? null;
|
||||
}
|
||||
|
||||
function readWindowsProcessArgsViaPowerShell(pid: number): string[] | null {
|
||||
const ps = spawnSync(
|
||||
"powershell",
|
||||
[
|
||||
"-NoProfile",
|
||||
"-Command",
|
||||
`(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}" | Select-Object -ExpandProperty CommandLine)`,
|
||||
],
|
||||
{
|
||||
encoding: "utf8",
|
||||
timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
|
||||
windowsHide: true,
|
||||
},
|
||||
);
|
||||
if (ps.error || ps.status !== 0) {
|
||||
return null;
|
||||
}
|
||||
const command = ps.stdout.trim();
|
||||
return command ? parseCmdScriptCommandLine(command) : null;
|
||||
}
|
||||
|
||||
function readWindowsProcessArgsViaWmic(pid: number): string[] | null {
|
||||
const wmic = spawnSync(
|
||||
"wmic",
|
||||
["process", "where", `ProcessId=${pid}`, "get", "CommandLine", "/value"],
|
||||
{
|
||||
encoding: "utf8",
|
||||
timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
|
||||
windowsHide: true,
|
||||
},
|
||||
);
|
||||
if (wmic.error || wmic.status !== 0) {
|
||||
return null;
|
||||
}
|
||||
const command = extractWindowsCommandLine(wmic.stdout);
|
||||
return command ? parseCmdScriptCommandLine(command) : null;
|
||||
}
|
||||
|
||||
function readWindowsListeningPidsViaPowerShell(port: number): number[] | null {
|
||||
const ps = spawnSync(
|
||||
"powershell",
|
||||
[
|
||||
"-NoProfile",
|
||||
"-Command",
|
||||
`(Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue | Select-Object -ExpandProperty OwningProcess)`,
|
||||
],
|
||||
{
|
||||
encoding: "utf8",
|
||||
timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
|
||||
windowsHide: true,
|
||||
},
|
||||
);
|
||||
if (ps.error || ps.status !== 0) {
|
||||
return null;
|
||||
}
|
||||
return ps.stdout
|
||||
.split(/\r?\n/)
|
||||
.map((line) => Number.parseInt(line.trim(), 10))
|
||||
.filter((pid) => Number.isFinite(pid) && pid > 0);
|
||||
}
|
||||
|
||||
function readWindowsListeningPidsViaNetstat(port: number): number[] {
|
||||
const netstat = spawnSync("netstat", ["-ano", "-p", "tcp"], {
|
||||
encoding: "utf8",
|
||||
timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
|
||||
windowsHide: true,
|
||||
});
|
||||
if (netstat.error || netstat.status !== 0) {
|
||||
return [];
|
||||
}
|
||||
const pids = new Set<number>();
|
||||
for (const line of netstat.stdout.split(/\r?\n/)) {
|
||||
const match = line.match(/^\s*TCP\s+(\S+):(\d+)\s+\S+\s+LISTENING\s+(\d+)\s*$/i);
|
||||
if (!match) {
|
||||
continue;
|
||||
}
|
||||
const parsedPort = Number.parseInt(match[2] ?? "", 10);
|
||||
const pid = Number.parseInt(match[3] ?? "", 10);
|
||||
if (parsedPort === port && Number.isFinite(pid) && pid > 0) {
|
||||
pids.add(pid);
|
||||
}
|
||||
}
|
||||
return [...pids];
|
||||
}
|
||||
|
||||
function readWindowsListeningPidsOnPortSync(port: number): number[] {
|
||||
return readWindowsListeningPidsViaPowerShell(port) ?? readWindowsListeningPidsViaNetstat(port);
|
||||
}
|
||||
import {
|
||||
readWindowsListeningPidsOnPortSync,
|
||||
readWindowsProcessArgsSync,
|
||||
} from "./windows-port-pids.js";
|
||||
|
||||
export function readGatewayProcessArgsSync(pid: number): string[] | null {
|
||||
if (process.platform === "linux") {
|
||||
@@ -130,7 +27,7 @@ export function readGatewayProcessArgsSync(pid: number): string[] | null {
|
||||
return command ? command.split(/\s+/) : null;
|
||||
}
|
||||
if (process.platform === "win32") {
|
||||
return readWindowsProcessArgsViaPowerShell(pid) ?? readWindowsProcessArgsViaWmic(pid);
|
||||
return readWindowsProcessArgsSync(pid);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1,14 +1,31 @@
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// This entire file tests lsof-based Unix port polling. The feature is a deliberate
|
||||
// no-op on Windows (findGatewayPidsOnPortSync returns [] immediately). Running these
|
||||
// tests on a Windows CI runner would require lsof which does not exist there, so we
|
||||
// skip the suite entirely and rely on the Linux/macOS runners for coverage.
|
||||
// This file primarily tests lsof-based Unix port polling. On Windows,
|
||||
// findGatewayPidsOnPortSync delegates to findVerifiedWindowsGatewayPidsOnPortSync
|
||||
// (PowerShell/netstat discovery via windows-port-pids.ts) instead of returning [].
|
||||
// Running lsof-dependent tests on a Windows CI runner is not possible, so the suite
|
||||
// is skipped on Windows; cross-platform tests mock process.platform to win32.
|
||||
const isWindows = process.platform === "win32";
|
||||
|
||||
const mockSpawnSync = vi.hoisted(() => vi.fn());
|
||||
const mockResolveGatewayPort = vi.hoisted(() => vi.fn(() => 18789));
|
||||
const mockRestartWarn = vi.hoisted(() => vi.fn());
|
||||
const mockReadWindowsListeningPids = vi.hoisted(() =>
|
||||
vi.fn((_port: number, _timeoutMs?: number): number[] => []),
|
||||
);
|
||||
const mockReadWindowsListeningPidsResult = vi.hoisted(() =>
|
||||
vi.fn<(_port: number, _timeoutMs?: number) => MockWindowsListeningPidsResult>(
|
||||
(_port: number, _timeoutMs?: number) => ({ ok: true, pids: [] }),
|
||||
),
|
||||
);
|
||||
const mockReadWindowsProcessArgs = vi.hoisted(() =>
|
||||
vi.fn((_pid: number, _timeoutMs?: number): string[] | null => null),
|
||||
);
|
||||
const mockReadWindowsProcessArgsResult = vi.hoisted(() =>
|
||||
vi.fn<(_pid: number, _timeoutMs?: number) => MockWindowsProcessArgsResult>(
|
||||
(_pid: number, _timeoutMs?: number) => ({ ok: true, args: null }),
|
||||
),
|
||||
);
|
||||
|
||||
vi.mock("node:child_process", async () => {
|
||||
const { mockNodeBuiltinModule } = await import("../../test/helpers/node-builtin-mocks.js");
|
||||
@@ -37,6 +54,19 @@ vi.mock("../logging/subsystem.js", () => ({
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock("./gateway-processes.js", () => ({}));
|
||||
|
||||
vi.mock("./windows-port-pids.js", () => ({
|
||||
readWindowsListeningPidsOnPortSync: (port: number, timeoutMs?: number) =>
|
||||
mockReadWindowsListeningPids(port, timeoutMs),
|
||||
readWindowsListeningPidsResultSync: (port: number, timeoutMs?: number) =>
|
||||
mockReadWindowsListeningPidsResult(port, timeoutMs),
|
||||
readWindowsProcessArgsSync: (pid: number, timeoutMs?: number) =>
|
||||
mockReadWindowsProcessArgs(pid, timeoutMs),
|
||||
readWindowsProcessArgsResultSync: (pid: number, timeoutMs?: number) =>
|
||||
mockReadWindowsProcessArgsResult(pid, timeoutMs),
|
||||
}));
|
||||
|
||||
import { resolveLsofCommandSync } from "./ports-lsof.js";
|
||||
let __testing: typeof import("./restart-stale-pids.js").__testing;
|
||||
let cleanStaleGatewayProcessesSync: typeof import("./restart-stale-pids.js").cleanStaleGatewayProcessesSync;
|
||||
@@ -53,6 +83,14 @@ type MockLsofResult = {
|
||||
stderr: string;
|
||||
};
|
||||
|
||||
type MockWindowsListeningPidsResult =
|
||||
| { ok: true; pids: number[] }
|
||||
| { ok: false; permanent: boolean };
|
||||
|
||||
type MockWindowsProcessArgsResult =
|
||||
| { ok: true; args: string[] | null }
|
||||
| { ok: false; permanent: boolean };
|
||||
|
||||
function createLsofResult(overrides: Partial<MockLsofResult> = {}): MockLsofResult {
|
||||
return {
|
||||
error: null,
|
||||
@@ -101,7 +139,15 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
|
||||
mockSpawnSync.mockReset();
|
||||
mockResolveGatewayPort.mockReset();
|
||||
mockRestartWarn.mockReset();
|
||||
mockReadWindowsListeningPids.mockReset();
|
||||
mockReadWindowsListeningPidsResult.mockReset();
|
||||
mockReadWindowsProcessArgs.mockReset();
|
||||
mockReadWindowsProcessArgsResult.mockReset();
|
||||
mockResolveGatewayPort.mockReturnValue(18789);
|
||||
mockReadWindowsListeningPids.mockReturnValue([]);
|
||||
mockReadWindowsListeningPidsResult.mockReturnValue({ ok: true, pids: [] });
|
||||
mockReadWindowsProcessArgs.mockReturnValue(null);
|
||||
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: null });
|
||||
__testing.setSleepSyncOverride(() => {});
|
||||
});
|
||||
|
||||
@@ -189,15 +235,14 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
|
||||
expect(result).toEqual([stalePid]); // deduped — not [pid, pid]
|
||||
});
|
||||
|
||||
it("returns [] and skips lsof on win32", () => {
|
||||
// The entire describe block is skipped on Windows (isWindows guard at top),
|
||||
// so this test only runs on Linux/macOS. It mocks platform to win32 for the
|
||||
// single assertion without needing to restore — the suite-level skipIf means
|
||||
// this will never run on an actual Windows runner where the mock could leak.
|
||||
it("delegates to Windows port helpers on win32 and skips lsof", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
mockReadWindowsListeningPids.mockReturnValue([]);
|
||||
expect(findGatewayPidsOnPortSync(18789)).toEqual([]);
|
||||
expect(mockReadWindowsListeningPids).toHaveBeenCalledWith(18789, undefined);
|
||||
// lsof must NOT be invoked — Windows uses PowerShell/netstat
|
||||
expect(mockSpawnSync).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
if (origDescriptor) {
|
||||
@@ -205,6 +250,24 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("returns verified gateway pids from Windows helpers on win32", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
const stalePid = process.pid + 900;
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
mockReadWindowsListeningPids.mockReturnValue([stalePid]);
|
||||
// Simulate a verified gateway process (must pass real isGatewayArgv)
|
||||
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
|
||||
expect(findGatewayPidsOnPortSync(18789)).toEqual([stalePid]);
|
||||
expect(mockReadWindowsListeningPids).toHaveBeenCalledWith(18789, undefined);
|
||||
expect(mockReadWindowsProcessArgs).toHaveBeenCalledWith(stalePid, undefined);
|
||||
} finally {
|
||||
if (origDescriptor) {
|
||||
Object.defineProperty(process, "platform", origDescriptor);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
@@ -597,6 +660,196 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
|
||||
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
|
||||
expect(killSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("treats failed Windows port probes as inconclusive, not free", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
const stalePid = process.pid + 910;
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
mockReadWindowsListeningPids.mockReturnValue([stalePid]);
|
||||
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
|
||||
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: ["openclaw", "gateway"] });
|
||||
mockSpawnSync.mockReturnValue({
|
||||
error: null,
|
||||
status: 0,
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
});
|
||||
let fakeNow = 0;
|
||||
__testing.setDateNowOverride(() => fakeNow);
|
||||
mockReadWindowsListeningPidsResult.mockImplementation((_port, timeoutMs) => {
|
||||
if (timeoutMs === 400) {
|
||||
fakeNow += 2001;
|
||||
return { ok: false, permanent: false };
|
||||
}
|
||||
return { ok: true, pids: [stalePid] };
|
||||
});
|
||||
let aliveChecks = 0;
|
||||
const killSpy = vi.spyOn(process, "kill").mockImplementation((pid, signal) => {
|
||||
if (signal === 0 && pid === stalePid) {
|
||||
aliveChecks += 1;
|
||||
if (aliveChecks < 3) {
|
||||
return true;
|
||||
}
|
||||
throw Object.assign(new Error("ESRCH"), { code: "ESRCH" });
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
expect(cleanStaleGatewayProcessesSync()).toEqual([stalePid]);
|
||||
expect(mockReadWindowsListeningPidsResult).toHaveBeenCalledWith(18789, 400);
|
||||
expect(mockRestartWarn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("port 18789 still in use after 2000ms"),
|
||||
);
|
||||
expect(killSpy).toHaveBeenCalledWith(stalePid, 0);
|
||||
} finally {
|
||||
__testing.setDateNowOverride(null);
|
||||
if (origDescriptor) {
|
||||
Object.defineProperty(process, "platform", origDescriptor);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("waits for port release when the initial Windows stale-pid probe is inconclusive", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
let fakeNow = 0;
|
||||
__testing.setDateNowOverride(() => fakeNow);
|
||||
mockReadWindowsListeningPidsResult.mockImplementation((_port, timeoutMs) => {
|
||||
if (timeoutMs === 400) {
|
||||
fakeNow += 2001;
|
||||
}
|
||||
return { ok: false, permanent: false };
|
||||
});
|
||||
const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
|
||||
|
||||
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
|
||||
expect(mockReadWindowsListeningPidsResult).toHaveBeenCalledWith(18789, 400);
|
||||
expect(mockRestartWarn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("port 18789 still in use after 2000ms"),
|
||||
);
|
||||
expect(killSpy).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
__testing.setDateNowOverride(null);
|
||||
if (origDescriptor) {
|
||||
Object.defineProperty(process, "platform", origDescriptor);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("waits for port release when Windows listener argv inspection is inconclusive", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
const stalePid = process.pid + 913;
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
let fakeNow = 0;
|
||||
__testing.setDateNowOverride(() => fakeNow);
|
||||
mockReadWindowsListeningPidsResult.mockImplementation((_port, timeoutMs) => {
|
||||
if (timeoutMs === 400) {
|
||||
fakeNow += 2001;
|
||||
}
|
||||
return { ok: true, pids: [stalePid] };
|
||||
});
|
||||
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: false, permanent: false });
|
||||
const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
|
||||
|
||||
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
|
||||
expect(mockReadWindowsProcessArgsResult).toHaveBeenCalledWith(stalePid, undefined);
|
||||
expect(mockRestartWarn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("port 18789 still in use after 2000ms"),
|
||||
);
|
||||
expect(killSpy).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
__testing.setDateNowOverride(null);
|
||||
if (origDescriptor) {
|
||||
Object.defineProperty(process, "platform", origDescriptor);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("does not report Windows pids as killed when taskkill fails", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
const stalePid = process.pid + 911;
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
mockReadWindowsListeningPids.mockReturnValue([stalePid]);
|
||||
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
|
||||
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: ["openclaw", "gateway"] });
|
||||
mockReadWindowsListeningPidsResult.mockReturnValue({ ok: true, pids: [stalePid] });
|
||||
mockSpawnSync.mockReturnValue({
|
||||
error: null,
|
||||
status: 1,
|
||||
stdout: "",
|
||||
stderr: "access denied",
|
||||
});
|
||||
vi.spyOn(process, "kill").mockImplementation((pid, signal) => {
|
||||
if (signal === 0 && pid === stalePid) {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
|
||||
expect(mockSpawnSync).toHaveBeenCalledWith(
|
||||
expect.stringContaining("taskkill.exe"),
|
||||
["/T", "/PID", String(stalePid)],
|
||||
expect.objectContaining({ timeout: 5000 }),
|
||||
);
|
||||
} finally {
|
||||
if (origDescriptor) {
|
||||
Object.defineProperty(process, "platform", origDescriptor);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("treats Windows EPERM liveness checks as alive and still forces taskkill", () => {
|
||||
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
const stalePid = process.pid + 912;
|
||||
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
|
||||
try {
|
||||
mockReadWindowsListeningPidsResult.mockReturnValue({ ok: true, pids: [stalePid] });
|
||||
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
|
||||
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: ["openclaw", "gateway"] });
|
||||
mockSpawnSync
|
||||
.mockReturnValueOnce({
|
||||
error: null,
|
||||
status: 1,
|
||||
stdout: "",
|
||||
stderr: "access denied",
|
||||
})
|
||||
.mockReturnValueOnce({
|
||||
error: null,
|
||||
status: 1,
|
||||
stdout: "",
|
||||
stderr: "still denied",
|
||||
});
|
||||
vi.spyOn(process, "kill").mockImplementation((pid, signal) => {
|
||||
if (signal === 0 && pid === stalePid) {
|
||||
throw Object.assign(new Error("EPERM"), { code: "EPERM" });
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
|
||||
expect(mockSpawnSync).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.stringContaining("taskkill.exe"),
|
||||
["/T", "/PID", String(stalePid)],
|
||||
expect.objectContaining({ timeout: 5000 }),
|
||||
);
|
||||
expect(mockSpawnSync).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.stringContaining("taskkill.exe"),
|
||||
["/F", "/T", "/PID", String(stalePid)],
|
||||
expect.objectContaining({ timeout: 5000 }),
|
||||
);
|
||||
} finally {
|
||||
if (origDescriptor) {
|
||||
Object.defineProperty(process, "platform", origDescriptor);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -1,7 +1,17 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import path from "node:path";
|
||||
import { resolveGatewayPort } from "../config/paths.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { isGatewayArgv } from "./gateway-process-argv.js";
|
||||
import { resolveLsofCommandSync } from "./ports-lsof.js";
|
||||
import {
|
||||
readWindowsListeningPidsOnPortSync,
|
||||
readWindowsListeningPidsResultSync,
|
||||
readWindowsProcessArgsResultSync,
|
||||
readWindowsProcessArgsSync,
|
||||
type WindowsProcessArgsResult,
|
||||
type WindowsListeningPidsResult,
|
||||
} from "./windows-port-pids.js";
|
||||
|
||||
const SPAWN_TIMEOUT_MS = 2000;
|
||||
const STALE_SIGTERM_WAIT_MS = 600;
|
||||
@@ -78,6 +88,53 @@ function parsePidsFromLsofOutput(stdout: string): number[] {
|
||||
return [...new Set(pids)].filter((pid) => pid !== process.pid);
|
||||
}
|
||||
|
||||
/**
 * Windows: narrow a list of candidate PIDs down to verified openclaw gateway
 * processes by inspecting each candidate's command line.
 *
 * Duplicates, non-finite or non-positive values, and the current process are
 * dropped before any command line is read. A candidate whose arguments cannot
 * be read (null) is treated as "not a gateway".
 */
function filterVerifiedWindowsGatewayPids(rawPids: number[]): number[] {
  const verified: number[] = [];
  for (const candidate of [...new Set(rawPids)]) {
    const eligible = Number.isFinite(candidate) && candidate > 0 && candidate !== process.pid;
    if (!eligible) {
      continue;
    }
    const argv = readWindowsProcessArgsSync(candidate);
    if (argv != null && isGatewayArgv(argv, { allowGatewayBinary: true })) {
      verified.push(candidate);
    }
  }
  return verified;
}
|
||||
|
||||
/**
 * Result-returning variant of the Windows gateway PID verification.
 *
 * Walks the de-duplicated candidates in order, ignoring invalid PIDs and the
 * current process, and asks `processArgsResult` for each candidate's argv.
 * The first failed argv probe aborts the scan and is propagated as
 * `{ ok: false }` carrying that probe's `permanent` flag, so the caller gets
 * an inconclusive answer instead of a partial PID list.
 */
function filterVerifiedWindowsGatewayPidsResult(
  rawPids: number[],
  processArgsResult: (pid: number) => WindowsProcessArgsResult,
): WindowsListeningPidsResult {
  const candidates = [...new Set(rawPids)].filter(
    (pid) => Number.isFinite(pid) && pid > 0 && pid !== process.pid,
  );
  const gatewayPids: number[] = [];
  for (const candidate of candidates) {
    const probe = processArgsResult(candidate);
    if (!probe.ok) {
      return { ok: false, permanent: probe.permanent };
    }
    const argv = probe.args;
    if (argv == null) {
      continue;
    }
    if (isGatewayArgv(argv, { allowGatewayBinary: true })) {
      gatewayPids.push(candidate);
    }
  }
  return { ok: true, pids: gatewayPids };
}
|
||||
|
||||
/**
 * Windows: list the PIDs listening on `port` and keep only those verified as
 * gateway processes. Uses the array-returning helpers, so a failed probe is
 * indistinguishable from "nothing found" here.
 */
function findVerifiedWindowsGatewayPidsOnPortSync(port: number): number[] {
  const listeningPids = readWindowsListeningPidsOnPortSync(port);
  return filterVerifiedWindowsGatewayPids(listeningPids);
}
|
||||
|
||||
/**
 * Windows: result-returning gateway discovery for `port`.
 *
 * A failed listener probe is returned unchanged; otherwise each listening PID
 * is verified through the result-returning argv reader so argv probe failures
 * surface as `{ ok: false }` too.
 */
function findVerifiedWindowsGatewayPidsOnPortResultSync(port: number): WindowsListeningPidsResult {
  const listeners = readWindowsListeningPidsResultSync(port);
  return listeners.ok
    ? filterVerifiedWindowsGatewayPidsResult(listeners.pids, (pid) =>
        readWindowsProcessArgsResultSync(pid),
      )
    : listeners;
}
|
||||
|
||||
/**
|
||||
 * Find PIDs of gateway processes listening on the given port (synchronous lsof on Unix; PowerShell/netstat helpers on Windows).
|
||||
* Returns only PIDs that belong to openclaw gateway processes (not the current process).
|
||||
@@ -87,7 +144,9 @@ export function findGatewayPidsOnPortSync(
|
||||
spawnTimeoutMs = SPAWN_TIMEOUT_MS,
|
||||
): number[] {
|
||||
if (process.platform === "win32") {
|
||||
return [];
|
||||
// Use the shared Windows port inspection (PowerShell / netstat) with
|
||||
// command-line verification to find only openclaw gateway processes.
|
||||
return findVerifiedWindowsGatewayPidsOnPortSync(port);
|
||||
}
|
||||
const lsof = resolveLsofCommandSync();
|
||||
const res = spawnSync(lsof, ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-Fpc"], {
|
||||
@@ -139,6 +198,9 @@ export function findGatewayPidsOnPortSync(
|
||||
type PollResult = { free: true } | { free: false } | { free: null; permanent: boolean };
|
||||
|
||||
function pollPortOnce(port: number): PollResult {
|
||||
if (process.platform === "win32") {
|
||||
return pollPortOnceWindows(port);
|
||||
}
|
||||
try {
|
||||
const lsof = resolveLsofCommandSync();
|
||||
const res = spawnSync(lsof, ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-Fpc"], {
|
||||
@@ -178,12 +240,36 @@ function pollPortOnce(port: number): PollResult {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * One Windows port probe for the port-release polling loop.
 *
 * The listener lookup is run with the short POLL_SPAWN_TIMEOUT_MS so that a
 * single slow PowerShell call cannot exceed the waitForPortFreeSync
 * wall-clock budget. Any listener on the port counts as "in use"; no gateway
 * verification is performed at this stage.
 *
 * Returns `{ free: true }` / `{ free: false }` for a conclusive probe and
 * `{ free: null, permanent }` when the probe failed or threw.
 */
function pollPortOnceWindows(port: number): PollResult {
  try {
    const probe = readWindowsListeningPidsResultSync(port, POLL_SPAWN_TIMEOUT_MS);
    if (probe.ok) {
      return probe.pids.length > 0 ? { free: false } : { free: true };
    }
    return { free: null, permanent: probe.permanent };
  } catch {
    // An unexpected throw is treated as a transient, inconclusive probe.
    return { free: null, permanent: false };
  }
}
|
||||
|
||||
/**
|
||||
* Synchronously terminate stale gateway processes.
|
||||
* Callers must pass a non-empty pids array.
|
||||
* Sends SIGTERM, waits briefly, then SIGKILL for survivors.
|
||||
*
|
||||
* On Unix: sends SIGTERM, waits briefly, then SIGKILL for survivors.
|
||||
* On Windows: uses taskkill (graceful first, then /F for force-kill).
|
||||
*/
|
||||
function terminateStaleProcessesSync(pids: number[]): number[] {
|
||||
if (process.platform === "win32") {
|
||||
return terminateStaleProcessesWindows(pids);
|
||||
}
|
||||
const killed: number[] = [];
|
||||
for (const pid of pids) {
|
||||
try {
|
||||
@@ -209,6 +295,58 @@ function terminateStaleProcessesSync(pids: number[]): number[] {
|
||||
return killed;
|
||||
}
|
||||
|
||||
/**
 * Windows-specific termination of stale gateway processes via taskkill.exe.
 *
 * Per PID: request a process-tree kill without /F, give it
 * STALE_SIGTERM_WAIT_MS to exit, then escalate to /F and wait
 * STALE_SIGKILL_WAIT_MS. Only PIDs observed to be gone are returned; a PID
 * whose forced taskkill invocation fails is not reported as killed.
 */
function terminateStaleProcessesWindows(pids: number[]): number[] {
  const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
  const taskkillExe = path.join(systemRoot, "System32", "taskkill.exe");

  // Runs taskkill against one PID and reports whether the invocation itself
  // succeeded (no spawn error and a zero/absent exit status).
  const taskkillSucceeded = (flags: string[], pid: number): boolean => {
    const outcome = spawnSync(taskkillExe, [...flags, "/PID", String(pid)], {
      stdio: "ignore",
      timeout: 5000,
      windowsHide: true,
    });
    return outcome.error == null && (outcome.status ?? 0) === 0;
  };

  const terminated: number[] = [];
  for (const pid of pids) {
    // Liveness is only probed right away when the graceful request succeeded.
    if (taskkillSucceeded(["/T"], pid) && !isProcessAlive(pid)) {
      terminated.push(pid);
      continue;
    }
    sleepSync(STALE_SIGTERM_WAIT_MS);
    if (!isProcessAlive(pid)) {
      terminated.push(pid);
      continue;
    }
    if (!taskkillSucceeded(["/F", "/T"], pid)) {
      continue;
    }
    sleepSync(STALE_SIGKILL_WAIT_MS);
    if (!isProcessAlive(pid)) {
      terminated.push(pid);
    }
  }
  return terminated;
}
|
||||
|
||||
/**
 * Report whether `pid` refers to a live process, using a signal-0 probe.
 *
 * A probe that throws EPERM is counted as alive (the process exists but may
 * not be signalled); any other error code is counted as not alive.
 */
function isProcessAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch (probeError) {
    const { code } = probeError as NodeJS.ErrnoException;
    return code === "EPERM";
  }
  return true;
}
|
||||
|
||||
/**
|
||||
* Poll the given port until it is confirmed free, lsof is confirmed unavailable,
|
||||
* or the wall-clock budget expires.
|
||||
@@ -259,7 +397,17 @@ export function cleanStaleGatewayProcessesSync(portOverride?: number): number[]
|
||||
typeof portOverride === "number" && Number.isFinite(portOverride) && portOverride > 0
|
||||
? Math.floor(portOverride)
|
||||
: resolveGatewayPort(undefined, process.env);
|
||||
const stalePids = findGatewayPidsOnPortSync(port);
|
||||
const stalePids =
|
||||
process.platform === "win32"
|
||||
? (() => {
|
||||
const result = findVerifiedWindowsGatewayPidsOnPortResultSync(port);
|
||||
if (result.ok) {
|
||||
return result.pids;
|
||||
}
|
||||
waitForPortFreeSync(port);
|
||||
return [];
|
||||
})()
|
||||
: findGatewayPidsOnPortSync(port);
|
||||
if (stalePids.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
151
src/infra/windows-port-pids.ts
Normal file
151
src/infra/windows-port-pids.ts
Normal file
@@ -0,0 +1,151 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { parseCmdScriptCommandLine } from "../daemon/cmd-argv.js";
|
||||
|
||||
const DEFAULT_TIMEOUT_MS = 5_000;
|
||||
|
||||
export type WindowsListeningPidsResult =
|
||||
| { ok: true; pids: number[] }
|
||||
| { ok: false; permanent: boolean };
|
||||
|
||||
export type WindowsProcessArgsResult =
|
||||
| { ok: true; args: string[] | null }
|
||||
| { ok: false; permanent: boolean };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Windows listening-PID discovery (PowerShell → netstat fallback)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * List the PIDs that own a listening TCP socket on `port`, using PowerShell's
 * `Get-NetTCPConnection`.
 *
 * @param port - Local TCP port to inspect.
 * @param timeoutMs - Upper bound passed to the PowerShell spawn.
 * @returns De-duplicated positive PIDs (possibly empty), or `null` when the
 *   PowerShell invocation reported a spawn error or a non-zero exit status,
 *   so the caller can fall back to another probe.
 */
function readListeningPidsViaPowerShell(port: number, timeoutMs: number): number[] | null {
  const ps = spawnSync(
    "powershell",
    [
      "-NoProfile",
      "-Command",
      `(Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue | Select-Object -ExpandProperty OwningProcess)`,
    ],
    {
      encoding: "utf8",
      timeout: timeoutMs,
      windowsHide: true,
    },
  );
  if (ps.error || ps.status !== 0) {
    return null;
  }
  const pids = ps.stdout
    .split(/\r?\n/)
    .map((line) => Number.parseInt(line.trim(), 10))
    .filter((pid) => Number.isFinite(pid) && pid > 0);
  // One process can own several listening rows for the same port (for example
  // one per address family), which would repeat its PID; collapse repeats so
  // this path agrees with the netstat parser, which already de-duplicates.
  return [...new Set(pids)];
}
|
||||
|
||||
/**
 * Extract the PIDs listening on `port` from `netstat -ano -p tcp` output.
 *
 * A TCP row counts as listening when its state column reads `LISTENING`
 * (case-insensitive) or when its foreign address is the wildcard endpoint
 * (`0.0.0.0:0`, `[::]:0`, `*:*`). The second test keeps the parser working
 * when the state text is not the English word (netstat's state column can be
 * localized); without it a non-English row would be dropped and an occupied
 * port would look free.
 *
 * @param stdout - Raw netstat output; CRLF or LF line endings.
 * @param port - Local port to match.
 * @returns Unique positive PIDs in first-seen order; empty when none match.
 */
function parseListeningPidsFromNetstat(stdout: string, port: number): number[] {
  // Columns: proto, local address:port, foreign address, state, PID.
  const rowPattern = /^\s*TCP\s+\S+:(\d+)\s+(\S+)\s+(.+?)\s+(\d+)\s*$/i;
  const wildcardForeign = /^(?:0\.0\.0\.0|\[::\]|\*):(?:0|\*)$/;
  const pids = new Set<number>();
  for (const line of stdout.split(/\r?\n/)) {
    const match = rowPattern.exec(line);
    if (!match) {
      continue;
    }
    const [, rawPort = "", foreign = "", state = "", rawPid = ""] = match;
    const listening = state.toUpperCase() === "LISTENING" || wildcardForeign.test(foreign);
    if (!listening) {
      continue;
    }
    const parsedPort = Number.parseInt(rawPort, 10);
    const pid = Number.parseInt(rawPid, 10);
    if (parsedPort === port && Number.isFinite(pid) && pid > 0) {
      pids.add(pid);
    }
  }
  return [...pids];
}
|
||||
|
||||
/**
 * Array-returning convenience wrapper around
 * `readWindowsListeningPidsResultSync`: yields the discovered PIDs, or an
 * empty array when the probe failed.
 */
export function readWindowsListeningPidsOnPortSync(
  port: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): number[] {
  const probe = readWindowsListeningPidsResultSync(port, timeoutMs);
  if (!probe.ok) {
    return [];
  }
  return probe.pids;
}
|
||||
|
||||
/**
 * Discover the PIDs listening on `port`, reporting probe failures explicitly.
 *
 * PowerShell is tried first; if it yields no usable answer, `netstat -ano -p
 * tcp` is run and parsed. A netstat spawn error is flagged `permanent` when
 * its code is ENOENT, EACCES or EPERM; a non-zero netstat exit is reported as
 * a non-permanent failure.
 */
export function readWindowsListeningPidsResultSync(
  port: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): WindowsListeningPidsResult {
  const viaPowerShell = readListeningPidsViaPowerShell(port, timeoutMs);
  if (viaPowerShell != null) {
    return { ok: true, pids: viaPowerShell };
  }

  const fallback = spawnSync("netstat", ["-ano", "-p", "tcp"], {
    encoding: "utf8",
    timeout: timeoutMs,
    windowsHide: true,
  });
  const spawnError = fallback.error as NodeJS.ErrnoException | undefined;
  if (spawnError) {
    const permanent = ["ENOENT", "EACCES", "EPERM"].includes(spawnError.code ?? "");
    return { ok: false, permanent };
  }
  return fallback.status === 0
    ? { ok: true, pids: parseListeningPidsFromNetstat(fallback.stdout, port) }
    : { ok: false, permanent: false };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Windows process-args reading (PowerShell → WMIC fallback)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Pull the command line out of WMIC-style output.
 *
 * Blank lines are ignored. If a `CommandLine=` line (any letter case) is
 * present, the first such line decides the result: its trimmed value, or
 * `null` when the value is empty. Otherwise the first non-blank line that is
 * not the bare `CommandLine` header is returned, or `null` if there is none.
 */
function extractWindowsCommandLine(raw: string): string | null {
  const keyPrefix = "commandline=";
  const rows = raw
    .split(/\r?\n/)
    .map((row) => row.trim())
    .filter((row) => row.length > 0);

  const keyedRow = rows.find((row) => row.toLowerCase().startsWith(keyPrefix));
  if (keyedRow !== undefined) {
    const value = keyedRow.slice(keyPrefix.length).trim();
    return value === "" ? null : value;
  }

  const firstDataRow = rows.find((row) => row.toLowerCase() !== "commandline");
  return firstDataRow ?? null;
}
|
||||
|
||||
/**
 * Nullable convenience wrapper around `readWindowsProcessArgsResultSync`:
 * yields the parsed argv, or `null` both when the probe failed and when it
 * succeeded without finding a command line.
 */
export function readWindowsProcessArgsSync(
  pid: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): string[] | null {
  const probe = readWindowsProcessArgsResultSync(pid, timeoutMs);
  if (!probe.ok) {
    return null;
  }
  return probe.args;
}
|
||||
|
||||
/**
 * Read the command line of process `pid` and parse it into argv, reporting
 * probe failures explicitly.
 *
 * PowerShell (`Get-CimInstance Win32_Process`) is tried first, then WMIC. The
 * first probe that exits cleanly decides the result; empty output maps to
 * `args: null`.
 *
 * @param pid - Process id to inspect.
 * @param timeoutMs - Upper bound applied to each spawned probe.
 * @returns `{ ok: true, args }` from the first successful probe, otherwise
 *   `{ ok: false, permanent }`. `permanent` is true only when BOTH probes
 *   failed with a spawn error of ENOENT, EACCES or EPERM. If either probe
 *   failed in another way (non-zero exit, timeout, ...), a retry may still
 *   succeed, so the failure is reported as non-permanent even if the other
 *   tool is missing.
 */
export function readWindowsProcessArgsResultSync(
  pid: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): WindowsProcessArgsResult {
  // True when the spawn error says the tool itself cannot be launched.
  const isPermanentFailure = (error: Error | undefined): boolean => {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    return code === "ENOENT" || code === "EACCES" || code === "EPERM";
  };

  const powershell = spawnSync(
    "powershell",
    [
      "-NoProfile",
      "-Command",
      `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}" | Select-Object -ExpandProperty CommandLine)`,
    ],
    {
      encoding: "utf8",
      timeout: timeoutMs,
      windowsHide: true,
    },
  );
  if (!powershell.error && powershell.status === 0) {
    const command = powershell.stdout.trim();
    return { ok: true, args: command ? parseCmdScriptCommandLine(command) : null };
  }

  const wmic = spawnSync(
    "wmic",
    ["process", "where", `ProcessId=${pid}`, "get", "CommandLine", "/value"],
    {
      encoding: "utf8",
      timeout: timeoutMs,
      windowsHide: true,
    },
  );
  if (!wmic.error && wmic.status === 0) {
    const command = extractWindowsCommandLine(wmic.stdout);
    return { ok: true, args: command ? parseCmdScriptCommandLine(command) : null };
  }

  // Both probes failed. Previously the WMIC error took precedence, so a
  // missing WMIC binary turned a merely transient PowerShell failure into a
  // "permanent" one.
  return {
    ok: false,
    permanent: isPermanentFailure(powershell.error) && isPermanentFailure(wmic.error),
  };
}
|
||||
Reference in New Issue
Block a user