fix: windows self-restart stale gateway cleanup (#60480) (thanks @arifahmedjoy)

* fix: implement Windows stale gateway process cleanup before restart

findGatewayPidsOnPortSync() returned [] immediately on Windows, causing
cleanStaleGatewayProcessesSync() to skip killing old gateway processes
during self-restart (triggerOpenClawRestart -> schtasks path). This led
to an infinite retry loop: 'gateway already running under schtasks;
waiting 5000ms before retrying startup'.

Changes:
- Extract Windows port/process helpers into shared windows-port-pids.ts
  to break the circular import between restart-stale-pids.ts and
  gateway-processes.ts, with configurable timeoutMs for poll compliance
- findGatewayPidsOnPortSync: discover + verify Windows gateway PIDs via
  readWindowsListeningPidsOnPortSync + readWindowsProcessArgsSync
- pollPortOnceWindows: use short POLL_SPAWN_TIMEOUT_MS (400ms) so a
  single slow PowerShell call cannot exceed the 2s polling budget
- terminateStaleProcessesSync: add terminateStaleProcessesWindows using
  taskkill.exe (graceful /T first, then /F force-kill)

Fixes the Windows gateway restart infinite loop caused by the schtasks
supervisor detecting a port conflict it cannot resolve.

* fix: tighten windows stale gateway cleanup

* fix: preserve windows restart probe failures

* refactor: unify windows gateway pid verification

* fix: preserve windows argv probe failures

* fix: windows self-restart stale gateway cleanup (#60480) (thanks @arifahmedjoy)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
Engr. Arif Ahmed Joy
2026-04-05 21:31:17 +06:00
committed by GitHub
parent ff6fd18629
commit 63fcc52520
5 changed files with 570 additions and 120 deletions

View File

@@ -181,6 +181,7 @@ Docs: https://docs.openclaw.ai
- Plugin SDK/context engines: export the missing context-engine result and subagent lifecycle types from `openclaw/plugin-sdk` so context engine plugins can type `ContextEngine` implementations without local workarounds. (#61251) Thanks @DaevMithran.
- Tasks/maintenance: reconcile stale cron and chat-backed CLI task rows against live cron-job and agent-run ownership instead of treating any persisted session key as proof that the task is still running. (#60310) Thanks @lml2468.
- Update/npm: prefer the npm binary that owns the installed global OpenClaw prefix so mixed Homebrew-plus-nvm setups update the right install. (#60153) Thanks @jayeshp19.
- Windows/restart: clean up stale gateway listeners before Windows self-restart and treat listener and argv probe failures as inconclusive, so scheduled-task relaunch no longer falls into an `EADDRINUSE` retry loop. (#60480) Thanks @arifahmedjoy.
## 2026.4.2

View File

@@ -1,114 +1,11 @@
import { spawnSync } from "node:child_process";
import fsSync from "node:fs";
import { parseCmdScriptCommandLine } from "../daemon/cmd-argv.js";
import { isGatewayArgv, parseProcCmdline } from "./gateway-process-argv.js";
import { findGatewayPidsOnPortSync as findUnixGatewayPidsOnPortSync } from "./restart-stale-pids.js";
const WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS = 5_000;
/**
 * Extract the command line from `wmic ... get CommandLine /value` style output.
 *
 * Blank lines are ignored. The first line starting with `CommandLine=`
 * (case-insensitive) wins and its value is returned, or `null` when that value
 * is empty. When no such line exists, the first non-blank line that is not the
 * bare `CommandLine` header is returned instead.
 *
 * @param raw - Raw stdout text to scan.
 * @returns The command line text, or `null` when none could be found.
 */
function extractWindowsCommandLine(raw: string): string | null {
  const prefix = "commandline=";
  const rows = raw
    .split(/\r?\n/)
    .map((row) => row.trim())
    .filter(Boolean);
  const keyed = rows.find((row) => row.toLowerCase().startsWith(prefix));
  if (keyed !== undefined) {
    const value = keyed.slice(prefix.length).trim();
    return value === "" ? null : value;
  }
  // Table-style output: skip the column header and take the first data row.
  const firstDataRow = rows.find((row) => row.toLowerCase() !== "commandline");
  return firstDataRow ?? null;
}
/**
 * Read a process's command line through PowerShell (`Get-CimInstance Win32_Process`)
 * and split it into argv with `parseCmdScriptCommandLine`.
 *
 * @param pid - Process id to inspect.
 * @returns The parsed argv, or `null` when PowerShell failed to spawn, exited
 * non-zero, or printed nothing.
 */
function readWindowsProcessArgsViaPowerShell(pid: number): string[] | null {
  const script = `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}" | Select-Object -ExpandProperty CommandLine)`;
  const { error, status, stdout } = spawnSync("powershell", ["-NoProfile", "-Command", script], {
    encoding: "utf8",
    timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
    windowsHide: true,
  });
  const succeeded = !error && status === 0;
  if (!succeeded) {
    return null;
  }
  const commandLine = stdout.trim();
  if (!commandLine) {
    return null;
  }
  return parseCmdScriptCommandLine(commandLine);
}
/**
 * Read a process's command line through `wmic process ... get CommandLine /value`
 * and split it into argv with `parseCmdScriptCommandLine`.
 *
 * @param pid - Process id to inspect.
 * @returns The parsed argv, or `null` when wmic failed to spawn, exited
 * non-zero, or its output contained no command line.
 */
function readWindowsProcessArgsViaWmic(pid: number): string[] | null {
  const query = ["process", "where", `ProcessId=${pid}`, "get", "CommandLine", "/value"];
  const { error, status, stdout } = spawnSync("wmic", query, {
    encoding: "utf8",
    timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
    windowsHide: true,
  });
  const succeeded = !error && status === 0;
  if (!succeeded) {
    return null;
  }
  const commandLine = extractWindowsCommandLine(stdout);
  if (!commandLine) {
    return null;
  }
  return parseCmdScriptCommandLine(commandLine);
}
/**
 * Ask PowerShell (`Get-NetTCPConnection`) which processes own a listening
 * socket on `port`.
 *
 * @param port - Local TCP port to inspect.
 * @returns The positive PIDs parsed from stdout (possibly empty, duplicates
 * kept), or `null` when PowerShell failed to spawn or exited non-zero.
 */
function readWindowsListeningPidsViaPowerShell(port: number): number[] | null {
  const script = `(Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue | Select-Object -ExpandProperty OwningProcess)`;
  const { error, status, stdout } = spawnSync("powershell", ["-NoProfile", "-Command", script], {
    encoding: "utf8",
    timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
    windowsHide: true,
  });
  if (error || status !== 0) {
    return null;
  }
  const owners: number[] = [];
  for (const row of stdout.split(/\r?\n/)) {
    const owner = Number.parseInt(row.trim(), 10);
    if (Number.isFinite(owner) && owner > 0) {
      owners.push(owner);
    }
  }
  return owners;
}
/**
 * Find listening PIDs for `port` by parsing `netstat -ano -p tcp`.
 *
 * Only rows in the LISTENING state whose local port equals `port` are
 * considered; each PID is reported once.
 *
 * @param port - Local TCP port to inspect.
 * @returns The distinct listening PIDs, or `[]` when netstat failed to spawn
 * or exited non-zero.
 */
function readWindowsListeningPidsViaNetstat(port: number): number[] {
  const { error, status, stdout } = spawnSync("netstat", ["-ano", "-p", "tcp"], {
    encoding: "utf8",
    timeout: WINDOWS_GATEWAY_DISCOVERY_TIMEOUT_MS,
    windowsHide: true,
  });
  if (error || status !== 0) {
    return [];
  }
  const listeners = new Set<number>();
  stdout.split(/\r?\n/).forEach((row) => {
    const fields = row.match(/^\s*TCP\s+(\S+):(\d+)\s+\S+\s+LISTENING\s+(\d+)\s*$/i);
    if (!fields) {
      return;
    }
    const rowPort = Number.parseInt(fields[2] ?? "", 10);
    const owner = Number.parseInt(fields[3] ?? "", 10);
    if (rowPort !== port) {
      return;
    }
    if (Number.isFinite(owner) && owner > 0) {
      listeners.add(owner);
    }
  });
  return Array.from(listeners);
}
/**
 * List PIDs listening on `port`, preferring the PowerShell probe and using
 * netstat only when PowerShell gave no answer (`null`).
 */
function readWindowsListeningPidsOnPortSync(port: number): number[] {
  const viaPowerShell = readWindowsListeningPidsViaPowerShell(port);
  if (viaPowerShell == null) {
    return readWindowsListeningPidsViaNetstat(port);
  }
  return viaPowerShell;
}
import {
readWindowsListeningPidsOnPortSync,
readWindowsProcessArgsSync,
} from "./windows-port-pids.js";
export function readGatewayProcessArgsSync(pid: number): string[] | null {
if (process.platform === "linux") {
@@ -130,7 +27,7 @@ export function readGatewayProcessArgsSync(pid: number): string[] | null {
return command ? command.split(/\s+/) : null;
}
if (process.platform === "win32") {
return readWindowsProcessArgsViaPowerShell(pid) ?? readWindowsProcessArgsViaWmic(pid);
return readWindowsProcessArgsSync(pid);
}
return null;
}

View File

@@ -1,14 +1,31 @@
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
// This entire file tests lsof-based Unix port polling. The feature is a deliberate
// no-op on Windows (findGatewayPidsOnPortSync returns [] immediately). Running these
// tests on a Windows CI runner would require lsof which does not exist there, so we
// skip the suite entirely and rely on the Linux/macOS runners for coverage.
// This file primarily tests lsof-based Unix port polling. On Windows,
// findGatewayPidsOnPortSync instead discovers listeners through the
// PowerShell/netstat helpers in windows-port-pids.ts and verifies each PID's argv.
// Running lsof-dependent tests on a Windows CI runner is not possible, so the suite
// is skipped on Windows; the Windows-path tests mock process.platform to win32.
const isWindows = process.platform === "win32";
const mockSpawnSync = vi.hoisted(() => vi.fn());
const mockResolveGatewayPort = vi.hoisted(() => vi.fn(() => 18789));
const mockRestartWarn = vi.hoisted(() => vi.fn());
const mockReadWindowsListeningPids = vi.hoisted(() =>
vi.fn((_port: number, _timeoutMs?: number): number[] => []),
);
const mockReadWindowsListeningPidsResult = vi.hoisted(() =>
vi.fn<(_port: number, _timeoutMs?: number) => MockWindowsListeningPidsResult>(
(_port: number, _timeoutMs?: number) => ({ ok: true, pids: [] }),
),
);
const mockReadWindowsProcessArgs = vi.hoisted(() =>
vi.fn((_pid: number, _timeoutMs?: number): string[] | null => null),
);
const mockReadWindowsProcessArgsResult = vi.hoisted(() =>
vi.fn<(_pid: number, _timeoutMs?: number) => MockWindowsProcessArgsResult>(
(_pid: number, _timeoutMs?: number) => ({ ok: true, args: null }),
),
);
vi.mock("node:child_process", async () => {
const { mockNodeBuiltinModule } = await import("../../test/helpers/node-builtin-mocks.js");
@@ -37,6 +54,19 @@ vi.mock("../logging/subsystem.js", () => ({
})),
}));
vi.mock("./gateway-processes.js", () => ({}));
vi.mock("./windows-port-pids.js", () => ({
readWindowsListeningPidsOnPortSync: (port: number, timeoutMs?: number) =>
mockReadWindowsListeningPids(port, timeoutMs),
readWindowsListeningPidsResultSync: (port: number, timeoutMs?: number) =>
mockReadWindowsListeningPidsResult(port, timeoutMs),
readWindowsProcessArgsSync: (pid: number, timeoutMs?: number) =>
mockReadWindowsProcessArgs(pid, timeoutMs),
readWindowsProcessArgsResultSync: (pid: number, timeoutMs?: number) =>
mockReadWindowsProcessArgsResult(pid, timeoutMs),
}));
import { resolveLsofCommandSync } from "./ports-lsof.js";
let __testing: typeof import("./restart-stale-pids.js").__testing;
let cleanStaleGatewayProcessesSync: typeof import("./restart-stale-pids.js").cleanStaleGatewayProcessesSync;
@@ -53,6 +83,14 @@ type MockLsofResult = {
stderr: string;
};
type MockWindowsListeningPidsResult =
| { ok: true; pids: number[] }
| { ok: false; permanent: boolean };
type MockWindowsProcessArgsResult =
| { ok: true; args: string[] | null }
| { ok: false; permanent: boolean };
function createLsofResult(overrides: Partial<MockLsofResult> = {}): MockLsofResult {
return {
error: null,
@@ -101,7 +139,15 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
mockSpawnSync.mockReset();
mockResolveGatewayPort.mockReset();
mockRestartWarn.mockReset();
mockReadWindowsListeningPids.mockReset();
mockReadWindowsListeningPidsResult.mockReset();
mockReadWindowsProcessArgs.mockReset();
mockReadWindowsProcessArgsResult.mockReset();
mockResolveGatewayPort.mockReturnValue(18789);
mockReadWindowsListeningPids.mockReturnValue([]);
mockReadWindowsListeningPidsResult.mockReturnValue({ ok: true, pids: [] });
mockReadWindowsProcessArgs.mockReturnValue(null);
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: null });
__testing.setSleepSyncOverride(() => {});
});
@@ -189,15 +235,14 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
expect(result).toEqual([stalePid]); // deduped — not [pid, pid]
});
it("returns [] and skips lsof on win32", () => {
// The entire describe block is skipped on Windows (isWindows guard at top),
// so this test only runs on Linux/macOS. It mocks platform to win32 for the
// single assertion without needing to restore — the suite-level skipIf means
// this will never run on an actual Windows runner where the mock could leak.
it("delegates to Windows port helpers on win32 and skips lsof", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
mockReadWindowsListeningPids.mockReturnValue([]);
expect(findGatewayPidsOnPortSync(18789)).toEqual([]);
expect(mockReadWindowsListeningPids).toHaveBeenCalledWith(18789, undefined);
// lsof must NOT be invoked — Windows uses PowerShell/netstat
expect(mockSpawnSync).not.toHaveBeenCalled();
} finally {
if (origDescriptor) {
@@ -205,6 +250,24 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
}
}
});
it("returns verified gateway pids from Windows helpers on win32", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
const stalePid = process.pid + 900;
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
mockReadWindowsListeningPids.mockReturnValue([stalePid]);
// Simulate a verified gateway process (must pass real isGatewayArgv)
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
expect(findGatewayPidsOnPortSync(18789)).toEqual([stalePid]);
expect(mockReadWindowsListeningPids).toHaveBeenCalledWith(18789, undefined);
expect(mockReadWindowsProcessArgs).toHaveBeenCalledWith(stalePid, undefined);
} finally {
if (origDescriptor) {
Object.defineProperty(process, "platform", origDescriptor);
}
}
});
});
// -------------------------------------------------------------------------
@@ -597,6 +660,196 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
expect(killSpy).not.toHaveBeenCalled();
});
it("treats failed Windows port probes as inconclusive, not free", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
const stalePid = process.pid + 910;
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
mockReadWindowsListeningPids.mockReturnValue([stalePid]);
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: ["openclaw", "gateway"] });
mockSpawnSync.mockReturnValue({
error: null,
status: 0,
stdout: "",
stderr: "",
});
let fakeNow = 0;
__testing.setDateNowOverride(() => fakeNow);
mockReadWindowsListeningPidsResult.mockImplementation((_port, timeoutMs) => {
if (timeoutMs === 400) {
fakeNow += 2001;
return { ok: false, permanent: false };
}
return { ok: true, pids: [stalePid] };
});
let aliveChecks = 0;
const killSpy = vi.spyOn(process, "kill").mockImplementation((pid, signal) => {
if (signal === 0 && pid === stalePid) {
aliveChecks += 1;
if (aliveChecks < 3) {
return true;
}
throw Object.assign(new Error("ESRCH"), { code: "ESRCH" });
}
return true;
});
expect(cleanStaleGatewayProcessesSync()).toEqual([stalePid]);
expect(mockReadWindowsListeningPidsResult).toHaveBeenCalledWith(18789, 400);
expect(mockRestartWarn).toHaveBeenCalledWith(
expect.stringContaining("port 18789 still in use after 2000ms"),
);
expect(killSpy).toHaveBeenCalledWith(stalePid, 0);
} finally {
__testing.setDateNowOverride(null);
if (origDescriptor) {
Object.defineProperty(process, "platform", origDescriptor);
}
}
});
it("waits for port release when the initial Windows stale-pid probe is inconclusive", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
let fakeNow = 0;
__testing.setDateNowOverride(() => fakeNow);
mockReadWindowsListeningPidsResult.mockImplementation((_port, timeoutMs) => {
if (timeoutMs === 400) {
fakeNow += 2001;
}
return { ok: false, permanent: false };
});
const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
expect(mockReadWindowsListeningPidsResult).toHaveBeenCalledWith(18789, 400);
expect(mockRestartWarn).toHaveBeenCalledWith(
expect.stringContaining("port 18789 still in use after 2000ms"),
);
expect(killSpy).not.toHaveBeenCalled();
} finally {
__testing.setDateNowOverride(null);
if (origDescriptor) {
Object.defineProperty(process, "platform", origDescriptor);
}
}
});
it("waits for port release when Windows listener argv inspection is inconclusive", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
const stalePid = process.pid + 913;
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
let fakeNow = 0;
__testing.setDateNowOverride(() => fakeNow);
mockReadWindowsListeningPidsResult.mockImplementation((_port, timeoutMs) => {
if (timeoutMs === 400) {
fakeNow += 2001;
}
return { ok: true, pids: [stalePid] };
});
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: false, permanent: false });
const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
expect(mockReadWindowsProcessArgsResult).toHaveBeenCalledWith(stalePid, undefined);
expect(mockRestartWarn).toHaveBeenCalledWith(
expect.stringContaining("port 18789 still in use after 2000ms"),
);
expect(killSpy).not.toHaveBeenCalled();
} finally {
__testing.setDateNowOverride(null);
if (origDescriptor) {
Object.defineProperty(process, "platform", origDescriptor);
}
}
});
it("does not report Windows pids as killed when taskkill fails", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
const stalePid = process.pid + 911;
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
mockReadWindowsListeningPids.mockReturnValue([stalePid]);
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: ["openclaw", "gateway"] });
mockReadWindowsListeningPidsResult.mockReturnValue({ ok: true, pids: [stalePid] });
mockSpawnSync.mockReturnValue({
error: null,
status: 1,
stdout: "",
stderr: "access denied",
});
vi.spyOn(process, "kill").mockImplementation((pid, signal) => {
if (signal === 0 && pid === stalePid) {
return true;
}
return true;
});
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
expect(mockSpawnSync).toHaveBeenCalledWith(
expect.stringContaining("taskkill.exe"),
["/T", "/PID", String(stalePid)],
expect.objectContaining({ timeout: 5000 }),
);
} finally {
if (origDescriptor) {
Object.defineProperty(process, "platform", origDescriptor);
}
}
});
it("treats Windows EPERM liveness checks as alive and still forces taskkill", () => {
const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
const stalePid = process.pid + 912;
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
try {
mockReadWindowsListeningPidsResult.mockReturnValue({ ok: true, pids: [stalePid] });
mockReadWindowsProcessArgs.mockReturnValue(["openclaw", "gateway"]);
mockReadWindowsProcessArgsResult.mockReturnValue({ ok: true, args: ["openclaw", "gateway"] });
mockSpawnSync
.mockReturnValueOnce({
error: null,
status: 1,
stdout: "",
stderr: "access denied",
})
.mockReturnValueOnce({
error: null,
status: 1,
stdout: "",
stderr: "still denied",
});
vi.spyOn(process, "kill").mockImplementation((pid, signal) => {
if (signal === 0 && pid === stalePid) {
throw Object.assign(new Error("EPERM"), { code: "EPERM" });
}
return true;
});
expect(cleanStaleGatewayProcessesSync()).toEqual([]);
expect(mockSpawnSync).toHaveBeenNthCalledWith(
1,
expect.stringContaining("taskkill.exe"),
["/T", "/PID", String(stalePid)],
expect.objectContaining({ timeout: 5000 }),
);
expect(mockSpawnSync).toHaveBeenNthCalledWith(
2,
expect.stringContaining("taskkill.exe"),
["/F", "/T", "/PID", String(stalePid)],
expect.objectContaining({ timeout: 5000 }),
);
} finally {
if (origDescriptor) {
Object.defineProperty(process, "platform", origDescriptor);
}
}
});
});
// -------------------------------------------------------------------------

View File

@@ -1,7 +1,17 @@
import { spawnSync } from "node:child_process";
import path from "node:path";
import { resolveGatewayPort } from "../config/paths.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { isGatewayArgv } from "./gateway-process-argv.js";
import { resolveLsofCommandSync } from "./ports-lsof.js";
import {
readWindowsListeningPidsOnPortSync,
readWindowsListeningPidsResultSync,
readWindowsProcessArgsResultSync,
readWindowsProcessArgsSync,
type WindowsProcessArgsResult,
type WindowsListeningPidsResult,
} from "./windows-port-pids.js";
const SPAWN_TIMEOUT_MS = 2000;
const STALE_SIGTERM_WAIT_MS = 600;
@@ -78,6 +88,53 @@ function parsePidsFromLsofOutput(stdout: string): number[] {
return [...new Set(pids)].filter((pid) => pid !== process.pid);
}
/**
 * Windows: reduce a raw list of listener PIDs to those verified as openclaw
 * gateway processes via command-line inspection.
 *
 * Duplicates, non-positive or non-finite values and the current process are
 * dropped before any command line is read. A PID whose argv cannot be read
 * (`null`) is treated as "not a gateway".
 */
function filterVerifiedWindowsGatewayPids(rawPids: number[]): number[] {
  const candidates = [...new Set(rawPids)].filter(
    (candidate) => Number.isFinite(candidate) && candidate > 0 && candidate !== process.pid,
  );
  const verified: number[] = [];
  for (const candidate of candidates) {
    const argv = readWindowsProcessArgsSync(candidate);
    if (argv == null) {
      continue;
    }
    if (isGatewayArgv(argv, { allowGatewayBinary: true })) {
      verified.push(candidate);
    }
  }
  return verified;
}
/**
 * Result-typed variant of the gateway PID filter: keeps only PIDs whose argv
 * is recognised by `isGatewayArgv`, but surfaces argv probe failures instead
 * of treating them as "not a gateway".
 *
 * @param rawPids - Listener PIDs as reported by the port probe.
 * @param processArgsResult - Callback that reads one PID's argv.
 * @returns `{ ok: true, pids }` with the verified PIDs, or the first argv
 * probe failure as `{ ok: false, permanent }`; remaining PIDs are not probed
 * after a failure.
 */
function filterVerifiedWindowsGatewayPidsResult(
  rawPids: number[],
  processArgsResult: (pid: number) => WindowsProcessArgsResult,
): WindowsListeningPidsResult {
  const candidates = [...new Set(rawPids)].filter(
    (candidate) => Number.isFinite(candidate) && candidate > 0 && candidate !== process.pid,
  );
  const gatewayPids: number[] = [];
  for (const candidate of candidates) {
    const probe = processArgsResult(candidate);
    if (!probe.ok) {
      // An unreadable argv makes the whole answer inconclusive.
      return { ok: false, permanent: probe.permanent };
    }
    const argv = probe.args;
    if (argv == null) {
      continue;
    }
    if (isGatewayArgv(argv, { allowGatewayBinary: true })) {
      gatewayPids.push(candidate);
    }
  }
  return { ok: true, pids: gatewayPids };
}
/**
 * Windows: list the PIDs listening on `port`, then keep only those verified
 * as gateway processes by `filterVerifiedWindowsGatewayPids`.
 */
function findVerifiedWindowsGatewayPidsOnPortSync(port: number): number[] {
  const listeners = readWindowsListeningPidsOnPortSync(port);
  return filterVerifiedWindowsGatewayPids(listeners);
}
/**
 * Result-typed Windows gateway discovery for `port`.
 *
 * A failed listener probe is returned unchanged; otherwise each listener's
 * argv is read with `readWindowsProcessArgsResultSync` and the verified
 * gateway PIDs (or the first argv probe failure) are returned.
 */
function findVerifiedWindowsGatewayPidsOnPortResultSync(port: number): WindowsListeningPidsResult {
  const listeners = readWindowsListeningPidsResultSync(port);
  if (listeners.ok) {
    const readArgs = (pid: number) => readWindowsProcessArgsResultSync(pid);
    return filterVerifiedWindowsGatewayPidsResult(listeners.pids, readArgs);
  }
  return listeners;
}
/**
* Find PIDs of gateway processes listening on the given port using synchronous lsof.
* Returns only PIDs that belong to openclaw gateway processes (not the current process).
@@ -87,7 +144,9 @@ export function findGatewayPidsOnPortSync(
spawnTimeoutMs = SPAWN_TIMEOUT_MS,
): number[] {
if (process.platform === "win32") {
return [];
// Use the shared Windows port inspection (PowerShell / netstat) with
// command-line verification to find only openclaw gateway processes.
return findVerifiedWindowsGatewayPidsOnPortSync(port);
}
const lsof = resolveLsofCommandSync();
const res = spawnSync(lsof, ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-Fpc"], {
@@ -139,6 +198,9 @@ export function findGatewayPidsOnPortSync(
type PollResult = { free: true } | { free: false } | { free: null; permanent: boolean };
function pollPortOnce(port: number): PollResult {
if (process.platform === "win32") {
return pollPortOnceWindows(port);
}
try {
const lsof = resolveLsofCommandSync();
const res = spawnSync(lsof, ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-Fpc"], {
@@ -178,12 +240,36 @@ function pollPortOnce(port: number): PollResult {
}
}
/**
 * Windows-specific single port poll.
 *
 * Probes the port with the short POLL_SPAWN_TIMEOUT_MS spawn timeout instead
 * of the probe's default. No gateway argv verification happens here: any
 * listener on the port counts as "not free".
 *
 * @returns `{ free: true }` when nothing is listening, `{ free: false }` when
 * at least one PID is, and `{ free: null, permanent }` when the probe failed.
 * A thrown error is reported as a non-permanent failure.
 */
function pollPortOnceWindows(port: number): PollResult {
  try {
    const probe = readWindowsListeningPidsResultSync(port, POLL_SPAWN_TIMEOUT_MS);
    if (probe.ok) {
      return probe.pids.length > 0 ? { free: false } : { free: true };
    }
    return { free: null, permanent: probe.permanent };
  } catch {
    return { free: null, permanent: false };
  }
}
/**
* Synchronously terminate stale gateway processes.
* Callers must pass a non-empty pids array.
* Sends SIGTERM, waits briefly, then SIGKILL for survivors.
*
* On Unix: sends SIGTERM, waits briefly, then SIGKILL for survivors.
* On Windows: uses taskkill (graceful first, then /F for force-kill).
*/
function terminateStaleProcessesSync(pids: number[]): number[] {
if (process.platform === "win32") {
return terminateStaleProcessesWindows(pids);
}
const killed: number[] = [];
for (const pid of pids) {
try {
@@ -209,6 +295,58 @@ function terminateStaleProcessesSync(pids: number[]): number[] {
return killed;
}
/**
 * Windows-specific process termination using taskkill.
 *
 * For each PID: send a graceful `taskkill /T /PID` (tree), wait
 * STALE_SIGTERM_WAIT_MS, then escalate to `taskkill /F /T /PID` if the
 * process is still alive.
 *
 * A PID is reported as killed only when a liveness check confirms it is gone.
 * That check also runs after a failed forced kill: taskkill exits non-zero
 * when the target has already exited, so a process that disappeared between
 * the previous check and the forced kill must still be counted.
 *
 * @param pids - PIDs to terminate.
 * @returns The subset of `pids` confirmed to be no longer running.
 */
function terminateStaleProcessesWindows(pids: number[]): number[] {
  const taskkillPath = path.join(
    process.env.SystemRoot ?? "C:\\Windows",
    "System32",
    "taskkill.exe",
  );
  // Runs taskkill once; true only when it spawned and exited with status 0.
  const runTaskkill = (args: string[]): boolean => {
    const res = spawnSync(taskkillPath, args, {
      stdio: "ignore",
      timeout: 5000,
      windowsHide: true,
    });
    return res.error == null && (res.status ?? 0) === 0;
  };
  const killed: number[] = [];
  for (const pid of pids) {
    const gracefulOk = runTaskkill(["/T", "/PID", String(pid)]);
    if (gracefulOk && !isProcessAlive(pid)) {
      killed.push(pid);
      continue;
    }
    // Give the process time to shut down (or to finish dying after a failed
    // taskkill) before escalating.
    sleepSync(STALE_SIGTERM_WAIT_MS);
    if (!isProcessAlive(pid)) {
      killed.push(pid);
      continue;
    }
    const forcedOk = runTaskkill(["/F", "/T", "/PID", String(pid)]);
    if (forcedOk) {
      // Only wait when the kill was actually delivered.
      sleepSync(STALE_SIGKILL_WAIT_MS);
    }
    // Check liveness even when the forced taskkill failed: a non-zero exit
    // also happens when the process was already gone.
    if (!isProcessAlive(pid)) {
      killed.push(pid);
    }
  }
  return killed;
}
/**
 * Check whether a process exists by sending it signal 0.
 *
 * @param pid - Process id to probe.
 * @returns `true` when the signal is accepted or rejected with EPERM (the
 * process exists but may not be signalled); `false` for any other error.
 */
function isProcessAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch (probeError) {
    const { code } = probeError as NodeJS.ErrnoException;
    return code === "EPERM";
  }
  return true;
}
/**
* Poll the given port until it is confirmed free, lsof is confirmed unavailable,
* or the wall-clock budget expires.
@@ -259,7 +397,17 @@ export function cleanStaleGatewayProcessesSync(portOverride?: number): number[]
typeof portOverride === "number" && Number.isFinite(portOverride) && portOverride > 0
? Math.floor(portOverride)
: resolveGatewayPort(undefined, process.env);
const stalePids = findGatewayPidsOnPortSync(port);
const stalePids =
process.platform === "win32"
? (() => {
const result = findVerifiedWindowsGatewayPidsOnPortResultSync(port);
if (result.ok) {
return result.pids;
}
waitForPortFreeSync(port);
return [];
})()
: findGatewayPidsOnPortSync(port);
if (stalePids.length === 0) {
return [];
}

View File

@@ -0,0 +1,151 @@
import { spawnSync } from "node:child_process";
import { parseCmdScriptCommandLine } from "../daemon/cmd-argv.js";
const DEFAULT_TIMEOUT_MS = 5_000;
/**
 * Outcome of a listening-PID probe.
 *
 * - `ok: true`: the probe produced an answer; `pids` may be empty.
 * - `ok: false`: the probe could not answer. `permanent` is set by the
 *   producer from the spawn error code (ENOENT, EACCES and EPERM count as
 *   permanent; other failures do not).
 */
export type WindowsListeningPidsResult =
| { ok: true; pids: number[] }
| { ok: false; permanent: boolean };
/**
 * Outcome of reading one process's command line.
 *
 * - `ok: true`: the probe ran; `args` is the parsed argv, or `null` when no
 *   command line was reported for the PID.
 * - `ok: false`: the probe could not answer. `permanent` is set by the
 *   producer from the spawn error code (ENOENT, EACCES and EPERM count as
 *   permanent; other failures do not).
 */
export type WindowsProcessArgsResult =
| { ok: true; args: string[] | null }
| { ok: false; permanent: boolean };
// ---------------------------------------------------------------------------
// Windows listening-PID discovery (PowerShell → netstat fallback)
// ---------------------------------------------------------------------------
/**
 * Ask PowerShell (`Get-NetTCPConnection`) which processes own a listening
 * socket on `port`.
 *
 * @param port - Local TCP port to inspect.
 * @param timeoutMs - Timeout passed to the PowerShell child process.
 * @returns The positive PIDs parsed from stdout (possibly empty, duplicates
 * kept), or `null` when PowerShell failed to spawn or exited non-zero.
 */
function readListeningPidsViaPowerShell(port: number, timeoutMs: number): number[] | null {
  const script = `(Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue | Select-Object -ExpandProperty OwningProcess)`;
  const { error, status, stdout } = spawnSync("powershell", ["-NoProfile", "-Command", script], {
    encoding: "utf8",
    timeout: timeoutMs,
    windowsHide: true,
  });
  if (error || status !== 0) {
    return null;
  }
  const owners: number[] = [];
  for (const row of stdout.split(/\r?\n/)) {
    const owner = Number.parseInt(row.trim(), 10);
    if (Number.isFinite(owner) && owner > 0) {
      owners.push(owner);
    }
  }
  return owners;
}
/**
 * Parse `netstat -ano` output and collect the PIDs listening on `port`.
 *
 * Only `TCP` rows in the `LISTENING` state whose local port equals `port`
 * are considered; each PID is reported once, in first-seen order.
 *
 * @param stdout - Raw netstat output.
 * @param port - Local TCP port to match.
 * @returns The distinct positive PIDs found (empty when none match).
 */
function parseListeningPidsFromNetstat(stdout: string, port: number): number[] {
  const listeners = new Set<number>();
  stdout.split(/\r?\n/).forEach((row) => {
    const fields = row.match(/^\s*TCP\s+(\S+):(\d+)\s+\S+\s+LISTENING\s+(\d+)\s*$/i);
    if (!fields) {
      return;
    }
    const rowPort = Number.parseInt(fields[2] ?? "", 10);
    const owner = Number.parseInt(fields[3] ?? "", 10);
    if (rowPort !== port) {
      return;
    }
    if (Number.isFinite(owner) && owner > 0) {
      listeners.add(owner);
    }
  });
  return Array.from(listeners);
}
/**
 * List the PIDs listening on `port`, collapsing probe failures to `[]`.
 *
 * Use `readWindowsListeningPidsResultSync` when a failed probe must be
 * distinguishable from "nothing is listening".
 *
 * @param port - Local TCP port to inspect.
 * @param timeoutMs - Timeout forwarded to the underlying probe.
 */
export function readWindowsListeningPidsOnPortSync(
  port: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): number[] {
  const probe = readWindowsListeningPidsResultSync(port, timeoutMs);
  if (!probe.ok) {
    return [];
  }
  return probe.pids;
}
/**
 * List the PIDs listening on `port`, reporting probe failures explicitly.
 *
 * PowerShell is tried first; when it gives no answer, `netstat -ano -p tcp`
 * is run with the same timeout and its output parsed instead.
 *
 * @param port - Local TCP port to inspect.
 * @param timeoutMs - Timeout applied to each child process.
 * @returns `{ ok: true, pids }` on success. `{ ok: false, permanent }` when
 * both probes failed; `permanent` is true only when netstat's spawn error
 * code is ENOENT, EACCES or EPERM.
 */
export function readWindowsListeningPidsResultSync(
  port: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): WindowsListeningPidsResult {
  const viaPowerShell = readListeningPidsViaPowerShell(port, timeoutMs);
  if (viaPowerShell != null) {
    return { ok: true, pids: viaPowerShell };
  }
  const { error, status, stdout } = spawnSync("netstat", ["-ano", "-p", "tcp"], {
    encoding: "utf8",
    timeout: timeoutMs,
    windowsHide: true,
  });
  if (error) {
    switch ((error as NodeJS.ErrnoException).code) {
      case "ENOENT":
      case "EACCES":
      case "EPERM":
        return { ok: false, permanent: true };
      default:
        return { ok: false, permanent: false };
    }
  }
  if (status !== 0) {
    return { ok: false, permanent: false };
  }
  const pids = parseListeningPidsFromNetstat(stdout, port);
  return { ok: true, pids };
}
// ---------------------------------------------------------------------------
// Windows process-args reading (PowerShell → WMIC fallback)
// ---------------------------------------------------------------------------
/**
 * Extract the command line from `wmic ... get CommandLine /value` style output.
 *
 * Blank lines are ignored. The first line starting with `CommandLine=`
 * (case-insensitive) wins and its value is returned, or `null` when that value
 * is empty. When no such line exists, the first non-blank line that is not the
 * bare `CommandLine` header is returned instead.
 *
 * @param raw - Raw stdout text to scan.
 * @returns The command line text, or `null` when none could be found.
 */
function extractWindowsCommandLine(raw: string): string | null {
  const key = "commandline=";
  // First non-header line seen so far; used only if no `CommandLine=` line exists.
  let fallback: string | null = null;
  for (const rawLine of raw.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "") {
      continue;
    }
    const lowered = line.toLowerCase();
    if (lowered.startsWith(key)) {
      const value = line.slice(key.length).trim();
      return value === "" ? null : value;
    }
    if (fallback === null && lowered !== "commandline") {
      fallback = line;
    }
  }
  return fallback;
}
/**
 * Read a process's argv, collapsing probe failures to `null`.
 *
 * Use `readWindowsProcessArgsResultSync` when a failed probe must be
 * distinguishable from "no command line reported".
 *
 * @param pid - Process id to inspect.
 * @param timeoutMs - Timeout forwarded to the underlying probe.
 */
export function readWindowsProcessArgsSync(
  pid: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): string[] | null {
  const probe = readWindowsProcessArgsResultSync(pid, timeoutMs);
  if (!probe.ok) {
    return null;
  }
  return probe.args;
}
/**
 * Read a process's argv, reporting probe failures explicitly.
 *
 * PowerShell (`Get-CimInstance Win32_Process`) is tried first; when it fails
 * to spawn or exits non-zero, `wmic` is tried with the same timeout.
 *
 * @param pid - Process id to inspect.
 * @param timeoutMs - Timeout applied to each child process.
 * @returns `{ ok: true, args }` when either probe ran successfully (`args` is
 * `null` when no command line was reported). `{ ok: false, permanent }` when
 * both failed; `permanent` is true only when BOTH probes failed with a
 * permanent spawn error (ENOENT, EACCES or EPERM). A transient failure of one
 * probe, such as a PowerShell timeout, keeps the result non-permanent even if
 * the other tool is missing.
 */
export function readWindowsProcessArgsResultSync(
  pid: number,
  timeoutMs = DEFAULT_TIMEOUT_MS,
): WindowsProcessArgsResult {
  const powershell = spawnSync(
    "powershell",
    [
      "-NoProfile",
      "-Command",
      `(Get-CimInstance Win32_Process -Filter "ProcessId = ${pid}" | Select-Object -ExpandProperty CommandLine)`,
    ],
    {
      encoding: "utf8",
      timeout: timeoutMs,
      windowsHide: true,
    },
  );
  if (!powershell.error && powershell.status === 0) {
    const command = powershell.stdout.trim();
    return { ok: true, args: command ? parseCmdScriptCommandLine(command) : null };
  }
  const wmic = spawnSync(
    "wmic",
    ["process", "where", `ProcessId=${pid}`, "get", "CommandLine", "/value"],
    {
      encoding: "utf8",
      timeout: timeoutMs,
      windowsHide: true,
    },
  );
  if (!wmic.error && wmic.status === 0) {
    const command = extractWindowsCommandLine(wmic.stdout);
    return { ok: true, args: command ? parseCmdScriptCommandLine(command) : null };
  }
  // A spawn error is permanent when retrying cannot help: the tool is missing
  // or may not be executed. A non-zero exit without a spawn error is not.
  const isPermanentSpawnError = (error: Error | undefined): boolean => {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    return code === "ENOENT" || code === "EACCES" || code === "EPERM";
  };
  return {
    ok: false,
    permanent: isPermanentSpawnError(powershell.error) && isPermanentSpawnError(wmic.error),
  };
}