fix(cli): make gateway --force resilient to lsof EACCES

This commit is contained in:
Peter Steinberger
2026-02-26 23:02:47 +01:00
parent c03adfb41a
commit 39f7dbfe02
3 changed files with 266 additions and 16 deletions

View File

@@ -61,6 +61,7 @@ Docs: https://docs.openclaw.ai
- Doctor/State integrity: ignore metadata-only slash routing sessions when checking recent missing transcripts so `openclaw doctor` no longer reports false-positive transcript-missing warnings for `*:slash:*` keys. (#27375) thanks @gumadeiras.
- CLI/Gateway status: force local `gateway status` probe host to `127.0.0.1` for `bind=lan` so co-located probes do not trip non-loopback plaintext WebSocket checks. (#26997) thanks @chikko80.
- CLI/Gateway auth: align `gateway run --auth` parsing/help text with supported gateway auth modes by accepting `none` and `trusted-proxy` (in addition to `token`/`password`) for CLI overrides. (#27469) thanks @s1korrrr.
- CLI/Gateway `--force` in non-root Docker: recover from `lsof` permission failures (`EACCES`/`EPERM`) by falling back to `fuser` kill + probe-based port checks, so `openclaw gateway --force` works for default container `node` user flows. (#27941)
- CLI/Daemon status TLS probe: use `wss://` and forward local TLS certificate fingerprint for TLS-enabled gateway daemon probes so `openclaw daemon status` works with `gateway.bind=lan` + `gateway.tls.enabled=true`. (#24234) thanks @liuy.
- Podman/Default bind: change `run-openclaw-podman.sh` default gateway bind from `lan` to `loopback` and document explicit LAN opt-in with Control UI origin configuration. (#27491) thanks @robbyczgw-cla.
- Daemon/macOS launchd: forward proxy env vars into supervised service environments, keep LaunchAgent `KeepAlive=true` semantics, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) thanks @frankekn.

View File

@@ -1,5 +1,6 @@
import { execFileSync } from "node:child_process";
import { resolveLsofCommandSync } from "../infra/ports-lsof.js";
import { tryListenOnPort } from "../infra/ports-probe.js";
import { sleep } from "../utils.js";
/**
 * A process entry produced by the port helpers in this file.
 * `pid` is always present; `command` is optional (the fuser-based helper
 * returns entries that carry only `pid`).
 */
export type PortProcess = { pid: number; command?: string };
@@ -10,6 +11,132 @@ export type ForceFreePortResult = {
escalatedToSigkill: boolean;
};
/**
 * Shape used when inspecting errors caught around `execFileSync` calls in this
 * file: an errno-style error extended with the fields the catch handlers read.
 */
type ExecFileError = NodeJS.ErrnoException & {
  // Exit status of the child; compared against 1 by the handlers in this file.
  status?: number | null;
  // Captured stderr; may be a string or a Buffer, so it is normalized before use.
  stderr?: string | Buffer;
  // Captured stdout; may be a string or a Buffer, so it is normalized before use.
  stdout?: string | Buffer;
  // Optional underlying error.
  cause?: unknown;
};
/**
 * Maps the signal names accepted by `killPortWithFuser` to the short names
 * that are prefixed with `-` and passed to `fuser` (e.g. `-TERM`, `-KILL`).
 */
const FUSER_SIGNALS: Record<"SIGTERM" | "SIGKILL", string> = {
  SIGTERM: "TERM",
  SIGKILL: "KILL",
};
/**
 * Normalizes a captured child-process stream to a plain string.
 *
 * @param value - Captured output as a string, a Buffer, or `undefined`.
 * @returns The string unchanged, the Buffer decoded as UTF-8, or `""` for
 *   anything else.
 */
function readExecOutput(value: string | Buffer | undefined): string {
  if (value instanceof Buffer) {
    return value.toString("utf8");
  }
  return typeof value === "string" ? value : "";
}
/**
 * Builds an `Error` tagged with an errno-style `code`.
 *
 * @param message - Message for the new error.
 * @param code - Value assigned to the new error's `code` property.
 * @param cause - Original failure; attached as `cause` only when it is an
 *   `Error` instance, otherwise `cause` is left `undefined`.
 * @returns The newly created error.
 */
function withErrnoCode(message: string, code: string, cause: unknown): Error {
  const underlying = cause instanceof Error ? cause : undefined;
  const tagged: Error & NodeJS.ErrnoException = new Error(message, { cause: underlying });
  tagged.code = code;
  return tagged;
}
/**
 * Extracts an errno-style code from an unknown thrown value.
 *
 * The value's own `code` wins; if that is missing, empty, or not a string,
 * the `code` of its `cause` (when `cause` is an object) is used instead.
 *
 * @param err - Any caught value.
 * @returns A non-empty string code, or `undefined` when none is found.
 */
function getErrnoCode(err: unknown): string | undefined {
  const pickCode = (candidate: unknown): string | undefined => {
    if (!candidate || typeof candidate !== "object") {
      return undefined;
    }
    const value = (candidate as { code?: unknown }).code;
    return typeof value === "string" && value.length > 0 ? value : undefined;
  };

  if (!err || typeof err !== "object") {
    return undefined;
  }
  // Only consult `cause` when the error itself carries no usable code.
  return pickCode(err) ?? pickCode((err as { cause?: unknown }).cause);
}
/**
 * Decides whether an lsof failure should be treated as recoverable.
 *
 * An error qualifies when its errno code (direct or via `cause`) is `ENOENT`,
 * `EACCES`, or `EPERM`, or when its message mentions lsof followed by a
 * permission-style phrase.
 *
 * @param err - The value caught from the lsof-based code path.
 * @returns `true` when the failure matches one of the recoverable forms.
 */
function isRecoverableLsofError(err: unknown): boolean {
  const recoverableCodes = ["ENOENT", "EACCES", "EPERM"];
  const code = getErrnoCode(err);
  if (code !== undefined && recoverableCodes.includes(code)) {
    return true;
  }
  // No usable code: fall back to matching the message text.
  const text = err instanceof Error ? err.message : String(err);
  const permissionPattern =
    /lsof.*(permission denied|not permitted|operation not permitted|eacces|eperm)/i;
  return permissionPattern.test(text);
}
/**
 * Collects the PIDs found in fuser-style output.
 *
 * Each non-blank line is scanned for runs of digits. When a line contains a
 * colon, only the text after the first colon is scanned, so a leading
 * `18789/tcp:` label is not mistaken for a PID.
 *
 * @param output - Raw text to scan; may span several lines.
 * @returns Unique positive PIDs in first-seen order; `[]` for empty input.
 */
function parseFuserPidList(output: string): number[] {
  if (!output) {
    return [];
  }
  const pids = output
    .split(/\r?\n/)
    .map((rawLine) => rawLine.trim())
    .filter((line) => line.length > 0)
    .flatMap((line): string[] => {
      const colonAt = line.indexOf(":");
      const pidRegion = colonAt >= 0 ? line.slice(colonAt + 1) : line;
      return pidRegion.match(/\d+/g) ?? [];
    })
    .map((digits) => Number.parseInt(digits, 10))
    .filter((pid) => Number.isFinite(pid) && pid > 0);
  // A Set removes duplicates while keeping first-seen order.
  return [...new Set(pids)];
}
/**
 * Signals whatever holds `port`/tcp by running `fuser -k -<SIGNAL> <port>/tcp`.
 *
 * @param port - TCP port whose holders should be signalled.
 * @param signal - Signal to deliver (`SIGTERM` or `SIGKILL`).
 * @returns The PIDs parsed from fuser's output, as `{ pid }` entries.
 * @throws An `ENOENT`-coded error when fuser cannot be found, an
 *   `EACCES`/`EPERM`-coded error when running it is denied, and the original
 *   failure for anything else.
 */
function killPortWithFuser(port: number, signal: "SIGTERM" | "SIGKILL"): PortProcess[] {
  const toProcesses = (text: string): PortProcess[] =>
    parseFuserPidList(text).map((pid) => ({ pid }));

  try {
    const stdout = execFileSync("fuser", ["-k", `-${FUSER_SIGNALS[signal]}`, `${port}/tcp`], {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    return toProcesses(stdout);
  } catch (err: unknown) {
    const failure = err as ExecFileError;
    if (failure.status === 1) {
      // fuser exits 1 if nothing matched; keep any parsed PIDs in case signal succeeded.
      const combined = [readExecOutput(failure.stdout), readExecOutput(failure.stderr)]
        .filter(Boolean)
        .join("\n");
      return toProcesses(combined);
    }
    switch (failure.code) {
      case "ENOENT":
        throw withErrnoCode(
          "fuser not found; required for --force when lsof is unavailable",
          "ENOENT",
          err,
        );
      case "EACCES":
      case "EPERM":
        throw withErrnoCode(
          "fuser permission denied while forcing gateway port",
          failure.code,
          err,
        );
      default:
        throw err instanceof Error ? err : new Error(String(err));
    }
  }
}
/**
 * Probes whether `port` is in use by attempting an exclusive listen through
 * `tryListenOnPort`.
 *
 * @param port - TCP port to probe.
 * @returns `false` when the listen attempt succeeds, `true` when it fails
 *   with `EADDRINUSE`.
 * @throws Any other probe failure, wrapped in an `Error` if it is not one.
 */
async function isPortBusy(port: number): Promise<boolean> {
  let busy = false;
  try {
    await tryListenOnPort({ port, exclusive: true });
  } catch (err: unknown) {
    if ((err as NodeJS.ErrnoException).code !== "EADDRINUSE") {
      throw err instanceof Error ? err : new Error(String(err));
    }
    busy = true;
  }
  return busy;
}
export function parseLsofOutput(output: string): PortProcess[] {
const lines = output.split(/\r?\n/).filter(Boolean);
const results: PortProcess[] = [];
@@ -38,12 +165,27 @@ export function listPortListeners(port: number): PortProcess[] {
});
return parseLsofOutput(out);
} catch (err: unknown) {
const status = (err as { status?: number }).status;
const code = (err as { code?: string }).code;
const execErr = err as ExecFileError;
const status = execErr.status ?? undefined;
const code = execErr.code;
if (code === "ENOENT") {
throw new Error("lsof not found; required for --force", { cause: err });
throw withErrnoCode("lsof not found; required for --force", "ENOENT", err);
}
if (code === "EACCES" || code === "EPERM") {
throw withErrnoCode("lsof permission denied while inspecting gateway port", code, err);
}
if (status === 1) {
const stderr = readExecOutput(execErr.stderr).trim();
if (
stderr &&
/permission denied|not permitted|operation not permitted|can't stat/i.test(stderr)
) {
throw withErrnoCode(
`lsof permission denied while inspecting gateway port: ${stderr}`,
"EACCES",
err,
);
}
return [];
} // no listeners
throw err instanceof Error ? err : new Error(String(err));
@@ -93,43 +235,65 @@ export async function forceFreePortAndWait(
const intervalMs = Math.max(opts.intervalMs ?? 100, 1);
const sigtermTimeoutMs = Math.min(Math.max(opts.sigtermTimeoutMs ?? 600, 0), timeoutMs);
const killed = forceFreePort(port);
if (killed.length === 0) {
let killed: PortProcess[] = [];
let useFuserFallback = false;
try {
killed = forceFreePort(port);
} catch (err) {
if (!isRecoverableLsofError(err)) {
throw err;
}
useFuserFallback = true;
killed = killPortWithFuser(port, "SIGTERM");
}
const checkBusy = async (): Promise<boolean> =>
useFuserFallback ? isPortBusy(port) : listPortListeners(port).length > 0;
if (!(await checkBusy())) {
return { killed, waitedMs: 0, escalatedToSigkill: false };
}
let waitedMs = 0;
const triesSigterm = intervalMs > 0 ? Math.ceil(sigtermTimeoutMs / intervalMs) : 0;
for (let i = 0; i < triesSigterm; i++) {
if (listPortListeners(port).length === 0) {
if (!(await checkBusy())) {
return { killed, waitedMs, escalatedToSigkill: false };
}
await sleep(intervalMs);
waitedMs += intervalMs;
}
if (listPortListeners(port).length === 0) {
if (!(await checkBusy())) {
return { killed, waitedMs, escalatedToSigkill: false };
}
const remaining = listPortListeners(port);
killPids(remaining, "SIGKILL");
if (useFuserFallback) {
killPortWithFuser(port, "SIGKILL");
} else {
const remaining = listPortListeners(port);
killPids(remaining, "SIGKILL");
}
const remainingBudget = Math.max(timeoutMs - waitedMs, 0);
const triesSigkill = intervalMs > 0 ? Math.ceil(remainingBudget / intervalMs) : 0;
for (let i = 0; i < triesSigkill; i++) {
if (listPortListeners(port).length === 0) {
if (!(await checkBusy())) {
return { killed, waitedMs, escalatedToSigkill: true };
}
await sleep(intervalMs);
waitedMs += intervalMs;
}
const still = listPortListeners(port);
if (still.length === 0) {
if (!(await checkBusy())) {
return { killed, waitedMs, escalatedToSigkill: true };
}
if (useFuserFallback) {
throw new Error(`port ${port} still has listeners after --force (fuser fallback)`);
}
const still = listPortListeners(port);
throw new Error(
`port ${port} still has listeners after --force: ${still.map((p) => p.pid).join(", ")}`,
);

View File

@@ -8,6 +8,12 @@ vi.mock("node:child_process", async () => {
};
});
const tryListenOnPortMock = vi.hoisted(() => vi.fn());
vi.mock("../infra/ports-probe.js", () => ({
tryListenOnPort: (...args: unknown[]) => tryListenOnPortMock(...args),
}));
import { execFileSync } from "node:child_process";
import {
forceFreePort,
@@ -23,6 +29,7 @@ describe("gateway --force helpers", () => {
beforeEach(() => {
vi.clearAllMocks();
originalKill = process.kill.bind(process);
tryListenOnPortMock.mockReset();
});
afterEach(() => {
@@ -80,11 +87,13 @@ describe("gateway --force helpers", () => {
let call = 0;
(execFileSync as unknown as Mock).mockImplementation(() => {
call += 1;
// 1st call: initial listeners to kill; 2nd call: still listed; 3rd call: gone.
// 1st call: initial listeners to kill.
// 2nd/3rd calls: still listed.
// 4th call: gone.
if (call === 1) {
return ["p42", "cnode", ""].join("\n");
}
if (call === 2) {
if (call === 2 || call === 3) {
return ["p42", "cnode", ""].join("\n");
}
return "";
@@ -105,7 +114,7 @@ describe("gateway --force helpers", () => {
expect(killMock).toHaveBeenCalledWith(42, "SIGTERM");
expect(res.killed).toEqual<PortProcess[]>([{ pid: 42, command: "node" }]);
expect(res.escalatedToSigkill).toBe(false);
expect(res.waitedMs).toBeGreaterThan(0);
expect(res.waitedMs).toBe(100);
vi.useRealTimers();
});
@@ -116,7 +125,7 @@ describe("gateway --force helpers", () => {
(execFileSync as unknown as Mock).mockImplementation(() => {
call += 1;
// 1st call: initial kill list; then keep showing until after SIGKILL.
if (call <= 6) {
if (call <= 7) {
return ["p42", "cnode", ""].join("\n");
}
return "";
@@ -140,4 +149,80 @@ describe("gateway --force helpers", () => {
vi.useRealTimers();
});
it("falls back to fuser when lsof is permission denied", async () => {
(execFileSync as unknown as Mock).mockImplementation((cmd: string) => {
if (cmd.includes("lsof")) {
const err = new Error("spawnSync lsof EACCES") as NodeJS.ErrnoException;
err.code = "EACCES";
throw err;
}
return "18789/tcp: 4242\n";
});
tryListenOnPortMock.mockResolvedValue(undefined);
const result = await forceFreePortAndWait(18789, { timeoutMs: 500, intervalMs: 100 });
expect(result.escalatedToSigkill).toBe(false);
expect(result.killed).toEqual<PortProcess[]>([{ pid: 4242 }]);
expect(execFileSync).toHaveBeenCalledWith(
"fuser",
["-k", "-TERM", "18789/tcp"],
expect.objectContaining({ encoding: "utf-8" }),
);
});
it("uses fuser SIGKILL escalation when port stays busy", async () => {
vi.useFakeTimers();
(execFileSync as unknown as Mock).mockImplementation((cmd: string, args: string[]) => {
if (cmd.includes("lsof")) {
const err = new Error("spawnSync lsof EACCES") as NodeJS.ErrnoException;
err.code = "EACCES";
throw err;
}
if (args.includes("-TERM")) {
return "18789/tcp: 1337\n";
}
if (args.includes("-KILL")) {
return "18789/tcp: 1337\n";
}
return "";
});
const busyErr = Object.assign(new Error("in use"), { code: "EADDRINUSE" });
tryListenOnPortMock
.mockRejectedValueOnce(busyErr)
.mockRejectedValueOnce(busyErr)
.mockRejectedValueOnce(busyErr)
.mockResolvedValueOnce(undefined);
const promise = forceFreePortAndWait(18789, {
timeoutMs: 300,
intervalMs: 100,
sigtermTimeoutMs: 100,
});
await vi.runAllTimersAsync();
const result = await promise;
expect(result.escalatedToSigkill).toBe(true);
expect(result.waitedMs).toBe(100);
expect(execFileSync).toHaveBeenCalledWith(
"fuser",
["-k", "-KILL", "18789/tcp"],
expect.objectContaining({ encoding: "utf-8" }),
);
vi.useRealTimers();
});
it("throws when lsof is unavailable and fuser is missing", async () => {
(execFileSync as unknown as Mock).mockImplementation((cmd: string) => {
const err = new Error(`spawnSync ${cmd} ENOENT`) as NodeJS.ErrnoException;
err.code = "ENOENT";
throw err;
});
await expect(forceFreePortAndWait(18789, { timeoutMs: 200, intervalMs: 100 })).rejects.toThrow(
/fuser not found/i,
);
});
});