mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
fix(cli): make gateway --force resilient to lsof EACCES
This commit is contained in:
@@ -61,6 +61,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Doctor/State integrity: ignore metadata-only slash routing sessions when checking recent missing transcripts so `openclaw doctor` no longer reports false-positive transcript-missing warnings for `*:slash:*` keys. (#27375) thanks @gumadeiras.
|
||||
- CLI/Gateway status: force local `gateway status` probe host to `127.0.0.1` for `bind=lan` so co-located probes do not trip non-loopback plaintext WebSocket checks. (#26997) thanks @chikko80.
|
||||
- CLI/Gateway auth: align `gateway run --auth` parsing/help text with supported gateway auth modes by accepting `none` and `trusted-proxy` (in addition to `token`/`password`) for CLI overrides. (#27469) thanks @s1korrrr.
|
||||
- CLI/Gateway `--force` in non-root Docker: recover from `lsof` permission failures (`EACCES`/`EPERM`) by falling back to `fuser` kill + probe-based port checks, so `openclaw gateway --force` works for default container `node` user flows. (#27941)
|
||||
- CLI/Daemon status TLS probe: use `wss://` and forward local TLS certificate fingerprint for TLS-enabled gateway daemon probes so `openclaw daemon status` works with `gateway.bind=lan` + `gateway.tls.enabled=true`. (#24234) thanks @liuy.
|
||||
- Podman/Default bind: change `run-openclaw-podman.sh` default gateway bind from `lan` to `loopback` and document explicit LAN opt-in with Control UI origin configuration. (#27491) thanks @robbyczgw-cla.
|
||||
- Daemon/macOS launchd: forward proxy env vars into supervised service environments, keep LaunchAgent `KeepAlive=true` semantics, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) thanks @frankekn.
|
||||
|
||||
188
src/cli/ports.ts
188
src/cli/ports.ts
@@ -1,5 +1,6 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { resolveLsofCommandSync } from "../infra/ports-lsof.js";
|
||||
import { tryListenOnPort } from "../infra/ports-probe.js";
|
||||
import { sleep } from "../utils.js";
|
||||
|
||||
export type PortProcess = { pid: number; command?: string };
|
||||
@@ -10,6 +11,132 @@ export type ForceFreePortResult = {
|
||||
escalatedToSigkill: boolean;
|
||||
};
|
||||
|
||||
/**
 * Shape this module assumes for errors caught around `execFileSync` calls.
 *
 * Extends the Node errno error (which carries the optional string `code`,
 * e.g. "ENOENT") with the extra fields the catch handlers in this file read.
 */
type ExecFileError = NodeJS.ErrnoException & {
  /** Exit status of the child process; compared against `1` by callers. */
  status?: number | null;
  /** Captured stderr; may be a string or a Buffer depending on exec options. */
  stderr?: string | Buffer;
  /** Captured stdout; may be a string or a Buffer depending on exec options. */
  stdout?: string | Buffer;
  /** Optional underlying error. */
  cause?: unknown;
};
|
||||
|
||||
/**
 * Maps Node-style signal names to the bare signal names that are prefixed
 * with `-` to build the `fuser -k -<SIGNAL>` argument.
 */
const FUSER_SIGNALS: Record<"SIGTERM" | "SIGKILL", string> = {
  SIGTERM: "TERM",
  SIGKILL: "KILL",
};
|
||||
|
||||
/**
 * Normalizes captured child-process output to a string.
 *
 * @param value - Output as a string, a Buffer, or `undefined` when absent.
 * @returns The string unchanged, the Buffer decoded as UTF-8, or `""` for
 *   anything else.
 */
function readExecOutput(value: string | Buffer | undefined): string {
  if (typeof value === "string") {
    return value;
  }
  if (Buffer.isBuffer(value)) {
    return value.toString("utf8");
  }
  return "";
}
|
||||
|
||||
/**
 * Builds an `Error` carrying an errno-style `code`, so callers can branch on
 * the code instead of parsing the message.
 *
 * @param message - Human-readable error message.
 * @param code - Errno code to attach (e.g. "ENOENT", "EACCES").
 * @param cause - Original failure; attached as `cause` only when it is an
 *   `Error` instance, otherwise `cause` is left `undefined`.
 * @returns The new error with `code` set.
 */
function withErrnoCode(message: string, code: string, cause: unknown): Error {
  const out = new Error(message, {
    cause: cause instanceof Error ? cause : undefined,
  }) as Error & NodeJS.ErrnoException;
  out.code = code;
  return out;
}
|
||||
|
||||
/**
 * Extracts an errno-style code from an unknown thrown value.
 *
 * Looks at `err.code` first and, failing that, at `err.cause.code` (one level
 * only). A code counts only when it is a non-empty string.
 *
 * @param err - Any thrown value.
 * @returns The code, or `undefined` when `err` is not an object or neither
 *   location holds a non-empty string.
 */
function getErrnoCode(err: unknown): string | undefined {
  if (!err || typeof err !== "object") {
    return undefined;
  }

  const direct = (err as { code?: unknown }).code;
  if (typeof direct === "string" && direct.length > 0) {
    return direct;
  }

  const cause = (err as { cause?: unknown }).cause;
  if (!cause || typeof cause !== "object") {
    return undefined;
  }

  const nested = (cause as { code?: unknown }).code;
  return typeof nested === "string" && nested.length > 0 ? nested : undefined;
}
|
||||
|
||||
/**
 * Decides whether a failure from the lsof-based path can be recovered from
 * by switching to the fuser fallback.
 *
 * An error is recoverable when its errno code (direct or via `cause`) is
 * ENOENT, EACCES or EPERM, or when its message mentions lsof followed by a
 * permission-style phrase.
 *
 * @param err - Any thrown value.
 * @returns `true` when the fallback should be attempted.
 */
function isRecoverableLsofError(err: unknown): boolean {
  const code = getErrnoCode(err);
  if (code === "ENOENT" || code === "EACCES" || code === "EPERM") {
    return true;
  }
  // No usable code: fall back to matching the message text.
  const message = err instanceof Error ? err.message : String(err);
  return /lsof.*(permission denied|not permitted|operation not permitted|eacces|eperm)/i.test(
    message,
  );
}
|
||||
|
||||
/**
 * Extracts the distinct positive PIDs from `fuser` output.
 *
 * Each non-blank line is scanned for digit runs. When a line contains a
 * colon, only the text after the first colon is scanned, so the port in a
 * `18789/tcp:` prefix is not mistaken for a PID.
 *
 * @param output - Raw fuser output (may be empty, may use CRLF line endings).
 * @returns Unique PIDs in first-seen order; `[]` for empty input.
 */
function parseFuserPidList(output: string): number[] {
  if (!output) {
    return [];
  }
  const values = new Set<number>();
  for (const rawLine of output.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) {
      continue;
    }
    const colonAt = line.indexOf(":");
    const pidRegion = colonAt >= 0 ? line.slice(colonAt + 1) : line;
    for (const match of pidRegion.match(/\d+/g) ?? []) {
      const pid = Number.parseInt(match, 10);
      if (Number.isFinite(pid) && pid > 0) {
        values.add(pid);
      }
    }
  }
  return [...values];
}
|
||||
|
||||
/**
 * Signals the processes using `<port>/tcp` by running
 * `fuser -k -<SIGNAL> <port>/tcp`, for use when lsof cannot be used.
 *
 * @param port - TCP port whose users should be signalled.
 * @param signal - Signal to deliver; translated through `FUSER_SIGNALS`.
 * @returns The PIDs parsed from fuser's output (without command names). When
 *   fuser exits with status 1, whatever PIDs could be parsed from its
 *   stdout/stderr are returned instead of throwing.
 * @throws An error with `code` "ENOENT" when fuser is not installed, with
 *   `code` "EACCES"/"EPERM" when spawning it is denied, or the original
 *   failure otherwise.
 */
function killPortWithFuser(port: number, signal: "SIGTERM" | "SIGKILL"): PortProcess[] {
  const args = ["-k", `-${FUSER_SIGNALS[signal]}`, `${port}/tcp`];
  try {
    const stdout = execFileSync("fuser", args, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    return parseFuserPidList(stdout).map((pid) => ({ pid }));
  } catch (err: unknown) {
    const execErr = err as ExecFileError;
    const code = execErr.code;
    const status = execErr.status;
    const stdout = readExecOutput(execErr.stdout);
    const stderr = readExecOutput(execErr.stderr);
    // Parse both streams together so PIDs are found on whichever one fuser used.
    const parsed = parseFuserPidList([stdout, stderr].filter(Boolean).join("\n"));
    if (status === 1) {
      // fuser exits 1 if nothing matched; keep any parsed PIDs in case signal succeeded.
      return parsed.map((pid) => ({ pid }));
    }
    if (code === "ENOENT") {
      throw withErrnoCode(
        "fuser not found; required for --force when lsof is unavailable",
        "ENOENT",
        err,
      );
    }
    if (code === "EACCES" || code === "EPERM") {
      throw withErrnoCode("fuser permission denied while forcing gateway port", code, err);
    }
    throw err instanceof Error ? err : new Error(String(err));
  }
}
|
||||
|
||||
/**
 * Probes whether `port` is currently in use by calling `tryListenOnPort`
 * with `exclusive: true`.
 *
 * @param port - TCP port to probe.
 * @returns `false` when the probe resolves, `true` when it rejects with
 *   `EADDRINUSE`.
 * @throws Any other probe failure, wrapped in an `Error` if it was not one.
 */
async function isPortBusy(port: number): Promise<boolean> {
  try {
    await tryListenOnPort({ port, exclusive: true });
    return false;
  } catch (err: unknown) {
    // Optional chaining guards against a thrown null/undefined, which would
    // otherwise raise a TypeError here and hide the real failure.
    const code = (err as NodeJS.ErrnoException | null | undefined)?.code;
    if (code === "EADDRINUSE") {
      return true;
    }
    throw err instanceof Error ? err : new Error(String(err));
  }
}
|
||||
|
||||
export function parseLsofOutput(output: string): PortProcess[] {
|
||||
const lines = output.split(/\r?\n/).filter(Boolean);
|
||||
const results: PortProcess[] = [];
|
||||
@@ -38,12 +165,27 @@ export function listPortListeners(port: number): PortProcess[] {
|
||||
});
|
||||
return parseLsofOutput(out);
|
||||
} catch (err: unknown) {
|
||||
const status = (err as { status?: number }).status;
|
||||
const code = (err as { code?: string }).code;
|
||||
const execErr = err as ExecFileError;
|
||||
const status = execErr.status ?? undefined;
|
||||
const code = execErr.code;
|
||||
if (code === "ENOENT") {
|
||||
throw new Error("lsof not found; required for --force", { cause: err });
|
||||
throw withErrnoCode("lsof not found; required for --force", "ENOENT", err);
|
||||
}
|
||||
if (code === "EACCES" || code === "EPERM") {
|
||||
throw withErrnoCode("lsof permission denied while inspecting gateway port", code, err);
|
||||
}
|
||||
if (status === 1) {
|
||||
const stderr = readExecOutput(execErr.stderr).trim();
|
||||
if (
|
||||
stderr &&
|
||||
/permission denied|not permitted|operation not permitted|can't stat/i.test(stderr)
|
||||
) {
|
||||
throw withErrnoCode(
|
||||
`lsof permission denied while inspecting gateway port: ${stderr}`,
|
||||
"EACCES",
|
||||
err,
|
||||
);
|
||||
}
|
||||
return [];
|
||||
} // no listeners
|
||||
throw err instanceof Error ? err : new Error(String(err));
|
||||
@@ -93,43 +235,65 @@ export async function forceFreePortAndWait(
|
||||
const intervalMs = Math.max(opts.intervalMs ?? 100, 1);
|
||||
const sigtermTimeoutMs = Math.min(Math.max(opts.sigtermTimeoutMs ?? 600, 0), timeoutMs);
|
||||
|
||||
const killed = forceFreePort(port);
|
||||
if (killed.length === 0) {
|
||||
let killed: PortProcess[] = [];
|
||||
let useFuserFallback = false;
|
||||
|
||||
try {
|
||||
killed = forceFreePort(port);
|
||||
} catch (err) {
|
||||
if (!isRecoverableLsofError(err)) {
|
||||
throw err;
|
||||
}
|
||||
useFuserFallback = true;
|
||||
killed = killPortWithFuser(port, "SIGTERM");
|
||||
}
|
||||
|
||||
const checkBusy = async (): Promise<boolean> =>
|
||||
useFuserFallback ? isPortBusy(port) : listPortListeners(port).length > 0;
|
||||
|
||||
if (!(await checkBusy())) {
|
||||
return { killed, waitedMs: 0, escalatedToSigkill: false };
|
||||
}
|
||||
|
||||
let waitedMs = 0;
|
||||
const triesSigterm = intervalMs > 0 ? Math.ceil(sigtermTimeoutMs / intervalMs) : 0;
|
||||
for (let i = 0; i < triesSigterm; i++) {
|
||||
if (listPortListeners(port).length === 0) {
|
||||
if (!(await checkBusy())) {
|
||||
return { killed, waitedMs, escalatedToSigkill: false };
|
||||
}
|
||||
await sleep(intervalMs);
|
||||
waitedMs += intervalMs;
|
||||
}
|
||||
|
||||
if (listPortListeners(port).length === 0) {
|
||||
if (!(await checkBusy())) {
|
||||
return { killed, waitedMs, escalatedToSigkill: false };
|
||||
}
|
||||
|
||||
const remaining = listPortListeners(port);
|
||||
killPids(remaining, "SIGKILL");
|
||||
if (useFuserFallback) {
|
||||
killPortWithFuser(port, "SIGKILL");
|
||||
} else {
|
||||
const remaining = listPortListeners(port);
|
||||
killPids(remaining, "SIGKILL");
|
||||
}
|
||||
|
||||
const remainingBudget = Math.max(timeoutMs - waitedMs, 0);
|
||||
const triesSigkill = intervalMs > 0 ? Math.ceil(remainingBudget / intervalMs) : 0;
|
||||
for (let i = 0; i < triesSigkill; i++) {
|
||||
if (listPortListeners(port).length === 0) {
|
||||
if (!(await checkBusy())) {
|
||||
return { killed, waitedMs, escalatedToSigkill: true };
|
||||
}
|
||||
await sleep(intervalMs);
|
||||
waitedMs += intervalMs;
|
||||
}
|
||||
|
||||
const still = listPortListeners(port);
|
||||
if (still.length === 0) {
|
||||
if (!(await checkBusy())) {
|
||||
return { killed, waitedMs, escalatedToSigkill: true };
|
||||
}
|
||||
|
||||
if (useFuserFallback) {
|
||||
throw new Error(`port ${port} still has listeners after --force (fuser fallback)`);
|
||||
}
|
||||
const still = listPortListeners(port);
|
||||
throw new Error(
|
||||
`port ${port} still has listeners after --force: ${still.map((p) => p.pid).join(", ")}`,
|
||||
);
|
||||
|
||||
@@ -8,6 +8,12 @@ vi.mock("node:child_process", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
const tryListenOnPortMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("../infra/ports-probe.js", () => ({
|
||||
tryListenOnPort: (...args: unknown[]) => tryListenOnPortMock(...args),
|
||||
}));
|
||||
|
||||
import { execFileSync } from "node:child_process";
|
||||
import {
|
||||
forceFreePort,
|
||||
@@ -23,6 +29,7 @@ describe("gateway --force helpers", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
originalKill = process.kill.bind(process);
|
||||
tryListenOnPortMock.mockReset();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -80,11 +87,13 @@ describe("gateway --force helpers", () => {
|
||||
let call = 0;
|
||||
(execFileSync as unknown as Mock).mockImplementation(() => {
|
||||
call += 1;
|
||||
// 1st call: initial listeners to kill; 2nd call: still listed; 3rd call: gone.
|
||||
// 1st call: initial listeners to kill.
|
||||
// 2nd/3rd calls: still listed.
|
||||
// 4th call: gone.
|
||||
if (call === 1) {
|
||||
return ["p42", "cnode", ""].join("\n");
|
||||
}
|
||||
if (call === 2) {
|
||||
if (call === 2 || call === 3) {
|
||||
return ["p42", "cnode", ""].join("\n");
|
||||
}
|
||||
return "";
|
||||
@@ -105,7 +114,7 @@ describe("gateway --force helpers", () => {
|
||||
expect(killMock).toHaveBeenCalledWith(42, "SIGTERM");
|
||||
expect(res.killed).toEqual<PortProcess[]>([{ pid: 42, command: "node" }]);
|
||||
expect(res.escalatedToSigkill).toBe(false);
|
||||
expect(res.waitedMs).toBeGreaterThan(0);
|
||||
expect(res.waitedMs).toBe(100);
|
||||
|
||||
vi.useRealTimers();
|
||||
});
|
||||
@@ -116,7 +125,7 @@ describe("gateway --force helpers", () => {
|
||||
(execFileSync as unknown as Mock).mockImplementation(() => {
|
||||
call += 1;
|
||||
// 1st call: initial kill list; then keep showing until after SIGKILL.
|
||||
if (call <= 6) {
|
||||
if (call <= 7) {
|
||||
return ["p42", "cnode", ""].join("\n");
|
||||
}
|
||||
return "";
|
||||
@@ -140,4 +149,80 @@ describe("gateway --force helpers", () => {
|
||||
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("falls back to fuser when lsof is permission denied", async () => {
|
||||
(execFileSync as unknown as Mock).mockImplementation((cmd: string) => {
|
||||
if (cmd.includes("lsof")) {
|
||||
const err = new Error("spawnSync lsof EACCES") as NodeJS.ErrnoException;
|
||||
err.code = "EACCES";
|
||||
throw err;
|
||||
}
|
||||
return "18789/tcp: 4242\n";
|
||||
});
|
||||
tryListenOnPortMock.mockResolvedValue(undefined);
|
||||
|
||||
const result = await forceFreePortAndWait(18789, { timeoutMs: 500, intervalMs: 100 });
|
||||
|
||||
expect(result.escalatedToSigkill).toBe(false);
|
||||
expect(result.killed).toEqual<PortProcess[]>([{ pid: 4242 }]);
|
||||
expect(execFileSync).toHaveBeenCalledWith(
|
||||
"fuser",
|
||||
["-k", "-TERM", "18789/tcp"],
|
||||
expect.objectContaining({ encoding: "utf-8" }),
|
||||
);
|
||||
});
|
||||
|
||||
it("uses fuser SIGKILL escalation when port stays busy", async () => {
|
||||
vi.useFakeTimers();
|
||||
(execFileSync as unknown as Mock).mockImplementation((cmd: string, args: string[]) => {
|
||||
if (cmd.includes("lsof")) {
|
||||
const err = new Error("spawnSync lsof EACCES") as NodeJS.ErrnoException;
|
||||
err.code = "EACCES";
|
||||
throw err;
|
||||
}
|
||||
if (args.includes("-TERM")) {
|
||||
return "18789/tcp: 1337\n";
|
||||
}
|
||||
if (args.includes("-KILL")) {
|
||||
return "18789/tcp: 1337\n";
|
||||
}
|
||||
return "";
|
||||
});
|
||||
|
||||
const busyErr = Object.assign(new Error("in use"), { code: "EADDRINUSE" });
|
||||
tryListenOnPortMock
|
||||
.mockRejectedValueOnce(busyErr)
|
||||
.mockRejectedValueOnce(busyErr)
|
||||
.mockRejectedValueOnce(busyErr)
|
||||
.mockResolvedValueOnce(undefined);
|
||||
|
||||
const promise = forceFreePortAndWait(18789, {
|
||||
timeoutMs: 300,
|
||||
intervalMs: 100,
|
||||
sigtermTimeoutMs: 100,
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
const result = await promise;
|
||||
|
||||
expect(result.escalatedToSigkill).toBe(true);
|
||||
expect(result.waitedMs).toBe(100);
|
||||
expect(execFileSync).toHaveBeenCalledWith(
|
||||
"fuser",
|
||||
["-k", "-KILL", "18789/tcp"],
|
||||
expect.objectContaining({ encoding: "utf-8" }),
|
||||
);
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("throws when lsof is unavailable and fuser is missing", async () => {
|
||||
(execFileSync as unknown as Mock).mockImplementation((cmd: string) => {
|
||||
const err = new Error(`spawnSync ${cmd} ENOENT`) as NodeJS.ErrnoException;
|
||||
err.code = "ENOENT";
|
||||
throw err;
|
||||
});
|
||||
|
||||
await expect(forceFreePortAndWait(18789, { timeoutMs: 200, intervalMs: 100 })).rejects.toThrow(
|
||||
/fuser not found/i,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user