Fix gateway restart false timeouts on Debian/systemd (#34874)

* daemon(systemd): target sudo caller user scope

* test(systemd): cover sudo user scope commands

* infra(ports): fall back to ss when lsof missing

* test(ports): verify ss fallback listener detection

* cli(gateway): use probe fallback for restart health

* test(gateway): cover restart-health probe fallback
This commit is contained in:
Vincent Koc
2026-03-04 10:52:33 -08:00
committed by GitHub
parent 4cc293d084
commit 2b98cb6d8b
6 changed files with 311 additions and 49 deletions

View File

@@ -6,6 +6,7 @@ const inspectPortUsage = vi.hoisted(() => vi.fn<(port: number) => Promise<PortUs
const classifyPortListener = vi.hoisted(() =>
vi.fn<(_listener: unknown, _port: number) => PortListenerKind>(() => "gateway"),
);
const probeGateway = vi.hoisted(() => vi.fn());
vi.mock("../../infra/ports.js", () => ({
classifyPortListener: (listener: unknown, port: number) => classifyPortListener(listener, port),
@@ -13,6 +14,10 @@ vi.mock("../../infra/ports.js", () => ({
inspectPortUsage: (port: number) => inspectPortUsage(port),
}));
vi.mock("../../gateway/probe.js", () => ({
probeGateway: (opts: unknown) => probeGateway(opts),
}));
const originalPlatform = process.platform;
async function inspectUnknownListenerFallback(params: {
@@ -52,6 +57,11 @@ describe("inspectGatewayRestart", () => {
});
classifyPortListener.mockReset();
classifyPortListener.mockReturnValue("gateway");
probeGateway.mockReset();
probeGateway.mockResolvedValue({
ok: false,
close: null,
});
});
afterEach(() => {
@@ -147,4 +157,53 @@ describe("inspectGatewayRestart", () => {
expect(snapshot.staleGatewayPids).toEqual([]);
});
it("uses a local gateway probe when ownership is ambiguous", async () => {
  // The single listener carries no pid and is classified as "unknown", so
  // nothing about the port itself identifies it as the gateway.
  classifyPortListener.mockReturnValue("unknown");
  inspectPortUsage.mockResolvedValue({
    port: 18789,
    status: "busy",
    listeners: [{ commandLine: "" }],
    hints: [],
  });
  // The mocked probe reports a successful connection.
  probeGateway.mockResolvedValue({ ok: true, close: null });

  const readRuntime = vi.fn(async () => ({ status: "running", pid: 8000 }));
  const service = { readRuntime } as unknown as GatewayService;

  const restartHealth = await import("./restart-health.js");
  const snapshot = await restartHealth.inspectGatewayRestart({ service, port: 18789 });

  expect(snapshot.healthy).toBe(true);
  // The probe must have been aimed at the loopback address on the inspected port.
  const loopbackProbeOptions = expect.objectContaining({ url: "ws://127.0.0.1:18789" });
  expect(probeGateway).toHaveBeenCalledWith(loopbackProbeOptions);
});
it("treats auth-closed probe as healthy gateway reachability", async () => {
  // Same ambiguous-listener setup: busy port, one listener without a pid,
  // classified as "unknown".
  classifyPortListener.mockReturnValue("unknown");
  inspectPortUsage.mockResolvedValue({
    port: 18789,
    status: "busy",
    listeners: [{ commandLine: "" }],
    hints: [],
  });
  // The mocked probe fails, but with a 1008 close whose reason mentions auth.
  const authRejection = { code: 1008, reason: "auth required" };
  probeGateway.mockResolvedValue({ ok: false, close: authRejection });

  const readRuntime = vi.fn(async () => ({ status: "running", pid: 8000 }));
  const service = { readRuntime } as unknown as GatewayService;

  const restartHealth = await import("./restart-health.js");
  const snapshot = await restartHealth.inspectGatewayRestart({ service, port: 18789 });

  expect(snapshot.healthy).toBe(true);
});
});

View File

@@ -1,5 +1,6 @@
import type { GatewayServiceRuntime } from "../../daemon/service-runtime.js";
import type { GatewayService } from "../../daemon/service.js";
import { probeGateway } from "../../gateway/probe.js";
import {
classifyPortListener,
formatPortDiagnostics,
@@ -29,6 +30,31 @@ function listenerOwnedByRuntimePid(params: {
return params.listener.pid === params.runtimePid || params.listener.ppid === params.runtimePid;
}
/**
 * Reports whether a WebSocket close looks like an authentication or
 * authorization rejection.
 *
 * @param code - Close code from the probe, if any. Only 1008 qualifies.
 * @param reason - Close reason text, if any; matched case-insensitively.
 * @returns `true` when the code is 1008 and the reason contains one of
 *   "auth", "token", "password", "scope" or "role"; otherwise `false`.
 */
function looksLikeAuthClose(code: number | undefined, reason: string | undefined): boolean {
  const authMarkers = ["auth", "token", "password", "scope", "role"];
  // A missing reason is treated as an empty string, which matches no marker.
  const loweredReason = (reason ?? "").toLowerCase();
  return code === 1008 && authMarkers.some((marker) => loweredReason.includes(marker));
}
/**
 * Probes the loopback WebSocket endpoint on `port` to check that something
 * answering like a gateway is listening there.
 *
 * Credentials are read from `OPENCLAW_GATEWAY_TOKEN` and
 * `OPENCLAW_GATEWAY_PASSWORD`; values that are unset or blank after trimming
 * are omitted, and no `auth` object is sent when both are absent.
 *
 * @param port - Port to probe on 127.0.0.1.
 * @returns `true` when the probe succeeds, or when it was closed in a way
 *   that `looksLikeAuthClose` recognizes; otherwise `false`.
 */
async function confirmGatewayReachable(port: number): Promise<boolean> {
  const readCredential = (raw: string | undefined): string | undefined => raw?.trim() || undefined;
  const token = readCredential(process.env.OPENCLAW_GATEWAY_TOKEN);
  const password = readCredential(process.env.OPENCLAW_GATEWAY_PASSWORD);
  const hasCredentials = Boolean(token || password);

  const result = await probeGateway({
    url: `ws://127.0.0.1:${port}`,
    auth: hasCredentials ? { token, password } : undefined,
    timeoutMs: 1_000,
  });

  // An auth-style close counts as reachable too, not just a successful probe.
  const closeFrame = result.close;
  return result.ok || looksLikeAuthClose(closeFrame?.code, closeFrame?.reason);
}
export async function inspectGatewayRestart(params: {
service: GatewayService;
port: number;
@@ -79,7 +105,14 @@ export async function inspectGatewayRestart(params: {
? portUsage.listeners.some((listener) => listenerOwnedByRuntimePid({ listener, runtimePid }))
: gatewayListeners.length > 0 ||
(portUsage.status === "busy" && portUsage.listeners.length === 0);
const healthy = running && ownsPort;
let healthy = running && ownsPort;
if (!healthy && running && portUsage.status === "busy") {
try {
healthy = await confirmGatewayReachable(params.port);
} catch {
// best-effort probe
}
}
const staleGatewayPids = Array.from(
new Set([
...gatewayListeners