mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-20 06:20:55 +00:00
Fix gateway restart false timeouts on Debian/systemd (#34874)
* daemon(systemd): target sudo caller user scope
* test(systemd): cover sudo user scope commands
* infra(ports): fall back to ss when lsof missing
* test(ports): verify ss fallback listener detection
* cli(gateway): use probe fallback for restart health
* test(gateway): cover restart-health probe fallback
This commit is contained in:
@@ -6,6 +6,7 @@ const inspectPortUsage = vi.hoisted(() => vi.fn<(port: number) => Promise<PortUs
|
||||
const classifyPortListener = vi.hoisted(() =>
|
||||
vi.fn<(_listener: unknown, _port: number) => PortListenerKind>(() => "gateway"),
|
||||
);
|
||||
const probeGateway = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("../../infra/ports.js", () => ({
|
||||
classifyPortListener: (listener: unknown, port: number) => classifyPortListener(listener, port),
|
||||
@@ -13,6 +14,10 @@ vi.mock("../../infra/ports.js", () => ({
|
||||
inspectPortUsage: (port: number) => inspectPortUsage(port),
|
||||
}));
|
||||
|
||||
vi.mock("../../gateway/probe.js", () => ({
|
||||
probeGateway: (opts: unknown) => probeGateway(opts),
|
||||
}));
|
||||
|
||||
const originalPlatform = process.platform;
|
||||
|
||||
async function inspectUnknownListenerFallback(params: {
|
||||
@@ -52,6 +57,11 @@ describe("inspectGatewayRestart", () => {
|
||||
});
|
||||
classifyPortListener.mockReset();
|
||||
classifyPortListener.mockReturnValue("gateway");
|
||||
probeGateway.mockReset();
|
||||
probeGateway.mockResolvedValue({
|
||||
ok: false,
|
||||
close: null,
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -147,4 +157,53 @@ describe("inspectGatewayRestart", () => {
|
||||
|
||||
expect(snapshot.staleGatewayPids).toEqual([]);
|
||||
});
|
||||
|
||||
it("uses a local gateway probe when ownership is ambiguous", async () => {
  // The service reports a running runtime with pid 8000.
  const service = {
    readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })),
  } as unknown as GatewayService;

  // The port is busy, but the only listener carries no pid and an empty
  // command line, and it is classified as "unknown" below.
  inspectPortUsage.mockResolvedValue({
    port: 18789,
    status: "busy",
    listeners: [{ commandLine: "" }],
    hints: [],
  });
  classifyPortListener.mockReturnValue("unknown");

  // The probe succeeds, which should be enough to report the restart healthy.
  probeGateway.mockResolvedValue({
    ok: true,
    close: null,
  });

  const { inspectGatewayRestart } = await import("./restart-health.js");
  const snapshot = await inspectGatewayRestart({ service, port: 18789 });

  expect(snapshot.healthy).toBe(true);
  // The probe must target the loopback WebSocket endpoint on the inspected port.
  expect(probeGateway).toHaveBeenCalledWith(
    expect.objectContaining({ url: "ws://127.0.0.1:18789" }),
  );
});
|
||||
|
||||
it("treats auth-closed probe as healthy gateway reachability", async () => {
  // The service reports a running runtime with pid 8000.
  const service = {
    readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })),
  } as unknown as GatewayService;

  // The port is busy, but the only listener carries no pid and an empty
  // command line, and it is classified as "unknown" below.
  inspectPortUsage.mockResolvedValue({
    port: 18789,
    status: "busy",
    listeners: [{ commandLine: "" }],
    hints: [],
  });
  classifyPortListener.mockReturnValue("unknown");

  // The probe fails, but with close code 1008 and an auth-related reason.
  // This test expects that outcome to count as a reachable gateway.
  probeGateway.mockResolvedValue({
    ok: false,
    close: { code: 1008, reason: "auth required" },
  });

  const { inspectGatewayRestart } = await import("./restart-health.js");
  const snapshot = await inspectGatewayRestart({ service, port: 18789 });

  expect(snapshot.healthy).toBe(true);
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { GatewayServiceRuntime } from "../../daemon/service-runtime.js";
|
||||
import type { GatewayService } from "../../daemon/service.js";
|
||||
import { probeGateway } from "../../gateway/probe.js";
|
||||
import {
|
||||
classifyPortListener,
|
||||
formatPortDiagnostics,
|
||||
@@ -29,6 +30,31 @@ function listenerOwnedByRuntimePid(params: {
|
||||
return params.listener.pid === params.runtimePid || params.listener.ppid === params.runtimePid;
|
||||
}
|
||||
|
||||
/**
 * Heuristically decides whether a WebSocket close looks like an
 * authentication/authorization rejection.
 *
 * Only close code 1008 (policy violation) is considered. The close reason is
 * then matched case-insensitively against a small set of auth-related
 * keywords.
 *
 * @param code - WebSocket close code, if one was received.
 * @param reason - WebSocket close reason text, if one was received.
 * @returns `true` when the code is 1008 and the reason mentions an
 *   auth-related keyword; `false` otherwise (including a missing reason).
 */
function looksLikeAuthClose(code: number | undefined, reason: string | undefined): boolean {
  if (code !== 1008) {
    return false;
  }
  const normalized = (reason ?? "").toLowerCase();
  // Substring match: "auth" also covers "authentication", "unauthorized", etc.
  const authKeywords = ["auth", "token", "password", "scope", "role"];
  return authKeywords.some((keyword) => normalized.includes(keyword));
}
|
||||
|
||||
/**
 * Probes the gateway WebSocket endpoint on the loopback interface to decide
 * whether something answering like a gateway is listening on `port`.
 *
 * Credentials are taken from `OPENCLAW_GATEWAY_TOKEN` and
 * `OPENCLAW_GATEWAY_PASSWORD` when set. A probe that is closed with an
 * auth-looking 1008 close still counts as reachable.
 *
 * @param port - Local TCP port to probe.
 * @returns `true` when the probe succeeds or is rejected with an auth-style
 *   close; `false` otherwise.
 */
async function confirmGatewayReachable(port: number): Promise<boolean> {
  // `||` (not `??`) is deliberate: blank or whitespace-only values become undefined.
  const token = process.env.OPENCLAW_GATEWAY_TOKEN?.trim() || undefined;
  const password = process.env.OPENCLAW_GATEWAY_PASSWORD?.trim() || undefined;
  const probe = await probeGateway({
    url: `ws://127.0.0.1:${port}`,
    // Only send an auth payload when at least one credential is present.
    auth: token || password ? { token, password } : undefined,
    timeoutMs: 1_000,
  });
  return probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason);
}
|
||||
|
||||
export async function inspectGatewayRestart(params: {
|
||||
service: GatewayService;
|
||||
port: number;
|
||||
@@ -79,7 +105,14 @@ export async function inspectGatewayRestart(params: {
|
||||
? portUsage.listeners.some((listener) => listenerOwnedByRuntimePid({ listener, runtimePid }))
|
||||
: gatewayListeners.length > 0 ||
|
||||
(portUsage.status === "busy" && portUsage.listeners.length === 0);
|
||||
const healthy = running && ownsPort;
|
||||
let healthy = running && ownsPort;
|
||||
if (!healthy && running && portUsage.status === "busy") {
|
||||
try {
|
||||
healthy = await confirmGatewayReachable(params.port);
|
||||
} catch {
|
||||
// best-effort probe
|
||||
}
|
||||
}
|
||||
const staleGatewayPids = Array.from(
|
||||
new Set([
|
||||
...gatewayListeners
|
||||
|
||||
Reference in New Issue
Block a user