mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-03 17:14:06 +00:00
fix(native-hook-relay): prune stale bridge files on registration (#87706)
Summary: - The PR adds registration-time pruning of expired or ESRCH-dead native-hook relay bridge JSON files and regression tests for dead, expired, live, and unknown-liveness foreign records. - PR surface: Source +59, Tests +148. Total +207 across 2 files. - Reproducibility: yes. The linked source PR includes a concrete live WSL2/systemd reproduction with stale bri ... hook failures, and current source shows the native hook CLI fails closed when the relay cannot be reached. Automerge notes: - PR branch already contained follow-up commit before automerge: test(native-hook-relay): cover stale bridge pruning - PR branch already contained follow-up commit before automerge: ci: raise plugin sdk strict smoke heap - PR branch already contained follow-up commit before automerge: test(native-hook-relay): satisfy process kill mock types - PR branch already contained follow-up commit before automerge: fix(native-hook-relay): prune stale bridge files on registration Validation: - ClawSweeper review passed for head65c17cdf6e. - Required merge gates passed before the squash merge. Prepared head SHA:65c17cdf6eReview: https://github.com/openclaw/openclaw/pull/87706#issuecomment-4566131519 Co-authored-by: Applied-AI-Solutions-hub <Applied-AI-Solutions-hub@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { statSync, writeFileSync } from "node:fs";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { rmSync, statSync, writeFileSync } from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import { createServer, request as httpRequest } from "node:http";
|
||||
import { tmpdir } from "node:os";
|
||||
@@ -24,6 +25,7 @@ import {
|
||||
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
vi.restoreAllMocks();
|
||||
resetGlobalHookRunner();
|
||||
setActivePluginRegistry(createEmptyPluginRegistry());
|
||||
testing.clearNativeHookRelaysForTests();
|
||||
@@ -80,6 +82,36 @@ async function waitForNativeHookRelayBridgeRecord(
|
||||
return record as Record<string, unknown>;
|
||||
}
|
||||
|
||||
async function writeForeignNativeHookRelayBridgeRecordForTests(
|
||||
relayId: string,
|
||||
record: {
|
||||
pid: number;
|
||||
expiresAtMs: number;
|
||||
},
|
||||
): Promise<string> {
|
||||
const bridgeDir = testing.getNativeHookRelayBridgeDirForTests();
|
||||
await fs.mkdir(bridgeDir, { recursive: true, mode: 0o700 });
|
||||
const registryPath = testing.getNativeHookRelayBridgeRegistryPathForTests(relayId);
|
||||
writeFileSync(
|
||||
registryPath,
|
||||
`${JSON.stringify({
|
||||
version: 1,
|
||||
relayId,
|
||||
pid: record.pid,
|
||||
hostname: "127.0.0.1",
|
||||
port: 9,
|
||||
token: `token-${relayId}`,
|
||||
expiresAtMs: record.expiresAtMs,
|
||||
})}\n`,
|
||||
{ mode: 0o600 },
|
||||
);
|
||||
return registryPath;
|
||||
}
|
||||
|
||||
function uniqueNativeHookRelayIdForTests(prefix: string): string {
|
||||
return `${prefix}-${randomUUID()}`;
|
||||
}
|
||||
|
||||
function openDeferredNativeHookRelayBridgeRequest(
|
||||
record: Record<string, unknown>,
|
||||
payload: Record<string, unknown>,
|
||||
@@ -852,6 +884,122 @@ describe("native hook relay registry", () => {
|
||||
expect(response).toEqual({ stdout: "", stderr: "", exitCode: 0 });
|
||||
});
|
||||
|
||||
it("prunes dead foreign direct bridge registry files during registration", async () => {
|
||||
const stalePath = await writeForeignNativeHookRelayBridgeRecordForTests(
|
||||
uniqueNativeHookRelayIdForTests("codex-dead-foreign-bridge"),
|
||||
{
|
||||
pid: 9_999_991,
|
||||
expiresAtMs: Date.now() + 60_000,
|
||||
},
|
||||
);
|
||||
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
|
||||
if (pid === 9_999_991) {
|
||||
throw Object.assign(new Error("missing process"), { code: "ESRCH" });
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
registerNativeHookRelay({
|
||||
provider: "codex",
|
||||
relayId: "codex-prune-dead-foreign-bridge-session",
|
||||
sessionId: "session-1",
|
||||
runId: "run-1",
|
||||
allowedEvents: ["pre_tool_use"],
|
||||
});
|
||||
|
||||
expect(kill).toHaveBeenCalledWith(9_999_991, 0);
|
||||
await expect(fs.stat(stalePath)).rejects.toMatchObject({ code: "ENOENT" });
|
||||
});
|
||||
|
||||
it("prunes expired foreign direct bridge registry files even when their pid is alive", async () => {
|
||||
const stalePath = await writeForeignNativeHookRelayBridgeRecordForTests(
|
||||
uniqueNativeHookRelayIdForTests("codex-expired-foreign-bridge"),
|
||||
{
|
||||
pid: 9_999_992,
|
||||
expiresAtMs: Date.now() - 1,
|
||||
},
|
||||
);
|
||||
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
|
||||
if (pid !== 9_999_992) {
|
||||
throw Object.assign(new Error("unexpected process"), { code: "ESRCH" });
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
registerNativeHookRelay({
|
||||
provider: "codex",
|
||||
relayId: "codex-prune-expired-foreign-bridge-session",
|
||||
sessionId: "session-1",
|
||||
runId: "run-1",
|
||||
allowedEvents: ["pre_tool_use"],
|
||||
});
|
||||
|
||||
expect(kill).not.toHaveBeenCalled();
|
||||
await expect(fs.stat(stalePath)).rejects.toMatchObject({ code: "ENOENT" });
|
||||
});
|
||||
|
||||
it("preserves live unexpired foreign direct bridge registry files during registration", async () => {
|
||||
const livePath = await writeForeignNativeHookRelayBridgeRecordForTests(
|
||||
uniqueNativeHookRelayIdForTests("codex-live-foreign-bridge"),
|
||||
{
|
||||
pid: 9_999_993,
|
||||
expiresAtMs: Date.now() + 60_000,
|
||||
},
|
||||
);
|
||||
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
|
||||
if (pid !== 9_999_993) {
|
||||
throw Object.assign(new Error("unexpected process"), { code: "ESRCH" });
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
try {
|
||||
registerNativeHookRelay({
|
||||
provider: "codex",
|
||||
relayId: "codex-preserve-live-foreign-bridge-session",
|
||||
sessionId: "session-1",
|
||||
runId: "run-1",
|
||||
allowedEvents: ["pre_tool_use"],
|
||||
});
|
||||
|
||||
expect(kill).toHaveBeenCalledWith(9_999_993, 0);
|
||||
await expect(fs.stat(livePath)).resolves.toBeDefined();
|
||||
} finally {
|
||||
rmSync(livePath, { force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("preserves foreign direct bridge registry files when liveness is unknown", async () => {
|
||||
const livePath = await writeForeignNativeHookRelayBridgeRecordForTests(
|
||||
uniqueNativeHookRelayIdForTests("codex-unknown-liveness-foreign-bridge"),
|
||||
{
|
||||
pid: 9_999_994,
|
||||
expiresAtMs: Date.now() + 60_000,
|
||||
},
|
||||
);
|
||||
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
|
||||
if (pid === 9_999_994) {
|
||||
throw Object.assign(new Error("permission denied"), { code: "EPERM" });
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
try {
|
||||
registerNativeHookRelay({
|
||||
provider: "codex",
|
||||
relayId: "codex-preserve-unknown-liveness-foreign-bridge-session",
|
||||
sessionId: "session-1",
|
||||
runId: "run-1",
|
||||
allowedEvents: ["pre_tool_use"],
|
||||
});
|
||||
|
||||
expect(kill).toHaveBeenCalledWith(9_999_994, 0);
|
||||
await expect(fs.stat(livePath)).resolves.toBeDefined();
|
||||
} finally {
|
||||
rmSync(livePath, { force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps direct bridge registry files private and loopback-only", async () => {
|
||||
const relay = registerNativeHookRelay({
|
||||
provider: "codex",
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
import { createHash, randomUUID } from "node:crypto";
|
||||
import { chmodSync, existsSync, lstatSync, mkdirSync, readFileSync, rmSync } from "node:fs";
|
||||
import {
|
||||
chmodSync,
|
||||
existsSync,
|
||||
lstatSync,
|
||||
mkdirSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
} from "node:fs";
|
||||
import {
|
||||
createServer,
|
||||
request as httpRequest,
|
||||
@@ -700,7 +708,58 @@ function pruneExpiredNativeHookRelays(now = Date.now()): void {
|
||||
}
|
||||
}
|
||||
|
||||
function isNativeHookRelayBridgePidDead(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return false;
|
||||
} catch (error) {
|
||||
return typeof error === "object" && error !== null && "code" in error && error.code === "ESRCH";
|
||||
}
|
||||
}
|
||||
|
||||
function registerNativeHookRelayBridge(registration: ActiveNativeHookRelayRegistration): void {
|
||||
// Prune actually stale bridge files from prior gateway processes. The bridge
|
||||
// directory is scoped by OS user (uid) and is shared across all OpenClaw
|
||||
// gateways/profiles run by that user, so a record with a non-current PID is
|
||||
// NOT automatically stale — it can legitimately belong to another live
|
||||
// gateway under the same uid. Only prune records whose owning PID is dead
|
||||
// or whose expiry has passed; leave live foreign records alone.
|
||||
try {
|
||||
const staleDir = ensureNativeHookRelayBridgeDir();
|
||||
const now = Date.now();
|
||||
for (const name of readdirSync(staleDir)) {
|
||||
if (!name.endsWith(".json")) {
|
||||
continue;
|
||||
}
|
||||
const full = path.join(staleDir, name);
|
||||
try {
|
||||
const rec = JSON.parse(readFileSync(full, "utf8")) as {
|
||||
pid?: number;
|
||||
expiresAtMs?: number;
|
||||
};
|
||||
if (!rec || typeof rec.pid !== "number" || rec.pid === process.pid) {
|
||||
continue;
|
||||
}
|
||||
const expired = typeof rec.expiresAtMs === "number" && now > rec.expiresAtMs;
|
||||
const deadPid = !expired && isNativeHookRelayBridgePidDead(rec.pid);
|
||||
if (!expired && !deadPid) {
|
||||
// Live foreign record from another same-uid gateway/profile. Preserve it.
|
||||
continue;
|
||||
}
|
||||
rmSync(full, { force: true });
|
||||
log.debug("pruned stale native hook relay bridge file", {
|
||||
file: name,
|
||||
stalePid: rec.pid,
|
||||
currentPid: process.pid,
|
||||
reason: deadPid ? "dead-pid" : "expired",
|
||||
});
|
||||
} catch {
|
||||
// ignore unparseable / racing files
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
log.debug("native hook relay bridge dir prune skipped", { error });
|
||||
}
|
||||
unregisterNativeHookRelayBridge(registration.relayId);
|
||||
const token = randomUUID();
|
||||
const bridgeDir = ensureNativeHookRelayBridgeDir();
|
||||
|
||||
Reference in New Issue
Block a user