fix(native-hook-relay): prune stale bridge files on registration (#87706)

Summary:
- The PR adds registration-time pruning of expired or ESRCH-dead native-hook relay bridge JSON files and regression tests for dead, expired, live, and unknown-liveness foreign records.
- PR surface: Source +59, Tests +148. Total +207 across 2 files.
- Reproducibility: yes. The linked source PR includes a concrete live WSL2/systemd reproduction with stale bri ...  hook failures, and current source shows the native hook CLI fails closed when the relay cannot be reached.

Automerge notes:
- PR branch already contained follow-up commit before automerge: test(native-hook-relay): cover stale bridge pruning
- PR branch already contained follow-up commit before automerge: ci: raise plugin sdk strict smoke heap
- PR branch already contained follow-up commit before automerge: test(native-hook-relay): satisfy process kill mock types
- PR branch already contained follow-up commit before automerge: fix(native-hook-relay): prune stale bridge files on registration

Validation:
- ClawSweeper review passed for head 65c17cdf6e.
- Required merge gates passed before the squash merge.

Prepared head SHA: 65c17cdf6e
Review: https://github.com/openclaw/openclaw/pull/87706#issuecomment-4566131519

Co-authored-by: Applied-AI-Solutions-hub <Applied-AI-Solutions-hub@users.noreply.github.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: takhoffman
Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
clawsweeper[bot]
2026-05-28 17:17:28 +00:00
committed by GitHub
parent 27dfb9149f
commit 202ccf4cf7
2 changed files with 209 additions and 2 deletions

View File

@@ -1,4 +1,5 @@
import { statSync, writeFileSync } from "node:fs";
import { randomUUID } from "node:crypto";
import { rmSync, statSync, writeFileSync } from "node:fs";
import fs from "node:fs/promises";
import { createServer, request as httpRequest } from "node:http";
import { tmpdir } from "node:os";
@@ -24,6 +25,7 @@ import {
afterEach(() => {
vi.useRealTimers();
vi.restoreAllMocks();
resetGlobalHookRunner();
setActivePluginRegistry(createEmptyPluginRegistry());
testing.clearNativeHookRelaysForTests();
@@ -80,6 +82,36 @@ async function waitForNativeHookRelayBridgeRecord(
return record as Record<string, unknown>;
}
async function writeForeignNativeHookRelayBridgeRecordForTests(
relayId: string,
record: {
pid: number;
expiresAtMs: number;
},
): Promise<string> {
const bridgeDir = testing.getNativeHookRelayBridgeDirForTests();
await fs.mkdir(bridgeDir, { recursive: true, mode: 0o700 });
const registryPath = testing.getNativeHookRelayBridgeRegistryPathForTests(relayId);
writeFileSync(
registryPath,
`${JSON.stringify({
version: 1,
relayId,
pid: record.pid,
hostname: "127.0.0.1",
port: 9,
token: `token-${relayId}`,
expiresAtMs: record.expiresAtMs,
})}\n`,
{ mode: 0o600 },
);
return registryPath;
}
function uniqueNativeHookRelayIdForTests(prefix: string): string {
return `${prefix}-${randomUUID()}`;
}
function openDeferredNativeHookRelayBridgeRequest(
record: Record<string, unknown>,
payload: Record<string, unknown>,
@@ -852,6 +884,122 @@ describe("native hook relay registry", () => {
expect(response).toEqual({ stdout: "", stderr: "", exitCode: 0 });
});
it("prunes dead foreign direct bridge registry files during registration", async () => {
const stalePath = await writeForeignNativeHookRelayBridgeRecordForTests(
uniqueNativeHookRelayIdForTests("codex-dead-foreign-bridge"),
{
pid: 9_999_991,
expiresAtMs: Date.now() + 60_000,
},
);
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
if (pid === 9_999_991) {
throw Object.assign(new Error("missing process"), { code: "ESRCH" });
}
return true;
});
registerNativeHookRelay({
provider: "codex",
relayId: "codex-prune-dead-foreign-bridge-session",
sessionId: "session-1",
runId: "run-1",
allowedEvents: ["pre_tool_use"],
});
expect(kill).toHaveBeenCalledWith(9_999_991, 0);
await expect(fs.stat(stalePath)).rejects.toMatchObject({ code: "ENOENT" });
});
it("prunes expired foreign direct bridge registry files even when their pid is alive", async () => {
const stalePath = await writeForeignNativeHookRelayBridgeRecordForTests(
uniqueNativeHookRelayIdForTests("codex-expired-foreign-bridge"),
{
pid: 9_999_992,
expiresAtMs: Date.now() - 1,
},
);
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
if (pid !== 9_999_992) {
throw Object.assign(new Error("unexpected process"), { code: "ESRCH" });
}
return true;
});
registerNativeHookRelay({
provider: "codex",
relayId: "codex-prune-expired-foreign-bridge-session",
sessionId: "session-1",
runId: "run-1",
allowedEvents: ["pre_tool_use"],
});
expect(kill).not.toHaveBeenCalled();
await expect(fs.stat(stalePath)).rejects.toMatchObject({ code: "ENOENT" });
});
it("preserves live unexpired foreign direct bridge registry files during registration", async () => {
const livePath = await writeForeignNativeHookRelayBridgeRecordForTests(
uniqueNativeHookRelayIdForTests("codex-live-foreign-bridge"),
{
pid: 9_999_993,
expiresAtMs: Date.now() + 60_000,
},
);
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
if (pid !== 9_999_993) {
throw Object.assign(new Error("unexpected process"), { code: "ESRCH" });
}
return true;
});
try {
registerNativeHookRelay({
provider: "codex",
relayId: "codex-preserve-live-foreign-bridge-session",
sessionId: "session-1",
runId: "run-1",
allowedEvents: ["pre_tool_use"],
});
expect(kill).toHaveBeenCalledWith(9_999_993, 0);
await expect(fs.stat(livePath)).resolves.toBeDefined();
} finally {
rmSync(livePath, { force: true });
}
});
it("preserves foreign direct bridge registry files when liveness is unknown", async () => {
const livePath = await writeForeignNativeHookRelayBridgeRecordForTests(
uniqueNativeHookRelayIdForTests("codex-unknown-liveness-foreign-bridge"),
{
pid: 9_999_994,
expiresAtMs: Date.now() + 60_000,
},
);
const kill = vi.spyOn(process, "kill").mockImplementation((pid) => {
if (pid === 9_999_994) {
throw Object.assign(new Error("permission denied"), { code: "EPERM" });
}
return true;
});
try {
registerNativeHookRelay({
provider: "codex",
relayId: "codex-preserve-unknown-liveness-foreign-bridge-session",
sessionId: "session-1",
runId: "run-1",
allowedEvents: ["pre_tool_use"],
});
expect(kill).toHaveBeenCalledWith(9_999_994, 0);
await expect(fs.stat(livePath)).resolves.toBeDefined();
} finally {
rmSync(livePath, { force: true });
}
});
it("keeps direct bridge registry files private and loopback-only", async () => {
const relay = registerNativeHookRelay({
provider: "codex",

View File

@@ -1,5 +1,13 @@
import { createHash, randomUUID } from "node:crypto";
import { chmodSync, existsSync, lstatSync, mkdirSync, readFileSync, rmSync } from "node:fs";
import {
chmodSync,
existsSync,
lstatSync,
mkdirSync,
readdirSync,
readFileSync,
rmSync,
} from "node:fs";
import {
createServer,
request as httpRequest,
@@ -700,7 +708,58 @@ function pruneExpiredNativeHookRelays(now = Date.now()): void {
}
}
function isNativeHookRelayBridgePidDead(pid: number): boolean {
try {
process.kill(pid, 0);
return false;
} catch (error) {
return typeof error === "object" && error !== null && "code" in error && error.code === "ESRCH";
}
}
function registerNativeHookRelayBridge(registration: ActiveNativeHookRelayRegistration): void {
// Prune actually stale bridge files from prior gateway processes. The bridge
// directory is scoped by OS user (uid) and is shared across all OpenClaw
// gateways/profiles run by that user, so a record with a non-current PID is
// NOT automatically stale — it can legitimately belong to another live
// gateway under the same uid. Only prune records whose owning PID is dead
// or whose expiry has passed; leave live foreign records alone.
try {
const staleDir = ensureNativeHookRelayBridgeDir();
const now = Date.now();
for (const name of readdirSync(staleDir)) {
if (!name.endsWith(".json")) {
continue;
}
const full = path.join(staleDir, name);
try {
const rec = JSON.parse(readFileSync(full, "utf8")) as {
pid?: number;
expiresAtMs?: number;
};
if (!rec || typeof rec.pid !== "number" || rec.pid === process.pid) {
continue;
}
const expired = typeof rec.expiresAtMs === "number" && now > rec.expiresAtMs;
const deadPid = !expired && isNativeHookRelayBridgePidDead(rec.pid);
if (!expired && !deadPid) {
// Live foreign record from another same-uid gateway/profile. Preserve it.
continue;
}
rmSync(full, { force: true });
log.debug("pruned stale native hook relay bridge file", {
file: name,
stalePid: rec.pid,
currentPid: process.pid,
reason: deadPid ? "dead-pid" : "expired",
});
} catch {
// ignore unparseable / racing files
}
}
} catch (error) {
log.debug("native hook relay bridge dir prune skipped", { error });
}
unregisterNativeHookRelayBridge(registration.relayId);
const token = randomUUID();
const bridgeDir = ensureNativeHookRelayBridgeDir();