fix: close native hook relay replacement race

This commit is contained in:
Peter Steinberger
2026-05-29 21:47:07 +02:00
parent 1e2fda9e68
commit c4e1bb30da
2 changed files with 55 additions and 3 deletions

View File

@@ -776,6 +776,20 @@ describe("native hook relay registry", () => {
});
});
it("treats stale direct bridge records as retryable during lookup", () => {
expect(
testing.isNativeHookRelayBridgeLookupRetryableForTests(
new Error("native hook relay bridge stale registration"),
),
).toBe(true);
expect(
testing.isNativeHookRelayBridgeLookupRetryableForTests(
new Error("native hook relay bridge stale registration"),
300,
),
).toBe(false);
});
it("accepts bootstrap generation mismatches during a bounded grace window", async () => {
const relay = registerNativeHookRelay({
provider: "codex",

View File

@@ -201,6 +201,7 @@ const MAX_PERMISSION_ALLOW_ALWAYS_ENTRIES = 512;
const MAX_NATIVE_HOOK_BRIDGE_BODY_BYTES = 5_000_000;
const MAX_NATIVE_HOOK_BRIDGE_RESPONSE_BYTES = 5_000_000;
const NATIVE_HOOK_BRIDGE_RETRY_INTERVAL_MS = 25;
const NATIVE_HOOK_BRIDGE_REPLACEMENT_RECORD_GRACE_MS = 250;
const NATIVE_HOOK_RELAY_BRIDGE_STALE_REGISTRATION_ERROR =
"native hook relay bridge stale registration";
const ANSI_ESCAPE_PATTERN = new RegExp(`${String.fromCharCode(27)}\\[[0-?]*[ -/]*[@-~]`, "g");
@@ -717,7 +718,12 @@ export async function invokeNativeHookRelayBridge(
) {
break;
}
if (!isRetryableNativeHookRelayBridgeError(error)) {
if (
!isRetryableNativeHookRelayBridgeLookupError({
error,
elapsedMs: Date.now() - startedAt,
})
) {
break;
}
await delay(
@@ -894,7 +900,9 @@ function registerNativeHookRelayBridge(registration: ActiveNativeHookRelayRegist
} catch (error) {
log.debug("native hook relay bridge dir prune skipped", { error });
}
unregisterNativeHookRelayBridge(registration.relayId);
unregisterNativeHookRelayBridge(registration.relayId, {
deferRegistryRemovalMs: NATIVE_HOOK_BRIDGE_REPLACEMENT_RECORD_GRACE_MS,
});
const token = randomUUID();
const bridgeDir = ensureNativeHookRelayBridgeDir();
const bridgeKey = nativeHookRelayBridgeKey(registration.relayId);
@@ -954,7 +962,10 @@ function writeNativeHookRelayBridgeRecordForRegistration(
writeNativeHookRelayBridgeRecord(bridge.registryPath, record);
}
function unregisterNativeHookRelayBridge(relayId: string): void {
function unregisterNativeHookRelayBridge(
relayId: string,
options?: { deferRegistryRemovalMs?: number },
): void {
const bridge = relayBridges.get(relayId);
if (!bridge) {
return;
@@ -963,6 +974,19 @@ function unregisterNativeHookRelayBridge(relayId: string): void {
bridge.server.close();
const record = readNativeHookRelayBridgeRecordIfExists(relayId);
if (record?.token === bridge.token) {
const deferRegistryRemovalMs = normalizePositiveInteger(options?.deferRegistryRemovalMs, 0);
if (deferRegistryRemovalMs > 0) {
// During stable-id replacement, leave the old record in place until the
// new bridge writes over it. Hook subprocesses can then retry
// ECONNREFUSED/stale-registration instead of observing a missing relay.
const timeout = setTimeout(() => {
if (readNativeHookRelayBridgeRecordIfExists(relayId)?.token === bridge.token) {
rmSync(bridge.registryPath, { force: true });
}
}, deferRegistryRemovalMs);
timeout.unref();
return;
}
rmSync(bridge.registryPath, { force: true });
}
}
@@ -1221,6 +1245,17 @@ function isRetryableNativeHookRelayBridgeError(error: unknown): boolean {
);
}
function isRetryableNativeHookRelayBridgeLookupError(params: {
error: unknown;
elapsedMs: number;
}): boolean {
return (
isRetryableNativeHookRelayBridgeError(params.error) ||
(params.elapsedMs < NATIVE_HOOK_BRIDGE_REPLACEMENT_RECORD_GRACE_MS &&
isNativeHookRelayBridgeStaleRegistrationError(params.error))
);
}
function nativeHookRelayBridgeDir(): string {
const uid = typeof process.getuid === "function" ? process.getuid() : "nouid";
return path.join(tmpdir(), `openclaw-native-hook-relays-${uid}`);
@@ -2235,6 +2270,9 @@ export const testing = {
const record = readNativeHookRelayBridgeRecordIfExists(relayId);
return record ? { ...record } : undefined;
},
isNativeHookRelayBridgeLookupRetryableForTests(error: unknown, elapsedMs = 0): boolean {
return isRetryableNativeHookRelayBridgeLookupError({ error, elapsedMs });
},
formatPermissionApprovalDescriptionForTests(
request: NativeHookRelayPermissionApprovalRequest,
): string {