From c4e1bb30da113f6b13962f6c5ddf07f36fa2c35d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 29 May 2026 21:47:07 +0200 Subject: [PATCH] fix: close native hook relay replacement race --- src/agents/harness/native-hook-relay.test.ts | 14 +++++++ src/agents/harness/native-hook-relay.ts | 44 ++++++++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/agents/harness/native-hook-relay.test.ts b/src/agents/harness/native-hook-relay.test.ts index af8c2cc594a..8b53eb10a00 100644 --- a/src/agents/harness/native-hook-relay.test.ts +++ b/src/agents/harness/native-hook-relay.test.ts @@ -776,6 +776,20 @@ describe("native hook relay registry", () => { }); }); + it("treats stale direct bridge records as retryable during lookup", () => { + expect( + testing.isNativeHookRelayBridgeLookupRetryableForTests( + new Error("native hook relay bridge stale registration"), + ), + ).toBe(true); + expect( + testing.isNativeHookRelayBridgeLookupRetryableForTests( + new Error("native hook relay bridge stale registration"), + 300, + ), + ).toBe(false); + }); + it("accepts bootstrap generation mismatches during a bounded grace window", async () => { const relay = registerNativeHookRelay({ provider: "codex", diff --git a/src/agents/harness/native-hook-relay.ts b/src/agents/harness/native-hook-relay.ts index d7e0725efcb..be33b0814a5 100644 --- a/src/agents/harness/native-hook-relay.ts +++ b/src/agents/harness/native-hook-relay.ts @@ -201,6 +201,7 @@ const MAX_PERMISSION_ALLOW_ALWAYS_ENTRIES = 512; const MAX_NATIVE_HOOK_BRIDGE_BODY_BYTES = 5_000_000; const MAX_NATIVE_HOOK_BRIDGE_RESPONSE_BYTES = 5_000_000; const NATIVE_HOOK_BRIDGE_RETRY_INTERVAL_MS = 25; +const NATIVE_HOOK_BRIDGE_REPLACEMENT_RECORD_GRACE_MS = 250; const NATIVE_HOOK_RELAY_BRIDGE_STALE_REGISTRATION_ERROR = "native hook relay bridge stale registration"; const ANSI_ESCAPE_PATTERN = new RegExp(`${String.fromCharCode(27)}\\[[0-?]*[ -/]*[@-~]`, "g"); @@ -717,7 +718,12 @@ export async function invokeNativeHookRelayBridge( ) { break; } - if (!isRetryableNativeHookRelayBridgeError(error)) { + if ( + !isRetryableNativeHookRelayBridgeLookupError({ + error, + elapsedMs: Date.now() - startedAt, + }) + ) { break; } await delay( @@ -894,7 +900,9 @@ function registerNativeHookRelayBridge(registration: ActiveNativeHookRelayRegist } catch (error) { log.debug("native hook relay bridge dir prune skipped", { error }); } - unregisterNativeHookRelayBridge(registration.relayId); + unregisterNativeHookRelayBridge(registration.relayId, { + deferRegistryRemovalMs: NATIVE_HOOK_BRIDGE_REPLACEMENT_RECORD_GRACE_MS, + }); const token = randomUUID(); const bridgeDir = ensureNativeHookRelayBridgeDir(); const bridgeKey = nativeHookRelayBridgeKey(registration.relayId); @@ -954,7 +962,10 @@ function writeNativeHookRelayBridgeRecordForRegistration( writeNativeHookRelayBridgeRecord(bridge.registryPath, record); } -function unregisterNativeHookRelayBridge(relayId: string): void { +function unregisterNativeHookRelayBridge( + relayId: string, + options?: { deferRegistryRemovalMs?: number }, +): void { const bridge = relayBridges.get(relayId); if (!bridge) { return; @@ -963,6 +974,19 @@ function unregisterNativeHookRelayBridge(relayId: string): void { bridge.server.close(); const record = readNativeHookRelayBridgeRecordIfExists(relayId); if (record?.token === bridge.token) { + const deferRegistryRemovalMs = normalizePositiveInteger(options?.deferRegistryRemovalMs, 0); + if (deferRegistryRemovalMs > 0) { + // During stable-id replacement, leave the old record in place until the + // new bridge writes over it. Hook subprocesses can then retry + // ECONNREFUSED/stale-registration instead of observing a missing relay. + const timeout = setTimeout(() => { + if (readNativeHookRelayBridgeRecordIfExists(relayId)?.token === bridge.token) { + rmSync(bridge.registryPath, { force: true }); + } + }, deferRegistryRemovalMs); + timeout.unref(); + return; + } rmSync(bridge.registryPath, { force: true }); } } @@ -1221,6 +1245,17 @@ function isRetryableNativeHookRelayBridgeError(error: unknown): boolean { ); } +function isRetryableNativeHookRelayBridgeLookupError(params: { + error: unknown; + elapsedMs: number; +}): boolean { + return ( + isRetryableNativeHookRelayBridgeError(params.error) || + (params.elapsedMs < NATIVE_HOOK_BRIDGE_REPLACEMENT_RECORD_GRACE_MS && + isNativeHookRelayBridgeStaleRegistrationError(params.error)) + ); +} + function nativeHookRelayBridgeDir(): string { const uid = typeof process.getuid === "function" ? process.getuid() : "nouid"; return path.join(tmpdir(), `openclaw-native-hook-relays-${uid}`); @@ -2235,6 +2270,9 @@ export const testing = { const record = readNativeHookRelayBridgeRecordIfExists(relayId); return record ? { ...record } : undefined; }, + isNativeHookRelayBridgeLookupRetryableForTests(error: unknown, elapsedMs = 0): boolean { + return isRetryableNativeHookRelayBridgeLookupError({ error, elapsedMs }); + }, formatPermissionApprovalDescriptionForTests( request: NativeHookRelayPermissionApprovalRequest, ): string {