From c39ca49c718e5b23aaacd3b08d3ce2bf65459928 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 30 Apr 2026 03:46:11 +0100 Subject: [PATCH] fix(bonjour): cap flapping advertiser restarts --- CHANGELOG.md | 1 + extensions/bonjour/src/advertiser.test.ts | 43 +++++++++++++++++++++++ extensions/bonjour/src/advertiser.ts | 24 +++++++++++-- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d46b8bafb0..e6825b6d352 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Bonjour/Gateway: cap flapping advertiser restarts in a sliding window, so mDNS probing/name-conflict loops disable discovery instead of churning indefinitely on constrained hosts. Refs #74209 and #74242. Thanks @ndj888 and @Sanjays2402. - Plugins/runtime-deps: verify staged package entry files before reusing mirrored runtime roots, so browser-control repairs incomplete `ajv`/MCP SDK installs after update instead of failing after restart on a missing `ajv/dist/ajv.js`. Refs #74630. Thanks @spickeringlr. - Channels/Feishu: retry file-typed iOS video resource downloads as `media` after a Feishu/Lark HTTP 502 and preserve the original 502 when the fallback also fails. Fixes #49855; carries forward #50164 and #73986. Thanks @alex-xuweilong. - Providers/Amazon Bedrock: expose the full Claude Opus 4.7 thinking profile (`xhigh`, `adaptive`, and `max`) for Bedrock model refs, while keeping Opus/Sonnet 4.6 on adaptive-by-default, so `/think` menus and validation match the Anthropic transport behavior. Fixes #74701. Thanks @prasad-yashdeep, @sparkleHazard, @Sanjays2402, and @hclsys. diff --git a/extensions/bonjour/src/advertiser.test.ts b/extensions/bonjour/src/advertiser.test.ts index a62694fe890..f296b3653ea 100644 --- a/extensions/bonjour/src/advertiser.test.ts +++ b/extensions/bonjour/src/advertiser.test.ts @@ -727,6 +727,49 @@ describe("gateway bonjour advertiser", () => { expect(shutdown).toHaveBeenCalledTimes(1); }); + it("disables bonjour when the advertiser flaps within a sliding window", async () => { + enableAdvertiserUnitMode(); + vi.useFakeTimers(); + + const stateRef = { value: "announced" }; + const destroy = vi.fn().mockResolvedValue(undefined); + const advertise = vi.fn().mockResolvedValue(undefined); + mockCiaoService({ advertise, destroy, stateRef }); + + const started = await startAdvertiser({ + gatewayPort: 18789, + sshPort: 2222, + }); + + for (let cycle = 0; cycle < 12; cycle += 1) { + stateRef.value = "announced"; + await vi.advanceTimersByTimeAsync(5_000); + stateRef.value = "probing"; + await vi.advanceTimersByTimeAsync(25_000); + if ( + logger.warn.mock.calls.some( + (call) => typeof call[0] === "string" && call[0].includes("disabling advertiser after"), + ) + ) { + break; + } + } + + const disableLog = logger.warn.mock.calls.find( + (call) => typeof call[0] === "string" && call[0].includes("disabling advertiser after"), + ); + expect(disableLog).toBeDefined(); + expect(String(disableLog?.[0])).toMatch(/restarts within \d+ minutes/); + + const advertiseCallsAtDisable = advertise.mock.calls.length; + const createServiceCallsAtDisable = createService.mock.calls.length; + await vi.advanceTimersByTimeAsync(5 * 60_000); + expect(advertise).toHaveBeenCalledTimes(advertiseCallsAtDisable); + expect(createService).toHaveBeenCalledTimes(createServiceCallsAtDisable); + + await started.stop(); + }); + it("normalizes hostnames with domains for service names", async () => { // Allow advertiser to run in unit tests. delete process.env.VITEST; diff --git a/extensions/bonjour/src/advertiser.ts b/extensions/bonjour/src/advertiser.ts index d5a1747fc1d..251ad65413a 100644 --- a/extensions/bonjour/src/advertiser.ts +++ b/extensions/bonjour/src/advertiser.ts @@ -88,6 +88,10 @@ const REPAIR_DEBOUNCE_MS = 30_000; // See https://github.com/openclaw/openclaw/issues/72481 const STUCK_ANNOUNCING_MS = 20_000; const MAX_CONSECUTIVE_RESTARTS = 3; +// A flapping advertiser can briefly reach "announced" between probing +// failures, which resets the consecutive counter. Bound total restarts too. +const RESTART_WINDOW_MS = 30 * 60_000; +const MAX_RESTARTS_IN_WINDOW = 5; const BONJOUR_ANNOUNCED_STATE = "announced"; const CIAO_SELF_PROBE_RETRY_FRAGMENT = "failed probing with reason: Error: Can't probe for a service which is announced already."; @@ -563,6 +567,7 @@ export async function startGatewayBonjourAdvertiser( let recreatePromise: Promise | null = null; let disabled = false; let consecutiveRestarts = 0; + const restartTimestamps: number[] = []; let cycle: BonjourCycle | null = createCycle(); const stateTracker = new Map(); @@ -590,10 +595,25 @@ export async function startGatewayBonjourAdvertiser( } recreatePromise = (async () => { consecutiveRestarts += 1; - if (consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS) { + const now = Date.now(); + while ( + restartTimestamps.length > 0 && + now - (restartTimestamps[0] ?? 0) > RESTART_WINDOW_MS + ) { + restartTimestamps.shift(); + } + restartTimestamps.push(now); + const tooManyConsecutive = consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS; + const tooManyInWindow = restartTimestamps.length >= MAX_RESTARTS_IN_WINDOW; + if (tooManyConsecutive || tooManyInWindow) { disabled = true; + const detail = tooManyConsecutive + ? `${MAX_CONSECUTIVE_RESTARTS} failed restarts` + : `${MAX_RESTARTS_IN_WINDOW} restarts within ${Math.round( + RESTART_WINDOW_MS / 60_000, + )} minutes`; logger.warn( - `bonjour: disabling advertiser after ${MAX_CONSECUTIVE_RESTARTS} failed restarts (${reason}); set discovery.mdns.mode="off" or OPENCLAW_DISABLE_BONJOUR=1 to disable mDNS discovery`, + `bonjour: disabling advertiser after ${detail} (${reason}); set discovery.mdns.mode="off" or OPENCLAW_DISABLE_BONJOUR=1 to disable mDNS discovery`, ); const previous = cycle; cycle = null;