From ca0232ff0ea12fb74081ab17e5b122fb488d6217 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 01:11:44 +0100 Subject: [PATCH] fix(bonjour): bound stuck advertiser restarts --- CHANGELOG.md | 5 ++++ docs/gateway/bonjour.md | 5 ++++ extensions/bonjour/src/advertiser.test.ts | 32 +++++++++++++++++++++++ extensions/bonjour/src/advertiser.ts | 26 ++++++++++++++++-- 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d036051bd82..1851db353ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,11 @@ Docs: https://docs.openclaw.ai ### Fixes +- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when + the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables + itself for the current Gateway process after repeated failed restarts while + the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, + @FiredMosquito831, and @spikefcz. - Feishu: accept Schema 2.0 card action callbacks that report `context.open_chat_id` instead of legacy `context.chat_id`, so button callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068. diff --git a/docs/gateway/bonjour.md b/docs/gateway/bonjour.md index 90ef9cefd85..0f0e8e5cd06 100644 --- a/docs/gateway/bonjour.md +++ b/docs/gateway/bonjour.md @@ -139,6 +139,7 @@ The Gateway writes a rolling log file (printed on startup as - `bonjour: advertise failed ...` - `bonjour: ... name conflict resolved` / `hostname conflict resolved` - `bonjour: watchdog detected non-announced service ...` +- `bonjour: disabling advertiser after ... failed restarts ...` ## Debugging on iOS node @@ -155,6 +156,10 @@ The log includes browser state transitions and result‑set changes. - **Bonjour doesn’t cross networks**: use Tailnet or SSH. - **Multicast blocked**: some Wi‑Fi networks disable mDNS. +- **Advertiser stuck in probing/announcing**: hosts with blocked multicast, + container bridges, WSL, or interface churn can leave the ciao advertiser in a + non-announced state. OpenClaw retries a few times and then disables Bonjour + for the current Gateway process instead of restarting the advertiser forever. - **Sleep / interface churn**: macOS may temporarily drop mDNS results; retry. - **Browse works but resolve fails**: keep machine names simple (avoid emojis or punctuation), then restart the Gateway. The service instance name derives from diff --git a/extensions/bonjour/src/advertiser.test.ts b/extensions/bonjour/src/advertiser.test.ts index 7888a9b6c01..bd59c3ac69c 100644 --- a/extensions/bonjour/src/advertiser.test.ts +++ b/extensions/bonjour/src/advertiser.test.ts @@ -533,6 +533,38 @@ describe("gateway bonjour advertiser", () => { expect(shutdown).toHaveBeenCalledTimes(1); }); + it("disables bonjour for the process after repeated stuck advertiser restarts", async () => { + enableAdvertiserUnitMode(); + vi.useFakeTimers(); + + const stateRef = { value: "announcing" }; + const destroy = vi.fn().mockResolvedValue(undefined); + const advertise = vi.fn(() => new Promise(() => {})); + mockCiaoService({ advertise, destroy, stateRef }); + + const started = await startAdvertiser({ + gatewayPort: 18789, + sshPort: 2222, + }); + + await vi.advanceTimersByTimeAsync(65_000); + + expect(logger.warn).toHaveBeenCalledWith( + expect.stringContaining("disabling advertiser after 3 failed restarts"), + ); + expect(createService).toHaveBeenCalledTimes(4); + expect(advertise).toHaveBeenCalledTimes(4); + expect(destroy).toHaveBeenCalledTimes(4); + expect(shutdown).toHaveBeenCalledTimes(1); + + await vi.advanceTimersByTimeAsync(60_000); + expect(createService).toHaveBeenCalledTimes(4); + expect(advertise).toHaveBeenCalledTimes(4); + + await started.stop(); + expect(shutdown).toHaveBeenCalledTimes(1); + }); + it("normalizes hostnames with domains for service names", async () => { // Allow advertiser to run in unit tests. delete process.env.VITEST; diff --git a/extensions/bonjour/src/advertiser.ts b/extensions/bonjour/src/advertiser.ts index e2be9b082d2..efa5a41a498 100644 --- a/extensions/bonjour/src/advertiser.ts +++ b/extensions/bonjour/src/advertiser.ts @@ -69,6 +69,7 @@ type BonjourAdvertiserDeps = { const WATCHDOG_INTERVAL_MS = 5_000; const REPAIR_DEBOUNCE_MS = 30_000; const STUCK_ANNOUNCING_MS = 8_000; +const MAX_CONSECUTIVE_RESTARTS = 3; const BONJOUR_ANNOUNCED_STATE = "announced"; const CIAO_SELF_PROBE_RETRY_FRAGMENT = "failed probing with reason: Error: Can't probe for a service which is announced already."; @@ -332,7 +333,9 @@ export async function startGatewayBonjourAdvertiser( let stopped = false; let recreatePromise: Promise | null = null; - let cycle = createCycle(); + let disabled = false; + let consecutiveRestarts = 0; + let cycle: BonjourCycle | null = createCycle(); const stateTracker = new Map(); attachConflictListeners(cycle.services); startAdvertising(cycle.services); @@ -353,13 +356,26 @@ export async function startGatewayBonjourAdvertiser( }; const recreateAdvertiser = async (reason: string) => { - if (stopped) { + if (stopped || disabled) { return; } if (recreatePromise) { return recreatePromise; } recreatePromise = (async () => { + consecutiveRestarts += 1; + if (consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS) { + disabled = true; + logger.warn( + `bonjour: disabling advertiser after ${MAX_CONSECUTIVE_RESTARTS} failed restarts (${reason}); set discovery.mdns.mode="off" or OPENCLAW_DISABLE_BONJOUR=1 to disable mDNS discovery`, + ); + const previous = cycle; + cycle = null; + stateTracker.clear(); + await stopCycle(previous, { shutdownResponder: true }); + restoreConsoleLog(); + return; + } logger.warn(`bonjour: restarting advertiser (${reason})`); const previous = cycle; await stopCycle(previous); @@ -378,12 +394,18 @@ export async function startGatewayBonjourAdvertiser( if (stopped || recreatePromise) { return; } + if (disabled || !cycle) { + return; + } updateStateTrackers(cycle.services); for (const { label, svc } of cycle.services) { const stateUnknown = (svc as { serviceState?: unknown }).serviceState; if (typeof stateUnknown !== "string") { continue; } + if (stateUnknown === "announced") { + consecutiveRestarts = 0; + } const tracked = stateTracker.get(label); if ( stateUnknown !== "announced" &&