fix(bonjour): bound stuck advertiser restarts

This commit is contained in:
Peter Steinberger
2026-04-26 01:11:44 +01:00
parent 3a4325b285
commit ca0232ff0e
4 changed files with 66 additions and 2 deletions

View File

@@ -59,6 +59,11 @@ Docs: https://docs.openclaw.ai
### Fixes
- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when
the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables
itself for the current Gateway process after repeated failed restarts while
the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux,
@FiredMosquito831, and @spikefcz.
- Feishu: accept Schema 2.0 card action callbacks that report
`context.open_chat_id` instead of legacy `context.chat_id`, so button
callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.

View File

@@ -139,6 +139,7 @@ The Gateway writes a rolling log file (printed on startup as
- `bonjour: advertise failed ...`
- `bonjour: ... name conflict resolved` / `hostname conflict resolved`
- `bonjour: watchdog detected non-announced service ...`
- `bonjour: disabling advertiser after ... failed restarts ...`
## Debugging on iOS node
@@ -155,6 +156,10 @@ The log includes browser state transitions and resultset changes.
- **Bonjour doesn't cross networks**: use Tailnet or SSH.
- **Multicast blocked**: some Wi-Fi networks disable mDNS.
- **Advertiser stuck in probing/announcing**: hosts with blocked multicast,
container bridges, WSL, or interface churn can leave the ciao advertiser in a
non-announced state. OpenClaw retries a few times and then disables Bonjour
for the current Gateway process instead of restarting the advertiser forever.
- **Sleep / interface churn**: macOS may temporarily drop mDNS results; retry.
- **Browse works but resolve fails**: keep machine names simple (avoid emojis or
punctuation), then restart the Gateway. The service instance name derives from

View File

@@ -533,6 +533,38 @@ describe("gateway bonjour advertiser", () => {
expect(shutdown).toHaveBeenCalledTimes(1);
});
// Regression test for the bounded-restart behaviour: when the advertiser never
// leaves a non-announced state, the advertiser must log the "disabling
// advertiser" warning and stop creating new services instead of restarting
// forever.
it("disables bonjour for the process after repeated stuck advertiser restarts", async () => {
enableAdvertiserUnitMode();
vi.useFakeTimers();
// The mocked service state is pinned to "announcing" for the whole test.
// NOTE(review): presumably mockCiaoService exposes stateRef.value as the
// service's `serviceState` — confirm against the helper.
const stateRef = { value: "announcing" };
const destroy = vi.fn().mockResolvedValue(undefined);
// advertise() returns a promise that never settles, so no advertise attempt
// ever completes.
const advertise = vi.fn(() => new Promise<void>(() => {}));
mockCiaoService({ advertise, destroy, stateRef });
const started = await startAdvertiser({
gatewayPort: 18789,
sshPort: 2222,
});
// Advance fake time far enough for the restart budget to be exhausted.
// NOTE(review): 65s is assumed to cover all watchdog/debounce windows of the
// implementation — re-check if those timings change.
await vi.advanceTimersByTimeAsync(65_000);
expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining("disabling advertiser after 3 failed restarts"),
);
// Four create/advertise/destroy rounds are expected, consistent with the
// initial cycle plus the three restarts named in the warning; the responder
// is shut down exactly once.
expect(createService).toHaveBeenCalledTimes(4);
expect(advertise).toHaveBeenCalledTimes(4);
expect(destroy).toHaveBeenCalledTimes(4);
expect(shutdown).toHaveBeenCalledTimes(1);
// A further minute of fake time must not trigger any new service creation or
// advertise attempt.
await vi.advanceTimersByTimeAsync(60_000);
expect(createService).toHaveBeenCalledTimes(4);
expect(advertise).toHaveBeenCalledTimes(4);
// Stopping after the advertiser has been disabled must not shut the responder
// down a second time.
await started.stop();
expect(shutdown).toHaveBeenCalledTimes(1);
});
it("normalizes hostnames with domains for service names", async () => {
// Allow advertiser to run in unit tests.
delete process.env.VITEST;

View File

@@ -69,6 +69,7 @@ type BonjourAdvertiserDeps = {
// Timing settings in milliseconds. Their consumers are outside this view.
// NOTE(review): confirm the exact semantics of each before tuning.
const WATCHDOG_INTERVAL_MS = 5_000;
const REPAIR_DEBOUNCE_MS = 30_000;
const STUCK_ANNOUNCING_MS = 8_000;
// Upper bound on back-to-back advertiser restarts. recreateAdvertiser
// increments a counter on every attempt and, once the counter exceeds this
// value, disables the advertiser instead of restarting it again. The watchdog
// resets the counter to 0 when it observes a service in the "announced" state.
const MAX_CONSECUTIVE_RESTARTS = 3;
// Service state string that means the advertisement is live.
// NOTE(review): the watchdog loop also compares against the literal
// "announced" directly; consider using this constant there for consistency.
const BONJOUR_ANNOUNCED_STATE = "announced";
// Fragment of an error message about probing a service that is already
// announced. NOTE(review): presumably matched against ciao error text to
// recognise this specific failure — confirm at the usage site.
const CIAO_SELF_PROBE_RETRY_FRAGMENT =
"failed probing with reason: Error: Can't probe for a service which is announced already.";
@@ -332,7 +333,9 @@ export async function startGatewayBonjourAdvertiser(
let stopped = false;
let recreatePromise: Promise<void> | null = null;
let cycle = createCycle();
let disabled = false;
let consecutiveRestarts = 0;
let cycle: BonjourCycle | null = createCycle();
const stateTracker = new Map<string, ServiceStateTracker>();
attachConflictListeners(cycle.services);
startAdvertising(cycle.services);
@@ -353,13 +356,26 @@ export async function startGatewayBonjourAdvertiser(
};
const recreateAdvertiser = async (reason: string) => {
if (stopped) {
if (stopped || disabled) {
return;
}
if (recreatePromise) {
return recreatePromise;
}
recreatePromise = (async () => {
consecutiveRestarts += 1;
if (consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS) {
disabled = true;
logger.warn(
`bonjour: disabling advertiser after ${MAX_CONSECUTIVE_RESTARTS} failed restarts (${reason}); set discovery.mdns.mode="off" or OPENCLAW_DISABLE_BONJOUR=1 to disable mDNS discovery`,
);
const previous = cycle;
cycle = null;
stateTracker.clear();
await stopCycle(previous, { shutdownResponder: true });
restoreConsoleLog();
return;
}
logger.warn(`bonjour: restarting advertiser (${reason})`);
const previous = cycle;
await stopCycle(previous);
@@ -378,12 +394,18 @@ export async function startGatewayBonjourAdvertiser(
if (stopped || recreatePromise) {
return;
}
if (disabled || !cycle) {
return;
}
updateStateTrackers(cycle.services);
for (const { label, svc } of cycle.services) {
const stateUnknown = (svc as { serviceState?: unknown }).serviceState;
if (typeof stateUnknown !== "string") {
continue;
}
if (stateUnknown === "announced") {
consecutiveRestarts = 0;
}
const tracked = stateTracker.get(label);
if (
stateUnknown !== "announced" &&