fix(bonjour): raise stuck announcing threshold

Raise the Bonjour stuck-announcing watchdog threshold from 8s to 20s so that healthy 12-13s LAN announcements do not trigger a false-positive advertiser teardown, and lengthen the fake-timer advances in the watchdog tests so they still cover the new threshold.
This commit is contained in:
SymbolStar
2026-04-28 11:44:08 +08:00
committed by GitHub
parent c17b9fe623
commit f53ec52e7d
2 changed files with 10 additions and 5 deletions

View File

@@ -448,7 +448,7 @@ describe("gateway bonjour advertiser", () => {
// watchdog first retries, then recreates the advertiser after the service
// stays unhealthy across multiple 5s ticks.
await vi.advanceTimersByTimeAsync(15_000);
await vi.advanceTimersByTimeAsync(25_000);
expect(advertise).toHaveBeenCalledTimes(3);
expect(createService).toHaveBeenCalledTimes(2);
@@ -605,7 +605,7 @@ describe("gateway bonjour advertiser", () => {
expect(registerUncaughtExceptionHandler).toHaveBeenCalledTimes(1);
expect(registerUnhandledRejectionHandler).toHaveBeenCalledTimes(1);
await vi.advanceTimersByTimeAsync(15_000);
await vi.advanceTimersByTimeAsync(25_000);
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining("restarting advertiser"));
expect(createService).toHaveBeenCalledTimes(2);
@@ -650,7 +650,7 @@ describe("gateway bonjour advertiser", () => {
expect(createService).toHaveBeenCalledTimes(1);
expect(advertise).toHaveBeenCalledTimes(1);
await vi.advanceTimersByTimeAsync(15_000);
await vi.advanceTimersByTimeAsync(25_000);
expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining("service stuck in announcing"),
@@ -678,7 +678,7 @@ describe("gateway bonjour advertiser", () => {
sshPort: 2222,
});
await vi.advanceTimersByTimeAsync(65_000);
await vi.advanceTimersByTimeAsync(105_000);
expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining("disabling advertiser after 3 failed restarts"),

View File

@@ -80,7 +80,12 @@ type BonjourAdvertiserDeps = {
const WATCHDOG_INTERVAL_MS = 5_000;
const REPAIR_DEBOUNCE_MS = 30_000;
const STUCK_ANNOUNCING_MS = 8_000;
// The real-world LAN announce phase typically takes 12-13s on Mac/iOS networks. The
// previous 8s threshold triggered false-positive teardowns on every gateway
// restart in such environments. 20s gives healthy networks plenty of room while
// still catching genuinely stuck advertisers (an announce that never completes).
// See https://github.com/openclaw/openclaw/issues/72481
const STUCK_ANNOUNCING_MS = 20_000;
const MAX_CONSECUTIVE_RESTARTS = 3;
const BONJOUR_ANNOUNCED_STATE = "announced";
const CIAO_SELF_PROBE_RETRY_FRAGMENT =