fix: stop bonjour stuck-announcing churn

This commit is contained in:
Peter Steinberger
2026-05-02 23:40:23 +01:00
parent 816f3f11a1
commit 9404a4ddcd
3 changed files with 22 additions and 12 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Channels: keep Matrix and Mattermost bundled in the core package instead of advertising external npm installs before those channels are cut over. Thanks @vincentkoc.
- Bonjour: disable LAN mDNS advertising when the advertiser gets stuck announcing again after one recovery restart, instead of repeatedly restarting ciao and saturating the Gateway event loop.
- CLI/plugins: stop treating the non-plugin `auth` command root as a bundled plugin id, so restrictive `plugins.allow` configs no longer tell users to add stale `auth` plugin entries.
- Doctor/plugins: update configured plugin installs whose stale manifests still declare channels without `channelConfigs`, so beta upgrades repair old Discord-style package payloads during `doctor --fix`.
- Active Memory: keep non-empty `memory_search` results from being fast-failed as empty when debug telemetry reports zero hits.

View File

@@ -721,19 +721,19 @@ describe("gateway bonjour advertiser", () => {
sshPort: 2222,
});
await vi.advanceTimersByTimeAsync(105_000);
await vi.advanceTimersByTimeAsync(55_000);
expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining("disabling advertiser after 3 failed restarts"),
expect.stringContaining("disabling advertiser after 1 stuck-state restart"),
);
expect(createService).toHaveBeenCalledTimes(4);
expect(advertise).toHaveBeenCalledTimes(4);
expect(destroy).toHaveBeenCalledTimes(4);
expect(createService).toHaveBeenCalledTimes(2);
expect(advertise).toHaveBeenCalledTimes(2);
expect(destroy).toHaveBeenCalledTimes(2);
expect(shutdown).toHaveBeenCalledTimes(1);
await vi.advanceTimersByTimeAsync(60_000);
expect(createService).toHaveBeenCalledTimes(4);
expect(advertise).toHaveBeenCalledTimes(4);
expect(createService).toHaveBeenCalledTimes(2);
expect(advertise).toHaveBeenCalledTimes(2);
await started.stop();
expect(shutdown).toHaveBeenCalledTimes(1);

View File

@@ -88,6 +88,7 @@ const REPAIR_DEBOUNCE_MS = 30_000;
// See https://github.com/openclaw/openclaw/issues/72481
const STUCK_ANNOUNCING_MS = 20_000;
// Upper bound on back-to-back advertiser restarts. The counter is reset when a
// service reaches "announced"; once it exceeds this limit the advertiser is
// disabled instead of being recreated again.
const MAX_CONSECUTIVE_RESTARTS = 3;
// Tighter bound for restarts triggered by the stuck-state watchdog. The counter
// is reset by any restart that is not flagged as stuck-state and by reaching
// "announced"; once it exceeds this limit the advertiser is disabled.
const MAX_CONSECUTIVE_STUCK_STATE_RESTARTS = 1;
// A flapping advertiser can briefly reach "announced" between probing
// failures, which resets the consecutive counter. Bound total restarts too.
const RESTART_WINDOW_MS = 30 * 60_000;
@@ -571,6 +572,7 @@ export async function startGatewayBonjourAdvertiser(
let recreatePromise: Promise<void> | null = null;
let disabled = false;
let consecutiveRestarts = 0;
let consecutiveStuckStateRestarts = 0;
const restartTimestamps: number[] = [];
let cycle: BonjourCycle | null = createCycle();
const stateTracker = new Map<string, ServiceStateTracker>();
@@ -590,7 +592,7 @@ export async function startGatewayBonjourAdvertiser(
}
};
const recreateAdvertiser = async (reason: string) => {
const recreateAdvertiser = async (reason: string, opts?: { stuckState?: boolean }) => {
if (stopped || disabled) {
return;
}
@@ -599,6 +601,7 @@ export async function startGatewayBonjourAdvertiser(
}
recreatePromise = (async () => {
consecutiveRestarts += 1;
consecutiveStuckStateRestarts = opts?.stuckState ? consecutiveStuckStateRestarts + 1 : 0;
const now = Date.now();
while (
restartTimestamps.length > 0 &&
@@ -608,14 +611,18 @@ export async function startGatewayBonjourAdvertiser(
}
restartTimestamps.push(now);
const tooManyConsecutive = consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS;
const tooManyStuckStates =
consecutiveStuckStateRestarts > MAX_CONSECUTIVE_STUCK_STATE_RESTARTS;
const tooManyInWindow = restartTimestamps.length >= MAX_RESTARTS_IN_WINDOW;
if (tooManyConsecutive || tooManyInWindow) {
if (tooManyConsecutive || tooManyStuckStates || tooManyInWindow) {
disabled = true;
const detail = tooManyConsecutive
? `${MAX_CONSECUTIVE_RESTARTS} failed restarts`
: `${MAX_RESTARTS_IN_WINDOW} restarts within ${Math.round(
RESTART_WINDOW_MS / 60_000,
)} minutes`;
: tooManyStuckStates
? `${MAX_CONSECUTIVE_STUCK_STATE_RESTARTS} stuck-state restart`
: `${MAX_RESTARTS_IN_WINDOW} restarts within ${Math.round(
RESTART_WINDOW_MS / 60_000,
)} minutes`;
logger.warn(
`bonjour: disabling advertiser after ${detail} (${reason}); set discovery.mdns.mode="off" or OPENCLAW_DISABLE_BONJOUR=1 to disable mDNS discovery`,
);
@@ -661,6 +668,7 @@ export async function startGatewayBonjourAdvertiser(
}
if (stateUnknown === "announced") {
consecutiveRestarts = 0;
consecutiveStuckStateRestarts = 0;
}
const tracked = stateTracker.get(label);
if (
@@ -673,6 +681,7 @@ export async function startGatewayBonjourAdvertiser(
label,
svc,
)})`,
{ stuckState: true },
);
return;
}