fix(bonjour): recover from ciao cancellation

This commit is contained in:
Peter Steinberger
2026-04-29 00:47:58 +01:00
parent b8d15f8219
commit c34ba97262
5 changed files with 64 additions and 20 deletions

View File

@@ -137,6 +137,7 @@ The Gateway writes a rolling log file (printed on startup as
`gateway log file: ...`). Look for `bonjour:` lines, especially:
- `bonjour: advertise failed ...`
- `bonjour: suppressing ciao cancellation ...`
- `bonjour: ... name conflict resolved` / `hostname conflict resolved`
- `bonjour: watchdog detected non-announced service ...`
- `bonjour: disabling advertiser after ... failed restarts ...`

View File

@@ -393,11 +393,11 @@ describe("gateway bonjour advertiser", () => {
expect(exceptionHandler).toBeTypeOf("function");
expect(handler?.(new Error("CIAO PROBING CANCELLED"))).toBe(true);
expect(logger.debug).toHaveBeenCalledWith(
expect.stringContaining("ignoring unhandled ciao rejection"),
expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining("suppressing ciao cancellation"),
);
logger.debug.mockClear();
logger.warn.mockClear();
expect(
handler?.(new Error("Reached illegal state! IPV4 address change from defined to undefined!")),
).toBe(true);
@@ -423,6 +423,37 @@ describe("gateway bonjour advertiser", () => {
await started.stop();
});
// Verifies that a ciao cancellation reaching the registered unhandled-rejection
// handler is suppressed (handler returns true) and makes the advertiser rebuild
// its service: the old one is destroyed once and a second one is created and advertised.
it("recovers when ciao cancellation escapes the advertiser", async () => {
  enableAdvertiserUnitMode();

  const advertiseMock = vi.fn().mockResolvedValue(undefined);
  const destroyMock = vi.fn().mockResolvedValue(undefined);
  mockCiaoService({ advertise: advertiseMock, destroy: destroyMock });

  const started = await startAdvertiser({ gatewayPort: 18789, sshPort: 2222 });

  // The first registered callback is the handler under test.
  const [firstRegistration] = registerUnhandledRejectionHandler.mock.calls;
  const rejectionHandler = firstRegistration?.[0] as
    | ((reason: unknown) => boolean)
    | undefined;

  const cancellation = new Error("CIAO ANNOUNCEMENT CANCELLED");
  expect(rejectionHandler?.(cancellation)).toBe(true);

  // Recovery is asynchronous, so poll until the replacement service exists.
  await vi.waitFor(() => {
    expect(createService).toHaveBeenCalledTimes(2);
  });

  for (const fragment of ["suppressing ciao cancellation", "restarting advertiser"]) {
    expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining(fragment));
  }
  expect(destroyMock).toHaveBeenCalledTimes(1);
  expect(advertiseMock).toHaveBeenCalledTimes(2);

  await started.stop();
});
it("logs advertise failures and retries via watchdog", async () => {
enableAdvertiserUnitMode();
vi.useFakeTimers();

View File

@@ -363,7 +363,8 @@ export async function startGatewayBonjourAdvertiser(
}
if (classification.kind === "cancellation") {
logger.debug(`bonjour: ignoring unhandled ciao rejection: ${classification.formatted}`);
logger.warn(`bonjour: suppressing ciao cancellation: ${classification.formatted}`);
requestCiaoRecovery?.(classification);
} else if (classification.kind === "interface-enumeration-failure") {
// Restricted sandboxes can refuse os.networkInterfaces(); mDNS cannot
// function without it, so surface a single warning and skip recovery.
@@ -490,6 +491,28 @@ export async function startGatewayBonjourAdvertiser(
}
}
/**
 * Logs a failed advertise attempt for one service and, when the error is
 * recognised by `classifyCiaoProcessError`, asks for advertiser recovery.
 *
 * @param label - Label of the service, used in the log summary.
 * @param svc - The service whose advertise call failed.
 * @param err - The rejection reason or thrown value.
 * @param action - "failed" for an async rejection, "threw" for a synchronous throw.
 *
 * NOTE(review): recovery is requested for every classification kind, whereas the
 * unhandled-rejection handler skips recovery for "interface-enumeration-failure";
 * confirm that difference is intended.
 */
function handleAdvertiseFailure(
  label: string,
  svc: BonjourService,
  err: unknown,
  action: "failed" | "threw",
) {
  const ciaoError = classifyCiaoProcessError(err);
  const summary = serviceSummary(label, svc);

  // Unclassified errors are only logged; the caller carries on.
  if (!ciaoError) {
    logger.warn(`bonjour: advertise ${action} (${summary}): ${formatBonjourError(err)}`);
    return;
  }

  logger.warn(
    `bonjour: advertise ${action} with ciao ${ciaoError.kind} (${summary}): ${ciaoError.formatted}`,
  );
  // The recovery hook may not be assigned yet, hence the optional call.
  requestCiaoRecovery?.(ciaoError);
}
function startAdvertising(services: Array<{ label: string; svc: BonjourService }>) {
for (const { label, svc } of services) {
try {
@@ -499,14 +522,10 @@ export async function startGatewayBonjourAdvertiser(
logger.info(`bonjour: advertised ${serviceSummary(label, svc)}`);
})
.catch((err) => {
logger.warn(
`bonjour: advertise failed (${serviceSummary(label, svc)}): ${formatBonjourError(err)}`,
);
handleAdvertiseFailure(label, svc, err, "failed");
});
} catch (err) {
logger.warn(
`bonjour: advertise threw (${serviceSummary(label, svc)}): ${formatBonjourError(err)}`,
);
handleAdvertiseFailure(label, svc, err, "threw");
}
}
}
@@ -523,8 +542,6 @@ export async function startGatewayBonjourAdvertiser(
let consecutiveRestarts = 0;
let cycle: BonjourCycle | null = createCycle();
const stateTracker = new Map<string, ServiceStateTracker>();
attachConflictListeners(cycle.services);
startAdvertising(cycle.services);
const updateStateTrackers = (services: Array<{ label: string; svc: BonjourService }>) => {
const now = Date.now();
@@ -578,6 +595,8 @@ export async function startGatewayBonjourAdvertiser(
requestCiaoRecovery = (classification) => {
void recreateAdvertiser(`ciao ${classification.kind}: ${classification.formatted}`);
};
attachConflictListeners(cycle.services);
startAdvertising(cycle.services);
const lastRepairAttempt = new Map<string, number>();
const watchdog = setInterval(() => {

View File

@@ -42,7 +42,7 @@ TIMEOUT_ONBOARD_S=180
TIMEOUT_AGENT_S="${OPENCLAW_PARALLELS_LINUX_AGENT_TIMEOUT_S:-300}"
TIMEOUT_GATEWAY_S=240
PHASE_STALE_WARN_S=60
DISABLE_BONJOUR_FOR_GATEWAY=1
DISABLE_BONJOUR_FOR_GATEWAY=0
FRESH_MAIN_STATUS="skip"
FRESH_MAIN_VERSION="skip"

View File

@@ -41,13 +41,6 @@ describe("Parallels smoke model selection", () => {
expect(script).toMatch(/parallels-linux-smoke\.sh"[\s\S]*?--model "\$MODEL_ID"/);
});
it("disables Bonjour by default for the standalone Linux gateway smoke", () => {
const script = readFileSync("scripts/e2e/parallels-linux-smoke.sh", "utf8");
expect(script).toContain("DISABLE_BONJOUR_FOR_GATEWAY=1");
expect(script).toContain("OPENCLAW_DISABLE_BONJOUR=1");
});
it("lets the macOS gateway status probe use the full phase budget", () => {
const script = readFileSync("scripts/e2e/parallels-macos-smoke.sh", "utf8");