diff --git a/CHANGELOG.md b/CHANGELOG.md index 099566e77d1..91f8037aba9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai - Plugins/tokenjuice: compile the bundled plugin against tokenjuice 0.7.0's published OpenClaw host types instead of a local compatibility shim, so package contract drift fails in OpenClaw validation before release. Thanks @vincentkoc. - OAuth/secrets: ignore root-level Google OAuth `client_secret_*.json` downloads so local client-secret files do not appear as commit candidates. (#74689) Thanks @jeongdulee. - Memory: mirror `sqlite-vec` into packaged bundled-plugin runtime deps for the default memory plugin, so builtin vector search does not lose its SQLite extension after upgrading to 2026.4.27. Fixes #74692. Thanks @mozi1924. +- Gateway/startup: bound local discovery advertisement during startup, so a stuck discovery plugin can no longer keep the Gateway from reaching ready. Fixes #73865; refs #74630 and #74633. Thanks @lpendeavors, @moltar-bot, and @Saboor711. - CLI/status: resolve read-only channel setup runtime fallback from the packaged OpenClaw dist root, so `status --all`, `status --deep`, channel, and doctor paths do not crash when an external channel plugin needs setup metadata. Fixes #74693. Thanks @giangthb. - Google Meet: block managed Chrome intro/test speech until browser health proves the participant is in-call, and expose `speechReady` diagnostics so login, admission, permission, and audio-bridge blockers no longer look like successful speech. Refs #72478. Thanks @DougButdorf. - Slack/commands: keep native command argument menus on select controls for encoded choice values up to Slack's option limit and truncate fallback button labels to Slack's button-text limit, so long valid choices no longer render invalid Slack blocks. Thanks @slackapi. 
diff --git a/src/gateway/server-discovery-runtime.test.ts b/src/gateway/server-discovery-runtime.test.ts index 1d58152749a..159a7e570be 100644 --- a/src/gateway/server-discovery-runtime.test.ts +++ b/src/gateway/server-discovery-runtime.test.ts @@ -56,6 +56,7 @@ describe("startGatewayDiscovery", () => { const prevEnv = { ...process.env }; afterEach(() => { + vi.useRealTimers(); for (const key of Object.keys(process.env)) { if (!(key in prevEnv)) { delete process.env[key]; @@ -120,6 +121,42 @@ describe("startGatewayDiscovery", () => { expect(stopped).toEqual(["peer", "bonjour"]); }); + it("continues startup when a local discovery service never settles", async () => { + vi.useFakeTimers(); + process.env.NODE_ENV = "development"; + delete process.env.VITEST; + process.env.OPENCLAW_GATEWAY_DISCOVERY_ADVERTISE_TIMEOUT_MS = "10"; + + const service = makeDiscoveryService({ + id: "stuck-discovery", + advertise: vi.fn(() => new Promise(() => {})), + }); + const logs = makeLogs(); + + const resultPromise = startGatewayDiscovery({ + machineDisplayName: "Lab Mac", + port: 18789, + wideAreaDiscoveryEnabled: false, + tailscaleMode: "off", + mdnsMode: "full", + gatewayDiscoveryServices: [service], + logDiscovery: logs, + }); + + await vi.advanceTimersByTimeAsync(10); + const result = await resultPromise; + + expect(result.bonjourStop).toBeTypeOf("function"); + await result.bonjourStop?.(); + expect(logs.warn).toHaveBeenCalledWith( + expect.stringContaining( + "gateway discovery service timed out after 10ms (stuck-discovery, plugin=stuck-discovery)", + ), + ); + + vi.useRealTimers(); + }); + it("skips local discovery services when mDNS mode is off", async () => { process.env.NODE_ENV = "development"; delete process.env.VITEST; diff --git a/src/gateway/server-discovery-runtime.ts b/src/gateway/server-discovery-runtime.ts index 43a41d5c8b4..eebdd20cc98 100644 --- a/src/gateway/server-discovery-runtime.ts +++ b/src/gateway/server-discovery-runtime.ts @@ -8,6 +8,20 @@ import { 
/** Startup budget (ms) granted to one discovery service's `advertise()` call when no valid override is set. */
const DEFAULT_DISCOVERY_ADVERTISE_TIMEOUT_MS = 5_000;

/**
 * Largest delay `setTimeout` honours (2^31 - 1 ms). Node coerces anything above
 * this to 1ms, which would make every discovery service "time out" immediately.
 */
const MAX_DISCOVERY_ADVERTISE_TIMEOUT_MS = 2_147_483_647;

/**
 * Resolves the per-service discovery advertise timeout from the environment.
 *
 * Reads `OPENCLAW_GATEWAY_DISCOVERY_ADVERTISE_TIMEOUT_MS`. The value must be a
 * plain positive decimal integer (surrounding whitespace is ignored).
 *
 * @param env - Environment map to read the override from.
 * @returns The override in milliseconds, capped at
 *   `MAX_DISCOVERY_ADVERTISE_TIMEOUT_MS`; otherwise
 *   `DEFAULT_DISCOVERY_ADVERTISE_TIMEOUT_MS` when the variable is unset, empty,
 *   non-numeric, zero, or negative.
 */
function resolveDiscoveryAdvertiseTimeoutMs(env: NodeJS.ProcessEnv): number {
  const raw = env.OPENCLAW_GATEWAY_DISCOVERY_ADVERTISE_TIMEOUT_MS?.trim();
  // Require digits only: `parseInt` alone would read "1e3" or "1_000" as 1,
  // silently turning a typo into a 1ms startup timeout.
  if (!raw || !/^\d+$/.test(raw)) {
    return DEFAULT_DISCOVERY_ADVERTISE_TIMEOUT_MS;
  }
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_DISCOVERY_ADVERTISE_TIMEOUT_MS;
  }
  // Keep the value inside the range setTimeout accepts without overflowing.
  return Math.min(parsed, MAX_DISCOVERY_ADVERTISE_TIMEOUT_MS);
}
false, @@ -54,7 +74,50 @@ export async function startGatewayDiscovery(params: { tailnetDns, cliPath, minimal: mdnsMinimal, + }; + const advertisePromise = Promise.resolve() + .then(() => entry.service.advertise(context)) + .then( + async (started) => { + if (timedOut) { + if (started?.stop) { + if (stoppedLocalDiscovery) { + try { + await started.stop(); + } catch (err) { + params.logDiscovery.warn(`gateway discovery stop failed: ${String(err)}`); + } + } else { + stops.push(started.stop); + } + } + params.logDiscovery.warn( + `gateway discovery service completed after startup timeout (${entry.service.id}, plugin=${entry.pluginId})`, + ); + } + return started; + }, + (err) => { + params.logDiscovery.warn( + `gateway discovery service failed${timedOut ? " after startup timeout" : ""} (${entry.service.id}, plugin=${entry.pluginId}): ${String(err)}`, + ); + return undefined; + }, + ); + const timeoutPromise = new Promise((resolve) => { + timer = setTimeout(() => { + timedOut = true; + params.logDiscovery.warn( + `gateway discovery service timed out after ${advertiseTimeoutMs}ms (${entry.service.id}, plugin=${entry.pluginId}); continuing startup`, + ); + resolve(undefined); + }, advertiseTimeoutMs); + timer.unref?.(); }); + const started = await Promise.race([advertisePromise, timeoutPromise]); + if (timer) { + clearTimeout(timer); + } if (started?.stop) { stops.push(started.stop); } @@ -64,8 +127,9 @@ export async function startGatewayDiscovery(params: { ); } } - if (stops.length > 0) { + if (attemptedLocalDiscovery) { bonjourStop = async () => { + stoppedLocalDiscovery = true; for (const stop of stops.toReversed()) { try { await stop();