From 5bb5d7dab4b29e68b15bb7665d0736f46499a35c Mon Sep 17 00:00:00 2001 From: Ted Li Date: Fri, 20 Mar 2026 22:27:50 -0700 Subject: [PATCH] CLI: respect full timeout for loopback gateway probes (#47533) * CLI: respect loopback gateway probe timeout * CLI: name gateway probe budgets * CLI: keep inactive loopback probes fast * CLI: inline simple gateway probe caps * Update helpers.ts * Gateway: clamp probe timeout to timer-safe max * fix: note loopback gateway probe timeout fix (#47533) (thanks @MonkeyLeeT) --------- Co-authored-by: Ayaan Zaidi --- CHANGELOG.md | 1 + src/commands/gateway-status.test.ts | 41 +++++++++++++++++++++ src/commands/gateway-status.ts | 2 +- src/commands/gateway-status/helpers.test.ts | 19 ++++++++++ src/commands/gateway-status/helpers.ts | 21 +++++++---- src/gateway/probe.test.ts | 8 +++- src/gateway/probe.ts | 34 +++++++++-------- 7 files changed, 102 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c445ca8c2d9..c2e2f7521ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -194,6 +194,7 @@ Docs: https://docs.openclaw.ai - Plugins/context engines: retry strict legacy `assemble()` calls without the new `prompt` field when older engines reject it, preserving prompt-aware retrieval compatibility for pre-prompt plugins. (#50848) thanks @danhdoan. - Agents/embedded transport errors: distinguish common network failures like connection refused, DNS lookup failure, and interrupted sockets from true timeouts in embedded-run user messaging and lifecycle diagnostics. (#51419) Thanks @scoootscooob. - Discord/startup logging: report client initialization while the gateway is still connecting instead of claiming Discord is logged in before readiness is reached. (#51425) Thanks @scoootscooob. +- Gateway/probe: honor caller `--timeout` for active local loopback probes in `gateway status`, keep inactive remote-mode loopback probes fast, and clamp probe timers to JS-safe bounds so slow local/container gateways stop reporting false timeouts. (#47533) Thanks @MonkeyLeeT. ### Breaking diff --git a/src/commands/gateway-status.test.ts b/src/commands/gateway-status.test.ts index 46212816410..3762afc6d8a 100644 --- a/src/commands/gateway-status.test.ts +++ b/src/commands/gateway-status.test.ts @@ -567,6 +567,47 @@ describe("gateway-status command", () => { expect(targets.some((t) => t.kind === "sshTunnel")).toBe(true); }); + it("passes the full caller timeout through to local loopback probes", async () => { + const { runtime } = createRuntimeCapture(); + probeGateway.mockClear(); + readBestEffortConfig.mockResolvedValueOnce({ + gateway: { + mode: "local", + auth: { mode: "token", token: "ltok" }, + }, + } as never); + + await runGatewayStatus(runtime, { timeout: "15000", json: true }); + + expect(probeGateway).toHaveBeenCalledWith( + expect.objectContaining({ + url: "ws://127.0.0.1:18789", + timeoutMs: 15_000, + }), + ); + }); + + it("keeps inactive local loopback probes on the short timeout in remote mode", async () => { + const { runtime } = createRuntimeCapture(); + probeGateway.mockClear(); + readBestEffortConfig.mockResolvedValueOnce({ + gateway: { + mode: "remote", + auth: { mode: "token", token: "ltok" }, + remote: {}, + }, + } as never); + + await runGatewayStatus(runtime, { timeout: "15000", json: true }); + + expect(probeGateway).toHaveBeenCalledWith( + expect.objectContaining({ + url: "ws://127.0.0.1:18789", + timeoutMs: 800, + }), + ); + }); + it("skips invalid ssh-auto discovery targets", async () => { const { runtime } = createRuntimeCapture(); await withEnvAsync({ USER: "steipete" }, async () => { diff --git a/src/commands/gateway-status.ts b/src/commands/gateway-status.ts index ecdeeaa9570..c338d7fe55b 100644 --- a/src/commands/gateway-status.ts +++ b/src/commands/gateway-status.ts @@ -176,7 +176,7 @@ export async function gatewayStatusCommand( token: authResolution.token, password: authResolution.password, }; - const timeoutMs = resolveProbeBudgetMs(overallTimeoutMs, target.kind); + const timeoutMs = resolveProbeBudgetMs(overallTimeoutMs, target); const probe = await probeGateway({ url: target.url, auth, diff --git a/src/commands/gateway-status/helpers.test.ts b/src/commands/gateway-status/helpers.test.ts index e0c1ecee763..525b99db98c 100644 --- a/src/commands/gateway-status/helpers.test.ts +++ b/src/commands/gateway-status/helpers.test.ts @@ -6,6 +6,7 @@ import { isScopeLimitedProbeFailure, renderProbeSummaryLine, resolveAuthForTarget, + resolveProbeBudgetMs, } from "./helpers.js"; describe("extractConfigSummary", () => { @@ -273,3 +274,21 @@ describe("probe reachability classification", () => { expect(renderProbeSummaryLine(probe, false)).toContain("RPC: failed"); }); }); + +describe("resolveProbeBudgetMs", () => { + it("lets active local loopback probes use the full caller budget", () => { + expect(resolveProbeBudgetMs(15_000, { kind: "localLoopback", active: true })).toBe(15_000); + expect(resolveProbeBudgetMs(3_000, { kind: "localLoopback", active: true })).toBe(3_000); + }); + + it("keeps inactive local loopback probes on the short cap", () => { + expect(resolveProbeBudgetMs(15_000, { kind: "localLoopback", active: false })).toBe(800); + expect(resolveProbeBudgetMs(500, { kind: "localLoopback", active: false })).toBe(500); + }); + + it("keeps non-local probe caps unchanged", () => { + expect(resolveProbeBudgetMs(15_000, { kind: "configRemote", active: true })).toBe(1_500); + expect(resolveProbeBudgetMs(15_000, { kind: "explicit", active: true })).toBe(1_500); + expect(resolveProbeBudgetMs(15_000, { kind: "sshTunnel", active: true })).toBe(2_000); + }); +}); diff --git a/src/commands/gateway-status/helpers.ts b/src/commands/gateway-status/helpers.ts index 5f1a5e2f5ee..aec1a6a794d 100644 --- a/src/commands/gateway-status/helpers.ts +++ b/src/commands/gateway-status/helpers.ts @@ -116,14 +116,21 @@ export function resolveTargets(cfg: OpenClawConfig, explicitUrl?: string): Gatew return targets; } -export function resolveProbeBudgetMs(overallMs: number, kind: TargetKind): number { - if (kind === "localLoopback") { - return Math.min(800, overallMs); +export function resolveProbeBudgetMs( + overallMs: number, + target: Pick, +): number { + switch (target.kind) { + case "localLoopback": + // Active loopback probes should honor the caller budget because local shells/containers + // can legitimately take longer to connect. Inactive loopback probes stay bounded so + // remote-mode status checks do not stall on an expected local miss. + return target.active ? overallMs : Math.min(800, overallMs); + case "sshTunnel": + return Math.min(2_000, overallMs); + default: + return Math.min(1_500, overallMs); } - if (kind === "sshTunnel") { - return Math.min(2000, overallMs); - } - return Math.min(1500, overallMs); } export function sanitizeSshTarget(value: unknown): string | null { diff --git a/src/gateway/probe.test.ts b/src/gateway/probe.test.ts index 4a2374e17cb..01c69be5199 100644 --- a/src/gateway/probe.test.ts +++ b/src/gateway/probe.test.ts @@ -40,9 +40,15 @@ vi.mock("./client.js", () => ({ GatewayClient: MockGatewayClient, })); -const { probeGateway } = await import("./probe.js"); +const { clampProbeTimeoutMs, probeGateway } = await import("./probe.js"); describe("probeGateway", () => { + it("clamps probe timeout to timer-safe bounds", () => { + expect(clampProbeTimeoutMs(1)).toBe(250); + expect(clampProbeTimeoutMs(2_000)).toBe(2_000); + expect(clampProbeTimeoutMs(3_000_000_000)).toBe(2_147_483_647); + }); + it("connects with operator.read scope", async () => { const result = await probeGateway({ url: "ws://127.0.0.1:18789", diff --git a/src/gateway/probe.ts b/src/gateway/probe.ts index bbd36639b78..b285c395c3d 100644 --- a/src/gateway/probe.ts +++ b/src/gateway/probe.ts @@ -29,6 +29,13 @@ export type GatewayProbeResult = { configSnapshot: unknown; }; +export const MIN_PROBE_TIMEOUT_MS = 250; +export const MAX_TIMER_DELAY_MS = 2_147_483_647; + +export function clampProbeTimeoutMs(timeoutMs: number): number { + return Math.min(MAX_TIMER_DELAY_MS, Math.max(MIN_PROBE_TIMEOUT_MS, timeoutMs)); +} + export async function probeGateway(opts: { url: string; auth?: GatewayProbeAuth; @@ -144,21 +151,18 @@ export async function probeGateway(opts: { }, }); - const timer = setTimeout( - () => { - settle({ - ok: false, - connectLatencyMs, - error: connectError ? `connect failed: ${connectError}` : "timeout", - close, - health: null, - status: null, - presence: null, - configSnapshot: null, - }); - }, - Math.max(250, opts.timeoutMs), - ); + const timer = setTimeout(() => { + settle({ + ok: false, + connectLatencyMs, + error: connectError ? `connect failed: ${connectError}` : "timeout", + close, + health: null, + status: null, + presence: null, + configSnapshot: null, + }); + }, clampProbeTimeoutMs(opts.timeoutMs)); client.start(); });