From 96bd939995873d6e6a24fe7d0e6e7779e669c8ff Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 27 May 2026 07:22:36 +0200 Subject: [PATCH] fix(e2e): bound kitchen sink RPC probes --- CHANGELOG.md | 1 + scripts/e2e/kitchen-sink-rpc-walk.mjs | 25 +++++++++++++++++-- test/scripts/kitchen-sink-rpc-walk.test.ts | 29 +++++++++++++++++++++- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ca002d52f0..560dcbeb747 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,7 @@ Docs: https://docs.openclaw.ai - Telegram: treat `/command@TargetBot` bot-command entities as explicit mentions for the addressed bot so `requireMention` groups no longer drop targeted commands or captions. Fixes #84462. (#86553) Thanks @luoyanglang. - CI: bound Docker/Bash E2E tarball npm installs with `OPENCLAW_E2E_NPM_INSTALL_TIMEOUT` so package, onboarding, plugin, and upgrade lanes fail instead of hanging on a stuck npm install. - CI: fail Parallels npm-update smoke jobs after the guest command timeout and cleanup backstop instead of only logging a timeout line. +- CI: bound kitchen-sink RPC HTTP probes so stalled gateway readiness or response bodies fail and retry instead of wedging the walker. - CI: keep `OPENCLAW_TESTBOX=1 pnpm check:changed` delegating to Blacksmith Testbox through Crabbox without forwarding local Testbox or worker env into the remote command. - CI: send KILL after the TERM grace period for manual checkout fetch timeouts so stuck Testbox and workflow checkout retries cannot hang behind a wedged `git fetch`. - CI: send KILL after the TERM grace period for Bun global install smoke command timeouts so trapped `openclaw` child processes cannot wedge the scheduled install smoke. diff --git a/scripts/e2e/kitchen-sink-rpc-walk.mjs b/scripts/e2e/kitchen-sink-rpc-walk.mjs index 2863601ef8a..5e78a846d78 100644 --- a/scripts/e2e/kitchen-sink-rpc-walk.mjs +++ b/scripts/e2e/kitchen-sink-rpc-walk.mjs @@ -27,6 +27,7 @@ const INSTALL_TIMEOUT_MS = readPositiveInt( Math.max(COMMAND_TIMEOUT_MS, 600000), ); const RPC_TIMEOUT_MS = readPositiveInt(process.env.OPENCLAW_KITCHEN_SINK_RPC_CALL_MS, 60000); +const FETCH_TIMEOUT_MS = readPositiveInt(process.env.OPENCLAW_KITCHEN_SINK_RPC_FETCH_MS, 10000); const MAX_RSS_MIB = readPositiveInt(process.env.OPENCLAW_KITCHEN_SINK_MAX_RSS_MIB, 2048); const GATEWAY_TEARDOWN_GRACE_MS = 10000; const GATEWAY_TEARDOWN_KILL_GRACE_MS = 2000; @@ -439,11 +440,27 @@ function isRetryableTransientNetworkError(error, seen = new Set()) { export async function fetchJson(url, options = {}) { const attempts = Math.max(1, options.attempts ?? 3); + const timeoutMs = Math.max(1, options.timeoutMs ?? FETCH_TIMEOUT_MS); let lastError; for (let attempt = 1; attempt <= attempts; attempt += 1) { + const controller = new AbortController(); + const timeoutError = Object.assign(new Error(`fetch ${url} timed out after ${timeoutMs}ms`), { + code: "ETIMEDOUT", + }); + let timeout; + const timeoutPromise = new Promise((_, reject) => { + timeout = setTimeout(() => { + controller.abort(timeoutError); + reject(timeoutError); + }, timeoutMs); + timeout.unref?.(); + }); try { - const response = await (options.fetchImpl ?? fetch)(url); - const text = await response.text(); + const response = await Promise.race([ + (options.fetchImpl ?? fetch)(url, { signal: controller.signal }), + timeoutPromise, + ]); + const text = await Promise.race([response.text(), timeoutPromise]); let body = null; try { body = text ? JSON.parse(text) : null; @@ -457,6 +474,10 @@ export async function fetchJson(url, options = {}) { throw error; } await delay(options.retryDelayMs ?? 250); + } finally { + if (timeout) { + clearTimeout(timeout); + } } } throw lastError ?? new Error(`fetch ${url} failed`); diff --git a/test/scripts/kitchen-sink-rpc-walk.test.ts b/test/scripts/kitchen-sink-rpc-walk.test.ts index 68336d316db..9c1ea0e66a8 100644 --- a/test/scripts/kitchen-sink-rpc-walk.test.ts +++ b/test/scripts/kitchen-sink-rpc-walk.test.ts @@ -3,7 +3,7 @@ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync import { tmpdir } from "node:os"; import path from "node:path"; import { setTimeout as delay } from "node:timers/promises"; -import { describe, expect, it, vi } from "vitest"; +import { afterEach, describe, expect, it, vi } from "vitest"; import { appendBoundedOutput, assertDiagnosticStabilityClean, @@ -23,6 +23,10 @@ import { const posixIt = process.platform === "win32" ? it.skip : it; +afterEach(() => { + vi.useRealTimers(); +}); + describe("kitchen-sink RPC isolated state", () => { it("cleans up the generated temporary home tree", async () => { const { root, env } = makeEnv(); @@ -459,6 +463,29 @@ describe("kitchen-sink RPC process sampling", () => { expect(fetchImpl).toHaveBeenCalledTimes(2); }); + it("times out stalled HTTP probe response bodies", async () => { + vi.useFakeTimers(); + const fetchImpl = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + text: () => new Promise(() => undefined), + }); + + const result = fetchJson("http://127.0.0.1:19680/readyz", { + attempts: 1, + fetchImpl, + timeoutMs: 100, + }); + const rejection = expect(result).rejects.toMatchObject({ + code: "ETIMEDOUT", + message: "fetch http://127.0.0.1:19680/readyz timed out after 100ms", + }); + + await vi.advanceTimersByTimeAsync(100); + await rejection; + expect(fetchImpl.mock.calls[0]?.[1]?.signal.aborted).toBe(true); + }); + it("fails when the sampled RSS exceeds the configured ceiling", () => { expect(() => assertResourceCeiling({ rssMiB: 2049 })).toThrow( "gateway RSS exceeded 2048 MiB: 2049 MiB",