fix(e2e): bound kitchen sink RPC probes

This commit is contained in:
Vincent Koc
2026-05-27 07:22:36 +02:00
parent 95c8fc9678
commit 96bd939995
3 changed files with 52 additions and 3 deletions

View File

@@ -78,6 +78,7 @@ Docs: https://docs.openclaw.ai
- Telegram: treat `/command@TargetBot` bot-command entities as explicit mentions for the addressed bot so `requireMention` groups no longer drop targeted commands or captions. Fixes #84462. (#86553) Thanks @luoyanglang.
- CI: bound Docker/Bash E2E tarball npm installs with `OPENCLAW_E2E_NPM_INSTALL_TIMEOUT` so package, onboarding, plugin, and upgrade lanes fail instead of hanging on a stuck npm install.
- CI: fail Parallels npm-update smoke jobs after the guest command timeout and cleanup backstop instead of only logging a timeout line.
- CI: bound kitchen-sink RPC HTTP probes with `OPENCLAW_KITCHEN_SINK_RPC_FETCH_MS` so a stalled gateway readiness request or response body times out, fails, and retries instead of wedging the walker.
- CI: keep `OPENCLAW_TESTBOX=1 pnpm check:changed` delegating to Blacksmith Testbox through Crabbox without forwarding local Testbox or worker env into the remote command.
- CI: send KILL after the TERM grace period for manual checkout fetch timeouts so stuck Testbox and workflow checkout retries cannot hang behind a wedged `git fetch`.
- CI: send KILL after the TERM grace period for Bun global install smoke command timeouts so trapped `openclaw` child processes cannot wedge the scheduled install smoke.

View File

@@ -27,6 +27,7 @@ const INSTALL_TIMEOUT_MS = readPositiveInt(
Math.max(COMMAND_TIMEOUT_MS, 600000),
);
// Timeout/limit knobs for the kitchen-sink RPC walker. All *_MS values are milliseconds.
// NOTE(review): readPositiveInt is defined outside this view — presumably it returns the
// second argument when the env value is missing or not a positive integer; confirm.

// RPC timeout, overridable via OPENCLAW_KITCHEN_SINK_RPC_CALL_MS (name suggests per-call; usage not visible here).
const RPC_TIMEOUT_MS = readPositiveInt(process.env.OPENCLAW_KITCHEN_SINK_RPC_CALL_MS, 60000);
// Default per-attempt timeout fetchJson applies when the caller passes no `timeoutMs`;
// overridable via OPENCLAW_KITCHEN_SINK_RPC_FETCH_MS.
const FETCH_TIMEOUT_MS = readPositiveInt(process.env.OPENCLAW_KITCHEN_SINK_RPC_FETCH_MS, 10000);
// Gateway RSS ceiling in MiB, overridable via OPENCLAW_KITCHEN_SINK_MAX_RSS_MIB.
const MAX_RSS_MIB = readPositiveInt(process.env.OPENCLAW_KITCHEN_SINK_MAX_RSS_MIB, 2048);
// Fixed (not env-configurable) gateway teardown grace periods.
// NOTE(review): presumably the wait after the stop signal and the extra wait after a forced kill — verify at the call site.
const GATEWAY_TEARDOWN_GRACE_MS = 10000;
const GATEWAY_TEARDOWN_KILL_GRACE_MS = 2000;
@@ -439,11 +440,27 @@ function isRetryableTransientNetworkError(error, seen = new Set()) {
export async function fetchJson(url, options = {}) {
const attempts = Math.max(1, options.attempts ?? 3);
const timeoutMs = Math.max(1, options.timeoutMs ?? FETCH_TIMEOUT_MS);
let lastError;
for (let attempt = 1; attempt <= attempts; attempt += 1) {
const controller = new AbortController();
const timeoutError = Object.assign(new Error(`fetch ${url} timed out after ${timeoutMs}ms`), {
code: "ETIMEDOUT",
});
let timeout;
const timeoutPromise = new Promise((_, reject) => {
timeout = setTimeout(() => {
controller.abort(timeoutError);
reject(timeoutError);
}, timeoutMs);
timeout.unref?.();
});
try {
const response = await (options.fetchImpl ?? fetch)(url);
const text = await response.text();
const response = await Promise.race([
(options.fetchImpl ?? fetch)(url, { signal: controller.signal }),
timeoutPromise,
]);
const text = await Promise.race([response.text(), timeoutPromise]);
let body = null;
try {
body = text ? JSON.parse(text) : null;
@@ -457,6 +474,10 @@ export async function fetchJson(url, options = {}) {
throw error;
}
await delay(options.retryDelayMs ?? 250);
} finally {
if (timeout) {
clearTimeout(timeout);
}
}
}
throw lastError ?? new Error(`fetch ${url} failed`);

View File

@@ -3,7 +3,7 @@ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync
import { tmpdir } from "node:os";
import path from "node:path";
import { setTimeout as delay } from "node:timers/promises";
import { describe, expect, it, vi } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
appendBoundedOutput,
assertDiagnosticStabilityClean,
@@ -23,6 +23,10 @@ import {
const posixIt = process.platform === "win32" ? it.skip : it;
// Tests in this file may switch to fake timers (vi.useFakeTimers); always restore
// the real ones afterwards so a failing test cannot leak fake timers into the next.
afterEach(function restoreRealTimers() {
  vi.useRealTimers();
});
describe("kitchen-sink RPC isolated state", () => {
it("cleans up the generated temporary home tree", async () => {
const { root, env } = makeEnv();
@@ -459,6 +463,29 @@ describe("kitchen-sink RPC process sampling", () => {
expect(fetchImpl).toHaveBeenCalledTimes(2);
});
it("times out stalled HTTP probe response bodies", async () => {
  vi.useFakeTimers();

  // The response itself resolves, but reading its body never settles.
  const stalledResponse = {
    ok: true,
    status: 200,
    text: () => new Promise(() => undefined),
  };
  const fetchImpl = vi.fn().mockResolvedValue(stalledResponse);

  const pending = fetchJson("http://127.0.0.1:19680/readyz", {
    attempts: 1,
    fetchImpl,
    timeoutMs: 100,
  });
  // Register the rejection expectation before advancing the fake clock so the
  // timeout rejection already has a handler when it fires.
  const timedOut = expect(pending).rejects.toMatchObject({
    code: "ETIMEDOUT",
    message: "fetch http://127.0.0.1:19680/readyz timed out after 100ms",
  });

  await vi.advanceTimersByTimeAsync(100);
  await timedOut;

  // The second fetch argument carries the AbortSignal; it must be aborted on timeout.
  const requestInit = fetchImpl.mock.calls[0]?.[1];
  expect(requestInit?.signal.aborted).toBe(true);
});
it("fails when the sampled RSS exceeds the configured ceiling", () => {
expect(() => assertResourceCeiling({ rssMiB: 2049 })).toThrow(
"gateway RSS exceeded 2048 MiB: 2049 MiB",