From 1507c6dac72d92ba8bad6d266cb538e43bd7f840 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 3 May 2026 12:44:17 +0100 Subject: [PATCH] perf: reduce gateway startup overhead --- CHANGELOG.md | 1 + scripts/bench-gateway-startup.ts | 31 +++++++++++++++++++ .../reply/agent-runner-helpers.test.ts | 29 +++++++++++++++-- src/auto-reply/reply/agent-runner-helpers.ts | 26 ++++++++++++++-- src/cli/ports.ts | 4 +++ src/cli/program.force.test.ts | 17 ++++++++++ test/scripts/bench-gateway-startup.test.ts | 23 ++++++++++++++ 7 files changed, 127 insertions(+), 4 deletions(-) create mode 100644 test/scripts/bench-gateway-startup.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index bb5a76e164e..903b41f5147 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ Docs: https://docs.openclaw.ai - Plugins/install: require OpenClaw-owned install provenance before granting official npm plugin scanner trust, so direct npm package names no longer bypass launch-code scanning while catalog, onboarding, and doctor installs stay trusted. Thanks @fede-kamel and @vincentkoc. - Network proxy: preserve target TLS hostname validation for Node HTTPS requests routed through the managed HTTP proxy, so Discord-style CONNECT traffic no longer validates certificates against the local proxy host. Fixes #74809. (#76442) Thanks @jesse-merhi and @abnershang. - Gateway/sessions: keep async `sessions.list` title and preview hydration bounded to transcript head/tail reads so Control UI polling cannot full-scan large session transcripts every refresh. Thanks @vincentkoc. +- Gateway/performance: cache per-run verbose-level session reads, skip a redundant `lsof` scan in `gateway --force` when no listener was killed, and make the Gateway startup benchmark print usage for `--help`. - Gateway/sessions: keep agent runtime metadata on lightweight `sessions.list` rows so model-only session patches do not make Control UI lose runtime identity. Thanks @vincentkoc. - Gateway/sessions: keep bulk `sessions.list` rows lightweight by skipping per-row transcript usage fallback, display model inference, and plugin projection, avoiding event-loop stalls in large session stores. Thanks @Marvinthebored and @vincentkoc. - Gateway/models: keep read-only `models.list` fallbacks on persisted/current metadata and configured rows while using static auth checks, so missing `models.json` files no longer runtime-load provider discovery or stall gateway after restart. Fixes #76382; refs #76360 and #75707. Thanks @trojy13, @RayWoo, @AnathemaOfficial, and @vincentkoc. diff --git a/scripts/bench-gateway-startup.ts b/scripts/bench-gateway-startup.ts index 034d7648b53..1fefdf9eeb8 100644 --- a/scripts/bench-gateway-startup.ts +++ b/scripts/bench-gateway-startup.ts @@ -159,6 +159,10 @@ function hasFlag(flag: string): boolean { return process.argv.includes(flag); } +function hasHelpFlag(): boolean { + return hasFlag("--help") || hasFlag("-h"); +} + function parseRepeatableFlag(flag: string): string[] { const values: string[] = []; for (let index = 0; index < process.argv.length; index += 1) { @@ -206,6 +210,28 @@ function parseOptions(): CliOptions { }; } +function printUsage(): void { + console.log(`OpenClaw Gateway startup benchmark + +Usage: + pnpm test:startup:gateway -- [options] + node --import tsx scripts/bench-gateway-startup.ts [options] + +Options: + --case Specific case id to run; repeatable + --entry Gateway CLI entry file (default: ${DEFAULT_ENTRY}) + --runs Measured runs per case (default: ${DEFAULT_RUNS}) + --warmup Warmup runs per case (default: ${DEFAULT_WARMUP}) + --timeout-ms Per-run timeout (default: ${DEFAULT_TIMEOUT_MS}) + --output Write machine-readable JSON to a file + --json Emit machine-readable JSON + --help, -h Show this text + +Case ids: + ${GATEWAY_CASES.map((benchCase) => `${benchCase.id} (${benchCase.name})`).join("\n ")} +`); +} + function median(values: number[]): number { const sorted = [...values].toSorted((a, b) => a - b); const middle = Math.floor(sorted.length / 2); @@ -796,6 +822,11 @@ function printResult(result: CaseResult): void { } async function main() { + if (hasHelpFlag()) { + printUsage(); + return; + } + const options = parseOptions(); const results: CaseResult[] = []; for (const benchCase of options.cases) { diff --git a/src/auto-reply/reply/agent-runner-helpers.test.ts b/src/auto-reply/reply/agent-runner-helpers.test.ts index 797742f8aa7..1d7ceaf61ff 100644 --- a/src/auto-reply/reply/agent-runner-helpers.test.ts +++ b/src/auto-reply/reply/agent-runner-helpers.test.ts @@ -36,8 +36,9 @@ const { describe("agent runner helpers", () => { beforeEach(() => { - hoisted.loadSessionStoreMock.mockClear(); - hoisted.scheduleFollowupDrainMock.mockClear(); + vi.useRealTimers(); + hoisted.loadSessionStoreMock.mockReset(); + hoisted.scheduleFollowupDrainMock.mockReset(); }); it("detects audio payloads from mediaUrl/mediaUrls", () => { @@ -71,6 +72,30 @@ describe("agent runner helpers", () => { expect(shouldEmitOutput()).toBe(true); }); + it("caches session verbose reads briefly while still refreshing live changes", () => { + vi.useFakeTimers(); + vi.setSystemTime(1_000); + hoisted.loadSessionStoreMock.mockReturnValue({ + "agent:main:main": { verboseLevel: "full" }, + }); + const shouldEmitOutput = createShouldEmitToolOutput({ + sessionKey: "agent:main:main", + storePath: "/tmp/store.json", + resolvedVerboseLevel: "off", + }); + + expect(shouldEmitOutput()).toBe(true); + hoisted.loadSessionStoreMock.mockReturnValue({ + "agent:main:main": { verboseLevel: "off" }, + }); + expect(shouldEmitOutput()).toBe(true); + expect(hoisted.loadSessionStoreMock).toHaveBeenCalledOnce(); + + vi.setSystemTime(1_251); + expect(shouldEmitOutput()).toBe(false); + expect(hoisted.loadSessionStoreMock).toHaveBeenCalledTimes(2); + }); + it("falls back when store read fails or session value is invalid", () => { hoisted.loadSessionStoreMock.mockImplementation(() => { throw new Error("boom"); diff --git a/src/auto-reply/reply/agent-runner-helpers.ts b/src/auto-reply/reply/agent-runner-helpers.ts index 18f63c5b741..00cd20ce43c 100644 --- a/src/auto-reply/reply/agent-runner-helpers.ts +++ b/src/auto-reply/reply/agent-runner-helpers.ts @@ -21,7 +21,9 @@ type VerboseGateParams = { resolvedVerboseLevel: VerboseLevel; }; -function resolveCurrentVerboseLevel(params: VerboseGateParams): VerboseLevel | undefined { +const VERBOSE_GATE_SESSION_REFRESH_MS = 250; + +function readCurrentVerboseLevel(params: VerboseGateParams): VerboseLevel | undefined { if (!params.sessionKey || !params.storePath) { return undefined; } @@ -37,14 +39,34 @@ function resolveCurrentVerboseLevel(params: VerboseGateParams): VerboseLevel | u } } +function createCurrentVerboseLevelResolver( + params: VerboseGateParams, +): () => VerboseLevel | undefined { + let cachedLevel: VerboseLevel | undefined; + let cachedAtMs = Number.NEGATIVE_INFINITY; + return () => { + if (!params.sessionKey || !params.storePath) { + return undefined; + } + const now = Date.now(); + if (now - cachedAtMs < VERBOSE_GATE_SESSION_REFRESH_MS) { + return cachedLevel; + } + cachedLevel = readCurrentVerboseLevel(params); + cachedAtMs = now; + return cachedLevel; + }; +} + function createVerboseGate( params: VerboseGateParams, shouldEmit: (level: VerboseLevel) => boolean, ): () => boolean { // Normalize verbose values from session store/config so false/"false" still means off. const fallbackVerbose = params.resolvedVerboseLevel; + const resolveCurrentVerboseLevel = createCurrentVerboseLevelResolver(params); return () => { - return shouldEmit(resolveCurrentVerboseLevel(params) ?? fallbackVerbose); + return shouldEmit(resolveCurrentVerboseLevel() ?? fallbackVerbose); }; } diff --git a/src/cli/ports.ts b/src/cli/ports.ts index cc01e9c57de..3555db02096 100644 --- a/src/cli/ports.ts +++ b/src/cli/ports.ts @@ -276,6 +276,10 @@ export async function forceFreePortAndWait( killed = killPortWithFuser(port, "SIGTERM"); } + if (killed.length === 0) { + return { killed, waitedMs: 0, escalatedToSigkill: false }; + } + const checkBusy = async (): Promise => useFuserFallback ? isPortBusy(port) : listPortListeners(port).length > 0; diff --git a/src/cli/program.force.test.ts b/src/cli/program.force.test.ts index bca24ba6288..8b4f291b4e8 100644 --- a/src/cli/program.force.test.ts +++ b/src/cli/program.force.test.ts @@ -59,6 +59,23 @@ describe("gateway --force helpers", () => { expect(listPortListeners(18789)).toEqual([]); }); + it("does not re-scan lsof when no listeners were killed", async () => { + (execFileSync as unknown as Mock).mockImplementation(() => { + const err = new Error("no matches") as NodeJS.ErrnoException & { status?: number }; + err.status = 1; // lsof uses exit 1 for no matches + throw err; + }); + + const result = await forceFreePortAndWait(18789, { timeoutMs: 500, intervalMs: 100 }); + + expect(result).toEqual({ + killed: [], + waitedMs: 0, + escalatedToSigkill: false, + }); + expect(execFileSync).toHaveBeenCalledOnce(); + }); + it("throws when lsof missing", () => { (execFileSync as unknown as Mock).mockImplementation(() => { const err = new Error("not found") as NodeJS.ErrnoException; diff --git a/test/scripts/bench-gateway-startup.test.ts b/test/scripts/bench-gateway-startup.test.ts new file mode 100644 index 00000000000..f6477726318 --- /dev/null +++ b/test/scripts/bench-gateway-startup.test.ts @@ -0,0 +1,23 @@ +import { spawnSync } from "node:child_process"; +import { describe, expect, it } from "vitest"; + +describe("gateway startup benchmark script", () => { + it("prints help without running benchmark cases", () => { + const result = spawnSync( + process.execPath, + ["--import", "tsx", "scripts/bench-gateway-startup.ts", "--help"], + { + cwd: process.cwd(), + encoding: "utf8", + env: process.env, + }, + ); + + expect(result.status).toBe(0); + expect(result.stdout).toContain("OpenClaw Gateway startup benchmark"); + expect(result.stdout).toContain("--case "); + expect(result.stdout).toContain("default (gateway default)"); + expect(result.stdout).not.toContain("[gateway-startup-bench]"); + expect(result.stderr).toBe(""); + }); +});