From b7db63751b99f063f7ef94a95aa9414e542b4f78 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 29 Apr 2026 15:24:40 +0100 Subject: [PATCH] perf(agents): cache subagent registry reads --- CHANGELOG.md | 1 + .../subagent-registry.persistence.test.ts | 90 +++++++++++++++++++ src/agents/subagent-registry.store.ts | 70 +++++++++++++++ 3 files changed, 161 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e63de094bcb..3c9d6bce1db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai - CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis. - CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst. - Agents/diagnostics: trace embedded-run startup and preparation stage timings before model I/O, and warn only on severe slow stages, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency without noisy normal logs. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang. +- Agents/subagents: cache persisted subagent run registry reads by file signature while preserving fresh-parse isolation, so busy gateways stop reparsing unchanged `subagents/runs.json` on controller/list/status hot paths. Refs #72338. Thanks @argus-as. - Gateway/clients: wait for the event loop to become responsive before opening Gateway WebSocket RPC/probe/client connections while charging that readiness wait to caller timeouts, so Windows deferred module-evaluation stalls no longer turn healthy loopback gateways into false handshake timeouts across status, TUI, ACP, MCP, node-host, and plugin client paths. Refs #74279 and #48270. Thanks @wongcode and @joost-heijden. - Gateway/Windows: read listener command lines via PowerShell before falling back to `wmic`, so restart health can recognize OpenClaw listeners on modern Windows installs and avoid long anonymous-port waits. Refs #74280. Thanks @zym951223. - Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject. diff --git a/src/agents/subagent-registry.persistence.test.ts b/src/agents/subagent-registry.persistence.test.ts index 82c0c226175..6352e88cfa0 100644 --- a/src/agents/subagent-registry.persistence.test.ts +++ b/src/agents/subagent-registry.persistence.test.ts @@ -328,6 +328,96 @@ describe("subagent registry persistence", () => { expect(after.version).toBe(2); }); + it("reuses unchanged persisted registry snapshots without reparsing runs.json", async () => { + const registryPath = await writePersistedRegistry( + { + version: 2, + runs: { + "run-cached": { + runId: "run-cached", + childSessionKey: "agent:main:subagent:cached", + requesterSessionKey: "agent:main:main", + requesterOrigin: { channel: "telegram", accountId: "cached-account" }, + requesterDisplayKey: "main", + task: "cached persisted run", + cleanup: "keep", + createdAt: 1, + startedAt: 1, + outcome: { status: "ok" }, + }, + }, + }, + { seedChildSessions: false }, + ); + const readSpy = vi.spyOn(fsSync, "readFileSync"); + + const first = loadSubagentRegistryFromDisk(); + first.clear(); + const cachedEntry = loadSubagentRegistryFromDisk().get("run-cached"); + if (!cachedEntry) { + throw new Error("expected cached run"); + } + cachedEntry.endedAt = 999; + cachedEntry.cleanupHandled = true; + if (cachedEntry.requesterOrigin) { + cachedEntry.requesterOrigin.accountId = "mutated-account"; + } + if (cachedEntry.outcome) { + cachedEntry.outcome.status = "error"; + } + const second = loadSubagentRegistryFromDisk(); + + expect(second.get("run-cached")).toMatchObject({ + requesterOrigin: { accountId: "cached-account" }, + outcome: { status: "ok" }, + }); + expect(second.get("run-cached")?.endedAt).toBeUndefined(); + expect(second.get("run-cached")?.cleanupHandled).toBeUndefined(); + expect( + readSpy.mock.calls.filter(([pathname]) => String(pathname) === registryPath), + ).toHaveLength(1); + + await fs.writeFile( + registryPath, + `${JSON.stringify({ + version: 2, + runs: { + "run-updated": { + runId: "run-updated", + childSessionKey: "agent:main:subagent:updated", + requesterSessionKey: "agent:main:main", + requesterDisplayKey: "main", + task: "updated persisted run with a longer payload", + cleanup: "keep", + createdAt: 2, + startedAt: 2, + }, + }, + })}\n`, + "utf8", + ); + + expect(loadSubagentRegistryFromDisk().has("run-updated")).toBe(true); + expect( + readSpy.mock.calls.filter(([pathname]) => String(pathname) === registryPath), + ).toHaveLength(2); + }); + + it("reuses unchanged invalid persisted registry snapshots as empty", async () => { + tempStateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-subagent-")); + process.env.OPENCLAW_STATE_DIR = tempStateDir; + const registryPath = path.join(tempStateDir, "subagents", "runs.json"); + await fs.mkdir(path.dirname(registryPath), { recursive: true }); + await fs.writeFile(registryPath, "{invalid", "utf8"); + const readSpy = vi.spyOn(fsSync, "readFileSync"); + + expect(loadSubagentRegistryFromDisk()).toEqual(new Map()); + expect(loadSubagentRegistryFromDisk()).toEqual(new Map()); + expect( + readSpy.mock.calls.filter(([pathname]) => String(pathname) === registryPath), + ).toHaveLength(1); + }); + it("normalizes persisted and newly registered session keys to canonical trimmed values", async () => { const persisted = { version: 2, diff --git a/src/agents/subagent-registry.store.ts b/src/agents/subagent-registry.store.ts index 4b3a21ac2b9..5e9c90ab330 100644 --- a/src/agents/subagent-registry.store.ts +++ b/src/agents/subagent-registry.store.ts @@ -1,3 +1,4 @@ +import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import { resolveStateDir } from "../config/paths.js"; @@ -21,9 +22,15 @@ type PersistedSubagentRegistryV2 = { type PersistedSubagentRegistry = PersistedSubagentRegistryV1 | PersistedSubagentRegistryV2; const REGISTRY_VERSION = 2 as const; +const MAX_SUBAGENT_REGISTRY_READ_CACHE_ENTRIES = 32; type PersistedSubagentRunRecord = SubagentRunRecord; +type RegistryCacheEntry = { + signature: string; + runs: Map; +}; + type LegacySubagentRunRecord = PersistedSubagentRunRecord & { announceCompletedAt?: unknown; announceHandled?: unknown; @@ -31,6 +38,32 @@ type LegacySubagentRunRecord = PersistedSubagentRunRecord & { requesterAccountId?: unknown; }; +const registryReadCache = new Map(); + +function cloneSubagentRunRecord(entry: SubagentRunRecord): SubagentRunRecord { + return structuredClone(entry); +} + +function cloneSubagentRunMap(runs: Map): Map { + return new Map([...runs].map(([runId, entry]) => [runId, cloneSubagentRunRecord(entry)])); +} + +function setCachedRegistryRead( + pathname: string, + signature: string, + runs: Map, +): void { + registryReadCache.delete(pathname); + registryReadCache.set(pathname, { signature, runs: cloneSubagentRunMap(runs) }); + if (registryReadCache.size <= MAX_SUBAGENT_REGISTRY_READ_CACHE_ENTRIES) { + return; + } + const oldestKey = registryReadCache.keys().next().value; + if (typeof oldestKey === "string") { + registryReadCache.delete(oldestKey); + } +} + function resolveSubagentStateDir(env: NodeJS.ProcessEnv = process.env): string { const explicit = env.OPENCLAW_STATE_DIR?.trim(); if (explicit) { @@ -48,16 +81,30 @@ export function resolveSubagentRegistryPath(): string { export function loadSubagentRegistryFromDisk(): Map { const pathname = resolveSubagentRegistryPath(); + const signature = statRegistryFileSignature(pathname); + if (signature === null) { + registryReadCache.delete(pathname); + return new Map(); + } + const cached = registryReadCache.get(pathname); + if (cached?.signature === signature) { + registryReadCache.delete(pathname); + registryReadCache.set(pathname, cached); + return cloneSubagentRunMap(cached.runs); + } const raw = loadJsonFile(pathname); if (!raw || typeof raw !== "object") { + setCachedRegistryRead(pathname, signature, new Map()); return new Map(); } const record = raw as Partial; if (record.version !== 1 && record.version !== 2) { + setCachedRegistryRead(pathname, signature, new Map()); return new Map(); } const runsRaw = record.runs; if (!runsRaw || typeof runsRaw !== "object") { + setCachedRegistryRead(pathname, signature, new Map()); return new Map(); } const out = new Map(); @@ -123,6 +170,8 @@ export function loadSubagentRegistryFromDisk(): Map { } catch { // ignore migration write failures } + } else { + setCachedRegistryRead(pathname, signature, out); } return out; } @@ -138,4 +187,25 @@ export function saveSubagentRegistryToDisk(runs: Map) runs: serialized, }; saveJsonFile(pathname, out); + const signature = statRegistryFileSignature(pathname); + if (signature === null) { + registryReadCache.delete(pathname); + } else { + setCachedRegistryRead(pathname, signature, runs); + } +} + +function statRegistryFileSignature(pathname: string): string | null { + try { + const stat = fs.statSync(pathname, { bigint: true }); + if (!stat.isFile()) { + return null; + } + return `${stat.dev}:${stat.ino}:${stat.size}:${stat.mtimeNs}:${stat.ctimeNs}`; + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") { + return null; + } + throw error; + } }