perf(agents): cache subagent registry reads

This commit is contained in:
Peter Steinberger
2026-04-29 15:24:40 +01:00
parent 34d11d5757
commit b7db63751b
3 changed files with 161 additions and 0 deletions

View File

@@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai
- CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis.
- CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst.
- Agents/diagnostics: trace embedded-run startup and preparation stage timings before model I/O, and warn only on severe slow stages, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency without noisy normal logs. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
- Agents/subagents: cache persisted subagent run registry reads by file signature while preserving fresh-parse isolation, so busy gateways stop reparsing unchanged `subagents/runs.json` on controller/list/status hot paths. Refs #72338. Thanks @argus-as.
- Gateway/clients: wait for the event loop to become responsive before opening Gateway WebSocket RPC/probe/client connections while charging that readiness wait to caller timeouts, so Windows deferred module-evaluation stalls no longer turn healthy loopback gateways into false handshake timeouts across status, TUI, ACP, MCP, node-host, and plugin client paths. Refs #74279 and #48270. Thanks @wongcode and @joost-heijden.
- Gateway/Windows: read listener command lines via PowerShell before falling back to `wmic`, so restart health can recognize OpenClaw listeners on modern Windows installs and avoid long anonymous-port waits. Refs #74280. Thanks @zym951223.
- Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject.

View File

@@ -328,6 +328,96 @@ describe("subagent registry persistence", () => {
expect(after.version).toBe(2);
});
// Verifies three properties of loadSubagentRegistryFromDisk for an unchanged runs.json:
// (1) repeated loads read the file only once, (2) every load returns a copy that callers
// can mutate without affecting later loads, and (3) rewriting the file triggers a re-read.
it("reuses unchanged persisted registry snapshots without reparsing runs.json", async () => {
const registryPath = await writePersistedRegistry(
{
version: 2,
runs: {
"run-cached": {
runId: "run-cached",
childSessionKey: "agent:main:subagent:cached",
requesterSessionKey: "agent:main:main",
requesterOrigin: { channel: "telegram", accountId: "cached-account" },
requesterDisplayKey: "main",
task: "cached persisted run",
cleanup: "keep",
createdAt: 1,
startedAt: 1,
outcome: { status: "ok" },
},
},
},
{ seedChildSessions: false },
);
// Spy installed after the fixture is written, so only reads made by the loads below are counted.
const readSpy = vi.spyOn(fsSync, "readFileSync");
// Clearing the map returned by the first load must not empty what the next load returns.
const first = loadSubagentRegistryFromDisk();
first.clear();
const cachedEntry = loadSubagentRegistryFromDisk().get("run-cached");
if (!cachedEntry) {
throw new Error("expected cached run");
}
// Mutate both top-level fields and nested objects of the returned record;
// none of these changes may be visible in a subsequent load.
cachedEntry.endedAt = 999;
cachedEntry.cleanupHandled = true;
if (cachedEntry.requesterOrigin) {
cachedEntry.requesterOrigin.accountId = "mutated-account";
}
if (cachedEntry.outcome) {
cachedEntry.outcome.status = "error";
}
// Third load: must still reflect the values originally written to disk.
const second = loadSubagentRegistryFromDisk();
expect(second.get("run-cached")).toMatchObject({
requesterOrigin: { accountId: "cached-account" },
outcome: { status: "ok" },
});
expect(second.get("run-cached")?.endedAt).toBeUndefined();
expect(second.get("run-cached")?.cleanupHandled).toBeUndefined();
// Three loads so far, but the registry file itself should have been read exactly once.
expect(
readSpy.mock.calls.filter(([pathname]) => String(pathname) === registryPath),
).toHaveLength(1);
// Replace the file contents with a different run.
// NOTE(review): the longer task string presumably guarantees a size change, so the
// change is detected even if the file timestamps do not advance — confirm.
await fs.writeFile(
registryPath,
`${JSON.stringify({
version: 2,
runs: {
"run-updated": {
runId: "run-updated",
childSessionKey: "agent:main:subagent:updated",
requesterSessionKey: "agent:main:main",
requesterDisplayKey: "main",
task: "updated persisted run with a longer payload",
cleanup: "keep",
createdAt: 2,
startedAt: 2,
},
},
})}\n`,
"utf8",
);
// The rewritten file must be picked up, costing exactly one additional read.
expect(loadSubagentRegistryFromDisk().has("run-updated")).toBe(true);
expect(
readSpy.mock.calls.filter(([pathname]) => String(pathname) === registryPath),
).toHaveLength(2);
});
// Verifies that a runs.json containing invalid JSON loads as an empty map, and that a
// second load of the same unchanged file does not read it again.
it("reuses unchanged invalid persisted registry snapshots as empty", async () => {
tempStateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-subagent-"));
process.env.OPENCLAW_STATE_DIR = tempStateDir;
const registryPath = path.join(tempStateDir, "subagents", "runs.json");
await fs.mkdir(path.dirname(registryPath), { recursive: true });
// Deliberately malformed JSON.
await fs.writeFile(registryPath, "{invalid", "utf8");
const readSpy = vi.spyOn(fsSync, "readFileSync");
// Both loads must yield an empty registry.
expect(loadSubagentRegistryFromDisk()).toEqual(new Map());
expect(loadSubagentRegistryFromDisk()).toEqual(new Map());
// Two loads, but the registry file itself should have been read exactly once.
expect(
readSpy.mock.calls.filter(([pathname]) => String(pathname) === registryPath),
).toHaveLength(1);
});
it("normalizes persisted and newly registered session keys to canonical trimmed values", async () => {
const persisted = {
version: 2,

View File

@@ -1,3 +1,4 @@
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { resolveStateDir } from "../config/paths.js";
@@ -21,9 +22,15 @@ type PersistedSubagentRegistryV2 = {
/** Any on-disk registry shape: either the V1 or the V2 persisted layout. */
type PersistedSubagentRegistry = PersistedSubagentRegistryV1 | PersistedSubagentRegistryV2;
/**
 * Registry schema version constant.
 * NOTE(review): presumably the version stamped into the file on save — confirm against the writer.
 */
const REGISTRY_VERSION = 2 as const;
/**
 * Upper bound on the number of registry paths kept in the read cache; once the cache
 * grows past this, setCachedRegistryRead drops the first key in the cache's iteration order.
 */
const MAX_SUBAGENT_REGISTRY_READ_CACHE_ENTRIES = 32;
/** Alias: a persisted run record has the same shape as the in-memory SubagentRunRecord. */
type PersistedSubagentRunRecord = SubagentRunRecord;
/** One cached registry read: the parsed runs plus the file signature they were read under. */
type RegistryCacheEntry = {
// File signature string (as produced by statRegistryFileSignature) that this snapshot corresponds to.
signature: string;
// Cached runs keyed by run id; stored and handed out as clones, never by reference.
runs: Map<string, SubagentRunRecord>;
};
type LegacySubagentRunRecord = PersistedSubagentRunRecord & {
announceCompletedAt?: unknown;
announceHandled?: unknown;
@@ -31,6 +38,32 @@ type LegacySubagentRunRecord = PersistedSubagentRunRecord & {
requesterAccountId?: unknown;
};
/**
 * Module-level cache of parsed registry snapshots, keyed by registry file path.
 * Entries are deleted and re-inserted on every cache hit and every cache write, so the
 * Map's iteration order runs from least to most recently used.
 */
const registryReadCache = new Map<string, RegistryCacheEntry>();
/**
 * Produces a deep, independent copy of a single run record.
 *
 * @param entry - Record to copy; it is not modified.
 * @returns A structured clone of `entry` that shares no nested objects with it.
 */
function cloneSubagentRunRecord(entry: SubagentRunRecord): SubagentRunRecord {
  const detachedCopy = structuredClone(entry);
  return detachedCopy;
}
/**
 * Copies a run map into a new Map whose values are deep clones of the originals.
 *
 * Insertion order of the source map is preserved. Neither the returned map nor its
 * records share references with `runs`.
 *
 * @param runs - Source map keyed by run id.
 * @returns A new map with the same keys and cloned records.
 */
function cloneSubagentRunMap(runs: Map<string, SubagentRunRecord>): Map<string, SubagentRunRecord> {
  const copy = new Map<string, SubagentRunRecord>();
  for (const [runId, entry] of runs) {
    copy.set(runId, cloneSubagentRunRecord(entry));
  }
  return copy;
}
/**
 * Stores a cloned snapshot of `runs` in the read cache for `pathname`, marking it as the
 * most recently used entry, and evicts one entry if the cache has outgrown its limit.
 *
 * @param pathname - Registry file path used as the cache key.
 * @param signature - File signature the snapshot was read (or written) under.
 * @param runs - Runs to cache; they are cloned, so later caller mutations do not reach the cache.
 */
function setCachedRegistryRead(
  pathname: string,
  signature: string,
  runs: Map<string, SubagentRunRecord>,
): void {
  // Remove first so the subsequent set() appends the key at the end of the Map's
  // iteration order instead of keeping its old position.
  registryReadCache.delete(pathname);
  const snapshot = { signature, runs: cloneSubagentRunMap(runs) };
  registryReadCache.set(pathname, snapshot);

  const overCapacity = registryReadCache.size > MAX_SUBAGENT_REGISTRY_READ_CACHE_ENTRIES;
  if (overCapacity) {
    // The first key in iteration order is the one touched least recently.
    const [stalestPath] = registryReadCache.keys();
    if (typeof stalestPath === "string") {
      registryReadCache.delete(stalestPath);
    }
  }
}
function resolveSubagentStateDir(env: NodeJS.ProcessEnv = process.env): string {
const explicit = env.OPENCLAW_STATE_DIR?.trim();
if (explicit) {
@@ -48,16 +81,30 @@ export function resolveSubagentRegistryPath(): string {
export function loadSubagentRegistryFromDisk(): Map<string, SubagentRunRecord> {
const pathname = resolveSubagentRegistryPath();
const signature = statRegistryFileSignature(pathname);
if (signature === null) {
registryReadCache.delete(pathname);
return new Map();
}
const cached = registryReadCache.get(pathname);
if (cached?.signature === signature) {
registryReadCache.delete(pathname);
registryReadCache.set(pathname, cached);
return cloneSubagentRunMap(cached.runs);
}
const raw = loadJsonFile(pathname);
if (!raw || typeof raw !== "object") {
setCachedRegistryRead(pathname, signature, new Map());
return new Map();
}
const record = raw as Partial<PersistedSubagentRegistry>;
if (record.version !== 1 && record.version !== 2) {
setCachedRegistryRead(pathname, signature, new Map());
return new Map();
}
const runsRaw = record.runs;
if (!runsRaw || typeof runsRaw !== "object") {
setCachedRegistryRead(pathname, signature, new Map());
return new Map();
}
const out = new Map<string, SubagentRunRecord>();
@@ -123,6 +170,8 @@ export function loadSubagentRegistryFromDisk(): Map<string, SubagentRunRecord> {
} catch {
// ignore migration write failures
}
} else {
setCachedRegistryRead(pathname, signature, out);
}
return out;
}
@@ -138,4 +187,25 @@ export function saveSubagentRegistryToDisk(runs: Map<string, SubagentRunRecord>)
runs: serialized,
};
saveJsonFile(pathname, out);
const signature = statRegistryFileSignature(pathname);
if (signature === null) {
registryReadCache.delete(pathname);
} else {
setCachedRegistryRead(pathname, signature, runs);
}
}
/**
 * Builds a change-detection signature for a file from its stat metadata.
 *
 * The signature joins the device id, inode, size, and nanosecond-resolution mtime and
 * ctime (all read as bigints) with ":". Two calls return the same string only while all
 * of those values are unchanged.
 *
 * @param pathname - Path of the file to stat.
 * @returns The signature string, or `null` when the path does not exist or does not
 *   refer to a regular file.
 * @throws Any stat failure other than ENOENT / ENOTDIR (for example EACCES) is rethrown.
 */
function statRegistryFileSignature(pathname: string): string | null {
  try {
    const stat = fs.statSync(pathname, { bigint: true });
    if (!stat.isFile()) {
      return null;
    }
    return `${stat.dev}:${stat.ino}:${stat.size}:${stat.mtimeNs}:${stat.ctimeNs}`;
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    // ENOTDIR means a parent path component is not a directory, so the file cannot
    // exist either; report it as missing, exactly like ENOENT, instead of throwing.
    if (code === "ENOENT" || code === "ENOTDIR") {
      return null;
    }
    throw error;
  }
}