From e4cee2eb697f7efaa09314ac4723a9afe6d4caaf Mon Sep 17 00:00:00 2001 From: rolandrscheel Date: Thu, 14 May 2026 06:20:40 +0200 Subject: [PATCH] perf(gateway): cache session list resolver lookups Refs #75839.

Rebases and lands the sessions.list resolver-cache fix from #77187 after maintainer conflict repair. The change keeps cache state scoped to a single sessions.list call and memoizes deterministic per-row resolver work for repeated provider/model tuples.

Verification:
- pnpm test src/gateway/session-utils.perf.test.ts src/gateway/session-utils.test.ts
- pnpm exec oxfmt --check --threads=1 src/gateway/session-utils.ts src/gateway/session-utils.perf.test.ts scripts/github/real-behavior-proof-policy.mjs
- git diff --check HEAD -- CHANGELOG.md scripts/github/real-behavior-proof-policy.mjs src/gateway/session-utils.perf.test.ts src/gateway/session-utils.ts
- GitHub PR checks: 87 passing, CodeQL neutral, 21 skipped

Co-authored-by: OpenClaw Agent --- CHANGELOG.md | 1 + scripts/github/real-behavior-proof-policy.mjs | 2 + src/gateway/session-utils.perf.test.ts | 88 +++++++++++++++++++ src/gateway/session-utils.ts | 73 +++++++++++++-- 4 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 src/gateway/session-utils.perf.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 41e2cbc5af2..72db3b3f9bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -590,6 +590,7 @@ Docs: https://docs.openclaw.ai - CLI/migrate: show native Codex plugin names before truncated plan items and prompt for plugin activation explicitly during interactive Codex migration instead of silently keeping every planned plugin. Thanks @kevinslin. - CLI/migrate: leave already configured target Codex plugins unchecked in the interactive plugin selector and show a `plugin exists` conflict hint while keeping new plugin activations selected by default. Thanks @kevinslin. 
- CLI/migrate: return cleanly without apply confirmation when interactive Codex migration leaves both skill copies and native plugin activations unselected. Thanks @kevinslin. +- Gateway/sessions: extend the per-call sessions-list `rowContext` cache with memoization for `resolveSessionDisplayModelIdentityRef`, thinking metadata, and `resolveModelCostConfig` so deterministic per-row resolvers run once per unique `(provider, model[, agentId])` tuple instead of once per session. Cuts CPU on `sessions.list` for stores with many sessions sharing a small set of model tuples; behavior is unchanged for callers that pass no `rowContext`. Thanks @rolandrscheel. - Cron CLI: add `openclaw cron list --agent `, normalize the requested agent id, and include jobs without a stored agent id under the configured default agent while keeping `cron list` unfiltered when no agent is supplied. Fixes #77118. Thanks @zhanggttry. - Slack/performance: reduce message preparation, stream recipient lookup, and thread-context allocation overhead on Slack reply hot paths. Thanks @vincentkoc. - Control UI/chat: strip untrusted sender metadata from live streams and transcript display, preserve canvas preview anchors, and stop operator UI clients from injecting their internal client id as sender identity. Fixes #78739. Thanks @tmimmanuel, @guguangxin-eng, @hclsys, and @BunsDev. diff --git a/scripts/github/real-behavior-proof-policy.mjs b/scripts/github/real-behavior-proof-policy.mjs index b8e8e6d9af6..81620dc099e 100644 --- a/scripts/github/real-behavior-proof-policy.mjs +++ b/scripts/github/real-behavior-proof-policy.mjs @@ -112,6 +112,8 @@ export function hasProofOverride(labels) { } export function extractRealBehaviorProofSection(body = "") { + // Normalize CRLF → LF so regexes and section slicing see GitHub web-editor PR + // bodies the same way as locally-authored Markdown. 
const normalizedBody = normalizeLineEndings(body); const headingRegex = /^#{2,6}\s+real behavior proof\b[^\n]*$/gim; const match = headingRegex.exec(normalizedBody); diff --git a/src/gateway/session-utils.perf.test.ts b/src/gateway/session-utils.perf.test.ts new file mode 100644 index 00000000000..69c76865ebc --- /dev/null +++ b/src/gateway/session-utils.perf.test.ts @@ -0,0 +1,88 @@ +import path from "node:path"; +import { describe, test, expect, vi } from "vitest"; +import * as thinking from "../auto-reply/thinking.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { resetConfigRuntimeState, setRuntimeConfigSnapshot } from "../config/config.js"; +import type { SessionEntry } from "../config/sessions.js"; +import { createEmptyPluginRegistry } from "../plugins/registry-empty.js"; +import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plugins/runtime.js"; +import { withStateDirEnv } from "../test-helpers/state-dir-env.js"; +import * as usageFormat from "../utils/usage-format.js"; +import { listSessionsFromStore } from "./session-utils.js"; + +/** + * Regression smoke for the per-list rowContext resolver cache. The bug we are + * guarding against is O(rows) scaling of deterministic resolvers whose results + * only depend on `(provider, model[, agentId])`: with N sessions sharing K + * unique model tuples, the cached path must perform at most O(K) underlying + * resolver calls -- not O(N). + * + * We assert call counts directly instead of a wall-time bound because shared + * CI runners cannot give a stable wall-time signal, and call-count regressions + * are the actual scaling failure mode we care about. 
+ */ +describe("listSessionsFromStore resolver cache", () => { + test("collapses non-lightweight per-row resolver work to O(unique provider/model tuples)", async () => { + await withStateDirEnv("openclaw-perf-", async ({ stateDir }) => { + resetPluginRuntimeStateForTest(); + setActivePluginRegistry(createEmptyPluginRegistry()); + const cfg: OpenClawConfig = { + agents: { + defaults: { model: { primary: "google-vertex/gemini-3-flash-preview" } }, + }, + } as OpenClawConfig; + resetConfigRuntimeState(); + setRuntimeConfigSnapshot(cfg); + + const tuples: Array<{ modelProvider: string; model: string }> = [ + { modelProvider: "google-vertex", model: "gemini-3-flash-preview" }, + { modelProvider: "openai", model: "gpt-5" }, + { modelProvider: "anthropic", model: "claude-opus-4-7" }, + { modelProvider: "openrouter", model: "z-ai/glm-5" }, + { modelProvider: "google", model: "gemini-2.5-pro" }, + ]; + + const store: Record = {}; + const now = Date.now(); + const rowCount = 30; + for (let i = 0; i < rowCount; i++) { + const tuple = tuples[i % tuples.length]; + store[`agent:default:webchat:dm:${i}`] = { + updatedAt: now - i, + modelProvider: tuple.modelProvider, + model: tuple.model, + inputTokens: 100, + outputTokens: 50, + } as SessionEntry; + } + + const thinkingSpy = vi.spyOn(thinking, "listThinkingLevelOptions"); + const costSpy = vi.spyOn(usageFormat, "resolveModelCostConfig"); + try { + const result = listSessionsFromStore({ + cfg, + storePath: path.join(stateDir, "sessions.json"), + store, + // sessions.list bounds responses to 100 rows by default; the perf + // smoke explicitly opts into the full set so the non-lightweight + // row builder exercises the display-identity, thinking-default, and + // model-cost caches at scale. + opts: { limit: rowCount }, + }); + expect(result.sessions.length).toBe(rowCount); + + // The cache keys on rowContext are (provider, model) or + // (agentId, provider, model). 
With K=5 unique tuples we must see at + // most a small constant number of resolver calls, not O(N=30). A + // pre-cache regression would scale linearly and easily exceed the + // threshold below. + const cacheCallCeiling = tuples.length * 4; + expect(thinkingSpy.mock.calls.length).toBeLessThanOrEqual(cacheCallCeiling); + expect(costSpy.mock.calls.length).toBeLessThanOrEqual(cacheCallCeiling); + } finally { + thinkingSpy.mockRestore(); + costSpy.mockRestore(); + } + }); + }); +}); diff --git a/src/gateway/session-utils.ts b/src/gateway/session-utils.ts index 05bc7f55a40..954c5d3efb5 100644 --- a/src/gateway/session-utils.ts +++ b/src/gateway/session-utils.ts @@ -79,6 +79,7 @@ import { normalizeOptionalLowercaseString, } from "../shared/string-coerce.js"; import { normalizeSessionDeliveryFields } from "../utils/delivery-context.shared.js"; +import type { ModelCostConfig } from "../utils/usage-format.js"; import { estimateUsageCost, resolveModelCostConfig } from "../utils/usage-format.js"; import { resolveSessionStoreAgentId, @@ -294,6 +295,24 @@ function buildCompactionCheckpointPreview( }; } +function resolveModelCostConfigCached( + provider: string | undefined, + model: string | undefined, + cfg: OpenClawConfig, + rowContext?: SessionListRowContext, +): ModelCostConfig | undefined { + if (!rowContext) { + return resolveModelCostConfig({ provider, model, config: cfg }); + } + const key = createSessionRowModelCacheKey(provider, model); + if (rowContext.modelCostConfigByModelRef.has(key)) { + return rowContext.modelCostConfigByModelRef.get(key); + } + const value = resolveModelCostConfig({ provider, model, config: cfg }); + rowContext.modelCostConfigByModelRef.set(key, value); + return value; +} + function resolveEstimatedSessionCostUsd(params: { cfg: OpenClawConfig; provider?: string; @@ -303,6 +322,7 @@ function resolveEstimatedSessionCostUsd(params: { "estimatedCostUsd" | "inputTokens" | "outputTokens" | "cacheRead" | "cacheWrite" >; explicitCostUsd?: number; + 
rowContext?: SessionListRowContext; }): number | undefined { const explicitCostUsd = resolveNonNegativeNumber( params.explicitCostUsd ?? params.entry?.estimatedCostUsd, @@ -322,11 +342,12 @@ function resolveEstimatedSessionCostUsd(params: { ) { return undefined; } - const cost = resolveModelCostConfig({ - provider: params.provider, - model: params.model, - config: params.cfg, - }); + const cost = resolveModelCostConfigCached( + params.provider, + params.model, + params.cfg, + params.rowContext, + ); if (!cost) { return undefined; } @@ -372,6 +393,10 @@ type SessionListRowContext = { subagentRuns: ReturnType; storeChildSessionsByKey: Map; selectedModelByOverrideRef: Map>; + // Per-list memoization for deterministic resolvers that scale linearly with + // session count but only depend on (provider, model[, agentId]). Sessions + // in a single list typically share a small set of those tuples, so caching + // here collapses the work to O(unique tuples) per call. thinkingMetadataByModelRef: Map< string, { @@ -379,6 +404,8 @@ type SessionListRowContext = { defaultLevel: ReturnType; } >; + displayModelIdentityByKey: Map; + modelCostConfigByModelRef: Map; }; function resolveRuntimeChildSessionKeys( @@ -497,6 +524,8 @@ function buildSessionListRowContext(params: { storeChildSessionsByKey: buildStoreChildSessionIndex(params.store, params.now, subagentRuns), selectedModelByOverrideRef: new Map(), thinkingMetadataByModelRef: new Map(), + displayModelIdentityByKey: new Map(), + modelCostConfigByModelRef: new Map(), }; } @@ -622,6 +651,7 @@ function resolveTranscriptUsageFallback(params: { fallbackProvider?: string; fallbackModel?: string; maxTranscriptBytes?: number; + rowContext?: SessionListRowContext; }): { estimatedCostUsd?: number; totalTokens?: number; @@ -668,6 +698,7 @@ function resolveTranscriptUsageFallback(params: { cacheRead: snapshot.cacheRead, cacheWrite: snapshot.cacheWrite, }, + rowContext: params.rowContext, }); return { modelProvider, @@ -1508,6 +1539,30 @@ 
export function resolveSessionModelIdentityRef( return { provider: resolved.provider, model: resolved.model }; } +function resolveSessionDisplayModelIdentityRefCached(params: { + cfg: OpenClawConfig; + agentId: string; + provider?: string; + model?: string; + rowContext?: SessionListRowContext; +}): { provider?: string; model?: string } { + const ctx = params.rowContext; + if (!ctx) { + return resolveSessionDisplayModelIdentityRef(params); + } + const key = `${params.agentId}\u0000${createSessionRowModelCacheKey( + params.provider, + params.model, + )}`; + const cached = ctx.displayModelIdentityByKey.get(key); + if (cached) { + return cached; + } + const value = resolveSessionDisplayModelIdentityRef(params); + ctx.displayModelIdentityByKey.set(key, value); + return value; +} + export function resolveSessionDisplayModelIdentityRef(params: { cfg: OpenClawConfig; agentId: string; @@ -1671,6 +1726,7 @@ export function buildGatewaySessionRow(params: { provider: resolvedModel.provider, model: resolvedModel.model ?? DEFAULT_MODEL, entry, + rowContext, }) === undefined; const transcriptUsage = !skipTranscriptUsage && @@ -1683,6 +1739,7 @@ export function buildGatewaySessionRow(params: { fallbackProvider: resolvedModel.provider, fallbackModel: resolvedModel.model ?? DEFAULT_MODEL, maxTranscriptBytes: params.transcriptUsageMaxBytes, + rowContext: params.rowContext, }) : null; const preferLiveSubagentModelIdentity = @@ -1722,11 +1779,12 @@ export function buildGatewaySessionRow(params: { const selectedOrRuntimeModel = selectedModel?.model ?? model; const rowModelIdentity = lightweight ? 
{ provider: selectedOrRuntimeModelProvider, model: selectedOrRuntimeModel } - : resolveSessionDisplayModelIdentityRef({ + : resolveSessionDisplayModelIdentityRefCached({ cfg, agentId: sessionAgentId, provider: selectedOrRuntimeModelProvider, model: selectedOrRuntimeModel, + rowContext: params.rowContext, }); const rowModelProvider = rowModelIdentity.provider; const rowModel = rowModelIdentity.model; @@ -1746,6 +1804,7 @@ export function buildGatewaySessionRow(params: { provider: rowModelProvider, model: rowModel, entry, + rowContext: params.rowContext, }) ?? resolveNonNegativeNumber(transcriptUsage?.estimatedCostUsd)); const contextTokens = lightweight ? resolvePositiveNumber(entry?.contextTokens) @@ -1817,7 +1876,7 @@ export function buildGatewaySessionRow(params: { abortedLastRun: entry?.abortedLastRun, thinkingLevel: entry?.thinkingLevel, thinkingLevels, - thinkingOptions: thinkingLevels?.map((level) => level.label), + thinkingOptions: thinkingLevels.map((level) => level.label), thinkingDefault, fastMode: entry?.fastMode, verboseLevel: entry?.verboseLevel,