From 57748a66fd192d9e891fcf32378eb3e4b643bca8 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 26 May 2026 01:39:44 +0200 Subject: [PATCH] fix(scripts): bound source scan file reads --- scripts/lib/source-file-scan-cache.mjs | 58 ++++++++++++++++---- test/scripts/source-file-scan-cache.test.ts | 59 +++++++++++++++++++++ 2 files changed, 107 insertions(+), 10 deletions(-) create mode 100644 test/scripts/source-file-scan-cache.test.ts diff --git a/scripts/lib/source-file-scan-cache.mjs b/scripts/lib/source-file-scan-cache.mjs index b7761b87722..28da5c8ccef 100644 --- a/scripts/lib/source-file-scan-cache.mjs +++ b/scripts/lib/source-file-scan-cache.mjs @@ -1,6 +1,7 @@ import { promises as fs } from "node:fs"; import path from "node:path"; +const DEFAULT_SOURCE_FILE_READ_CONCURRENCY = 32; const scanCache = new Map(); function normalizeRepoPath(repoRoot, filePath) { @@ -34,7 +35,35 @@ async function walkFiles(params, rootDir) { return out; } +function normalizeConcurrency(value) { + if (!Number.isInteger(value) || value < 1) { + return DEFAULT_SOURCE_FILE_READ_CONCURRENCY; + } + return value; +} + +export async function mapWithConcurrency(items, concurrency, mapper) { + const out = Array.from({ length: items.length }); + const workerCount = Math.min(normalizeConcurrency(concurrency), items.length); + let nextIndex = 0; + + async function worker() { + for (;;) { + const index = nextIndex; + nextIndex += 1; + if (index >= items.length) { + return; + } + out[index] = await mapper(items[index], index); + } + } + + await Promise.all(Array.from({ length: workerCount }, () => worker())); + return out; +} + export async function collectSourceFileContents(params) { + const useCache = !params.readFile; const cacheKey = JSON.stringify({ repoRoot: params.repoRoot, scanRoots: params.scanRoots, @@ -43,9 +72,11 @@ export async function collectSourceFileContents(params) { left.localeCompare(right), ), }); - const cached = scanCache.get(cacheKey); - if (cached) { - return await cached; + if (useCache) { + const cached = scanCache.get(cacheKey); + if (cached) { + return await cached; + } } const promise = (async () => { @@ -59,22 +90,29 @@ export async function collectSourceFileContents(params) { normalizeRepoPath(params.repoRoot, left).localeCompare( normalizeRepoPath(params.repoRoot, right), ), - ); + ); - return await Promise.all( - files.map(async (filePath) => ({ + const readFile = params.readFile ?? fs.readFile; + return await mapWithConcurrency( + files, + params.maxConcurrentReads, + async (filePath) => ({ filePath, relativeFile: normalizeRepoPath(params.repoRoot, filePath), - content: await fs.readFile(filePath, "utf8"), - })), + content: await readFile(filePath, "utf8"), + }), ); })(); - scanCache.set(cacheKey, promise); + if (useCache) { + scanCache.set(cacheKey, promise); + } try { return await promise; } catch (error) { - scanCache.delete(cacheKey); + if (useCache) { + scanCache.delete(cacheKey); + } throw error; } } diff --git a/test/scripts/source-file-scan-cache.test.ts b/test/scripts/source-file-scan-cache.test.ts new file mode 100644 index 00000000000..25f70a59934 --- /dev/null +++ b/test/scripts/source-file-scan-cache.test.ts @@ -0,0 +1,59 @@ +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { collectSourceFileContents } from "../../scripts/lib/source-file-scan-cache.mjs"; + +const tempDirs: string[] = []; + +async function makeTempRepo() { + const repoRoot = await mkdtemp(path.join(os.tmpdir(), "openclaw-source-scan-")); + tempDirs.push(repoRoot); + return repoRoot; +} + +describe("source file scan cache", () => { + afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true }))); + }); + + it("bounds concurrent source file reads while preserving sorted output", async () => { + const repoRoot = await makeTempRepo(); + const srcRoot = path.join(repoRoot, "src"); + await mkdir(srcRoot, { recursive: true }); + await Promise.all( + Array.from({ length: 9 }, async (_, index) => { + const file = path.join(srcRoot, `file-${index}.ts`); + await writeFile(file, `export const value${index} = ${index};\n`, "utf8"); + }), + ); + + let activeReads = 0; + let maxActiveReads = 0; + const readFile = async (filePath: string) => { + activeReads += 1; + maxActiveReads = Math.max(maxActiveReads, activeReads); + await new Promise((resolve) => setTimeout(resolve, 10)); + activeReads -= 1; + return `content:${path.basename(filePath)}`; + }; + + const files = await collectSourceFileContents({ + repoRoot, + scanRoots: ["src"], + scanExtensions: new Set([".ts"]), + ignoredDirNames: new Set(), + maxConcurrentReads: 3, + readFile, + }); + + expect(maxActiveReads).toBeGreaterThan(1); + expect(maxActiveReads).toBeLessThanOrEqual(3); + expect(files.map((file) => file.relativeFile)).toEqual( + Array.from({ length: 9 }, (_, index) => `src/file-${index}.ts`), + ); + expect(files.map((file) => file.content)).toEqual( + Array.from({ length: 9 }, (_, index) => `content:file-${index}.ts`), + ); + }); +});