fix(scripts): bound source scan file reads

This commit is contained in:
Vincent Koc
2026-05-26 01:39:44 +02:00
parent 2a6b4ed3e2
commit 57748a66fd
2 changed files with 107 additions and 10 deletions

View File

@@ -1,6 +1,7 @@
import { promises as fs } from "node:fs";
import path from "node:path";
const DEFAULT_SOURCE_FILE_READ_CONCURRENCY = 32;
const scanCache = new Map();
function normalizeRepoPath(repoRoot, filePath) {
@@ -34,7 +35,35 @@ async function walkFiles(params, rootDir) {
return out;
}
function normalizeConcurrency(value) {
if (!Number.isInteger(value) || value < 1) {
return DEFAULT_SOURCE_FILE_READ_CONCURRENCY;
}
return value;
}
export async function mapWithConcurrency(items, concurrency, mapper) {
const out = Array.from({ length: items.length });
const workerCount = Math.min(normalizeConcurrency(concurrency), items.length);
let nextIndex = 0;
async function worker() {
for (;;) {
const index = nextIndex;
nextIndex += 1;
if (index >= items.length) {
return;
}
out[index] = await mapper(items[index], index);
}
}
await Promise.all(Array.from({ length: workerCount }, () => worker()));
return out;
}
export async function collectSourceFileContents(params) {
const useCache = !params.readFile;
const cacheKey = JSON.stringify({
repoRoot: params.repoRoot,
scanRoots: params.scanRoots,
@@ -43,9 +72,11 @@ export async function collectSourceFileContents(params) {
left.localeCompare(right),
),
});
const cached = scanCache.get(cacheKey);
if (cached) {
return await cached;
if (useCache) {
const cached = scanCache.get(cacheKey);
if (cached) {
return await cached;
}
}
const promise = (async () => {
@@ -59,22 +90,29 @@ export async function collectSourceFileContents(params) {
normalizeRepoPath(params.repoRoot, left).localeCompare(
normalizeRepoPath(params.repoRoot, right),
),
);
);
return await Promise.all(
files.map(async (filePath) => ({
const readFile = params.readFile ?? fs.readFile;
return await mapWithConcurrency(
files,
params.maxConcurrentReads,
async (filePath) => ({
filePath,
relativeFile: normalizeRepoPath(params.repoRoot, filePath),
content: await fs.readFile(filePath, "utf8"),
})),
content: await readFile(filePath, "utf8"),
}),
);
})();
scanCache.set(cacheKey, promise);
if (useCache) {
scanCache.set(cacheKey, promise);
}
try {
return await promise;
} catch (error) {
scanCache.delete(cacheKey);
if (useCache) {
scanCache.delete(cacheKey);
}
throw error;
}
}

View File

@@ -0,0 +1,59 @@
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { collectSourceFileContents } from "../../scripts/lib/source-file-scan-cache.mjs";
const tempDirs: string[] = [];
async function makeTempRepo() {
const repoRoot = await mkdtemp(path.join(os.tmpdir(), "openclaw-source-scan-"));
tempDirs.push(repoRoot);
return repoRoot;
}
describe("source file scan cache", () => {
afterEach(async () => {
await Promise.all(tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })));
});
it("bounds concurrent source file reads while preserving sorted output", async () => {
const repoRoot = await makeTempRepo();
const srcRoot = path.join(repoRoot, "src");
await mkdir(srcRoot, { recursive: true });
await Promise.all(
Array.from({ length: 9 }, async (_, index) => {
const file = path.join(srcRoot, `file-${index}.ts`);
await writeFile(file, `export const value${index} = ${index};\n`, "utf8");
}),
);
let activeReads = 0;
let maxActiveReads = 0;
const readFile = async (filePath: string) => {
activeReads += 1;
maxActiveReads = Math.max(maxActiveReads, activeReads);
await new Promise((resolve) => setTimeout(resolve, 10));
activeReads -= 1;
return `content:${path.basename(filePath)}`;
};
const files = await collectSourceFileContents({
repoRoot,
scanRoots: ["src"],
scanExtensions: new Set([".ts"]),
ignoredDirNames: new Set(),
maxConcurrentReads: 3,
readFile,
});
expect(maxActiveReads).toBeGreaterThan(1);
expect(maxActiveReads).toBeLessThanOrEqual(3);
expect(files.map((file) => file.relativeFile)).toEqual(
Array.from({ length: 9 }, (_, index) => `src/file-${index}.ts`),
);
expect(files.map((file) => file.content)).toEqual(
Array.from({ length: 9 }, (_, index) => `content:file-${index}.ts`),
);
});
});