From 8a8fdc971ca26e80c1cf8d0d0046943862dc1998 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sat, 11 Apr 2026 12:50:45 +0100
Subject: [PATCH] perf: share web boundary source scans

---
 .../check-web-fetch-provider-boundaries.mjs  | 43 ++--------
 .../check-web-search-provider-boundaries.mjs | 56 +++----------
 scripts/lib/source-file-scan-cache.mjs       | 80 +++++++++++++++++++
 3 files changed, 97 insertions(+), 82 deletions(-)
 create mode 100644 scripts/lib/source-file-scan-cache.mjs

diff --git a/scripts/check-web-fetch-provider-boundaries.mjs b/scripts/check-web-fetch-provider-boundaries.mjs
index 0b5bd41532b..d53a25dff1e 100644
--- a/scripts/check-web-fetch-provider-boundaries.mjs
+++ b/scripts/check-web-fetch-provider-boundaries.mjs
@@ -1,8 +1,8 @@
 #!/usr/bin/env node
 
-import { promises as fs } from "node:fs";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
+import { collectSourceFileContents } from "./lib/source-file-scan-cache.mjs";
 import { runAsScript } from "./lib/ts-guard-utils.mjs";
 
 const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
@@ -32,45 +32,18 @@ const suspiciousPatterns = [
   /id:\s*"firecrawl"/,
 ];
 
-async function walkFiles(rootDir) {
-  const out = [];
-  let entries = [];
-  try {
-    entries = await fs.readdir(rootDir, { withFileTypes: true });
-  } catch (error) {
-    if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
-      return out;
-    }
-    throw error;
-  }
-  for (const entry of entries) {
-    const entryPath = path.join(rootDir, entry.name);
-    if (entry.isDirectory()) {
-      if (!ignoredDirNames.has(entry.name)) {
-        out.push(...(await walkFiles(entryPath)));
-      }
-      continue;
-    }
-    if (entry.isFile() && scanExtensions.has(path.extname(entry.name))) {
-      out.push(entryPath);
-    }
-  }
-  return out;
-}
-
-function normalizeRepoPath(filePath) {
-  return path.relative(repoRoot, filePath).split(path.sep).join("/");
-}
-
 export async function collectWebFetchProviderBoundaryViolations() {
-  const files = await walkFiles(path.join(repoRoot, "src"));
   const violations = [];
-  for (const filePath of files) {
-    const relativeFile = normalizeRepoPath(filePath);
+  const files = await collectSourceFileContents({
+    repoRoot,
+    scanRoots: ["src"],
+    scanExtensions,
+    ignoredDirNames,
+  });
+  for (const { relativeFile, content } of files) {
     if (allowedFiles.has(relativeFile) || relativeFile.includes(".test.")) {
       continue;
     }
-    const content = await fs.readFile(filePath, "utf8");
     const lines = content.split(/\r?\n/);
     for (const [index, line] of lines.entries()) {
       if (!line.includes("firecrawl") && !line.includes("Firecrawl")) {
diff --git a/scripts/check-web-search-provider-boundaries.mjs b/scripts/check-web-search-provider-boundaries.mjs
index 7371f5a44ad..671336e3fcf 100644
--- a/scripts/check-web-search-provider-boundaries.mjs
+++ b/scripts/check-web-search-provider-boundaries.mjs
@@ -3,11 +3,8 @@
 import { promises as fs } from "node:fs";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
-import {
-  diffInventoryEntries,
-  normalizeRepoPath,
-  runBaselineInventoryCheck,
-} from "./lib/guard-inventory-utils.mjs";
+import { diffInventoryEntries, runBaselineInventoryCheck } from "./lib/guard-inventory-utils.mjs";
+import { collectSourceFileContents } from "./lib/source-file-scan-cache.mjs";
 import { runAsScript } from "./lib/ts-guard-utils.mjs";
 
 const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
@@ -68,38 +65,6 @@ const ignoredFiles = new Set([
 
 let webSearchProviderInventoryPromise;
 
-async function walkFiles(rootDir) {
-  const out = [];
-  let entries = [];
-  try {
-    entries = await fs.readdir(rootDir, { withFileTypes: true });
-  } catch (error) {
-    if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
-      return out;
-    }
-    throw error;
-  }
-  entries.sort((left, right) => left.name.localeCompare(right.name));
-  for (const entry of entries) {
-    const entryPath = path.join(rootDir, entry.name);
-    if (entry.isDirectory()) {
-      if (ignoredDirNames.has(entry.name)) {
-        continue;
-      }
-      out.push(...(await walkFiles(entryPath)));
-      continue;
-    }
-    if (!entry.isFile()) {
-      continue;
-    }
-    if (!scanExtensions.has(path.extname(entry.name))) {
-      continue;
-    }
-    out.push(entryPath);
-  }
-  return out;
-}
-
 function compareInventoryEntries(left, right) {
   return (
     left.provider.localeCompare(right.provider) ||
@@ -192,20 +157,17 @@ export async function collectWebSearchProviderBoundaryInventory() {
   if (!webSearchProviderInventoryPromise) {
     webSearchProviderInventoryPromise = (async () => {
       const inventory = [];
-      const files = (
-        await Promise.all(scanRoots.map(async (root) => await walkFiles(path.join(repoRoot, root))))
-      )
-        .flat()
-        .toSorted((left, right) =>
-          normalizeRepoPath(repoRoot, left).localeCompare(normalizeRepoPath(repoRoot, right)),
-        );
+      const files = await collectSourceFileContents({
+        repoRoot,
+        scanRoots,
+        scanExtensions,
+        ignoredDirNames,
+      });
 
-      for (const filePath of files) {
-        const relativeFile = normalizeRepoPath(repoRoot, filePath);
+      for (const { relativeFile, content } of files) {
         if (ignoredFiles.has(relativeFile) || relativeFile.includes(".test.")) {
           continue;
         }
-        const content = await fs.readFile(filePath, "utf8");
         const lines = content.split(/\r?\n/);
 
         if (relativeFile === "src/plugins/web-search-providers.ts") {
diff --git a/scripts/lib/source-file-scan-cache.mjs b/scripts/lib/source-file-scan-cache.mjs
new file mode 100644
index 00000000000..b7761b87722
--- /dev/null
+++ b/scripts/lib/source-file-scan-cache.mjs
@@ -0,0 +1,80 @@
+import { promises as fs } from "node:fs";
+import path from "node:path";
+
+const scanCache = new Map();
+
+function normalizeRepoPath(repoRoot, filePath) {
+  return path.relative(repoRoot, filePath).split(path.sep).join("/");
+}
+
+async function walkFiles(params, rootDir) {
+  const out = [];
+  let entries = [];
+  try {
+    entries = await fs.readdir(rootDir, { withFileTypes: true });
+  } catch (error) {
+    if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
+      return out;
+    }
+    throw error;
+  }
+  entries.sort((left, right) => left.name.localeCompare(right.name));
+  for (const entry of entries) {
+    const entryPath = path.join(rootDir, entry.name);
+    if (entry.isDirectory()) {
+      if (!params.ignoredDirNames.has(entry.name)) {
+        out.push(...(await walkFiles(params, entryPath)));
+      }
+      continue;
+    }
+    if (entry.isFile() && params.scanExtensions.has(path.extname(entry.name))) {
+      out.push(entryPath);
+    }
+  }
+  return out;
+}
+
+export async function collectSourceFileContents(params) {
+  const cacheKey = JSON.stringify({
+    repoRoot: params.repoRoot,
+    scanRoots: params.scanRoots,
+    scanExtensions: [...params.scanExtensions].toSorted((left, right) => left.localeCompare(right)),
+    ignoredDirNames: [...params.ignoredDirNames].toSorted((left, right) =>
+      left.localeCompare(right),
+    ),
+  });
+  const cached = scanCache.get(cacheKey);
+  if (cached) {
+    return await cached;
+  }
+
+  const promise = (async () => {
+    const files = (
+      await Promise.all(
+        params.scanRoots.map(async (root) => walkFiles(params, path.join(params.repoRoot, root))),
+      )
+    )
+      .flat()
+      .toSorted((left, right) =>
+        normalizeRepoPath(params.repoRoot, left).localeCompare(
+          normalizeRepoPath(params.repoRoot, right),
+        ),
+      );
+
+    return await Promise.all(
+      files.map(async (filePath) => ({
+        filePath,
+        relativeFile: normalizeRepoPath(params.repoRoot, filePath),
+        content: await fs.readFile(filePath, "utf8"),
+      })),
+    );
+  })();
+
+  scanCache.set(cacheKey, promise);
+  try {
+    return await promise;
+  } catch (error) {
+    scanCache.delete(cacheKey);
+    throw error;
+  }
+}