perf: share web boundary source scans

This commit is contained in:
Peter Steinberger
2026-04-11 12:50:45 +01:00
parent 893a0f469a
commit 8a8fdc971c
3 changed files with 97 additions and 82 deletions

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env node
import { promises as fs } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { collectSourceFileContents } from "./lib/source-file-scan-cache.mjs";
import { runAsScript } from "./lib/ts-guard-utils.mjs";
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
@@ -32,45 +32,18 @@ const suspiciousPatterns = [
/id:\s*"firecrawl"/,
];
/**
 * Recursively lists the files beneath `rootDir` whose extension is accepted by the
 * module-level `scanExtensions`, without descending into directories whose name is
 * in the module-level `ignoredDirNames`.
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {Promise<string[]>} Matching file paths, in the order `readdir` reported them.
 *   A directory that does not exist (ENOENT) yields an empty list.
 * @throws Re-throws any `readdir` failure other than ENOENT.
 */
async function walkFiles(rootDir) {
  let dirents;
  try {
    dirents = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    const isMissingDir =
      error && typeof error === "object" && "code" in error && error.code === "ENOENT";
    if (!isMissingDir) {
      throw error;
    }
    // A missing scan root is treated as "nothing to scan", not as a failure.
    return [];
  }

  const found = [];
  for (const dirent of dirents) {
    const fullPath = path.join(rootDir, dirent.name);
    if (!dirent.isDirectory()) {
      if (dirent.isFile() && scanExtensions.has(path.extname(dirent.name))) {
        found.push(fullPath);
      }
      continue;
    }
    if (ignoredDirNames.has(dirent.name)) {
      continue;
    }
    const nested = await walkFiles(fullPath);
    found.push(...nested);
  }
  return found;
}
/**
 * Converts a file path into a path relative to the module-level `repoRoot`,
 * using "/" as the separator regardless of the platform separator.
 *
 * @param {string} filePath - Path to convert.
 * @returns {string} Repo-relative path with forward slashes.
 */
function normalizeRepoPath(filePath) {
  const relativePath = path.relative(repoRoot, filePath);
  return relativePath.replaceAll(path.sep, "/");
}
export async function collectWebFetchProviderBoundaryViolations() {
const files = await walkFiles(path.join(repoRoot, "src"));
const violations = [];
for (const filePath of files) {
const relativeFile = normalizeRepoPath(filePath);
const files = await collectSourceFileContents({
repoRoot,
scanRoots: ["src"],
scanExtensions,
ignoredDirNames,
});
for (const { relativeFile, content } of files) {
if (allowedFiles.has(relativeFile) || relativeFile.includes(".test.")) {
continue;
}
const content = await fs.readFile(filePath, "utf8");
const lines = content.split(/\r?\n/);
for (const [index, line] of lines.entries()) {
if (!line.includes("firecrawl") && !line.includes("Firecrawl")) {

View File

@@ -3,11 +3,8 @@
import { promises as fs } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import {
diffInventoryEntries,
normalizeRepoPath,
runBaselineInventoryCheck,
} from "./lib/guard-inventory-utils.mjs";
import { diffInventoryEntries, runBaselineInventoryCheck } from "./lib/guard-inventory-utils.mjs";
import { collectSourceFileContents } from "./lib/source-file-scan-cache.mjs";
import { runAsScript } from "./lib/ts-guard-utils.mjs";
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
@@ -68,38 +65,6 @@ const ignoredFiles = new Set([
let webSearchProviderInventoryPromise;
/**
 * Recursively lists the files beneath `rootDir` whose extension is accepted by the
 * module-level `scanExtensions`. Entries of each directory are visited in
 * `localeCompare` order of their names, and directories named in the module-level
 * `ignoredDirNames` are not entered.
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {Promise<string[]>} Matching file paths; empty when `rootDir` does not exist.
 * @throws Re-throws any `readdir` failure other than ENOENT.
 */
async function walkFiles(rootDir) {
  let listing;
  try {
    listing = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    const isMissingDir =
      error && typeof error === "object" && "code" in error && error.code === "ENOENT";
    if (isMissingDir) {
      return [];
    }
    throw error;
  }

  // Sort by name so the traversal order does not depend on the order readdir returns.
  listing.sort((a, b) => a.name.localeCompare(b.name));

  const matches = [];
  for (const item of listing) {
    const itemPath = path.join(rootDir, item.name);
    if (item.isDirectory()) {
      if (!ignoredDirNames.has(item.name)) {
        const nested = await walkFiles(itemPath);
        matches.push(...nested);
      }
    } else if (item.isFile() && scanExtensions.has(path.extname(item.name))) {
      matches.push(itemPath);
    }
  }
  return matches;
}
function compareInventoryEntries(left, right) {
return (
left.provider.localeCompare(right.provider) ||
@@ -192,20 +157,17 @@ export async function collectWebSearchProviderBoundaryInventory() {
if (!webSearchProviderInventoryPromise) {
webSearchProviderInventoryPromise = (async () => {
const inventory = [];
const files = (
await Promise.all(scanRoots.map(async (root) => await walkFiles(path.join(repoRoot, root))))
)
.flat()
.toSorted((left, right) =>
normalizeRepoPath(repoRoot, left).localeCompare(normalizeRepoPath(repoRoot, right)),
);
const files = await collectSourceFileContents({
repoRoot,
scanRoots,
scanExtensions,
ignoredDirNames,
});
for (const filePath of files) {
const relativeFile = normalizeRepoPath(repoRoot, filePath);
for (const { relativeFile, content } of files) {
if (ignoredFiles.has(relativeFile) || relativeFile.includes(".test.")) {
continue;
}
const content = await fs.readFile(filePath, "utf8");
const lines = content.split(/\r?\n/);
if (relativeFile === "src/plugins/web-search-providers.ts") {

View File

@@ -0,0 +1,80 @@
import { promises as fs } from "node:fs";
import path from "node:path";
const scanCache = new Map();
/**
 * Expresses `filePath` relative to `repoRoot`, using "/" as the separator
 * regardless of the platform separator.
 *
 * @param {string} repoRoot - Directory the result is relative to.
 * @param {string} filePath - Path to convert.
 * @returns {string} Relative path with forward slashes ("" when both paths are equal).
 */
function normalizeRepoPath(repoRoot, filePath) {
  const relativePath = path.relative(repoRoot, filePath);
  return relativePath.replaceAll(path.sep, "/");
}
/**
 * Recursively lists the files beneath `rootDir` that match the scan parameters.
 * Entries of each directory are visited in `localeCompare` order of their names.
 *
 * @param {object} params - Scan configuration.
 * @param {{ has(name: string): boolean }} params.scanExtensions - Accepted file
 *   extensions, compared against `path.extname` of each file name.
 * @param {{ has(name: string): boolean }} params.ignoredDirNames - Directory names
 *   that are not entered.
 * @param {string} rootDir - Directory to walk.
 * @returns {Promise<string[]>} Matching file paths; empty when `rootDir` does not exist.
 * @throws Re-throws any `readdir` failure other than ENOENT.
 */
async function walkFiles(params, rootDir) {
  let dirents;
  try {
    dirents = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    const isMissingDir =
      error && typeof error === "object" && "code" in error && error.code === "ENOENT";
    if (!isMissingDir) {
      throw error;
    }
    // A missing scan root is treated as "nothing to scan", not as a failure.
    return [];
  }

  // Sort by name so the traversal order does not depend on the order readdir returns.
  dirents.sort((a, b) => a.name.localeCompare(b.name));

  const collected = [];
  for (const dirent of dirents) {
    const fullPath = path.join(rootDir, dirent.name);
    if (!dirent.isDirectory()) {
      if (dirent.isFile() && params.scanExtensions.has(path.extname(dirent.name))) {
        collected.push(fullPath);
      }
      continue;
    }
    if (params.ignoredDirNames.has(dirent.name)) {
      continue;
    }
    const nested = await walkFiles(params, fullPath);
    collected.push(...nested);
  }
  return collected;
}
/**
 * Walks the given scan roots, reads every matching file as UTF-8 and returns the
 * results sorted by repo-relative path. The scan is memoized in the module-level
 * `scanCache`, keyed by the serialized parameters, so callers passing the same
 * parameters share a single scan.
 *
 * Callers that hit the same cache entry receive the same array instance, so the
 * result should be treated as read-only.
 *
 * @param {object} params - Scan configuration.
 * @param {string} params.repoRoot - Root directory; scan roots and relative paths
 *   are resolved against it.
 * @param {string[]} params.scanRoots - Directories (relative to `repoRoot`) to walk.
 * @param {Iterable<string>} params.scanExtensions - Accepted file extensions; must
 *   also expose `has()` because the directory walk queries it.
 * @param {Iterable<string>} params.ignoredDirNames - Directory names to skip; must
 *   also expose `has()`.
 * @returns {Promise<Array<{ filePath: string, relativeFile: string, content: string }>>}
 *   One record per matching file, ordered by `relativeFile` using `localeCompare`.
 * @throws Re-throws any walk or read failure; a failed scan is evicted from the
 *   cache so a later call can retry.
 */
export async function collectSourceFileContents(params) {
  const byLocale = (left, right) => left.localeCompare(right);
  // Set-like inputs are sorted so insertion order does not produce distinct cache keys.
  const cacheKey = JSON.stringify({
    repoRoot: params.repoRoot,
    scanRoots: params.scanRoots,
    scanExtensions: [...params.scanExtensions].toSorted(byLocale),
    ignoredDirNames: [...params.ignoredDirNames].toSorted(byLocale),
  });

  const cached = scanCache.get(cacheKey);
  if (cached) {
    return await cached;
  }

  const promise = (async () => {
    const filesPerRoot = await Promise.all(
      params.scanRoots.map(async (root) => walkFiles(params, path.join(params.repoRoot, root))),
    );
    // Compute each relative path once and sort on it, instead of recomputing it
    // twice per comparison and again when building the result records.
    const located = filesPerRoot
      .flat()
      .map((filePath) => ({
        filePath,
        relativeFile: normalizeRepoPath(params.repoRoot, filePath),
      }))
      .toSorted((left, right) => left.relativeFile.localeCompare(right.relativeFile));
    return await Promise.all(
      located.map(async ({ filePath, relativeFile }) => ({
        filePath,
        relativeFile,
        content: await fs.readFile(filePath, "utf8"),
      })),
    );
  })();

  // Store the in-flight promise so concurrent callers join the same scan.
  scanCache.set(cacheKey, promise);
  try {
    return await promise;
  } catch (error) {
    // Do not keep a rejected promise cached; allow the next caller to rescan.
    scanCache.delete(cacheKey);
    throw error;
  }
}