mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
perf: share web boundary source scans
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { promises as fs } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { collectSourceFileContents } from "./lib/source-file-scan-cache.mjs";
|
||||
import { runAsScript } from "./lib/ts-guard-utils.mjs";
|
||||
|
||||
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
||||
@@ -32,45 +32,18 @@ const suspiciousPatterns = [
|
||||
/id:\s*"firecrawl"/,
|
||||
];
|
||||
|
||||
/**
 * Recursively collects scannable source files under `rootDir`.
 *
 * Skips directories listed in the module-level `ignoredDirNames` set and keeps
 * only regular files whose extension is in the module-level `scanExtensions`
 * set. A missing root directory yields an empty result instead of throwing.
 *
 * @param {string} rootDir - Absolute directory to walk.
 * @returns {Promise<string[]>} Absolute paths of matching files, in
 *   deterministic (name-sorted, depth-first) order.
 */
async function walkFiles(rootDir) {
  const out = [];
  let entries = [];
  try {
    entries = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
      return out;
    }
    throw error;
  }
  // fs.readdir order is filesystem-dependent; sort so results are
  // deterministic across platforms, matching the other walkFiles
  // implementations in these guard scripts.
  entries.sort((left, right) => left.name.localeCompare(right.name));
  for (const entry of entries) {
    const entryPath = path.join(rootDir, entry.name);
    if (entry.isDirectory()) {
      if (!ignoredDirNames.has(entry.name)) {
        out.push(...(await walkFiles(entryPath)));
      }
      continue;
    }
    if (entry.isFile() && scanExtensions.has(path.extname(entry.name))) {
      out.push(entryPath);
    }
  }
  return out;
}
|
||||
|
||||
/**
 * Converts an absolute file path into a repo-relative path using forward
 * slashes, so comparisons behave the same on every platform.
 *
 * @param {string} filePath - Absolute path inside the repository.
 * @returns {string} Repo-relative, forward-slash-separated path.
 */
function normalizeRepoPath(filePath) {
  const relativePath = path.relative(repoRoot, filePath);
  return relativePath.replaceAll(path.sep, "/");
}
|
||||
|
||||
export async function collectWebFetchProviderBoundaryViolations() {
|
||||
const files = await walkFiles(path.join(repoRoot, "src"));
|
||||
const violations = [];
|
||||
for (const filePath of files) {
|
||||
const relativeFile = normalizeRepoPath(filePath);
|
||||
const files = await collectSourceFileContents({
|
||||
repoRoot,
|
||||
scanRoots: ["src"],
|
||||
scanExtensions,
|
||||
ignoredDirNames,
|
||||
});
|
||||
for (const { relativeFile, content } of files) {
|
||||
if (allowedFiles.has(relativeFile) || relativeFile.includes(".test.")) {
|
||||
continue;
|
||||
}
|
||||
const content = await fs.readFile(filePath, "utf8");
|
||||
const lines = content.split(/\r?\n/);
|
||||
for (const [index, line] of lines.entries()) {
|
||||
if (!line.includes("firecrawl") && !line.includes("Firecrawl")) {
|
||||
|
||||
@@ -3,11 +3,8 @@
|
||||
import { promises as fs } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import {
|
||||
diffInventoryEntries,
|
||||
normalizeRepoPath,
|
||||
runBaselineInventoryCheck,
|
||||
} from "./lib/guard-inventory-utils.mjs";
|
||||
import { diffInventoryEntries, runBaselineInventoryCheck } from "./lib/guard-inventory-utils.mjs";
|
||||
import { collectSourceFileContents } from "./lib/source-file-scan-cache.mjs";
|
||||
import { runAsScript } from "./lib/ts-guard-utils.mjs";
|
||||
|
||||
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
||||
@@ -68,38 +65,6 @@ const ignoredFiles = new Set([
|
||||
|
||||
let webSearchProviderInventoryPromise;
|
||||
|
||||
/**
 * Recursively lists source files under `rootDir` in deterministic
 * (name-sorted) order, skipping directories named in the module-level
 * `ignoredDirNames` set and keeping only regular files whose extension is in
 * the module-level `scanExtensions` set. A missing `rootDir` yields an empty
 * list rather than an error.
 *
 * @param {string} rootDir - Absolute directory to walk.
 * @returns {Promise<string[]>} Absolute paths of matching files.
 */
async function walkFiles(rootDir) {
  let entries;
  try {
    entries = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    const isMissingDir =
      error && typeof error === "object" && "code" in error && error.code === "ENOENT";
    if (isMissingDir) {
      return [];
    }
    throw error;
  }
  // Sort entries so the walk order is stable across filesystems.
  entries.sort((left, right) => left.name.localeCompare(right.name));
  const collected = [];
  for (const entry of entries) {
    const entryPath = path.join(rootDir, entry.name);
    if (entry.isDirectory()) {
      if (!ignoredDirNames.has(entry.name)) {
        collected.push(...(await walkFiles(entryPath)));
      }
    } else if (entry.isFile() && scanExtensions.has(path.extname(entry.name))) {
      collected.push(entryPath);
    }
  }
  return collected;
}
|
||||
|
||||
function compareInventoryEntries(left, right) {
|
||||
return (
|
||||
left.provider.localeCompare(right.provider) ||
|
||||
@@ -192,20 +157,17 @@ export async function collectWebSearchProviderBoundaryInventory() {
|
||||
if (!webSearchProviderInventoryPromise) {
|
||||
webSearchProviderInventoryPromise = (async () => {
|
||||
const inventory = [];
|
||||
const files = (
|
||||
await Promise.all(scanRoots.map(async (root) => await walkFiles(path.join(repoRoot, root))))
|
||||
)
|
||||
.flat()
|
||||
.toSorted((left, right) =>
|
||||
normalizeRepoPath(repoRoot, left).localeCompare(normalizeRepoPath(repoRoot, right)),
|
||||
);
|
||||
const files = await collectSourceFileContents({
|
||||
repoRoot,
|
||||
scanRoots,
|
||||
scanExtensions,
|
||||
ignoredDirNames,
|
||||
});
|
||||
|
||||
for (const filePath of files) {
|
||||
const relativeFile = normalizeRepoPath(repoRoot, filePath);
|
||||
for (const { relativeFile, content } of files) {
|
||||
if (ignoredFiles.has(relativeFile) || relativeFile.includes(".test.")) {
|
||||
continue;
|
||||
}
|
||||
const content = await fs.readFile(filePath, "utf8");
|
||||
const lines = content.split(/\r?\n/);
|
||||
|
||||
if (relativeFile === "src/plugins/web-search-providers.ts") {
|
||||
|
||||
80
scripts/lib/source-file-scan-cache.mjs
Normal file
80
scripts/lib/source-file-scan-cache.mjs
Normal file
@@ -0,0 +1,80 @@
|
||||
import { promises as fs } from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
// Cache of in-flight/completed scan promises, keyed by a JSON fingerprint of
// the scan parameters (built in collectSourceFileContents). Lets multiple
// guard scripts in one process share a single directory walk + file-read pass.
const scanCache = new Map();
|
||||
|
||||
/**
 * Returns `filePath` relative to `repoRoot`, normalized to forward slashes so
 * paths compare identically on every platform.
 *
 * @param {string} repoRoot - Absolute repository root.
 * @param {string} filePath - Absolute path inside the repository.
 * @returns {string} Repo-relative, forward-slash-separated path.
 */
function normalizeRepoPath(repoRoot, filePath) {
  const relativePath = path.relative(repoRoot, filePath);
  return relativePath.replaceAll(path.sep, "/");
}
|
||||
|
||||
/**
 * Recursively gathers files under `rootDir` whose extension is in
 * `params.scanExtensions`, skipping any directory named in
 * `params.ignoredDirNames`. Entries are visited in sorted name order so the
 * result is deterministic; a missing `rootDir` produces an empty list.
 *
 * @param {{scanExtensions: Set<string>, ignoredDirNames: Set<string>}} params
 * @param {string} rootDir - Absolute directory to walk.
 * @returns {Promise<string[]>} Absolute paths of matching files.
 */
async function walkFiles(params, rootDir) {
  let entries;
  try {
    entries = await fs.readdir(rootDir, { withFileTypes: true });
  } catch (error) {
    if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
      return [];
    }
    throw error;
  }
  // Deterministic traversal order regardless of filesystem.
  entries.sort((left, right) => left.name.localeCompare(right.name));
  const found = [];
  for (const entry of entries) {
    const entryPath = path.join(rootDir, entry.name);
    if (entry.isDirectory()) {
      if (params.ignoredDirNames.has(entry.name)) {
        continue;
      }
      found.push(...(await walkFiles(params, entryPath)));
      continue;
    }
    if (!entry.isFile()) {
      continue;
    }
    if (params.scanExtensions.has(path.extname(entry.name))) {
      found.push(entryPath);
    }
  }
  return found;
}
|
||||
|
||||
/**
 * Reads every scannable source file under the configured roots, caching the
 * result so multiple guard scripts in one process share a single walk + read.
 *
 * The cache key is a JSON fingerprint of the parameters. Set-valued params
 * and `scanRoots` are sorted before serialization so logically-equivalent
 * requests hit the same cache entry; sorting `scanRoots` is safe because the
 * final file list is globally sorted by repo-relative path, so root order
 * never affects the result. A failed scan is evicted so later calls retry.
 *
 * @param {{
 *   repoRoot: string,
 *   scanRoots: string[],
 *   scanExtensions: Set<string>,
 *   ignoredDirNames: Set<string>,
 * }} params
 * @returns {Promise<Array<{filePath: string, relativeFile: string, content: string}>>}
 *   Files sorted by `relativeFile`, each with its UTF-8 contents.
 */
export async function collectSourceFileContents(params) {
  const cacheKey = JSON.stringify({
    repoRoot: params.repoRoot,
    // Sorted copy so ["a", "b"] and ["b", "a"] share one cache entry.
    scanRoots: [...params.scanRoots].toSorted((left, right) => left.localeCompare(right)),
    scanExtensions: [...params.scanExtensions].toSorted((left, right) => left.localeCompare(right)),
    ignoredDirNames: [...params.ignoredDirNames].toSorted((left, right) =>
      left.localeCompare(right),
    ),
  });
  const cached = scanCache.get(cacheKey);
  if (cached) {
    return await cached;
  }

  const promise = (async () => {
    // Walk all roots in parallel, then order the combined list by
    // repo-relative path for deterministic downstream iteration.
    const files = (
      await Promise.all(
        params.scanRoots.map(async (root) => walkFiles(params, path.join(params.repoRoot, root))),
      )
    )
      .flat()
      .toSorted((left, right) =>
        normalizeRepoPath(params.repoRoot, left).localeCompare(
          normalizeRepoPath(params.repoRoot, right),
        ),
      );

    return await Promise.all(
      files.map(async (filePath) => ({
        filePath,
        relativeFile: normalizeRepoPath(params.repoRoot, filePath),
        content: await fs.readFile(filePath, "utf8"),
      })),
    );
  })();

  scanCache.set(cacheKey, promise);
  try {
    return await promise;
  } catch (error) {
    // Evict the rejected promise so a subsequent call can retry the scan.
    scanCache.delete(cacheKey);
    throw error;
  }
}
|
||||
Reference in New Issue
Block a user