fix: bounded directory scan actionable regression (#74942)

* fix: bounded directory scan actionable regression

* fix: current main remaining regression

* fix(skills): compose workspace scan caps

---------

Co-authored-by: openclaw-clawsweeper[bot] <280122609+openclaw-clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
clawsweeper[bot]
2026-04-30 00:54:02 -07:00
committed by GitHub
parent 77f904d35c
commit 53dff569b8
2 changed files with 130 additions and 51 deletions

View File

@@ -446,6 +446,37 @@ describe("loadWorkspaceSkillEntries", () => {
expect(names).toEqual(expect.arrayContaining(["direct-skill", "grouped-skill"]));
});
it("does not count invalid grouped candidates against the loaded skill cap", async () => {
const workspaceDir = await createTempWorkspaceDir();
for (const nestedName of ["a", "b"]) {
const invalidDir = path.join(workspaceDir, "skills", "00-group", nestedName);
await fs.mkdir(invalidDir, { recursive: true });
await fs.writeFile(
path.join(invalidDir, "SKILL.md"),
`---\nname: ${nestedName}\n---\n\n# Invalid\n`,
"utf-8",
);
}
await writeSkill({
dir: path.join(workspaceDir, "skills", "01-valid"),
name: "valid-skill",
description: "Valid sibling after invalid grouped candidates",
});
const names = loadTestWorkspaceSkillEntries(workspaceDir, {
config: {
skills: {
limits: {
maxCandidatesPerRoot: 10,
maxSkillsLoadedPerSource: 1,
},
},
},
}).map((entry) => entry.skill.name);
expect(names).toEqual(["valid-skill"]);
});
it("does not descend more than two levels (skills/a/b/c/SKILL.md is ignored)", async () => {
const workspaceDir = await createTempWorkspaceDir();
await writeSkill({
@@ -521,11 +552,38 @@ describe("loadWorkspaceSkillEntries", () => {
warn.mock.calls
.map(([line]) => String(line))
.some((line) =>
line.includes(
"Nested skills directory looks suspiciously large, truncating discovery.",
),
line.includes("Nested skills directory has many entries, truncating discovery."),
),
).toBe(true);
});
it("does not spend nested candidate budget on ignored raw entries", async () => {
const workspaceDir = await createTempWorkspaceDir();
const groupDir = path.join(workspaceDir, "skills", "group");
await fs.mkdir(groupDir, { recursive: true });
for (let i = 0; i < 50; i += 1) {
await fs.writeFile(path.join(groupDir, `ignored-${String(i).padStart(2, "0")}.txt`), "");
}
for (const name of ["valid-a", "valid-b", "valid-c"]) {
await writeSkill({
dir: path.join(groupDir, name),
name,
description: `${name} nested under a group`,
});
}
const names = loadTestWorkspaceSkillEntries(workspaceDir, {
config: {
skills: {
limits: {
maxCandidatesPerRoot: 2,
maxSkillsLoadedPerSource: 10,
},
},
},
}).map((entry) => entry.skill.name);
expect(names.filter((name) => name.startsWith("valid-"))).toEqual(["valid-a", "valid-b"]);
});
});
});

View File

@@ -126,6 +126,8 @@ const DEFAULT_MAX_SKILLS_LOADED_PER_SOURCE = 200;
const DEFAULT_MAX_SKILLS_IN_PROMPT = 150;
const DEFAULT_MAX_SKILLS_PROMPT_CHARS = 18_000;
const DEFAULT_MAX_SKILL_FILE_BYTES = 256_000;
const DEFAULT_MIN_RAW_ENTRIES_PER_DIRECTORY_SCAN = 1_000;
const DEFAULT_MAX_RAW_ENTRIES_PER_DIRECTORY_SCAN = 10_000;
type ResolvedSkillsLimits = {
maxCandidatesPerRoot: number;
@@ -171,13 +173,14 @@ function resolveSkillsLimits(config?: OpenClawConfig, agentId?: string): Resolve
function listChildDirectories(
dir: string,
opts?: {
maxEntriesToScan?: number;
maxCandidateDirs?: number;
maxRawEntriesToScan?: number;
},
): ChildDirectoryScan {
const maxEntriesToScan =
opts?.maxEntriesToScan === undefined
? Number.POSITIVE_INFINITY
: Math.max(0, opts.maxEntriesToScan);
const maxRawEntriesToScan =
opts?.maxRawEntriesToScan === undefined
? resolveRawEntryScanLimit(opts?.maxCandidateDirs)
: Math.max(0, opts.maxRawEntriesToScan);
try {
const dirs: string[] = [];
let scannedEntryCount = 0;
@@ -186,7 +189,7 @@ function listChildDirectories(
try {
let entry: Dirent | null;
while ((entry = handle.readSync()) !== null) {
if (scannedEntryCount >= maxEntriesToScan) {
if (scannedEntryCount >= maxRawEntriesToScan) {
truncated = true;
break;
}
@@ -218,6 +221,20 @@ function listChildDirectories(
}
}
function resolveRawEntryScanLimit(maxCandidateDirs: number | undefined): number {
if (maxCandidateDirs === undefined) {
return Number.POSITIVE_INFINITY;
}
const normalized = Math.max(0, maxCandidateDirs);
if (normalized === 0) {
return 0;
}
return Math.min(
DEFAULT_MAX_RAW_ENTRIES_PER_DIRECTORY_SCAN,
Math.max(DEFAULT_MIN_RAW_ENTRIES_PER_DIRECTORY_SCAN, normalized * 10),
);
}
function tryRealpath(filePath: string): string | null {
try {
return fs.realpathSync(filePath);
@@ -340,7 +357,7 @@ function resolveNestedSkillsRoot(
// Heuristic: if `dir/skills/*/SKILL.md` exists for any entry, treat `dir/skills` as the real root.
// Note: don't stop at 25, but keep a cap to avoid pathological scans.
const scanLimit = Math.max(0, opts?.maxEntriesToScan ?? 100);
const nestedDirs = listChildDirectories(nested, { maxEntriesToScan: scanLimit }).dirs;
const nestedDirs = listChildDirectories(nested, { maxCandidateDirs: scanLimit }).dirs;
for (const name of nestedDirs) {
const skillMd = path.join(nested, name, "SKILL.md");
@@ -455,13 +472,13 @@ function loadSkillEntries(
const maxCandidatesPerRoot = Math.max(0, limits.maxCandidatesPerRoot);
const maxSkillsLoadedPerSource = Math.max(0, limits.maxSkillsLoadedPerSource);
const maxCandidates = Math.min(maxCandidatesPerRoot, maxSkillsLoadedPerSource);
const childDirScan = listChildDirectories(baseDir, {
maxEntriesToScan: maxCandidatesPerRoot,
maxCandidateDirs: maxCandidatesPerRoot,
});
const childDirs = childDirScan.dirs;
const suspicious = childDirScan.truncated;
const limitedChildren = childDirs.toSorted().slice(0, maxCandidates);
const limitedChildren =
maxSkillsLoadedPerSource === 0 ? [] : childDirs.toSorted().slice(0, maxCandidatesPerRoot);
if (suspicious) {
skillsLogger.warn("Skills root looks suspiciously large, truncating discovery.", {
@@ -469,25 +486,49 @@ function loadSkillEntries(
baseDir,
childDirCount: childDirs.length,
scannedEntryCount: childDirScan.scannedEntryCount,
maxEntriesToScan: maxCandidatesPerRoot,
maxEntriesToScan: resolveRawEntryScanLimit(maxCandidatesPerRoot),
maxCandidatesPerRoot: limits.maxCandidatesPerRoot,
maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource,
});
} else if (childDirs.length > maxCandidates) {
} else if (childDirs.length > maxCandidatesPerRoot) {
skillsLogger.warn("Skills root has many entries, truncating discovery.", {
dir: params.dir,
baseDir,
childDirCount: childDirs.length,
maxCandidatesPerRoot: limits.maxCandidatesPerRoot,
maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource,
});
}
const loadedSkills: LoadedSkillRecord[] = [];
const loadCandidateSkill = ({ skillDir, name, skillMdRealPath }: CandidateSkillDir) => {
try {
const size = fs.statSync(skillMdRealPath).size;
if (size > limits.maxSkillFileBytes) {
skillsLogger.warn("Skipping skill due to oversized SKILL.md.", {
skill: name,
filePath: path.join(skillDir, "SKILL.md"),
size,
maxSkillFileBytes: limits.maxSkillFileBytes,
});
return;
}
} catch {
return;
}
loadedSkills.push(
...loadContainedSkillRecords({
skillDir,
source: params.source,
maxSkillFileBytes: limits.maxSkillFileBytes,
}),
);
};
// Consider immediate subfolders that look like skills (have SKILL.md) and are under size cap.
// When an immediate subfolder does NOT have a SKILL.md, check one level deeper for grouped
// skill directories (e.g. ~/.openclaw/skills/coze/koze-retrieval/SKILL.md).
const candidateDirs: CandidateSkillDir[] = [];
for (const name of limitedChildren) {
const skillDir = path.join(baseDir, name);
const skillDirRealPath = resolveContainedSkillPath({
@@ -508,13 +549,13 @@ function loadSkillEntries(
candidatePath: skillMd,
});
if (skillMdRealPath) {
candidateDirs.push({ skillDir, name, skillMdRealPath });
loadCandidateSkill({ skillDir, name, skillMdRealPath });
}
} else {
// No SKILL.md here — check one level deeper for grouped skill directories.
// Apply the same per-root cap as the outer scan to avoid scanning huge nested trees.
const nestedChildScan = listChildDirectories(skillDir, {
maxEntriesToScan: maxCandidatesPerRoot,
maxCandidateDirs: maxCandidatesPerRoot,
});
const nestedChildren = nestedChildScan.dirs;
const nestedSuspicious = nestedChildScan.truncated;
@@ -527,13 +568,22 @@ function loadSkillEntries(
nestedDir: skillDir,
nestedChildDirCount: nestedChildren.length,
scannedEntryCount: nestedChildScan.scannedEntryCount,
maxEntriesToScan: maxCandidatesPerRoot,
maxEntriesToScan: resolveRawEntryScanLimit(maxCandidatesPerRoot),
maxCandidatesPerRoot: limits.maxCandidatesPerRoot,
maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource,
},
);
} else if (nestedChildren.length > maxCandidatesPerRoot) {
skillsLogger.warn("Nested skills directory has many entries, truncating discovery.", {
dir: params.dir,
baseDir,
nestedDir: skillDir,
nestedChildDirCount: nestedChildren.length,
maxCandidatesPerRoot: limits.maxCandidatesPerRoot,
maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource,
});
}
const limitedNested = nestedChildren.toSorted();
const limitedNested = nestedChildren.toSorted().slice(0, maxCandidatesPerRoot);
for (const nestedName of limitedNested) {
const nestedDir = path.join(skillDir, nestedName);
const nestedSkillMd = path.join(nestedDir, "SKILL.md");
@@ -551,47 +601,18 @@ function loadSkillEntries(
candidatePath: nestedSkillMd,
});
if (nestedDirRealPath && nestedSkillMdRealPath) {
candidateDirs.push({
loadCandidateSkill({
skillDir: nestedDir,
name: `${name}/${nestedName}`,
skillMdRealPath: nestedSkillMdRealPath,
});
}
}
if (candidateDirs.length >= maxSkillsLoadedPerSource) {
if (loadedSkills.length >= maxSkillsLoadedPerSource) {
break;
}
}
}
if (candidateDirs.length >= maxSkillsLoadedPerSource) {
break;
}
}
for (const { skillDir, name, skillMdRealPath } of candidateDirs) {
try {
const size = fs.statSync(skillMdRealPath).size;
if (size > limits.maxSkillFileBytes) {
skillsLogger.warn("Skipping skill due to oversized SKILL.md.", {
skill: name,
filePath: path.join(skillDir, "SKILL.md"),
size,
maxSkillFileBytes: limits.maxSkillFileBytes,
});
continue;
}
} catch {
continue;
}
loadedSkills.push(
...loadContainedSkillRecords({
skillDir,
source: params.source,
maxSkillFileBytes: limits.maxSkillFileBytes,
}),
);
if (loadedSkills.length >= maxSkillsLoadedPerSource) {
break;
}