diff --git a/src/agents/skills.loadworkspaceskillentries.test.ts b/src/agents/skills.loadworkspaceskillentries.test.ts index be974acc367..3f3c0a49c2b 100644 --- a/src/agents/skills.loadworkspaceskillentries.test.ts +++ b/src/agents/skills.loadworkspaceskillentries.test.ts @@ -446,6 +446,37 @@ describe("loadWorkspaceSkillEntries", () => { expect(names).toEqual(expect.arrayContaining(["direct-skill", "grouped-skill"])); }); + it("does not count invalid grouped candidates against the loaded skill cap", async () => { + const workspaceDir = await createTempWorkspaceDir(); + for (const nestedName of ["a", "b"]) { + const invalidDir = path.join(workspaceDir, "skills", "00-group", nestedName); + await fs.mkdir(invalidDir, { recursive: true }); + await fs.writeFile( + path.join(invalidDir, "SKILL.md"), + `---\nname: ${nestedName}\n---\n\n# Invalid\n`, + "utf-8", + ); + } + await writeSkill({ + dir: path.join(workspaceDir, "skills", "01-valid"), + name: "valid-skill", + description: "Valid sibling after invalid grouped candidates", + }); + + const names = loadTestWorkspaceSkillEntries(workspaceDir, { + config: { + skills: { + limits: { + maxCandidatesPerRoot: 10, + maxSkillsLoadedPerSource: 1, + }, + }, + }, + }).map((entry) => entry.skill.name); + + expect(names).toEqual(["valid-skill"]); + }); + it("does not descend more than two levels (skills/a/b/c/SKILL.md is ignored)", async () => { const workspaceDir = await createTempWorkspaceDir(); await writeSkill({ @@ -521,11 +552,38 @@ describe("loadWorkspaceSkillEntries", () => { warn.mock.calls .map(([line]) => String(line)) .some((line) => - line.includes( - "Nested skills directory looks suspiciously large, truncating discovery.", - ), + line.includes("Nested skills directory has many entries, truncating discovery."), ), ).toBe(true); }); + + it("does not spend nested candidate budget on ignored raw entries", async () => { + const workspaceDir = await createTempWorkspaceDir(); + const groupDir = path.join(workspaceDir, "skills", "group"); + await fs.mkdir(groupDir, { recursive: true }); + for (let i = 0; i < 50; i += 1) { + await fs.writeFile(path.join(groupDir, `ignored-${String(i).padStart(2, "0")}.txt`), ""); + } + for (const name of ["valid-a", "valid-b", "valid-c"]) { + await writeSkill({ + dir: path.join(groupDir, name), + name, + description: `${name} nested under a group`, + }); + } + + const names = loadTestWorkspaceSkillEntries(workspaceDir, { + config: { + skills: { + limits: { + maxCandidatesPerRoot: 2, + maxSkillsLoadedPerSource: 10, + }, + }, + }, + }).map((entry) => entry.skill.name); + + expect(names.filter((name) => name.startsWith("valid-"))).toEqual(["valid-a", "valid-b"]); + }); }); }); diff --git a/src/agents/skills/workspace.ts b/src/agents/skills/workspace.ts index 869199012e5..bced0bb130f 100644 --- a/src/agents/skills/workspace.ts +++ b/src/agents/skills/workspace.ts @@ -126,6 +126,8 @@ const DEFAULT_MAX_SKILLS_LOADED_PER_SOURCE = 200; const DEFAULT_MAX_SKILLS_IN_PROMPT = 150; const DEFAULT_MAX_SKILLS_PROMPT_CHARS = 18_000; const DEFAULT_MAX_SKILL_FILE_BYTES = 256_000; +const DEFAULT_MIN_RAW_ENTRIES_PER_DIRECTORY_SCAN = 1_000; +const DEFAULT_MAX_RAW_ENTRIES_PER_DIRECTORY_SCAN = 10_000; type ResolvedSkillsLimits = { maxCandidatesPerRoot: number; @@ -171,13 +173,14 @@ function resolveSkillsLimits(config?: OpenClawConfig, agentId?: string): Resolve function listChildDirectories( dir: string, opts?: { - maxEntriesToScan?: number; + maxCandidateDirs?: number; + maxRawEntriesToScan?: number; }, ): ChildDirectoryScan { - const maxEntriesToScan = - opts?.maxEntriesToScan === undefined - ? Number.POSITIVE_INFINITY - : Math.max(0, opts.maxEntriesToScan); + const maxRawEntriesToScan = + opts?.maxRawEntriesToScan === undefined + ? resolveRawEntryScanLimit(opts?.maxCandidateDirs) + : Math.max(0, opts.maxRawEntriesToScan); try { const dirs: string[] = []; let scannedEntryCount = 0; @@ -186,7 +189,7 @@ function listChildDirectories( try { let entry: Dirent | null; while ((entry = handle.readSync()) !== null) { - if (scannedEntryCount >= maxEntriesToScan) { + if (scannedEntryCount >= maxRawEntriesToScan) { truncated = true; break; } @@ -218,6 +221,20 @@ function listChildDirectories( } } +function resolveRawEntryScanLimit(maxCandidateDirs: number | undefined): number { + if (maxCandidateDirs === undefined) { + return Number.POSITIVE_INFINITY; + } + const normalized = Math.max(0, maxCandidateDirs); + if (normalized === 0) { + return 0; + } + return Math.min( + DEFAULT_MAX_RAW_ENTRIES_PER_DIRECTORY_SCAN, + Math.max(DEFAULT_MIN_RAW_ENTRIES_PER_DIRECTORY_SCAN, normalized * 10), + ); +} + function tryRealpath(filePath: string): string | null { try { return fs.realpathSync(filePath); @@ -340,7 +357,7 @@ function resolveNestedSkillsRoot( // Heuristic: if `dir/skills/*/SKILL.md` exists for any entry, treat `dir/skills` as the real root. // Note: don't stop at 25, but keep a cap to avoid pathological scans. const scanLimit = Math.max(0, opts?.maxEntriesToScan ?? 100); - const nestedDirs = listChildDirectories(nested, { maxEntriesToScan: scanLimit }).dirs; + const nestedDirs = listChildDirectories(nested, { maxCandidateDirs: scanLimit }).dirs; for (const name of nestedDirs) { const skillMd = path.join(nested, name, "SKILL.md"); @@ -455,13 +472,13 @@ function loadSkillEntries( const maxCandidatesPerRoot = Math.max(0, limits.maxCandidatesPerRoot); const maxSkillsLoadedPerSource = Math.max(0, limits.maxSkillsLoadedPerSource); - const maxCandidates = Math.min(maxCandidatesPerRoot, maxSkillsLoadedPerSource); const childDirScan = listChildDirectories(baseDir, { - maxEntriesToScan: maxCandidatesPerRoot, + maxCandidateDirs: maxCandidatesPerRoot, }); const childDirs = childDirScan.dirs; const suspicious = childDirScan.truncated; - const limitedChildren = childDirs.toSorted().slice(0, maxCandidates); + const limitedChildren = + maxSkillsLoadedPerSource === 0 ? [] : childDirs.toSorted().slice(0, maxCandidatesPerRoot); if (suspicious) { skillsLogger.warn("Skills root looks suspiciously large, truncating discovery.", { @@ -469,25 +486,49 @@ function loadSkillEntries( baseDir, childDirCount: childDirs.length, scannedEntryCount: childDirScan.scannedEntryCount, - maxEntriesToScan: maxCandidatesPerRoot, + maxEntriesToScan: resolveRawEntryScanLimit(maxCandidatesPerRoot), maxCandidatesPerRoot: limits.maxCandidatesPerRoot, maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource, }); - } else if (childDirs.length > maxCandidates) { + } else if (childDirs.length > maxCandidatesPerRoot) { skillsLogger.warn("Skills root has many entries, truncating discovery.", { dir: params.dir, baseDir, childDirCount: childDirs.length, + maxCandidatesPerRoot: limits.maxCandidatesPerRoot, maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource, }); } const loadedSkills: LoadedSkillRecord[] = []; + const loadCandidateSkill = ({ skillDir, name, skillMdRealPath }: CandidateSkillDir) => { + try { + const size = fs.statSync(skillMdRealPath).size; + if (size > limits.maxSkillFileBytes) { + skillsLogger.warn("Skipping skill due to oversized SKILL.md.", { + skill: name, + filePath: path.join(skillDir, "SKILL.md"), + size, + maxSkillFileBytes: limits.maxSkillFileBytes, + }); + return; + } + } catch { + return; + } + + loadedSkills.push( + ...loadContainedSkillRecords({ + skillDir, + source: params.source, + maxSkillFileBytes: limits.maxSkillFileBytes, + }), + ); + }; // Consider immediate subfolders that look like skills (have SKILL.md) and are under size cap. // When an immediate subfolder does NOT have a SKILL.md, check one level deeper for grouped // skill directories (e.g. ~/.openclaw/skills/coze/koze-retrieval/SKILL.md). - const candidateDirs: CandidateSkillDir[] = []; for (const name of limitedChildren) { const skillDir = path.join(baseDir, name); const skillDirRealPath = resolveContainedSkillPath({ @@ -508,13 +549,13 @@ function loadSkillEntries( candidatePath: skillMd, }); if (skillMdRealPath) { - candidateDirs.push({ skillDir, name, skillMdRealPath }); + loadCandidateSkill({ skillDir, name, skillMdRealPath }); } } else { // No SKILL.md here — check one level deeper for grouped skill directories. // Apply the same per-root cap as the outer scan to avoid scanning huge nested trees. const nestedChildScan = listChildDirectories(skillDir, { - maxEntriesToScan: maxCandidatesPerRoot, + maxCandidateDirs: maxCandidatesPerRoot, }); const nestedChildren = nestedChildScan.dirs; const nestedSuspicious = nestedChildScan.truncated; @@ -527,13 +568,22 @@ function loadSkillEntries( nestedDir: skillDir, nestedChildDirCount: nestedChildren.length, scannedEntryCount: nestedChildScan.scannedEntryCount, - maxEntriesToScan: maxCandidatesPerRoot, + maxEntriesToScan: resolveRawEntryScanLimit(maxCandidatesPerRoot), maxCandidatesPerRoot: limits.maxCandidatesPerRoot, maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource, }, ); + } else if (nestedChildren.length > maxCandidatesPerRoot) { + skillsLogger.warn("Nested skills directory has many entries, truncating discovery.", { + dir: params.dir, + baseDir, + nestedDir: skillDir, + nestedChildDirCount: nestedChildren.length, + maxCandidatesPerRoot: limits.maxCandidatesPerRoot, + maxSkillsLoadedPerSource: limits.maxSkillsLoadedPerSource, + }); } - const limitedNested = nestedChildren.toSorted(); + const limitedNested = nestedChildren.toSorted().slice(0, maxCandidatesPerRoot); for (const nestedName of limitedNested) { const nestedDir = path.join(skillDir, nestedName); const nestedSkillMd = path.join(nestedDir, "SKILL.md"); @@ -551,47 +601,18 @@ function loadSkillEntries( candidatePath: nestedSkillMd, }); if (nestedDirRealPath && nestedSkillMdRealPath) { - candidateDirs.push({ + loadCandidateSkill({ skillDir: nestedDir, name: `${name}/${nestedName}`, skillMdRealPath: nestedSkillMdRealPath, }); } } - if (candidateDirs.length >= maxSkillsLoadedPerSource) { + if (loadedSkills.length >= maxSkillsLoadedPerSource) { break; } } } - if (candidateDirs.length >= maxSkillsLoadedPerSource) { - break; - } - } - - for (const { skillDir, name, skillMdRealPath } of candidateDirs) { - try { - const size = fs.statSync(skillMdRealPath).size; - if (size > limits.maxSkillFileBytes) { - skillsLogger.warn("Skipping skill due to oversized SKILL.md.", { - skill: name, - filePath: path.join(skillDir, "SKILL.md"), - size, - maxSkillFileBytes: limits.maxSkillFileBytes, - }); - continue; - } - } catch { - continue; - } - - loadedSkills.push( - ...loadContainedSkillRecords({ - skillDir, - source: params.source, - maxSkillFileBytes: limits.maxSkillFileBytes, - }), - ); - if (loadedSkills.length >= maxSkillsLoadedPerSource) { break; }