mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
CI: add code-size check for files crossing LOC threshold (#12810)
* CI: add code-size check for files crossing LOC threshold * feat(ci): add duplicate function detection to CI code-size check The --compare-to mode now also detects new duplicate function names introduced by a PR. Uses git diff to scope checks to changed files only, keeping CI fast. * fix(ci): address review feedback for code-size check - Validate git ref upfront; exit 2 if ref doesn't exist - Distinguish 'file missing at ref' from genuine git errors - Explicitly fetch base branch ref in CI workflow - Raise threshold from 700 to 1000 lines * fix(ci): exclude Swabble, skills, .pi from code analysis * update gitignore for pycache * ci: make code-size check informational (no failure on violations)
This commit is contained in:
25
.github/workflows/ci.yml
vendored
25
.github/workflows/ci.yml
vendored
@@ -386,6 +386,31 @@ jobs:
|
|||||||
- name: Run ${{ matrix.task }}
|
- name: Run ${{ matrix.task }}
|
||||||
run: ${{ matrix.command }}
|
run: ${{ matrix.command }}
|
||||||
|
|
||||||
|
# Check for files that grew past LOC threshold in this PR (delta-only).
|
||||||
|
code-size:
|
||||||
|
if: github.event_name == 'pull_request'
|
||||||
|
runs-on: blacksmith-4vcpu-ubuntu-2404
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
submodules: false
|
||||||
|
|
||||||
|
- name: Setup Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
|
||||||
|
- name: Fetch base branch
|
||||||
|
run: git fetch origin ${{ github.base_ref }}:refs/remotes/origin/${{ github.base_ref }}
|
||||||
|
|
||||||
|
- name: Check code file sizes
|
||||||
|
run: |
|
||||||
|
python scripts/analyze_code_files.py \
|
||||||
|
--compare-to origin/${{ github.base_ref }} \
|
||||||
|
--threshold 1000
|
||||||
|
|
||||||
secrets:
|
secrets:
|
||||||
runs-on: blacksmith-4vcpu-ubuntu-2404
|
runs-on: blacksmith-4vcpu-ubuntu-2404
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -7,6 +7,8 @@ pnpm-lock.yaml
|
|||||||
bun.lock
|
bun.lock
|
||||||
bun.lockb
|
bun.lockb
|
||||||
coverage
|
coverage
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
.tsbuildinfo
|
.tsbuildinfo
|
||||||
.pnpm-store
|
.pnpm-store
|
||||||
.worktrees/
|
.worktrees/
|
||||||
|
|||||||
@@ -2,13 +2,18 @@
|
|||||||
"""
|
"""
|
||||||
Lists the longest and shortest code files in the project, and counts duplicated function names across files. Useful for identifying potential refactoring targets and enforcing code size guidelines.
|
Lists the longest and shortest code files in the project, and counts duplicated function names across files. Useful for identifying potential refactoring targets and enforcing code size guidelines.
|
||||||
Threshold can be set to warn about files longer or shorter than a certain number of lines.
|
Threshold can be set to warn about files longer or shorter than a certain number of lines.
|
||||||
|
|
||||||
|
CI mode (--compare-to): Only warns about files that grew past threshold compared to a base ref.
|
||||||
|
Use --strict to exit non-zero on violations for CI gating.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
import argparse
|
import argparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Tuple, Dict, Set
|
from typing import List, Tuple, Dict, Set, Optional
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
# File extensions to consider as code files
|
# File extensions to consider as code files
|
||||||
@@ -23,7 +28,10 @@ CODE_EXTENSIONS = {
|
|||||||
SKIP_DIRS = {
|
SKIP_DIRS = {
|
||||||
'node_modules', '.git', 'dist', 'build', 'coverage',
|
'node_modules', '.git', 'dist', 'build', 'coverage',
|
||||||
'__pycache__', '.turbo', 'out', '.worktrees', 'vendor',
|
'__pycache__', '.turbo', 'out', '.worktrees', 'vendor',
|
||||||
'Pods', 'DerivedData', '.gradle', '.idea'
|
'Pods', 'DerivedData', '.gradle', '.idea',
|
||||||
|
'Swabble', # Separate Swift package
|
||||||
|
'skills', # Standalone skill scripts
|
||||||
|
'.pi', # Pi editor extensions
|
||||||
}
|
}
|
||||||
|
|
||||||
# Filename patterns to skip in short-file warnings (barrel exports, stubs)
|
# Filename patterns to skip in short-file warnings (barrel exports, stubs)
|
||||||
@@ -125,12 +133,7 @@ def extract_functions(file_path: Path) -> Set[str]:
|
|||||||
except Exception:
|
except Exception:
|
||||||
return set()
|
return set()
|
||||||
|
|
||||||
functions = set()
|
return extract_functions_from_content(content)
|
||||||
for pattern in TS_FUNCTION_PATTERNS:
|
|
||||||
for match in pattern.finditer(content):
|
|
||||||
functions.add(match.group(1))
|
|
||||||
|
|
||||||
return functions
|
|
||||||
|
|
||||||
|
|
||||||
def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> Dict[str, List[Path]]:
|
def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> Dict[str, List[Path]]:
|
||||||
@@ -155,6 +158,170 @@ def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> D
|
|||||||
return {name: paths for name, paths in function_locations.items() if len(paths) > 1}
|
return {name: paths for name, paths in function_locations.items() if len(paths) > 1}
|
||||||
|
|
||||||
|
|
||||||
|
def validate_git_ref(root_dir: Path, ref: str) -> bool:
|
||||||
|
"""Validate that a git ref exists. Exits with error if not."""
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
['git', 'rev-parse', '--verify', ref],
|
||||||
|
capture_output=True,
|
||||||
|
cwd=root_dir,
|
||||||
|
encoding='utf-8',
|
||||||
|
)
|
||||||
|
return result.returncode == 0
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_content_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[str]:
|
||||||
|
"""Get content of a file at a specific git ref. Returns None if file doesn't exist at ref."""
|
||||||
|
try:
|
||||||
|
relative_path = file_path.relative_to(root_dir)
|
||||||
|
# Use forward slashes for git paths
|
||||||
|
git_path = str(relative_path).replace('\\', '/')
|
||||||
|
result = subprocess.run(
|
||||||
|
['git', 'show', f'{ref}:{git_path}'],
|
||||||
|
capture_output=True,
|
||||||
|
cwd=root_dir,
|
||||||
|
encoding='utf-8',
|
||||||
|
errors='ignore',
|
||||||
|
)
|
||||||
|
if result.returncode != 0:
|
||||||
|
stderr = result.stderr.strip()
|
||||||
|
# "does not exist" or "exists on disk, but not in" = file missing at ref (OK)
|
||||||
|
if 'does not exist' in stderr or 'exists on disk' in stderr:
|
||||||
|
return None
|
||||||
|
# Other errors (bad ref, git broken) = genuine failure
|
||||||
|
if stderr:
|
||||||
|
print(f"⚠️ git show error for {git_path}: {stderr}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
return result.stdout
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ failed to read {file_path} at {ref}: {e}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_line_count_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[int]:
|
||||||
|
"""Get line count of a file at a specific git ref. Returns None if file doesn't exist at ref."""
|
||||||
|
content = get_file_content_at_ref(file_path, root_dir, ref)
|
||||||
|
if content is None:
|
||||||
|
return None
|
||||||
|
return len(content.splitlines())
|
||||||
|
|
||||||
|
|
||||||
|
def extract_functions_from_content(content: str) -> Set[str]:
|
||||||
|
"""Extract function names from TypeScript content string."""
|
||||||
|
functions = set()
|
||||||
|
for pattern in TS_FUNCTION_PATTERNS:
|
||||||
|
for match in pattern.finditer(content):
|
||||||
|
functions.add(match.group(1))
|
||||||
|
return functions
|
||||||
|
|
||||||
|
|
||||||
|
def get_changed_files(root_dir: Path, compare_ref: str) -> Set[str]:
|
||||||
|
"""Get set of files changed between compare_ref and HEAD (relative paths with forward slashes)."""
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
['git', 'diff', '--name-only', compare_ref, 'HEAD'],
|
||||||
|
capture_output=True,
|
||||||
|
cwd=root_dir,
|
||||||
|
encoding='utf-8',
|
||||||
|
errors='ignore',
|
||||||
|
)
|
||||||
|
if result.returncode != 0:
|
||||||
|
return set()
|
||||||
|
return {line.strip() for line in result.stdout.splitlines() if line.strip()}
|
||||||
|
except Exception:
|
||||||
|
return set()
|
||||||
|
|
||||||
|
|
||||||
|
def find_duplicate_regressions(
|
||||||
|
files: List[Tuple[Path, int]],
|
||||||
|
root_dir: Path,
|
||||||
|
compare_ref: str,
|
||||||
|
) -> Dict[str, List[Path]]:
|
||||||
|
"""
|
||||||
|
Find new duplicate function names that didn't exist at the base ref.
|
||||||
|
Only checks functions in files that changed to keep CI fast.
|
||||||
|
Returns dict of function_name -> list of current file paths, only for
|
||||||
|
duplicates that are new (weren't duplicated at compare_ref).
|
||||||
|
"""
|
||||||
|
# Build current duplicate map
|
||||||
|
current_dupes = find_duplicate_functions(files, root_dir)
|
||||||
|
if not current_dupes:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Get changed files to scope the comparison
|
||||||
|
changed_files = get_changed_files(root_dir, compare_ref)
|
||||||
|
if not changed_files:
|
||||||
|
return {} # Nothing changed, no new duplicates possible
|
||||||
|
|
||||||
|
# Only check duplicate functions that involve at least one changed file
|
||||||
|
relevant_dupes: Dict[str, List[Path]] = {}
|
||||||
|
for func_name, paths in current_dupes.items():
|
||||||
|
involves_changed = any(
|
||||||
|
str(p.relative_to(root_dir)).replace('\\', '/') in changed_files
|
||||||
|
for p in paths
|
||||||
|
)
|
||||||
|
if involves_changed:
|
||||||
|
relevant_dupes[func_name] = paths
|
||||||
|
|
||||||
|
if not relevant_dupes:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# For relevant duplicates, check if they were already duplicated at base ref
|
||||||
|
# Only need to read base versions of files involved in these duplicates
|
||||||
|
files_to_check: Set[Path] = set()
|
||||||
|
for paths in relevant_dupes.values():
|
||||||
|
files_to_check.update(paths)
|
||||||
|
|
||||||
|
base_function_locations: Dict[str, List[Path]] = defaultdict(list)
|
||||||
|
for file_path in files_to_check:
|
||||||
|
if file_path.suffix.lower() not in {'.ts', '.tsx'}:
|
||||||
|
continue
|
||||||
|
content = get_file_content_at_ref(file_path, root_dir, compare_ref)
|
||||||
|
if content is None:
|
||||||
|
continue
|
||||||
|
functions = extract_functions_from_content(content)
|
||||||
|
for func in functions:
|
||||||
|
if func in SKIP_DUPLICATE_FUNCTIONS:
|
||||||
|
continue
|
||||||
|
if any(func.startswith(prefix) for prefix in SKIP_DUPLICATE_PREFIXES):
|
||||||
|
continue
|
||||||
|
base_function_locations[func].append(file_path)
|
||||||
|
|
||||||
|
base_dupes = {name for name, paths in base_function_locations.items() if len(paths) > 1}
|
||||||
|
|
||||||
|
# Return only new duplicates
|
||||||
|
return {name: paths for name, paths in relevant_dupes.items() if name not in base_dupes}
|
||||||
|
|
||||||
|
|
||||||
|
def find_threshold_regressions(
|
||||||
|
files: List[Tuple[Path, int]],
|
||||||
|
root_dir: Path,
|
||||||
|
compare_ref: str,
|
||||||
|
threshold: int,
|
||||||
|
) -> List[Tuple[Path, int, Optional[int]]]:
|
||||||
|
"""
|
||||||
|
Find files that crossed the threshold compared to a base ref.
|
||||||
|
Returns list of (path, current_lines, base_lines) for files that:
|
||||||
|
- Were under threshold (or didn't exist) at compare_ref
|
||||||
|
- Are now at or over threshold
|
||||||
|
"""
|
||||||
|
regressions = []
|
||||||
|
|
||||||
|
for file_path, current_lines in files:
|
||||||
|
if current_lines < threshold:
|
||||||
|
continue # Not over threshold now, skip
|
||||||
|
|
||||||
|
base_lines = get_line_count_at_ref(file_path, root_dir, compare_ref)
|
||||||
|
|
||||||
|
# Regression if: file is new OR was under threshold before
|
||||||
|
if base_lines is None or base_lines < threshold:
|
||||||
|
regressions.append((file_path, current_lines, base_lines))
|
||||||
|
|
||||||
|
return regressions
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Analyze code files: list longest/shortest files, find duplicate function names'
|
description='Analyze code files: list longest/shortest files, find duplicate function names'
|
||||||
@@ -189,10 +356,72 @@ def main():
|
|||||||
default='.',
|
default='.',
|
||||||
help='Directory to scan (default: current directory)'
|
help='Directory to scan (default: current directory)'
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--compare-to',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help='Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.'
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--strict',
|
||||||
|
action='store_true',
|
||||||
|
help='Exit with non-zero status if any violations found (for CI)'
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
root_dir = Path(args.directory).resolve()
|
root_dir = Path(args.directory).resolve()
|
||||||
|
|
||||||
|
# CI delta mode: only show regressions
|
||||||
|
if args.compare_to:
|
||||||
|
print(f"\n📂 Scanning: {root_dir}")
|
||||||
|
print(f"🔍 Comparing to: {args.compare_to}\n")
|
||||||
|
|
||||||
|
if not validate_git_ref(root_dir, args.compare_to):
|
||||||
|
print(f"❌ Invalid git ref: {args.compare_to}", file=sys.stderr)
|
||||||
|
print(" Make sure the ref exists (e.g. run 'git fetch origin <branch>')", file=sys.stderr)
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
files = find_code_files(root_dir)
|
||||||
|
violations = False
|
||||||
|
|
||||||
|
# Check file length regressions
|
||||||
|
regressions = find_threshold_regressions(files, root_dir, args.compare_to, args.threshold)
|
||||||
|
|
||||||
|
if regressions:
|
||||||
|
print(f"⚠️ {len(regressions)} file(s) crossed {args.threshold} line threshold:\n")
|
||||||
|
for file_path, current, base in regressions:
|
||||||
|
relative_path = file_path.relative_to(root_dir)
|
||||||
|
if base is None:
|
||||||
|
print(f" {relative_path}: {current:,} lines (new file)")
|
||||||
|
else:
|
||||||
|
print(f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})")
|
||||||
|
print()
|
||||||
|
violations = True
|
||||||
|
else:
|
||||||
|
print(f"✅ No files crossed {args.threshold} line threshold")
|
||||||
|
|
||||||
|
# Check new duplicate function names
|
||||||
|
new_dupes = find_duplicate_regressions(files, root_dir, args.compare_to)
|
||||||
|
|
||||||
|
if new_dupes:
|
||||||
|
print(f"⚠️ {len(new_dupes)} new duplicate function name(s):\n")
|
||||||
|
for func_name in sorted(new_dupes.keys()):
|
||||||
|
paths = new_dupes[func_name]
|
||||||
|
print(f" {func_name}:")
|
||||||
|
for path in paths:
|
||||||
|
print(f" {path.relative_to(root_dir)}")
|
||||||
|
print()
|
||||||
|
violations = True
|
||||||
|
else:
|
||||||
|
print(f"✅ No new duplicate function names")
|
||||||
|
|
||||||
|
print()
|
||||||
|
if args.strict and violations:
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
print(f"\n📂 Scanning: {root_dir}\n")
|
print(f"\n📂 Scanning: {root_dir}\n")
|
||||||
|
|
||||||
# Find and sort files by line count
|
# Find and sort files by line count
|
||||||
@@ -306,6 +535,10 @@ def main():
|
|||||||
print(f"\n✅ No duplicate function names")
|
print(f"\n✅ No duplicate function names")
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
# Exit with error if --strict and there are violations
|
||||||
|
if args.strict and long_warnings:
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
Reference in New Issue
Block a user