diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74057aee869..0792d09cb69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -145,10 +145,10 @@ jobs: path: dist/ retention-days: 1 - # Validate npm pack contents after build. + # Validate npm pack contents after build (only on push to main, not PRs). release-check: needs: [docs-scope, build-artifacts] - if: needs.docs-scope.outputs.docs_only != 'true' + if: github.event_name == 'push' && needs.docs-scope.outputs.docs_only != 'true' runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - name: Checkout diff --git a/scripts/analyze_code_files.py b/scripts/analyze_code_files.py index 984d3f44837..03558cc06ad 100644 --- a/scripts/analyze_code_files.py +++ b/scripts/analyze_code_files.py @@ -21,27 +21,47 @@ from collections import defaultdict # File extensions to consider as code files CODE_EXTENSIONS = { - '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', # TypeScript/JavaScript - '.swift', # macOS/iOS - '.kt', '.java', # Android - '.py', '.sh', # Scripts + ".ts", + ".tsx", + ".js", + ".jsx", + ".mjs", + ".cjs", # TypeScript/JavaScript + ".swift", # macOS/iOS + ".kt", + ".java", # Android + ".py", + ".sh", # Scripts } # Directories to skip SKIP_DIRS = { - 'node_modules', '.git', 'dist', 'build', 'coverage', - '__pycache__', '.turbo', 'out', '.worktrees', 'vendor', - 'Pods', 'DerivedData', '.gradle', '.idea', - 'Swabble', # Separate Swift package - 'skills', # Standalone skill scripts - '.pi', # Pi editor extensions + "node_modules", + ".git", + "dist", + "build", + "coverage", + "__pycache__", + ".turbo", + "out", + ".worktrees", + "vendor", + "Pods", + "DerivedData", + ".gradle", + ".idea", + "Swabble", # Separate Swift package + "skills", # Standalone skill scripts + ".pi", # Pi editor extensions } # Filename patterns to skip in short-file warnings (barrel exports, stubs) SKIP_SHORT_PATTERNS = { - 'index.js', 'index.ts', 'postinstall.js', + "index.js", + "index.ts", + "postinstall.js", } -SKIP_SHORT_SUFFIXES = ('-cli.ts',) +SKIP_SHORT_SUFFIXES = ("-cli.ts",) # Function names to skip in duplicate detection. # Only list names so generic they're expected to appear independently in many modules. @@ -49,20 +69,56 @@ SKIP_SHORT_SUFFIXES = ('-cli.ts',) # stripPrefix, parseConfig are specific enough to flag). SKIP_DUPLICATE_FUNCTIONS = { # Lifecycle / framework plumbing - 'main', 'init', 'setup', 'teardown', 'cleanup', 'dispose', 'destroy', - 'open', 'close', 'connect', 'disconnect', 'execute', 'run', 'start', 'stop', - 'render', 'update', 'refresh', 'reset', 'clear', 'flush', + "main", + "init", + "setup", + "teardown", + "cleanup", + "dispose", + "destroy", + "open", + "close", + "connect", + "disconnect", + "execute", + "run", + "start", + "stop", + "render", + "update", + "refresh", + "reset", + "clear", + "flush", # Too-short / too-generic identifiers - 'text', 'json', 'pad', 'mask', 'digest', 'confirm', 'intro', 'outro', - 'exists', 'send', 'receive', 'listen', 'log', 'warn', 'error', 'info', - 'help', 'version', 'config', 'configure', 'describe', 'test', 'action', + "text", + "json", + "pad", + "mask", + "digest", + "confirm", + "intro", + "outro", + "exists", + "send", + "receive", + "listen", + "log", + "warn", + "error", + "info", + "help", + "version", + "config", + "configure", + "describe", + "test", + "action", } -SKIP_DUPLICATE_FILE_PATTERNS = ('.test.ts', '.test.tsx', '.spec.ts') +SKIP_DUPLICATE_FILE_PATTERNS = (".test.ts", ".test.tsx", ".spec.ts") # Known packages in the monorepo -PACKAGES = { - 'src', 'apps', 'extensions', 'packages', 'scripts', 'ui', 'test', 'docs' -} +PACKAGES = {"src", "apps", "extensions", "packages", "scripts", "ui", "test", "docs"} def get_package(file_path: Path, root_dir: Path) -> str: @@ -72,15 +128,15 @@ def get_package(file_path: Path, root_dir: Path) -> str: parts = relative.parts if len(parts) > 0 and parts[0] in PACKAGES: return parts[0] - return 'root' + return "root" except ValueError: - return 'root' + return "root" def count_lines(file_path: Path) -> int: """Count the number of lines in a file.""" try: - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: return sum(1 for _ in f) except Exception: return 0 @@ -89,81 +145,100 @@ def count_lines(file_path: Path) -> int: def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]: """Find all code files and their line counts.""" files_with_counts = [] - + for dirpath, dirnames, filenames in os.walk(root_dir): # Remove skip directories from dirnames to prevent walking into them dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS] - + for filename in filenames: file_path = Path(dirpath) / filename if file_path.suffix.lower() in CODE_EXTENSIONS: line_count = count_lines(file_path) files_with_counts.append((file_path, line_count)) - + return files_with_counts # Regex patterns for TypeScript functions (exported and internal) TS_FUNCTION_PATTERNS = [ # export function name(...) or function name(...) - re.compile(r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)', re.MULTILINE), + re.compile(r"^(?:export\s+)?(?:async\s+)?function\s+(\w+)", re.MULTILINE), # export const name = or const name = - re.compile(r'^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>', re.MULTILINE), + re.compile( + r"^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>", re.MULTILINE + ), ] def extract_functions(file_path: Path) -> Set[str]: """Extract function names from a TypeScript file.""" - if file_path.suffix.lower() not in {'.ts', '.tsx'}: + if file_path.suffix.lower() not in {".ts", ".tsx"}: return set() - + try: - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: content = f.read() except Exception: return set() - + return extract_functions_from_content(content) -def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> Dict[str, List[Path]]: +def find_duplicate_functions( + files: List[Tuple[Path, int]], root_dir: Path +) -> Dict[str, List[Path]]: """Find function names that appear in multiple files.""" function_locations: Dict[str, List[Path]] = defaultdict(list) - + for file_path, _ in files: # Skip test files for duplicate detection if any(file_path.name.endswith(pat) for pat in SKIP_DUPLICATE_FILE_PATTERNS): continue - + functions = extract_functions(file_path) for func in functions: # Skip known common function names if func in SKIP_DUPLICATE_FUNCTIONS: continue function_locations[func].append(file_path) - - # Filter to only duplicates, ignoring cross-extension duplicates. - # Extensions are independent packages — the same function name in - # extensions/telegram and extensions/discord is expected, not duplication. + + # Filter to only duplicates, ignoring cross-package duplicates. + # Independent packages (extensions/*, apps/*, ui/) are treated like separate codebases — + # the same function name in extensions/telegram and extensions/discord, + # or in apps/ios and apps/macos, is expected, not duplication. result: Dict[str, List[Path]] = {} for name, paths in function_locations.items(): if len(paths) < 2: continue - # If ALL instances are in different extensions, skip - ext_dirs = set() - non_ext = False - for p in paths: + + # Identify which independent package each path belongs to (if any) + # Returns a unique package key or None if it's core code + def get_independent_package(p: Path) -> Optional[str]: try: rel = p.relative_to(root_dir) parts = rel.parts - if len(parts) >= 2 and parts[0] == 'extensions': - ext_dirs.add(parts[1]) - else: - non_ext = True + if len(parts) >= 2: + # extensions/, apps/ are each independent + if parts[0] in ("extensions", "apps"): + return f"{parts[0]}/{parts[1]}" + # ui/ is a single independent package (browser frontend) + if len(parts) >= 1 and parts[0] == "ui": + return "ui" + return None except ValueError: - non_ext = True - # Skip if every instance lives in a different extension (no core overlap) - if not non_ext and len(ext_dirs) == len(paths): + return None + + package_keys = set() + has_core = False + for p in paths: + pkg = get_independent_package(p) + if pkg: + package_keys.add(pkg) + else: + has_core = True + + # Skip if ALL instances are in different independent packages (no core overlap) + if not has_core and len(package_keys) == len(paths): continue result[name] = paths return result @@ -173,10 +248,10 @@ def validate_git_ref(root_dir: Path, ref: str) -> bool: """Validate that a git ref exists. Exits with error if not.""" try: result = subprocess.run( - ['git', 'rev-parse', '--verify', ref], + ["git", "rev-parse", "--verify", ref], capture_output=True, cwd=root_dir, - encoding='utf-8', + encoding="utf-8", ) return result.returncode == 0 except Exception: @@ -188,18 +263,18 @@ def get_file_content_at_ref(file_path: Path, root_dir: Path, ref: str) -> Option try: relative_path = file_path.relative_to(root_dir) # Use forward slashes for git paths - git_path = str(relative_path).replace('\\', '/') + git_path = str(relative_path).replace("\\", "/") result = subprocess.run( - ['git', 'show', f'{ref}:{git_path}'], + ["git", "show", f"{ref}:{git_path}"], capture_output=True, cwd=root_dir, - encoding='utf-8', - errors='ignore', + encoding="utf-8", + errors="ignore", ) if result.returncode != 0: stderr = result.stderr.strip() # "does not exist" or "exists on disk, but not in" = file missing at ref (OK) - if 'does not exist' in stderr or 'exists on disk' in stderr: + if "does not exist" in stderr or "exists on disk" in stderr: return None # Other errors (bad ref, git broken) = genuine failure if stderr: @@ -232,11 +307,11 @@ def get_changed_files(root_dir: Path, compare_ref: str) -> Set[str]: """Get set of files changed between compare_ref and HEAD (relative paths with forward slashes).""" try: result = subprocess.run( - ['git', 'diff', '--name-only', compare_ref, 'HEAD'], + ["git", "diff", "--name-only", compare_ref, "HEAD"], capture_output=True, cwd=root_dir, - encoding='utf-8', - errors='ignore', + encoding="utf-8", + errors="ignore", ) if result.returncode != 0: return set() @@ -270,7 +345,7 @@ def find_duplicate_regressions( relevant_dupes: Dict[str, List[Path]] = {} for func_name, paths in current_dupes.items(): involves_changed = any( - str(p.relative_to(root_dir)).replace('\\', '/') in changed_files + str(p.relative_to(root_dir)).replace("\\", "/") in changed_files for p in paths ) if involves_changed: @@ -287,7 +362,7 @@ def find_duplicate_regressions( base_function_locations: Dict[str, List[Path]] = defaultdict(list) for file_path in files_to_check: - if file_path.suffix.lower() not in {'.ts', '.tsx'}: + if file_path.suffix.lower() not in {".ts", ".tsx"}: continue content = get_file_content_at_ref(file_path, root_dir, compare_ref) if content is None: @@ -298,10 +373,14 @@ def find_duplicate_regressions( continue base_function_locations[func].append(file_path) - base_dupes = {name for name, paths in base_function_locations.items() if len(paths) > 1} + base_dupes = { + name for name, paths in base_function_locations.items() if len(paths) > 1 + } # Return only new duplicates - return {name: paths for name, paths in relevant_dupes.items() if name not in base_dupes} + return { + name: paths for name, paths in relevant_dupes.items() if name not in base_dupes + } def find_threshold_regressions( @@ -318,20 +397,20 @@ def find_threshold_regressions( """ crossed = [] grew = [] - + for file_path, current_lines in files: if current_lines < threshold: continue # Not over threshold now, skip - + base_lines = get_line_count_at_ref(file_path, root_dir, compare_ref) - + if base_lines is None or base_lines < threshold: # New file or crossed the threshold crossed.append((file_path, current_lines, base_lines)) elif current_lines > base_lines: # Already over threshold and grew larger grew.append((file_path, current_lines, base_lines)) - + return crossed, grew @@ -350,13 +429,17 @@ def _write_github_summary( lines.append("> ⚠️ **DO NOT trash the code base!** The goal is maintainability.\n") if crossed: - lines.append(f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n") + lines.append( + f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n" + ) lines.append("| File | Before | After | Delta |") lines.append("|------|-------:|------:|------:|") for file_path, current, base in crossed: - rel = str(file_path.relative_to(root_dir)).replace('\\', '/') + rel = str(file_path.relative_to(root_dir)).replace("\\", "/") before = f"{base:,}" if base is not None else "new" - lines.append(f"| `{rel}` | {before} | {current:,} | +{current - (base or 0):,} |") + lines.append( + f"| `{rel}` | {before} | {current:,} | +{current - (base or 0):,} |" + ) lines.append("") if grew: @@ -364,7 +447,7 @@ def _write_github_summary( lines.append("| File | Before | After | Delta |") lines.append("|------|-------:|------:|------:|") for file_path, current, base in grew: - rel = str(file_path.relative_to(root_dir)).replace('\\', '/') + rel = str(file_path.relative_to(root_dir)).replace("\\", "/") lines.append(f"| `{rel}` | {base:,} | {current:,} | +{current - base:,} |") lines.append("") @@ -374,7 +457,9 @@ def _write_github_summary( lines.append("|----------|-------|") for func_name in sorted(new_dupes.keys()): paths = new_dupes[func_name] - file_list = ", ".join(f"`{str(p.relative_to(root_dir)).replace(chr(92), '/')}`" for p in paths) + file_list = ", ".join( + f"`{str(p.relative_to(root_dir)).replace(chr(92), '/')}`" for p in paths + ) lines.append(f"| `{func_name}` | {file_list} |") lines.append("") @@ -383,67 +468,73 @@ def _write_github_summary( lines.append("- Extract helpers, types, or constants into separate files") lines.append("- See `AGENTS.md` for guidelines (~500–700 LOC target)") lines.append(f"- This check compares your PR against `{compare_ref}`") - lines.append(f"- Only code files are checked: {', '.join(f'`{e}`' for e in sorted(CODE_EXTENSIONS))}") + lines.append( + f"- Only code files are checked: {', '.join(f'`{e}`' for e in sorted(CODE_EXTENSIONS))}" + ) lines.append("- Docs, test names, and config files are **not** affected") lines.append("\n") try: - with open(summary_path, 'a', encoding='utf-8') as f: - f.write('\n'.join(lines) + '\n') + with open(summary_path, "a", encoding="utf-8") as f: + f.write("\n".join(lines) + "\n") except Exception as e: print(f"⚠️ Failed to write job summary: {e}", file=sys.stderr) def main(): parser = argparse.ArgumentParser( - description='Analyze code files: list longest/shortest files, find duplicate function names' + description="Analyze code files: list longest/shortest files, find duplicate function names" ) parser.add_argument( - '-t', '--threshold', + "-t", + "--threshold", type=int, default=1000, - help='Warn about files longer than this many lines (default: 1000)' + help="Warn about files longer than this many lines (default: 1000)", ) parser.add_argument( - '--min-threshold', + "--min-threshold", type=int, default=10, - help='Warn about files shorter than this many lines (default: 10)' + help="Warn about files shorter than this many lines (default: 10)", ) parser.add_argument( - '-n', '--top', + "-n", + "--top", type=int, default=20, - help='Show top N longest files (default: 20)' + help="Show top N longest files (default: 20)", ) parser.add_argument( - '-b', '--bottom', + "-b", + "--bottom", type=int, default=10, - help='Show bottom N shortest files (default: 10)' + help="Show bottom N shortest files (default: 10)", ) parser.add_argument( - '-d', '--directory', + "-d", + "--directory", type=str, - default='.', - help='Directory to scan (default: current directory)' + default=".", + help="Directory to scan (default: current directory)", ) parser.add_argument( - '--compare-to', + "--compare-to", type=str, default=None, - help='Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.' + help="Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.", ) parser.add_argument( - '--strict', - action='store_true', - help='Exit with non-zero status if any violations found (for CI)' + "--strict", + action="store_true", + help="Exit with non-zero status if any violations found (for CI)", ) - + args = parser.parse_args() - + root_dir = Path(args.directory).resolve() - + # CI delta mode: only show regressions if args.compare_to: print(f"\n📂 Scanning: {root_dir}") @@ -451,23 +542,32 @@ def main(): if not validate_git_ref(root_dir, args.compare_to): print(f"❌ Invalid git ref: {args.compare_to}", file=sys.stderr) - print(" Make sure the ref exists (e.g. run 'git fetch origin ')", file=sys.stderr) + print( + " Make sure the ref exists (e.g. run 'git fetch origin ')", + file=sys.stderr, + ) sys.exit(2) - + files = find_code_files(root_dir) violations = False # Check file length regressions - crossed, grew = find_threshold_regressions(files, root_dir, args.compare_to, args.threshold) - + crossed, grew = find_threshold_regressions( + files, root_dir, args.compare_to, args.threshold + ) + if crossed: - print(f"⚠️ {len(crossed)} file(s) crossed {args.threshold} line threshold:\n") + print( + f"⚠️ {len(crossed)} file(s) crossed {args.threshold} line threshold:\n" + ) for file_path, current, base in crossed: relative_path = file_path.relative_to(root_dir) if base is None: print(f" {relative_path}: {current:,} lines (new file)") else: - print(f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})") + print( + f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})" + ) print() violations = True else: @@ -477,7 +577,9 @@ def main(): print(f"⚠️ {len(grew)} already-large file(s) grew larger:\n") for file_path, current, base in grew: relative_path = file_path.relative_to(root_dir) - print(f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})") + print( + f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})" + ) print() violations = True else: @@ -501,26 +603,42 @@ def main(): print() if args.strict and violations: # Emit GitHub Actions file annotations so violations appear inline in the PR diff - in_gha = os.environ.get('GITHUB_ACTIONS') == 'true' + in_gha = os.environ.get("GITHUB_ACTIONS") == "true" if in_gha: for file_path, current, base in crossed: - rel = str(file_path.relative_to(root_dir)).replace('\\', '/') + rel = str(file_path.relative_to(root_dir)).replace("\\", "/") if base is None: - print(f"::error file={rel},title=File over {args.threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules.") + print( + f"::error file={rel},title=File over {args.threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules." + ) else: - print(f"::error file={rel},title=File crossed {args.threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules.") + print( + f"::error file={rel},title=File crossed {args.threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules." + ) for file_path, current, base in grew: - rel = str(file_path.relative_to(root_dir)).replace('\\', '/') - print(f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring.") + rel = str(file_path.relative_to(root_dir)).replace("\\", "/") + print( + f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring." + ) for func_name in sorted(new_dupes.keys()): for p in new_dupes[func_name]: - rel = str(p.relative_to(root_dir)).replace('\\', '/') - print(f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename.") + rel = str(p.relative_to(root_dir)).replace("\\", "/") + print( + f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename." + ) # Write GitHub Actions job summary (visible in the Actions check details) - summary_path = os.environ.get('GITHUB_STEP_SUMMARY') + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if summary_path: - _write_github_summary(summary_path, crossed, grew, new_dupes, root_dir, args.threshold, args.compare_to) + _write_github_summary( + summary_path, + crossed, + grew, + new_dupes, + root_dir, + args.threshold, + args.compare_to, + ) # Print actionable summary so contributors know what to do print("─" * 60) @@ -528,9 +646,13 @@ def main(): print(" ⚠️ DO NOT just trash the code base!") print(" The goal is maintainability.\n") if crossed: - print(f" {len(crossed)} file(s) grew past the {args.threshold}-line limit.") + print( + f" {len(crossed)} file(s) grew past the {args.threshold}-line limit." + ) if grew: - print(f" {len(grew)} file(s) already over {args.threshold} lines got larger.") + print( + f" {len(grew)} file(s) already over {args.threshold} lines got larger." + ) print() print(" How to fix:") print(" • Split large files into smaller, focused modules") @@ -538,7 +660,9 @@ def main(): print(" • See AGENTS.md for guidelines (~500-700 LOC target)") print() print(f" This check compares your PR against {args.compare_to}.") - print(f" Only code files are checked ({', '.join(sorted(e for e in CODE_EXTENSIONS))}).") + print( + f" Only code files are checked ({', '.join(sorted(e for e in CODE_EXTENSIONS))})." + ) print(" Docs, tests names, and config files are not affected.") print("─" * 60) sys.exit(1) @@ -546,113 +670,122 @@ def main(): print("─" * 60) print("✅ Code size check passed — no files exceed thresholds.") print("─" * 60) - + return - + print(f"\n📂 Scanning: {root_dir}\n") - + # Find and sort files by line count files = find_code_files(root_dir) files_desc = sorted(files, key=lambda x: x[1], reverse=True) files_asc = sorted(files, key=lambda x: x[1]) - + # Show top N longest files - top_files = files_desc[:args.top] - + top_files = files_desc[: args.top] + print(f"📊 Top {min(args.top, len(top_files))} longest code files:\n") print(f"{'Lines':>8} {'File'}") print("-" * 60) - + long_warnings = [] - + for file_path, line_count in top_files: relative_path = file_path.relative_to(root_dir) - + # Check if over threshold if line_count >= args.threshold: marker = " ⚠️" long_warnings.append((relative_path, line_count)) else: marker = "" - + print(f"{line_count:>8} {relative_path}{marker}") - + # Show bottom N shortest files - bottom_files = files_asc[:args.bottom] - + bottom_files = files_asc[: args.bottom] + print(f"\n📉 Bottom {min(args.bottom, len(bottom_files))} shortest code files:\n") print(f"{'Lines':>8} {'File'}") print("-" * 60) - + short_warnings = [] - + for file_path, line_count in bottom_files: relative_path = file_path.relative_to(root_dir) filename = file_path.name - + # Skip known barrel exports and stubs - is_expected_short = ( - filename in SKIP_SHORT_PATTERNS or - any(filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES) + is_expected_short = filename in SKIP_SHORT_PATTERNS or any( + filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES ) - + # Check if under threshold if line_count <= args.min_threshold and not is_expected_short: marker = " ⚠️" short_warnings.append((relative_path, line_count)) else: marker = "" - + print(f"{line_count:>8} {relative_path}{marker}") - + # Summary total_files = len(files) total_lines = sum(count for _, count in files) - + print("-" * 60) print(f"\n📈 Summary:") print(f" Total code files: {total_files:,}") print(f" Total lines: {total_lines:,}") - print(f" Average lines/file: {total_lines // total_files if total_files else 0:,}") - + print( + f" Average lines/file: {total_lines // total_files if total_files else 0:,}" + ) + # Per-package breakdown package_stats: dict[str, dict] = {} for file_path, line_count in files: pkg = get_package(file_path, root_dir) if pkg not in package_stats: - package_stats[pkg] = {'files': 0, 'lines': 0} - package_stats[pkg]['files'] += 1 - package_stats[pkg]['lines'] += line_count - + package_stats[pkg] = {"files": 0, "lines": 0} + package_stats[pkg]["files"] += 1 + package_stats[pkg]["lines"] += line_count + print(f"\n📦 Per-package breakdown:\n") print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}") print("-" * 45) - - for pkg in sorted(package_stats.keys(), key=lambda p: package_stats[p]['lines'], reverse=True): + + for pkg in sorted( + package_stats.keys(), key=lambda p: package_stats[p]["lines"], reverse=True + ): stats = package_stats[pkg] - avg = stats['lines'] // stats['files'] if stats['files'] else 0 + avg = stats["lines"] // stats["files"] if stats["files"] else 0 print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}") - + # Long file warnings if long_warnings: - print(f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):") + print( + f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):" + ) for path, count in long_warnings: print(f" - {path} ({count:,} lines)") else: print(f"\n✅ No files exceed {args.threshold} lines") - + # Short file warnings if short_warnings: - print(f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):") + print( + f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):" + ) for path, count in short_warnings: print(f" - {path} ({count} lines)") else: print(f"\n✅ No files are {args.min_threshold} lines or less") - + # Duplicate function names duplicates = find_duplicate_functions(files, root_dir) if duplicates: - print(f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):") + print( + f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):" + ) for func_name in sorted(duplicates.keys()): paths = duplicates[func_name] print(f" - {func_name}:") @@ -660,13 +793,13 @@ def main(): print(f" {path.relative_to(root_dir)}") else: print(f"\n✅ No duplicate function names") - + print() - + # Exit with error if --strict and there are violations if args.strict and long_warnings: sys.exit(1) -if __name__ == '__main__': +if __name__ == "__main__": main()