#!/usr/bin/env python3
"""
Measure rustdoc coverage for pdftract-core.

This script scans all .rs files and counts:
- Public items (pub fn/struct/enum/trait/type/mod/const, plus `pub use`
  re-exports and inherent `impl` blocks)
- Items with documentation (/// or //! line comments, /** or /*! blocks)
- Items with worked examples (``` fenced blocks in doc comments)

The analysis is line-based and heuristic; it does not parse Rust.
"""

import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Minimum percentage of items that must carry an example for main() to pass.
MIN_EXAMPLE_COVERAGE = 80

# (compiled regex, kind) pairs, tried in order; the first match on a line wins.
# Each regex captures the item name in exactly one group.
_ITEM_PATTERNS = tuple(
    (re.compile(regex), kind)
    for regex, kind in (
        # Qualifiers must be accepted here, otherwise `pub const fn new()`
        # falls through to the const/static pattern and is named "fn".
        (r'pub\s+(?:(?:const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*fn\s+(\w+)', 'fn'),
        (r'pub\s+struct\s+(\w+)', 'struct'),
        (r'pub\s+enum\s+(\w+)', 'enum'),
        (r'pub\s+trait\s+(\w+)', 'trait'),
        (r'pub\s+type\s+(\w+)', 'type'),
        (r'pub\s+mod\s+(\w+)', 'mod'),
        # `mut` is a qualifier of `static`, not the item name.
        (r'pub\s+(?:const|static)\s+(?:mut\s+)?(\w+)', 'const'),
        # Alias alternative first so `pub use a::B as C` is reported as `C`;
        # otherwise the first path segment is reported.
        (r'pub\s+use\s+(?:[\w:]+\s+as\s+(\w+)|(\w+))', 'use'),
        (r'impl\s+(\w+)\s*\{', 'impl'),  # impl blocks (inherent impls)
    )
)


@dataclass
class FileStats:
    """Statistics for a single source file."""

    path: str  # file path relative to the parent of the scanned directory
    pub_items: int  # number of items found in the file
    with_doc: int  # items that have a doc comment
    with_example: int  # items whose doc comment contains a ``` fence
    items: List[Dict]  # per-item dicts as returned by extract_public_items()


def _block_comment_start(lines: List[str], end_index: int) -> Optional[int]:
    """Return the index of the line opening the block comment ending at *end_index*.

    Returns None when no opener is found or when the opener does not begin its
    line (i.e. the comment trails code, so the scan must not walk past it).
    """
    for k in range(end_index, -1, -1):
        text = lines[k].strip()
        if '/*' in text:
            return k if text.startswith('/*') else None
    return None


def _attribute_start(lines: List[str], end_index: int) -> Optional[int]:
    """Return the index of the `#[` line of a multi-line attribute ending at *end_index*.

    Returns None when the lines above do not look like an attribute body
    (blank line, comment, or a line ending a statement or block).
    """
    for k in range(end_index - 1, -1, -1):
        text = lines[k].strip()
        if text.startswith('#['):
            return k
        if not text or text.startswith('//') or text[-1] in ';{}':
            return None
    return None


def _collect_docs(lines: List[str], item_index: int) -> Tuple[bool, bool, List[str]]:
    """Walk backwards from an item and gather its documentation.

    Blank lines, plain `//` and `/* */` comments, and `#[...]` attributes are
    skipped; any other code line stops the scan.

    Returns:
        (has_doc, has_example, doc_lines). doc_lines holds the text of `///`
        and `//!` lines (marker removed) in source order; block doc comments
        set the flags but contribute no doc_lines.
    """
    has_doc = False
    has_example = False
    doc_lines: List[str] = []

    j = item_index - 1
    while j >= 0:
        prev = lines[j].strip()
        if prev.startswith(('///', '//!')):
            has_doc = True
            doc_lines.insert(0, prev[3:])
            if '```' in prev:
                has_example = True
        elif prev.endswith('*/'):
            start = _block_comment_start(lines, j)
            if start is None:
                break
            block = [text.strip() for text in lines[start:j + 1]]
            if block[0].startswith(('/**', '/*!')):
                has_doc = True
                if any('```' in text for text in block):
                    has_example = True
                break  # a block doc comment ends the search
            j = start  # plain block comment: step over it
        elif prev.startswith('#['):
            pass  # single-line attribute between the docs and the item
        elif prev.endswith(']'):
            start = _attribute_start(lines, j)
            if start is None:
                break
            j = start  # closing line of a multi-line attribute
        elif prev and not prev.startswith('//'):
            break  # real code: nothing above belongs to this item
        j -= 1

    return has_doc, has_example, doc_lines


def extract_public_items(content: str, filepath: str) -> List[Dict]:
    """Extract public items from Rust source code.

    Args:
        content: Full text of a Rust source file.
        filepath: Path of the file; accepted for interface compatibility and
            not used by the extraction itself.

    Returns:
        A list of dicts, in source order, with keys: kind, name, line
        (1-based), has_doc, has_example, doc_lines.
    """
    items = []
    lines = content.split('\n')

    for index, line in enumerate(lines):
        stripped = line.strip()
        # Comment lines are skipped so code inside doc examples is not counted.
        if not stripped or stripped.startswith('//'):
            continue

        for pattern, kind in _ITEM_PATTERNS:
            match = pattern.search(line)
            if not match:
                continue
            # Exactly one capture group participates in any match.
            name = next((group for group in match.groups() if group), None)
            if not name:
                continue
            has_doc, has_example, doc_lines = _collect_docs(lines, index)
            items.append({
                'kind': kind,
                'name': name,
                'line': index + 1,
                'has_doc': has_doc,
                'has_example': has_example,
                'doc_lines': doc_lines,
            })
            break

    return items


def scan_directory(src_dir: Path) -> Dict[str, FileStats]:
    """Scan all .rs files in the source directory.

    Files under a `tests` or `benches` directory inside *src_dir* are skipped,
    as are unreadable files (with a warning) and files with no items.

    Returns:
        Mapping from path (relative to the parent of *src_dir*) to FileStats.
    """
    stats = {}

    # Sorted so the report order does not depend on filesystem enumeration.
    for rs_file in sorted(src_dir.rglob('*.rs')):
        # Only look at components below src_dir: an ancestor directory that
        # happens to be called "tests" must not exclude the whole tree.
        inner_parts = rs_file.relative_to(src_dir).parts
        if 'tests' in inner_parts or 'benches' in inner_parts:
            continue

        try:
            content = rs_file.read_text(encoding='utf-8', errors='ignore')
        except OSError as e:
            print(f"Warning: Could not read {rs_file}: {e}")
            continue

        items = extract_public_items(content, str(rs_file))
        if not items:
            continue

        relative_path = str(rs_file.relative_to(src_dir.parent))
        stats[relative_path] = FileStats(
            path=relative_path,
            pub_items=len(items),
            with_doc=sum(1 for it in items if it['has_doc']),
            with_example=sum(1 for it in items if it['has_example']),
            items=items,
        )

    return stats


def _percentage(part: int, whole: int) -> float:
    """Return part/whole as a percentage, or 0.0 when whole is not positive."""
    return part / whole * 100 if whole > 0 else 0.0


def print_summary(stats: Dict[str, FileStats]):
    """Print summary statistics.

    Prints overall totals, the ten files with the most items lacking examples,
    files with no documentation at all, and truncated lists of undocumented
    items and of fn/struct/enum/trait items without examples.
    """
    total_items = sum(s.pub_items for s in stats.values())
    total_with_doc = sum(s.with_doc for s in stats.values())
    total_with_example = sum(s.with_example for s in stats.values())

    doc_coverage = _percentage(total_with_doc, total_items)
    example_coverage = _percentage(total_with_example, total_items)

    print("=" * 70)
    print("RUSTDOC COVERAGE SUMMARY")
    print("=" * 70)
    print(f"\nTotal public items: {total_items}")
    print(f"With documentation: {total_with_doc} ({doc_coverage:.1f}%)")
    print(f"With examples: {total_with_example} ({example_coverage:.1f}%)")
    print()

    # Files with low example coverage, ranked by number of items missing one.
    print("Files with lowest example coverage (top 10):")
    print("-" * 70)
    sorted_files = sorted(
        stats.items(),
        key=lambda x: (x[1].pub_items - x[1].with_example) if x[1].pub_items > 0 else 0,
        reverse=True,
    )
    for i, (path, stat) in enumerate(sorted_files[:10]):
        if stat.pub_items > 0:
            cov = _percentage(stat.with_example, stat.pub_items)
            print(f"{i+1:2d}. {path:50s} {stat.with_example:3d}/{stat.pub_items:3d} ({cov:5.1f}%)")
    print()

    # Files lacking documentation entirely
    no_doc_files = [(p, s) for p, s in stats.items() if s.with_doc == 0 and s.pub_items > 0]
    if no_doc_files:
        print("Files with NO documentation:")
        print("-" * 70)
        for path, stat in no_doc_files[:10]:
            print(f" {path}: {stat.pub_items} undocumented items")
        print()

    # Specific items without documentation
    undocumented = [
        (path, item)
        for path, stat in stats.items()
        for item in stat.items
        if not item['has_doc']
    ]
    if undocumented:
        print(f"Undocumented items (showing first 20 of {len(undocumented)}):")
        print("-" * 70)
        for i, (path, item) in enumerate(undocumented[:20]):
            print(f"{i+1:2d}. {path:45s} {item['kind']:8s} {item['name']}")
        print()

    # Items without examples (only kinds where an example is expected)
    no_example = [
        (path, item)
        for path, stat in stats.items()
        for item in stat.items
        if not item['has_example'] and item['kind'] in ('fn', 'struct', 'enum', 'trait')
    ]
    if no_example:
        print(f"Items without examples (showing first 30 of {len(no_example)}):")
        print("-" * 70)
        for i, (path, item) in enumerate(no_example[:30]):
            print(f"{i+1:2d}. {path:45s} {item['kind']:8s} {item['name']}")
        print()


def main() -> int:
    """Scan the `src` directory next to this script and report coverage.

    Returns:
        0 when example coverage is at least MIN_EXAMPLE_COVERAGE percent,
        1 otherwise or when the source directory does not exist.
    """
    src_dir = Path(__file__).parent / 'src'
    if not src_dir.exists():
        print(f"Error: Source directory not found: {src_dir}")
        return 1

    print(f"Scanning {src_dir}...")
    stats = scan_directory(src_dir)
    print_summary(stats)

    # Return non-zero if example coverage is below the threshold
    total_items = sum(s.pub_items for s in stats.values())
    total_with_example = sum(s.with_example for s in stats.values())
    coverage = _percentage(total_with_example, total_items)

    print("=" * 70)
    if coverage >= MIN_EXAMPLE_COVERAGE:
        print(f"✓ PASS: Example coverage {coverage:.1f}% >= {MIN_EXAMPLE_COVERAGE}%")
        return 0
    print(f"✗ FAIL: Example coverage {coverage:.1f}% < {MIN_EXAMPLE_COVERAGE}%")
    return 1


if __name__ == '__main__':
    # sys.exit rather than the site-provided exit(), which is not always defined.
    sys.exit(main())