#!/usr/bin/env python3
"""Analyze rustdoc coverage for pdftract-core.

This script counts:
- Total public items (fn, struct, enum, trait, type, const, mod)
- Items with rustdoc examples (```rust blocks)
- Coverage percentage

Usage: run without arguments to scan the default source directory, or pass
a directory path as the first command-line argument.
"""
import re
import subprocess
import sys
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path

# Directory scanned when no explicit source directory is supplied.
DEFAULT_SRC_DIR = Path('/home/coding/pdftract/crates/pdftract-core/src')

# Item types listed in the "BY TYPE" table, in display order.
REPORTED_ITEM_TYPES = ['fn', 'struct', 'enum', 'trait', 'type', 'const', 'mod']

# Patterns for public items, compiled once at import time. They are tried in
# insertion order and the first one that matches a line wins.
_ITEM_PATTERNS = {
    # Accepts the qualifiers that may sit between `pub` and `fn`
    # (const / async / unsafe / extern "ABI").
    'fn': re.compile(
        r'pub\s+(?:(?:const|async|unsafe)\s+)*(?:extern\s+(?:"[^"]*"\s+)?)?fn\s+(\w+)'
    ),
    'struct': re.compile(r'pub\s+struct\s+(\w+)'),
    'enum': re.compile(r'pub\s+enum\s+(\w+)'),
    'trait': re.compile(r'pub\s+trait\s+(\w+)'),
    'type': re.compile(r'pub\s+type\s+(\w+)'),
    # Matches `pub const NAME:`; as written it also matches `pub field:`
    # declarations, so public struct fields are reported under 'const'.
    'const': re.compile(r'pub\s+(?:const\s+|async\s+)?(\w+)\s*:'),
    'mod': re.compile(r'pub\s+mod\s+(\w+)'),
    'impl': re.compile(r'pub\s+impl'),  # impl blocks (trait impls)
}


@dataclass
class DocStats:
    """Statistics for documentation coverage."""

    # Number of public items seen.
    total_items: int = 0
    # Number of items preceded by a `///` doc comment.
    items_with_docs: int = 0
    # Number of items whose doc comment contains a ```rust block.
    items_with_examples: int = 0
    # Per-item-type counters; replaced by a fresh defaultdict when left as None.
    items_by_type: dict = None

    def __post_init__(self):
        # A mutable default must be created per instance, never shared.
        if self.items_by_type is None:
            self.items_by_type = defaultdict(
                lambda: dict(total=0, with_docs=0, with_examples=0)
            )

    def coverage_pct(self):
        """Return percentage of items with documentation (0.0 when empty)."""
        if self.total_items == 0:
            return 0.0
        return (self.items_with_docs / self.total_items) * 100

    def example_pct(self):
        """Return percentage of items with examples (0.0 when empty)."""
        if self.total_items == 0:
            return 0.0
        return (self.items_with_examples / self.total_items) * 100


def _match_public_item(line):
    """Return (item_type, name) for the first pattern matching *line*, else None."""
    for item_type, pattern in _ITEM_PATTERNS.items():
        match = pattern.search(line)
        if match:
            # The impl pattern has no capture group, so it carries no name.
            name = match.group(1) if item_type != 'impl' else ''
            return item_type, name
    return None


def extract_rustdoc_items(content: str, file_path: str) -> list:
    """Extract public items and their associated documentation from Rust source.

    The source is scanned line by line. Consecutive `///` lines are collected
    as the pending doc block and attached to the next public item. Blank
    lines, plain `//` comments and attribute lines (`#[...]`, including
    attributes spanning several lines) may sit between the doc block and the
    item. Any other code line discards the pending doc block, so docs written
    for a private item are never credited to a later public one. `//!` inner
    doc comments are ignored because they document the enclosing item rather
    than the one that follows.

    Args:
        content: Text of a Rust source file.
        file_path: Path of the file; accepted for interface compatibility and
            not used by the extraction itself.

    Returns:
        List of (item_type, name, has_doc, has_example, doc_content) tuples,
        one per public item, in source order. Each item is reported once.
    """
    items = []
    pending_doc = []
    # > 0 while inside an attribute whose closing `]` has not been seen yet.
    attr_depth = 0

    for line in content.split('\n'):
        stripped = line.strip()

        if stripped.startswith('///'):
            pending_doc.append(line)
            continue
        if not stripped or stripped.startswith('//'):
            # Blank lines and non-doc comments (including `//!`) neither
            # start nor end a doc block.
            continue

        found = _match_public_item(line)
        if found is not None:
            item_type, name = found
            # Skip trait impls - they inherit doc from trait
            if item_type != 'impl':
                doc_content = '\n'.join(pending_doc)
                items.append((
                    item_type,
                    name,
                    bool(pending_doc),
                    '```rust' in doc_content,
                    doc_content,
                ))
            pending_doc = []
            # An item line always ends any attribute run, which also bounds
            # the damage if the bracket count below is ever thrown off.
            attr_depth = 0
            continue

        if attr_depth > 0 or stripped.startswith('#['):
            # Attribute line (or continuation of a multi-line attribute):
            # keep the pending doc block for the item that follows.
            attr_depth = max(
                0, attr_depth + stripped.count('[') - stripped.count(']')
            )
            continue

        # Ordinary code that is not a public item: the doc block belonged to
        # something we do not report.
        pending_doc = []

    return items


def analyze_source_file(file_path: Path) -> tuple:
    """Analyze a single Rust source file for documentation coverage.

    The file is decoded as UTF-8. If it cannot be read or decoded, an error
    line is printed and an empty item list is returned so the remaining
    files can still be processed.

    Returns (file_path, items_list)
    """
    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return (file_path, [])
    return (file_path, extract_rustdoc_items(content, str(file_path)))


def _compute_stats(all_items):
    """Fold (file, type, name, has_doc, has_example, doc) rows into a DocStats."""
    stats = DocStats()
    for _file_path, item_type, _name, has_doc, has_example, _doc in all_items:
        per_type = stats.items_by_type[item_type]
        stats.total_items += 1
        per_type['total'] += 1
        if has_doc:
            stats.items_with_docs += 1
            per_type['with_docs'] += 1
        if has_example:
            stats.items_with_examples += 1
            per_type['with_examples'] += 1
    return stats


def _print_report(stats, all_items, src_dir):
    """Print the coverage summary, the per-type table and the worst files."""
    print("\n" + "="*70)
    print("PDFTRACT-CORE RUSTDOC COVERAGE REPORT")
    print("="*70)

    print(f"\nTotal public items: {stats.total_items}")
    print(f"Items with documentation: {stats.items_with_docs} ({stats.coverage_pct():.1f}%)")
    print(f"Items with examples: {stats.items_with_examples} ({stats.example_pct():.1f}%)")
    print("\nTarget: 80%+ example coverage")
    print(f"Status: {'✓ PASS' if stats.example_pct() >= 80 else '✗ FAIL'}")

    print("\n" + "-"*70)
    print("BY TYPE")
    print("-"*70)
    print(f"{'Type':<12} {'Total':>8} {'With Doc':>10} {'With Ex':>10} {'Ex %':>8}")
    print("-"*70)

    for item_type in REPORTED_ITEM_TYPES:
        # Membership test first: indexing the defaultdict would insert an
        # empty row for types that never occurred.
        if item_type in stats.items_by_type:
            data = stats.items_by_type[item_type]
            total = data['total']
            with_docs = data['with_docs']
            with_ex = data['with_examples']
            ex_pct = (with_ex / total * 100) if total > 0 else 0
            print(f"{item_type:<12} {total:>8} {with_docs:>10} {with_ex:>10} {ex_pct:>7.1f}%")

    print("\n" + "-"*70)
    print("FILES NEEDING ATTENTION (public items without examples)")
    print("-"*70)

    # Group items by file
    files_needing_examples = defaultdict(list)
    for file_path, item_type, name, _has_doc, has_example, _doc in all_items:
        if not has_example:
            files_needing_examples[file_path].append((item_type, name))

    # Show files with most missing examples
    sorted_files = sorted(
        files_needing_examples.items(),
        key=lambda entry: len(entry[1]),
        reverse=True,
    )

    for file_path, items in sorted_files[:15]:
        rel_path = file_path.relative_to(src_dir)
        print(f"\n{rel_path} ({len(items)} items without examples):")
        for item_type, name in items[:10]:
            print(f" - {item_type} {name}")
        if len(items) > 10:
            print(f" ... and {len(items) - 10} more")

    print("\n" + "="*70)


def main(src_dir=None):
    """Main entry point.

    Args:
        src_dir: Directory to scan recursively for `*.rs` files. Defaults to
            DEFAULT_SRC_DIR when omitted or None.
    """
    src_dir = DEFAULT_SRC_DIR if src_dir is None else Path(src_dir)

    if not src_dir.is_dir():
        print(f"Source directory not found: {src_dir}")
        return

    # Find all Rust files; sorted so the report does not depend on the order
    # in which the filesystem happens to list them.
    rust_files = sorted(src_dir.rglob('*.rs'))
    print(f"Found {len(rust_files)} Rust files")

    # Analyze each file
    all_items = []
    for file_path in rust_files:
        _, items = analyze_source_file(file_path)
        all_items.extend((file_path, *item) for item in items)

    _print_report(_compute_stats(all_items), all_items, src_dir)


if __name__ == '__main__':
    main(sys.argv[1] if len(sys.argv) > 1 else None)