#!/usr/bin/env python3
"""
Audit documentation coverage for pdftract-core public API.
Counts public items and checks for rustdoc examples.
"""

import ast
import os
import re
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Patterns for doc comments containing examples
EXAMPLE_PATTERNS = [
    r'```rust',
    r'```ignore',
    r'```no_run',
]

# Project root used when main() is called without an explicit root.
DEFAULT_ROOT = Path('/home/coding/pdftract')

# One pattern per kind of public item; group 1 captures the item name.
# `pub(crate)` and friends are not matched because `pub` must be followed
# by whitespace.
_ITEM_PATTERNS = [
    re.compile(pattern)
    for pattern in (
        # Rust's qualifier order is `const async unsafe extern "ABI" fn`;
        # accept any mix of the first three plus an optional extern clause.
        r'pub\s+(?:(?:const|async|unsafe)\s+)*'
        r'(?:extern\s+(?:"[^"]*"\s+)?)?fn\s+(\w+)',
        r'pub\s+struct\s+(\w+)',
        r'pub\s+enum\s+(\w+)',
        r'pub\s+trait\s+(\w+)',
        r'pub\s+type\s+(\w+)',
        # Skip `pub const fn ...` so a const function is not reported as a
        # constant named "fn" (or "unsafe"/"async"/"extern").
        r'pub\s+const\s+(?!(?:fn|async|unsafe|extern)\b)(\w+)',
        r'pub\s+mod\s+(\w+)',
    )
]


def _doc_status(lines: List[str], index: int) -> Tuple[bool, bool]:
    """Return (has_doc, has_example) for the item declared at lines[index].

    Walks upward from the line above the item. Lines starting with `///` or
    `//!` count as documentation; blank lines, other `//` comments and lines
    starting with `#` (attributes) are skipped; any other line ends the scan.
    """
    has_doc = False
    has_example = False
    for j in range(index - 1, -1, -1):
        stripped = lines[j].strip()
        if stripped.startswith(('///', '//!')):
            has_doc = True
            if any(re.search(pat, lines[j]) for pat in EXAMPLE_PATTERNS):
                has_example = True
        elif stripped and not stripped.startswith(('//', '#')):
            # Reached code that belongs to something else.
            break
    return has_doc, has_example


def extract_rust_items(file_path: Path) -> List[Dict[str, Any]]:
    """Extract public items from a Rust file.

    Args:
        file_path: Path of the `.rs` file to scan.

    Returns:
        One dict per matched public item with the keys ``name``, ``line``
        (1-based), ``has_doc``, ``has_example`` and ``file``. An unreadable
        or non-UTF-8 file yields an empty list and a warning on stderr.
    """
    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as err:
        # Best effort: one bad file must not abort the whole audit.
        print(f"warning: skipping {file_path}: {err}", file=sys.stderr)
        return []

    items: List[Dict[str, Any]] = []
    lines = content.split('\n')

    # Simple regex-based extraction for public items
    for i, line in enumerate(lines):
        if line.strip().startswith('//'):
            # Commented-out code and doc comments never declare an item.
            continue
        for pattern in _ITEM_PATTERNS:
            match = pattern.search(line)
            if not match:
                continue
            has_doc, has_example = _doc_status(lines, i)
            items.append({
                'name': match.group(1),
                'line': i + 1,
                'has_doc': has_doc,
                'has_example': has_example,
                'file': file_path,
            })

    return items


def scan_directory(crate_src: Path) -> List[Dict[str, Any]]:
    """Scan all Rust files in the crate source directory.

    Files located inside a directory named ``target`` (relative to
    *crate_src*) are skipped. Files are visited in sorted path order so the
    resulting report is stable between runs.
    """
    all_items: List[Dict[str, Any]] = []
    for rs_file in sorted(crate_src.rglob('*.rs')):
        # Compare whole path components below the scan root: a substring
        # test would also drop files such as `target_info.rs`.
        if 'target' in rs_file.relative_to(crate_src).parts[:-1]:
            continue
        all_items.extend(extract_rust_items(rs_file))
    return all_items


def _percent(part: int, total: int) -> float:
    """Return *part* as a percentage of *total*, or 0.0 when total is 0."""
    return 100 * part / total if total else 0.0


def main(pdftract_root: Optional[Path] = None) -> int:
    """Print a documentation coverage report for pdftract-core.

    Args:
        pdftract_root: Project root containing ``crates/pdftract-core/src``.
            Defaults to ``DEFAULT_ROOT`` when omitted.

    Returns:
        0 after printing the report, 1 when the source directory is missing.
    """
    if pdftract_root is None:
        pdftract_root = DEFAULT_ROOT
    pdftract_root = Path(pdftract_root)
    core_src = pdftract_root / 'crates' / 'pdftract-core' / 'src'

    if not core_src.exists():
        print(f"Source directory not found: {core_src}")
        return 1

    items = scan_directory(core_src)

    # Count coverage
    total = len(items)
    with_doc = sum(1 for i in items if i['has_doc'])
    with_example = sum(1 for i in items if i['has_example'])
    without_doc = total - with_doc

    print("Documentation Coverage for pdftract-core")
    print("=" * 50)
    print(f"Total public items: {total}")
    print(f"With documentation: {with_doc} ({_percent(with_doc, total):.1f}%)")
    print(f"With examples: {with_example} ({_percent(with_example, total):.1f}%)")
    print(f"Without documentation: {without_doc}")
    print()

    # Show items without documentation
    if without_doc > 0:
        print("Items missing documentation:")
        for item in items:
            if not item['has_doc']:
                rel_path = item['file'].relative_to(pdftract_root)
                print(f" - {item['name']} ({rel_path}:{item['line']})")
        print()

    # Show items without examples (but have docs)
    no_example_items = [
        i for i in items if i['has_doc'] and not i['has_example']
    ]
    if no_example_items:
        print(f"Items with docs but no examples ({len(no_example_items)}):")
        for item in no_example_items[:20]:  # Show first 20
            rel_path = item['file'].relative_to(pdftract_root)
            print(f" - {item['name']} ({rel_path}:{item['line']})")
        if len(no_example_items) > 20:
            print(f" ... and {len(no_example_items) - 20} more")

    return 0


if __name__ == '__main__':
    # An optional first CLI argument overrides the default project root.
    sys.exit(main(Path(sys.argv[1]) if len(sys.argv) > 1 else None))