#!/usr/bin/env python3
"""Assess rustdoc coverage for pdftract-core public API.

The script scans Rust sources line by line (it is a heuristic, not a parser),
records every ``pub`` item it recognises and reports how many of them carry
``///`` documentation and how many of those docs contain a Rust code example.

Usage: run without arguments to scan the default crate, or pass a source
directory as the first command-line argument.
"""

import re
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path

# Source tree scanned when no directory is supplied to ``main``.
DEFAULT_SRC_DIR = Path('/home/coding/pdftract/crates/pdftract-core/src')

# Directory names, relative to the scanned root, whose files are ignored.
_SKIPPED_DIRS = frozenset({'tests', 'examples'})

# Matches a public item declaration.  Function/trait qualifiers are consumed
# first so that ``pub const fn new`` is reported as the function ``new`` rather
# than as a constant called ``fn``; a plain ``pub const NAME`` falls through to
# the second alternative.
_ITEM_RE = re.compile(
    r'\bpub\s+'
    r'(?:'
    r'(?:(?:const|async|unsafe|extern(?:\s*"[^"]*")?)\s+)*'
    r'(?P<kind>fn|struct|enum|trait|type|mod)\s+(?P<name>\w+)'
    r'|const\s+(?P<const_name>\w+)'
    r')'
)

# Code-fence tags that rustdoc accepts on a Rust code block.
_RUST_FENCE_TAGS = frozenset({
    'rust',
    'ignore',
    'no_run',
    'should_panic',
    'compile_fail',
    'test_harness',
    'standalone_crate',
})
_RUST_FENCE_PREFIXES = ('edition', 'ignore-')


@dataclass
class DocStats:
    """Documentation counters for one file or for a whole crate.

    Attributes:
        total_items: Number of public items found.
        with_docs: Number of those items that have a doc comment.
        with_examples: Number of those items whose docs contain an example.
        items: One dict per item with the keys ``name``, ``type``, ``file``,
            ``line``, ``has_doc`` and ``has_example``.
    """

    total_items: int = 0
    with_docs: int = 0
    with_examples: int = 0
    items: list = field(default_factory=list)

    def __post_init__(self):
        # Callers may still pass ``items=None`` explicitly; normalise it.
        if self.items is None:
            self.items = []


def _is_outer_doc_line(stripped: str) -> bool:
    """Return True for a ``///`` line (``////`` is an ordinary comment)."""
    return stripped.startswith('///') and not stripped.startswith('////')


def _collect_doc_lines(lines: list, item_index: int) -> list:
    """Return the ``///`` lines attached to the item at *item_index*.

    Walks upward from the item, skipping blank lines and single-line
    attributes (``#[...]``), and stops at the first line that is neither.
    ``//!`` lines stop the walk because they document the enclosing module,
    not the item that follows.  The result is in source order.
    """
    doc_lines = []
    for j in range(item_index - 1, -1, -1):
        stripped = lines[j].strip()
        if _is_outer_doc_line(stripped):
            doc_lines.append(stripped)
        elif stripped and not stripped.startswith('#['):
            break
    doc_lines.reverse()
    return doc_lines


def _is_rust_fence(info: str) -> bool:
    """Return True if a code-fence info string denotes a Rust block.

    An empty info string counts as Rust, as does any string that contains
    the ``rust`` tag or consists only of rustdoc test attributes.
    """
    tags = [tag for tag in re.split(r'[,\s]+', info.strip()) if tag]
    if 'rust' in tags:
        return True
    return all(
        tag in _RUST_FENCE_TAGS or tag.startswith(_RUST_FENCE_PREFIXES)
        for tag in tags
    )


def _has_rust_example(doc_lines: list) -> bool:
    """Return True if the doc comment opens at least one Rust code fence."""
    inside_fence = False
    for doc_line in doc_lines:
        text = doc_line.strip()[3:].strip()  # drop the leading ``///``
        if not text.startswith('```'):
            continue
        if inside_fence:
            # Closing fence: must not be mistaken for a new bare opening.
            inside_fence = False
            continue
        inside_fence = True
        if _is_rust_fence(text[3:]):
            return True
    return False


def extract_public_items(file_path: Path) -> DocStats:
    """Extract public items and their documentation status.

    Args:
        file_path: Rust source file to scan; it is read as UTF-8.

    Returns:
        A ``DocStats`` whose counters and ``items`` list describe every
        ``pub fn``/``struct``/``enum``/``trait``/``const``/``type``/``mod``
        declaration found in the file.

    Limitations: block comments (``/* */``), string literals and
    multi-line attributes are not understood.
    """
    lines = file_path.read_text(encoding='utf-8').split('\n')
    stats = DocStats()

    for index, line in enumerate(lines):
        # Declarations quoted inside comments or doc examples are not items.
        if line.lstrip().startswith('//'):
            continue

        for match in _ITEM_RE.finditer(line):
            if match.group('kind'):
                item_type = f"pub {match.group('kind')}"
                name = match.group('name')
            else:
                item_type = 'pub const'
                name = match.group('const_name')

            doc_lines = _collect_doc_lines(lines, index)
            has_doc = bool(doc_lines)
            has_example = _has_rust_example(doc_lines)

            stats.total_items += 1
            if has_doc:
                stats.with_docs += 1
            if has_example:
                stats.with_examples += 1

            stats.items.append({
                'name': name,
                'type': item_type,
                'file': str(file_path),
                'line': index + 1,
                'has_doc': has_doc,
                'has_example': has_example,
            })

    return stats


def _percent(part: int, whole: int) -> float:
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is 0."""
    return part / whole * 100 if whole else 0.0


def _collect_stats(src_dir: Path):
    """Scan every ``.rs`` file under *src_dir*.

    Returns a ``(totals, per_module)`` pair where ``per_module`` maps each
    file path relative to *src_dir* to its ``DocStats``; files without
    public items are omitted.
    """
    totals = DocStats()
    per_module = {}

    # Sorted so that the report order does not depend on the filesystem.
    for rs_file in sorted(src_dir.rglob('*.rs')):
        module_name = rs_file.relative_to(src_dir)
        # Only directories below the root decide whether a file is skipped.
        if _SKIPPED_DIRS.intersection(module_name.parts):
            continue

        stats = extract_public_items(rs_file)
        if stats.total_items > 0:
            per_module[module_name] = stats
            totals.total_items += stats.total_items
            totals.with_docs += stats.with_docs
            totals.with_examples += stats.with_examples

    return totals, per_module


def main(src_dir=None):
    """Print the documentation coverage report.

    Args:
        src_dir: Directory to scan; defaults to ``DEFAULT_SRC_DIR``.
    """
    root = Path(src_dir) if src_dir is not None else DEFAULT_SRC_DIR
    all_stats, module_docs = _collect_stats(root)

    docs_pct = _percent(all_stats.with_docs, all_stats.total_items)
    examples_pct = _percent(all_stats.with_examples, all_stats.total_items)
    print(f"Total public items: {all_stats.total_items}")
    print(f"With documentation: {all_stats.with_docs} ({docs_pct:.1f}%)")
    print(f"With examples: {all_stats.with_examples} ({examples_pct:.1f}%)")
    print()

    # Show modules with worst coverage
    print("Modules needing documentation (sorted by items without examples):")
    by_missing_examples = sorted(
        module_docs.items(),
        key=lambda entry: entry[1].total_items - entry[1].with_examples,
        reverse=True,
    )
    for module, stats in by_missing_examples:
        coverage = _percent(stats.with_examples, stats.total_items)
        print(f" {module}: {stats.with_examples}/{stats.total_items} ({coverage:.0f}%)")

    # List items without docs
    print("\nItems WITHOUT any documentation:")
    for module, stats in module_docs.items():
        for item in stats.items:
            if not item['has_doc']:
                print(f" {module}:{item['line']} - {item['type']} {item['name']}")


if __name__ == '__main__':
    main(sys.argv[1] if len(sys.argv) > 1 else None)