pdftract/crates/pdftract-core/scripts/measure-doc-coverage.py
2026-05-29 08:25:23 -04:00

201 lines
6.8 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Measure rustdoc coverage for pdftract-core.
Counts:
- Total public items (pub fn/struct/enum/trait/type/const/mod)
- Items with doc comments (/// or //!)
- Items with worked examples (```rust code blocks)
Usage: python3 scripts/measure-doc-coverage.py
"""
import os
import re
from pathlib import Path
from typing import Dict, List, Tuple
# Line-based heuristic scanner for public Rust items (not a full Rust parser)
def extract_public_items(file_path: Path) -> List[Tuple[str, str, str, str]]:
    """
    Extract public items from a Rust source file.

    This is a line-based heuristic, not a Rust parser. An item is recognised
    only when its line starts with ``pub `` followed by optional qualifiers
    (``default``, ``const``, ``async``, ``unsafe``, ``extern "ABI"``) and one
    of the keywords fn/struct/enum/trait/type/const/mod. Lines starting with
    restricted visibility such as ``pub(crate)`` and ``pub use`` re-exports
    are not counted.

    Args:
        file_path: Rust source file to scan; it is read as UTF-8.

    Returns:
        List of (item_type, name, doc_comment, location) tuples in file order.
        ``doc_comment`` is the block of ``///`` / ``//!`` lines collected above
        the item, joined with newlines ('' when there is none). ``location``
        is ``"<path>:<line>"``; the path is relative to the pdftract-core
        ``src`` directory when the file lives under it, otherwise it is the
        path as given.
    """
    src_root = Path('/home/coding/pdftract/crates/pdftract-core/src')
    try:
        shown_path = file_path.relative_to(src_root)
    except ValueError:
        # The file is outside the default checkout: report the path as given
        # instead of aborting the whole scan.
        shown_path = file_path

    # Anchored at the start of the line. The qualifier group is greedy so that
    # `pub const fn foo` is read as fn `foo`, while `pub const MAX` still
    # backtracks to a plain const item.
    item_re = re.compile(
        r'pub\s+'
        r'(?:(?:default|const|async|unsafe|extern(?:\s*"[^"]*")?)\s+)*'
        r'(fn|struct|enum|trait|type|const|mod)\s+(\w+)'
    )

    items = []
    doc_comment = []   # doc lines waiting for the item they belong to
    attr_depth = 0     # > 0 while inside a multi-line `#[...]` attribute

    lines = file_path.read_text(encoding='utf-8').split('\n')
    for line_no, line in enumerate(lines, 1):
        stripped = line.strip()

        # Collect doc comments.
        # NOTE(review): `//!` lines are attached to the next item here, as the
        # script's usage text describes, although rustdoc applies them to the
        # enclosing module.
        if stripped.startswith(('///', '//!')):
            doc_comment.append(stripped)
            continue

        # Attributes sit between the docs and the item and must not break the
        # pending doc block, even when they span several lines.
        if attr_depth > 0 or stripped.startswith('#'):
            attr_depth = max(
                0, attr_depth + stripped.count('[') - stripped.count(']')
            )
            continue

        # Blank lines and ordinary comments neither add to nor end a doc block.
        if not stripped or stripped.startswith('//'):
            continue

        if stripped.startswith('pub '):
            # `pub use` re-exports and `pub` struct fields do not match the
            # pattern and are therefore skipped.
            match = item_re.match(stripped)
            if match:
                items.append((
                    match.group(1),
                    match.group(2),
                    '\n'.join(doc_comment),
                    f"{shown_path}:{line_no}",
                ))

        # Any code line ends the pending doc block, so docs written for a
        # private item cannot be credited to a later public one.
        doc_comment = []

    return items
def has_worked_example(doc: str) -> bool:
    """Check if doc comment contains a worked example (a Rust code block).

    Follows rustdoc's rule for fenced code blocks: a fence counts as Rust
    when its info string is empty, contains ``rust``, or consists only of
    rustdoc attributes (``no_run``, ``ignore``, ``should_panic``,
    ``compile_fail``, ``edition2021``, ...). Fences tagged with another
    language (``text``, ``json``, ``sh``, ...) do not count.

    Args:
        doc: Doc comment text, one comment line per line; leading ``///`` or
            ``//!`` markers are tolerated and ignored.

    Returns:
        True if at least one Rust code fence is opened in ``doc``.
    """
    if not doc:
        return False

    rustdoc_attrs = {
        'rust', 'no_run', 'ignore', 'should_panic', 'compile_fail',
        'test_harness', 'standalone_crate',
    }

    def is_rustdoc_attr(token: str) -> bool:
        return (
            token in rustdoc_attrs
            or token.startswith(('edition', 'ignore-'))
            or re.fullmatch(r'E\d{4}', token) is not None
        )

    open_fence = None  # fence string of the block currently open, if any
    for raw_line in doc.splitlines():
        # Drop the comment marker so the fence is at the start of the text.
        text = re.sub(r'^\s*//[/!]?', '', raw_line).strip()
        fence_match = re.match(r'(`{3,}|~{3,})(.*)$', text)
        if not fence_match:
            continue
        fence = fence_match.group(1)
        info = fence_match.group(2).strip()

        if open_fence is not None:
            # Inside a block only a bare fence of the same kind closes it;
            # it must not be mistaken for the opening of a new block.
            if fence[0] == open_fence[0] and len(fence) >= len(open_fence) and not info:
                open_fence = None
            continue

        open_fence = fence
        tokens = [tok for tok in re.split(r'[,\s]+', info) if tok]
        if 'rust' in tokens or all(is_rustdoc_attr(tok) for tok in tokens):
            return True

    return False
def measure_coverage(src_dir: Path) -> Dict:
    """Measure documentation coverage across all source files.

    Walks every ``*.rs`` file below ``src_dir`` (in sorted order, so the
    report is the same on every run), skipping test code, and tallies the
    public items returned by ``extract_public_items``.

    Args:
        src_dir: Root directory that is searched recursively.

    Returns:
        Dict with the keys:
        - 'total_items': number of public items found
        - 'with_docs': items with a non-empty doc comment
        - 'with_examples': items for which ``has_worked_example`` is true
        - 'by_type': item type -> {'total', 'with_docs', 'with_examples'}
        - 'items_missing_examples': list of (item_type, name, location)
    """
    results = {
        'total_items': 0,
        'with_docs': 0,
        'with_examples': 0,
        'by_type': {},
        'items_missing_examples': [],
    }

    for rs_file in sorted(src_dir.rglob('*.rs')):
        # Skip test code. Only the path below src_dir is inspected, and only
        # whole components, so neither an ancestor directory of the checkout
        # nor a file such as `contests.rs` triggers the filter.
        rel_path = rs_file.relative_to(src_dir)
        in_tests_dir = 'tests' in rel_path.parts[:-1]
        is_tests_module = (
            rel_path.stem == 'tests' or rel_path.stem.endswith('_tests')
        )
        if in_tests_dir or is_tests_module:
            continue

        for item_type, name, doc, location in extract_public_items(rs_file):
            type_stats = results['by_type'].setdefault(
                item_type,
                {'total': 0, 'with_docs': 0, 'with_examples': 0},
            )
            results['total_items'] += 1
            type_stats['total'] += 1

            if doc:
                results['with_docs'] += 1
                type_stats['with_docs'] += 1

            if has_worked_example(doc):
                results['with_examples'] += 1
                type_stats['with_examples'] += 1
            else:
                results['items_missing_examples'].append(
                    (item_type, name, location)
                )

    return results
def main():
    """Print the rustdoc coverage report for pdftract-core to stdout.

    Reports overall documentation and example coverage, example coverage per
    item type, and, when example coverage is below the 80% target, how many
    more examples are needed plus the first 20 items lacking one.

    Raises:
        SystemExit: if the source directory does not exist, so a wrong path
            is not reported as "0 public items".
    """
    target_pct = 80
    max_listed = 20

    src_dir = Path('/home/coding/pdftract/crates/pdftract-core/src')
    if not src_dir.is_dir():
        raise SystemExit(f"error: source directory not found: {src_dir}")

    results = measure_coverage(src_dir)
    total = results['total_items']
    with_docs = results['with_docs']
    with_examples = results['with_examples']
    missing_all = results['items_missing_examples']

    doc_coverage = (with_docs / total * 100) if total > 0 else 0
    example_coverage = (with_examples / total * 100) if total > 0 else 0

    print("=== Rustdoc Coverage Report for pdftract-core ===\n")
    print(f"Total public items: {total}")
    print(f"With documentation: {with_docs} ({doc_coverage:.1f}%)")
    print(f"With worked examples: {with_examples} ({example_coverage:.1f}%)")
    print()
    print("By item type:")
    for item_type, stats in sorted(results['by_type'].items()):
        t_total = stats['total']
        t_examples = stats['with_examples']
        t_ex_cov = (t_examples / t_total * 100) if t_total > 0 else 0
        print(f" {item_type:8s}: {t_examples:3d}/{t_total:3d} with examples ({t_ex_cov:.0f}%)")
    print()

    if example_coverage < target_pct:
        # Smallest item count that reaches the target, rounded UP in integer
        # arithmetic; truncating could report "Need 0 more" while still
        # below the target.
        required = -(-total * target_pct // 100)
        needed = max(0, required - with_examples)
        print(f"⚠️ Target: {target_pct}% coverage. Current: {example_coverage:.1f}%")
        print(f" Need {needed} more examples.\n")
        # Show first 20 items missing examples
        missing = missing_all[:max_listed]
        print(f"First {max_listed} items missing examples (showing {len(missing)} of {len(missing_all)}):")
        for item_type, name, location in missing:
            print(f" - {item_type:8s} {name:30s} ({location})")
        if len(missing_all) > max_listed:
            print(f" ... and {len(missing_all) - max_listed} more")
    else:
        print(f"✅ Target met: {example_coverage:.1f}% >= {target_pct}%")
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()