201 lines
6.8 KiB
Python
Executable file
201 lines
6.8 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Measure rustdoc coverage for pdftract-core.
|
|
|
|
Counts:
|
|
- Total public items (pub fn/struct/enum/trait/type/const/mod)
|
|
- Items with doc comments (/// or //!)
|
|
- Items with worked examples (```rust code blocks)
|
|
|
|
Usage: python3 scripts/measure-doc-coverage.py
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Tuple
|
|
|
|
# Simple Rust parser for extracting public items
|
|
def extract_public_items(file_path: Path) -> List[Tuple[str, str, str, str]]:
    """
    Extract public items from a Rust source file.

    This is a line-based heuristic, not a real Rust parser. A line counts as
    a public item when, after stripping whitespace, it starts with `pub`
    followed by optional function qualifiers (`const`, `async`, `unsafe`,
    `extern "ABI"`) and one of fn/struct/enum/trait/type/const/mod.
    Restricted visibility such as `pub(crate)` and `pub use` re-exports are
    not counted.

    Only line doc comments (`///` and `//!`) are collected; they are attached
    to the next public item found. Blank lines, regular `//` comments and
    attribute lines (`#[...]`) between the docs and the item are skipped.

    NOTE(review): rustdoc attaches `//!` to the enclosing item rather than
    the following one; it is collected here because the script's stated
    counting rule includes it -- confirm that this is intended.

    Returns: List of (item_type, name, doc_comment, location) tuples.
    `doc_comment` is the collected doc lines joined with newlines ('' when
    there are none) and `location` is a "path:line" string.
    """
    # Locations are reported relative to this root when the file lives under it.
    default_root = Path('/home/coding/pdftract/crates/pdftract-core/src')

    # Qualifiers are consumed first so that `pub const fn new` is reported as
    # fn `new` rather than as a const named `fn`. For `pub const MAX` the
    # qualifier group backtracks and `const` is matched as the item keyword.
    item_pattern = re.compile(
        r'pub\s+'
        r'(?:(?:const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*'
        r'(fn|struct|enum|trait|type|const|mod)\s+(\w+)'
    )

    try:
        shown_path = file_path.relative_to(default_root)
    except ValueError:
        # File is outside the default root: report the path as given instead
        # of aborting the whole run.
        shown_path = file_path

    items = []
    # Rust source files are UTF-8; do not depend on the locale encoding.
    lines = file_path.read_text(encoding='utf-8').split('\n')

    # Doc comment lines seen since the last code line.
    pending_docs = []

    for line_no, raw_line in enumerate(lines, 1):
        stripped = raw_line.strip()

        # Exactly three slashes (or `//!`) is a doc comment; four or more
        # slashes is an ordinary comment.
        is_outer_doc = stripped.startswith('///') and not stripped.startswith('////')
        if is_outer_doc or stripped.startswith('//!'):
            pending_docs.append(stripped)
            continue

        # Blank lines, regular comments and attributes neither add to nor
        # discard the pending doc block.
        if not stripped or stripped.startswith(('//', '#')):
            continue

        found = item_pattern.match(stripped)
        if found:
            item_type, name = found.groups()
            items.append((
                item_type,
                name,
                '\n'.join(pending_docs),
                f"{shown_path}:{line_no}",
            ))

        # Any code line (public item or not) ends the pending doc block, so
        # docs are never carried over to a later, unrelated item.
        pending_docs = []

    return items
|
|
|
|
|
|
def has_worked_example(doc: str) -> bool:
    """Check if doc comment contains a worked example (```rust block).

    Any fence whose info string starts with `rust` counts, which covers
    variants such as ```rust,no_run and ```rust,ignore. An empty or missing
    doc comment never has an example.
    """
    # A single substring test suffices: every `rust,<flag>` fence also
    # contains the plain ```rust prefix.
    return bool(doc) and '```rust' in doc
|
|
|
|
|
|
def measure_coverage(src_dir: Path) -> Dict:
    """Measure documentation coverage across all source files.

    Walks every `*.rs` file under `src_dir` (recursively), skipping files
    inside a directory named `tests` and files named `tests.rs`, and tallies
    the public items reported by `extract_public_items`.

    Returns a dict with:
      - 'total_items', 'with_docs', 'with_examples': overall counts
      - 'by_type': per item type, a dict with 'total', 'with_docs' and
        'with_examples' counts
      - 'items_missing_examples': list of (item_type, name, location) for
        every item without a worked example
    """
    results = {
        'total_items': 0,
        'with_docs': 0,
        'with_examples': 0,
        'by_type': {},
        'items_missing_examples': [],
    }

    # Sorted so the report (in particular the order of the missing-examples
    # list) does not depend on filesystem enumeration order.
    for rs_file in sorted(src_dir.rglob('*.rs')):
        # Skip tests. Only path components below src_dir are inspected, and
        # they are compared as whole names, so a checkout located under some
        # ".../tests/..." directory or a file such as "contests.rs" is not
        # skipped by accident.
        relative_parts = rs_file.relative_to(src_dir).parts
        if 'tests' in relative_parts[:-1] or rs_file.stem == 'tests':
            continue

        for item_type, name, doc, location in extract_public_items(rs_file):
            type_stats = results['by_type'].setdefault(item_type, {
                'total': 0,
                'with_docs': 0,
                'with_examples': 0,
            })

            results['total_items'] += 1
            type_stats['total'] += 1

            if doc:
                results['with_docs'] += 1
                type_stats['with_docs'] += 1

            if has_worked_example(doc):
                results['with_examples'] += 1
                type_stats['with_examples'] += 1
            else:
                results['items_missing_examples'].append((item_type, name, location))

    return results
|
|
|
|
|
|
def main():
    """Print the rustdoc coverage report for pdftract-core to stdout.

    Reports overall documentation and worked-example coverage, a per-type
    breakdown of example coverage, and -- when example coverage is below the
    target -- how many more examples are needed plus the first few items
    that lack one.
    """
    target_percent = 80  # required share of public items with a worked example
    max_listed = 20      # cap on how many missing items are printed

    src_dir = Path('/home/coding/pdftract/crates/pdftract-core/src')
    results = measure_coverage(src_dir)

    total = results['total_items']
    with_docs = results['with_docs']
    with_examples = results['with_examples']

    # Guard against division by zero when no public items were found.
    doc_coverage = (with_docs / total * 100) if total > 0 else 0
    example_coverage = (with_examples / total * 100) if total > 0 else 0

    print("=== Rustdoc Coverage Report for pdftract-core ===\n")
    print(f"Total public items: {total}")
    print(f"With documentation: {with_docs} ({doc_coverage:.1f}%)")
    print(f"With worked examples: {with_examples} ({example_coverage:.1f}%)")
    print()

    print("By item type:")
    for item_type, stats in sorted(results['by_type'].items()):
        t_total = stats['total']
        t_examples = stats['with_examples']
        t_ex_cov = (t_examples / t_total * 100) if t_total > 0 else 0
        print(f" {item_type:8s}: {t_examples:3d}/{t_total:3d} with examples ({t_ex_cov:.0f}%)")

    print()

    if example_coverage < target_percent:
        # Round the required count UP (integer ceiling division): truncating
        # could report "Need 0 more examples" while still below the target,
        # e.g. 8 of 11 items is 72.7% but int(11 * 0.8 - 8) == 0.
        required = -(-total * target_percent // 100)
        needed = required - with_examples

        print(f"⚠️ Target: {target_percent}% coverage. Current: {example_coverage:.1f}%")
        print(f" Need {needed} more examples.\n")

        # Show first 20 items missing examples
        all_missing = results['items_missing_examples']
        missing = all_missing[:max_listed]
        print(f"First {max_listed} items missing examples (showing {len(missing)} of {len(all_missing)}):")
        for item_type, name, location in missing:
            print(f" - {item_type:8s} {name:30s} ({location})")

        if len(all_missing) > max_listed:
            print(f" ... and {len(all_missing) - max_listed} more")
    else:
        print(f"✅ Target met: {example_coverage:.1f}% >= {target_percent}%")
|
|
|
|
|
|
# Run the report only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
|