217 lines
7.7 KiB
Python
Executable file
217 lines
7.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Analyze rustdoc coverage for pdftract-core.
|
|
|
|
This script counts:
|
|
- Total public items (fn, struct, enum, trait, type, const, mod)
|
|
- Items with rustdoc examples (```rust blocks)
|
|
- Coverage percentage
|
|
"""
|
|
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
from dataclasses import dataclass
|
|
|
|
@dataclass
class DocStats:
    """Running counters for a documentation-coverage report.

    ``total_items`` counts every public item seen; ``items_with_docs`` and
    ``items_with_examples`` count the subsets that carry a doc comment and a
    rustdoc example respectively.  ``items_by_type`` maps an item-type name to
    a dict holding the same three counters under the keys ``total``,
    ``with_docs`` and ``with_examples``.
    """

    total_items: int = 0
    items_with_docs: int = 0
    items_with_examples: int = 0
    # Left as None by default; __post_init__ swaps in a fresh per-instance table.
    items_by_type: dict = None

    def __post_init__(self):
        """Create the per-type counter table unless the caller supplied one."""
        if self.items_by_type is not None:
            return
        # Unknown item types start with all three counters at zero.
        self.items_by_type = defaultdict(
            lambda: {'total': 0, 'with_docs': 0, 'with_examples': 0}
        )

    def coverage_pct(self):
        """Return the share of items that have documentation, in percent.

        Yields ``0.0`` when no items have been counted.
        """
        return (
            0.0
            if self.total_items == 0
            else (self.items_with_docs / self.total_items) * 100
        )

    def example_pct(self):
        """Return the share of items that have an example, in percent.

        Yields ``0.0`` when no items have been counted.
        """
        return (
            0.0
            if self.total_items == 0
            else (self.items_with_examples / self.total_items) * 100
        )
|
|
|
|
|
|
# Regexes recognising the start of a public Rust item.  They are tried in
# insertion order and the first one that matches a line wins.
_PUBLIC_ITEM_PATTERNS = {
    # Accepts any run of `const` / `async` / `unsafe` / `extern "ABI"`
    # qualifiers between `pub` and `fn`.
    'fn': re.compile(
        r'pub\s+(?:(?:const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*fn\s+(\w+)'
    ),
    'struct': re.compile(r'pub\s+struct\s+(\w+)'),
    'enum': re.compile(r'pub\s+enum\s+(\w+)'),
    'trait': re.compile(r'pub\s+trait\s+(\w+)'),
    'type': re.compile(r'pub\s+type\s+(\w+)'),
    # NOTE(review): besides `pub const NAME:` this also matches public struct
    # fields (`pub name: Type`), which are therefore reported as 'const'.
    'const': re.compile(r'pub\s+(?:const\s+|async\s+)?(\w+)\s*:'),
    'mod': re.compile(r'pub\s+mod\s+(\w+)'),
}

# Number of lines scanned after an attribute line for the item it decorates.
_ATTRIBUTE_LOOKAHEAD = 4


def _match_public_item(line):
    """Return ``(item_type, name)`` for the first pattern found in *line*, else ``None``."""
    for item_type, pattern in _PUBLIC_ITEM_PATTERNS.items():
        match = pattern.search(line)
        if match:
            return item_type, match.group(1)
    return None


def extract_rustdoc_items(content: str, file_path: str) -> list:
    """Extract public items and their associated documentation from Rust source.

    The source is scanned line by line.  Consecutive ``///`` lines are
    collected as a pending doc block (blank lines and ordinary ``//`` comments
    in between do not interrupt it).  The first code line after the block
    decides what the block documents:

    * if that line declares a public item, the item receives the block;
    * if that line is an attribute (starts with ``#``), the following
      ``_ATTRIBUTE_LOOKAHEAD`` lines are searched for the decorated public
      item; the first match receives the block and scanning resumes after it,
      so the item is recorded exactly once;
    * otherwise the block belonged to a non-public item and is discarded.

    ``//!`` inner doc comments describe the enclosing module or crate rather
    than the next item, so they are ignored.  Public items met without a
    pending doc block are recorded as undocumented.

    Args:
        content: Full text of a Rust source file.
        file_path: Path of the file; currently unused, kept for interface
            compatibility.

    Returns:
        List of ``(item_type, name, has_doc, has_example, doc_content)``
        tuples in source order.  ``doc_content`` is the raw doc-comment lines
        joined with newlines (``''`` when undocumented); ``has_example`` is
        true when it contains a ```` ```rust ```` fence.
    """
    items = []
    lines = content.split('\n')
    pending_doc = []

    i = 0
    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        if stripped.startswith('///'):
            pending_doc.append(line)
        elif stripped and not stripped.startswith('//'):
            # A code line: it consumes whatever doc block is pending.
            doc_content = '\n'.join(pending_doc)
            pending_doc = []

            found = _match_public_item(line)
            if found is None and doc_content and stripped.startswith('#'):
                # Attributes may sit between the docs and the item itself.
                window_end = min(i + 1 + _ATTRIBUTE_LOOKAHEAD, len(lines))
                for j in range(i + 1, window_end):
                    if lines[j].strip().startswith('//'):
                        continue  # never match item syntax inside a comment
                    found = _match_public_item(lines[j])
                    if found is not None:
                        # Resume after the item so it is not counted a second
                        # time as an undocumented item.
                        i = j
                        break

            if found is not None:
                item_type, name = found
                items.append((
                    item_type,
                    name,
                    bool(doc_content),
                    '```rust' in doc_content,
                    doc_content,
                ))
        # Blank lines, `//` comments and `//!` inner docs fall through untouched.
        i += 1

    return items
|
|
|
|
|
|
def analyze_source_file(file_path: Path) -> tuple:
    """Analyze a single Rust source file for documentation coverage.

    The file is decoded as UTF-8 (the encoding Rust requires for source
    files) instead of the platform's locale encoding.

    Args:
        file_path: Path of the ``.rs`` file to read.

    Returns:
        ``(file_path, items_list)`` where ``items_list`` is the result of
        ``extract_rustdoc_items``.  If the file cannot be read or decoded, an
        error line is printed and ``items_list`` is empty.
    """
    # Keep the try body to the one call that can fail on I/O or decoding.
    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return (file_path, [])

    return (file_path, extract_rustdoc_items(content, str(file_path)))
|
|
|
|
|
|
def main(src_dir=None):
    """Print a rustdoc coverage report for a tree of Rust sources.

    Args:
        src_dir: Directory to scan recursively for ``*.rs`` files (``str`` or
            ``Path``).  When omitted, the first command-line argument is used
            if present, otherwise the pdftract-core source directory that
            this script was written for.

    Returns:
        None.  The report is written to standard output; if the directory
        does not exist a single message is printed instead.
    """
    import sys  # local: only needed for the optional command-line override

    if src_dir is None:
        if len(sys.argv) > 1:
            src_dir = sys.argv[1]
        else:
            src_dir = '/home/coding/pdftract/crates/pdftract-core/src'
    src_dir = Path(src_dir)

    if not src_dir.exists():
        print(f"Source directory not found: {src_dir}")
        return

    # Sorted so the report is identical from run to run.
    rust_files = sorted(src_dir.rglob('*.rs'))
    print(f"Found {len(rust_files)} Rust files")

    # Flatten every file's items into (file_path, type, name, doc, example, text).
    all_items = []
    for file_path in rust_files:
        _, items = analyze_source_file(file_path)
        all_items.extend((file_path, *item) for item in items)

    # Accumulate overall and per-type counters, and group gaps by file.
    stats = DocStats()
    files_needing_examples = defaultdict(list)
    for file_path, item_type, name, has_doc, has_example, _ in all_items:
        by_type = stats.items_by_type[item_type]
        stats.total_items += 1
        by_type['total'] += 1
        if has_doc:
            stats.items_with_docs += 1
            by_type['with_docs'] += 1
        if has_example:
            stats.items_with_examples += 1
            by_type['with_examples'] += 1
        else:
            files_needing_examples[file_path].append((item_type, name))

    # Summary section.
    print("\n" + "="*70)
    print("PDFTRACT-CORE RUSTDOC COVERAGE REPORT")
    print("="*70)
    print(f"\nTotal public items: {stats.total_items}")
    print(f"Items with documentation: {stats.items_with_docs} ({stats.coverage_pct():.1f}%)")
    print(f"Items with examples: {stats.items_with_examples} ({stats.example_pct():.1f}%)")
    print("\nTarget: 80%+ example coverage")
    print(f"Status: {'✓ PASS' if stats.example_pct() >= 80 else '✗ FAIL'}")

    # Per-type table; types that never occurred are left out.
    print("\n" + "-"*70)
    print("BY TYPE")
    print("-"*70)
    print(f"{'Type':<12} {'Total':>8} {'With Doc':>10} {'With Ex':>10} {'Ex %':>8}")
    print("-"*70)

    for item_type in ['fn', 'struct', 'enum', 'trait', 'type', 'const', 'mod']:
        if item_type in stats.items_by_type:
            data = stats.items_by_type[item_type]
            total = data['total']
            with_docs = data['with_docs']
            with_ex = data['with_examples']
            ex_pct = (with_ex / total * 100) if total > 0 else 0
            print(f"{item_type:<12} {total:>8} {with_docs:>10} {with_ex:>10} {ex_pct:>7.1f}%")

    print("\n" + "-"*70)
    print("FILES NEEDING ATTENTION (public items without examples)")
    print("-"*70)

    # Worst 15 files first, at most 10 items listed per file.
    sorted_files = sorted(
        files_needing_examples.items(), key=lambda x: len(x[1]), reverse=True
    )
    for file_path, items in sorted_files[:15]:
        rel_path = file_path.relative_to(src_dir)
        print(f"\n{rel_path} ({len(items)} items without examples):")
        for item_type, name in items[:10]:
            print(f"  - {item_type} {name}")
        if len(items) > 10:
            print(f"  ... and {len(items) - 10} more")

    print("\n" + "="*70)
|
|
|
|
|
|
# Produce the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|