#!/usr/bin/env python3
"""
Measure rustdoc coverage for the pdftract-core public API.

Counts public items and tracks which have doc comments with examples.
"""

import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Set, Tuple


@dataclass
class DocStats:
    """Statistics for documentation coverage.

    Attributes:
        total_items: Number of items that were scanned.
        documented_items: Number of those items that have a doc comment.
        with_examples: Number of those items whose doc comment contains a
            fenced code example.
        items_with_examples: Human-readable labels for the items counted in
            ``with_examples``.
    """

    total_items: int = 0
    documented_items: int = 0
    with_examples: int = 0
    # default_factory gives every instance its own list without a None sentinel.
    items_with_examples: List[str] = field(default_factory=list)

    def __post_init__(self):
        # Backward compatibility: an explicit ``items_with_examples=None`` has
        # always been normalised to an empty list, so keep accepting it.
        if self.items_with_examples is None:
            self.items_with_examples = []


def extract_rust_items(content: str, filename: str) -> List[tuple]:
    """
    Extract public items from Rust source code.

    This is a line-based heuristic, not a Rust parser. Each line is stripped
    and classified as a doc comment, a regular comment, an attribute, or code.
    Outer doc comments (``///`` and ``/** ... */``) are collected and credited
    to the next matching item. Blank lines, regular comments and attributes in
    between do not detach them. Inner doc comments (``//!`` and ``/*! ... */``)
    document the enclosing module, so they are never credited to the item
    that follows them.

    Args:
        content: Full text of one Rust source file.
        filename: Path stored in every returned tuple; it is not opened here.

    Returns:
        List of (item_type, name, line_number, has_doc, has_example, filename)
        tuples in source order. ``line_number`` is 1-based. ``has_example`` is
        True when any collected doc line contains a Markdown code fence.
    """
    # `pub fn` is checked first and accepts function qualifiers, so that
    # `pub const fn new()` is a function named `new` instead of falling through
    # to the `pub const` pattern with the bogus name `fn`.
    fn_qualifiers = r'(?:(?:const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*'
    # The patterns require whitespace right after `pub`, so restricted
    # visibility such as `pub(crate)` never matches and needs no extra filter.
    patterns = {
        'pub fn': re.compile(r'\bpub\s+' + fn_qualifiers + r'fn\s+(\w+)'),
        'pub struct': re.compile(r'\bpub\s+struct\s+(\w+)'),
        'pub enum': re.compile(r'\bpub\s+enum\s+(\w+)'),
        'pub trait': re.compile(r'\bpub\s+(?:unsafe\s+)?trait\s+(\w+)'),
        'pub const': re.compile(r'\bpub\s+const\s+(\w+)'),
        'pub type': re.compile(r'\bpub\s+type\s+(\w+)'),
        'pub mod': re.compile(r'\bpub\s+mod\s+(\w+)'),
        # Anchored at the start of the line so `-> impl Trait` in a signature
        # is not mistaken for an impl block.
        'impl': re.compile(r'^(?:unsafe\s+)?impl\s+(\w+)'),
    }

    items: List[tuple] = []
    doc_lines: List[str] = []   # outer doc lines waiting for their item
    in_block_comment = False    # inside an unterminated /* ... */ comment
    block_is_doc = False        # ...and that comment is an outer /** doc
    attr_depth = 0              # unclosed '[' of a multi-line #[...] attribute

    for line_number, raw_line in enumerate(content.split('\n'), start=1):
        line = raw_line.strip()

        # Body or terminator of a block comment opened on an earlier line.
        if in_block_comment:
            if block_is_doc:
                doc_lines.append(line)
            in_block_comment = '*/' not in line
            continue

        # Continuation of an attribute that spans several lines.
        if attr_depth > 0:
            attr_depth = max(0, attr_depth + line.count('[') - line.count(']'))
            continue

        # Outer line doc comment: exactly three slashes.
        if line.startswith('///') and not line.startswith('////'):
            doc_lines.append(line)
            continue

        # Block comment start. Only `/**` (but not `/***` or the empty `/**/`)
        # is an outer doc comment; `/*!` and plain `/*` are skipped.
        if line.startswith('/*'):
            block_is_doc = (
                line.startswith('/**')
                and not line.startswith(('/***', '/**/'))
            )
            if block_is_doc:
                doc_lines.append(line)
            in_block_comment = '*/' not in line[2:]
            continue

        # Blank lines and remaining `//` comments (including inner `//!` docs
        # and commented-out code) neither define items nor detach pending docs.
        if not line or line.startswith('//'):
            continue

        for item_type, pattern in patterns.items():
            match = pattern.search(line)
            if match:
                has_doc = bool(doc_lines)
                has_example = any('```' in doc_line for doc_line in doc_lines)
                items.append(
                    (item_type, match.group(1), line_number,
                     has_doc, has_example, filename)
                )
                doc_lines = []
                break
        else:
            if line.startswith(('#[', '#![')):
                # Attributes sit between a doc comment and its item; keep the
                # pending docs and remember whether the attribute is still open.
                attr_depth = max(0, line.count('[') - line.count(']'))
            else:
                # Any other code line ends the association with earlier docs.
                doc_lines = []

    return items


def scan_directory(src_dir: Path) -> Tuple[DocStats, List[tuple]]:
    """Scan all Rust files under *src_dir* and summarise their documentation.

    Files whose path relative to *src_dir* contains ``tests`` or ``examples``
    are skipped. The check uses the relative path so that the location of the
    checkout (for example a parent directory named ``tests``) cannot exclude
    every file.

    Args:
        src_dir: Root directory searched recursively for ``*.rs`` files.

    Returns:
        A ``(stats, all_items)`` pair: the aggregated ``DocStats`` and the
        item tuples produced by ``extract_rust_items`` for every scanned file.
    """
    src_dir = Path(src_dir)
    all_items: List[tuple] = []

    # Sorted so the report is stable across filesystems and runs.
    for rs_file in sorted(src_dir.rglob('*.rs')):
        if not rs_file.is_file():
            continue

        relative = rs_file.relative_to(src_dir).as_posix()
        if 'tests' in relative or 'examples' in relative:
            continue

        content = rs_file.read_text(encoding='utf-8', errors='ignore')
        all_items.extend(extract_rust_items(content, str(rs_file)))

    # Item tuple layout: (type, name, line, has_doc, has_example, filename).
    items_with_example = [item for item in all_items if item[4]]
    stats = DocStats(
        total_items=len(all_items),
        documented_items=sum(1 for item in all_items if item[3]),
        with_examples=len(items_with_example),
        items_with_examples=[
            f"{item[0]} {item[1]} ({item[5]}:{item[2]})"
            for item in items_with_example
        ],
    )

    return stats, all_items


def _print_item_list(title: str, entries: List[tuple], limit: int = 50) -> None:
    """Print a titled list of items sorted by file and line, capped at *limit*."""
    if not entries:
        return

    print(f"\n=== {title} ({len(entries)}) ===")
    ordered = sorted(entries, key=lambda entry: (entry[5], entry[2]))
    for item_type, name, line_number, _has_doc, _has_example, filename in ordered[:limit]:
        print(f" {item_type} {name} at {filename}:{line_number}")
    if len(entries) > limit:
        print(f" ... and {len(entries) - limit} more")


def main(src_dir: Path = Path('crates/pdftract-core/src')) -> None:
    """Print the documentation coverage report for the crate sources.

    Args:
        src_dir: Directory containing the Rust sources to scan. The default is
            relative to the current working directory.

    Raises:
        SystemExit: If *src_dir* is not an existing directory. Without this
            check the scan finds no files and prints a misleading all-zero
            report.
    """
    from collections import defaultdict

    src_dir = Path(src_dir)
    if not src_dir.is_dir():
        raise SystemExit(
            f"error: source directory not found: {src_dir} "
            "(run this script from the repository root)"
        )

    target_percent = 80

    def percent(part: int, whole: int) -> float:
        # max(1, ...) keeps an empty scan from dividing by zero.
        return part / max(1, whole) * 100

    print("Scanning pdftract-core for public API items...")
    stats, all_items = scan_directory(src_dir)

    example_percent = percent(stats.with_examples, stats.total_items)
    # Integer ceiling: the smallest item count that reaches the target. Rounding
    # the fractional difference could print a gap of 0 while below the target.
    needed = -(-stats.total_items * target_percent // 100)
    gap = max(0, needed - stats.with_examples)

    print("\n=== Documentation Coverage Report ===")
    print(f"Total public items: {stats.total_items}")
    print(
        f"Documented items: {stats.documented_items} "
        f"({percent(stats.documented_items, stats.total_items):.1f}%)"
    )
    print(f"With examples: {stats.with_examples} ({example_percent:.1f}%)")
    print(f"\nTarget: {target_percent}% coverage")
    print(f"Current: {example_percent:.1f}%")
    print(f"Gap: {gap} items need examples")

    # Show items by type
    by_type = defaultdict(list)
    for item in all_items:
        by_type[item[0]].append(item)

    print("\n=== Breakdown by type ===")
    for item_type, group in sorted(by_type.items()):
        with_ex = sum(1 for entry in group if entry[4])
        print(f"{item_type}: {with_ex}/{len(group)} ({percent(with_ex, len(group)):.0f}%)")

    # Show undocumented items, then documented items that lack an example.
    _print_item_list(
        "Undocumented items",
        [item for item in all_items if not item[3]],
    )
    _print_item_list(
        "Documented but without examples",
        [item for item in all_items if item[3] and not item[4]],
    )


if __name__ == '__main__':
    # Produce the report only when run as a script, not when imported.
    main()