The extract_markdown stub was calling extract_text instead of extract_text_fn, causing a compilation error. This fixes the function name to match the exported function from extract_text.rs. This completes the extract_text PyO3 entry point implementation, which was already present in extract_text.rs and lib.rs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
175 lines
5.8 KiB
Python
Executable file
175 lines
5.8 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Measure rustdoc coverage for pdftract-core public API."""
|
|
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
from typing import Dict, List, Tuple
|
|
|
|
# Identifiers that are never reported as a public item name.  Despite the
# name this is more than Rust keywords: it also lists common prelude/std
# type and variant names, the primitive numeric types, and the Python-style
# spellings 'True'/'False'.
RUST_KEYWORDS = set(
    (
        # keywords
        'where let mut if else for while loop match '
        'return break continue impl struct enum trait '
        'type fn const static mod use crate super '
        'self Self extern unsafe async await move ref '
        # literal-like names, std variants and containers
        'True False Some None Ok Err Vec String Box Result Option '
        # primitive types
        'u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 bool usize isize'
    ).split()
)
|
|
|
|
|
|
# `pub`, then any function/trait qualifiers (`async`, `const`, `unsafe`,
# `extern "ABI"`), then the item keyword and its name.  For `pub const FOO`
# the qualifier loop backtracks so `const` is taken as the item keyword.
_PUB_ITEM_RE = re.compile(
    r'^pub\s+'
    r'(?:(?:async|const|unsafe|extern(?:\s+"[^"]*")?)\s+)*'
    r'(fn|struct|enum|trait|type|const|mod|use)\s+(\w+)'
)

# Fallback for `pub use` lines whose path does not start with a plain word.
_PUB_USE_RE = re.compile(r'pub use\s+(.+?);')

# Fence info-string tags that still leave a doc code block a Rust example.
_RUSTDOC_FENCE_TAGS = frozenset({
    'rust', 'no_run', 'ignore', 'should_panic', 'compile_fail',
    'test_harness', 'standalone_crate',
})


def _is_rust_fence_tag(tag: str) -> bool:
    """Return True if *tag* is a rustdoc attribute rather than a language."""
    return (
        tag in _RUSTDOC_FENCE_TAGS
        or tag.startswith('edition')
        or tag.startswith('ignore-')
        or re.fullmatch(r'E\d{4}', tag) is not None
    )


def _doc_has_rust_example(doc_lines: List[str]) -> bool:
    """Return True if the `///` lines contain a fenced Rust code block.

    A fence counts as Rust when its info string is empty or consists only of
    rustdoc attributes (``rust``, ``no_run``, ``ignore``, ...).  Fences
    tagged with another language (``text``, ``json``, ...) do not count.
    """
    in_fence = False
    for doc_line in doc_lines:
        text = doc_line.lstrip('/').strip()
        if not text.startswith('```'):
            continue
        if in_fence:
            # This fence closes the block opened earlier.
            in_fence = False
            continue
        in_fence = True
        info = text.lstrip('`').strip()
        tags = [tag for tag in re.split(r'[,\s]+', info) if tag]
        if all(_is_rust_fence_tag(tag) for tag in tags):
            return True
    return False


def extract_items_from_file(filepath: Path) -> List[Tuple[str, str, int, bool]]:
    """Extract public items from a Rust source file.

    The file is scanned line by line.  Consecutive `///` lines are collected
    as the doc comment of the next `pub` item; blank lines and `#[...]`
    attribute lines in between do not break that association.

    Args:
        filepath: Path of the UTF-8 encoded Rust source file to read.

    Returns: List of (name, kind, line_number, has_example) tuples.
        ``kind`` is one of fn/struct/enum/trait/type/const/mod/use,
        ``line_number`` is 1-based, and ``has_example`` tells whether the
        item's doc comment contains a fenced Rust code block.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    items = []
    pending_doc = []  # `///` lines collected for the next item

    for line_no, line in enumerate(content.split('\n'), 1):
        stripped = line.strip()

        # Blank lines neither extend nor discard the pending doc comment.
        if not stripped:
            continue

        if stripped.startswith('///'):
            pending_doc.append(stripped)
            continue

        # Any other comment (`//`, `//!`) detaches the doc from what follows.
        if stripped.startswith('//'):
            pending_doc = []
            continue

        # Attributes (cfg, derive, ...) sit between a doc and its item.
        if stripped.startswith('#['):
            continue

        if not stripped.startswith('pub '):
            # Other code ends the doc comment; lines starting with `#` or
            # `use` are tolerated between a doc comment and its item.
            if not stripped.startswith('#') and not stripped.startswith('use'):
                pending_doc = []
            continue

        # Every `pub` line consumes the pending doc, recognised or not, so
        # docs of struct fields or unsupported items cannot leak onto the
        # next recognised item.
        doc_lines, pending_doc = pending_doc, []

        item_match = _PUB_ITEM_RE.match(stripped)
        if item_match:
            kind, item_name = item_match.groups()
        else:
            use_match = _PUB_USE_RE.search(stripped)
            if not use_match:
                continue
            kind = 'use'
            item_name = use_match.group(1).split('::')[-1]

        # Names such as `crate`/`self`/`super` come from re-export paths
        # (`pub use crate::...`) and are not items of their own.
        if item_name in RUST_KEYWORDS:
            continue

        items.append((item_name, kind, line_no, _doc_has_rust_example(doc_lines)))

    return items
|
|
|
|
|
|
def scan_directory(src_dir: Path) -> Dict[str, List[Tuple[str, str, int, bool]]]:
    """Scan all Rust files in a directory.

    Walks ``src_dir`` recursively for ``*.rs`` files, skips test and bench
    sources, and runs ``extract_items_from_file`` on the rest.

    Args:
        src_dir: Root directory to search.

    Returns:
        Mapping from module path (the file path relative to ``src_dir``
        without the ``.rs`` suffix) to that file's item tuples.  Files with
        no extracted items are left out.
    """
    all_items = {}

    # Sorted so the scan order does not depend on the filesystem.
    for rust_file in sorted(src_dir.rglob('*.rs')):
        # Skip test files and tests modules
        if 'tests.rs' in rust_file.name or 'test_' in rust_file.name:
            continue

        relative = rust_file.relative_to(src_dir)

        # Only look at components below src_dir: a checkout that happens to
        # live under a directory like `/home/tester/` must not be skipped.
        if any(p.startswith('test') or p == 'benches' for p in relative.parts):
            continue

        items = extract_items_from_file(rust_file)
        if items:
            all_items[str(relative.with_suffix(''))] = items

    return all_items
|
|
|
|
|
|
def print_report(all_items: Dict[str, List[Tuple[str, str, int, bool]]]):
    """Print coverage report.

    Writes to stdout, in order: a per-module section (modules sorted by
    path) with the share of items that have a doc example and up to ten
    names of fn/struct/enum/trait/type items lacking one; the overall
    total; a breakdown by item kind; and a PASS/FAIL verdict against the
    80% threshold.

    Args:
        all_items: Mapping from module path to a list of
            (name, kind, line_number, has_example) tuples.
    """
    total = 0
    with_examples = 0
    by_kind = defaultdict(lambda: [0, 0])  # kind -> [total, with_examples]

    print("=" * 80)
    print("RUSTDOC COVERAGE REPORT")
    print("=" * 80)

    for module_path in sorted(all_items.keys()):
        items = all_items[module_path]
        if not items:
            continue

        module_total = len(items)
        module_with = sum(1 for _, _, _, has_ex in items if has_ex)
        module_pct = (module_with / module_total * 100) if module_total else 0

        print(f"\n{module_path}:")
        print(f" {module_with}/{module_total} items with examples ({module_pct:.1f}%)")

        # List missing examples (const/mod/use items are counted above but
        # not named here).
        missing = [name for name, kind, _, has_ex in items if not has_ex and kind in ('fn', 'struct', 'enum', 'trait', 'type')]
        if missing:
            print(f" Missing examples: {', '.join(missing[:10])}", end='')
            if len(missing) > 10:
                print(f" ... and {len(missing) - 10} more")
            else:
                print()

        total += module_total
        with_examples += module_with

        for _, kind, _, has_ex in items:
            by_kind[kind][0] += 1
            if has_ex:
                by_kind[kind][1] += 1

    overall_pct = (with_examples / total * 100) if total else 0
    print("\n" + "=" * 80)
    print(f"OVERALL: {with_examples}/{total} items with examples ({overall_pct:.1f}%)")
    print("=" * 80)

    print("\nBy kind:")
    for kind in sorted(by_kind.keys()):
        t, w = by_kind[kind]
        pct = (w / t * 100) if t else 0
        print(f" {kind:10s}: {w:4d}/{t:4d} ({pct:5.1f}%)")

    # Threshold check
    print("\n" + "=" * 80)
    if overall_pct >= 80:
        print("PASS: Meets 80% threshold")
    else:
        # Smallest item count that reaches 80%, rounded up in integer
        # arithmetic: truncating would report "need 0 more" for 5 of 7.
        required = (80 * total + 99) // 100
        print(f"FAIL: Below 80% threshold (need {required - with_examples} more examples)")
    print("=" * 80)
|
|
|
|
|
|
if __name__ == '__main__':
    import sys

    # The first command-line argument, when given, overrides the default
    # source directory so the script also works on other checkouts.
    default_src = '/home/coding/pdftract/crates/pdftract-core/src'
    src_dir = Path(sys.argv[1] if len(sys.argv) > 1 else default_src)

    # A missing directory would otherwise produce an empty report that
    # looks like a real (failing) measurement.
    if not src_dir.is_dir():
        sys.exit(f"error: source directory not found: {src_dir}")

    all_items = scan_directory(src_dir)
    print_report(all_items)
|