The indent trigger was using .abs() which fired on both increased indent (non-indented → indented) AND decreased indent (indented → non-indented). This caused drop-cap style paragraphs (indented first line, flush-left continuation) to incorrectly split into two blocks. Per plan Phase 4.4 heuristic #2, indent change should only trigger when the current line is MORE indented (to the right, larger x0) than the block average - i.e., a new paragraph starting after non-indented text. It should NOT trigger for decreased indent (first line indented, rest flush-left). Fix: Remove .abs() and only check if line_x0 - block_avg_x0 > threshold. Tests: - test_indented_first_line_new_block: PASS (non-indented → indented splits) - test_indented_first_line_of_paragraph_not_split: PASS (drop cap stays together) - All 179 line module tests: PASS
183 lines
6.4 KiB
Python
183 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Measure rustdoc example coverage for pdftract-core.
|
|
|
|
Counts public items and determines how many have at least one worked example.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import json
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
# Patterns to detect public items.
#
# Every pattern captures the item's name in group 1 and is meant to be run
# with ``finditer`` over the full text of a Rust source file.  They are plain
# text heuristics: nothing here parses Rust, so declarations that appear inside
# comments or string literals are matched as well.
PUBLIC_PATTERNS = {
    # `pub fn`, optionally preceded by any mix of the qualifiers `const`,
    # `async`, `unsafe` and `extern ["ABI"]`.
    'fn': re.compile(
        r'pub\s+(?:(?:const|async|unsafe|extern(?:\s*"[^"]*")?)\s+)*fn\s+(\w+)'
    ),
    'struct': re.compile(r'pub\s+struct\s+(\w+)'),
    'enum': re.compile(r'pub\s+enum\s+(\w+)'),
    'trait': re.compile(r'pub\s+(?:unsafe\s+)?trait\s+(\w+)'),
    'mod': re.compile(r'pub\s+mod\s+(\w+)'),
    'type': re.compile(r'pub\s+type\s+(\w+)'),
    # `pub const NAME` / `pub static [mut] NAME`.  The negative lookahead keeps
    # `pub const fn foo` (a function, handled by 'fn') from being reported as a
    # constant named "fn", and `mut` from being reported as a static's name.
    'const': re.compile(
        r'pub\s+(?:const|static)\s+(?:mut\s+)?'
        r'(?!(?:fn|unsafe|async|extern|mut)\b)(\w+)'
    ),
    # For trait impls that add methods.
    # NOTE(review): as written this only matches headers of the form
    # `impl [<..>] Name [<..>] {` and does not require `pub`; `impl Trait for
    # Name {` does not match because `{` must directly follow the first name.
    'impl': re.compile(r'impl\s+(?:<[^>]*>)?\s*(\w+)\s*(?:<[^>]*>)?\s*\{'),
}

# Pattern to detect doc code blocks: a fence opened with ```rust, a body that
# contains no backtick, and a closing ```.
EXAMPLE_PATTERN = re.compile(r'```rust[^`]*```', re.MULTILINE)
# A single `///` or `//!` doc-comment marker together with the rest of its line.
DOC_COMMENT_PATTERN = re.compile(r'///[^\n]*|//![^\n]*')
|
|
|
|
# Keyword (with any permitted qualifiers) that introduces each item kind.
_DECLARATION_KEYWORDS = {
    'fn': r'(?:(?:const|async|unsafe|extern(?:\s*"[^"]*")?)\s+)*fn',
    'struct': r'struct',
    'enum': r'enum',
    'trait': r'(?:unsafe\s+)?trait',
    'mod': r'mod',
    'type': r'type',
    'const': r'(?:const|static)(?:\s+mut)?',
}

# Fence info-string tokens after which rustdoc still treats the block as Rust.
_RUST_FENCE_TOKEN = re.compile(
    r'rust|ignore(?:-\S+)?|no_run|should_panic|compile_fail|test_harness'
    r'|standalone_crate|edition\d{4}|E\d{4}'
)


def _find_declaration_line(content, item_name, item_type):
    """Return the 0-based line index of the item's `pub` declaration, or None."""
    keyword = _DECLARATION_KEYWORDS.get(item_type)
    if keyword is None:
        # Kinds without their own keyword (e.g. 'impl') fall back to "any
        # public declaration carrying this name".
        keyword = '|'.join(_DECLARATION_KEYWORDS.values())
    # \b on both sides so that `new` does not match `new_with_config`.
    declaration = re.compile(
        rf'\bpub\s+(?:{keyword})\s+{re.escape(item_name)}\b'
    )
    for match in declaration.finditer(content):
        line_start = content.rfind('\n', 0, match.start()) + 1
        if content[line_start:match.start()].lstrip().startswith('//'):
            # The text sits inside a comment or doc example, not real code.
            continue
        return content.count('\n', 0, match.start())
    return None


def _collect_outer_doc(lines, item_line):
    """Return the text of the `///` lines attached to the item at item_line."""
    collected = []
    # Walk all the way up to index 0 so a doc comment on the first line of the
    # file is included.
    for index in range(item_line - 1, -1, -1):
        text = lines[index].strip()
        if text.startswith('///') and not text.startswith('////'):
            collected.append(text[3:])
        elif text.startswith('//!'):
            # Inner docs describe the enclosing module, not the next item.
            break
        elif text and not text.startswith('//') and not text.startswith('#['):
            # Stop at the first non-comment, non-attribute line.
            break
    collected.reverse()
    return collected


def _has_rust_example(doc_lines):
    """Return True if doc_lines contain a closed fenced block rustdoc runs as Rust."""
    inside_fence = False
    fence_is_rust = False
    for raw in doc_lines:
        text = raw.strip()
        if not text.startswith('```'):
            continue
        if inside_fence:
            if fence_is_rust:
                return True
            inside_fence = False
            continue
        inside_fence = True
        tokens = [tok for tok in re.split(r'[,\s]+', text.lstrip('`')) if tok]
        # An empty info string, an explicit `rust`, or only rustdoc attributes
        # (no_run, should_panic, ...) all mean "Rust"; anything else, such as
        # `text` or `json`, marks a non-Rust block.
        fence_is_rust = 'rust' in tokens or all(
            _RUST_FENCE_TOKEN.fullmatch(tok) for tok in tokens
        )
    return False


def has_item_with_examples(content, item_name, item_type):
    """Check if a public item has at least one worked example.

    The item's declaration is located with a word-bounded search for
    ``pub <keyword> <item_name>`` (the keyword is chosen from ``item_type``),
    the ``///`` comment lines directly above it are gathered (blank lines,
    ``//`` comments and ``#[...]`` attribute lines are skipped over), and the
    result is True when those lines contain a closed fenced code block that
    is untagged, tagged ``rust``, or tagged only with rustdoc attributes.

    Args:
        content: Full text of a Rust source file.
        item_name: Identifier of the item to look up.
        item_type: Kind of item ('fn', 'struct', 'enum', 'trait', 'mod',
            'type', 'const'); any other value matches a declaration of any
            of those kinds.

    Returns:
        True if an example was found, False if there is none or the
        declaration could not be located.
    """
    item_line = _find_declaration_line(content, item_name, item_type)
    if item_line is None:
        return False

    doc_lines = _collect_outer_doc(content.split('\n'), item_line)
    return _has_rust_example(doc_lines)
|
|
|
|
def find_public_items_in_file(filepath):
    """Find all public items in a Rust source file.

    Every regex in ``PUBLIC_PATTERNS`` is run over the whole file text and
    each match is recorded, except those whose captured name starts with an
    underscore.

    Args:
        filepath: Path (``str`` or ``pathlib.Path``) of the file to read.

    Returns:
        A ``(items, content)`` tuple: ``items`` is a list of
        ``(item_type, item_name, match_offset)`` tuples grouped by pattern in
        ``PUBLIC_PATTERNS`` order, and ``content`` is the file text.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Rust source files are UTF-8; do not depend on the platform's default
    # encoding.
    content = Path(filepath).read_text(encoding='utf-8')

    items = []
    for item_type, pattern in PUBLIC_PATTERNS.items():
        for match in pattern.finditer(content):
            item_name = match.group(1)
            # Skip underscore-prefixed names (including the `_` placeholder).
            if item_name.startswith('_'):
                continue
            items.append((item_type, item_name, match.start()))

    return items, content
|
|
|
|
def scan_crate(src_path):
    """Scan the crate for public items and example coverage.

    Walks every ``*.rs`` file below ``src_path`` (skipping files named
    ``build.rs`` and anything inside a ``tests`` directory), collects the
    public items of each file and checks each one for a worked example.

    Args:
        src_path: Root directory to scan (``str`` or ``pathlib.Path``).

    Returns:
        A dict with the keys:
          * ``'total_items'``: number of items found across all files.
          * ``'items_with_examples'``: number of those with an example.
          * ``'by_type'``: defaultdict mapping item type to
            ``{'total': int, 'with_examples': int}``.
          * ``'files'``: dict keyed by the file path relative to
            ``src_path.parent.parent``; each value holds ``'total'``,
            ``'with_examples'`` and an ``'items'`` list of
            ``{'name', 'type', 'has_examples'}`` dicts.  Files in which no
            item was found are omitted.
    """
    src_path = Path(src_path)
    results = {
        'total_items': 0,
        'items_with_examples': 0,
        'by_type': defaultdict(lambda: {'total': 0, 'with_examples': 0}),
        'files': {},
    }

    # Sorted so that the scan order (and any error output) is reproducible.
    for rs_file in sorted(src_path.rglob('*.rs')):
        # Skip build.rs and tests.  Compare whole path components, and only
        # those below src_path, so that e.g. `rebuild.rs` or a checkout that
        # lives under some `.../tests/...` directory is not excluded.
        parent_dirs = rs_file.relative_to(src_path).parts[:-1]
        if rs_file.name == 'build.rs' or 'tests' in parent_dirs:
            continue

        # Best effort: an unreadable file is reported and skipped.
        try:
            items, content = find_public_items_in_file(rs_file)
        except (OSError, UnicodeError) as e:
            print(f"Error processing {rs_file}: {e}", flush=True)
            continue

        if not items:
            continue

        # 'total' is fixed here; it must not be incremented again per item.
        file_results = {
            'total': len(items),
            'with_examples': 0,
            'items': [],
        }

        for item_type, item_name, _ in items:
            results['total_items'] += 1
            results['by_type'][item_type]['total'] += 1

            has_examples = has_item_with_examples(content, item_name, item_type)

            file_results['items'].append({
                'name': item_name,
                'type': item_type,
                'has_examples': has_examples,
            })

            if has_examples:
                results['items_with_examples'] += 1
                results['by_type'][item_type]['with_examples'] += 1
                file_results['with_examples'] += 1

        file_key = str(rs_file.relative_to(src_path.parent.parent))
        results['files'][file_key] = file_results

    return results
|
|
|
|
def main(src_path='/home/coding/pdftract/crates/pdftract-core/src',
         json_path='/tmp/doc_example_coverage.json',
         target=80):
    """Print the example-coverage report and write a JSON summary.

    Args:
        src_path: Source directory handed to ``scan_crate``.
        json_path: File that receives the machine-readable summary.
        target: Minimum coverage percentage required to pass; also the
            cut-off used for the per-type marks and for deciding which
            files are listed as needing examples.

    Returns:
        0 if overall coverage is at least ``target``, otherwise 1 (intended
        to be used as the process exit status).
    """
    results = scan_crate(Path(src_path))

    total_items = results['total_items']
    with_examples = results['items_with_examples']
    # Guard against an empty scan (no items found at all).
    coverage = (with_examples / total_items * 100) if total_items > 0 else 0
    passed = coverage >= target
    rule = "=" * 60

    print(rule)
    print("Rustdoc Example Coverage Report for pdftract-core")
    print(rule)
    print(f"\nTotal public items: {total_items}")
    print(f"Items with examples: {with_examples}")
    print(f"Coverage: {coverage:.1f}%")
    print(f"\nTarget: {target:g}%")
    print(f"Status: {'✓ PASS' if passed else '✗ FAIL'}")

    print("\n" + rule)
    print("Coverage by Type")
    print(rule)
    for item_type, counts in sorted(results['by_type'].items()):
        total = counts['total']
        with_ex = counts['with_examples']
        cov = (with_ex / total * 100) if total > 0 else 0
        print(f"{item_type:12} {with_ex:4}/{total:4} ({cov:5.1f}%) {'✓' if cov >= target else '✗'}")

    # Show files that need work
    print("\n" + rule)
    print("Files Needing Examples (showing items without examples)")
    print(rule)

    for file_path, file_results in sorted(results['files'].items()):
        file_total = file_results['total']
        file_cov = (file_results['with_examples'] / file_total * 100) if file_total > 0 else 0
        missing = [item for item in file_results['items'] if not item['has_examples']]
        if missing and file_cov < target:
            print(f"\n{file_path} ({file_cov:.0f}% coverage)")
            for item in sorted(missing, key=lambda x: (x['type'], x['name'])):
                print(f" - {item['type']:8} {item['name']}")

    print("\n" + rule)

    # Output JSON for scripts
    output_json = {
        'coverage': coverage,
        'total_items': total_items,
        'items_with_examples': with_examples,
        'pass': passed,
    }
    Path(json_path).write_text(json.dumps(output_json, indent=2), encoding='utf-8')

    return 0 if passed else 1
|
|
|
|
if __name__ == '__main__':
    # Propagate main()'s return value as the exit status.  SystemExit is used
    # directly because the bare `exit` helper is injected by the `site` module
    # for interactive use and is missing when Python runs with -S.
    raise SystemExit(main())
|