The indent trigger was using .abs() which fired on both increased indent (non-indented → indented) AND decreased indent (indented → non-indented). This caused drop-cap style paragraphs (indented first line, flush-left continuation) to incorrectly split into two blocks. Per plan Phase 4.4 heuristic #2, indent change should only trigger when the current line is MORE indented (to the right, larger x0) than the block average - i.e., a new paragraph starting after non-indented text. It should NOT trigger for decreased indent (first line indented, rest flush-left). Fix: Remove .abs() and only check if line_x0 - block_avg_x0 > threshold. Tests: - test_indented_first_line_new_block: PASS (non-indented → indented splits) - test_indented_first_line_of_paragraph_not_split: PASS (drop cap stays together) - All 179 line module tests: PASS
100 lines
3 KiB
Python
100 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
"""Count rustdoc coverage for pdftract-core."""
|
|
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
CORE_DIR = Path("crates/pdftract-core/src")
|
|
|
|
# Patterns for public items. Each one is applied with ``.match`` to a
# whitespace-stripped source line and captures the item's identifier in
# group 1. Only plain ``pub`` is recognised; restricted visibility such as
# ``pub(crate)`` never matches.
#
# Dict order matters: the scanner takes the first pattern that matches, so
# "fn" is listed before "const" and also accepts the qualifiers that may
# precede ``fn`` (``const``, ``async``, ``unsafe``, ``extern "ABI"``).
PUB_PATTERNS = {
    "fn": re.compile(
        r'^pub (?:(?:const|async|unsafe)\s+)*(?:extern\s+(?:"[^"]*"\s+)?)?fn\s+(\w+)'
    ),
    "struct": re.compile(r'^pub struct\s+(\w+)'),
    "enum": re.compile(r'^pub enum\s+(\w+)'),
    "trait": re.compile(r'^pub (?:unsafe\s+)?trait\s+(\w+)'),
    "type": re.compile(r'^pub type\s+(\w+)'),
    "mod": re.compile(r'^pub mod\s+(\w+)'),
    # The lookahead keeps `pub const fn ...` (and `pub const unsafe fn ...`)
    # from being reported as a constant named "fn" / "unsafe".
    "const": re.compile(r'^pub const\s+(?!(?:fn|unsafe|extern)\b)(\w+)'),
    # `mut` is a qualifier, not the name of the static.
    "static": re.compile(r'^pub static\s+(?:mut\s+)?(\w+)'),
}

# Pattern for code blocks in doc comments: a fence opened with ```rust and
# closed by the next ``` with no backtick in between.
EXAMPLE_PATTERN = re.compile(r'```rust[^`]*```')
# Matches a `///` or `//!` doc comment through the end of the line.
# NOTE(review): not referenced by the functions visible in this file.
DOC_COMMENT_PATTERN = re.compile(r'///.*|//!.*')
|
|
|
|
def count_public_items_and_examples(file_path: Path):
    """Collect the public items declared in one Rust source file.

    Every line is stripped and tried against ``PUB_PATTERNS`` in dict order;
    the first pattern that matches decides the item's type. For each item
    found, the ``///`` / ``//!`` lines directly above it are joined and
    searched with ``EXAMPLE_PATTERN``. Single-line attributes (lines starting
    with ``#[``, e.g. ``#[derive(Debug)]``) sitting between the doc comment
    and the item are skipped rather than ending the scan.

    Args:
        file_path: Path of the ``.rs`` file to scan.

    Returns:
        A list of dicts in file order, one per public item, with the keys
        ``"type"``, ``"name"``, ``"line"`` (1-based) and ``"has_example"``.
    """
    # Rust sources are UTF-8; do not depend on the platform default encoding.
    with open(file_path, encoding="utf-8") as f:
        lines = f.readlines()

    pub_items = []
    for index, raw_line in enumerate(lines):
        stripped = raw_line.strip()
        for item_type, pattern in PUB_PATTERNS.items():
            match = pattern.match(stripped)
            if match:
                pub_items.append({
                    "type": item_type,
                    "name": match.group(1),
                    "line": index + 1,
                    "has_example": False,
                })
                break

    for item in pub_items:
        # Walk upwards from the line just above the item ("line" is 1-based).
        doc_lines = []
        j = item["line"] - 2
        while j >= 0:
            stripped = lines[j].strip()
            if stripped.startswith(("///", "//!")):
                doc_lines.append(lines[j])
            elif not stripped.startswith("#["):
                # Anything that is neither a doc comment nor an attribute
                # ends the item's documentation.
                # NOTE: attributes spanning several lines are not recognised
                # and still end the scan.
                break
            j -= 1

        # Lines were gathered bottom-up; restore source order before matching.
        doc_lines.reverse()
        item["has_example"] = bool(EXAMPLE_PATTERN.search("".join(doc_lines)))

    return pub_items
|
|
|
|
|
|
def main():
    """Print example coverage for the Rust sources under ``CORE_DIR``.

    Walks ``CORE_DIR`` recursively for ``*.rs`` files (files named ``lib.rs``
    are skipped), gathers their public items, then prints the overall totals
    followed by a per-item-type breakdown sorted by type name.
    """
    collected = []
    for source_path in CORE_DIR.rglob("*.rs"):
        # lib.rs is excluded: it is treated as top-level module exports only.
        if source_path.name != "lib.rs":
            collected.extend(count_public_items_and_examples(source_path))

    total = len(collected)
    with_examples = len([entry for entry in collected if entry["has_example"]])
    # Guard against an empty scan so the percentage never divides by zero.
    if total > 0:
        coverage = with_examples / total * 100
    else:
        coverage = 0

    print(f"Total public items: {total}")
    print(f"With worked examples: {with_examples}")
    print(f"Coverage: {coverage:.1f}%")

    # Group the items by their type for the breakdown.
    grouped = defaultdict(list)
    for entry in collected:
        grouped[entry["type"]].append(entry)

    print("\nBy type:")
    for item_type in sorted(grouped):
        members = grouped[item_type]
        with_ex = len([member for member in members if member["has_example"]])
        print(f" {item_type}: {with_ex}/{len(members)} ({with_ex/len(members)*100:.1f}%)")
|
|
|
|
|
|
# Produce the report only when run as a script; importing the module must
# not trigger a scan.
if __name__ == "__main__":
    main()
|