pdftract/scripts/measure_doc_coverage.py
jedarden 246befd8d1 feat(pdftract-2m3gl): implement PHP SDK with Packagist publishing
- Add jedarden/pdftract Composer package (sdk/php/)
- Implement Client.php with proc_open subprocess execution
- Add PSR-3 LoggerInterface integration (defaults to NullLogger)
- Add 9 contract methods: extract, extractText, extractMarkdown, extractStream, search, getMetadata, hash, classify, verifyReceipt
- Add readonly model classes: Document, Page, Metadata, Fingerprint, Classification, Match, Receipt
- Add exception classes: PdftractException base + 8 subclasses
- Add PHPUnit conformance test suite
- Add phpunit.xml configuration
- Add composer.json with jedarden/pdftract package name
- Add .ci/argo-workflows/pdftract-php-publish.yaml (Packagist auto-discovery from git tags)

Also includes Ruby SDK scaffold from parallel workflow.

Closes pdftract-2m3gl
2026-06-01 10:27:03 -04:00

75 lines
2.2 KiB
Python

#!/usr/bin/env python3
"""Measure rustdoc coverage for pdftract-core."""
import os
import re
from pathlib import Path
from collections import defaultdict
def count_items_in_file(file_path):
    """Count public items, documented items, and items with examples in a Rust file.

    The file is scanned line by line. A run of ``///`` outer doc comments is
    attached to the next public item that follows it; attributes (``#[...]``),
    blank lines and ordinary ``//`` comments may sit between the doc comment
    and the item without detaching it.

    This is a line-based heuristic, not a Rust parser. Known limitations:
    ``/** */`` block docs and ``#[doc = "..."]`` attributes are not
    recognised, ``//!`` inner docs are not attributed to any item, and only
    fences spelled ```` ```rust ```` count as examples.

    Args:
        file_path: Path (``str`` or ``os.PathLike``) of the ``.rs`` file.

    Returns:
        A ``(public_items, doc_items, example_items)`` tuple of ints where
        ``public_items`` is the number of ``pub`` declarations found,
        ``doc_items`` is how many of those are preceded by a ``///`` doc
        comment, and ``example_items`` is how many of those doc comments
        contain a ```` ```rust ```` fence. ``doc_items`` and ``example_items``
        never exceed ``public_items``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Matched against the stripped line so that indented items (for example
    # methods inside an ``impl`` block) are counted too. ``pub(crate)`` and
    # friends are deliberately not matched: they are not public API.
    pub_item = re.compile(
        r'^pub\s+'
        r'(?:(?:async|unsafe|extern(?:\s+"[^"]*")?)\s+)*'
        r'(?:fn|struct|enum|trait|type|const|static|mod|use)\s+'
    )

    public_items = 0
    doc_items = 0
    example_items = 0

    has_doc = False      # a /// block is waiting for its item
    has_example = False  # ...and that block contains a ```rust fence
    attr_depth = 0       # > 0 while inside a multi-line #[...] attribute

    # Rust source files are UTF-8; do not depend on the locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            if attr_depth > 0:
                # Continuation of an attribute that spans several lines.
                attr_depth = max(
                    0, attr_depth + line.count('[') - line.count(']')
                )
                continue

            # ``////`` (four or more slashes) is an ordinary comment.
            if line.startswith('///') and not line.startswith('////'):
                has_doc = True
                if '```rust' in line:
                    has_example = True
                continue

            # Blank lines and plain comments (including ``//!``) neither
            # document the next item nor detach a pending doc comment.
            if not line or line.startswith('//'):
                continue

            if line.startswith(('#[', '#![')):
                attr_depth = max(0, line.count('[') - line.count(']'))
                continue

            if pub_item.match(line):
                public_items += 1
                if has_doc:
                    doc_items += 1
                if has_example:
                    example_items += 1

            # Any other code line consumes whatever doc comment was pending.
            has_doc = False
            has_example = False

    return public_items, doc_items, example_items
def main():
    """Print a rustdoc coverage report for ``crates/pdftract-core/src``.

    Walks every ``*.rs`` file under the source directory (resolved relative
    to the current working directory), sums the per-file counts returned by
    ``count_items_in_file`` and prints the totals, the integer (floored)
    coverage percentages, and up to 20 files with the largest shortfall of
    documented items.

    Raises:
        SystemExit: If the source directory does not exist. The message goes
            to stderr and the process exits with a non-zero status so that
            CI notices the failure instead of seeing an exit status of 0.
    """
    src_dir = Path('crates/pdftract-core/src')
    if not src_dir.exists():
        raise SystemExit(f"Error: {src_dir} does not exist")

    total_public = 0
    total_doc = 0
    total_examples = 0
    file_gaps = []

    # rglob order depends on the filesystem; sort for reproducible output.
    for rs_file in sorted(src_dir.rglob('*.rs')):
        pub, doc, ex = count_items_in_file(rs_file)
        total_public += pub
        total_doc += doc
        total_examples += ex

        # A positive gap implies pub > 0, so no separate check is needed.
        gap = pub - doc
        if gap > 0:
            file_gaps.append((str(rs_file.relative_to(src_dir.parent)), gap))

    print("Measuring rustdoc coverage for pdftract-core...")
    print()
    print(f"Public items found: {total_public}")
    print(f"Items with docs: {total_doc}")
    print(f"Items with examples: {total_examples}")
    print()

    # Percentages are undefined when no public items were found.
    if total_public > 0:
        doc_coverage = (total_doc * 100) // total_public
        example_coverage = (total_examples * 100) // total_public
        print(f"Documentation coverage: {doc_coverage}%")
        print(f"Example coverage: {example_coverage}%")
        print()
        print("Target: 80% example coverage")
        print()

    print("Files with most undocumented public items:")
    print()
    # Largest gap first; ties are broken by path so the top 20 is stable.
    file_gaps.sort(key=lambda entry: (-entry[1], entry[0]))
    for file_path, gap in file_gaps[:20]:
        print(f" {file_path}: {gap} undocumented items")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()