- Add jedarden/pdftract Composer package (sdk/php/) - Implement Client.php with proc_open subprocess execution - Add PSR-3 LoggerInterface integration (defaults to NullLogger) - Add 9 contract methods: extract, extractText, extractMarkdown, extractStream, search, getMetadata, hash, classify, verifyReceipt - Add readonly model classes: Document, Page, Metadata, Fingerprint, Classification, Match, Receipt - Add exception classes: PdftractException base + 8 subclasses - Add PHPUnit conformance test suite - Add phpunit.xml configuration - Add composer.json with jedarden/pdftract package name - Add .ci/argo-workflows/pdftract-php-publish.yaml (Packagist auto-discovery from git tags) Also includes Ruby SDK scaffold from parallel workflow. Closes pdftract-2m3gl
52 lines
1.6 KiB
Rust
52 lines
1.6 KiB
Rust
//! Debug test to print normalized content streams for fixture PDFs.
//!
//! This helps diagnose why the `content_edit_one_glyph` and `content_edit_one_paragraph`
//! fixtures produce identical fingerprints despite having different content.
|
|
|
|
use pdftract_core::document::PdfExtractor;
|
|
use std::path::Path;
|
|
|
|
fn print_normalized_content(path: &Path) {
|
|
println!("\n=== {} ===", path.display());
|
|
|
|
match PdfExtractor::open(path) {
|
|
Ok(mut extractor) => {
|
|
// Get the document and fingerprint
|
|
let fingerprint = extractor.fingerprint();
|
|
println!("Fingerprint: {}", fingerprint);
|
|
|
|
// Try to get the first page
|
|
if let Ok(pages) = extractor.materialize_pages() {
|
|
if let Some(page) = pages.first() {
|
|
println!("Page 0 resources: {:?}", page.resources);
|
|
|
|
// Get content streams
|
|
for (i, stream_ref) in page.contents.iter().enumerate() {
|
|
println!("Content stream {}: ref={:?}", i, stream_ref);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
println!("Failed to open: {:?}", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn main() {
|
|
let fixtures = [
|
|
"tests/fingerprint/fixtures/content_edit_one_glyph/v1.pdf",
|
|
"tests/fingerprint/fixtures/content_edit_one_glyph/v2.pdf",
|
|
"tests/fingerprint/fixtures/content_edit_one_paragraph/v1.pdf",
|
|
"tests/fingerprint/fixtures/content_edit_one_paragraph/v2.pdf",
|
|
];
|
|
|
|
for fixture in fixtures {
|
|
print_normalized_content(Path::new(fixture));
|
|
}
|
|
}
|
|
|
|
/// Test-harness entry point for the fixture dump.
///
/// Delegates to `main` and makes no assertions of its own; the printed output
/// is the point.
#[test]
fn test_debug_content_streams() {
    main()
}
|