pdftract/crates/pdftract-core/tests/debug_content_streams.rs
jedarden 246befd8d1 feat(pdftract-2m3gl): implement PHP SDK with Packagist publishing
- Add jedarden/pdftract Composer package (sdk/php/)
- Implement Client.php with proc_open subprocess execution
- Add PSR-3 LoggerInterface integration (defaults to NullLogger)
- Add 9 contract methods: extract, extractText, extractMarkdown, extractStream, search, getMetadata, hash, classify, verifyReceipt
- Add readonly model classes: Document, Page, Metadata, Fingerprint, Classification, Match, Receipt
- Add exception classes: PdftractException base + 8 subclasses
- Add PHPUnit conformance test suite
- Add phpunit.xml configuration
- Add composer.json with jedarden/pdftract package name
- Add .ci/argo-workflows/pdftract-php-publish.yaml (Packagist auto-discovery from git tags)

Also includes Ruby SDK scaffold from parallel workflow.

Closes pdftract-2m3gl
2026-06-01 10:27:03 -04:00

52 lines
1.6 KiB
Rust

//! Debug test that prints the fingerprint, first-page resources, and content
//! stream references for fixture PDFs.
//!
//! This helps diagnose why content_edit_one_glyph and content_edit_one_paragraph
//! fixtures produce identical fingerprints despite having different content.
use pdftract_core::document::PdfExtractor;
use std::path::Path;
/// Prints a diagnostic summary of the PDF at `path` to stdout.
///
/// The output is, in order:
/// 1. a header line containing the path,
/// 2. the document fingerprint,
/// 3. the `Debug` rendering of the first page's `resources`,
/// 4. one line per entry in the first page's `contents` (only the reference
///    is printed; the stream bodies themselves are not decoded here).
///
/// Every failure is reported on stdout and ends the summary for this file
/// instead of being propagated, so a single bad fixture does not stop the
/// caller from inspecting the remaining ones.
fn print_normalized_content(path: &Path) {
    println!("\n=== {} ===", path.display());

    let mut extractor = match PdfExtractor::open(path) {
        Ok(extractor) => extractor,
        Err(e) => {
            println!("Failed to open: {:?}", e);
            return;
        }
    };

    let fingerprint = extractor.fingerprint();
    println!("Fingerprint: {}", fingerprint);

    // Report a materialization failure explicitly: staying silent here would
    // be indistinguishable from a document that simply has no content streams.
    let pages = match extractor.materialize_pages() {
        Ok(pages) => pages,
        Err(_) => {
            println!("Failed to materialize pages");
            return;
        }
    };

    // Only the first page is inspected.
    let first_page = match pages.first() {
        Some(page) => page,
        None => {
            println!("Document has no pages");
            return;
        }
    };

    println!("Page 0 resources: {:?}", first_page.resources);
    for (index, stream_ref) in first_page.contents.iter().enumerate() {
        println!("Content stream {}: ref={:?}", index, stream_ref);
    }
}
/// Dumps the diagnostic summary for each of the four fingerprint fixtures.
///
/// The fixtures are the `v1`/`v2` pairs of the `content_edit_one_glyph` and
/// `content_edit_one_paragraph` cases, visited in that order. The paths are
/// relative, so they resolve against the process working directory
/// (NOTE(review): presumably the crate root when run via `cargo test`; confirm).
fn main() {
    const FIXTURE_PATHS: [&str; 4] = [
        "tests/fingerprint/fixtures/content_edit_one_glyph/v1.pdf",
        "tests/fingerprint/fixtures/content_edit_one_glyph/v2.pdf",
        "tests/fingerprint/fixtures/content_edit_one_paragraph/v1.pdf",
        "tests/fingerprint/fixtures/content_edit_one_paragraph/v2.pdf",
    ];

    FIXTURE_PATHS
        .iter()
        .for_each(|relative| print_normalized_content(Path::new(relative)));
}
/// Test-harness entry point that runs the fixture dump in `main`.
///
/// It makes no assertions of its own; the printed output is the point, and it
/// is only visible when the test runner is told not to capture stdout.
#[test]
fn test_debug_content_streams() {
    main()
}