- Remove unused jpx::JpxDecoder import from stream.rs (code uses fully qualified paths)
- Add notes/pdftract-36glh.md with acceptance criteria verification
The JPXDecode passthrough implementation was already complete in commit 4ba4687.
This change is minor cleanup only.
References: pdftract-36glh
374 lines
9.8 KiB
Rust
374 lines
9.8 KiB
Rust
//! Integration tests for the `--header` CLI flag.
//!
//! These tests verify that the `--header` flag:
//! 1. Accepts valid headers in `HEADER:VALUE` format
//! 2. Rejects invalid headers (no colon, CRLF injection, managed headers)
//! 3. Silently ignores headers for local file extraction
//! 4. Would pass headers to `HttpRangeSource` for URLs (when Phase 1.8 is implemented)
|
|
|
|
use std::process::Command;
|
|
use std::path::PathBuf;
|
|
|
|
/// Path to the pdftract CLI binary.
|
|
fn pdftract_bin() -> PathBuf {
|
|
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
path.push("../../target/debug/pdftract");
|
|
path
|
|
}
|
|
|
|
/// Find a test fixture PDF file.
|
|
fn fixture_pdf() -> PathBuf {
|
|
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
path.push("../../tests/fixtures/test-minimal.pdf");
|
|
if !path.exists() {
|
|
// Try alternate path
|
|
path = PathBuf::from("../../tests/fixtures/test-minimal.pdf");
|
|
}
|
|
path
|
|
}
|
|
|
|
#[test]
fn test_header_flag_valid_single() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);
    let fixture_arg = fixture.to_str().unwrap();

    // extract --header X-API-Key:abc123 <pdf> --format json -o -
    let run = Command::new(pdftract_bin())
        .arg("extract")
        .args(["--header", "X-API-Key:abc123"])
        .arg(fixture_arg)
        .args(["--format", "json", "-o", "-"])
        .output()
        .expect("Failed to run pdftract");

    // One well-formed header is expected to leave the exit status at success.
    let stderr_text = String::from_utf8_lossy(&run.stderr);
    assert!(run.status.success(), "pdftract failed: {}", stderr_text);
}
|
|
|
|
#[test]
fn test_header_flag_valid_multiple() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);
    let fixture_arg = fixture.to_str().unwrap();

    // The flag is repeated three times, once per header, ahead of the input path.
    let run = Command::new(pdftract_bin())
        .arg("extract")
        .args(["--header", "X-API-Key:abc123"])
        .args(["--header", "Authorization:Bearer token"])
        .args(["--header", "X-Tenant:xyz"])
        .arg(fixture_arg)
        .args(["--format", "json", "-o", "-"])
        .output()
        .expect("Failed to run pdftract");

    // Repeating the flag is expected to be accepted.
    let stderr_text = String::from_utf8_lossy(&run.stderr);
    assert!(run.status.success(), "pdftract failed: {}", stderr_text);
}
|
|
|
|
#[test]
fn test_header_flag_no_colon() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg("NoColonHere")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    // A header without any ':' must be rejected with the delimiter message.
    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    assert!(
        diagnostics.contains("must contain a ':' delimiter"),
        "Expected missing colon error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_crlf_injection() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    // The header value embeds a CR LF pair followed by a second header line.
    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg("X-Bad:Value\r\nInjected: true")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    // The run must fail and the diagnostic must mention CRLF.
    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    assert!(
        diagnostics.contains("CRLF"),
        "Expected CRLF injection error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_managed_header_host() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg("Host:example.com")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    // `Host` is expected to be refused as a managed header.
    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    // Either marker in stderr is accepted as evidence of the managed-header error.
    let mentions_managed = ["managed automatically", "Host"]
        .iter()
        .any(|needle| diagnostics.contains(*needle));
    assert!(
        mentions_managed,
        "Expected managed header error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_managed_header_content_length() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg("Content-Length:1234")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    // `Content-Length` is expected to be refused as a managed header.
    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    // Either marker in stderr is accepted as evidence of the managed-header error.
    let mentions_managed = ["managed automatically", "Content-Length"]
        .iter()
        .any(|needle| diagnostics.contains(*needle));
    assert!(
        mentions_managed,
        "Expected managed header error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_authorization_allowed() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);
    let fixture_arg = fixture.to_str().unwrap();

    let run = Command::new(pdftract_bin())
        .arg("extract")
        .args(["--header", "Authorization:Bearer abc123"])
        .arg(fixture_arg)
        .args(["--format", "json", "-o", "-"])
        .output()
        .expect("Failed to run pdftract");

    // `Authorization` is explicitly allowed, so the run is expected to succeed.
    let stderr_text = String::from_utf8_lossy(&run.stderr);
    assert!(run.status.success(), "pdftract failed: {}", stderr_text);
}
|
|
|
|
#[test]
fn test_header_flag_empty_name() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    // Nothing precedes the colon, so the header name is empty.
    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg(":value")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    // Either wording in stderr is accepted as the empty-name error.
    let reports_empty = ["empty name", "Empty"]
        .iter()
        .any(|needle| diagnostics.contains(*needle));
    assert!(
        reports_empty,
        "Expected empty name error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_empty_value() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    // Nothing follows the colon, so the header value is empty.
    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg("Name:")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    // Either wording in stderr is accepted as the empty-value error.
    let reports_empty = ["empty value", "Empty"]
        .iter()
        .any(|needle| diagnostics.contains(*needle));
    assert!(
        reports_empty,
        "Expected empty value error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_invalid_name_chars() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    // The header name contains spaces.
    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .arg("--header")
        .arg("X Bad Name:value")
        .arg(fixture.to_str().unwrap());
    let run = cmd.output().expect("Failed to run pdftract");

    assert!(!run.status.success());
    let diagnostics = String::from_utf8_lossy(&run.stderr);
    // Accept either capitalisation of "invalid" in the error output.
    let reports_invalid = ["invalid", "Invalid"]
        .iter()
        .any(|needle| diagnostics.contains(*needle));
    assert!(
        reports_invalid,
        "Expected invalid name error, got: {}",
        diagnostics
    );
}
|
|
|
|
#[test]
fn test_header_flag_with_spaces_around_colon() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);
    let fixture_arg = fixture.to_str().unwrap();

    // Whitespace sits on both sides of the name/value separator.
    let run = Command::new(pdftract_bin())
        .arg("extract")
        .args(["--header", "X-API-Key : abc123"])
        .arg(fixture_arg)
        .args(["--format", "json", "-o", "-"])
        .output()
        .expect("Failed to run pdftract");

    // Expected to succeed: the spaces around the colon are meant to be trimmed.
    let stderr_text = String::from_utf8_lossy(&run.stderr);
    assert!(run.status.success(), "pdftract failed: {}", stderr_text);
}
|
|
|
|
#[test]
fn test_header_flag_value_with_colon() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);
    let fixture_arg = fixture.to_str().unwrap();

    // The value is a URL, so it carries additional ':' characters of its own.
    let run = Command::new(pdftract_bin())
        .arg("extract")
        .args(["--header", "X-Url:https://example.com:8080/path"])
        .arg(fixture_arg)
        .args(["--format", "json", "-o", "-"])
        .output()
        .expect("Failed to run pdftract");

    // Expected to succeed: colons inside the value are allowed.
    let stderr_text = String::from_utf8_lossy(&run.stderr);
    assert!(run.status.success(), "pdftract failed: {}", stderr_text);
}
|
|
|
|
#[test]
fn test_header_flag_local_file_silent_ignore() {
    let pdf = fixture_pdf();
    assert!(pdf.exists(), "Fixture PDF not found: {:?}", pdf);

    let output = Command::new(pdftract_bin())
        .args([
            "extract",
            "--header",
            "X-API-Key:abc123",
            pdf.to_str().unwrap(),
            "--format",
            "json",
            "-o",
            "-",
        ])
        .output()
        .expect("Failed to run pdftract");

    let stderr = String::from_utf8_lossy(&output.stderr);

    // Should succeed without error - headers are silently ignored for local files
    assert!(output.status.success(), "pdftract failed: {}", stderr);

    // Should NOT print a warning about headers being unused. Previously stderr
    // was captured but never inspected, so the "silent" half of this test was
    // not enforced. The check is case-insensitive so it matches any phrasing
    // of such a warning.
    assert!(
        !stderr.to_ascii_lowercase().contains("header"),
        "Expected no header-related diagnostics for a local file, got: {}",
        stderr
    );
}
|