pdftract/crates/pdftract-cli/tests/test_header_flag.rs
jedarden db92403bd5
Some checks are pending
Schema Generation Validation / Validate JSON Schema (push) Waiting to run
Schema Generation Validation / Validate JSON Syntax (push) Waiting to run
chore(pdftract-36glh): remove unused JpxDecoder import and add verification note
- Remove unused jpx::JpxDecoder import from stream.rs (code uses fully qualified paths)
- Add notes/pdftract-36glh.md with acceptance criteria verification

The JPXDecode passthrough implementation was already complete in commit 4ba4687.
This change is minor cleanup only.

References: pdftract-36glh
2026-05-28 05:23:13 -04:00

374 lines
9.8 KiB
Rust

//! Integration tests for the --header CLI flag.
//!
//! These tests verify that the --header flag:
//! 1. Accepts valid headers in HEADER:VALUE format
//! 2. Rejects invalid headers (no colon, CRLF injection, managed headers)
//! 3. Silently ignores headers for local file extraction
//! 4. Would pass headers to HttpRangeSource for URLs (when Phase 1.8 is implemented)
use std::process::Command;
use std::path::PathBuf;
/// Path to the pdftract CLI binary.
///
/// Cargo exports `CARGO_BIN_EXE_<name>` at compile time when building
/// integration tests; that path is correct for any profile (`--release`),
/// any `CARGO_TARGET_DIR`, and any `--target` triple, so it is preferred.
/// This assumes the binary target is named `pdftract` (as the legacy path
/// below suggests). When the variable is not set, fall back to the
/// conventional debug-profile location relative to this crate's manifest.
fn pdftract_bin() -> PathBuf {
    option_env!("CARGO_BIN_EXE_pdftract")
        .map(PathBuf::from)
        .unwrap_or_else(|| {
            // Legacy location: only valid for a default-profile build in the
            // workspace-level `target/` directory.
            let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
            path.push("../../target/debug/pdftract");
            path
        })
}
/// Locate the minimal PDF fixture used by every test in this file.
///
/// The manifest-anchored location is tried first; if nothing exists there,
/// the same relative path is returned unanchored (i.e. resolved against the
/// current working directory). The returned path is not guaranteed to exist;
/// callers assert on that themselves.
fn fixture_pdf() -> PathBuf {
    const RELATIVE: &str = "../../tests/fixtures/test-minimal.pdf";

    let anchored = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(RELATIVE);
    if anchored.exists() {
        anchored
    } else {
        // Alternate location: relative to the working directory.
        PathBuf::from(RELATIVE)
    }
}
/// A single well-formed `NAME:VALUE` header must be accepted.
#[test]
fn test_header_flag_valid_single() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "X-API-Key:abc123"])
        .arg(fixture.to_str().unwrap())
        .args(["--format", "json", "-o", "-"]);
    let output = cmd.output().expect("Failed to run pdftract");

    // The header parses and validates, so the run is expected to exit successfully.
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "pdftract failed: {}", stderr);
}
/// Repeating `--header` several times with valid values must be accepted.
#[test]
fn test_header_flag_valid_multiple() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract");
    // One `--header` occurrence per entry, in this exact order.
    for header in ["X-API-Key:abc123", "Authorization:Bearer token", "X-Tenant:xyz"] {
        cmd.arg("--header").arg(header);
    }
    cmd.arg(fixture.to_str().unwrap())
        .args(["--format", "json", "-o", "-"]);
    let output = cmd.output().expect("Failed to run pdftract");

    // All three headers are valid, so the run is expected to exit successfully.
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "pdftract failed: {}", stderr);
}
/// A header argument lacking the `:` separator must be rejected with a parse error.
#[test]
fn test_header_flag_no_colon() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "NoColonHere"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must name the missing delimiter.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_missing_colon = stderr.contains("must contain a ':' delimiter");
    assert!(
        reports_missing_colon,
        "Expected missing colon error, got: {}",
        stderr
    );
}
/// A header value carrying an embedded CR/LF sequence must be rejected.
#[test]
fn test_header_flag_crlf_injection() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "X-Bad:Value\r\nInjected: true"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must mention CRLF.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_crlf = stderr.contains("CRLF");
    assert!(
        reports_crlf,
        "Expected CRLF injection error, got: {}",
        stderr
    );
}
/// Supplying `Host` via `--header` must be rejected as a managed header.
#[test]
fn test_header_flag_managed_header_host() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "Host:example.com"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must contain either of the accepted markers.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_managed = ["managed automatically", "Host"]
        .iter()
        .any(|&needle| stderr.contains(needle));
    assert!(
        reports_managed,
        "Expected managed header error, got: {}",
        stderr
    );
}
/// Supplying `Content-Length` via `--header` must be rejected as a managed header.
#[test]
fn test_header_flag_managed_header_content_length() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "Content-Length:1234"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must contain either of the accepted markers.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_managed = ["managed automatically", "Content-Length"]
        .iter()
        .any(|&needle| stderr.contains(needle));
    assert!(
        reports_managed,
        "Expected managed header error, got: {}",
        stderr
    );
}
/// `Authorization` is not treated as a managed header and must be accepted.
#[test]
fn test_header_flag_authorization_allowed() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "Authorization:Bearer abc123"])
        .arg(fixture.to_str().unwrap())
        .args(["--format", "json", "-o", "-"]);
    let output = cmd.output().expect("Failed to run pdftract");

    // Authorization is explicitly permitted, so the run is expected to succeed.
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "pdftract failed: {}", stderr);
}
/// A header with nothing before the `:` must be rejected.
#[test]
fn test_header_flag_empty_name() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", ":value"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must contain either of the accepted markers.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_empty_name = ["empty name", "Empty"]
        .iter()
        .any(|&needle| stderr.contains(needle));
    assert!(
        reports_empty_name,
        "Expected empty name error, got: {}",
        stderr
    );
}
/// A header with nothing after the `:` must be rejected.
#[test]
fn test_header_flag_empty_value() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "Name:"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must contain either of the accepted markers.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_empty_value = ["empty value", "Empty"]
        .iter()
        .any(|&needle| stderr.contains(needle));
    assert!(
        reports_empty_value,
        "Expected empty value error, got: {}",
        stderr
    );
}
/// A header name containing spaces must be rejected as invalid.
#[test]
fn test_header_flag_invalid_name_chars() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "X Bad Name:value"])
        .arg(fixture.to_str().unwrap());
    let output = cmd.output().expect("Failed to run pdftract");

    // The process must exit unsuccessfully...
    assert!(!output.status.success());

    // ...and the diagnostic must contain "invalid" in either capitalization.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_invalid = ["invalid", "Invalid"]
        .iter()
        .any(|&needle| stderr.contains(needle));
    assert!(
        reports_invalid,
        "Expected invalid name error, got: {}",
        stderr
    );
}
/// Whitespace on either side of the `:` separator must be tolerated.
#[test]
fn test_header_flag_with_spaces_around_colon() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "X-API-Key : abc123"])
        .arg(fixture.to_str().unwrap())
        .args(["--format", "json", "-o", "-"]);
    let output = cmd.output().expect("Failed to run pdftract");

    // Spaces around the colon are expected to be trimmed, so the run should succeed.
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "pdftract failed: {}", stderr);
}
/// Additional `:` characters inside the header value must be accepted.
#[test]
fn test_header_flag_value_with_colon() {
    let fixture = fixture_pdf();
    assert!(fixture.exists(), "Fixture PDF not found: {:?}", fixture);

    let mut cmd = Command::new(pdftract_bin());
    cmd.arg("extract")
        .args(["--header", "X-Url:https://example.com:8080/path"])
        .arg(fixture.to_str().unwrap())
        .args(["--format", "json", "-o", "-"]);
    let output = cmd.output().expect("Failed to run pdftract");

    // Values may contain colons, so the run should succeed.
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "pdftract failed: {}", stderr);
}
/// Headers supplied alongside a local file must be ignored *silently*:
/// the run succeeds and stderr carries no complaint about ignored or
/// unused headers.
#[test]
fn test_header_flag_local_file_silent_ignore() {
    let pdf = fixture_pdf();
    assert!(pdf.exists(), "Fixture PDF not found: {:?}", pdf);

    let output = Command::new(pdftract_bin())
        .args([
            "extract",
            "--header",
            "X-API-Key:abc123",
            pdf.to_str().unwrap(),
            "--format",
            "json",
            "-o",
            "-",
        ])
        .output()
        .expect("Failed to run pdftract");

    let stderr = String::from_utf8_lossy(&output.stderr);

    // Should succeed without error - headers are silently ignored for local files.
    assert!(output.status.success(), "pdftract failed: {}", stderr);

    // Should NOT print a warning about headers being unused. The match is
    // deliberately narrow (a line mentioning headers *and* ignoring/unused)
    // so unrelated diagnostics do not trip the test.
    let warns_about_unused_headers = stderr.lines().any(|line| {
        let line = line.to_lowercase();
        line.contains("header") && (line.contains("ignor") || line.contains("unused"))
    });
    assert!(
        !warns_about_unused_headers,
        "Expected headers to be silently ignored for local files, got: {}",
        stderr
    );
}