pdftract/crates/pdftract-cli/tests/comparison_mode_test.rs
jedarden 7ffb1a729f fix(pdftract-63ka2): AES-128 test buffer allocation for PKCS#7 padding
The encrypt_padded_mut API requires the buffer to be large enough to
hold the padded ciphertext. The tests were using plaintext.to_vec() which
only allocated plaintext.len() bytes, insufficient for padding.

Changed pattern:
- Before: plaintext.to_vec() (insufficient space)
- After: vec![0u8; plaintext.len() + 16] with copy_from_slice

Also fixed incorrect usage: encrypt_padded_mut returns Result<(), Error>,
not a length. Use data_copy.len() directly for ciphertext length.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 01:30:33 -04:00

306 lines
9 KiB
Rust

//! Integration test for comparison mode (Phase 7.9.8).
//!
//! This test verifies that the `--compare` flag works correctly for
//! side-by-side PDF diff viewing in the inspector.
use std::path::PathBuf;
/// Confirms that `InspectArgs` carries a `compare` field and that the values
/// supplied at construction are read back unchanged.
#[test]
fn test_inspect_args_has_compare_field() {
    use pdftract_cli::inspect::args::InspectArgs;

    let primary = PathBuf::from("test.pdf");
    let secondary = PathBuf::from("compare.pdf");

    // A struct literal is used on purpose: a missing or renamed field
    // (including `compare`) becomes a compile error rather than a runtime one.
    let inspect_args = InspectArgs {
        file: primary.clone(),
        port: 7676,
        bind: String::from("127.0.0.1"),
        auth_token: None,
        no_open: true,
        compare: Some(secondary.clone()),
        audit_log: None,
    };

    assert!(inspect_args.no_open);
    assert_eq!(inspect_args.compare, Some(secondary));
    assert_eq!(inspect_args.file, primary);
}
/// Checks that `InspectArgs::validate` accepts arguments that have no
/// comparison file when the primary PDF exists on disk.
///
/// The fixture path is relative, so validation is only exercised when the
/// fixture is reachable from the current working directory. When it is not,
/// the test still passes (the original best-effort behaviour) but now says so
/// on stderr instead of succeeding silently without checking anything.
#[test]
fn test_inspect_args_validate_without_compare() {
    use pdftract_cli::inspect::args::InspectArgs;

    let args = InspectArgs {
        file: PathBuf::from("tests/fixtures/minimal.pdf"),
        port: 7676,
        bind: "127.0.0.1".to_string(),
        auth_token: None,
        no_open: true,
        compare: None,
        audit_log: None,
    };

    // Without the fixture there is nothing meaningful to validate; make the
    // skip visible (e.g. under `cargo test -- --nocapture`).
    if !args.file.exists() {
        eprintln!(
            "skipping test_inspect_args_validate_without_compare: fixture {} not found",
            args.file.display()
        );
        return;
    }

    assert!(
        args.validate().is_ok(),
        "validate() should accept an existing file when no compare target is given"
    );
}
/// Round-trips a `DiffSummary` through JSON: the encoded text must contain
/// the expected key/value fragments, and decoding must restore the counters.
#[test]
fn test_diff_summary_serialization() {
    use pdftract_cli::inspect::api::DiffSummary;

    let original = DiffSummary {
        pages_added: 1,
        pages_removed: 0,
        blocks_added: 5,
        blocks_removed: 2,
        blocks_changed: 3,
        spans_added: 10,
        spans_removed: 4,
        spans_changed: 6,
        reading_order_changed: false,
    };

    let encoded = serde_json::to_string(&original).unwrap();

    // Spot-check a few fields in the compact JSON output.
    for fragment in [
        "\"pages_added\":1",
        "\"blocks_added\":5",
        "\"reading_order_changed\":false",
    ] {
        assert!(encoded.contains(fragment));
    }

    // Decoding the text we just produced must succeed and keep the values.
    let decoded = serde_json::from_str::<DiffSummary>(&encoded).unwrap();
    assert_eq!(decoded.pages_added, 1);
    assert_eq!(decoded.blocks_added, 5);
}
/// Round-trips a `PageDiff` through JSON: index lists must appear as compact
/// arrays in the encoded text, and decoding must restore both the lists and
/// the `reading_order_changed` flag.
#[test]
fn test_page_diff_serialization() {
    use pdftract_cli::inspect::api::PageDiff;

    let diff = PageDiff {
        changed_blocks: vec![0, 2],
        removed_blocks: vec![1],
        added_blocks: vec![3, 4],
        changed_spans: vec![5, 6, 7],
        removed_spans: vec![8],
        added_spans: vec![9, 10],
        reading_order_changed: true,
    };

    let json = serde_json::to_string(&diff).unwrap();
    assert!(json.contains("\"changed_blocks\":[0,2]"));
    assert!(json.contains("\"added_blocks\":[3,4]"));

    // Verify deserialization works
    let deserialized: PageDiff = serde_json::from_str(&json).unwrap();
    assert_eq!(deserialized.changed_blocks, vec![0, 2]);
    // Assert the boolean directly rather than comparing it against `true`.
    assert!(deserialized.reading_order_changed);
}
/// Round-trips a `CompareDocumentMeta` holding two empty documents and no
/// diff summary, checking that both sides survive encoding and decoding.
#[test]
fn test_compare_document_meta_serialization() {
    use pdftract_cli::inspect::api::CompareDocumentMeta;
    use serde_json::json;

    // Both sides use the same minimal document payload.
    let empty_doc = json!({"pages": []});
    let meta = CompareDocumentMeta {
        a: empty_doc.clone(),
        b: Some(empty_doc),
        diff_summary: None,
    };

    let encoded = serde_json::to_string(&meta).unwrap();
    for key in ["\"a\":", "\"b\":"] {
        assert!(encoded.contains(key));
    }

    // Decode what was just written and inspect the two sides.
    let decoded = serde_json::from_str::<CompareDocumentMeta>(&encoded).unwrap();
    assert!(decoded.a.is_object());
    assert!(decoded.b.is_some());
}
/// Round-trips a `ComparePageData` whose two sides and diff are all present,
/// checking that each of the three members survives encoding and decoding.
#[test]
fn test_compare_page_data_serialization() {
    use pdftract_cli::inspect::api::{ComparePageData, PageDiff};
    use serde_json::json;

    // A diff that records no changes at all.
    let no_changes = PageDiff {
        changed_blocks: vec![],
        removed_blocks: vec![],
        added_blocks: vec![],
        changed_spans: vec![],
        removed_spans: vec![],
        added_spans: vec![],
        reading_order_changed: false,
    };

    let page_data = ComparePageData {
        a: Some(json!({"index": 0})),
        b: Some(json!({"index": 0})),
        diff: Some(no_changes),
    };

    let encoded = serde_json::to_string(&page_data).unwrap();
    for key in ["\"a\":", "\"b\":", "\"diff\":"] {
        assert!(encoded.contains(key));
    }

    // Decode what was just written; every optional member must still be set.
    let decoded = serde_json::from_str::<ComparePageData>(&encoded).unwrap();
    assert!(decoded.a.is_some());
    assert!(decoded.b.is_some());
    assert!(decoded.diff.is_some());
}
/// Exercises `bbox_overlap_score` on three box pairs: identical boxes,
/// disjoint boxes, and boxes that overlap only in part.
#[test]
fn test_bbox_overlap_score() {
    use pdftract_cli::inspect::api::bbox_overlap_score;

    // Identical boxes: expected score is 1.0 (within tolerance).
    {
        let left = [100.0, 100.0, 200.0, 200.0];
        let right = [100.0, 100.0, 200.0, 200.0];
        let identical = bbox_overlap_score(&left, &right);
        assert!((identical - 1.0).abs() < 0.001);
    }

    // Disjoint boxes: expected score is 0.0 (within tolerance).
    {
        let left = [0.0, 0.0, 50.0, 50.0];
        let right = [100.0, 100.0, 150.0, 150.0];
        let disjoint = bbox_overlap_score(&left, &right);
        assert!(disjoint.abs() < 0.001);
    }

    // Partial overlap: expected score lies strictly between 0 and 1.
    {
        let left = [0.0, 0.0, 100.0, 100.0];
        let right = [50.0, 50.0, 150.0, 150.0];
        let partial = bbox_overlap_score(&left, &right);
        assert!(0.0 < partial && partial < 1.0);
    }
}
/// Exercises `text_similarity_score` on identical, unrelated, and nearly
/// identical string pairs.
#[test]
fn test_text_similarity_score() {
    use pdftract_cli::inspect::api::text_similarity_score;

    // Same text on both sides: expected score is 1.0 (within tolerance).
    let identical = text_similarity_score("hello world", "hello world");
    assert!((identical - 1.0).abs() < 0.001);

    // No characters in common: expected score is below 0.5.
    let unrelated = text_similarity_score("abcdef", "xyz123");
    assert!(unrelated < 0.5);

    // One character dropped: expected score is above 0.5.
    let near_match = text_similarity_score("hello world", "hello word");
    assert!(near_match > 0.5);
}
/// Checks `levenshtein_distance` against a small table of string pairs with
/// known edit distances.
#[test]
fn test_levenshtein_distance() {
    use pdftract_cli::inspect::api::levenshtein_distance;

    // (left, right, expected distance)
    let cases = [
        ("hello", "hello", 0),  // identical strings
        ("abc", "xyz", 3),      // every character differs
        ("hello", "hallo", 1),  // one substitution
        ("hello", "hello!", 1), // one insertion
        ("hello", "hell", 1),   // one deletion
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            levenshtein_distance(left, right),
            expected,
            "levenshtein_distance({:?}, {:?})",
            left,
            right
        );
    }
}
/// Exercises `block_match_score` on paragraph blocks that share one bounding
/// box: an identical pair must score above 0.9, and a pair that differs only
/// in text must score strictly between 0.5 and 0.9.
#[test]
fn test_block_match_score() {
    use pdftract_cli::inspect::api::block_match_score;

    // All fixtures differ only in their text, so build them from one template
    // instead of repeating the full struct literal for every case.
    let paragraph = |text: &str| pdftract_core::schema::BlockJson {
        kind: "paragraph".to_string(),
        text: text.to_string(),
        bbox: [100.0, 100.0, 200.0, 200.0],
        level: None,
        table_index: None,
        spans: vec![],
        receipt: None,
    };

    let block_a = paragraph("Hello world");

    // Test identical block (high score)
    let block_b = paragraph("Hello world");
    let score = block_match_score(&block_a, &block_b);
    assert!(
        score > 0.9,
        "identical blocks should score above 0.9, got {}",
        score
    );

    // Test different text (lower score): bbox matches but text doesn't.
    // The two bounds are asserted separately so a failure names the one hit.
    let block_c = paragraph("Goodbye world");
    let score = block_match_score(&block_a, &block_c);
    assert!(
        score < 0.9,
        "blocks with different text should score below 0.9, got {}",
        score
    );
    assert!(
        score > 0.5,
        "blocks sharing a bbox should still score above 0.5, got {}",
        score
    );
}
/// Exercises `span_match_score` on spans that share one bounding box, font
/// and size: an identical pair must score above 0.9, and a pair that differs
/// only in text must score strictly between 0.4 and 0.9.
#[test]
fn test_span_match_score() {
    use pdftract_cli::inspect::api::span_match_score;

    // All fixtures differ only in their text, so build them from one template
    // instead of repeating the full struct literal for every case.
    let helvetica_span = |text: &str| pdftract_core::schema::SpanJson {
        text: text.to_string(),
        bbox: [100.0, 100.0, 150.0, 120.0],
        font: "Helvetica".to_string(),
        size: 12.0,
        color: None,
        rendering_mode: None,
        confidence: None,
        confidence_source: None,
        lang: None,
        flags: vec![],
        receipt: None,
        column: None,
    };

    let span_a = helvetica_span("Hello");

    // Test identical span (high score)
    let span_b = helvetica_span("Hello");
    let score = span_match_score(&span_a, &span_b);
    assert!(
        score > 0.9,
        "identical spans should score above 0.9, got {}",
        score
    );

    // Test different text (lower score): bbox matches but text doesn't.
    // The two bounds are asserted separately so a failure names the one hit.
    let span_c = helvetica_span("World");
    let score = span_match_score(&span_a, &span_c);
    assert!(
        score < 0.9,
        "spans with different text should score below 0.9, got {}",
        score
    );
    assert!(
        score > 0.4,
        "spans sharing a bbox should still score above 0.4, got {}",
        score
    );
}