The encrypt_padded_mut API requires the buffer to be large enough to hold the padded ciphertext. The tests were using plaintext.to_vec(), which allocates only plaintext.len() bytes and leaves no room for padding.

Changed pattern:
- Before: plaintext.to_vec() (insufficient space)
- After: vec![0u8; plaintext.len() + 16] with copy_from_slice

Also fixed incorrect usage: encrypt_padded_mut returns Result<(), Error>, not a length. Use data_copy.len() directly for the ciphertext length.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
306 lines
9 KiB
Rust
306 lines
9 KiB
Rust
//! Integration test for comparison mode (Phase 7.9.8).
//!
//! This test verifies that the `--compare` flag works correctly for
//! side-by-side PDF diff viewing in the inspector.
|
|
|
|
use std::path::PathBuf;
|
|
|
|
/// Checks that an `InspectArgs` literal accepts a `compare` path and that the
/// values stored in `file`, `compare` and `no_open` read back unchanged.
#[test]
fn test_inspect_args_has_compare_field() {
    use pdftract_cli::inspect::args::InspectArgs;

    // Keep the expected paths around so the assertions compare against the
    // exact values that were placed into the struct.
    let primary = PathBuf::from("test.pdf");
    let secondary = PathBuf::from("compare.pdf");

    let args = InspectArgs {
        file: primary.clone(),
        port: 7676,
        bind: String::from("127.0.0.1"),
        auth_token: None,
        no_open: true,
        compare: Some(secondary.clone()),
        audit_log: None,
    };

    assert_eq!(args.file, primary);
    assert_eq!(args.compare, Some(secondary));
    assert!(args.no_open);
}
|
|
|
|
/// Calls `args.validate()` with `compare` left as `None` and expects `Ok`.
///
/// The check only runs when `tests/fixtures/minimal.pdf` exists; the path is
/// relative, so whether it is found depends on the current working directory.
/// When the fixture is absent the test passes without asserting anything.
#[test]
fn test_inspect_args_validate_without_compare() {
    use pdftract_cli::inspect::args::InspectArgs;

    let args = InspectArgs {
        file: PathBuf::from("tests/fixtures/minimal.pdf"),
        port: 7676,
        bind: String::from("127.0.0.1"),
        auth_token: None,
        no_open: true,
        compare: None,
        audit_log: None,
    };

    // Nothing to validate against if the fixture is not on disk.
    if !args.file.exists() {
        return;
    }

    assert!(args.validate().is_ok());
}
|
|
|
|
/// Round-trips a `DiffSummary` through `serde_json`.
///
/// The serialized string must contain the expected `key:value` fragments, and
/// the value parsed back from it must carry the same `pages_added` and
/// `blocks_added` counts.
#[test]
fn test_diff_summary_serialization() {
    use pdftract_cli::inspect::api::DiffSummary;

    let original = DiffSummary {
        pages_added: 1,
        pages_removed: 0,
        blocks_added: 5,
        blocks_removed: 2,
        blocks_changed: 3,
        spans_added: 10,
        spans_removed: 4,
        spans_changed: 6,
        reading_order_changed: false,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let has = |fragment: &str| encoded.contains(fragment);
    assert!(has("\"pages_added\":1"));
    assert!(has("\"blocks_added\":5"));
    assert!(has("\"reading_order_changed\":false"));

    // Parse the string back and spot-check two of the counters.
    let decoded: DiffSummary = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.pages_added, 1);
    assert_eq!(decoded.blocks_added, 5);
}
|
|
|
|
/// Round-trips a `PageDiff` through `serde_json`.
///
/// The serialized string must contain the expected index arrays, and the value
/// parsed back from it must keep `changed_blocks` and the
/// `reading_order_changed` flag.
#[test]
fn test_page_diff_serialization() {
    use pdftract_cli::inspect::api::PageDiff;

    let diff = PageDiff {
        changed_blocks: vec![0, 2],
        removed_blocks: vec![1],
        added_blocks: vec![3, 4],
        changed_spans: vec![5, 6, 7],
        removed_spans: vec![8],
        added_spans: vec![9, 10],
        reading_order_changed: true,
    };

    let json = serde_json::to_string(&diff).unwrap();
    assert!(json.contains("\"changed_blocks\":[0,2]"));
    assert!(json.contains("\"added_blocks\":[3,4]"));

    // Verify deserialization works
    let deserialized: PageDiff = serde_json::from_str(&json).unwrap();
    assert_eq!(deserialized.changed_blocks, vec![0, 2]);
    // A bool is asserted directly rather than compared against `true`.
    assert!(deserialized.reading_order_changed);
}
|
|
|
|
/// Round-trips a `CompareDocumentMeta` through `serde_json`.
///
/// The serialized string must mention both the `a` and `b` keys; after parsing
/// it back, `a` must be a JSON object and `b` must be present.
#[test]
fn test_compare_document_meta_serialization() {
    use pdftract_cli::inspect::api::CompareDocumentMeta;
    use serde_json::json;

    let original = CompareDocumentMeta {
        a: json!({"pages": []}),
        b: Some(json!({"pages": []})),
        diff_summary: None,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let has = |fragment: &str| encoded.contains(fragment);
    assert!(has("\"a\":"));
    assert!(has("\"b\":"));

    // Parse the string back and inspect the two document slots.
    let decoded: CompareDocumentMeta = serde_json::from_str(&encoded).unwrap();
    let CompareDocumentMeta { a, b, .. } = &decoded;
    assert!(a.is_object());
    assert!(b.is_some());
}
|
|
|
|
/// Round-trips a `ComparePageData` through `serde_json`.
///
/// The serialized string must mention the `a`, `b` and `diff` keys; after
/// parsing it back, all three fields must be `Some`.
#[test]
fn test_compare_page_data_serialization() {
    use pdftract_cli::inspect::api::{ComparePageData, PageDiff};
    use serde_json::json;

    // A diff with nothing changed, built separately to keep the literal below short.
    let empty_diff = PageDiff {
        changed_blocks: vec![],
        removed_blocks: vec![],
        added_blocks: vec![],
        changed_spans: vec![],
        removed_spans: vec![],
        added_spans: vec![],
        reading_order_changed: false,
    };

    let original = ComparePageData {
        a: Some(json!({"index": 0})),
        b: Some(json!({"index": 0})),
        diff: Some(empty_diff),
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let has = |fragment: &str| encoded.contains(fragment);
    assert!(has("\"a\":"));
    assert!(has("\"b\":"));
    assert!(has("\"diff\":"));

    // Parse the string back and make sure no field was dropped.
    let decoded: ComparePageData = serde_json::from_str(&encoded).unwrap();
    let ComparePageData { a, b, diff } = &decoded;
    assert!(a.is_some());
    assert!(b.is_some());
    assert!(diff.is_some());
}
|
|
|
|
/// Exercises `bbox_overlap_score` on three box pairs: identical boxes (score
/// within 0.001 of 1.0), disjoint boxes (score within 0.001 of 0.0), and
/// partially overlapping boxes (score strictly between 0 and 1).
#[test]
fn test_bbox_overlap_score() {
    use pdftract_cli::inspect::api::bbox_overlap_score;

    let tolerance = 0.001;

    // Identical boxes.
    let first = [100.0, 100.0, 200.0, 200.0];
    let first_copy = [100.0, 100.0, 200.0, 200.0];
    let identical = bbox_overlap_score(&first, &first_copy);
    assert!((identical - 1.0).abs() < tolerance);

    // Boxes that do not touch at all.
    let near_origin = [0.0, 0.0, 50.0, 50.0];
    let far_away = [100.0, 100.0, 150.0, 150.0];
    let disjoint = bbox_overlap_score(&near_origin, &far_away);
    assert!(disjoint.abs() < tolerance);

    // Boxes that share only a corner region.
    let base = [0.0, 0.0, 100.0, 100.0];
    let shifted = [50.0, 50.0, 150.0, 150.0];
    let partial = bbox_overlap_score(&base, &shifted);
    assert!(0.0 < partial && partial < 1.0);
}
|
|
|
|
/// Exercises `text_similarity_score` on three string pairs: identical text
/// (score within 0.001 of 1.0), unrelated text (score below 0.5), and text
/// differing by one character (score above 0.5).
#[test]
fn test_text_similarity_score() {
    use pdftract_cli::inspect::api::text_similarity_score;

    let tolerance = 0.001;

    // Identical text.
    let identical = text_similarity_score("hello world", "hello world");
    assert!((identical - 1.0).abs() < tolerance);

    // No characters in common.
    let unrelated = text_similarity_score("abcdef", "xyz123");
    assert!(unrelated < 0.5);

    // One character missing.
    let near_match = text_similarity_score("hello world", "hello word");
    assert!(near_match > 0.5);
}
|
|
|
|
/// Checks `levenshtein_distance` against a small table of string pairs and
/// their expected edit distances.
#[test]
fn test_levenshtein_distance() {
    use pdftract_cli::inspect::api::levenshtein_distance;

    let cases = [
        // identical strings
        ("hello", "hello", 0),
        // completely different strings
        ("abc", "xyz", 3),
        // one substitution
        ("hello", "hallo", 1),
        // one insertion
        ("hello", "hello!", 1),
        // one deletion
        ("hello", "hell", 1),
    ];

    for &(lhs, rhs, expected) in &cases {
        // The message names the inputs because every case fails on this line.
        assert_eq!(
            levenshtein_distance(lhs, rhs),
            expected,
            "levenshtein_distance({:?}, {:?})",
            lhs,
            rhs
        );
    }
}
|
|
|
|
/// Exercises `block_match_score` on paragraph blocks that share the same
/// bounding box: identical blocks must score above 0.9, and blocks that differ
/// only in text must score strictly between 0.5 and 0.9.
#[test]
fn test_block_match_score() {
    use pdftract_cli::inspect::api::block_match_score;

    // Every block in this test has the same kind and bbox; only the text varies.
    let paragraph = |text: &str| pdftract_core::schema::BlockJson {
        kind: String::from("paragraph"),
        text: String::from(text),
        bbox: [100.0, 100.0, 200.0, 200.0],
        level: None,
        table_index: None,
        spans: vec![],
        receipt: None,
    };

    let reference = paragraph("Hello world");

    // Identical block (high score).
    let twin = paragraph("Hello world");
    let same_score = block_match_score(&reference, &twin);
    assert!(same_score > 0.9);

    // Different text (lower score): bbox matches but text doesn't.
    let reworded = paragraph("Goodbye world");
    let reworded_score = block_match_score(&reference, &reworded);
    assert!(0.5 < reworded_score && reworded_score < 0.9);
}
|
|
|
|
/// Exercises `span_match_score` on spans that share bounding box, font and
/// size: identical spans must score above 0.9, and spans that differ only in
/// text must score strictly between 0.4 and 0.9.
#[test]
fn test_span_match_score() {
    use pdftract_cli::inspect::api::span_match_score;

    // Every span in this test is identical apart from its text.
    let helvetica_span = |text: &str| pdftract_core::schema::SpanJson {
        text: String::from(text),
        bbox: [100.0, 100.0, 150.0, 120.0],
        font: String::from("Helvetica"),
        size: 12.0,
        color: None,
        rendering_mode: None,
        confidence: None,
        confidence_source: None,
        lang: None,
        flags: vec![],
        receipt: None,
        column: None,
    };

    let reference = helvetica_span("Hello");

    // Identical span (high score).
    let twin = helvetica_span("Hello");
    let same_score = span_match_score(&reference, &twin);
    assert!(same_score > 0.9);

    // Different text (lower score): bbox matches but text doesn't.
    let reworded = helvetica_span("World");
    let reworded_score = span_match_score(&reference, &reworded);
    assert!(0.4 < reworded_score && reworded_score < 0.9);
}
|