//! Integration test for audit logging. //! //! This test verifies that: //! 1. The --audit-log flag is accepted by serve, mcp, and inspect subcommands //! 2. The audit log writer creates valid NDJSON output //! 3. Log-policy enforcement redacts sensitive values //! 4. Stdio MCP mode omits client_ip field use pdftract_core::audit::{AuditLogWriter, AuditRecord}; use std::io::BufRead; use std::path::PathBuf; use tempfile::TempDir; #[test] fn test_audit_log_creates_valid_ndjson() { let temp_dir = TempDir::new().unwrap(); let audit_path = temp_dir.path().join("audit.ndjson"); let writer = AuditLogWriter::open(&audit_path).unwrap(); // Write a sample audit record let record = AuditRecord::new("extract", Some("pdftract-v1:abcd1234".to_string()), 1234, 200) .with_client_ip("10.0.0.1") .with_diagnostics(vec!["XREF_REPAIRED".to_string()]); writer.write_record(&record).unwrap(); // Read back and verify let file = std::fs::File::open(&audit_path).unwrap(); let reader = std::io::BufReader::new(file); let lines: Vec = reader.lines().map(|l| l.unwrap()).collect(); assert_eq!(lines.len(), 1, "Should have exactly one line"); let line = &lines[0]; let parsed: serde_json::Value = serde_json::from_str(line).unwrap(); assert_eq!(parsed["tool"], "extract"); assert_eq!(parsed["fingerprint"], "pdftract-v1:abcd1234"); assert_eq!(parsed["duration_ms"], 1234); assert_eq!(parsed["status"], 200); assert_eq!(parsed["client_ip"], "10.0.0.1"); assert_eq!(parsed["diagnostics"].as_array().unwrap().len(), 1); assert_eq!(parsed["diagnostics"][0], "XREF_REPAIRED"); // Verify it has a timestamp field assert!(parsed["ts"].is_string()); assert!(parsed["ts"].as_str().unwrap().len() > 0); } #[test] fn test_audit_log_omit_client_ip_for_stdio() { let temp_dir = TempDir::new().unwrap(); let audit_path = temp_dir.path().join("audit.ndjson"); let writer = AuditLogWriter::open(&audit_path).unwrap(); // Write a record without client_ip (stdio mode) let record = AuditRecord::new("mcp.extract", None, 500, 500); writer.write_record(&record).unwrap(); // Read back and verify let file = std::fs::File::open(&audit_path).unwrap(); let reader = std::io::BufReader::new(file); let lines: Vec = reader.lines().map(|l| l.unwrap()).collect(); let parsed: serde_json::Value = serde_json::from_str(&lines[0]).unwrap(); // client_ip field should be absent for stdio mode assert!(parsed.get("client_ip").is_none(), "client_ip should be absent for stdio mode"); } #[test] fn test_audit_log_appends_multiple_records() { let temp_dir = TempDir::new().unwrap(); let audit_path = temp_dir.path().join("audit.ndjson"); let writer = AuditLogWriter::open(&audit_path).unwrap(); // Write multiple records for i in 0..5 { let record = AuditRecord::new("extract", Some(format!("pdftract-v1:{:x}", i)), i * 100, 200); writer.write_record(&record).unwrap(); } // Read back and verify let file = std::fs::File::open(&audit_path).unwrap(); let reader = std::io::BufReader::new(file); let lines: Vec = reader.lines().map(|l| l.unwrap()).collect(); assert_eq!(lines.len(), 5, "Should have 5 lines"); } #[test] fn test_audit_log_policy_enforcement_redacts_secrets() { use pdftract_core::log_policy; // Test that password patterns are redacted let line_with_password = "user:john password:secret123 action:extract"; let redacted = log_policy::redact_audit_log_line(line_with_password); assert!(redacted.contains("[REDACTED]")); assert!(!redacted.contains("secret123")); // Test that bearer tokens are redacted let line_with_token = "Authorization: Bearer abc123xyz456"; let redacted = log_policy::redact_audit_log_line(line_with_token); assert!(redacted.contains("[REDACTED]")); assert!(!redacted.contains("abc123xyz456")); // Test that cookies are redacted let line_with_cookie = "Cookie: session_id=secret_value"; let redacted = log_policy::redact_audit_log_line(line_with_cookie); assert!(redacted.contains("[REDACTED]")); assert!(!redacted.contains("secret_value")); // Test that normal content is preserved let normal_line = r#"{"tool":"extract","fingerprint":"pdftract-v1:abcd"}"#; let redacted = log_policy::redact_audit_log_line(normal_line); assert!(redacted.contains("extract")); assert!(redacted.contains("pdftract-v1:abcd")); assert!(!redacted.contains("[REDACTED]")); } #[test] fn test_audit_record_matches_plan_spec() { // Verify the AuditRecord matches the spec from plan lines 974-978 let record = AuditRecord::new("extract", Some("pdftract-v1:abcd1234".to_string()), 1234, 200) .with_client_ip("10.0.0.1") .with_diagnostics(vec!["XREF_REPAIRED".to_string()]); let json = serde_json::to_string(&record).unwrap(); let parsed: serde_json::Value = serde_json::from_str(&json).unwrap(); // Verify all required fields are present assert!(parsed["ts"].is_string(), "ts field must be present (ISO-8601 timestamp)"); assert!(parsed["client_ip"].is_string(), "client_ip field must be present"); assert!(parsed["tool"].is_string(), "tool field must be present"); assert!(parsed["fingerprint"].is_string(), "fingerprint field must be present"); assert!(parsed["duration_ms"].is_number(), "duration_ms field must be present"); assert!(parsed["status"].is_number(), "status field must be present (u16 HTTP-style)"); assert!(parsed["diagnostics"].is_array(), "diagnostics field must be present (Vec)"); } #[test] fn test_audit_log_writer_crash_safety() { let temp_dir = TempDir::new().unwrap(); let audit_path = temp_dir.path().join("audit.ndjson"); let writer = AuditLogWriter::open(&audit_path).unwrap(); // Write a record and verify it's flushed immediately let record = AuditRecord::new("extract", Some("pdftract-v1:abcd".to_string()), 100, 200); writer.write_record(&record).unwrap(); // Read back immediately - the record should be there (flushed) let contents = std::fs::read_to_string(&audit_path).unwrap(); assert!(contents.contains("extract"), "Record should be flushed immediately"); assert!(contents.ends_with('\n'), "Record should end with newline"); } #[test] fn test_audit_record_serialization_is_single_line() { let record = AuditRecord::new("extract", Some("pdftract-v1:abcd".to_string()), 1234, 200) .with_diagnostics(vec!["XREF_REPAIRED".to_string(), "STREAM_BOMB".to_string()]); let json = serde_json::to_string(&record).unwrap(); // Verify it's a single line (no newlines) assert!(!json.contains('\n'), "Audit record should be single-line JSON"); assert!(!json.contains('\r'), "Audit record should not contain carriage returns"); // Verify it's valid JSON let _parsed: serde_json::Value = serde_json::from_str(&json).unwrap(); }