miroir/crates/miroir-proxy/tests/p7_5_structured_logging.rs
jedarden 4777bb6834 fix(cli): add --version and --help flags to miroir-proxy
Adds clap-based CLI argument parsing so `miroir-proxy --version`
and `miroir-proxy --help` print version/usage and exit instead
of starting the server and hanging.

Also fixes numerous pre-existing clippy warnings in test files:
- digit grouping inconsistencies
- unused functions/variables
- useless_vec (vec! -> array)
- assert!(true) placeholders
- too_many_arguments

Resolves: bf-31ff
2026-05-26 03:02:56 -04:00

689 lines
23 KiB
Rust

//! P7.5 Structured JSON logging + request IDs + trace correlation tests.
//!
//! Tests for plan §10 structured logging format:
//! - JSON-per-line parseable by jq
//! - request_id on every log line (same as X-Request-Id header)
//! - No PII (API keys, document content, query strings) in logs
//! - Log volume: 2 INFO entries per search request (middleware + search handler)
use axum::{body::Body, extract::Request, http::StatusCode, Router};
use std::sync::{Arc, Mutex};
use tower::ServiceExt;
/// Helper: parse a JSON log line and extract fields
fn parse_log_line(line: &str) -> Option<serde_json::Value> {
serde_json::from_str(line).ok()
}
/// Helper: check if a string is a valid 8-char hex request ID (from RequestId::new)
fn is_valid_request_id(s: &str) -> bool {
s.len() == 8 && s.chars().all(|c| c.is_ascii_hexdigit())
}
/// Helper: check if a line contains PII patterns
fn contains_pii(line: &str) -> bool {
let lower = line.to_lowercase();
// Check for API key patterns
if lower.contains("api_key") || lower.contains("apikey") || lower.contains("api-key") {
// Allow if it's just "api_key_hash" or similar
if !lower.contains("hash") && !lower.contains("uid") {
return true;
}
}
// Check for master keys
if lower.contains("master") && (lower.contains("key") || lower.contains("secret")) {
return true;
}
// Check for JWT tokens
if lower.contains("eyj") && lower.len() > 100 {
// JWT base64 pattern
return true;
}
// Check for document content patterns (long text blocks in logs)
if line.len() > 5000 {
// Very long log line might contain document content
return true;
}
// Check for raw query strings (both URL-style q= and JSON-style "q":)
if (lower.contains("q=") || lower.contains("\"q\":")) && line.len() > 200 {
// But not if the value is explicitly redacted
if !lower.contains("\"[redacted]\"") {
return true;
}
}
false
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 1: jq parses every log line
// ---------------------------------------------------------------------------
#[test]
fn test_json_logs_parseable_by_jq() {
// Sample log entry in the expected format
let sample_log = r#"{
"timestamp": "2026-05-01T12:00:00.000Z",
"level": "info",
"target": "miroir.request",
"message": "GET /indexes/products/search 200",
"request_id": "abc123",
"pod_id": "miroir-proxy-7f8d9c6d5-abc12",
"duration_ms": 42,
"status": 200,
"method": "GET",
"path_template": "/indexes/{uid}/search"
}"#;
let parsed = parse_log_line(sample_log);
assert!(parsed.is_some(), "Sample log should parse as JSON");
let log_obj = parsed.unwrap();
assert_eq!(log_obj["level"], "info");
assert_eq!(log_obj["target"], "miroir.request");
assert_eq!(log_obj["request_id"], "abc123");
assert_eq!(log_obj["duration_ms"], 42);
}
#[test]
fn test_request_id_format_in_logs() {
// Verify request_id appears in JSON logs
let sample_log = r#"{
"timestamp": "2026-05-01T12:00:00.000Z",
"level": "info",
"target": "miroir.request",
"message": "search completed",
"request_id": "a1b2c3d4",
"pod_id": "test-pod",
"duration_ms": 42
}"#;
let parsed = parse_log_line(sample_log).unwrap();
let request_id = parsed["request_id"].as_str().unwrap();
// Request IDs should be 8 hex chars (from RequestId::new())
assert_eq!(request_id.len(), 8);
assert!(request_id.chars().all(|c| c.is_ascii_hexdigit()));
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 2: request_id traceable across pods
// ---------------------------------------------------------------------------
#[test]
fn test_request_id_extraction_from_logs() {
let logs = [
r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","request_id":"abc12345","pod_id":"pod-1","message":"GET /search 200"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","request_id":"abc12345","pod_id":"pod-1","node_id":"node-1","message":"node call started"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.010Z","level":"info","target":"miroir.search","request_id":"abc12345","pod_id":"pod-1","index":"products","message":"search completed"}"#,
];
// Extract all logs with request_id = "abc12345"
let target_id = "abc12345";
let matching_logs: Vec<_> = logs
.iter()
.filter(|line| line.contains(&format!("\"request_id\":\"{target_id}\"")))
.collect();
assert_eq!(
matching_logs.len(),
3,
"Should find all 3 log lines for the request"
);
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 3: No PII in logs
// ---------------------------------------------------------------------------
#[test]
fn test_no_api_keys_in_logs() {
// These patterns should NOT appear in logs
let pii_patterns = vec![
r#""api_key": "master_key_12345""#,
r#""bearer": "Bearer master_key_12345""#,
r#""authorization": "Bearer master_key_12345""#,
];
for pattern in pii_patterns {
assert!(
contains_pii(pattern),
"Pattern should be flagged as PII: {pattern}"
);
}
// Hashed versions are OK
let safe_patterns = vec![
r#""api_key_hash": "a1b2c3d4""#,
r#""key_uid": "12345""#,
r#""tenant_id": "tenant-123""#,
];
for pattern in safe_patterns {
assert!(
!contains_pii(pattern),
"Hashed pattern should NOT be flagged as PII: {pattern}"
);
}
}
#[test]
fn test_no_query_strings_in_logs() {
// Raw Meilisearch query content should not be logged
// The contains_pii function checks for long q= patterns (user queries)
// Line must be >200 chars total and contain "q=" to trigger PII detection
let padding = "x".repeat(200);
let bad_log = format!(
r#"{{"message": "processing search", "q": "SELECT * FROM users WHERE email = 'test@example.com'", "padding": "{padding}"}}"#
);
assert!(
contains_pii(&bad_log),
"Long query string should be flagged as PII"
);
// Redacted queries are OK
let good_log = r#"{"message": "processing search", "query": "[redacted]", "q": "[redacted]"}"#;
assert!(
!contains_pii(good_log),
"Redacted query should NOT be flagged as PII"
);
// Short queries are OK (e.g., just a word or two)
let short_query = r#"{"message": "processing search", "q": "test"}"#;
assert!(
!contains_pii(short_query),
"Short query should NOT be flagged as PII"
);
// Long line without q= is OK
let long_safe = format!(
r#"{{"message": "processing", "data": "{}"}}"#,
"x".repeat(200)
);
assert!(
!contains_pii(&long_safe),
"Long line without q= should NOT be flagged as PII"
);
}
#[test]
fn test_no_document_content_in_logs() {
// Large JSON blocks likely contain document content
let large_log = r#"{"message": "indexing document", "doc": {"#;
let large_log_padded = format!("{}{}", large_log, "x".repeat(5000));
assert!(
contains_pii(&large_log_padded),
"Very long log line should be flagged as potential PII"
);
// Normal log lines are OK
let normal_log = r#"{"message": "indexed 100 documents", "count": 100}"#;
assert!(
!contains_pii(normal_log),
"Normal log line should NOT be flagged"
);
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 4: Log volume
// ---------------------------------------------------------------------------
#[test]
fn test_log_volume_info_level() {
// At INFO level, search requests produce 2 INFO log entries:
// 1 from telemetry middleware (miroir.request) + 1 from search handler (miroir.search)
let request_logs = [
r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","message":"GET /indexes/products/search 200"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","message":"node call"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.002Z","level":"info","target":"miroir.search","message":"search completed"}"#,
];
let info_count = request_logs
.iter()
.filter(|line| line.contains(r#""level":"info""#))
.count();
assert_eq!(
info_count, 2,
"INFO level should have 2 log lines per search request (middleware + search handler)"
);
}
#[test]
fn test_debug_level_has_more_logs() {
// At DEBUG level, we get per-node logs in addition to the INFO logs
let debug_logs = [
r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","message":"GET / 200"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","message":"node call started"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.002Z","level":"debug","target":"miroir.node","message":"node call completed"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.003Z","level":"info","target":"miroir.search","message":"search completed"}"#,
];
let debug_count = debug_logs
.iter()
.filter(|line| line.contains(r#""level":"debug""#))
.count();
assert!(
debug_count > 1,
"DEBUG level should have multiple log lines"
);
}
// ---------------------------------------------------------------------------
// Integration test: Verify actual log format from middleware
// ---------------------------------------------------------------------------
#[test]
fn test_middleware_log_format() {
use axum::http::HeaderMap;
use miroir_proxy::middleware::{generate_request_id, RequestIdExt};
// Test request_id generation format
let id = generate_request_id();
assert_eq!(id.len(), 16, "Request ID should be 16 hex chars");
assert!(
id.chars().all(|c| c.is_ascii_hexdigit()),
"Request ID should be hexadecimal"
);
// Test header extraction
let mut headers = HeaderMap::new();
headers.set_request_id("test123456789abcd");
assert_eq!(
headers.get_request_id(),
Some("test123456789abcd".to_string())
);
}
// ---------------------------------------------------------------------------
// Log level verification
// ---------------------------------------------------------------------------
#[test]
fn test_log_levels_correct() {
let logs_by_level = vec![
(
r#"{"level":"error","target":"miroir.request","message":"internal failure"}"#,
"error",
),
(
r#"{"level":"warn","target":"miroir.request","message":"degraded response"}"#,
"warn",
),
(
r#"{"level":"info","target":"miroir.request","message":"GET / 200"}"#,
"info",
),
(
r#"{"level":"debug","target":"miroir.node","message":"node call"}"#,
"debug",
),
(
r#"{"level":"trace","target":"miroir.scatter","message":"fan-out buffer"}"#,
"trace",
),
];
for (log, expected_level) in logs_by_level {
let parsed = parse_log_line(log).unwrap();
assert_eq!(parsed["level"], expected_level);
}
}
// ---------------------------------------------------------------------------
// Verify SearchRequestBody Debug impl redacts sensitive fields
// ---------------------------------------------------------------------------
#[test]
fn test_search_request_debug_redaction() {
// Verified by unit test in routes/search.rs (SearchRequestBody is private).
// That test confirms the Debug impl redacts `q` and `filter` fields.
// This placeholder keeps the integration test file's acceptance criteria visible.
}
// ---------------------------------------------------------------------------
// Request ID middleware integration tests (P7.5.a)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_request_id_response_header() {
// Acceptance criterion: Every response includes X-Request-Id: <8-char hex>
use axum::middleware;
use axum::routing::get;
use miroir_proxy::middleware::request_id_middleware;
async fn handler() -> &'static str {
"ok"
}
let app = Router::new()
.route("/test", get(handler))
.layer(middleware::from_fn(request_id_middleware));
let response = app
.oneshot(Request::builder().uri("/test").body(Body::empty()).unwrap())
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let headers = response.headers();
let request_id = headers.get("x-request-id");
assert!(
request_id.is_some(),
"X-Request-Id header should be present"
);
let id_str = request_id.unwrap().to_str().unwrap();
assert!(
is_valid_request_id(id_str),
"X-Request-Id should be 8 hex chars, got: {id_str}"
);
}
#[tokio::test]
async fn test_request_id_extension_accessible() {
// Acceptance criterion: Request.extensions().get::<RequestId>() works from any handler
use axum::middleware;
use axum::{routing::get, Extension};
use miroir_proxy::middleware::{request_id_middleware, RequestId};
let app = Router::new()
.route(
"/test",
get(|Extension(id): Extension<RequestId>| async move {
assert!(
is_valid_request_id(id.as_str()),
"RequestId in extensions should be 8 hex chars"
);
"ok"
}),
)
.layer(middleware::from_fn(request_id_middleware));
let response = app
.oneshot(Request::builder().uri("/test").body(Body::empty()).unwrap())
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
}
#[tokio::test]
async fn test_consecutive_requests_different_ids() {
// Acceptance criterion: Two consecutive requests produce different IDs
use axum::extract::State as AxumState;
use axum::middleware;
use axum::{routing::get, Extension};
use miroir_proxy::middleware::{request_id_middleware, RequestId};
use std::sync::Arc;
use std::sync::Mutex;
let captured_ids = Arc::new(Mutex::new(Vec::<String>::new()));
let app = Router::new()
.route(
"/test",
get(
|Extension(id): Extension<RequestId>,
AxumState(ids): AxumState<Arc<Mutex<Vec<String>>>>| async move {
ids.lock().unwrap().push(id.as_str().to_string());
"ok"
},
),
)
.layer(middleware::from_fn(request_id_middleware))
.with_state(captured_ids.clone());
// Make two requests consecutively
for _ in 0..2 {
let _ = app
.clone()
.oneshot(Request::builder().uri("/test").body(Body::empty()).unwrap())
.await
.unwrap();
}
let ids = captured_ids.lock().unwrap();
assert_eq!(ids.len(), 2, "Should have captured 2 request IDs");
assert_ne!(
ids[0], ids[1],
"Two consecutive requests should have different IDs"
);
assert!(
is_valid_request_id(&ids[0]),
"First ID should be 8 hex chars"
);
assert!(
is_valid_request_id(&ids[1]),
"Second ID should be 8 hex chars"
);
}
#[tokio::test]
async fn test_request_id_preserves_existing_header() {
// If a request already has X-Request-Id, it should be preserved
use axum::middleware;
use axum::{routing::get, Extension};
use miroir_proxy::middleware::{request_id_middleware, RequestId};
let app = Router::new()
.route(
"/test",
get(|Extension(id): Extension<RequestId>| async move { id.as_str().to_string() }),
)
.layer(middleware::from_fn(request_id_middleware));
// Send a request with an existing X-Request-Id header
let existing_id = "deadbeef";
let response = app
.oneshot(
Request::builder()
.uri("/test")
.header("x-request-id", existing_id)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
// The response should have the same X-Request-Id
let response_id = response
.headers()
.get("x-request-id")
.unwrap()
.to_str()
.unwrap();
assert_eq!(
response_id, existing_id,
"Should preserve existing X-Request-Id"
);
}
#[tokio::test]
async fn test_request_id_invalid_header_is_replaced() {
// If a request has an invalid X-Request-Id (not 8 hex chars),
// the middleware should generate a new one
use axum::middleware;
use axum::{routing::get, Extension};
use miroir_proxy::middleware::{request_id_middleware, RequestId};
let app = Router::new()
.route(
"/test",
get(|Extension(id): Extension<RequestId>| async move { id.as_str().to_string() }),
)
.layer(middleware::from_fn(request_id_middleware));
// Send a request with an INVALID X-Request-Id header (wrong length)
let invalid_id = "not-8-chars";
let response = app
.oneshot(
Request::builder()
.uri("/test")
.header("x-request-id", invalid_id)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
// The response should have a VALID 8-char hex ID (replaced)
let response_id = response
.headers()
.get("x-request-id")
.unwrap()
.to_str()
.unwrap();
assert_ne!(
response_id, invalid_id,
"Should replace invalid X-Request-Id"
);
assert!(
is_valid_request_id(response_id),
"Replaced ID should be 8 hex chars"
);
}
// ---------------------------------------------------------------------------
// P7.5.c Trace correlation: request_id appears in all log lines
// ---------------------------------------------------------------------------
// Custom writer to capture log output for testing
struct CaptureWriter {
logs: Arc<Mutex<Vec<String>>>,
}
impl std::io::Write for CaptureWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Ok(s) = std::str::from_utf8(buf) {
self.logs.lock().unwrap().push(s.to_string());
}
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
#[tokio::test]
async fn test_request_id_appears_in_all_log_lines_within_request() {
// Acceptance criterion: Every log line inside a request must carry request_id=<id>
// This is achieved via tracing::Span with request_id recorded on span enter
// and tracing_subscriber::fmt().with_current_span(true)
use axum::{routing::get, Extension};
use miroir_core::config::MiroirConfig;
use miroir_proxy::middleware::{
request_id_middleware, telemetry_middleware, Metrics, RequestId, TelemetryState,
};
use std::sync::{Arc, Mutex};
use tracing::info;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
// Capture logs into a vector
let captured_logs = Arc::new(Mutex::new(Vec::<String>::new()));
// Create a custom layer that captures log events
let capture_layer = tracing_subscriber::fmt::layer()
.json()
.with_target(true)
.with_current_span(true) // This is the key: includes span fields on all events
.with_span_list(false)
.flatten_event(true)
.with_writer(std::sync::Mutex::new(CaptureWriter {
logs: captured_logs.clone(),
}));
// Initialize the subscriber with our capture layer
tracing_subscriber::registry().with(capture_layer).init();
// Build telemetry state
let config = MiroirConfig::default();
let metrics = Metrics::new(&config);
let telemetry = TelemetryState {
metrics: metrics.clone(),
pod_id: "test-pod".to_string(),
};
// Build app with full middleware stack
// IMPORTANT: Layer order matters! Last layer() call = outermost = runs first.
// request_id_middleware must run BEFORE telemetry_middleware.
// To achieve this, request_id_middleware layer() call comes LAST (so it's outermost).
let app = axum::Router::new()
.route(
"/test",
get(|Extension(_id): Extension<RequestId>| async move {
// These log events should all inherit the request_id from the parent span
info!(message = "handler started", step = "1");
info!(message = "processing request", step = "2");
info!(message = "handler completed", step = "3");
"ok"
}),
)
.layer(axum::middleware::from_fn_with_state(
telemetry.clone(),
telemetry_middleware,
))
.layer(axum::middleware::from_fn(request_id_middleware));
// Make a request
let response = app
.oneshot(Request::builder().uri("/test").body(Body::empty()).unwrap())
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
// Extract the request_id from the response header
let request_id = response
.headers()
.get("x-request-id")
.unwrap()
.to_str()
.unwrap();
// Verify all captured log lines contain the request_id
let logs = captured_logs.lock().unwrap();
let log_lines: Vec<&str> = logs
.iter()
.flat_map(|s| s.lines())
.filter(|line| line.contains("message")) // Only look at actual log lines
.collect();
// We should have at least 3 log lines from the handler
assert!(
log_lines.len() >= 3,
"Expected at least 3 log lines from handler, got: {}",
log_lines.len()
);
// Every log line should contain the request_id (either at top level or in span)
for line in &log_lines {
let json =
parse_log_line(line).unwrap_or_else(|| panic!("Log line should be valid JSON: {line}"));
// Verify request_id field exists and matches the response header
// It can be at the top level OR nested in the span object
let log_request_id = json
.get("request_id")
.or_else(|| json.get("span").and_then(|s| s.get("request_id")))
.and_then(|v| v.as_str());
assert!(
log_request_id.is_some(),
"Log line missing request_id field: {line}"
);
assert_eq!(
log_request_id.unwrap(),
request_id,
"Log line request_id should match response header: {line}"
);
}
}