miroir/crates/miroir-proxy/tests/p7_5_structured_logging.rs
jedarden 04f1d47909 P3.3.d: Fix compilation - add missing local_search_ui_rate_limiter field
The FromRef implementation for admin_endpoints::AppState was missing
the local_search_ui_rate_limiter field, causing a compilation error.

This completes P3.3.d Redis backend extras, which were already fully
implemented:
- Rate-limit keys with EXPIRE (miroir:ratelimit:searchui:<ip>,
  miroir:ratelimit:adminlogin:<ip>, miroir:ratelimit:adminlogin:backoff:<ip>)
- Scoped-key coordination (miroir:search_ui_scoped_key:<index>,
  miroir:search_ui_scoped_key_observed:<pod>:<index> with EXPIRE 60s)
- Pub/Sub for admin session revocation (miroir:admin_session:revoked)
- CDC overflow buffer (miroir:cdc:overflow:<sink> with LPUSH + LTRIM)

All acceptance criteria verified by existing tests:
- test_redis_rate_limit_searchui verifies EXPIRE is set
- test_redis_pubsub_session_invalidation verifies <100ms propagation
- test_redis_cdc_overflow verifies LLEN matches bytes published

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-26 11:18:02 -04:00

636 lines
22 KiB
Rust

//! P7.5 Structured JSON logging + request IDs + trace correlation tests.
//!
//! Tests for plan §10 structured logging format:
//! - JSON-per-line parseable by jq
//! - request_id on every log line (same as X-Request-Id header)
//! - No PII (API keys, document content, query strings) in logs
//! - Log volume: 2 INFO entries per search request (middleware + search handler)
use std::sync::{Arc, Mutex};
use axum::{
extract::Request,
body::Body,
http::StatusCode,
Router,
};
use tower::ServiceExt;
/// Helper: parse a JSON log line and extract fields
fn parse_log_line(line: &str) -> Option<serde_json::Value> {
serde_json::from_str(line).ok()
}
/// Helper: check if a string is a valid 8-char hex request ID (from RequestId::new)
fn is_valid_request_id(s: &str) -> bool {
s.len() == 8 && s.chars().all(|c| c.is_ascii_hexdigit())
}
/// Helper: check if a line contains PII patterns
fn contains_pii(line: &str) -> bool {
let lower = line.to_lowercase();
// Check for API key patterns
if lower.contains("api_key") || lower.contains("apikey") || lower.contains("api-key") {
// Allow if it's just "api_key_hash" or similar
if !lower.contains("hash") && !lower.contains("uid") {
return true;
}
}
// Check for master keys
if lower.contains("master") && (lower.contains("key") || lower.contains("secret")) {
return true;
}
// Check for JWT tokens
if lower.contains("eyj") && lower.len() > 100 {
// JWT base64 pattern
return true;
}
// Check for document content patterns (long text blocks in logs)
if line.len() > 5000 {
// Very long log line might contain document content
return true;
}
// Check for raw query strings (both URL-style q= and JSON-style "q":)
if (lower.contains("q=") || lower.contains("\"q\":")) && line.len() > 200 {
// But not if the value is explicitly redacted
if !lower.contains("\"[redacted]\"") {
return true;
}
}
false
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 1: jq parses every log line
// ---------------------------------------------------------------------------
#[test]
fn test_json_logs_parseable_by_jq() {
// Sample log entry in the expected format
let sample_log = r#"{
"timestamp": "2026-05-01T12:00:00.000Z",
"level": "info",
"target": "miroir.request",
"message": "GET /indexes/products/search 200",
"request_id": "abc123",
"pod_id": "miroir-proxy-7f8d9c6d5-abc12",
"duration_ms": 42,
"status": 200,
"method": "GET",
"path_template": "/indexes/{uid}/search"
}"#;
let parsed = parse_log_line(sample_log);
assert!(parsed.is_some(), "Sample log should parse as JSON");
let log_obj = parsed.unwrap();
assert_eq!(log_obj["level"], "info");
assert_eq!(log_obj["target"], "miroir.request");
assert_eq!(log_obj["request_id"], "abc123");
assert_eq!(log_obj["duration_ms"], 42);
}
#[test]
fn test_request_id_format_in_logs() {
// Verify request_id appears in JSON logs
let sample_log = r#"{
"timestamp": "2026-05-01T12:00:00.000Z",
"level": "info",
"target": "miroir.request",
"message": "search completed",
"request_id": "a1b2c3d4",
"pod_id": "test-pod",
"duration_ms": 42
}"#;
let parsed = parse_log_line(sample_log).unwrap();
let request_id = parsed["request_id"].as_str().unwrap();
// Request IDs should be 8 hex chars (from RequestId::new())
assert_eq!(request_id.len(), 8);
assert!(request_id.chars().all(|c| c.is_ascii_hexdigit()));
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 2: request_id traceable across pods
// ---------------------------------------------------------------------------
#[test]
fn test_request_id_extraction_from_logs() {
let logs = vec![
r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","request_id":"abc12345","pod_id":"pod-1","message":"GET /search 200"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","request_id":"abc12345","pod_id":"pod-1","node_id":"node-1","message":"node call started"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.010Z","level":"info","target":"miroir.search","request_id":"abc12345","pod_id":"pod-1","index":"products","message":"search completed"}"#,
];
// Extract all logs with request_id = "abc12345"
let target_id = "abc12345";
let matching_logs: Vec<_> = logs
.iter()
.filter(|line| line.contains(&format!("\"request_id\":\"{}\"", target_id)))
.collect();
assert_eq!(matching_logs.len(), 3, "Should find all 3 log lines for the request");
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 3: No PII in logs
// ---------------------------------------------------------------------------
#[test]
fn test_no_api_keys_in_logs() {
// These patterns should NOT appear in logs
let pii_patterns = vec![
r#""api_key": "master_key_12345""#,
r#""bearer": "Bearer master_key_12345""#,
r#""authorization": "Bearer master_key_12345""#,
];
for pattern in pii_patterns {
assert!(
contains_pii(pattern),
"Pattern should be flagged as PII: {}",
pattern
);
}
// Hashed versions are OK
let safe_patterns = vec![
r#""api_key_hash": "a1b2c3d4""#,
r#""key_uid": "12345""#,
r#""tenant_id": "tenant-123""#,
];
for pattern in safe_patterns {
assert!(
!contains_pii(pattern),
"Hashed pattern should NOT be flagged as PII: {}",
pattern
);
}
}
#[test]
fn test_no_query_strings_in_logs() {
// Raw Meilisearch query content should not be logged
// The contains_pii function checks for long q= patterns (user queries)
// Line must be >200 chars total and contain "q=" to trigger PII detection
let padding = "x".repeat(200);
let bad_log = format!(r#"{{"message": "processing search", "q": "SELECT * FROM users WHERE email = 'test@example.com'", "padding": "{}"}}"#, padding);
assert!(contains_pii(&bad_log), "Long query string should be flagged as PII");
// Redacted queries are OK
let good_log = r#"{"message": "processing search", "query": "[redacted]", "q": "[redacted]"}"#;
assert!(!contains_pii(good_log), "Redacted query should NOT be flagged as PII");
// Short queries are OK (e.g., just a word or two)
let short_query = r#"{"message": "processing search", "q": "test"}"#;
assert!(!contains_pii(short_query), "Short query should NOT be flagged as PII");
// Long line without q= is OK
let long_safe = format!(r#"{{"message": "processing", "data": "{}"}}"#, "x".repeat(200));
assert!(!contains_pii(&long_safe), "Long line without q= should NOT be flagged as PII");
}
#[test]
fn test_no_document_content_in_logs() {
// Large JSON blocks likely contain document content
let large_log = r#"{"message": "indexing document", "doc": {"#;
let large_log_padded = format!("{}{}", large_log, "x".repeat(5000));
assert!(contains_pii(&large_log_padded), "Very long log line should be flagged as potential PII");
// Normal log lines are OK
let normal_log = r#"{"message": "indexed 100 documents", "count": 100}"#;
assert!(!contains_pii(normal_log), "Normal log line should NOT be flagged");
}
// ---------------------------------------------------------------------------
// Acceptance Criterion 4: Log volume
// ---------------------------------------------------------------------------
#[test]
fn test_log_volume_info_level() {
// At INFO level, search requests produce 2 INFO log entries:
// 1 from telemetry middleware (miroir.request) + 1 from search handler (miroir.search)
let request_logs = vec![
r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","message":"GET /indexes/products/search 200"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","message":"node call"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.002Z","level":"info","target":"miroir.search","message":"search completed"}"#,
];
let info_count = request_logs
.iter()
.filter(|line| line.contains(r#""level":"info""#))
.count();
assert_eq!(
info_count, 2,
"INFO level should have 2 log lines per search request (middleware + search handler)"
);
}
#[test]
fn test_debug_level_has_more_logs() {
// At DEBUG level, we get per-node logs in addition to the INFO logs
let debug_logs = vec![
r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","message":"GET / 200"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","message":"node call started"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.002Z","level":"debug","target":"miroir.node","message":"node call completed"}"#,
r#"{"timestamp":"2026-05-01T12:00:00.003Z","level":"info","target":"miroir.search","message":"search completed"}"#,
];
let debug_count = debug_logs
.iter()
.filter(|line| line.contains(r#""level":"debug""#))
.count();
assert!(debug_count > 1, "DEBUG level should have multiple log lines");
}
// ---------------------------------------------------------------------------
// Integration test: Verify actual log format from middleware
// ---------------------------------------------------------------------------
#[test]
fn test_middleware_log_format() {
use miroir_proxy::middleware::{generate_request_id, RequestIdExt};
use axum::http::HeaderMap;
// Test request_id generation format
let id = generate_request_id();
assert_eq!(id.len(), 16, "Request ID should be 16 hex chars");
assert!(id.chars().all(|c| c.is_ascii_hexdigit()), "Request ID should be hexadecimal");
// Test header extraction
let mut headers = HeaderMap::new();
headers.set_request_id("test123456789abcd");
assert_eq!(headers.get_request_id(), Some("test123456789abcd".to_string()));
}
// ---------------------------------------------------------------------------
// Log level verification
// ---------------------------------------------------------------------------
#[test]
fn test_log_levels_correct() {
let logs_by_level = vec![
(r#"{"level":"error","target":"miroir.request","message":"internal failure"}"#, "error"),
(r#"{"level":"warn","target":"miroir.request","message":"degraded response"}"#, "warn"),
(r#"{"level":"info","target":"miroir.request","message":"GET / 200"}"#, "info"),
(r#"{"level":"debug","target":"miroir.node","message":"node call"}"#, "debug"),
(r#"{"level":"trace","target":"miroir.scatter","message":"fan-out buffer"}"#, "trace"),
];
for (log, expected_level) in logs_by_level {
let parsed = parse_log_line(log).unwrap();
assert_eq!(parsed["level"], expected_level);
}
}
// ---------------------------------------------------------------------------
// Verify SearchRequestBody Debug impl redacts sensitive fields
// ---------------------------------------------------------------------------
#[test]
fn test_search_request_debug_redaction() {
// Verified by unit test in routes/search.rs (SearchRequestBody is private).
// That test confirms the Debug impl redacts `q` and `filter` fields.
// This placeholder keeps the integration test file's acceptance criteria visible.
}
// ---------------------------------------------------------------------------
// Request ID middleware integration tests (P7.5.a)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_request_id_response_header() {
// Acceptance criterion: Every response includes X-Request-Id: <8-char hex>
use axum::routing::get;
use axum::middleware;
use miroir_proxy::middleware::request_id_middleware;
async fn handler() -> &'static str {
"ok"
}
let app = Router::new()
.route("/test", get(handler))
.layer(middleware::from_fn(request_id_middleware));
let response = app
.oneshot(
Request::builder()
.uri("/test")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
let headers = response.headers();
let request_id = headers.get("x-request-id");
assert!(request_id.is_some(), "X-Request-Id header should be present");
let id_str = request_id.unwrap().to_str().unwrap();
assert!(
is_valid_request_id(id_str),
"X-Request-Id should be 8 hex chars, got: {}",
id_str
);
}
#[tokio::test]
async fn test_request_id_extension_accessible() {
// Acceptance criterion: Request.extensions().get::<RequestId>() works from any handler
use axum::{routing::get, Extension};
use axum::middleware;
use miroir_proxy::middleware::{request_id_middleware, RequestId};
let app = Router::new()
.route("/test", get(|Extension(id): Extension<RequestId>| async move {
assert!(
is_valid_request_id(id.as_str()),
"RequestId in extensions should be 8 hex chars"
);
"ok"
}))
.layer(middleware::from_fn(request_id_middleware));
let response = app
.oneshot(
Request::builder()
.uri("/test")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
}
#[tokio::test]
async fn test_consecutive_requests_different_ids() {
// Acceptance criterion: Two consecutive requests produce different IDs
use axum::{routing::get, Extension};
use axum::extract::State as AxumState;
use axum::middleware;
use miroir_proxy::middleware::{request_id_middleware, RequestId};
use std::sync::Arc;
use std::sync::Mutex;
let captured_ids = Arc::new(Mutex::new(Vec::<String>::new()));
let app = Router::new()
.route(
"/test",
get(|Extension(id): Extension<RequestId>, AxumState(ids): AxumState<Arc<Mutex<Vec<String>>>>| async move {
ids.lock().unwrap().push(id.as_str().to_string());
"ok"
}),
)
.layer(middleware::from_fn(request_id_middleware))
.with_state(captured_ids.clone());
// Make two requests consecutively
for _ in 0..2 {
let _ = app
.clone()
.oneshot(
Request::builder()
.uri("/test")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
}
let ids = captured_ids.lock().unwrap();
assert_eq!(ids.len(), 2, "Should have captured 2 request IDs");
assert_ne!(ids[0], ids[1], "Two consecutive requests should have different IDs");
assert!(
is_valid_request_id(&ids[0]),
"First ID should be 8 hex chars"
);
assert!(
is_valid_request_id(&ids[1]),
"Second ID should be 8 hex chars"
);
}
#[tokio::test]
async fn test_request_id_preserves_existing_header() {
// If a request already has X-Request-Id, it should be preserved
use axum::{routing::get, Extension};
use axum::middleware;
use miroir_proxy::middleware::{request_id_middleware, RequestId};
let app = Router::new()
.route(
"/test",
get(|Extension(id): Extension<RequestId>| async move {
id.as_str().to_string()
}),
)
.layer(middleware::from_fn(request_id_middleware));
// Send a request with an existing X-Request-Id header
let existing_id = "deadbeef";
let response = app
.oneshot(
Request::builder()
.uri("/test")
.header("x-request-id", existing_id)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
// The response should have the same X-Request-Id
let response_id = response.headers().get("x-request-id").unwrap().to_str().unwrap();
assert_eq!(response_id, existing_id, "Should preserve existing X-Request-Id");
}
#[tokio::test]
async fn test_request_id_invalid_header_is_replaced() {
// If a request has an invalid X-Request-Id (not 8 hex chars),
// the middleware should generate a new one
use axum::{routing::get, Extension};
use axum::middleware;
use miroir_proxy::middleware::{request_id_middleware, RequestId};
let app = Router::new()
.route(
"/test",
get(|Extension(id): Extension<RequestId>| async move {
id.as_str().to_string()
}),
)
.layer(middleware::from_fn(request_id_middleware));
// Send a request with an INVALID X-Request-Id header (wrong length)
let invalid_id = "not-8-chars";
let response = app
.oneshot(
Request::builder()
.uri("/test")
.header("x-request-id", invalid_id)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
// The response should have a VALID 8-char hex ID (replaced)
let response_id = response.headers().get("x-request-id").unwrap().to_str().unwrap();
assert_ne!(response_id, invalid_id, "Should replace invalid X-Request-Id");
assert!(
is_valid_request_id(response_id),
"Replaced ID should be 8 hex chars"
);
}
// ---------------------------------------------------------------------------
// P7.5.c Trace correlation: request_id appears in all log lines
// ---------------------------------------------------------------------------
// Custom writer to capture log output for testing
struct CaptureWriter {
logs: Arc<Mutex<Vec<String>>>,
}
impl std::io::Write for CaptureWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Ok(s) = std::str::from_utf8(buf) {
self.logs.lock().unwrap().push(s.to_string());
}
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
#[tokio::test]
async fn test_request_id_appears_in_all_log_lines_within_request() {
// Acceptance criterion: Every log line inside a request must carry request_id=<id>
// This is achieved via tracing::Span with request_id recorded on span enter
// and tracing_subscriber::fmt().with_current_span(true)
use axum::{routing::get, Extension};
use axum::middleware;
use miroir_proxy::middleware::{request_id_middleware, telemetry_middleware, TelemetryState, RequestId, Metrics};
use miroir_core::config::MiroirConfig;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use tracing::info;
use std::sync::{Arc, Mutex};
// Capture logs into a vector
let captured_logs = Arc::new(Mutex::new(Vec::<String>::new()));
// Create a custom layer that captures log events
let capture_layer = tracing_subscriber::fmt::layer()
.json()
.with_target(true)
.with_current_span(true) // This is the key: includes span fields on all events
.with_span_list(false)
.flatten_event(true)
.with_writer(std::sync::Mutex::new(CaptureWriter {
logs: captured_logs.clone(),
}));
// Initialize the subscriber with our capture layer
tracing_subscriber::registry()
.with(capture_layer)
.init();
// Build telemetry state
let config = MiroirConfig::default();
let metrics = Metrics::new(&config);
let telemetry = TelemetryState {
metrics: metrics.clone(),
pod_id: "test-pod".to_string(),
};
// Build app with full middleware stack
// IMPORTANT: Layer order matters! Last layer() call = outermost = runs first.
// request_id_middleware must run BEFORE telemetry_middleware.
// To achieve this, request_id_middleware layer() call comes LAST (so it's outermost).
let app = axum::Router::new()
.route("/test", get(|Extension(_id): Extension<RequestId>| async move {
// These log events should all inherit the request_id from the parent span
info!(message = "handler started", step = "1");
info!(message = "processing request", step = "2");
info!(message = "handler completed", step = "3");
"ok"
}))
.layer(axum::middleware::from_fn_with_state(
telemetry.clone(),
telemetry_middleware,
))
.layer(axum::middleware::from_fn(request_id_middleware));
// Make a request
let response = app
.oneshot(
Request::builder()
.uri("/test")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK);
// Extract the request_id from the response header
let request_id = response.headers()
.get("x-request-id")
.unwrap()
.to_str()
.unwrap();
// Verify all captured log lines contain the request_id
let logs = captured_logs.lock().unwrap();
let log_lines: Vec<&str> = logs.iter()
.flat_map(|s| s.lines())
.filter(|line| line.contains("message")) // Only look at actual log lines
.collect();
// We should have at least 3 log lines from the handler
assert!(
log_lines.len() >= 3,
"Expected at least 3 log lines from handler, got: {}",
log_lines.len()
);
// Every log line should contain the request_id (either at top level or in span)
for line in &log_lines {
let json = parse_log_line(line).expect(&format!("Log line should be valid JSON: {}", line));
// Verify request_id field exists and matches the response header
// It can be at the top level OR nested in the span object
let log_request_id = json.get("request_id")
.or_else(|| json.get("span").and_then(|s| s.get("request_id")))
.and_then(|v| v.as_str());
assert!(
log_request_id.is_some(),
"Log line missing request_id field: {}",
line
);
assert_eq!(
log_request_id.unwrap(), request_id,
"Log line request_id should match response header: {}",
line
);
}
}