miroir/crates/miroir-proxy/tests/p7_5_structured_logging.rs

//! P7.5 Structured JSON logging + request IDs + trace correlation tests.
//!
//! Tests for plan §10 structured logging format:
//! - JSON-per-line parseable by jq
//! - request_id on every log line (same as X-Request-Id header)
//! - No PII (API keys, document content, query strings) in logs
//! - Log volume: 2 INFO entries per search request (middleware + search handler)

use std::sync::{Arc, Mutex};
use axum::{
    extract::Request,
    body::Body,
    http::StatusCode,
    Router,
};
use tower::ServiceExt;

/// Helper: parse a JSON log line and extract fields
fn parse_log_line(line: &str) -> Option<serde_json::Value> {
    serde_json::from_str(line).ok()
}

/// Helper: check if a string is a valid 8-char hex request ID (from RequestId::new)
fn is_valid_request_id(s: &str) -> bool {
    s.len() == 8 && s.chars().all(|c| c.is_ascii_hexdigit())
}

/// Helper: check if a line contains PII patterns
fn contains_pii(line: &str) -> bool {
    let lower = line.to_lowercase();
    // Check for API key patterns
    if lower.contains("api_key") || lower.contains("apikey") || lower.contains("api-key") {
        // Allow if it's just "api_key_hash" or similar
        if !lower.contains("hash") && !lower.contains("uid") {
            return true;
        }
    }
    // Check for master keys
    if lower.contains("master") && (lower.contains("key") || lower.contains("secret")) {
        return true;
    }
    // Check for JWT tokens
    if lower.contains("eyj") && lower.len() > 100 {
        // JWT base64 pattern
        return true;
    }
    // Check for document content patterns (long text blocks in logs)
    if line.len() > 5000 {
        // Very long log line might contain document content
        return true;
    }
    // Check for raw query strings (both URL-style q= and JSON-style "q":)
    if (lower.contains("q=") || lower.contains("\"q\":")) && line.len() > 200 {
        // But not if the value is explicitly redacted
        if !lower.contains("\"[redacted]\"") {
            return true;
        }
    }
    false
}

// ---------------------------------------------------------------------------
// Acceptance Criterion 1: jq parses every log line
// ---------------------------------------------------------------------------

#[test]
fn test_json_logs_parseable_by_jq() {
    // Sample log entry in the expected format
    let sample_log = r#"{
        "timestamp": "2026-05-01T12:00:00.000Z",
        "level": "info",
        "target": "miroir.request",
        "message": "GET /indexes/products/search 200",
        "request_id": "abc123",
        "pod_id": "miroir-proxy-7f8d9c6d5-abc12",
        "duration_ms": 42,
        "status": 200,
        "method": "GET",
        "path_template": "/indexes/{uid}/search"
    }"#;

    let parsed = parse_log_line(sample_log);
    assert!(parsed.is_some(), "Sample log should parse as JSON");

    let log_obj = parsed.unwrap();
    assert_eq!(log_obj["level"], "info");
    assert_eq!(log_obj["target"], "miroir.request");
    assert_eq!(log_obj["request_id"], "abc123");
    assert_eq!(log_obj["duration_ms"], 42);
}

#[test]
fn test_request_id_format_in_logs() {
    // Verify request_id appears in JSON logs
    let sample_log = r#"{
        "timestamp": "2026-05-01T12:00:00.000Z",
        "level": "info",
        "target": "miroir.request",
        "message": "search completed",
        "request_id": "a1b2c3d4",
        "pod_id": "test-pod",
        "duration_ms": 42
    }"#;

    let parsed = parse_log_line(sample_log).unwrap();
    let request_id = parsed["request_id"].as_str().unwrap();

    // Request IDs should be 8 hex chars (from RequestId::new())
    assert_eq!(request_id.len(), 8);
    assert!(request_id.chars().all(|c| c.is_ascii_hexdigit()));
}

// ---------------------------------------------------------------------------
// Acceptance Criterion 2: request_id traceable across pods
// ---------------------------------------------------------------------------

#[test]
fn test_request_id_extraction_from_logs() {
    let logs = vec![
        r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","request_id":"abc12345","pod_id":"pod-1","message":"GET /search 200"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","request_id":"abc12345","pod_id":"pod-1","node_id":"node-1","message":"node call started"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.010Z","level":"info","target":"miroir.search","request_id":"abc12345","pod_id":"pod-1","index":"products","message":"search completed"}"#,
    ];

    // Extract all logs with request_id = "abc12345"
    let target_id = "abc12345";
    let matching_logs: Vec<_> = logs
        .iter()
        .filter(|line| line.contains(&format!("\"request_id\":\"{}\"", target_id)))
        .collect();

    assert_eq!(matching_logs.len(), 3, "Should find all 3 log lines for the request");
}

// ---------------------------------------------------------------------------
// Acceptance Criterion 3: No PII in logs
// ---------------------------------------------------------------------------

#[test]
fn test_no_api_keys_in_logs() {
    // These patterns should NOT appear in logs
    let pii_patterns = vec![
        r#""api_key": "master_key_12345""#,
        r#""bearer": "Bearer master_key_12345""#,
        r#""authorization": "Bearer master_key_12345""#,
    ];

    for pattern in pii_patterns {
        assert!(
            contains_pii(pattern),
            "Pattern should be flagged as PII: {}",
            pattern
        );
    }

    // Hashed versions are OK
    let safe_patterns = vec![
        r#""api_key_hash": "a1b2c3d4""#,
        r#""key_uid": "12345""#,
        r#""tenant_id": "tenant-123""#,
    ];

    for pattern in safe_patterns {
        assert!(
            !contains_pii(pattern),
            "Hashed pattern should NOT be flagged as PII: {}",
            pattern
        );
    }
}

#[test]
fn test_no_query_strings_in_logs() {
    // Raw Meilisearch query content should not be logged
    // The contains_pii function checks for long q= patterns (user queries)
    // Line must be >200 chars total and contain "q=" to trigger PII detection
    let padding = "x".repeat(200);
    let bad_log = format!(r#"{{"message": "processing search", "q": "SELECT * FROM users WHERE email = 'test@example.com'", "padding": "{}"}}"#, padding);
    assert!(contains_pii(&bad_log), "Long query string should be flagged as PII");

    // Redacted queries are OK
    let good_log = r#"{"message": "processing search", "query": "[redacted]", "q": "[redacted]"}"#;
    assert!(!contains_pii(good_log), "Redacted query should NOT be flagged as PII");

    // Short queries are OK (e.g., just a word or two)
    let short_query = r#"{"message": "processing search", "q": "test"}"#;
    assert!(!contains_pii(short_query), "Short query should NOT be flagged as PII");

    // Long line without q= is OK
    let long_safe = format!(r#"{{"message": "processing", "data": "{}"}}"#, "x".repeat(200));
    assert!(!contains_pii(&long_safe), "Long line without q= should NOT be flagged as PII");
}

#[test]
fn test_no_document_content_in_logs() {
    // Large JSON blocks likely contain document content
    let large_log = r#"{"message": "indexing document", "doc": {"#;
    let large_log_padded = format!("{}{}", large_log, "x".repeat(5000));
    assert!(contains_pii(&large_log_padded), "Very long log line should be flagged as potential PII");

    // Normal log lines are OK
    let normal_log = r#"{"message": "indexed 100 documents", "count": 100}"#;
    assert!(!contains_pii(normal_log), "Normal log line should NOT be flagged");
}

// ---------------------------------------------------------------------------
// Acceptance Criterion 4: Log volume
// ---------------------------------------------------------------------------

#[test]
fn test_log_volume_info_level() {
    // At INFO level, search requests produce 2 INFO log entries:
    // 1 from telemetry middleware (miroir.request) + 1 from search handler (miroir.search)
    let request_logs = vec![
        r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","message":"GET /indexes/products/search 200"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","message":"node call"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.002Z","level":"info","target":"miroir.search","message":"search completed"}"#,
    ];

    let info_count = request_logs
        .iter()
        .filter(|line| line.contains(r#""level":"info""#))
        .count();

    assert_eq!(
        info_count, 2,
        "INFO level should have 2 log lines per search request (middleware + search handler)"
    );
}

#[test]
fn test_debug_level_has_more_logs() {
    // At DEBUG level, we get per-node logs in addition to the INFO logs
    let debug_logs = vec![
        r#"{"timestamp":"2026-05-01T12:00:00.000Z","level":"info","target":"miroir.request","message":"GET / 200"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.001Z","level":"debug","target":"miroir.node","message":"node call started"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.002Z","level":"debug","target":"miroir.node","message":"node call completed"}"#,
        r#"{"timestamp":"2026-05-01T12:00:00.003Z","level":"info","target":"miroir.search","message":"search completed"}"#,
    ];

    let debug_count = debug_logs
        .iter()
        .filter(|line| line.contains(r#""level":"debug""#))
        .count();

    assert!(debug_count > 1, "DEBUG level should have multiple log lines");
}

// ---------------------------------------------------------------------------
// Integration test: Verify actual log format from middleware
// ---------------------------------------------------------------------------

#[test]
fn test_middleware_log_format() {
    use miroir_proxy::middleware::{generate_request_id, RequestIdExt};
    use axum::http::HeaderMap;

    // Test request_id generation format
    let id = generate_request_id();
    assert_eq!(id.len(), 16, "Request ID should be 16 hex chars");
    assert!(id.chars().all(|c| c.is_ascii_hexdigit()), "Request ID should be hexadecimal");

    // Test header extraction
    let mut headers = HeaderMap::new();
    headers.set_request_id("test123456789abcd");
    assert_eq!(headers.get_request_id(), Some("test123456789abcd".to_string()));
}

// ---------------------------------------------------------------------------
// Log level verification
// ---------------------------------------------------------------------------

#[test]
fn test_log_levels_correct() {
    let logs_by_level = vec![
        (r#"{"level":"error","target":"miroir.request","message":"internal failure"}"#, "error"),
        (r#"{"level":"warn","target":"miroir.request","message":"degraded response"}"#, "warn"),
        (r#"{"level":"info","target":"miroir.request","message":"GET / 200"}"#, "info"),
        (r#"{"level":"debug","target":"miroir.node","message":"node call"}"#, "debug"),
        (r#"{"level":"trace","target":"miroir.scatter","message":"fan-out buffer"}"#, "trace"),
    ];

    for (log, expected_level) in logs_by_level {
        let parsed = parse_log_line(log).unwrap();
        assert_eq!(parsed["level"], expected_level);
    }
}

// ---------------------------------------------------------------------------
// Verify SearchRequestBody Debug impl redacts sensitive fields
// ---------------------------------------------------------------------------

#[test]
fn test_search_request_debug_redaction() {
    // Verified by unit test in routes/search.rs (SearchRequestBody is private).
    // That test confirms the Debug impl redacts `q` and `filter` fields.
    // This placeholder keeps the integration test file's acceptance criteria visible.
}

// ---------------------------------------------------------------------------
// Request ID middleware integration tests (P7.5.a)
// ---------------------------------------------------------------------------

#[tokio::test]
async fn test_request_id_response_header() {
    // Acceptance criterion: Every response includes X-Request-Id: <8-char hex>
    use axum::routing::get;
    use axum::middleware;
    use miroir_proxy::middleware::request_id_middleware;

    async fn handler() -> &'static str {
        "ok"
    }

    let app = Router::new()
        .route("/test", get(handler))
        .layer(middleware::from_fn(request_id_middleware));

    let response = app
        .oneshot(
            Request::builder()
                .uri("/test")
                .body(Body::empty())
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);

    let headers = response.headers();
    let request_id = headers.get("x-request-id");
    assert!(request_id.is_some(), "X-Request-Id header should be present");

    let id_str = request_id.unwrap().to_str().unwrap();
    assert!(
        is_valid_request_id(id_str),
        "X-Request-Id should be 8 hex chars, got: {}",
        id_str
    );
}

#[tokio::test]
async fn test_request_id_extension_accessible() {
    // Acceptance criterion: Request.extensions().get::<RequestId>() works from any handler
    use axum::{routing::get, Extension};
    use axum::middleware;
    use miroir_proxy::middleware::{request_id_middleware, RequestId};

    let app = Router::new()
        .route("/test", get(|Extension(id): Extension<RequestId>| async move {
            assert!(
                is_valid_request_id(id.as_str()),
                "RequestId in extensions should be 8 hex chars"
            );
            "ok"
        }))
        .layer(middleware::from_fn(request_id_middleware));

    let response = app
        .oneshot(
            Request::builder()
                .uri("/test")
                .body(Body::empty())
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
}

#[tokio::test]
async fn test_consecutive_requests_different_ids() {
    // Acceptance criterion: Two consecutive requests produce different IDs
    use axum::{routing::get, Extension};
    use axum::extract::State as AxumState;
    use axum::middleware;
    use miroir_proxy::middleware::{request_id_middleware, RequestId};
    use std::sync::Arc;
    use std::sync::Mutex;

    let captured_ids = Arc::new(Mutex::new(Vec::<String>::new()));

    let app = Router::new()
        .route(
            "/test",
            get(|Extension(id): Extension<RequestId>, AxumState(ids): AxumState<Arc<Mutex<Vec<String>>>>| async move {
                ids.lock().unwrap().push(id.as_str().to_string());
                "ok"
            }),
        )
        .layer(middleware::from_fn(request_id_middleware))
        .with_state(captured_ids.clone());

    // Make two requests consecutively
    for _ in 0..2 {
        let _ = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/test")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
    }

    let ids = captured_ids.lock().unwrap();
    assert_eq!(ids.len(), 2, "Should have captured 2 request IDs");
    assert_ne!(ids[0], ids[1], "Two consecutive requests should have different IDs");
    assert!(
        is_valid_request_id(&ids[0]),
        "First ID should be 8 hex chars"
    );
    assert!(
        is_valid_request_id(&ids[1]),
        "Second ID should be 8 hex chars"
    );
}

#[tokio::test]
async fn test_request_id_preserves_existing_header() {
    // If a request already has X-Request-Id, it should be preserved
    use axum::{routing::get, Extension};
    use axum::middleware;
    use miroir_proxy::middleware::{request_id_middleware, RequestId};

    let app = Router::new()
        .route(
            "/test",
            get(|Extension(id): Extension<RequestId>| async move {
                id.as_str().to_string()
            }),
        )
        .layer(middleware::from_fn(request_id_middleware));

    // Send a request with an existing X-Request-Id header
    let existing_id = "deadbeef";
    let response = app
        .oneshot(
            Request::builder()
                .uri("/test")
                .header("x-request-id", existing_id)
                .body(Body::empty())
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);

    // The response should have the same X-Request-Id
    let response_id = response.headers().get("x-request-id").unwrap().to_str().unwrap();
    assert_eq!(response_id, existing_id, "Should preserve existing X-Request-Id");
}

#[tokio::test]
async fn test_request_id_invalid_header_is_replaced() {
    // If a request has an invalid X-Request-Id (not 8 hex chars),
    // the middleware should generate a new one
    use axum::{routing::get, Extension};
    use axum::middleware;
    use miroir_proxy::middleware::{request_id_middleware, RequestId};

    let app = Router::new()
        .route(
            "/test",
            get(|Extension(id): Extension<RequestId>| async move {
                id.as_str().to_string()
            }),
        )
        .layer(middleware::from_fn(request_id_middleware));

    // Send a request with an INVALID X-Request-Id header (wrong length)
    let invalid_id = "not-8-chars";
    let response = app
        .oneshot(
            Request::builder()
                .uri("/test")
                .header("x-request-id", invalid_id)
                .body(Body::empty())
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);

    // The response should have a VALID 8-char hex ID (replaced)
    let response_id = response.headers().get("x-request-id").unwrap().to_str().unwrap();
    assert_ne!(response_id, invalid_id, "Should replace invalid X-Request-Id");
    assert!(
        is_valid_request_id(response_id),
        "Replaced ID should be 8 hex chars"
    );
}

// ---------------------------------------------------------------------------
// P7.5.c Trace correlation: request_id appears in all log lines
// ---------------------------------------------------------------------------

// Custom writer to capture log output for testing
struct CaptureWriter {
    logs: Arc<Mutex<Vec<String>>>,
}

impl std::io::Write for CaptureWriter {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        if let Ok(s) = std::str::from_utf8(buf) {
            self.logs.lock().unwrap().push(s.to_string());
        }
        Ok(buf.len())
    }

    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

#[tokio::test]
async fn test_request_id_appears_in_all_log_lines_within_request() {
    // Acceptance criterion: Every log line inside a request must carry request_id=<id>
    // This is achieved via tracing::Span with request_id recorded on span enter
    // and tracing_subscriber::fmt().with_current_span(true)
    use axum::{routing::get, Extension};
    use axum::middleware;
    use miroir_proxy::middleware::{request_id_middleware, telemetry_middleware, TelemetryState, RequestId, Metrics};
    use miroir_core::config::MiroirConfig;
    use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
    use tracing::info;
    use std::sync::{Arc, Mutex};

    // Capture logs into a vector
    let captured_logs = Arc::new(Mutex::new(Vec::<String>::new()));

    // Create a custom layer that captures log events
    let capture_layer = tracing_subscriber::fmt::layer()
        .json()
        .with_target(true)
        .with_current_span(true)  // This is the key: includes span fields on all events
        .with_span_list(false)
        .flatten_event(true)
        .with_writer(std::sync::Mutex::new(CaptureWriter {
            logs: captured_logs.clone(),
        }));

    // Initialize the subscriber with our capture layer
    tracing_subscriber::registry()
        .with(capture_layer)
        .init();

    // Build telemetry state
    let config = MiroirConfig::default();
    let metrics = Metrics::new(&config);
    let telemetry = TelemetryState {
        metrics: metrics.clone(),
        pod_id: "test-pod".to_string(),
    };

    // Build app with full middleware stack
    // IMPORTANT: Layer order matters! Last layer() call = outermost = runs first.
    // request_id_middleware must run BEFORE telemetry_middleware.
    // To achieve this, request_id_middleware layer() call comes LAST (so it's outermost).
    let app = axum::Router::new()
        .route("/test", get(|Extension(_id): Extension<RequestId>| async move {
            // These log events should all inherit the request_id from the parent span
            info!(message = "handler started", step = "1");
            info!(message = "processing request", step = "2");
            info!(message = "handler completed", step = "3");
            "ok"
        }))
        .layer(axum::middleware::from_fn_with_state(
            telemetry.clone(),
            telemetry_middleware,
        ))
        .layer(axum::middleware::from_fn(request_id_middleware));

    // Make a request
    let response = app
        .oneshot(
            Request::builder()
                .uri("/test")
                .body(Body::empty())
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);

    // Extract the request_id from the response header
    let request_id = response.headers()
        .get("x-request-id")
        .unwrap()
        .to_str()
        .unwrap();

    // Verify all captured log lines contain the request_id
    let logs = captured_logs.lock().unwrap();
    let log_lines: Vec<&str> = logs.iter()
        .flat_map(|s| s.lines())
        .filter(|line| line.contains("message"))  // Only look at actual log lines
        .collect();

    // We should have at least 3 log lines from the handler
    assert!(
        log_lines.len() >= 3,
        "Expected at least 3 log lines from handler, got: {}",
        log_lines.len()
    );

    // Every log line should contain the request_id (either at top level or in span)
    for line in &log_lines {
        let json = parse_log_line(line).expect(&format!("Log line should be valid JSON: {}", line));

        // Verify request_id field exists and matches the response header
        // It can be at the top level OR nested in the span object
        let log_request_id = json.get("request_id")
            .or_else(|| json.get("span").and_then(|s| s.get("request_id")))
            .and_then(|v| v.as_str());

        assert!(
            log_request_id.is_some(),
            "Log line missing request_id field: {}",
            line
        );

        assert_eq!(
            log_request_id.unwrap(), request_id,
            "Log line request_id should match response header: {}",
            line
        );
    }
}