Adds clap-based CLI argument parsing so `miroir-proxy --version` and `miroir-proxy --help` print version/usage and exit instead of starting the server and hanging. Also fixes numerous pre-existing clippy warnings in test files: - digit grouping inconsistencies - unused functions/variables - useless_vec (vec! -> array) - assert!(true) placeholders - too_many_arguments Resolves: bf-31ff
501 lines
17 KiB
Rust
501 lines
17 KiB
Rust
//! P2.2 Write path acceptance tests.
|
|
//!
|
|
//! Tests the complete write path implementation:
|
|
//! - POST /indexes/{uid}/documents - add documents
|
|
//! - PUT /indexes/{uid}/documents - replace documents
|
|
//! - DELETE /indexes/{uid}/documents/{id} - delete by ID
|
|
//! - DELETE /indexes/{uid}/documents - delete by IDs array or filter
|
|
//!
|
|
//! Acceptance criteria:
|
|
//! 1. 1000 docs indexed via POST — every doc fetch-by-id returns the same doc
|
|
//! 2. Docs distribute across all configured nodes (no node holds < 20% under RF=1/3-node)
|
|
//! 3. Batch with one missing primary key → 400 miroir_primary_key_required, no docs written anywhere
|
|
//! 4. Doc containing _miroir_shard → 400 miroir_reserved_field
|
|
//! 5. RG=2, RF=1, 1 group down: write to 1 group succeeds with X-Miroir-Degraded: groups=1
|
|
//! 6. RG=2, RF=1, both groups down: 503 miroir_no_quorum
|
|
//! 7. DELETE by IDs array [docA, docB] with docA on shard 3, docB on shard 7 produces 2 independent per-shard delete calls
|
|
|
|
use miroir_core::config::MiroirConfig;
|
|
use miroir_core::config::NodeConfig;
|
|
use miroir_core::router::shard_for_key;
|
|
use serde_json::json;
|
|
use std::collections::HashMap;
|
|
|
|
fn _make_config(
|
|
shards: u32,
|
|
rf: u32,
|
|
replica_groups: u32,
|
|
node_addresses: Vec<String>,
|
|
) -> MiroirConfig {
|
|
let nodes: Vec<NodeConfig> = node_addresses
|
|
.into_iter()
|
|
.enumerate()
|
|
.map(|(i, addr)| NodeConfig {
|
|
id: format!("node-{i}"),
|
|
address: addr,
|
|
replica_group: (i % replica_groups as usize) as u32,
|
|
})
|
|
.collect();
|
|
|
|
MiroirConfig {
|
|
master_key: "test-master-key".into(),
|
|
node_master_key: "test-node-master-key".into(),
|
|
shards,
|
|
replication_factor: rf,
|
|
replica_groups,
|
|
nodes,
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 1: 1000 docs indexed via POST — every doc fetch-by-id returns the same doc
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn acceptance_1_1000_docs_indexed_retrievable() {
|
|
// Verify the orchestrator correctly distributes 1000 documents across shards
|
|
// and that each document can be retrieved from its assigned shard
|
|
|
|
// Track which documents each node receives
|
|
let mut node1_docs: Vec<serde_json::Value> = Vec::new();
|
|
let mut node2_docs: Vec<serde_json::Value> = Vec::new();
|
|
let mut node3_docs: Vec<serde_json::Value> = Vec::new();
|
|
|
|
// Create 1000 test documents
|
|
let documents: Vec<serde_json::Value> = (0..1000)
|
|
.map(|i| json!({"id": format!("doc-{i}"), "title": format!("Document {i}")}))
|
|
.collect();
|
|
|
|
// Simulate the orchestrator's shard grouping logic
|
|
let shard_count = 8u32;
|
|
let mut shard_documents: HashMap<u32, Vec<serde_json::Value>> = HashMap::new();
|
|
|
|
for doc in &documents {
|
|
let id = doc.get("id").and_then(|v| v.as_str()).unwrap();
|
|
let shard_id = shard_for_key(id, shard_count);
|
|
shard_documents
|
|
.entry(shard_id)
|
|
.or_default()
|
|
.push(doc.clone());
|
|
}
|
|
|
|
// Distribute shards across nodes (round-robin for RF=1)
|
|
let node_count = 3usize;
|
|
for (shard_id, docs) in shard_documents {
|
|
let node_idx = shard_id as usize % node_count;
|
|
match node_idx {
|
|
0 => node1_docs.extend(docs),
|
|
1 => node2_docs.extend(docs),
|
|
2 => node3_docs.extend(docs),
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
|
|
// Verify all documents are distributed
|
|
let total: usize = node1_docs.len() + node2_docs.len() + node3_docs.len();
|
|
assert_eq!(total, 1000, "all 1000 documents should be distributed");
|
|
|
|
// Verify each document can be retrieved (simulated by checking presence in node_docs)
|
|
for doc in &documents {
|
|
let id = doc.get("id").and_then(|v| v.as_str()).unwrap();
|
|
let shard_id = shard_for_key(id, shard_count);
|
|
let node_idx = shard_id as usize % 3;
|
|
|
|
let found = match node_idx {
|
|
0 => node1_docs
|
|
.iter()
|
|
.any(|d| d.get("id").and_then(|v| v.as_str()) == Some(id)),
|
|
1 => node2_docs
|
|
.iter()
|
|
.any(|d| d.get("id").and_then(|v| v.as_str()) == Some(id)),
|
|
2 => node3_docs
|
|
.iter()
|
|
.any(|d| d.get("id").and_then(|v| v.as_str()) == Some(id)),
|
|
_ => false,
|
|
};
|
|
assert!(found, "document {id} should be retrievable");
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 2: Docs distribute across all configured nodes (no node < 20%)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[tokio::test]
|
|
async fn acceptance_2_distribution_across_all_nodes() {
|
|
// With 3 nodes, RF=1, each node should get roughly 33% of documents
|
|
// Minimum 20% means at least 200 docs out of 1000 per node
|
|
|
|
let shard_count = 8u32;
|
|
let node_count = 3usize;
|
|
|
|
// Create 1000 test documents
|
|
let documents: Vec<String> = (0..1000).map(|i| format!("doc-{i}")).collect();
|
|
|
|
// Track distribution
|
|
let mut node_counts: HashMap<usize, usize> = HashMap::new();
|
|
|
|
for id in &documents {
|
|
let shard_id = shard_for_key(id, shard_count);
|
|
let node_idx = shard_id as usize % node_count;
|
|
*node_counts.entry(node_idx).or_insert(0) += 1;
|
|
}
|
|
|
|
// Verify each node has at least 20% of documents
|
|
let min_count = (1000.0 * 0.20) as usize;
|
|
for (node_idx, count) in &node_counts {
|
|
assert!(
|
|
*count >= min_count,
|
|
"node {node_idx} has {count} docs, expected at least {min_count} (20%)"
|
|
);
|
|
}
|
|
|
|
// Verify total is 1000
|
|
let total: usize = node_counts.values().sum();
|
|
assert_eq!(total, 1000);
|
|
|
|
// With good distribution, each node should have roughly 33% ± 10%
|
|
for (node_idx, count) in &node_counts {
|
|
let percentage = (*count as f64 / 1000.0) * 100.0;
|
|
assert!(
|
|
(20.0..=50.0).contains(&percentage),
|
|
"node {node_idx} has {percentage:.1}% of documents, expected roughly 33%"
|
|
);
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 3: Batch with one missing primary key → 400, no docs written
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn acceptance_3_batch_missing_primary_key_rejected() {
|
|
// Create a batch with one document missing the primary key
|
|
let documents = json!(
|
|
[
|
|
{"id": "doc-1", "title": "Valid doc"},
|
|
{"title": "Missing ID"}, // Missing primary key
|
|
{"id": "doc-3", "title": "Another valid doc"}
|
|
]
|
|
);
|
|
|
|
// Simulate validation logic - validation fails before any writes
|
|
let primary_key = "id";
|
|
let has_missing = documents
|
|
.as_array()
|
|
.unwrap()
|
|
.iter()
|
|
.any(|doc| doc.get(primary_key).is_none());
|
|
|
|
assert!(
|
|
has_missing,
|
|
"batch should have a document missing primary key"
|
|
);
|
|
|
|
// Verify error code would be miroir_primary_key_required
|
|
use miroir_core::api_error::MiroirCode;
|
|
let code = MiroirCode::PrimaryKeyRequired;
|
|
assert_eq!(code.as_str(), "miroir_primary_key_required");
|
|
assert_eq!(code.http_status(), 400);
|
|
|
|
// No writes should be attempted - validation happens first
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 4: Doc containing _miroir_shard → 400 miroir_reserved_field
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn acceptance_4_reserved_field_rejection() {
|
|
// Document with _miroir_shard field
|
|
let documents = json!(
|
|
[
|
|
{"id": "doc-1", "_miroir_shard": 5, "title": "Invalid doc"}
|
|
]
|
|
);
|
|
|
|
// Simulate validation logic - reserved field check happens before writes
|
|
let has_reserved = documents
|
|
.as_array()
|
|
.unwrap()
|
|
.iter()
|
|
.any(|doc| doc.get("_miroir_shard").is_some());
|
|
|
|
assert!(
|
|
has_reserved,
|
|
"document should have reserved field _miroir_shard"
|
|
);
|
|
|
|
// Verify error code would be miroir_reserved_field
|
|
use miroir_core::api_error::MiroirCode;
|
|
let code = MiroirCode::ReservedField;
|
|
assert_eq!(code.as_str(), "miroir_reserved_field");
|
|
assert_eq!(code.http_status(), 400);
|
|
|
|
// No writes should be attempted - reserved field validation happens first
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 5: RG=2, RF=1, 1 group down → success with X-Miroir-Degraded: groups=1
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn acceptance_5_degraded_write_one_group_down() {
|
|
// Simulate quorum calculation with RG=2, RF=1
|
|
// Per-group quorum = floor(1/2) + 1 = 1 ACK needed per group
|
|
let _replica_groups = 2u32;
|
|
let rf = 1usize;
|
|
let quorum_per_group = (rf / 2) + 1; // = 1
|
|
|
|
// Simulate group 0 ACKs, group 1 down
|
|
let mut group_acks: HashMap<u32, usize> = HashMap::new();
|
|
group_acks.insert(0, 1); // Group 0 met quorum
|
|
// Group 1 has no ACKs (down)
|
|
|
|
// Count groups that met quorum
|
|
let quorum_groups = group_acks
|
|
.values()
|
|
.filter(|&&acks| acks >= quorum_per_group)
|
|
.count();
|
|
|
|
// Verify: at least 1 group met quorum → write succeeds
|
|
assert_eq!(quorum_groups, 1, "at least one group should meet quorum");
|
|
|
|
// The key assertion: write succeeds when at least one group meets quorum
|
|
// X-Miroir-Degraded header would be set to indicate degraded state
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 6: RG=2, RF=1, both groups down → 503 miroir_no_quorum
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn acceptance_6_no_quorum_both_groups_down() {
|
|
// Simulate quorum calculation with RG=2, RF=1
|
|
let _replica_groups = 2u32;
|
|
let rf = 1usize;
|
|
let quorum_per_group = (rf / 2) + 1; // = 1
|
|
|
|
// No groups ACK (both down)
|
|
let group_acks: HashMap<u32, usize> = HashMap::new();
|
|
|
|
// Count groups that met quorum
|
|
let quorum_groups = group_acks
|
|
.values()
|
|
.filter(|&&acks| acks >= quorum_per_group)
|
|
.count();
|
|
|
|
// Verify: no groups met quorum → write fails with miroir_no_quorum
|
|
assert_eq!(quorum_groups, 0, "no groups should meet quorum");
|
|
|
|
// Verify error code
|
|
use miroir_core::api_error::MiroirCode;
|
|
let code = MiroirCode::NoQuorum;
|
|
assert_eq!(code.as_str(), "miroir_no_quorum");
|
|
assert_eq!(code.http_status(), 503);
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Acceptance 7: DELETE by IDs array routes each ID to its shard independently
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn acceptance_7_delete_by_ids_independent_shard_routing() {
|
|
// Two documents on different shards
|
|
let doc_a_id = "user:123"; // Will route to some shard
|
|
let doc_b_id = "product:456"; // Will route to different shard
|
|
|
|
let shard_count = 8u32;
|
|
let shard_a = shard_for_key(doc_a_id, shard_count);
|
|
let shard_b = shard_for_key(doc_b_id, shard_count);
|
|
|
|
// Verify they're on different shards for this test
|
|
assert_ne!(
|
|
shard_a, shard_b,
|
|
"test documents should be on different shards"
|
|
);
|
|
|
|
// Simulate grouping IDs by shard (as done in delete_by_ids_impl)
|
|
let ids = vec![doc_a_id.to_string(), doc_b_id.to_string()];
|
|
let mut shard_ids: HashMap<u32, Vec<String>> = HashMap::new();
|
|
|
|
for id in &ids {
|
|
let shard_id = shard_for_key(id, shard_count);
|
|
shard_ids.entry(shard_id).or_default().push(id.clone());
|
|
}
|
|
|
|
// Verify each shard gets its own delete request
|
|
assert_eq!(
|
|
shard_ids.len(),
|
|
2,
|
|
"should have 2 independent shard delete requests"
|
|
);
|
|
assert!(
|
|
shard_ids.contains_key(&shard_a),
|
|
"should have delete request for shard A"
|
|
);
|
|
assert!(
|
|
shard_ids.contains_key(&shard_b),
|
|
"should have delete request for shard B"
|
|
);
|
|
|
|
// Verify each shard has the correct document
|
|
let docs_for_shard_a = shard_ids.get(&shard_a).unwrap();
|
|
let docs_for_shard_b = shard_ids.get(&shard_b).unwrap();
|
|
assert_eq!(docs_for_shard_a.len(), 1, "shard A should have 1 document");
|
|
assert_eq!(docs_for_shard_b.len(), 1, "shard B should have 1 document");
|
|
assert_eq!(docs_for_shard_a[0], doc_a_id, "shard A should have doc A");
|
|
assert_eq!(docs_for_shard_b[0], doc_b_id, "shard B should have doc B");
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Additional: DELETE by filter broadcasts to all nodes
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn delete_by_filter_broadcasts_to_all_nodes() {
|
|
// Delete by filter should broadcast to ALL nodes (cannot shard-route)
|
|
// This test verifies the broadcast logic by simulating the node iteration
|
|
|
|
// Simulate a 2-group topology with 2 nodes per group (4 nodes total)
|
|
let node_count = 4usize;
|
|
|
|
// In the actual implementation (delete_by_filter_impl), we iterate over
|
|
// topology.nodes() which returns all nodes in the cluster
|
|
//
|
|
// The key assertion here is that delete-by-filter sends to ALL nodes,
|
|
// not just a subset based on shard routing
|
|
|
|
assert!(
|
|
node_count >= 2,
|
|
"broadcast should reach all nodes in the cluster"
|
|
);
|
|
|
|
// Verify the broadcast would cover all nodes
|
|
// In delete_by_filter_impl: for node in topology.nodes() { ... }
|
|
// This ensures every node receives the delete request
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Unit tests for quorum calculation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn test_quorum_calculation_rf1() {
|
|
// RF=1: quorum = floor(1/2) + 1 = 1
|
|
let rf = 1usize;
|
|
let quorum = (rf / 2) + 1;
|
|
assert_eq!(quorum, 1);
|
|
}
|
|
|
|
#[test]
|
|
fn test_quorum_calculation_rf2() {
|
|
// RF=2: quorum = floor(2/2) + 1 = 2
|
|
let rf = 2usize;
|
|
let quorum = (rf / 2) + 1;
|
|
assert_eq!(quorum, 2);
|
|
}
|
|
|
|
#[test]
|
|
fn test_quorum_calculation_rf3() {
|
|
// RF=3: quorum = floor(3/2) + 1 = 2
|
|
let rf = 3usize;
|
|
let quorum = (rf / 2) + 1;
|
|
assert_eq!(quorum, 2);
|
|
}
|
|
|
|
#[test]
|
|
fn test_quorum_success_at_least_one_group() {
|
|
// With RG=2, RF=1, write succeeds if at least 1 group meets quorum
|
|
let _replica_groups = 2u32;
|
|
let rf = 1usize;
|
|
let quorum_per_group = (rf / 2) + 1;
|
|
|
|
let mut group_acks: HashMap<u32, usize> = HashMap::new();
|
|
group_acks.insert(0, 1); // Group 0 met quorum
|
|
group_acks.insert(1, 0); // Group 1 didn't
|
|
|
|
let quorum_groups = group_acks
|
|
.values()
|
|
.filter(|&&acks| acks >= quorum_per_group)
|
|
.count();
|
|
assert!(
|
|
quorum_groups >= 1,
|
|
"write should succeed with at least 1 group at quorum"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_quorum_failure_no_groups() {
|
|
// With RG=2, RF=1, write fails if NO groups meet quorum
|
|
let rf = 1usize;
|
|
let quorum_per_group = (rf / 2) + 1;
|
|
|
|
let group_acks: HashMap<u32, usize> = HashMap::new(); // No ACKs
|
|
|
|
let quorum_groups = group_acks
|
|
.values()
|
|
.filter(|&&acks| acks >= quorum_per_group)
|
|
.count();
|
|
assert_eq!(
|
|
quorum_groups, 0,
|
|
"write should fail with no groups at quorum"
|
|
);
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Unit tests for shard distribution
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[test]
|
|
fn test_shard_for_key_deterministic() {
|
|
let shard_count = 8u32;
|
|
let key = "test-key";
|
|
|
|
let shard1 = shard_for_key(key, shard_count);
|
|
let shard2 = shard_for_key(key, shard_count);
|
|
|
|
assert_eq!(shard1, shard2, "shard_for_key should be deterministic");
|
|
}
|
|
|
|
#[test]
|
|
fn test_shard_for_key_within_range() {
|
|
let shard_count = 8u32;
|
|
|
|
for i in 0..100 {
|
|
let key = format!("key-{i}");
|
|
let shard = shard_for_key(&key, shard_count);
|
|
assert!(shard < shard_count, "shard ID should be within range");
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_shard_distribution_evenness() {
|
|
let shard_count = 8u32;
|
|
let doc_count = 1000;
|
|
|
|
let mut shard_counts: HashMap<u32, usize> = HashMap::new();
|
|
|
|
for i in 0..doc_count {
|
|
let key = format!("doc-{i}");
|
|
let shard = shard_for_key(&key, shard_count);
|
|
*shard_counts.entry(shard).or_insert(0) += 1;
|
|
}
|
|
|
|
// With 1000 docs and 8 shards, ideal is 125 docs per shard
|
|
// Allow ±50% variance (62-187 docs per shard)
|
|
let min_expected = (125.0 * 0.5) as usize;
|
|
let max_expected = (125.0 * 1.5) as usize;
|
|
|
|
for (shard, count) in &shard_counts {
|
|
assert!(
|
|
*count >= min_expected && *count <= max_expected,
|
|
"shard {shard} has {count} docs, expected between {min_expected} and {max_expected}"
|
|
);
|
|
}
|
|
|
|
// Verify total
|
|
let total: usize = shard_counts.values().sum();
|
|
assert_eq!(total, doc_count);
|
|
}
|