P10.2: nodeMasterKey zero-downtime rotation flow
Add `miroir-ctl key rotate-node-master` command implementing plan §9 4-step zero-downtime rotation: create new admin-scoped key on all Meilisearch nodes, print K8s Secret update instructions, wait for rolling restart confirmation, delete old key. Supports --dry-run, node auto-discovery via topology API, and rollback on step 1 failure. Add `address` field to topology API NodeInfo for CLI node discovery. Add runbooks for both nodeMasterKey (zero-downtime) and startup master key (maintenance window required) rotation. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
3b209e8b66
commit
26fe2970fc
8 changed files with 796 additions and 3 deletions
|
|
@ -10,7 +10,7 @@ name = "miroir-ctl"
|
|||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
clap = { version = "4.5", features = ["derive", "env"] }
|
||||
reqwest = { version = "0.12", features = ["json", "rustls-tls"], default-features = false }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
|
|
|||
492
crates/miroir-ctl/src/commands/key.rs
Normal file
492
crates/miroir-ctl/src/commands/key.rs
Normal file
|
|
@ -0,0 +1,492 @@
|
|||
//! Key management commands.
|
||||
//!
|
||||
//! Implements plan §9 zero-downtime rotation for the admin-scoped nodeMasterKey.
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use reqwest::Client;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use std::io::{self, Write};
|
||||
|
||||
/// Key management commands.
|
||||
#[derive(Subcommand, Debug)]
|
||||
pub enum KeySubcommand {
|
||||
/// Rotate the admin-scoped nodeMasterKey (zero-downtime).
|
||||
///
|
||||
/// Implements the 4-step zero-downtime rotation from plan §9:
|
||||
/// 1. Create a new admin-scoped key on every Meilisearch node
|
||||
/// 2. Print instructions for updating the K8s Secret
|
||||
/// 3. Wait for operator to confirm rolling restart of Miroir pods
|
||||
/// 4. Delete the old admin-scoped key from every node
|
||||
///
|
||||
/// TERMINOLOGY (plan §9):
|
||||
/// - MEILI_MASTER_KEY (startup env var): fixed at process start.
|
||||
/// Rotation requires a Meilisearch pod restart (separate runbook).
|
||||
/// - Admin-scoped child keys (POST /keys, actions: ["*"]): multiple
|
||||
/// can coexist, rotation is zero-downtime.
|
||||
/// - The "nodeMasterKey" in Miroir config is the second kind.
|
||||
RotateNodeMaster(RotateNodeMasterArgs),
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub struct RotateNodeMasterArgs {
|
||||
/// Print the rotation plan without executing any changes.
|
||||
#[arg(long)]
|
||||
dry_run: bool,
|
||||
|
||||
/// Current nodeMasterKey used to authenticate with Meilisearch nodes.
|
||||
/// Falls back to MIROIR_NODE_MASTER_KEY env var.
|
||||
#[arg(long, env = "MIROIR_NODE_MASTER_KEY")]
|
||||
current_key: Option<String>,
|
||||
|
||||
/// Meilisearch node base URL (repeatable, e.g. http://meili-0.search.svc:7700).
|
||||
/// Discovered from the topology API when omitted.
|
||||
#[arg(long = "node")]
|
||||
nodes: Vec<String>,
|
||||
|
||||
/// Name for the new key (visible in GET /keys output).
|
||||
#[arg(long, default_value = "miroir-node-master")]
|
||||
key_name: String,
|
||||
|
||||
/// Optional expiration for the new key (ISO 8601, e.g. "2026-12-31T23:59:59Z").
|
||||
#[arg(long)]
|
||||
expires_at: Option<String>,
|
||||
|
||||
/// Kubernetes namespace containing the Miroir secret.
|
||||
#[arg(long, default_value = "search")]
|
||||
namespace: String,
|
||||
|
||||
/// Kubernetes Secret name containing nodeMasterKey.
|
||||
#[arg(long, default_value = "miroir-keys")]
|
||||
secret_name: String,
|
||||
|
||||
/// Skip confirmation prompts (use with caution).
|
||||
#[arg(long)]
|
||||
yes: bool,
|
||||
}
|
||||
|
||||
// -- Meilisearch API response types ------------------------------------------
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct MeiliKeysResponse {
|
||||
results: Vec<MeiliKey>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct MeiliKey {
|
||||
uid: String,
|
||||
key: String,
|
||||
name: Option<String>,
|
||||
description: Option<String>,
|
||||
actions: Vec<serde_json::Value>,
|
||||
indexes: Vec<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct MeiliKeyCreated {
|
||||
uid: String,
|
||||
key: String,
|
||||
}
|
||||
|
||||
// -- Topology API response type -----------------------------------------------
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TopologyResponse {
|
||||
nodes: Vec<TopologyNode>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TopologyNode {
|
||||
id: String,
|
||||
address: String,
|
||||
status: String,
|
||||
}
|
||||
|
||||
// -- Entry point --------------------------------------------------------------
|
||||
|
||||
pub async fn run(
|
||||
cmd: KeySubcommand,
|
||||
admin_key: &str,
|
||||
api_url: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
match cmd {
|
||||
KeySubcommand::RotateNodeMaster(args) => {
|
||||
rotate_node_master(args, admin_key, api_url).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -- Rotation logic -----------------------------------------------------------
|
||||
|
||||
async fn rotate_node_master(
|
||||
args: RotateNodeMasterArgs,
|
||||
admin_key: &str,
|
||||
api_url: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Resolve current key
|
||||
let current_key = match &args.current_key {
|
||||
Some(k) => k.clone(),
|
||||
None => {
|
||||
return Err(
|
||||
"No current nodeMasterKey. Use --current-key or set MIROIR_NODE_MASTER_KEY.".into(),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Resolve node addresses
|
||||
let node_addresses = if args.nodes.is_empty() {
|
||||
discover_nodes(admin_key, api_url).await?
|
||||
} else {
|
||||
args.nodes.clone()
|
||||
};
|
||||
|
||||
if node_addresses.is_empty() {
|
||||
return Err(
|
||||
"No Meilisearch node addresses. Use --node or ensure topology API is reachable.".into(),
|
||||
);
|
||||
}
|
||||
|
||||
// ── Dry-run ──────────────────────────────────────────────────────
|
||||
if args.dry_run {
|
||||
return print_dry_run(&args, &node_addresses, ¤t_key);
|
||||
}
|
||||
|
||||
let client = Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.build()?;
|
||||
|
||||
// ── Step 1: Create new admin-scoped key on all nodes ─────────────
|
||||
eprintln!("Step 1/4: Creating new admin-scoped key on all Meilisearch nodes...");
|
||||
|
||||
let mut create_body = json!({
|
||||
"name": args.key_name,
|
||||
"description": format!("{} (rotated epoch {})", args.key_name, epoch_seconds()),
|
||||
"actions": ["*"],
|
||||
"indexes": ["*"],
|
||||
});
|
||||
if let Some(ref exp) = args.expires_at {
|
||||
create_body["expiresAt"] = json!(exp);
|
||||
} else {
|
||||
create_body["expiresAt"] = serde_json::Value::Null;
|
||||
}
|
||||
|
||||
let mut new_key_value: Option<String> = None;
|
||||
let mut new_key_uid: Option<String> = None;
|
||||
let mut created_on: Vec<String> = Vec::new();
|
||||
|
||||
for addr in &node_addresses {
|
||||
let url = format!("{}/keys", addr.trim_end_matches('/'));
|
||||
let resp = client
|
||||
.post(&url)
|
||||
.header("Authorization", format!("Bearer {}", current_key))
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&create_body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to contact {}: {}", addr, e))?;
|
||||
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
rollback_create(&client, &created_on, &new_key_uid, ¤t_key).await;
|
||||
return Err(format!("Key creation failed on {}: HTTP {} — {}", addr, status, text).into());
|
||||
}
|
||||
|
||||
let body: MeiliKeyCreated = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Bad response from {}: {}", addr, e))?;
|
||||
|
||||
if new_key_value.is_none() {
|
||||
new_key_value = Some(body.key.clone());
|
||||
new_key_uid = Some(body.uid.clone());
|
||||
}
|
||||
created_on.push(addr.clone());
|
||||
eprintln!(" Created key on {}", addr);
|
||||
}
|
||||
|
||||
let new_key = new_key_value.ok_or("No key value received")?;
|
||||
let new_uid = new_key_uid.ok_or("No key UID received")?;
|
||||
|
||||
eprintln!(
|
||||
" New key: {}... UID: {}",
|
||||
&new_key[..8.min(new_key.len())],
|
||||
new_uid
|
||||
);
|
||||
|
||||
// ── Step 2: Print K8s Secret update instructions ─────────────────
|
||||
println!("\n--- Step 2/4: Update K8s Secret ---\n");
|
||||
println!("Patch the secret with the new key:");
|
||||
println!(
|
||||
" kubectl -n {} patch secret {} \\",
|
||||
args.namespace, args.secret_name
|
||||
);
|
||||
println!(
|
||||
" -p '{{\"stringData\":{{\"nodeMasterKey\":\"{}\"}}}}'",
|
||||
new_key
|
||||
);
|
||||
println!("\nOr update your ExternalSecret / OpenBao source.\n");
|
||||
|
||||
// ── Step 3: Rolling restart instructions ─────────────────────────
|
||||
println!("--- Step 3/4: Rolling restart Miroir pods ---\n");
|
||||
println!(
|
||||
" kubectl -n {} rollout restart deployment/miroir",
|
||||
args.namespace
|
||||
);
|
||||
println!(
|
||||
" kubectl -n {} rollout status deployment/miroir",
|
||||
args.namespace
|
||||
);
|
||||
println!("\nBoth old and new keys are valid concurrently — no downtime.\n");
|
||||
|
||||
if !args.yes {
|
||||
print!(
|
||||
"Press Enter once ALL Miroir pods are running with the new key (Ctrl+C to abort): "
|
||||
);
|
||||
io::stdout().flush()?;
|
||||
let mut buf = String::new();
|
||||
io::stdin().read_line(&mut buf)?;
|
||||
}
|
||||
|
||||
// ── Step 4: Delete old key ───────────────────────────────────────
|
||||
eprintln!("\nStep 4/4: Deleting old admin-scoped key...");
|
||||
|
||||
let old_uid = find_old_key_uid(&client, &node_addresses[0], ¤t_key).await?;
|
||||
|
||||
match old_uid {
|
||||
Some(uid) => {
|
||||
eprintln!(" Old key UID: {}", uid);
|
||||
|
||||
if !args.yes {
|
||||
print!("Delete old key {} from all nodes? [y/N] ", uid);
|
||||
io::stdout().flush()?;
|
||||
let mut buf = String::new();
|
||||
io::stdin().read_line(&mut buf)?;
|
||||
if !buf.trim().eq_ignore_ascii_case("y") {
|
||||
eprintln!("Skipped. Delete manually with:");
|
||||
for addr in &node_addresses {
|
||||
eprintln!(
|
||||
" curl -X DELETE {}/keys/{} -H 'Authorization: Bearer <key>'",
|
||||
addr.trim_end_matches('/'),
|
||||
uid
|
||||
);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
for addr in &node_addresses {
|
||||
let url = format!("{}/keys/{}", addr.trim_end_matches('/'), uid);
|
||||
let resp = client
|
||||
.delete(&url)
|
||||
.header("Authorization", format!("Bearer {}", current_key))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Delete failed on {}: {}", addr, e))?;
|
||||
|
||||
if resp.status().is_success() {
|
||||
eprintln!(" Deleted old key on {}", addr);
|
||||
} else {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
eprintln!(" Warning: delete on {} returned HTTP {} — {}", addr, status, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
eprintln!(" Could not determine old key UID. Skipping deletion.");
|
||||
eprintln!(" List keys and delete manually:");
|
||||
for addr in &node_addresses {
|
||||
eprintln!(
|
||||
" curl {}/keys -H 'Authorization: Bearer <key>'",
|
||||
addr.trim_end_matches('/')
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("\nRotation complete.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// -- Dry-run plan printer ----------------------------------------------------
|
||||
|
||||
fn print_dry_run(
|
||||
args: &RotateNodeMasterArgs,
|
||||
nodes: &[String],
|
||||
current_key: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== nodeMasterKey Rotation Plan (dry-run) ===\n");
|
||||
|
||||
println!("Target nodes ({}):", nodes.len());
|
||||
for addr in nodes {
|
||||
println!(" - {}", addr);
|
||||
}
|
||||
println!();
|
||||
|
||||
println!("Current key prefix: {}...", ¤t_key[..8.min(current_key.len())]);
|
||||
println!();
|
||||
|
||||
println!("Steps:");
|
||||
println!(" 1. Create new admin-scoped key on each node");
|
||||
println!(" POST /keys {{ name: {:?}, actions: [\"*\"], indexes: [\"*\"] }}", args.key_name);
|
||||
if let Some(ref exp) = args.expires_at {
|
||||
println!(" expiresAt: {:?}", exp);
|
||||
}
|
||||
println!();
|
||||
|
||||
println!(
|
||||
" 2. Update K8s Secret {}/{} with new key value",
|
||||
args.namespace, args.secret_name
|
||||
);
|
||||
println!();
|
||||
|
||||
println!(
|
||||
" 3. Rolling restart: kubectl -n {} rollout restart deployment/miroir",
|
||||
args.namespace
|
||||
);
|
||||
println!(" During rollout old-key pods and new-key pods both authenticate (zero-downtime).");
|
||||
println!();
|
||||
|
||||
println!(" 4. Delete old key (UID from GET /keys) on every node");
|
||||
println!();
|
||||
|
||||
println!("Notes:");
|
||||
println!(" - Both old and new admin-scoped keys are valid concurrently (plan §9)");
|
||||
println!(" - The startup MEILI_MASTER_KEY is NOT changed by this flow");
|
||||
println!(" - For startup-master rotation see docs/runbooks/startup-master-key-rotation.md");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// -- Node discovery via topology API -----------------------------------------
|
||||
|
||||
async fn discover_nodes(
|
||||
admin_key: &str,
|
||||
api_url: &str,
|
||||
) -> Result<Vec<String>, Box<dyn std::error::Error>> {
|
||||
let client = Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(10))
|
||||
.build()?;
|
||||
|
||||
let url = format!("{}/_miroir/topology", api_url.trim_end_matches('/'));
|
||||
let resp = client
|
||||
.get(&url)
|
||||
.header("Authorization", format!("Bearer {}", admin_key))
|
||||
.header("X-Admin-Key", admin_key)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Topology API unreachable at {}: {}", url, e))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("Topology API returned HTTP {} — {}", status, text).into());
|
||||
}
|
||||
|
||||
let topo: TopologyResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Bad topology response: {}", e))?;
|
||||
|
||||
let addresses: Vec<String> = topo
|
||||
.nodes
|
||||
.into_iter()
|
||||
.filter(|n| n.status == "healthy" || n.status == "active" || n.status == "joining")
|
||||
.map(|n| n.address)
|
||||
.collect();
|
||||
|
||||
if addresses.is_empty() {
|
||||
return Err("Topology returned no healthy nodes".into());
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"Discovered {} node(s) from topology API",
|
||||
addresses.len()
|
||||
);
|
||||
Ok(addresses)
|
||||
}
|
||||
|
||||
// -- Find old key UID by matching prefix -------------------------------------
|
||||
|
||||
async fn find_old_key_uid(
|
||||
client: &Client,
|
||||
node_addr: &str,
|
||||
current_key: &str,
|
||||
) -> Result<Option<String>, Box<dyn std::error::Error>> {
|
||||
let url = format!("{}/keys", node_addr.trim_end_matches('/'));
|
||||
let resp = client
|
||||
.get(&url)
|
||||
.header("Authorization", format!("Bearer {}", current_key))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to list keys on {}: {}", node_addr, e))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
eprintln!(
|
||||
" Warning: could not list keys on {} (HTTP {} — {})",
|
||||
node_addr, status, text
|
||||
);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let keys: MeiliKeysResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Bad keys response from {}: {}", node_addr, e))?;
|
||||
|
||||
let prefix_len = 8.min(current_key.len());
|
||||
let prefix = ¤t_key[..prefix_len];
|
||||
|
||||
for k in &keys.results {
|
||||
if k.key.len() >= prefix_len && &k.key[..prefix_len] == prefix {
|
||||
return Ok(Some(k.uid.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
// -- Rollback on step 1 failure -----------------------------------------------
|
||||
|
||||
async fn rollback_create(
|
||||
client: &Client,
|
||||
created_on: &[String],
|
||||
key_uid: &Option<String>,
|
||||
auth_key: &str,
|
||||
) {
|
||||
let Some(uid) = key_uid else { return };
|
||||
for addr in created_on {
|
||||
let url = format!("{}/keys/{}", addr.trim_end_matches('/'), uid);
|
||||
match client
|
||||
.delete(&url)
|
||||
.header("Authorization", format!("Bearer {}", auth_key))
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
eprintln!(" Rollback: deleted key on {}", addr);
|
||||
}
|
||||
Ok(resp) => {
|
||||
eprintln!(
|
||||
" Rollback failed on {}: HTTP {}",
|
||||
addr,
|
||||
resp.status()
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" Rollback failed on {}: {}", addr, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -- Helpers ------------------------------------------------------------------
|
||||
|
||||
/// Whole seconds elapsed since the Unix epoch, or 0 when the system clock
/// reports a time before the epoch.
fn epoch_seconds() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
|
@ -3,6 +3,7 @@ pub mod canary;
|
|||
pub mod cdc;
|
||||
pub mod dump;
|
||||
pub mod explain;
|
||||
pub mod key;
|
||||
pub mod node;
|
||||
pub mod rebalance;
|
||||
pub mod reshard;
|
||||
|
|
|
|||
|
|
@ -84,6 +84,10 @@ enum Commands {
|
|||
/// Explain query plans and operations
|
||||
#[command(subcommand)]
|
||||
Explain(commands::explain::ExplainSubcommand),
|
||||
|
||||
/// Manage Meilisearch keys
|
||||
#[command(subcommand)]
|
||||
Key(commands::key::KeySubcommand),
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -106,8 +110,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
std::process::exit(1);
|
||||
}
|
||||
|
||||
// TODO: Use admin_key for API authentication when commands are implemented
|
||||
let _admin_key = admin_key.unwrap();
|
||||
let admin_key = admin_key.unwrap();
|
||||
let api_url = cli.api_url.unwrap_or_else(|| "http://localhost:8080".to_string());
|
||||
|
||||
match cli.command {
|
||||
Commands::Status(cmd) => commands::status::run(cmd).await,
|
||||
|
|
@ -125,5 +129,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
Commands::Ui(cmd) => commands::ui::run(cmd).await,
|
||||
Commands::Tenant(cmd) => commands::tenant::run(cmd).await,
|
||||
Commands::Explain(cmd) => commands::explain::run(cmd).await,
|
||||
Commands::Key(cmd) => commands::key::run(cmd, &admin_key, &api_url).await,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,6 +171,7 @@ pub struct TopologyResponse {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NodeInfo {
|
||||
pub id: String,
|
||||
pub address: String,
|
||||
pub status: String,
|
||||
pub shard_count: u32,
|
||||
pub last_seen_ms: u64,
|
||||
|
|
@ -201,6 +202,7 @@ where
|
|||
.nodes()
|
||||
.map(|n| NodeInfo {
|
||||
id: n.id.as_str().to_string(),
|
||||
address: n.address.clone(),
|
||||
status: format!("{:?}", n.status).to_lowercase(),
|
||||
shard_count: 0, // TODO: compute from routing table
|
||||
last_seen_ms: 0, // TODO: track last health check time
|
||||
|
|
@ -303,6 +305,7 @@ mod tests {
|
|||
nodes: vec.
|
||||
|
||||
## Background (plan §9)
|
||||
|
||||
Meilisearch allows multiple admin-scoped keys (created via `POST /keys` with
|
||||
`actions: ["*"]`, `indexes: ["*"]`) to coexist. The `nodeMasterKey` in Miroir
|
||||
config is one such key. Because old and new keys are both valid until the old
|
||||
one is explicitly deleted, rotation is zero-downtime.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- `miroir-ctl` binary built from this repo
|
||||
- Admin API key (`MIROIR_ADMIN_API_KEY` env var, credentials file, or `--admin-key`)
|
||||
- Current `nodeMasterKey` value (`--current-key` or `MIROIR_NODE_MASTER_KEY` env var)
|
||||
- Miroir admin API reachable (default `http://localhost:8080`, override with `--api-url`)
|
||||
- `kubectl` access to update the K8s secret and restart Miroir pods
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Dry-run — prints the plan without executing
|
||||
miroir-ctl key rotate-node-master --dry-run \
|
||||
--current-key "$MIROIR_NODE_MASTER_KEY"
|
||||
|
||||
# Live rotation with auto-discovered nodes
|
||||
miroir-ctl key rotate-node-master \
|
||||
--current-key "$MIROIR_NODE_MASTER_KEY"
|
||||
|
||||
# Live rotation with explicit nodes
|
||||
miroir-ctl key rotate-node-master \
|
||||
--current-key "$MIROIR_NODE_MASTER_KEY" \
|
||||
--node http://meili-0.search.svc:7700 \
|
||||
--node http://meili-1.search.svc:7700 \
|
||||
--node http://meili-2.search.svc:7700
|
||||
```
|
||||
|
||||
## What the CLI Does (4 steps)
|
||||
|
||||
### Step 1 — Create new admin-scoped key on every Meilisearch node
|
||||
|
||||
`POST /keys` with `actions: ["*"]`, `indexes: ["*"]`. If any node fails, the
|
||||
CLI rolls back by deleting the new key from all nodes where creation succeeded.
|
||||
|
||||
### Step 2 — Print K8s Secret update instructions
|
||||
|
||||
The CLI prints a `kubectl patch secret` command. Apply it:
|
||||
|
||||
```bash
|
||||
kubectl -n search patch secret miroir-keys \
|
||||
-p '{"stringData":{"nodeMasterKey":"<new-key>"}}'
|
||||
```
|
||||
|
||||
Or update your ExternalSecret / OpenBao source and wait for ESO to sync.
|
||||
|
||||
### Step 3 — Rolling restart Miroir pods
|
||||
|
||||
```bash
|
||||
kubectl -n search rollout restart deployment/miroir
|
||||
kubectl -n search rollout status deployment/miroir
|
||||
```
|
||||
|
||||
During rollout, pods with the old key and pods with the new key both
|
||||
authenticate against Meilisearch — no downtime.
|
||||
|
||||
The CLI pauses and waits for you to confirm all pods are running.
|
||||
|
||||
### Step 4 — Delete old admin-scoped key
|
||||
|
||||
The CLI finds the old key UID via `GET /keys` (matching by prefix) and deletes
|
||||
it from all Meilisearch nodes with `DELETE /keys/{uid}`.
|
||||
|
||||
## CLI Flags
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--dry-run` | false | Print plan without executing |
|
||||
| `--current-key` | env `MIROIR_NODE_MASTER_KEY` | Current key (required) |
|
||||
| `--node` | auto-discovered | Meilisearch node URLs (repeatable) |
|
||||
| `--key-name` | `miroir-node-master` | Name for the new key |
|
||||
| `--expires-at` | none | Optional ISO 8601 expiration |
|
||||
| `--namespace` | `search` | K8s namespace |
|
||||
| `--secret-name` | `miroir-keys` | K8s Secret name |
|
||||
| `--yes` | false | Skip confirmation prompts |
|
||||
|
||||
## Manual Steps (if CLI is unavailable)
|
||||
|
||||
1. **Create new key** on each Meilisearch node:
|
||||
```bash
|
||||
for i in 0 1 2; do
|
||||
curl -s -X POST "http://meili-${i}.search.svc:7700/keys" \
|
||||
-H "Authorization: Bearer $CURRENT_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"miroir-node-master","description":"rotated key","actions":["*"],"indexes":["*"]}' \
|
||||
| jq '{uid,key}'
|
||||
done
|
||||
```
|
||||
|
||||
2. **Update secret** with the new key value from step 1.
|
||||
|
||||
3. **Rolling restart** Miroir deployment.
|
||||
|
||||
4. **Delete old key** — list keys, find the old one by prefix match, delete by UID:
|
||||
```bash
|
||||
curl -s http://meili-0.search.svc:7700/keys \
|
||||
-H "Authorization: Bearer $NEW_KEY" | jq '.results[] | {uid,key,name}'
|
||||
# Then DELETE /keys/{old-uid} on each node
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
# Confirm Miroir is healthy
|
||||
curl -s http://miroir.search.svc:7700/health
|
||||
|
||||
# Check topology
|
||||
miroir-ctl status
|
||||
|
||||
# Test search
|
||||
curl -s http://miroir.search.svc:7700/indexes/test-index/search \
|
||||
-H "Authorization: Bearer $MIROIR_MASTER_KEY" \
|
||||
-d '{"q": ""}'
|
||||
```
|
||||
|
||||
## Cadence
|
||||
|
||||
- Rotate on suspected compromise (immediately)
|
||||
- Rotate proactively every 90 days
|
||||
- Chain after startup-master rotation (see below)
|
||||
|
||||
## Relationship to Startup Master Rotation
|
||||
|
||||
If you have just rotated `MEILI_MASTER_KEY` (see
|
||||
[startup-master-key-rotation.md](startup-master-key-rotation.md)), the new
|
||||
Meilisearch nodes have no admin-scoped child keys yet. Create one using the
|
||||
new master key, then run this zero-downtime flow to rotate it.
|
||||
|
||||
## See Also
|
||||
|
||||
- [startup-master-key-rotation.md](startup-master-key-rotation.md) — startup master (requires maintenance window)
|
||||
- Plan §9 — full secrets handling documentation
|
||||
- `miroir-ctl key rotate-node-master --dry-run` — preview the rotation plan
|
||||
140
docs/runbooks/startup-master-key-rotation.md
Normal file
140
docs/runbooks/startup-master-key-rotation.md
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
# Startup Master Key Rotation (Maintenance Window Required)
|
||||
|
||||
> **This is NOT the zero-downtime flow.** The zero-downtime rotation applies to
|
||||
> admin-scoped child keys (`nodeMasterKey`) — see
|
||||
> `miroir-ctl key rotate-node-master --dry-run`. This runbook covers rotating
|
||||
> `MEILI_MASTER_KEY`, the startup env var fixed at Meilisearch process start.
|
||||
|
||||
## Background (plan §9)
|
||||
|
||||
Meilisearch CE has exactly one **startup master key** per process, supplied via
|
||||
`MEILI_MASTER_KEY`. It is fixed for the life of the process and cannot be
|
||||
rotated without a restart. All admin-scoped child keys created via `POST /keys`
|
||||
are validated against this startup master key.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- A maintenance window (Meilisearch will be briefly unavailable during pod restarts)
|
||||
- `kubectl` access to the cluster with write permissions on the target namespace
|
||||
- The new master key value (generate with `openssl rand -hex 32`)
|
||||
- Current `nodeMasterKey` value (needed to recreate admin-scoped child keys)
|
||||
|
||||
## Steps
|
||||
|
||||
### 1. Generate a new startup master key
|
||||
|
||||
```bash
|
||||
NEW_MASTER=$(openssl rand -hex 32)
|
||||
echo "New master key: $NEW_MASTER"
|
||||
```
|
||||
|
||||
### 2. Update the secret
|
||||
|
||||
**Option A — K8s Secret:**
|
||||
|
||||
```bash
|
||||
kubectl -n search patch secret miroir-keys \
|
||||
-p "{\"stringData\":{\"nodeMasterKey\":\"$NEW_MASTER\"}}"
|
||||
```
|
||||
|
||||
**Option B — ExternalSecret / OpenBao:**
|
||||
|
||||
Update the secret at the external source (e.g., OpenBao KV path
|
||||
`kv/search/miroir`, property `node_master_key`). Wait for ESO to sync.
|
||||
|
||||
### 3. Rolling restart Meilisearch StatefulSet (one pod at a time)
|
||||
|
||||
```bash
|
||||
# Check current StatefulSet name and replica count
|
||||
kubectl -n search get statefulset
|
||||
|
||||
# Rolling restart — one pod at a time to minimize downtime
|
||||
kubectl -n search rollout restart statefulset/meilisearch
|
||||
kubectl -n search rollout status statefulset/meilisearch
|
||||
```
|
||||
|
||||
During this phase:
|
||||
- Each Meilisearch pod restarts with the new `MEILI_MASTER_KEY`
|
||||
- Admin-scoped child keys created under the old master key are **invalidated**
|
||||
- Miroir pods cannot authenticate until new admin-scoped keys are created
|
||||
|
||||
### 4. Create a new admin-scoped child key on each node
|
||||
|
||||
Once all Meilisearch pods are running with the new master key, create a new
|
||||
admin-scoped key that Miroir will use:
|
||||
|
||||
```bash
|
||||
# For each Meilisearch pod (e.g., meili-0, meili-1, meili-2):
|
||||
for i in 0 1 2; do
|
||||
curl -s -X POST "http://meili-${i}.search.svc:7700/keys" \
|
||||
-H "Authorization: Bearer $NEW_MASTER" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "miroir-node-master",
|
||||
"description": "Admin-scoped key for Miroir orchestrator",
|
||||
"actions": ["*"],
|
||||
"indexes": ["*"]
|
||||
}' | jq -r '.key'
|
||||
done
|
||||
```
|
||||
|
||||
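Meilisearch derives each key value from the master key and the key's `uid`.
When `uid` is omitted, every node generates its own random `uid` and therefore
returns a different key value. To end up with a single key that works on every
node, capture the `uid` and key from the first node's response and pass that
same `uid` in the `POST /keys` body for the remaining nodes; nodes that share
the same `MEILI_MASTER_KEY` then return the same key value.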
|
||||
|
||||
**Important:** `POST /keys` returns the full key value **only once**. Save it.
|
||||
|
||||
If keys differ across nodes, note each one and run the zero-downtime rotation
|
||||
flow for each to converge on a single key.
|
||||
|
||||
### 5. Update Miroir's secret with the new admin-scoped key
|
||||
|
||||
```bash
|
||||
# Use the key value captured in step 4
|
||||
kubectl -n search patch secret miroir-keys \
|
||||
-p "{\"stringData\":{\"nodeMasterKey\":\"$ADMIN_SCOPED_KEY\"}}"
|
||||
```
|
||||
|
||||
### 6. Rolling restart Miroir pods
|
||||
|
||||
```bash
|
||||
kubectl -n search rollout restart deployment/miroir
|
||||
kubectl -n search rollout status deployment/miroir
|
||||
```
|
||||
|
||||
### 7. Verify
|
||||
|
||||
```bash
|
||||
# Check Miroir health
|
||||
curl -s http://miroir.search.svc:7700/health
|
||||
|
||||
# Check topology (requires admin key)
|
||||
curl -s http://miroir.search.svc:7700/_miroir/topology \
|
||||
-H "Authorization: Bearer $MIROIR_ADMIN_API_KEY" | jq .
|
||||
|
||||
# Run a test search to confirm end-to-end
|
||||
curl -s http://miroir.search.svc:7700/indexes/test-index/search \
|
||||
-H "Authorization: Bearer $MIROIR_MASTER_KEY" \
|
||||
-d '{"q": ""}'
|
||||
```
|
||||
|
||||
## Rollback
|
||||
|
||||
If the new master key causes issues:
|
||||
|
||||
1. Patch the secret back to the old master key value
|
||||
2. Rolling restart Meilisearch StatefulSet again
|
||||
3. Recreate admin-scoped child keys under the old master
|
||||
4. Update Miroir's secret and restart Miroir pods
|
||||
|
||||
## Cadence
|
||||
|
||||
- Rotate on suspected compromise (immediately)
|
||||
- Rotate proactively every 90 days
|
||||
- Coordinate with `nodeMasterKey` zero-downtime rotation (can chain: startup
|
||||
master rotation → zero-downtime child key rotation)
|
||||
|
||||
## See Also
|
||||
|
||||
- `miroir-ctl key rotate-node-master --dry-run` — zero-downtime child key rotation
|
||||
- Plan §9 — full secrets handling documentation
|
||||
Loading…
Add table
Reference in a new issue