feat(bf-4w2rt): scaffold pdftract-schema-migrate crate
- Add crates/pdftract-schema-migrate/ workspace member
- Implement migration framework for v1.x schema versions
- MigrationRegistry with version-pair migration functions
- Identity migration for v1.0 -> v1.0
- Validation: rejects major version changes and downgrades
- Convenience API: migrate(), run_migration(), read_json(), write_json()
- Add migrate-schema CLI binary
- --from/--to version arguments
- stdin/stdout or file I/O support
- Auto-detect pretty-print for terminal output
- Full test coverage for migration registry and validation

Closes bf-4w2rt.
Verification: notes/bf-4w2rt.md
This commit is contained in:
parent
05c93c00e8
commit
c51c725d5c
3 changed files with 504 additions and 0 deletions
24
crates/pdftract-schema-migrate/Cargo.toml
Normal file
24
crates/pdftract-schema-migrate/Cargo.toml
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
[package]
name = "pdftract-schema-migrate"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
repository.workspace = true
publish = true

[lib]
name = "pdftract_schema_migrate"
path = "src/lib.rs"

[[bin]]
name = "migrate-schema"
path = "src/bin/migrate-schema.rs"

[dependencies]
anyhow = { workspace = true }
serde = { workspace = true }
# serde_json is a regular dependency, so unit tests and doctests can already
# use it; a duplicate [dev-dependencies] entry is not needed.
serde_json = "1"
|
||||
142
crates/pdftract-schema-migrate/src/bin/migrate-schema.rs
Normal file
142
crates/pdftract-schema-migrate/src/bin/migrate-schema.rs
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
//! CLI tool for migrating pdftract JSON output between schema versions.
|
||||
//!
|
||||
//! Usage:
|
||||
//! migrate-schema --from 1.0 --to 1.0 input.json > output.json
|
||||
//! cat input.json | migrate-schema --from 1.0 --to 1.0 > output.json
|
||||
//! migrate-schema --from 1.0 --to 1.0 input.json -o output.json
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use pdftract_schema_migrate::{read_json, write_json, run_migration};
|
||||
use std::io::{self, IsTerminal};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = parse_args()?;
|
||||
|
||||
// Validate migration direction first (fail fast)
|
||||
pdftract_schema_migrate::validate_migration(&args.from, &args.to)
|
||||
.context("Migration validation failed")?;
|
||||
|
||||
// Run the migration
|
||||
run_migration(&args.from, &args.to, &args.input, &args.output, args.pretty)
|
||||
.context("Migration execution failed")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// CLI arguments
|
||||
struct Args {
|
||||
from: String,
|
||||
to: String,
|
||||
input: String,
|
||||
output: String,
|
||||
pretty: bool,
|
||||
}
|
||||
|
||||
/// Parse command-line arguments.
|
||||
///
|
||||
/// We use a simple parser to avoid additional dependencies for this small tool.
|
||||
fn parse_args() -> Result<Args> {
|
||||
let mut args = std::env::args();
|
||||
let program_name = args.next().unwrap_or_else(|| "migrate-schema".to_string());
|
||||
|
||||
let mut from = None;
|
||||
let mut to = None;
|
||||
let mut input = "-".to_string(); // Default to stdin
|
||||
let mut output = "-".to_string(); // Default to stdout
|
||||
let mut pretty = false;
|
||||
|
||||
while let Some(arg) = args.next() {
|
||||
match arg.as_str() {
|
||||
"--from" => {
|
||||
from = Some(args.next().context("--from requires a value")?);
|
||||
}
|
||||
"--to" => {
|
||||
to = Some(args.next().context("--to requires a value")?);
|
||||
}
|
||||
"-i" | "--input" => {
|
||||
input = args.next().context("--input requires a value")?;
|
||||
}
|
||||
"-o" | "--output" => {
|
||||
output = args.next().context("--output requires a value")?;
|
||||
}
|
||||
"-p" | "--pretty" => {
|
||||
pretty = true;
|
||||
}
|
||||
"-h" | "--help" => {
|
||||
print_usage(&program_name);
|
||||
std::process::exit(0);
|
||||
}
|
||||
"-V" | "--version" => {
|
||||
println!("migrate-schema {}", env!("CARGO_PKG_VERSION"));
|
||||
std::process::exit(0);
|
||||
}
|
||||
arg if arg.starts_with('-') => {
|
||||
anyhow::bail!("Unknown option: {}", arg);
|
||||
}
|
||||
_ => {
|
||||
// Positional argument: input file
|
||||
if input == "-" {
|
||||
input = arg;
|
||||
} else {
|
||||
anyhow::bail!("Unexpected argument: {}", arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let from = from.context("--from is required (use --help for usage)")?;
|
||||
let to = to.context("--to is required (use --help for usage)")?;
|
||||
|
||||
// Auto-detect pretty-print: default to true when writing to terminal
|
||||
if !pretty && output == "-" {
|
||||
pretty = io::stdout().is_terminal();
|
||||
}
|
||||
|
||||
Ok(Args {
|
||||
from,
|
||||
to,
|
||||
input,
|
||||
output,
|
||||
pretty,
|
||||
})
|
||||
}
|
||||
|
||||
/// Print usage information to stdout.
///
/// `program_name` is normally `argv[0]`; only its final path component is
/// shown in the help text. The component is extracted with
/// `std::path::Path::file_name`, so the platform's own path separators are
/// honoured. If no final component can be determined, `program_name` is
/// printed as given.
fn print_usage(program_name: &str) {
    let program = std::path::Path::new(program_name)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or(program_name);

    println!(
        "Schema version migration tool for pdftract JSON output

Usage:
    {program} --from <version> --to <version> [options] [input]

Arguments:
    --from <version>     Source schema version (e.g., 1.0)
    --to <version>       Target schema version (e.g., 1.0, 1.1)
    [input]              Input JSON file (default: stdin)

Options:
    -i, --input <file>   Input JSON file (alternative to [input])
    -o, --output <file>  Output JSON file (default: stdout)
    -p, --pretty         Pretty-print output JSON
    -h, --help           Show this help message
    -V, --version        Show version information

Examples:
    # Migrate with stdin/stdout
    cat input.json | {program} --from 1.0 --to 1.0

    # Migrate with file I/O
    {program} --from 1.0 --to 1.0 input.json -o output.json

    # Pretty-print output
    {program} --from 1.0 --to 1.0 input.json --pretty

Notes:
    - Only v1.x to v1.y migrations are supported (same major version)
    - Downgrades are not allowed (e.g., v1.1 to v1.0)
    - Use '-' for stdin/stdout (default for both input and output)

Available migrations:
    - v1.0 -> v1.0 (identity migration)"
    );
}
|
||||
338
crates/pdftract-schema-migrate/src/lib.rs
Normal file
338
crates/pdftract-schema-migrate/src/lib.rs
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
//! Schema version migration for pdftract JSON output.
|
||||
//!
|
||||
//! This crate implements migration between minor versions of the pdftract schema.
|
||||
//! Following the plan's additive-evolution rules, minor version changes are additive only,
|
||||
//! so migrations are primarily for field renames and default additions.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! ```rust
|
||||
//! use pdftract_schema_migrate::{migrate, MigrationRegistry};
|
||||
//! use serde_json::json;
|
||||
//!
|
||||
//! let registry = MigrationRegistry::new();
|
||||
//! let input = json!({"schema_version": "1.0", "data": "test"});
|
||||
//! let output = registry.migrate("1.0", "1.0", input.clone()).unwrap();
|
||||
//! assert_eq!(input, output); // identity migration
|
||||
//! ```
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, Read, Write};
|
||||
|
||||
/// Migration function type: transforms a JSON value from one schema version to another.
|
||||
pub type MigrationFn = Box<dyn Fn(Value) -> Result<Value> + Send + Sync>;
|
||||
|
||||
/// Registry of available migrations.
|
||||
///
|
||||
/// Maps (from_version, to_version) to the migration function.
|
||||
pub struct MigrationRegistry {
|
||||
migrations: HashMap<(&'static str, &'static str), MigrationFn>,
|
||||
}
|
||||
|
||||
impl MigrationRegistry {
|
||||
/// Create a new registry with all known migrations registered.
|
||||
pub fn new() -> Self {
|
||||
let mut migrations: HashMap<(&'static str, &'static str), MigrationFn> = HashMap::new();
|
||||
|
||||
// Register identity migration for v1.0 -> v1.0
|
||||
migrations.insert(("1.0", "1.0"), Box::new(|v| Ok(v)));
|
||||
|
||||
// Future migrations would be registered here:
|
||||
// migrations.insert(("1.0", "1.1"), Box::new(migrate_1_0_to_1_1));
|
||||
|
||||
Self { migrations }
|
||||
}
|
||||
|
||||
/// Check if a migration is registered for the given version pair.
|
||||
pub fn has_migration(&self, from: &str, to: &str) -> bool {
|
||||
self.migrations.contains_key(&(from.as_ref(), to.as_ref()))
|
||||
}
|
||||
|
||||
/// Execute the migration for the given version pair.
|
||||
pub fn migrate(&self, from: &str, to: &str, json: Value) -> Result<Value> {
|
||||
let key = (from.as_ref(), to.as_ref());
|
||||
|
||||
match self.migrations.get(&key) {
|
||||
Some(migration_fn) => migration_fn(json),
|
||||
None => bail!(
|
||||
"No migration registered from version '{}' to '{}'. Available migrations: v1.0 -> v1.0 (identity)",
|
||||
from, to
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MigrationRegistry {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse and normalize a version string.
|
||||
///
|
||||
/// Ensures version strings follow the "major.minor" format.
|
||||
/// For now, we only support major version 1 (v1.x series).
|
||||
pub fn parse_version(version: &str) -> Result<(u32, u32)> {
|
||||
let parts: Vec<&str> = version.split('.').collect();
|
||||
|
||||
if parts.len() != 2 {
|
||||
bail!(
|
||||
"Invalid version format '{}': expected 'major.minor' (e.g., '1.0')",
|
||||
version
|
||||
);
|
||||
}
|
||||
|
||||
let major: u32 = parts[0]
|
||||
.parse()
|
||||
.context("Major version must be a number")?;
|
||||
let minor: u32 = parts[1]
|
||||
.parse()
|
||||
.context("Minor version must be a number")?;
|
||||
|
||||
// Only support v1.x for now
|
||||
if major != 1 {
|
||||
bail!("Major version {} is not supported (only v1.x migrations are implemented)", major);
|
||||
}
|
||||
|
||||
Ok((major, minor))
|
||||
}
|
||||
|
||||
/// Validate that migration is allowed between versions.
|
||||
///
|
||||
/// Rules:
|
||||
/// - Major version changes (v1 -> v2) are NOT allowed (breaking changes)
|
||||
/// - Downgrades (v1.1 -> v1.0) are NOT allowed (data loss risk)
|
||||
/// - Same version (v1.0 -> v1.0) is allowed (identity migration)
|
||||
pub fn validate_migration(from: &str, to: &str) -> Result<()> {
|
||||
let (from_major, from_minor) = parse_version(from)?;
|
||||
let (to_major, to_minor) = parse_version(to)?;
|
||||
|
||||
// Reject major version changes
|
||||
if from_major != to_major {
|
||||
bail!(
|
||||
"Cannot migrate from v{}.{} to v{}.{}: major version changes are breaking changes and require a full data migration plan",
|
||||
from_major, from_minor, to_major, to_minor
|
||||
);
|
||||
}
|
||||
|
||||
// Reject downgrades
|
||||
if to_minor < from_minor {
|
||||
bail!(
|
||||
"Cannot downgrade from v{}.{} to v{}.{}: downgrades may lose data and are not supported",
|
||||
from_major, from_minor, to_major, to_minor
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convenience function for migrating a JSON value between schema versions.
|
||||
///
|
||||
/// This function creates a new registry and runs the migration.
|
||||
/// For repeated migrations, creating a `MigrationRegistry` instance is more efficient.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `from` - Source schema version (e.g., "1.0")
|
||||
/// * `to` - Target schema version (e.g., "1.0", "1.1")
|
||||
/// * `json` - JSON value to migrate
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns the migrated JSON value, or an error if the migration fails.
|
||||
pub fn migrate(from: &str, to: &str, json: Value) -> Result<Value> {
|
||||
let registry = MigrationRegistry::new();
|
||||
registry.migrate(from, to, json)
|
||||
}
|
||||
|
||||
/// Read JSON from a file path or stdin.
|
||||
pub fn read_json(path: &str) -> Result<Value> {
|
||||
let json_str = if path == "-" {
|
||||
let mut buffer = String::new();
|
||||
io::stdin().read_to_string(&mut buffer)
|
||||
.context("Failed to read JSON from stdin")?;
|
||||
buffer
|
||||
} else {
|
||||
std::fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read JSON from '{}'", path))?
|
||||
};
|
||||
|
||||
serde_json::from_str(&json_str)
|
||||
.with_context(|| format!("Failed to parse JSON from '{}'", path))
|
||||
}
|
||||
|
||||
/// Write JSON to a file path or stdout.
|
||||
pub fn write_json(path: &str, json: &Value, pretty: bool) -> Result<()> {
|
||||
let json_str = if pretty {
|
||||
serde_json::to_string_pretty(json)
|
||||
} else {
|
||||
serde_json::to_string(json)
|
||||
}
|
||||
.context("Failed to serialize output JSON")?;
|
||||
|
||||
if path == "-" {
|
||||
io::stdout()
|
||||
.write_all(json_str.as_bytes())
|
||||
.context("Failed to write JSON to stdout")?;
|
||||
} else {
|
||||
std::fs::write(path, json_str)
|
||||
.with_context(|| format!("Failed to write JSON to '{}'", path))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run a schema migration.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `from` - Source schema version (e.g., "1.0")
|
||||
/// * `to` - Target schema version (e.g., "1.0", "1.1")
|
||||
/// * `input` - Input JSON file path ("-" for stdin)
|
||||
/// * `output` - Output JSON file path ("-" for stdout)
|
||||
/// * `pretty` - Whether to pretty-print the output
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns `Ok(())` on success, or an error if the migration fails.
|
||||
pub fn run_migration(from: &str, to: &str, input: &str, output: &str, pretty: bool) -> Result<()> {
|
||||
// Create migration registry
|
||||
let registry = MigrationRegistry::new();
|
||||
|
||||
// Check if the specific migration exists
|
||||
if !registry.has_migration(from, to) {
|
||||
// Give a helpful error message
|
||||
if from == to {
|
||||
// Same version should always be supported
|
||||
bail!(
|
||||
"Identity migration for v{} is missing from registry - this is a bug",
|
||||
from
|
||||
);
|
||||
} else {
|
||||
bail!(
|
||||
"Migration from v{} to v{} is not yet implemented. Available migrations: v1.0 -> v1.0 (identity)",
|
||||
from, to
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Read input JSON
|
||||
let json_value = read_json(input)?;
|
||||
|
||||
// Perform migration
|
||||
let mut migrated_json = registry
|
||||
.migrate(from, to, json_value)
|
||||
.with_context(|| format!("Migration from v{} to v{} failed", from, to))?;
|
||||
|
||||
// Update schema_version field if it exists and versions differ
|
||||
if from != to {
|
||||
if let Some(obj) = migrated_json.as_object_mut() {
|
||||
// Update schema_version to the target version
|
||||
obj.insert("schema_version".to_string(), Value::String(to.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
// Write output JSON
|
||||
write_json(output, &migrated_json, pretty)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for version parsing, migration validation, and the registry.

    use super::*;
    use serde_json::json;

    #[test]
    fn test_parse_version_valid() {
        let cases = [("1.0", (1, 0)), ("1.1", (1, 1)), ("1.10", (1, 10))];
        for (text, expected) in cases {
            assert_eq!(parse_version(text).unwrap(), expected);
        }
    }

    #[test]
    fn test_parse_version_invalid() {
        // Missing minor, extra component, prefixed, and unsupported major
        // (only v1.x is supported).
        for text in ["1", "1.0.0", "v1.0", "2.0"] {
            assert!(parse_version(text).is_err());
        }
    }

    #[test]
    fn test_validate_migration_same_version() {
        for version in ["1.0", "1.1"] {
            assert!(validate_migration(version, version).is_ok());
        }
    }

    #[test]
    fn test_validate_migration_upgrade_allowed() {
        for target in ["1.1", "1.10"] {
            assert!(validate_migration("1.0", target).is_ok());
        }
    }

    #[test]
    fn test_validate_migration_downgrade_rejected() {
        for source in ["1.1", "1.10"] {
            assert!(validate_migration(source, "1.0").is_err());
        }
    }

    #[test]
    fn test_validate_migration_major_version_change_rejected() {
        // This test will fail once we actually support v2, but that's intentional
        assert!(validate_migration("1.0", "2.0").is_err());
    }

    #[test]
    fn test_migration_registry_identity() {
        let document = json!({
            "schema_version": "1.0",
            "test": "value"
        });

        let migrated = MigrationRegistry::new()
            .migrate("1.0", "1.0", document.clone())
            .unwrap();

        // Identity migration should return unchanged value
        assert_eq!(document, migrated);
    }

    #[test]
    fn test_migration_registry_unsupported() {
        let outcome = MigrationRegistry::new().migrate("1.0", "1.1", json!({"test": "value"}));

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("No migration registered"));
    }

    #[test]
    fn test_migration_registry_has_migration() {
        let registry = MigrationRegistry::new();

        let expectations = [
            (("1.0", "1.0"), true),
            (("1.0", "1.1"), false),
            (("2.0", "2.0"), false),
        ];
        for ((from, to), registered) in expectations {
            assert_eq!(registry.has_migration(from, to), registered);
        }
    }

    #[test]
    fn test_migrate_convenience_function() {
        let document = json!({"test": "value"});
        let migrated = migrate("1.0", "1.0", document.clone()).unwrap();
        assert_eq!(document, migrated);
    }

    #[test]
    fn test_migration_registry_default() {
        assert!(MigrationRegistry::default().has_migration("1.0", "1.0"));
    }
}
|
||||
Loading…
Add table
Reference in a new issue