1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
//! Schema evolution and migration tools for DataFrames
//!
//! This module provides a comprehensive set of tools for defining, versioning,
//! and migrating DataFrame schemas over time. It is a core part of the PandRS
//! v0.3.0 feature set.
//!
//! # Overview
//!
//! Schema evolution allows you to:
//!
//! - **Define schemas** — specify the expected structure of a DataFrame, including
//!   column names, types, nullability, constraints, and metadata.
//! - **Version schemas** — assign semantic versions to schemas so that you can
//!   track changes over time.
//! - **Create migrations** — define ordered sets of changes (add/remove/rename
//!   columns, change types, add constraints, etc.) that move data from one
//!   schema version to another.
//! - **Register schemas and migrations** — store them in a central registry and
//!   find migration paths automatically.
//! - **Apply migrations** — transform an actual `DataFrame` according to a
//!   migration, producing a new `DataFrame` that conforms to the target schema.
//! - **Validate data** — check that a `DataFrame` conforms to a schema, producing
//!   a detailed validation report.
//! - **Infer schemas** — automatically derive a schema from an existing `DataFrame`.
//! - **Check compatibility** — determine whether data can flow from one schema to
//!   another without data loss or type errors.
//! - **Serialize/deserialize** — save and load schemas and migrations as JSON or YAML.
//!
//! # Quick Start
//!
//! ```rust
//! use pandrs::schema_evolution::{
//!     DataFrameSchema, ColumnSchema, SchemaDataType, SchemaVersion,
//!     SchemaConstraint, Migration, SchemaChange,
//!     SchemaRegistry, SchemaMigrator,
//!     save_schema, load_schema, SchemaFormat,
//! };
//! use pandrs::{DataFrame, Series};
//!
//! // Define a schema
//! let v1 = DataFrameSchema::new("users", SchemaVersion::new(1, 0, 0))
//!     .with_column(ColumnSchema::new("id", SchemaDataType::Int64).with_nullable(false))
//!     .with_column(ColumnSchema::new("name", SchemaDataType::String))
//!     .with_constraint(SchemaConstraint::NotNull("id".to_string()));
//!
//! // Create a DataFrame
//! let mut df = DataFrame::new();
//! df.add_column("id".to_string(),
//!     Series::new(vec![1i64, 2, 3], Some("id".to_string())).expect("series"))
//!     .expect("add");
//! df.add_column("name".to_string(),
//!     Series::new(vec!["Alice".to_string(), "Bob".to_string(), "Carol".to_string()], Some("name".to_string())).expect("series"))
//!     .expect("add");
//!
//! // Validate the DataFrame against the schema
//! let migrator = SchemaMigrator::empty();
//! let report = migrator.validate(&df, &v1).expect("validate");
//! assert!(report.is_valid);
//!
//! // Define a migration to v1.1
//! let migration = Migration::new(
//!     "m001",
//!     SchemaVersion::new(1, 0, 0),
//!     SchemaVersion::new(1, 1, 0),
//!     "Add email column",
//! )
//! .with_change(SchemaChange::AddColumn {
//!     schema: ColumnSchema::new("email", SchemaDataType::String),
//!     position: None,
//! });
//!
//! // Apply the migration
//! let migrated_df = migrator.apply_migration(&df, &migration).expect("migrate");
//! assert!(migrated_df.contains_column("email"));
//! ```
// --- schema types ---
pub use ;
// --- evolution types ---
pub use ;
// --- registry ---
pub use SchemaRegistry;
// --- migrator ---
pub use ;
// --- serialization helpers ---
pub use ;