Skip to main content

nodedb_types/columnar/
schema.rs

1//! Strict document and columnar schemas with shared operations trait.
2
3use serde::{Deserialize, Serialize};
4
5use super::column_type::ColumnDef;
6use crate::columnar::ColumnType;
7
8/// Shared schema operations (eliminates duplication between Strict and Columnar).
9pub trait SchemaOps {
10    fn columns(&self) -> &[ColumnDef];
11
12    fn column_index(&self, name: &str) -> Option<usize> {
13        self.columns().iter().position(|c| c.name == name)
14    }
15
16    fn column(&self, name: &str) -> Option<&ColumnDef> {
17        self.columns().iter().find(|c| c.name == name)
18    }
19
20    fn primary_key_columns(&self) -> Vec<&ColumnDef> {
21        self.columns().iter().filter(|c| c.primary_key).collect()
22    }
23
24    fn len(&self) -> usize {
25        self.columns().len()
26    }
27
28    fn is_empty(&self) -> bool {
29        self.columns().is_empty()
30    }
31}
32
/// Schema for a strict document collection (Binary Tuple serialization).
///
/// Alongside the live column list it keeps tombstones for dropped columns, which
/// `StrictSchema::schema_for_version` uses to rebuild the layout of tuples
/// written under an older schema version.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
pub struct StrictSchema {
    /// Live (not dropped) column definitions.
    pub columns: Vec<ColumnDef>,
    /// Schema version; `StrictSchema::new` starts it at 1.
    pub version: u16,
    /// Columns that were removed via `ALTER DROP COLUMN`. Retained so the
    /// reader can reconstruct the physical layout of tuples written before
    /// the drop.
    // serde: defaults to an empty list when the field is absent and is left out
    // of the serialized form while empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub dropped_columns: Vec<DroppedColumn>,
}
53
/// Tombstone for a column removed by `ALTER DROP COLUMN`.
///
/// `StrictSchema::schema_for_version` reads these to put a dropped column back
/// into the column list when asked for a version at which it still existed.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
pub struct DroppedColumn {
    /// The full column definition at time of drop.
    pub def: ColumnDef,
    /// The column's position in the column list before it was removed.
    /// (On re-insertion the value is clamped to the length of the list being rebuilt.)
    pub position: usize,
    /// The schema version at which the column was dropped. The column counts as
    /// present for any requested version strictly below this one.
    pub dropped_at_version: u16,
}
73
/// Schema for a columnar collection (compressed segment files).
///
/// Unlike `StrictSchema` it carries no dropped-column tombstones.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
pub struct ColumnarSchema {
    /// Column definitions of the collection.
    pub columns: Vec<ColumnDef>,
    /// Schema version; `ColumnarSchema::new` starts it at 1.
    pub version: u16,
}
89
/// Schema validation errors.
///
/// Produced by `validate_columns`, and therefore by `StrictSchema::new` and
/// `ColumnarSchema::new`. String payloads carry the offending column's name.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SchemaError {
    /// The column list was empty.
    #[error("schema must have at least one column")]
    Empty,
    /// Two columns share the same name (exact string comparison).
    #[error("duplicate column name: '{0}'")]
    DuplicateColumn(String),
    /// A `ColumnType::Vector` column was declared with dimension 0.
    #[error("VECTOR dimension must be positive, got 0 for column '{0}'")]
    ZeroVectorDim(String),
    /// A column is flagged as primary key and as nullable at the same time.
    #[error("primary key column '{0}' must be NOT NULL")]
    NullablePrimaryKey(String),
}
102
103fn validate_columns(columns: &[ColumnDef]) -> Result<(), SchemaError> {
104    if columns.is_empty() {
105        return Err(SchemaError::Empty);
106    }
107    let mut seen = std::collections::HashSet::with_capacity(columns.len());
108    for col in columns {
109        if !seen.insert(&col.name) {
110            return Err(SchemaError::DuplicateColumn(col.name.clone()));
111        }
112        if col.primary_key && col.nullable {
113            return Err(SchemaError::NullablePrimaryKey(col.name.clone()));
114        }
115        if let ColumnType::Vector(0) = col.column_type {
116            return Err(SchemaError::ZeroVectorDim(col.name.clone()));
117        }
118    }
119    Ok(())
120}
121
122impl SchemaOps for StrictSchema {
123    fn columns(&self) -> &[ColumnDef] {
124        &self.columns
125    }
126}
127
128impl SchemaOps for ColumnarSchema {
129    fn columns(&self) -> &[ColumnDef] {
130        &self.columns
131    }
132}
133
134impl StrictSchema {
135    pub fn new(columns: Vec<ColumnDef>) -> Result<Self, SchemaError> {
136        validate_columns(&columns)?;
137        Ok(Self {
138            columns,
139            version: 1,
140            dropped_columns: Vec::new(),
141        })
142    }
143
144    /// Count of variable-length columns (determines offset table size).
145    pub fn variable_column_count(&self) -> usize {
146        self.columns
147            .iter()
148            .filter(|c| c.column_type.is_variable_length())
149            .count()
150    }
151
152    /// Total fixed-field byte size (for Binary Tuple layout computation).
153    pub fn fixed_fields_size(&self) -> usize {
154        self.columns
155            .iter()
156            .filter_map(|c| c.column_type.fixed_size())
157            .sum()
158    }
159
160    /// Null bitmap size in bytes.
161    pub fn null_bitmap_size(&self) -> usize {
162        self.columns.len().div_ceil(8)
163    }
164
165    /// Build a sub-schema matching the physical layout of tuples written at
166    /// the given version. Columns added after `version` are excluded;
167    /// columns dropped after `version` are re-inserted at their original
168    /// positions.
169    pub fn schema_for_version(&self, version: u16) -> StrictSchema {
170        // Start with live columns that existed at this version.
171        let mut cols: Vec<ColumnDef> = self
172            .columns
173            .iter()
174            .filter(|c| c.added_at_version <= version)
175            .cloned()
176            .collect();
177
178        // Re-insert dropped columns that were still alive at this version,
179        // sorted by position (ascending) so inserts don't shift later indices.
180        let mut to_reinsert: Vec<&DroppedColumn> = self
181            .dropped_columns
182            .iter()
183            .filter(|dc| dc.def.added_at_version <= version && dc.dropped_at_version > version)
184            .collect();
185        to_reinsert.sort_by_key(|dc| dc.position);
186        for dc in to_reinsert {
187            let pos = dc.position.min(cols.len());
188            cols.insert(pos, dc.def.clone());
189        }
190
191        StrictSchema {
192            version,
193            columns: cols,
194            dropped_columns: Vec::new(),
195        }
196    }
197
198    /// Parse a SQL default literal (e.g. `'n/a'`, `0`, `true`) into a `Value`.
199    ///
200    /// Covers the common cases produced by `ALTER ADD COLUMN ... DEFAULT ...`.
201    /// Returns `Value::Null` for expressions that cannot be trivially evaluated
202    /// at read time (functions, sub-queries, etc.).
203    pub fn parse_default_literal(expr: &str) -> crate::value::Value {
204        use crate::value::Value;
205
206        let trimmed = expr.trim();
207
208        // String literals: 'foo'
209        if trimmed.starts_with('\'') && trimmed.ends_with('\'') && trimmed.len() >= 2 {
210            return Value::String(trimmed[1..trimmed.len() - 1].replace("''", "'"));
211        }
212
213        // Boolean
214        match trimmed.to_uppercase().as_str() {
215            "TRUE" => return Value::Bool(true),
216            "FALSE" => return Value::Bool(false),
217            "NULL" => return Value::Null,
218            _ => {}
219        }
220
221        // Integer
222        if let Ok(i) = trimmed.parse::<i64>() {
223            return Value::Integer(i);
224        }
225
226        // Float
227        if let Ok(f) = trimmed.parse::<f64>() {
228            return Value::Float(f);
229        }
230
231        Value::Null
232    }
233}
234
235impl ColumnarSchema {
236    pub fn new(columns: Vec<ColumnDef>) -> Result<Self, SchemaError> {
237        validate_columns(&columns)?;
238        Ok(Self {
239            columns,
240            version: 1,
241        })
242    }
243}
244
/// Unit tests for schema construction, validation, layout helpers,
/// version reconstruction and default-literal parsing.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::ColumnType;

    #[test]
    fn strict_schema_validation() {
        let schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
        ]);
        assert!(schema.is_ok());
        assert!(StrictSchema::new(vec![]).is_err());
    }

    #[test]
    fn schema_ops_trait() {
        let schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
            ColumnDef::nullable("balance", ColumnType::Decimal),
        ])
        .unwrap();
        assert_eq!(schema.len(), 3);
        assert_eq!(schema.column_index("balance"), Some(2));
        assert!(schema.column("nonexistent").is_none());
        assert_eq!(schema.primary_key_columns().len(), 1);
    }

    #[test]
    fn strict_layout_helpers() {
        let schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
            ColumnDef::nullable("balance", ColumnType::Decimal),
            ColumnDef::nullable("bio", ColumnType::String),
        ])
        .unwrap();
        assert_eq!(schema.null_bitmap_size(), 1);
        assert_eq!(schema.fixed_fields_size(), 8 + 16);
        assert_eq!(schema.variable_column_count(), 2);
    }

    #[test]
    fn columnar_schema_validation() {
        let schema = ColumnarSchema::new(vec![
            ColumnDef::required("time", ColumnType::Timestamp),
            ColumnDef::nullable("cpu", ColumnType::Float64),
        ]);
        assert!(schema.is_ok());
        assert_eq!(schema.unwrap().len(), 2);
    }

    #[test]
    fn nullable_pk_rejected() {
        let cols = vec![ColumnDef {
            name: "id".into(),
            column_type: ColumnType::Int64,
            nullable: true,
            default: None,
            primary_key: true,
            modifiers: Vec::new(),
            generated_expr: None,
            generated_deps: Vec::new(),
            added_at_version: 1,
        }];
        assert!(matches!(
            StrictSchema::new(cols),
            Err(SchemaError::NullablePrimaryKey(_))
        ));
    }

    #[test]
    fn zero_vector_dim_rejected() {
        let cols = vec![ColumnDef::required("emb", ColumnType::Vector(0))];
        assert!(matches!(
            StrictSchema::new(cols),
            Err(SchemaError::ZeroVectorDim(_))
        ));
    }

    #[test]
    fn duplicate_column_rejected() {
        let cols = vec![
            ColumnDef::required("id", ColumnType::Int64),
            ColumnDef::nullable("id", ColumnType::String),
        ];
        assert!(matches!(
            StrictSchema::new(cols),
            Err(SchemaError::DuplicateColumn(name)) if name == "id"
        ));
    }

    #[test]
    fn schema_for_version_rebuilds_older_layout() {
        // History: v1 [id, name] -> v2 drops `name` -> v3 adds `email`.
        let mut schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("email", ColumnType::String),
        ])
        .unwrap();
        schema.columns[0].added_at_version = 1;
        schema.columns[1].added_at_version = 3;
        schema.version = 3;

        let mut dropped = ColumnDef::nullable("name", ColumnType::String);
        dropped.added_at_version = 1;
        schema.dropped_columns.push(DroppedColumn {
            def: dropped,
            position: 1,
            dropped_at_version: 2,
        });

        let names = |s: &StrictSchema| -> Vec<String> {
            s.columns.iter().map(|c| c.name.clone()).collect()
        };

        let v1 = schema.schema_for_version(1);
        assert_eq!(names(&v1), ["id", "name"]);
        assert_eq!(v1.version, 1);
        assert!(v1.dropped_columns.is_empty());

        assert_eq!(names(&schema.schema_for_version(2)), ["id"]);
        assert_eq!(names(&schema.schema_for_version(3)), ["id", "email"]);
    }

    #[test]
    fn parse_default_literal_common_cases() {
        use crate::value::Value;

        assert!(matches!(
            StrictSchema::parse_default_literal(" 'n/a' "),
            Value::String(s) if s == "n/a"
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("'it''s'"),
            Value::String(s) if s == "it's"
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("true"),
            Value::Bool(true)
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("FALSE"),
            Value::Bool(false)
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("null"),
            Value::Null
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("-42"),
            Value::Integer(-42)
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("1.5"),
            Value::Float(f) if f == 1.5
        ));
        assert!(matches!(
            StrictSchema::parse_default_literal("now()"),
            Value::Null
        ));
    }
}
325}