Skip to main content

nodedb_types/columnar/
schema.rs

1//! Strict document and columnar schemas with shared operations trait.
2
3use serde::{Deserialize, Serialize};
4
5use super::column_type::ColumnDef;
6use crate::columnar::ColumnType;
7
8/// Shared schema operations (eliminates duplication between Strict and Columnar).
9pub trait SchemaOps {
10    fn columns(&self) -> &[ColumnDef];
11
12    fn column_index(&self, name: &str) -> Option<usize> {
13        self.columns().iter().position(|c| c.name == name)
14    }
15
16    fn column(&self, name: &str) -> Option<&ColumnDef> {
17        self.columns().iter().find(|c| c.name == name)
18    }
19
20    fn primary_key_columns(&self) -> Vec<&ColumnDef> {
21        self.columns().iter().filter(|c| c.primary_key).collect()
22    }
23
24    fn len(&self) -> usize {
25        self.columns().len()
26    }
27
28    fn is_empty(&self) -> bool {
29        self.columns().is_empty()
30    }
31}
32
33/// Schema for a strict document collection (Binary Tuple serialization).
34#[derive(
35    Debug,
36    Clone,
37    PartialEq,
38    Eq,
39    Serialize,
40    Deserialize,
41    zerompk::ToMessagePack,
42    zerompk::FromMessagePack,
43)]
44pub struct StrictSchema {
45    pub columns: Vec<ColumnDef>,
46    pub version: u16,
47}
48
49/// Schema for a columnar collection (compressed segment files).
50#[derive(
51    Debug,
52    Clone,
53    PartialEq,
54    Eq,
55    Serialize,
56    Deserialize,
57    zerompk::ToMessagePack,
58    zerompk::FromMessagePack,
59)]
60pub struct ColumnarSchema {
61    pub columns: Vec<ColumnDef>,
62    pub version: u16,
63}
64
65/// Schema validation errors.
66#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
67pub enum SchemaError {
68    #[error("schema must have at least one column")]
69    Empty,
70    #[error("duplicate column name: '{0}'")]
71    DuplicateColumn(String),
72    #[error("VECTOR dimension must be positive, got 0 for column '{0}'")]
73    ZeroVectorDim(String),
74    #[error("primary key column '{0}' must be NOT NULL")]
75    NullablePrimaryKey(String),
76}
77
78fn validate_columns(columns: &[ColumnDef]) -> Result<(), SchemaError> {
79    if columns.is_empty() {
80        return Err(SchemaError::Empty);
81    }
82    let mut seen = std::collections::HashSet::with_capacity(columns.len());
83    for col in columns {
84        if !seen.insert(&col.name) {
85            return Err(SchemaError::DuplicateColumn(col.name.clone()));
86        }
87        if col.primary_key && col.nullable {
88            return Err(SchemaError::NullablePrimaryKey(col.name.clone()));
89        }
90        if let ColumnType::Vector(0) = col.column_type {
91            return Err(SchemaError::ZeroVectorDim(col.name.clone()));
92        }
93    }
94    Ok(())
95}
96
97impl SchemaOps for StrictSchema {
98    fn columns(&self) -> &[ColumnDef] {
99        &self.columns
100    }
101}
102
103impl SchemaOps for ColumnarSchema {
104    fn columns(&self) -> &[ColumnDef] {
105        &self.columns
106    }
107}
108
109impl StrictSchema {
110    pub fn new(columns: Vec<ColumnDef>) -> Result<Self, SchemaError> {
111        validate_columns(&columns)?;
112        Ok(Self {
113            columns,
114            version: 1,
115        })
116    }
117
118    /// Count of variable-length columns (determines offset table size).
119    pub fn variable_column_count(&self) -> usize {
120        self.columns
121            .iter()
122            .filter(|c| c.column_type.is_variable_length())
123            .count()
124    }
125
126    /// Total fixed-field byte size (for Binary Tuple layout computation).
127    pub fn fixed_fields_size(&self) -> usize {
128        self.columns
129            .iter()
130            .filter_map(|c| c.column_type.fixed_size())
131            .sum()
132    }
133
134    /// Null bitmap size in bytes.
135    pub fn null_bitmap_size(&self) -> usize {
136        self.columns.len().div_ceil(8)
137    }
138}
139
140impl ColumnarSchema {
141    pub fn new(columns: Vec<ColumnDef>) -> Result<Self, SchemaError> {
142        validate_columns(&columns)?;
143        Ok(Self {
144            columns,
145            version: 1,
146        })
147    }
148}
149
/// Unit tests for schema construction, validation errors, the shared
/// [`SchemaOps`] accessors, and the strict-layout helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::ColumnType;

    /// A well-formed strict schema is accepted at version 1; an empty column
    /// list is rejected.
    #[test]
    fn strict_schema_validation() {
        let schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
        ]);
        assert!(schema.is_ok());
        assert_eq!(schema.unwrap().version, 1);
        assert!(StrictSchema::new(vec![]).is_err());
    }

    /// The provided `SchemaOps` methods report length, index lookup, name
    /// lookup, and primary-key columns.
    #[test]
    fn schema_ops_trait() {
        let schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
            ColumnDef::nullable("balance", ColumnType::Decimal),
        ])
        .unwrap();
        assert_eq!(schema.len(), 3);
        assert!(!schema.is_empty());
        assert_eq!(schema.column_index("balance"), Some(2));
        assert!(schema.column("nonexistent").is_none());
        assert_eq!(schema.primary_key_columns().len(), 1);
    }

    /// Layout helpers: bitmap bytes, fixed-width byte total, and the count of
    /// variable-length columns.
    #[test]
    fn strict_layout_helpers() {
        let schema = StrictSchema::new(vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
            ColumnDef::nullable("balance", ColumnType::Decimal),
            ColumnDef::nullable("bio", ColumnType::String),
        ])
        .unwrap();
        assert_eq!(schema.null_bitmap_size(), 1);
        assert_eq!(schema.fixed_fields_size(), 8 + 16);
        assert_eq!(schema.variable_column_count(), 2);
    }

    /// A well-formed columnar schema is accepted and keeps both columns.
    #[test]
    fn columnar_schema_validation() {
        let schema = ColumnarSchema::new(vec![
            ColumnDef::required("time", ColumnType::Timestamp),
            ColumnDef::nullable("cpu", ColumnType::Float64),
        ]);
        assert!(schema.is_ok());
        assert_eq!(schema.unwrap().len(), 2);
    }

    /// Both schema kinds reject an empty column list with `SchemaError::Empty`.
    #[test]
    fn empty_schema_rejected() {
        assert!(matches!(
            StrictSchema::new(Vec::new()),
            Err(SchemaError::Empty)
        ));
        assert!(matches!(
            ColumnarSchema::new(Vec::new()),
            Err(SchemaError::Empty)
        ));
    }

    /// Two columns with the same name are rejected, and the error carries the
    /// repeated name.
    #[test]
    fn duplicate_column_rejected() {
        let strict_cols = vec![
            ColumnDef::required("id", ColumnType::Int64),
            ColumnDef::required("id", ColumnType::String),
        ];
        assert!(matches!(
            StrictSchema::new(strict_cols),
            Err(SchemaError::DuplicateColumn(name)) if name == "id"
        ));

        let columnar_cols = vec![
            ColumnDef::required("time", ColumnType::Timestamp),
            ColumnDef::nullable("time", ColumnType::Float64),
        ];
        assert!(matches!(
            ColumnarSchema::new(columnar_cols),
            Err(SchemaError::DuplicateColumn(name)) if name == "time"
        ));
    }

    /// A column that is both nullable and a primary key is rejected.
    #[test]
    fn nullable_pk_rejected() {
        // Built field-by-field so `nullable` and `primary_key` are both set
        // explicitly.
        let cols = vec![ColumnDef {
            name: "id".into(),
            column_type: ColumnType::Int64,
            nullable: true,
            default: None,
            primary_key: true,
            modifiers: Vec::new(),
            generated_expr: None,
            generated_deps: Vec::new(),
        }];
        assert!(matches!(
            StrictSchema::new(cols),
            Err(SchemaError::NullablePrimaryKey(_))
        ));
    }

    /// A vector column declared with dimension 0 is rejected.
    #[test]
    fn zero_vector_dim_rejected() {
        let cols = vec![ColumnDef::required("emb", ColumnType::Vector(0))];
        assert!(matches!(
            StrictSchema::new(cols),
            Err(SchemaError::ZeroVectorDim(_))
        ));
    }
}