Skip to main content

nodedb_types/columnar/
schema.rs

1//! Strict document and columnar schemas with shared operations trait.
2
3use serde::{Deserialize, Serialize};
4
5use super::column_type::ColumnDef;
6use crate::columnar::ColumnType;
7
8/// Shared schema operations (eliminates duplication between Strict and Columnar).
9pub trait SchemaOps {
10    fn columns(&self) -> &[ColumnDef];
11
12    fn column_index(&self, name: &str) -> Option<usize> {
13        self.columns().iter().position(|c| c.name == name)
14    }
15
16    fn column(&self, name: &str) -> Option<&ColumnDef> {
17        self.columns().iter().find(|c| c.name == name)
18    }
19
20    fn primary_key_columns(&self) -> Vec<&ColumnDef> {
21        self.columns().iter().filter(|c| c.primary_key).collect()
22    }
23
24    fn len(&self) -> usize {
25        self.columns().len()
26    }
27
28    fn is_empty(&self) -> bool {
29        self.columns().is_empty()
30    }
31}
32
33/// Schema for a strict document collection (Binary Tuple serialization).
34#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
35pub struct StrictSchema {
36    pub columns: Vec<ColumnDef>,
37    pub version: u16,
38}
39
40/// Schema for a columnar collection (compressed segment files).
41#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
42pub struct ColumnarSchema {
43    pub columns: Vec<ColumnDef>,
44    pub version: u16,
45}
46
47/// Schema validation errors.
48#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
49pub enum SchemaError {
50    #[error("schema must have at least one column")]
51    Empty,
52    #[error("duplicate column name: '{0}'")]
53    DuplicateColumn(String),
54    #[error("VECTOR dimension must be positive, got 0 for column '{0}'")]
55    ZeroVectorDim(String),
56    #[error("primary key column '{0}' must be NOT NULL")]
57    NullablePrimaryKey(String),
58}
59
60fn validate_columns(columns: &[ColumnDef]) -> Result<(), SchemaError> {
61    if columns.is_empty() {
62        return Err(SchemaError::Empty);
63    }
64    let mut seen = std::collections::HashSet::with_capacity(columns.len());
65    for col in columns {
66        if !seen.insert(&col.name) {
67            return Err(SchemaError::DuplicateColumn(col.name.clone()));
68        }
69        if col.primary_key && col.nullable {
70            return Err(SchemaError::NullablePrimaryKey(col.name.clone()));
71        }
72        if let ColumnType::Vector(0) = col.column_type {
73            return Err(SchemaError::ZeroVectorDim(col.name.clone()));
74        }
75    }
76    Ok(())
77}
78
79impl SchemaOps for StrictSchema {
80    fn columns(&self) -> &[ColumnDef] {
81        &self.columns
82    }
83}
84
85impl SchemaOps for ColumnarSchema {
86    fn columns(&self) -> &[ColumnDef] {
87        &self.columns
88    }
89}
90
91impl StrictSchema {
92    pub fn new(columns: Vec<ColumnDef>) -> Result<Self, SchemaError> {
93        validate_columns(&columns)?;
94        Ok(Self {
95            columns,
96            version: 1,
97        })
98    }
99
100    /// Count of variable-length columns (determines offset table size).
101    pub fn variable_column_count(&self) -> usize {
102        self.columns
103            .iter()
104            .filter(|c| c.column_type.is_variable_length())
105            .count()
106    }
107
108    /// Total fixed-field byte size (for Binary Tuple layout computation).
109    pub fn fixed_fields_size(&self) -> usize {
110        self.columns
111            .iter()
112            .filter_map(|c| c.column_type.fixed_size())
113            .sum()
114    }
115
116    /// Null bitmap size in bytes.
117    pub fn null_bitmap_size(&self) -> usize {
118        self.columns.len().div_ceil(8)
119    }
120}
121
122impl ColumnarSchema {
123    pub fn new(columns: Vec<ColumnDef>) -> Result<Self, SchemaError> {
124        validate_columns(&columns)?;
125        Ok(Self {
126            columns,
127            version: 1,
128        })
129    }
130}
131
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::ColumnType;

    /// A well-formed column list is accepted; an empty one is rejected.
    #[test]
    fn strict_schema_validation() {
        let columns = vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
        ];
        let built = StrictSchema::new(columns);
        assert!(built.is_ok());

        let empty = StrictSchema::new(vec![]);
        assert!(empty.is_err());
    }

    /// The default `SchemaOps` methods work through a concrete schema.
    #[test]
    fn schema_ops_trait() {
        let columns = vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
            ColumnDef::nullable("balance", ColumnType::Decimal),
        ];
        let schema = StrictSchema::new(columns).unwrap();

        assert_eq!(schema.len(), 3);
        assert_eq!(schema.column_index("balance"), Some(2));
        assert!(schema.column("nonexistent").is_none());

        let keys = schema.primary_key_columns();
        assert_eq!(keys.len(), 1);
    }

    /// Layout helpers for one Int64, one Decimal and two String columns.
    #[test]
    fn strict_layout_helpers() {
        let columns = vec![
            ColumnDef::required("id", ColumnType::Int64).with_primary_key(),
            ColumnDef::nullable("name", ColumnType::String),
            ColumnDef::nullable("balance", ColumnType::Decimal),
            ColumnDef::nullable("bio", ColumnType::String),
        ];
        let schema = StrictSchema::new(columns).unwrap();

        assert_eq!(schema.null_bitmap_size(), 1);
        assert_eq!(schema.fixed_fields_size(), 8 + 16);
        assert_eq!(schema.variable_column_count(), 2);
    }

    /// A columnar schema without a primary key is accepted.
    #[test]
    fn columnar_schema_validation() {
        let columns = vec![
            ColumnDef::required("time", ColumnType::Timestamp),
            ColumnDef::nullable("cpu", ColumnType::Float64),
        ];
        let built = ColumnarSchema::new(columns);
        assert!(built.is_ok());
        assert_eq!(built.unwrap().len(), 2);
    }

    /// A column that is both nullable and a primary key is rejected.
    #[test]
    fn nullable_pk_rejected() {
        let nullable_key = ColumnDef {
            name: "id".into(),
            column_type: ColumnType::Int64,
            nullable: true,
            default: None,
            primary_key: true,
        };
        let outcome = StrictSchema::new(vec![nullable_key]);
        assert!(matches!(outcome, Err(SchemaError::NullablePrimaryKey(_))));
    }

    /// A zero-dimension vector column is rejected.
    #[test]
    fn zero_vector_dim_rejected() {
        let zero_dim = ColumnDef::required("emb", ColumnType::Vector(0));
        let outcome = StrictSchema::new(vec![zero_dim]);
        assert!(matches!(outcome, Err(SchemaError::ZeroVectorDim(_))));
    }
}