Skip to main content

contextdb_core/
table_meta.rs

1use crate::Direction;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
/// Schema-level metadata describing one table.
///
/// Only `Serialize` is derived; `Deserialize` is implemented by hand further
/// down and decodes sequence-encoded payloads positionally in this exact field
/// order, so reordering fields changes the sequence layout.
// NOTE(review): because `Deserialize` is not derived for this type, the
// `#[serde(default)]` attributes below presumably do not drive decoding (the
// hand-written visitor applies the defaults itself) — confirm before relying
// on them.
5#[derive(Debug, Clone, Default, Serialize)]
6pub struct TableMeta {
    /// Column definitions of the table.
7    pub columns: Vec<ColumnDef>,
    /// Table-level immutability flag (presumably marks the whole table as
    /// immutable — confirm against the enforcement code).
8    pub immutable: bool,
    /// Optional state-machine constraint (a column plus its transition map).
9    pub state_machine: Option<StateMachineConstraint>,
    /// Edge type names associated with this table (presumably those that must
    /// stay acyclic — TODO confirm).
10    #[serde(default)]
11    pub dag_edge_types: Vec<String>,
    /// Uniqueness constraints; each inner list presumably holds the column
    /// names of one constraint.
12    #[serde(default)]
13    pub unique_constraints: Vec<Vec<String>>,
    /// Name of the natural-key column, if one is configured.
14    pub natural_key_column: Option<String>,
    /// Propagation rules attached to this table.
15    #[serde(default)]
16    pub propagation_rules: Vec<PropagationRule>,
    /// Default time-to-live in seconds, if any.
17    #[serde(default)]
18    pub default_ttl_seconds: Option<u64>,
    /// Sync-safety flag (semantics not visible here — verify against callers).
19    #[serde(default)]
20    pub sync_safe: bool,
    /// Name of the expiry column, if any.
21    #[serde(default)]
22    pub expires_column: Option<String>,
    /// Index declarations recorded for this table; the last field in the
    /// sequence layout.
23    #[serde(default)]
24    pub indexes: Vec<IndexDecl>,
25}
26
27// Custom `Deserialize` that tolerates prior on-disk `TableMeta` encoded
28// without the trailing `indexes` field (backward-compat).
29impl<'de> serde::Deserialize<'de> for TableMeta {
30    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
31    where
32        D: serde::Deserializer<'de>,
33    {
34        use serde::de::{MapAccess, SeqAccess, Visitor};
35        use std::fmt;
36
37        struct TableMetaVisitor;
38
39        impl<'de> Visitor<'de> for TableMetaVisitor {
40            type Value = TableMeta;
41
42            fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43                f.write_str("a TableMeta")
44            }
45
46            fn visit_seq<A>(self, mut seq: A) -> std::result::Result<TableMeta, A::Error>
47            where
48                A: SeqAccess<'de>,
49            {
50                let columns = seq
51                    .next_element::<Vec<ColumnDef>>()?
52                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
53                let immutable = seq
54                    .next_element::<bool>()?
55                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
56                let state_machine = seq
57                    .next_element::<Option<StateMachineConstraint>>()?
58                    .ok_or_else(|| serde::de::Error::invalid_length(2, &self))?;
59                let dag_edge_types = seq.next_element::<Vec<String>>()?.unwrap_or_default();
60                let unique_constraints =
61                    seq.next_element::<Vec<Vec<String>>>()?.unwrap_or_default();
62                let natural_key_column = seq.next_element::<Option<String>>()?.unwrap_or_default();
63                let propagation_rules = seq
64                    .next_element::<Vec<PropagationRule>>()?
65                    .unwrap_or_default();
66                let default_ttl_seconds = seq.next_element::<Option<u64>>()?.unwrap_or_default();
67                let sync_safe = seq.next_element::<bool>()?.unwrap_or_default();
68                // Ok(None) at a declared-length tail is legitimate serde
69                // behavior (declared-length sequence exhausted). Decode
70                // errors, in contrast, indicate corruption or incompatible
71                // on-disk payloads and must propagate rather than silently
72                // default.
73                let expires_column = seq.next_element::<Option<String>>()?.unwrap_or_default();
74                let indexes = seq.next_element::<Vec<IndexDecl>>()?.unwrap_or_default();
75                Ok(TableMeta {
76                    columns,
77                    immutable,
78                    state_machine,
79                    dag_edge_types,
80                    unique_constraints,
81                    natural_key_column,
82                    propagation_rules,
83                    default_ttl_seconds,
84                    sync_safe,
85                    expires_column,
86                    indexes,
87                })
88            }
89
90            fn visit_map<A>(self, mut map: A) -> std::result::Result<TableMeta, A::Error>
91            where
92                A: MapAccess<'de>,
93            {
94                let mut columns: Option<Vec<ColumnDef>> = None;
95                let mut immutable: Option<bool> = None;
96                let mut state_machine: Option<Option<StateMachineConstraint>> = None;
97                let mut dag_edge_types: Option<Vec<String>> = None;
98                let mut unique_constraints: Option<Vec<Vec<String>>> = None;
99                let mut natural_key_column: Option<Option<String>> = None;
100                let mut propagation_rules: Option<Vec<PropagationRule>> = None;
101                let mut default_ttl_seconds: Option<Option<u64>> = None;
102                let mut sync_safe: Option<bool> = None;
103                let mut expires_column: Option<Option<String>> = None;
104                let mut indexes: Option<Vec<IndexDecl>> = None;
105
106                while let Some(key) = map.next_key::<String>()? {
107                    match key.as_str() {
108                        "columns" => columns = Some(map.next_value()?),
109                        "immutable" => immutable = Some(map.next_value()?),
110                        "state_machine" => state_machine = Some(map.next_value()?),
111                        "dag_edge_types" => dag_edge_types = Some(map.next_value()?),
112                        "unique_constraints" => unique_constraints = Some(map.next_value()?),
113                        "natural_key_column" => natural_key_column = Some(map.next_value()?),
114                        "propagation_rules" => propagation_rules = Some(map.next_value()?),
115                        "default_ttl_seconds" => default_ttl_seconds = Some(map.next_value()?),
116                        "sync_safe" => sync_safe = Some(map.next_value()?),
117                        "expires_column" => expires_column = Some(map.next_value()?),
118                        "indexes" => indexes = Some(map.next_value()?),
119                        _ => {
120                            let _: serde::de::IgnoredAny = map.next_value()?;
121                        }
122                    }
123                }
124
125                Ok(TableMeta {
126                    columns: columns.ok_or_else(|| serde::de::Error::missing_field("columns"))?,
127                    immutable: immutable
128                        .ok_or_else(|| serde::de::Error::missing_field("immutable"))?,
129                    state_machine: state_machine.unwrap_or_default(),
130                    dag_edge_types: dag_edge_types.unwrap_or_default(),
131                    unique_constraints: unique_constraints.unwrap_or_default(),
132                    natural_key_column: natural_key_column.unwrap_or_default(),
133                    propagation_rules: propagation_rules.unwrap_or_default(),
134                    default_ttl_seconds: default_ttl_seconds.unwrap_or_default(),
135                    sync_safe: sync_safe.unwrap_or_default(),
136                    expires_column: expires_column.unwrap_or_default(),
137                    indexes: indexes.unwrap_or_default(),
138                })
139            }
140        }
141
142        const FIELDS: &[&str] = &[
143            "columns",
144            "immutable",
145            "state_machine",
146            "dag_edge_types",
147            "unique_constraints",
148            "natural_key_column",
149            "propagation_rules",
150            "default_ttl_seconds",
151            "sync_safe",
152            "expires_column",
153            "indexes",
154        ];
155        deserializer.deserialize_struct("TableMeta", FIELDS, TableMetaVisitor)
156    }
157}
158
159/// Direction for a column within an engine-local index declaration.
160/// Distinct from `contextdb_parser::ast::SortDirection`, which carries a
161/// `CosineDistance` variant that is meaningful only for vector ordering.
162#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
163pub enum SortDirection {
    /// Ascending order; this is the `Default` variant.
164    #[default]
165    Asc,
    /// Descending order.
166    Desc,
167}
168
169/// How an index entered `TableMeta.indexes`. `Auto` indexes are synthesized
170/// at CREATE TABLE time from PRIMARY KEY / UNIQUE constraints. `UserDeclared`
171/// indexes come from `CREATE INDEX` DDL. The distinction drives surface
172/// rendering (auto-indexes omitted from `.schema`), schema-rendering verbose
173/// flags, and sync DDL emission (auto-indexes are not re-emitted since they
174/// are derived from the CreateTable payload).
175#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
176pub enum IndexKind {
    /// Index synthesized from a PRIMARY KEY / UNIQUE constraint.
177    Auto,
    /// Index created through `CREATE INDEX`; this is the `Default` variant,
    /// so it is also what a payload without a `kind` value decodes to.
178    #[default]
179    UserDeclared,
180}
181
/// Declaration of a single index stored in `TableMeta.indexes`.
182#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
183pub struct IndexDecl {
    /// Index name.
184    pub name: String,
    /// Indexed entries as `(name, direction)` pairs; the string is presumably
    /// the column name — confirm against the DDL code.
185    pub columns: Vec<(String, SortDirection)>,
    /// Origin of the index. `#[serde(default)]` makes a missing value decode
    /// as `IndexKind::default()`, which is `UserDeclared`.
186    #[serde(default)]
187    pub kind: IndexKind,
188}
189
190impl IndexDecl {
191    pub fn estimated_bytes(&self) -> usize {
192        32 + self.name.len() * 16
193            + self
194                .columns
195                .iter()
196                .fold(0usize, |acc, (c, _)| acc.saturating_add(24 + c.len() * 16))
197    }
198}
199
/// A propagation rule attached to a table (stored in
/// `TableMeta.propagation_rules`).
// NOTE(review): the per-variant descriptions below are inferred from field
// names only; the code that evaluates these rules is not visible here —
// confirm before relying on them.
200#[derive(Debug, Clone, Serialize, Deserialize)]
201pub enum PropagationRule {
    /// Rule keyed on a foreign-key relationship (`fk_column` referencing
    /// `referenced_table.referenced_column`).
202    ForeignKey {
203        fk_column: String,
204        referenced_table: String,
205        referenced_column: String,
        // Presumably the state whose entry triggers the rule.
206        trigger_state: String,
        // Presumably the state applied to affected rows.
207        target_state: String,
208        max_depth: u32,
209        abort_on_failure: bool,
210    },
    /// Rule keyed on graph edges of type `edge_type`, followed in `direction`.
211    Edge {
212        edge_type: String,
213        direction: Direction,
214        trigger_state: String,
215        target_state: String,
216        max_depth: u32,
217        abort_on_failure: bool,
218    },
    /// Rule carrying only a `trigger_state`; presumably excludes rows in that
    /// state from vector search — TODO confirm.
219    VectorExclusion {
220        trigger_state: String,
221    },
222}
223
/// State-machine constraint bound to one column of a table.
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct StateMachineConstraint {
    /// Name of the column the constraint applies to.
226    pub column: String,
    /// Transition table keyed by state name; each value lists state names
    /// (presumably the states reachable from the key — the size estimator
    /// binds the pair as `(from, tos)`).
227    pub transitions: HashMap<String, Vec<String>>,
228}
229
/// Definition of a single table column.
///
/// Only `Serialize` is derived; `Deserialize` is implemented by hand further
/// down and decodes sequence-encoded payloads positionally in this exact field
/// order, so reordering fields changes the sequence layout.
// NOTE(review): because `Deserialize` is not derived for this type, the
// `#[serde(default)]` attributes below presumably do not drive decoding (the
// hand-written visitor applies the defaults itself) — confirm before relying
// on them.
230#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
231pub struct ColumnDef {
    /// Column name.
232    pub name: String,
    /// Declared type of the column.
233    pub column_type: ColumnType,
    /// Nullability flag.
234    pub nullable: bool,
    /// Primary-key flag.
235    pub primary_key: bool,
    /// Uniqueness flag.
236    #[serde(default)]
237    pub unique: bool,
    /// Default value kept as a string (presumably the textual expression from
    /// the DDL — confirm).
238    #[serde(default)]
239    pub default: Option<String>,
    /// Foreign-key target (table and column), if any.
240    #[serde(default)]
241    pub references: Option<ForeignKeyReference>,
    /// Expiry flag (presumably marks this column as the expiry column —
    /// confirm).
242    #[serde(default)]
243    pub expires: bool,
    /// Column-level immutability flag.
244    #[serde(default)]
245    pub immutable: bool,
    /// Vector quantization setting; `VectorQuantization::default()` is `F32`.
246    #[serde(default)]
247    pub quantization: VectorQuantization,
    /// Optional rank policy attached to the column; the last field in the
    /// sequence layout.
248    #[serde(default)]
249    pub rank_policy: Option<RankPolicy>,
250}
251
252// Custom `Deserialize` that tolerates prior on-disk schemas missing the
253// trailing fields (backward-compat, I5). JSON / other formats that distinguish
254// "missing field" from "required field" continue to work via `serde(default)`
255// on the fields themselves.
256impl<'de> serde::Deserialize<'de> for ColumnDef {
257    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
258    where
259        D: serde::Deserializer<'de>,
260    {
261        use serde::de::{MapAccess, SeqAccess, Visitor};
262        use std::fmt;
263
264        struct ColumnDefVisitor;
265
266        impl<'de> Visitor<'de> for ColumnDefVisitor {
267            type Value = ColumnDef;
268
269            fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
270                f.write_str("a ColumnDef")
271            }
272
273            fn visit_seq<A>(self, mut seq: A) -> std::result::Result<ColumnDef, A::Error>
274            where
275                A: SeqAccess<'de>,
276            {
277                let name = seq
278                    .next_element::<String>()?
279                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
280                let column_type = seq
281                    .next_element::<ColumnType>()?
282                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
283                let nullable = seq
284                    .next_element::<bool>()?
285                    .ok_or_else(|| serde::de::Error::invalid_length(2, &self))?;
286                let primary_key = seq
287                    .next_element::<bool>()?
288                    .ok_or_else(|| serde::de::Error::invalid_length(3, &self))?;
289                let unique = seq.next_element::<bool>()?.unwrap_or_default();
290                let default = seq.next_element::<Option<String>>()?.unwrap_or_default();
291                let references = seq
292                    .next_element::<Option<ForeignKeyReference>>()?
293                    .unwrap_or_default();
294                let expires = seq.next_element::<bool>()?.unwrap_or_default();
295                // Trailing field. `Ok(None)` means the declared-length seq
296                // ended naturally (legitimate for JSON paths). Decode errors
297                // propagate — silently defaulting to `false` would let a
298                // corrupt payload pose as a non-immutable column.
299                let immutable = seq.next_element::<bool>()?.unwrap_or_default();
300                let quantization = seq
301                    .next_element::<VectorQuantization>()?
302                    .unwrap_or_default();
303                let rank_policy = seq
304                    .next_element::<Option<RankPolicy>>()?
305                    .unwrap_or_default();
306                Ok(ColumnDef {
307                    name,
308                    column_type,
309                    nullable,
310                    primary_key,
311                    unique,
312                    default,
313                    references,
314                    expires,
315                    immutable,
316                    quantization,
317                    rank_policy,
318                })
319            }
320
321            fn visit_map<A>(self, mut map: A) -> std::result::Result<ColumnDef, A::Error>
322            where
323                A: MapAccess<'de>,
324            {
325                let mut name: Option<String> = None;
326                let mut column_type: Option<ColumnType> = None;
327                let mut nullable: Option<bool> = None;
328                let mut primary_key: Option<bool> = None;
329                let mut unique: Option<bool> = None;
330                let mut default: Option<Option<String>> = None;
331                let mut references: Option<Option<ForeignKeyReference>> = None;
332                let mut expires: Option<bool> = None;
333                let mut immutable: Option<bool> = None;
334                let mut quantization: Option<VectorQuantization> = None;
335                let mut rank_policy: Option<Option<RankPolicy>> = None;
336
337                while let Some(key) = map.next_key::<String>()? {
338                    match key.as_str() {
339                        "name" => name = Some(map.next_value()?),
340                        "column_type" => column_type = Some(map.next_value()?),
341                        "nullable" => nullable = Some(map.next_value()?),
342                        "primary_key" => primary_key = Some(map.next_value()?),
343                        "unique" => unique = Some(map.next_value()?),
344                        "default" => default = Some(map.next_value()?),
345                        "references" => references = Some(map.next_value()?),
346                        "expires" => expires = Some(map.next_value()?),
347                        "immutable" => immutable = Some(map.next_value()?),
348                        "quantization" => quantization = Some(map.next_value()?),
349                        "rank_policy" => rank_policy = Some(map.next_value()?),
350                        _ => {
351                            let _: serde::de::IgnoredAny = map.next_value()?;
352                        }
353                    }
354                }
355
356                Ok(ColumnDef {
357                    name: name.ok_or_else(|| serde::de::Error::missing_field("name"))?,
358                    column_type: column_type
359                        .ok_or_else(|| serde::de::Error::missing_field("column_type"))?,
360                    nullable: nullable
361                        .ok_or_else(|| serde::de::Error::missing_field("nullable"))?,
362                    primary_key: primary_key
363                        .ok_or_else(|| serde::de::Error::missing_field("primary_key"))?,
364                    unique: unique.unwrap_or_default(),
365                    default: default.unwrap_or_default(),
366                    references: references.unwrap_or_default(),
367                    expires: expires.unwrap_or_default(),
368                    immutable: immutable.unwrap_or_default(),
369                    quantization: quantization.unwrap_or_default(),
370                    rank_policy: rank_policy.unwrap_or_default(),
371                })
372            }
373        }
374
375        const FIELDS: &[&str] = &[
376            "name",
377            "column_type",
378            "nullable",
379            "primary_key",
380            "unique",
381            "default",
382            "references",
383            "expires",
384            "immutable",
385            "quantization",
386            "rank_policy",
387        ];
388        deserializer.deserialize_struct("ColumnDef", FIELDS, ColumnDefVisitor)
389    }
390}
391
/// Rank policy that can be attached to a column (`ColumnDef.rank_policy`).
// NOTE(review): field meanings are not established by the code visible here;
// the per-field notes are limited to what the declarations show.
392#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
393pub struct RankPolicy {
394    pub joined_table: String,
395    pub joined_column: String,
    /// Optional on input: with `#[serde(default)]` a payload lacking this
    /// field decodes it as an empty string.
396    #[serde(default)]
397    pub anchor_column: String,
398    pub sort_key: String,
    /// Formula kept as text (presumably evaluated elsewhere — confirm).
399    pub formula: String,
400    pub protected_index: String,
401}
402
/// Target of a column-level foreign-key reference (`ColumnDef.references`).
403#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
404pub struct ForeignKeyReference {
    /// Name of the referenced table.
405    pub table: String,
    /// Name of the referenced column.
406    pub column: String,
407}
408
/// Declared type of a column.
// NOTE(review): variant order is presumably part of the encoding for
// index-based serde formats — confirm before reordering or inserting variants.
409#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
410pub enum ColumnType {
411    Integer,
412    Real,
413    Text,
414    Boolean,
415    Json,
416    Uuid,
    /// Vector column; the payload is presumably the vector dimension — TODO
    /// confirm.
417    Vector(usize),
418    Timestamp,
419    TxId,
420}
421
/// Quantization setting for a vector column.
///
/// Per `storage_bytes`: `F32` costs `size_of::<f32>()` bytes per dimension,
/// `SQ8` one byte per dimension plus 8, and `SQ4` half a byte per dimension
/// (rounded up) plus 8.
422#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
423pub enum VectorQuantization {
    /// Unquantized `f32` components; this is the `Default` variant.
424    #[default]
425    F32,
    /// One byte per dimension (presumably 8-bit scalar quantization).
426    SQ8,
    /// Two dimensions per byte (presumably 4-bit scalar quantization).
427    SQ4,
428}
429
430impl VectorQuantization {
431    pub fn as_str(self) -> &'static str {
432        match self {
433            VectorQuantization::F32 => "F32",
434            VectorQuantization::SQ8 => "SQ8",
435            VectorQuantization::SQ4 => "SQ4",
436        }
437    }
438
439    pub fn storage_bytes(self, dimension: usize) -> usize {
440        match self {
441            VectorQuantization::F32 => dimension.saturating_mul(std::mem::size_of::<f32>()),
442            VectorQuantization::SQ8 => dimension.saturating_add(8),
443            VectorQuantization::SQ4 => dimension.div_ceil(2).saturating_add(8),
444        }
445    }
446}
447
448impl TableMeta {
449    pub fn estimated_bytes(&self) -> usize {
450        let columns_bytes = self.columns.iter().fold(0usize, |acc, column| {
451            acc.saturating_add(column.estimated_bytes())
452        });
453        let state_machine_bytes = self
454            .state_machine
455            .as_ref()
456            .map(StateMachineConstraint::estimated_bytes)
457            .unwrap_or(0);
458        let dag_bytes = self.dag_edge_types.iter().fold(0usize, |acc, edge_type| {
459            acc.saturating_add(32 + edge_type.len() * 16)
460        });
461        let unique_constraint_bytes =
462            self.unique_constraints.iter().fold(0usize, |acc, columns| {
463                acc.saturating_add(
464                    24 + columns
465                        .iter()
466                        .map(|column| 16 + column.len() * 16)
467                        .sum::<usize>(),
468                )
469            });
470        let natural_key_bytes = self
471            .natural_key_column
472            .as_ref()
473            .map(|column| 32 + column.len() * 16)
474            .unwrap_or(0);
475        let propagation_bytes = self.propagation_rules.iter().fold(0usize, |acc, rule| {
476            acc.saturating_add(rule.estimated_bytes())
477        });
478        let expires_bytes = self
479            .expires_column
480            .as_ref()
481            .map(|column| 32 + column.len() * 16)
482            .unwrap_or(0);
483        let indexes_bytes = self
484            .indexes
485            .iter()
486            .fold(0usize, |acc, i| acc.saturating_add(i.estimated_bytes()));
487
488        16 + columns_bytes
489            + state_machine_bytes
490            + dag_bytes
491            + unique_constraint_bytes
492            + natural_key_bytes
493            + propagation_bytes
494            + expires_bytes
495            + indexes_bytes
496            + self.default_ttl_seconds.map(|_| 8).unwrap_or(0)
497            + 8
498    }
499}
500
501impl PropagationRule {
502    fn estimated_bytes(&self) -> usize {
503        match self {
504            PropagationRule::ForeignKey {
505                fk_column,
506                referenced_table,
507                referenced_column,
508                trigger_state,
509                target_state,
510                ..
511            } => {
512                24 + fk_column.len() * 16
513                    + referenced_table.len() * 16
514                    + referenced_column.len() * 16
515                    + trigger_state.len() * 16
516                    + target_state.len() * 16
517            }
518            PropagationRule::Edge {
519                edge_type,
520                trigger_state,
521                target_state,
522                ..
523            } => 24 + edge_type.len() * 16 + trigger_state.len() * 16 + target_state.len() * 16,
524            PropagationRule::VectorExclusion { trigger_state } => 16 + trigger_state.len() * 16,
525        }
526    }
527}
528
529impl StateMachineConstraint {
530    fn estimated_bytes(&self) -> usize {
531        let transitions_bytes = self.transitions.iter().fold(0usize, |acc, (from, tos)| {
532            acc.saturating_add(
533                32 + from.len() * 16 + tos.iter().map(|to| 16 + to.len() * 16).sum::<usize>(),
534            )
535        });
536        24 + self.column.len() * 16 + transitions_bytes
537    }
538}
539
540impl ColumnDef {
541    fn estimated_bytes(&self) -> usize {
542        let default_bytes = self
543            .default
544            .as_ref()
545            .map(|value| 32 + value.len() * 16)
546            .unwrap_or(0);
547        let reference_bytes = self
548            .references
549            .as_ref()
550            .map(|reference| 32 + reference.table.len() * 16 + reference.column.len() * 16)
551            .unwrap_or(0);
552        let rank_policy_bytes = self
553            .rank_policy
554            .as_ref()
555            .map(|policy| {
556                40 + policy.joined_table.len() * 16
557                    + policy.joined_column.len() * 16
558                    + policy.anchor_column.len() * 16
559                    + policy.sort_key.len() * 16
560                    + policy.formula.len() * 16
561                    + policy.protected_index.len() * 16
562            })
563            .unwrap_or(0);
564        8 + self.name.len() * 16
565            + self.column_type.estimated_bytes()
566            + default_bytes
567            + reference_bytes
568            + rank_policy_bytes
569            + 8
570    }
571}
572
573impl ColumnType {
574    fn estimated_bytes(&self) -> usize {
575        match self {
576            ColumnType::Integer => 16,
577            ColumnType::Real => 16,
578            ColumnType::Text => 16,
579            ColumnType::Boolean => 16,
580            ColumnType::Json => 24,
581            ColumnType::Uuid => 16,
582            ColumnType::Vector(_) => 24,
583            ColumnType::Timestamp => 16,
584            ColumnType::TxId => 8,
585        }
586    }
587}