// nodedb_types/collection.rs

//! Collection type enum shared between Origin and Lite.
//!
//! The collection type determines routing, storage format, and query execution strategy.
4
5use serde::{Deserialize, Serialize};
6
7use crate::columnar::{ColumnarProfile, DocumentMode, StrictSchema};
8use crate::kv::{KV_DEFAULT_INLINE_THRESHOLD, KvConfig, KvTtlPolicy};
9
10/// The type of a collection, determining its storage engine and query behavior.
11///
12/// Three top-level modes:
13/// - `Document`: B-tree storage in redb (schemaless MessagePack or strict Binary Tuples).
14/// - `Columnar`: Compressed segment files with profile specialization (plain, timeseries, spatial).
15/// - `KeyValue`: Hash-indexed O(1) point lookups with typed value fields (Binary Tuples).
16#[derive(
17    Debug,
18    Clone,
19    PartialEq,
20    Eq,
21    Serialize,
22    Deserialize,
23    zerompk::ToMessagePack,
24    zerompk::FromMessagePack,
25)]
26#[serde(tag = "storage")]
27pub enum CollectionType {
28    /// Document storage in redb B-tree.
29    /// Schemaless (MessagePack) or strict (Binary Tuples).
30    Document(DocumentMode),
31    /// Columnar storage in compressed segment files.
32    /// Profile determines constraints and specialized behavior.
33    Columnar(ColumnarProfile),
34    /// Key-Value storage with hash-indexed primary key.
35    /// O(1) point lookups, optional TTL, optional secondary indexes.
36    /// Value fields use Binary Tuple codec (same as strict mode) for O(1) field extraction.
37    KeyValue(KvConfig),
38}
39
40impl Default for CollectionType {
41    fn default() -> Self {
42        Self::Document(DocumentMode::default())
43    }
44}
45
46impl CollectionType {
47    /// Schemaless document (default, backward compatible).
48    pub fn document() -> Self {
49        Self::Document(DocumentMode::Schemaless)
50    }
51
52    /// Strict document with schema.
53    pub fn strict(schema: StrictSchema) -> Self {
54        Self::Document(DocumentMode::Strict(schema))
55    }
56
57    /// Plain columnar (general analytics).
58    pub fn columnar() -> Self {
59        Self::Columnar(ColumnarProfile::Plain)
60    }
61
62    /// Columnar with timeseries profile.
63    pub fn timeseries(time_key: impl Into<String>, interval: impl Into<String>) -> Self {
64        Self::Columnar(ColumnarProfile::Timeseries {
65            time_key: time_key.into(),
66            interval: interval.into(),
67        })
68    }
69
70    /// Columnar with spatial profile.
71    pub fn spatial(geometry_column: impl Into<String>) -> Self {
72        Self::Columnar(ColumnarProfile::Spatial {
73            geometry_column: geometry_column.into(),
74            auto_rtree: true,
75            auto_geohash: true,
76        })
77    }
78
79    /// Key-Value collection with typed schema and optional TTL.
80    ///
81    /// The schema MUST contain exactly one PRIMARY KEY column (the hash key).
82    /// Remaining columns are value fields encoded as Binary Tuples.
83    pub fn kv(schema: StrictSchema) -> Self {
84        Self::KeyValue(KvConfig {
85            schema,
86            ttl: None,
87            capacity_hint: 0,
88            inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
89        })
90    }
91
92    /// Key-Value collection with TTL policy.
93    pub fn kv_with_ttl(schema: StrictSchema, ttl: KvTtlPolicy) -> Self {
94        Self::KeyValue(KvConfig {
95            schema,
96            ttl: Some(ttl),
97            capacity_hint: 0,
98            inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
99        })
100    }
101
102    pub fn is_document(&self) -> bool {
103        matches!(self, Self::Document(_))
104    }
105
106    /// Returns `true` for any columnar-family type (Plain, Timeseries, Spatial).
107    /// Use `is_plain_columnar()` to check for plain columnar only.
108    pub fn is_columnar_family(&self) -> bool {
109        matches!(self, Self::Columnar(_))
110    }
111
112    pub fn is_plain_columnar(&self) -> bool {
113        matches!(self, Self::Columnar(ColumnarProfile::Plain))
114    }
115
116    pub fn is_timeseries(&self) -> bool {
117        matches!(self, Self::Columnar(ColumnarProfile::Timeseries { .. }))
118    }
119
120    pub fn is_spatial(&self) -> bool {
121        matches!(self, Self::Columnar(ColumnarProfile::Spatial { .. }))
122    }
123
124    pub fn is_strict(&self) -> bool {
125        matches!(self, Self::Document(DocumentMode::Strict(_)))
126    }
127
128    pub fn is_schemaless(&self) -> bool {
129        matches!(self, Self::Document(DocumentMode::Schemaless))
130    }
131
132    pub fn is_kv(&self) -> bool {
133        matches!(self, Self::KeyValue(_))
134    }
135
136    pub fn as_str(&self) -> &'static str {
137        match self {
138            Self::Document(DocumentMode::Schemaless) => "document",
139            Self::Document(DocumentMode::Strict(_)) => "strict",
140            Self::Columnar(ColumnarProfile::Plain) => "columnar",
141            Self::Columnar(ColumnarProfile::Timeseries { .. }) => "timeseries",
142            Self::Columnar(ColumnarProfile::Spatial { .. }) => "columnar:spatial",
143            Self::KeyValue(_) => "kv",
144        }
145    }
146
147    /// Get the document mode, if this is a document collection.
148    pub fn document_mode(&self) -> Option<&DocumentMode> {
149        match self {
150            Self::Document(mode) => Some(mode),
151            _ => None,
152        }
153    }
154
155    /// Get the columnar profile, if this is a columnar collection.
156    pub fn columnar_profile(&self) -> Option<&ColumnarProfile> {
157        match self {
158            Self::Columnar(profile) => Some(profile),
159            _ => None,
160        }
161    }
162
163    /// Get the KV config, if this is a key-value collection.
164    pub fn kv_config(&self) -> Option<&KvConfig> {
165        match self {
166            Self::KeyValue(config) => Some(config),
167            _ => None,
168        }
169    }
170}
171
172impl std::fmt::Display for CollectionType {
173    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174        f.write_str(self.as_str())
175    }
176}
177
178impl std::str::FromStr for CollectionType {
179    type Err = String;
180
181    fn from_str(s: &str) -> Result<Self, Self::Err> {
182        match s.to_lowercase().as_str() {
183            "document" | "doc" => Ok(Self::document()),
184            "strict" => Ok(Self::Document(DocumentMode::Strict(
185                // Placeholder — real schema comes from DDL parsing, not FromStr.
186                // FromStr only resolves the storage mode; schema is attached separately.
187                StrictSchema {
188                    columns: vec![],
189                    version: 1,
190                },
191            ))),
192            "columnar" => Ok(Self::columnar()),
193            "timeseries" | "ts" => Ok(Self::timeseries("time", "1h")),
194            "kv" | "key_value" | "keyvalue" => Ok(Self::KeyValue(KvConfig {
195                // Placeholder — real schema comes from DDL parsing, not FromStr.
196                schema: StrictSchema {
197                    columns: vec![],
198                    version: 1,
199                },
200                ttl: None,
201                capacity_hint: 0,
202                inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
203            })),
204            other => Err(format!("unknown collection type: '{other}'")),
205        }
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::{ColumnDef, ColumnType};

    /// Serializes `original` to JSON, parses it back, and checks the two are equal.
    fn assert_json_roundtrip(original: CollectionType) {
        let encoded = sonic_rs::to_string(&original).unwrap();
        let decoded: CollectionType = sonic_rs::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn default_is_schemaless_document() {
        let default_type = CollectionType::default();

        // Positive checks first, then everything the default must not be.
        assert!(default_type.is_document());
        assert!(default_type.is_schemaless());
        assert!(!default_type.is_columnar_family());
        assert!(!default_type.is_timeseries());
        assert!(!default_type.is_kv());
    }

    #[test]
    fn factory_methods() {
        assert!(CollectionType::document().is_schemaless());
        assert!(CollectionType::columnar().is_columnar_family());
        assert!(CollectionType::timeseries("time", "1h").is_timeseries());

        // A spatial collection is both spatial and part of the columnar family.
        assert!(CollectionType::spatial("geom").is_columnar_family());
        assert!(CollectionType::spatial("geom").is_spatial());

        let columns = vec![
            ColumnDef::required("key", ColumnType::String).with_primary_key(),
            ColumnDef::nullable("value", ColumnType::Bytes),
        ];
        let schema = StrictSchema::new(columns).unwrap();
        let kv = CollectionType::kv(schema);
        assert!(kv.is_kv());
        assert!(!kv.is_document());
        assert!(!kv.is_columnar_family());
    }

    #[test]
    fn kv_with_ttl_factory() {
        let columns = vec![
            ColumnDef::required("ip", ColumnType::String).with_primary_key(),
            ColumnDef::required("hits", ColumnType::Int64),
        ];
        let schema = StrictSchema::new(columns).unwrap();
        let ttl = KvTtlPolicy::FixedDuration {
            duration_ms: 60_000,
        };

        let ct = CollectionType::kv_with_ttl(schema, ttl);
        assert!(ct.is_kv());

        let config = ct.kv_config().unwrap();
        assert!(config.has_ttl());

        let policy = config.ttl.as_ref().unwrap();
        if let KvTtlPolicy::FixedDuration { duration_ms } = policy {
            assert_eq!(*duration_ms, 60_000);
        } else {
            panic!("expected FixedDuration");
        }
    }

    #[test]
    fn serde_roundtrip_document() {
        assert_json_roundtrip(CollectionType::document());
    }

    #[test]
    fn serde_roundtrip_columnar() {
        assert_json_roundtrip(CollectionType::columnar());
    }

    #[test]
    fn serde_roundtrip_timeseries() {
        assert_json_roundtrip(CollectionType::timeseries("ts", "1h"));
    }

    #[test]
    fn serde_roundtrip_kv_no_ttl() {
        let columns = vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::nullable("v", ColumnType::Bytes),
        ];
        let schema = StrictSchema::new(columns).unwrap();
        assert_json_roundtrip(CollectionType::kv(schema));
    }

    #[test]
    fn serde_roundtrip_kv_fixed_ttl() {
        let columns = vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::required("v", ColumnType::Bytes),
        ];
        let schema = StrictSchema::new(columns).unwrap();
        let ttl = KvTtlPolicy::FixedDuration {
            duration_ms: 900_000,
        };
        assert_json_roundtrip(CollectionType::kv_with_ttl(schema, ttl));
    }

    #[test]
    fn serde_roundtrip_kv_field_ttl() {
        let columns = vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::required("last_active", ColumnType::Timestamp),
        ];
        let schema = StrictSchema::new(columns).unwrap();
        let ttl = KvTtlPolicy::FieldBased {
            field: "last_active".into(),
            offset_ms: 3_600_000,
        };
        assert_json_roundtrip(CollectionType::kv_with_ttl(schema, ttl));
    }

    #[test]
    fn display() {
        assert_eq!(CollectionType::document().to_string(), "document");
        assert_eq!(CollectionType::columnar().to_string(), "columnar");

        let timeseries = CollectionType::timeseries("time", "1h");
        assert_eq!(timeseries.to_string(), "timeseries");

        let columns = vec![ColumnDef::required("k", ColumnType::String).with_primary_key()];
        let schema = StrictSchema::new(columns).unwrap();
        assert_eq!(CollectionType::kv(schema).to_string(), "kv");
    }

    #[test]
    fn from_str() {
        let document = "document".parse::<CollectionType>().unwrap();
        assert!(document.is_document());

        let columnar = "columnar".parse::<CollectionType>().unwrap();
        assert!(columnar.is_columnar_family());

        // Both the long name and the short alias resolve to timeseries.
        for name in ["timeseries", "ts"] {
            assert!(name.parse::<CollectionType>().unwrap().is_timeseries());
        }

        // Every key-value spelling resolves to a KV collection.
        for name in ["kv", "key_value", "keyvalue"] {
            assert!(name.parse::<CollectionType>().unwrap().is_kv());
        }

        assert!("unknown".parse::<CollectionType>().is_err());
    }

    #[test]
    fn accessors() {
        // Each collection type exposes exactly one of the three accessors.
        let timeseries = CollectionType::timeseries("time", "1h");
        assert!(timeseries.columnar_profile().is_some());
        assert!(timeseries.document_mode().is_none());
        assert!(timeseries.kv_config().is_none());

        let doc = CollectionType::document();
        assert!(doc.document_mode().is_some());
        assert!(doc.columnar_profile().is_none());
        assert!(doc.kv_config().is_none());

        let columns = vec![ColumnDef::required("k", ColumnType::String).with_primary_key()];
        let schema = StrictSchema::new(columns).unwrap();
        let kv = CollectionType::kv(schema);
        assert!(kv.kv_config().is_some());
        assert!(kv.document_mode().is_none());
        assert!(kv.columnar_profile().is_none());
    }
}