Skip to main content

nodedb_types/
collection.rs

1//! Collection type enum shared between Origin and Lite.
2//!
3//! Determines routing, storage format, and query execution strategy.
4
5use serde::{Deserialize, Serialize};
6
7use crate::columnar::{ColumnarProfile, DocumentMode, StrictSchema};
8use crate::kv::{KV_DEFAULT_INLINE_THRESHOLD, KvConfig, KvTtlPolicy};
9
10/// The type of a collection, determining its storage engine and query behavior.
11///
12/// Three top-level modes:
13/// - `Document`: B-tree storage in redb (schemaless MessagePack or strict Binary Tuples).
14/// - `Columnar`: Compressed segment files with profile specialization (plain, timeseries, spatial).
15/// - `KeyValue`: Hash-indexed O(1) point lookups with typed value fields (Binary Tuples).
16#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(tag = "storage")]
18pub enum CollectionType {
19    /// Document storage in redb B-tree.
20    /// Schemaless (MessagePack) or strict (Binary Tuples).
21    Document(DocumentMode),
22    /// Columnar storage in compressed segment files.
23    /// Profile determines constraints and specialized behavior.
24    Columnar(ColumnarProfile),
25    /// Key-Value storage with hash-indexed primary key.
26    /// O(1) point lookups, optional TTL, optional secondary indexes.
27    /// Value fields use Binary Tuple codec (same as strict mode) for O(1) field extraction.
28    KeyValue(KvConfig),
29}
30
31impl Default for CollectionType {
32    fn default() -> Self {
33        Self::Document(DocumentMode::default())
34    }
35}
36
37impl CollectionType {
38    /// Schemaless document (default, backward compatible).
39    pub fn document() -> Self {
40        Self::Document(DocumentMode::Schemaless)
41    }
42
43    /// Strict document with schema.
44    pub fn strict(schema: StrictSchema) -> Self {
45        Self::Document(DocumentMode::Strict(schema))
46    }
47
48    /// Plain columnar (general analytics).
49    pub fn columnar() -> Self {
50        Self::Columnar(ColumnarProfile::Plain)
51    }
52
53    /// Columnar with timeseries profile.
54    pub fn timeseries(time_key: impl Into<String>, interval: impl Into<String>) -> Self {
55        Self::Columnar(ColumnarProfile::Timeseries {
56            time_key: time_key.into(),
57            interval: interval.into(),
58        })
59    }
60
61    /// Columnar with spatial profile.
62    pub fn spatial(geometry_column: impl Into<String>) -> Self {
63        Self::Columnar(ColumnarProfile::Spatial {
64            geometry_column: geometry_column.into(),
65            auto_rtree: true,
66            auto_geohash: true,
67        })
68    }
69
70    /// Key-Value collection with typed schema and optional TTL.
71    ///
72    /// The schema MUST contain exactly one PRIMARY KEY column (the hash key).
73    /// Remaining columns are value fields encoded as Binary Tuples.
74    pub fn kv(schema: StrictSchema) -> Self {
75        Self::KeyValue(KvConfig {
76            schema,
77            ttl: None,
78            capacity_hint: 0,
79            inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
80        })
81    }
82
83    /// Key-Value collection with TTL policy.
84    pub fn kv_with_ttl(schema: StrictSchema, ttl: KvTtlPolicy) -> Self {
85        Self::KeyValue(KvConfig {
86            schema,
87            ttl: Some(ttl),
88            capacity_hint: 0,
89            inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
90        })
91    }
92
93    pub fn is_document(&self) -> bool {
94        matches!(self, Self::Document(_))
95    }
96
97    pub fn is_columnar(&self) -> bool {
98        matches!(self, Self::Columnar(_))
99    }
100
101    pub fn is_timeseries(&self) -> bool {
102        matches!(self, Self::Columnar(ColumnarProfile::Timeseries { .. }))
103    }
104
105    pub fn is_strict(&self) -> bool {
106        matches!(self, Self::Document(DocumentMode::Strict(_)))
107    }
108
109    pub fn is_schemaless(&self) -> bool {
110        matches!(self, Self::Document(DocumentMode::Schemaless))
111    }
112
113    pub fn is_kv(&self) -> bool {
114        matches!(self, Self::KeyValue(_))
115    }
116
117    pub fn as_str(&self) -> &'static str {
118        match self {
119            Self::Document(DocumentMode::Schemaless) => "document",
120            Self::Document(DocumentMode::Strict(_)) => "strict",
121            Self::Columnar(ColumnarProfile::Plain) => "columnar",
122            Self::Columnar(ColumnarProfile::Timeseries { .. }) => "timeseries",
123            Self::Columnar(ColumnarProfile::Spatial { .. }) => "columnar:spatial",
124            Self::KeyValue(_) => "kv",
125        }
126    }
127
128    /// Get the document mode, if this is a document collection.
129    pub fn document_mode(&self) -> Option<&DocumentMode> {
130        match self {
131            Self::Document(mode) => Some(mode),
132            _ => None,
133        }
134    }
135
136    /// Get the columnar profile, if this is a columnar collection.
137    pub fn columnar_profile(&self) -> Option<&ColumnarProfile> {
138        match self {
139            Self::Columnar(profile) => Some(profile),
140            _ => None,
141        }
142    }
143
144    /// Get the KV config, if this is a key-value collection.
145    pub fn kv_config(&self) -> Option<&KvConfig> {
146        match self {
147            Self::KeyValue(config) => Some(config),
148            _ => None,
149        }
150    }
151}
152
153impl std::fmt::Display for CollectionType {
154    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
155        f.write_str(self.as_str())
156    }
157}
158
159impl std::str::FromStr for CollectionType {
160    type Err = String;
161
162    fn from_str(s: &str) -> Result<Self, Self::Err> {
163        match s.to_lowercase().as_str() {
164            "document" | "doc" => Ok(Self::document()),
165            "strict" => Ok(Self::Document(DocumentMode::Strict(
166                // Placeholder — real schema comes from DDL parsing, not FromStr.
167                // FromStr only resolves the storage mode; schema is attached separately.
168                StrictSchema {
169                    columns: vec![],
170                    version: 1,
171                },
172            ))),
173            "columnar" => Ok(Self::columnar()),
174            "timeseries" | "ts" => Ok(Self::timeseries("time", "1h")),
175            "kv" | "key_value" | "keyvalue" => Ok(Self::KeyValue(KvConfig {
176                // Placeholder — real schema comes from DDL parsing, not FromStr.
177                schema: StrictSchema {
178                    columns: vec![],
179                    version: 1,
180                },
181                ttl: None,
182                capacity_hint: 0,
183                inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
184            })),
185            other => Err(format!("unknown collection type: '{other}'")),
186        }
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::{ColumnDef, ColumnType};

    /// Builds a schema from `columns`, panicking if `StrictSchema::new` rejects them.
    fn schema(columns: Vec<ColumnDef>) -> StrictSchema {
        StrictSchema::new(columns).expect("test schema should be accepted")
    }

    /// Smallest schema used by these tests: one string primary-key column `k`.
    fn key_only_schema() -> StrictSchema {
        schema(vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
        ])
    }

    /// Serializes `ct` to JSON, deserializes it, and asserts the value is unchanged.
    fn assert_json_roundtrip(ct: &CollectionType) {
        let json = serde_json::to_string(ct).unwrap();
        let back: CollectionType = serde_json::from_str(&json).unwrap();
        assert_eq!(&back, ct);
    }

    #[test]
    fn default_is_schemaless_document() {
        let ct = CollectionType::default();
        assert!(ct.is_document());
        assert!(ct.is_schemaless());
        assert!(!ct.is_strict());
        assert!(!ct.is_columnar());
        assert!(!ct.is_timeseries());
        assert!(!ct.is_kv());
        assert_eq!(ct, CollectionType::document());
    }

    #[test]
    fn factory_methods() {
        assert!(CollectionType::document().is_schemaless());
        assert!(CollectionType::columnar().is_columnar());
        assert!(CollectionType::timeseries("time", "1h").is_timeseries());
        assert!(CollectionType::spatial("geom").is_columnar());

        let kv = CollectionType::kv(schema(vec![
            ColumnDef::required("key", ColumnType::String).with_primary_key(),
            ColumnDef::nullable("value", ColumnType::Bytes),
        ]));
        assert!(kv.is_kv());
        assert!(!kv.is_document());
        assert!(!kv.is_columnar());
    }

    #[test]
    fn strict_factory() {
        let ct = CollectionType::strict(key_only_schema());
        assert!(ct.is_document());
        assert!(ct.is_strict());
        assert!(!ct.is_schemaless());
        assert!(!ct.is_columnar());
        assert!(!ct.is_kv());
    }

    #[test]
    fn spatial_factory() {
        let ct = CollectionType::spatial("geom");
        assert!(ct.is_columnar());
        // Spatial is a columnar profile distinct from timeseries.
        assert!(!ct.is_timeseries());
        assert!(matches!(
            ct.columnar_profile(),
            Some(ColumnarProfile::Spatial {
                auto_rtree: true,
                auto_geohash: true,
                ..
            })
        ));
    }

    #[test]
    fn kv_factory_has_no_ttl() {
        let ct = CollectionType::kv(key_only_schema());
        let config = ct.kv_config().unwrap();
        assert!(config.ttl.is_none());
        assert_eq!(config.capacity_hint, 0);
        assert_eq!(config.inline_threshold, KV_DEFAULT_INLINE_THRESHOLD);
    }

    #[test]
    fn kv_with_ttl_factory() {
        let ttl = KvTtlPolicy::FixedDuration {
            duration_ms: 60_000,
        };
        let ct = CollectionType::kv_with_ttl(
            schema(vec![
                ColumnDef::required("ip", ColumnType::String).with_primary_key(),
                ColumnDef::required("hits", ColumnType::Int64),
            ]),
            ttl,
        );
        assert!(ct.is_kv());
        let config = ct.kv_config().unwrap();
        assert!(config.has_ttl());
        match config.ttl.as_ref().unwrap() {
            KvTtlPolicy::FixedDuration { duration_ms } => assert_eq!(*duration_ms, 60_000),
            _ => panic!("expected FixedDuration"),
        }
    }

    #[test]
    fn serde_roundtrip_document() {
        assert_json_roundtrip(&CollectionType::document());
    }

    #[test]
    fn serde_roundtrip_columnar() {
        assert_json_roundtrip(&CollectionType::columnar());
    }

    #[test]
    fn serde_roundtrip_timeseries() {
        assert_json_roundtrip(&CollectionType::timeseries("ts", "1h"));
    }

    #[test]
    fn serde_roundtrip_kv_no_ttl() {
        let ct = CollectionType::kv(schema(vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::nullable("v", ColumnType::Bytes),
        ]));
        assert_json_roundtrip(&ct);
    }

    #[test]
    fn serde_roundtrip_kv_fixed_ttl() {
        let ttl = KvTtlPolicy::FixedDuration {
            duration_ms: 900_000,
        };
        let ct = CollectionType::kv_with_ttl(
            schema(vec![
                ColumnDef::required("k", ColumnType::String).with_primary_key(),
                ColumnDef::required("v", ColumnType::Bytes),
            ]),
            ttl,
        );
        assert_json_roundtrip(&ct);
    }

    #[test]
    fn serde_roundtrip_kv_field_ttl() {
        let ttl = KvTtlPolicy::FieldBased {
            field: "last_active".into(),
            offset_ms: 3_600_000,
        };
        let ct = CollectionType::kv_with_ttl(
            schema(vec![
                ColumnDef::required("k", ColumnType::String).with_primary_key(),
                ColumnDef::required("last_active", ColumnType::Timestamp),
            ]),
            ttl,
        );
        assert_json_roundtrip(&ct);
    }

    #[test]
    fn display() {
        assert_eq!(CollectionType::document().to_string(), "document");
        assert_eq!(
            CollectionType::strict(key_only_schema()).to_string(),
            "strict"
        );
        assert_eq!(CollectionType::columnar().to_string(), "columnar");
        assert_eq!(
            CollectionType::timeseries("time", "1h").to_string(),
            "timeseries"
        );
        assert_eq!(
            CollectionType::spatial("geom").to_string(),
            "columnar:spatial"
        );
        assert_eq!(CollectionType::kv(key_only_schema()).to_string(), "kv");
    }

    #[test]
    fn from_str() {
        assert!("document".parse::<CollectionType>().unwrap().is_document());
        assert!("doc".parse::<CollectionType>().unwrap().is_schemaless());
        assert!("strict".parse::<CollectionType>().unwrap().is_strict());
        assert!("columnar".parse::<CollectionType>().unwrap().is_columnar());
        assert!(
            "timeseries"
                .parse::<CollectionType>()
                .unwrap()
                .is_timeseries()
        );
        assert!("ts".parse::<CollectionType>().unwrap().is_timeseries());
        assert!("kv".parse::<CollectionType>().unwrap().is_kv());
        assert!("key_value".parse::<CollectionType>().unwrap().is_kv());
        assert!("keyvalue".parse::<CollectionType>().unwrap().is_kv());
        assert!("unknown".parse::<CollectionType>().is_err());
    }

    #[test]
    fn from_str_is_case_insensitive() {
        assert!("Document".parse::<CollectionType>().unwrap().is_document());
        assert!("STRICT".parse::<CollectionType>().unwrap().is_strict());
        assert!(
            "TimeSeries"
                .parse::<CollectionType>()
                .unwrap()
                .is_timeseries()
        );
        assert!("KV".parse::<CollectionType>().unwrap().is_kv());
    }

    #[test]
    fn from_str_uses_placeholders() {
        // Parsing resolves only the storage mode; payloads are fixed placeholders.
        assert_eq!(
            "ts".parse::<CollectionType>().unwrap(),
            CollectionType::timeseries("time", "1h")
        );
        let kv = "kv".parse::<CollectionType>().unwrap();
        assert!(kv.kv_config().unwrap().ttl.is_none());
    }

    #[test]
    fn from_str_error_message() {
        assert_eq!(
            "unknown".parse::<CollectionType>().unwrap_err(),
            "unknown collection type: 'unknown'"
        );
    }

    #[test]
    fn accessors() {
        let ct = CollectionType::timeseries("time", "1h");
        assert!(ct.columnar_profile().is_some());
        assert!(ct.document_mode().is_none());
        assert!(ct.kv_config().is_none());

        let doc = CollectionType::document();
        assert!(doc.document_mode().is_some());
        assert!(doc.columnar_profile().is_none());
        assert!(doc.kv_config().is_none());

        let kv = CollectionType::kv(key_only_schema());
        assert!(kv.kv_config().is_some());
        assert!(kv.document_mode().is_none());
        assert!(kv.columnar_profile().is_none());
    }
}