Skip to main content

nodedb_types/
collection.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Collection type enum shared between Origin and Lite.
4//!
5//! Determines routing, storage format, and query execution strategy.
6
7use serde::{Deserialize, Serialize};
8
9use crate::columnar::{ColumnarProfile, DocumentMode, StrictSchema};
10use crate::kv::{KV_DEFAULT_INLINE_THRESHOLD, KvConfig, KvTtlPolicy};
11
12/// The type of a collection, determining its storage engine and query behavior.
13///
14/// Three top-level modes:
15/// - `Document`: B-tree storage in redb (schemaless MessagePack or strict Binary Tuples).
16/// - `Columnar`: Compressed segment files with profile specialization (plain, timeseries, spatial).
17/// - `KeyValue`: Hash-indexed O(1) point lookups with typed value fields (Binary Tuples).
18#[derive(
19    Debug,
20    Clone,
21    PartialEq,
22    Eq,
23    Serialize,
24    Deserialize,
25    zerompk::ToMessagePack,
26    zerompk::FromMessagePack,
27)]
28#[serde(tag = "storage")]
29pub enum CollectionType {
30    /// Document storage in redb B-tree.
31    /// Schemaless (MessagePack) or strict (Binary Tuples).
32    Document(DocumentMode),
33    /// Columnar storage in compressed segment files.
34    /// Profile determines constraints and specialized behavior.
35    Columnar(ColumnarProfile),
36    /// Key-Value storage with hash-indexed primary key.
37    /// O(1) point lookups, optional TTL, optional secondary indexes.
38    /// Value fields use Binary Tuple codec (same as strict mode) for O(1) field extraction.
39    KeyValue(KvConfig),
40}
41
42impl Default for CollectionType {
43    fn default() -> Self {
44        Self::Document(DocumentMode::default())
45    }
46}
47
48impl CollectionType {
49    /// Schemaless document (default, backward compatible).
50    pub fn document() -> Self {
51        Self::Document(DocumentMode::Schemaless)
52    }
53
54    /// Strict document with schema.
55    pub fn strict(schema: StrictSchema) -> Self {
56        Self::Document(DocumentMode::Strict(schema))
57    }
58
59    /// Plain columnar (general analytics).
60    pub fn columnar() -> Self {
61        Self::Columnar(ColumnarProfile::Plain)
62    }
63
64    /// Columnar with timeseries profile.
65    pub fn timeseries(time_key: impl Into<String>, interval: impl Into<String>) -> Self {
66        Self::Columnar(ColumnarProfile::Timeseries {
67            time_key: time_key.into(),
68            interval: interval.into(),
69        })
70    }
71
72    /// Columnar with spatial profile.
73    pub fn spatial(geometry_column: impl Into<String>) -> Self {
74        Self::Columnar(ColumnarProfile::Spatial {
75            geometry_column: geometry_column.into(),
76            auto_rtree: true,
77            auto_geohash: true,
78        })
79    }
80
81    /// Key-Value collection with typed schema and optional TTL.
82    ///
83    /// The schema MUST contain exactly one PRIMARY KEY column (the hash key).
84    /// Remaining columns are value fields encoded as Binary Tuples.
85    pub fn kv(schema: StrictSchema) -> Self {
86        Self::KeyValue(KvConfig {
87            schema,
88            ttl: None,
89            capacity_hint: 0,
90            inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
91        })
92    }
93
94    /// Key-Value collection with TTL policy.
95    pub fn kv_with_ttl(schema: StrictSchema, ttl: KvTtlPolicy) -> Self {
96        Self::KeyValue(KvConfig {
97            schema,
98            ttl: Some(ttl),
99            capacity_hint: 0,
100            inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
101        })
102    }
103
104    pub fn is_document(&self) -> bool {
105        matches!(self, Self::Document(_))
106    }
107
108    /// Returns `true` for any columnar-family type (Plain, Timeseries, Spatial).
109    /// Use `is_plain_columnar()` to check for plain columnar only.
110    pub fn is_columnar_family(&self) -> bool {
111        matches!(self, Self::Columnar(_))
112    }
113
114    pub fn is_plain_columnar(&self) -> bool {
115        matches!(self, Self::Columnar(ColumnarProfile::Plain))
116    }
117
118    pub fn is_timeseries(&self) -> bool {
119        matches!(self, Self::Columnar(ColumnarProfile::Timeseries { .. }))
120    }
121
122    pub fn is_spatial(&self) -> bool {
123        matches!(self, Self::Columnar(ColumnarProfile::Spatial { .. }))
124    }
125
126    pub fn is_strict(&self) -> bool {
127        matches!(self, Self::Document(DocumentMode::Strict(_)))
128    }
129
130    pub fn is_schemaless(&self) -> bool {
131        matches!(self, Self::Document(DocumentMode::Schemaless))
132    }
133
134    pub fn is_kv(&self) -> bool {
135        matches!(self, Self::KeyValue(_))
136    }
137
138    pub fn as_str(&self) -> &'static str {
139        match self {
140            Self::Document(DocumentMode::Schemaless) => "document_schemaless",
141            Self::Document(DocumentMode::Strict(_)) => "document_strict",
142            Self::Columnar(ColumnarProfile::Plain) => "columnar",
143            Self::Columnar(ColumnarProfile::Timeseries { .. }) => "timeseries",
144            Self::Columnar(ColumnarProfile::Spatial { .. }) => "spatial",
145            Self::KeyValue(_) => "kv",
146        }
147    }
148
149    /// Get the document mode, if this is a document collection.
150    pub fn document_mode(&self) -> Option<&DocumentMode> {
151        match self {
152            Self::Document(mode) => Some(mode),
153            _ => None,
154        }
155    }
156
157    /// Get the columnar profile, if this is a columnar collection.
158    pub fn columnar_profile(&self) -> Option<&ColumnarProfile> {
159        match self {
160            Self::Columnar(profile) => Some(profile),
161            _ => None,
162        }
163    }
164
165    /// Get the KV config, if this is a key-value collection.
166    pub fn kv_config(&self) -> Option<&KvConfig> {
167        match self {
168            Self::KeyValue(config) => Some(config),
169            _ => None,
170        }
171    }
172}
173
174impl std::fmt::Display for CollectionType {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        f.write_str(self.as_str())
177    }
178}
179
180/// Error returned by [`CollectionType`]'s [`std::str::FromStr`] impl when
181/// an unrecognised or deprecated engine name is supplied.
182#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
183#[non_exhaustive]
184pub enum CollectionTypeParseError {
185    /// The input string is not a recognised canonical engine name.
186    #[error(
187        "unknown collection type '{input}': valid names are \
188         document_schemaless, document_strict, columnar, timeseries, spatial, kv"
189    )]
190    Unknown { input: String },
191    /// The input string is a deprecated alias; the canonical name is provided.
192    #[error("deprecated collection type '{input}': use '{canonical}' instead")]
193    Deprecated {
194        input: String,
195        canonical: &'static str,
196    },
197}
198
199impl std::str::FromStr for CollectionType {
200    type Err = CollectionTypeParseError;
201
202    fn from_str(s: &str) -> Result<Self, Self::Err> {
203        let lower = s.to_lowercase();
204        match lower.as_str() {
205            "document_schemaless" => Ok(Self::document()),
206            "document_strict" => Ok(Self::Document(DocumentMode::Strict(
207                // Placeholder — real schema comes from DDL parsing, not FromStr.
208                // FromStr only resolves the storage mode; schema is attached separately.
209                StrictSchema {
210                    columns: vec![],
211                    version: 1,
212                    dropped_columns: Vec::new(),
213                    bitemporal: false,
214                },
215            ))),
216            "columnar" => Ok(Self::columnar()),
217            "timeseries" => Ok(Self::timeseries("time", "1h")),
218            "spatial" => Ok(Self::spatial("geom")),
219            "kv" => Ok(Self::KeyValue(KvConfig {
220                // Placeholder — real schema comes from DDL parsing, not FromStr.
221                schema: StrictSchema {
222                    columns: vec![],
223                    version: 1,
224                    dropped_columns: Vec::new(),
225                    bitemporal: false,
226                },
227                ttl: None,
228                capacity_hint: 0,
229                inline_threshold: KV_DEFAULT_INLINE_THRESHOLD,
230            })),
231            // Deprecated aliases — rejected with a canonical hint.
232            "document" | "doc" => Err(CollectionTypeParseError::Deprecated {
233                input: lower,
234                canonical: "document_schemaless",
235            }),
236            "strict" => Err(CollectionTypeParseError::Deprecated {
237                input: lower,
238                canonical: "document_strict",
239            }),
240            "ts" => Err(CollectionTypeParseError::Deprecated {
241                input: lower,
242                canonical: "timeseries",
243            }),
244            "columnar:spatial" => Err(CollectionTypeParseError::Deprecated {
245                input: lower,
246                canonical: "spatial",
247            }),
248            "key_value" | "keyvalue" => Err(CollectionTypeParseError::Deprecated {
249                input: lower,
250                canonical: "kv",
251            }),
252            _ => Err(CollectionTypeParseError::Unknown { input: lower }),
253        }
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;
    use crate::columnar::{ColumnDef, ColumnType};

    /// Builds a strict schema from `columns`, panicking if the definition is rejected.
    fn schema_of(columns: Vec<ColumnDef>) -> StrictSchema {
        StrictSchema::new(columns).unwrap()
    }

    /// Serializes `original` to JSON, reads it back, and checks nothing changed.
    fn assert_json_roundtrip(original: CollectionType) {
        let json = sonic_rs::to_string(&original).unwrap();
        let back: CollectionType = sonic_rs::from_str(&json).unwrap();
        assert_eq!(back, original);
    }

    #[test]
    fn default_is_schemaless_document() {
        let default_type = CollectionType::default();
        assert!(default_type.is_document());
        assert!(default_type.is_schemaless());
        assert!(!default_type.is_columnar_family());
        assert!(!default_type.is_timeseries());
        assert!(!default_type.is_kv());
    }

    #[test]
    fn factory_methods() {
        assert!(CollectionType::document().is_schemaless());
        assert!(CollectionType::columnar().is_columnar_family());
        assert!(CollectionType::timeseries("time", "1h").is_timeseries());

        let spatial = CollectionType::spatial("geom");
        assert!(spatial.is_columnar_family());
        assert!(spatial.is_spatial());

        let kv = CollectionType::kv(schema_of(vec![
            ColumnDef::required("key", ColumnType::String).with_primary_key(),
            ColumnDef::nullable("value", ColumnType::Bytes),
        ]));
        assert!(kv.is_kv());
        assert!(!kv.is_document());
        assert!(!kv.is_columnar_family());
    }

    #[test]
    fn kv_with_ttl_factory() {
        let schema = schema_of(vec![
            ColumnDef::required("ip", ColumnType::String).with_primary_key(),
            ColumnDef::required("hits", ColumnType::Int64),
        ]);
        let policy = KvTtlPolicy::FixedDuration {
            duration_ms: 60_000,
        };
        let collection = CollectionType::kv_with_ttl(schema, policy);
        assert!(collection.is_kv());

        let config = collection.kv_config().unwrap();
        assert!(config.has_ttl());
        match config.ttl.as_ref().unwrap() {
            KvTtlPolicy::FixedDuration { duration_ms } => assert_eq!(*duration_ms, 60_000),
            _ => panic!("expected FixedDuration"),
        }
    }

    #[test]
    fn serde_roundtrip_document() {
        assert_json_roundtrip(CollectionType::document());
    }

    #[test]
    fn serde_roundtrip_columnar() {
        assert_json_roundtrip(CollectionType::columnar());
    }

    #[test]
    fn serde_roundtrip_timeseries() {
        assert_json_roundtrip(CollectionType::timeseries("ts", "1h"));
    }

    #[test]
    fn serde_roundtrip_kv_no_ttl() {
        let schema = schema_of(vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::nullable("v", ColumnType::Bytes),
        ]);
        assert_json_roundtrip(CollectionType::kv(schema));
    }

    #[test]
    fn serde_roundtrip_kv_fixed_ttl() {
        let schema = schema_of(vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::required("v", ColumnType::Bytes),
        ]);
        let policy = KvTtlPolicy::FixedDuration {
            duration_ms: 900_000,
        };
        assert_json_roundtrip(CollectionType::kv_with_ttl(schema, policy));
    }

    #[test]
    fn serde_roundtrip_kv_field_ttl() {
        let schema = schema_of(vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ColumnDef::required("last_active", ColumnType::Timestamp),
        ]);
        let policy = KvTtlPolicy::FieldBased {
            field: "last_active".into(),
            offset_ms: 3_600_000,
        };
        assert_json_roundtrip(CollectionType::kv_with_ttl(schema, policy));
    }

    #[test]
    fn display() {
        // Single-column schema: just a string primary key named "k".
        let key_only = || {
            schema_of(vec![
                ColumnDef::required("k", ColumnType::String).with_primary_key(),
            ])
        };

        assert_eq!(
            CollectionType::document().to_string(),
            "document_schemaless"
        );
        assert_eq!(
            CollectionType::strict(key_only()).to_string(),
            "document_strict"
        );
        assert_eq!(CollectionType::columnar().to_string(), "columnar");
        assert_eq!(
            CollectionType::timeseries("time", "1h").to_string(),
            "timeseries"
        );
        assert_eq!(CollectionType::spatial("geom").to_string(), "spatial");
        assert_eq!(CollectionType::kv(key_only()).to_string(), "kv");
    }

    #[test]
    fn from_str_canonical_accepted() {
        // Each canonical name paired with the predicate its parsed value must satisfy.
        let cases: [(&str, fn(&CollectionType) -> bool); 6] = [
            ("document_schemaless", CollectionType::is_document),
            ("document_strict", CollectionType::is_document),
            ("columnar", CollectionType::is_columnar_family),
            ("timeseries", CollectionType::is_timeseries),
            ("spatial", CollectionType::is_spatial),
            ("kv", CollectionType::is_kv),
        ];
        for (name, holds) in cases {
            let parsed = name.parse::<CollectionType>().unwrap();
            assert!(holds(&parsed));
        }
    }

    #[test]
    fn from_str_deprecated_rejected() {
        let aliases = [
            "document",
            "doc",
            "strict",
            "ts",
            "columnar:spatial",
            "key_value",
            "keyvalue",
        ];
        for deprecated in aliases {
            let result = deprecated.parse::<CollectionType>();
            assert!(
                matches!(result, Err(CollectionTypeParseError::Deprecated { .. })),
                "expected Deprecated error for '{deprecated}', got {result:?}"
            );
        }
    }

    #[test]
    fn from_str_unknown_rejected() {
        let result = "unknown".parse::<CollectionType>();
        assert!(
            matches!(result, Err(CollectionTypeParseError::Unknown { .. })),
            "expected Unknown error, got {result:?}"
        );
    }

    #[test]
    fn accessors() {
        let timeseries = CollectionType::timeseries("time", "1h");
        assert!(timeseries.columnar_profile().is_some());
        assert!(timeseries.document_mode().is_none());
        assert!(timeseries.kv_config().is_none());

        let document = CollectionType::document();
        assert!(document.document_mode().is_some());
        assert!(document.columnar_profile().is_none());
        assert!(document.kv_config().is_none());

        let kv = CollectionType::kv(schema_of(vec![
            ColumnDef::required("k", ColumnType::String).with_primary_key(),
        ]));
        assert!(kv.kv_config().is_some());
        assert!(kv.document_mode().is_none());
        assert!(kv.columnar_profile().is_none());
    }
}