Skip to main content

nodedb_types/columnar/
column_type.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! ColumnType — the atomic value type for typed schemas.
4
5use serde::{Deserialize, Serialize};
6
7use crate::value::Value;
8
/// Typed column definition for strict document and columnar collections.
///
/// Each variant names one atomic value type a column can hold. Fixed-width
/// types report their byte width through `fixed_size`; the remaining types
/// are variable-length.
///
/// `#[non_exhaustive]` — this enum grows with each type system expansion, so
/// further variants may be added after the ones listed here. External
/// exhaustive `match` arms must handle future variants via a typed error arm
/// rather than `_ => unreachable!()`.
///
/// NOTE(review): variant order is left untouched on purpose — the derived
/// MessagePack encoding may depend on declaration order; confirm before
/// reordering or inserting variants anywhere but the end.
#[non_exhaustive]
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Hash,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
// Serde's adjacently tagged form: the variant name is written under "type"
// and the variant payload (if any) under "params".
#[serde(tag = "type", content = "params")]
pub enum ColumnType {
    /// 64-bit integer (PostgreSQL `int8`). 8 bytes in fixed layout.
    Int64,
    /// 64-bit float (PostgreSQL `float8`). 8 bytes in fixed layout.
    Float64,
    /// Text. Variable-length.
    String,
    /// Boolean. 1 byte in fixed layout.
    Bool,
    /// Raw byte string. Variable-length.
    Bytes,
    /// Naive (no-timezone) timestamp with microsecond precision. OID 1114.
    Timestamp,
    /// UTC (timezone-aware) timestamp with microsecond precision. OID 1184.
    Timestamptz,
    /// System-assigned timestamp (bitemporal `system_from_ms`). Same 8-byte
    /// layout as `Timestamp`, but tagged distinctly so the planner and DDL
    /// layer can reject user-supplied values — the column is populated by the
    /// engine from HLC at commit.
    SystemTimestamp,
    /// Arbitrary-precision decimal with explicit precision and scale.
    ///
    /// `precision`: total significant digits, 1–38. `scale`: digits after the
    /// decimal point, 0–precision. Default when unspecified: `{38, 10}`.
    ///
    /// The fields are plain `u8`s, so these bounds are not enforced by the
    /// type itself; whoever constructs the variant must validate them.
    Decimal {
        precision: u8,
        scale: u8,
    },
    /// Geometry value. Variable-length.
    Geometry,
    /// Fixed-dimension float32 vector. The payload is the dimension count.
    Vector(u32),
    /// UUID. 16 bytes in fixed layout.
    Uuid,
    /// Arbitrary nested data stored as inline MessagePack.
    /// Variable-length. Accepts any Value type.
    Json,
    /// ULID: sortable ID occupying 16 bytes in fixed layout. Crockford Base32
    /// is the identifier's textual form, not its stored width.
    Ulid,
    /// Duration: signed microsecond precision (i64 internally).
    Duration,
    /// Ordered array of values. Variable-length, inline MessagePack.
    Array,
    /// Ordered set (auto-deduplicated). Variable-length, inline MessagePack.
    Set,
    /// Compiled regex pattern. Stored as string internally.
    Regex,
    /// Bounded range of values. Variable-length, inline MessagePack.
    Range,
    /// Typed reference to another record (`table:id`). Variable-length, inline MessagePack.
    Record,
}
74
75impl ColumnType {
76    /// Whether this type has a fixed byte size in Binary Tuple layout.
77    pub fn fixed_size(&self) -> Option<usize> {
78        match self {
79            Self::Int64
80            | Self::Float64
81            | Self::Timestamp
82            | Self::Timestamptz
83            | Self::SystemTimestamp
84            | Self::Duration => Some(8),
85            Self::Bool => Some(1),
86            Self::Decimal { .. } | Self::Uuid | Self::Ulid => Some(16),
87            Self::Vector(dim) => Some(*dim as usize * 4),
88            Self::String
89            | Self::Bytes
90            | Self::Geometry
91            | Self::Json
92            | Self::Array
93            | Self::Set
94            | Self::Regex
95            | Self::Range
96            | Self::Record => None,
97        }
98    }
99
100    /// Whether this type is variable-length (requires offset table entry).
101    pub fn is_variable_length(&self) -> bool {
102        self.fixed_size().is_none()
103    }
104
105    /// Return the canonical PostgreSQL type OID for this column type.
106    ///
107    /// This is the single authoritative mapping between NodeDB `ColumnType`
108    /// variants and PostgreSQL wire-protocol OIDs. All pgwire code must derive
109    /// OIDs from this method — no local string-matching tables.
110    ///
111    /// Choices for non-native types:
112    /// - `Geometry` → `25` (TEXT): no standard pg geometry OID; PostGIS uses
113    ///   its own extension OID which we cannot claim. TEXT lets clients at least
114    ///   see the WKT/WKB string.
115    /// - `Vector(_)` → `1021` (FLOAT4_ARRAY): closest built-in pg type for a
116    ///   fixed-dimension float32 vector; pgvector uses a custom OID, which we
117    ///   avoid to stay dependency-free.
118    /// - `Array`, `Set`, `Range`, `Record`, `Regex` → `114` (JSON): these are
119    ///   variable-length MessagePack-encoded structures; JSON is the safest
120    ///   generic text OID for clients that need to read the value as a string.
121    pub fn to_pg_oid(&self) -> u32 {
122        match self {
123            Self::Bool => 16,
124            Self::Bytes => 17,
125            Self::Int64 => 20,
126            Self::Float64 => 701,
127            Self::String => 25,
128            Self::Timestamp | Self::SystemTimestamp => 1114,
129            Self::Timestamptz => 1184,
130            Self::Decimal { .. } => 1700,
131            Self::Uuid | Self::Ulid => 2950,
132            Self::Json => 3802,
133            Self::Duration => 1186,
134            // No standard built-in OID for geometry; TEXT lets clients read WKT.
135            Self::Geometry => 25,
136            // FLOAT4_ARRAY (1021) is the closest built-in for fixed float32 vectors.
137            Self::Vector(_) => 1021,
138            // Variable-length structured types: expose as JSONB so clients can
139            // parse the serialized representation.
140            Self::Array | Self::Set | Self::Range | Self::Record | Self::Regex => 3802,
141        }
142    }
143
144    /// Whether a `Value` is compatible with this column type.
145    ///
146    /// Accepts both native Value types (e.g., `Value::DateTime` for Timestamp)
147    /// AND coercion sources from SQL input (e.g., `Value::String` for Timestamp).
148    /// Null is accepted for any type — nullability is enforced at schema level.
149    pub fn accepts(&self, value: &Value) -> bool {
150        matches!(
151            (self, value),
152            (Self::Int64, Value::Integer(_))
153                | (Self::Float64, Value::Float(_) | Value::Integer(_))
154                | (Self::String, Value::String(_))
155                | (Self::Bool, Value::Bool(_))
156                | (Self::Bytes, Value::Bytes(_))
157                | (
158                    Self::Timestamp,
159                    Value::NaiveDateTime(_) | Value::Integer(_) | Value::String(_)
160                )
161                | (
162                    Self::Timestamptz,
163                    Value::DateTime(_) | Value::Integer(_) | Value::String(_)
164                )
165                | (
166                    Self::SystemTimestamp,
167                    Value::DateTime(_) | Value::Integer(_)
168                )
169                | (
170                    Self::Decimal { .. },
171                    Value::Decimal(_) | Value::String(_) | Value::Float(_) | Value::Integer(_)
172                )
173                | (Self::Geometry, Value::Geometry(_) | Value::String(_))
174                | (Self::Vector(_), Value::Array(_) | Value::Bytes(_))
175                | (Self::Uuid, Value::Uuid(_) | Value::String(_))
176                | (Self::Ulid, Value::Ulid(_) | Value::String(_))
177                | (
178                    Self::Duration,
179                    Value::Duration(_) | Value::Integer(_) | Value::String(_)
180                )
181                | (Self::Array, Value::Array(_))
182                | (Self::Set, Value::Set(_) | Value::Array(_))
183                | (Self::Regex, Value::Regex(_) | Value::String(_))
184                | (Self::Range, Value::Range { .. })
185                | (Self::Record, Value::Record { .. } | Value::String(_))
186                | (Self::Json, _)
187                | (_, Value::Null)
188        )
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// `DECIMAL(38,10)` — what a bare `NUMERIC` / `DECIMAL` is expected to parse to.
    const WIDE_DECIMAL: ColumnType = ColumnType::Decimal {
        precision: 38,
        scale: 10,
    };

    /// `DECIMAL(10,2)` — a small decimal used as a generic sample.
    const SMALL_DECIMAL: ColumnType = ColumnType::Decimal {
        precision: 10,
        scale: 2,
    };

    /// Datetime at zero microseconds, shared by the naive and tz-aware cases.
    fn epoch() -> crate::datetime::NdbDateTime {
        crate::datetime::NdbDateTime::from_micros(0)
    }

    /// Parses a type name, failing the calling test if parsing errors.
    fn parsed(text: &str) -> ColumnType {
        text.parse::<ColumnType>().unwrap()
    }

    /// Pins every variant's PostgreSQL OID so wire-level changes are deliberate.
    #[test]
    fn to_pg_oid_stable() {
        let expected = [
            (ColumnType::Bool, 16),
            (ColumnType::Bytes, 17),
            (ColumnType::Int64, 20),
            (ColumnType::String, 25),
            (ColumnType::Float64, 701),
            (ColumnType::Timestamp, 1114),
            (ColumnType::Timestamptz, 1184),
            (ColumnType::SystemTimestamp, 1114),
            (ColumnType::Duration, 1186),
            (WIDE_DECIMAL, 1700),
            (ColumnType::Uuid, 2950),
            (ColumnType::Ulid, 2950),
            (ColumnType::Json, 3802),
            (ColumnType::Geometry, 25),
            (ColumnType::Vector(768), 1021),
            (ColumnType::Array, 3802),
            (ColumnType::Set, 3802),
            (ColumnType::Range, 3802),
            (ColumnType::Record, 3802),
            (ColumnType::Regex, 3802),
        ];
        for (ty, oid) in expected {
            assert_eq!(ty.to_pg_oid(), oid, "unexpected OID for {:?}", ty);
        }
    }

    /// `SystemTimestamp` parses from both spellings, is 8 bytes wide, renders
    /// canonically, and rejects string input while accepting integers.
    #[test]
    fn parse_system_timestamp() {
        let sys = ColumnType::SystemTimestamp;
        for spelling in ["SYSTEM_TIMESTAMP", "SystemTimestamp"] {
            assert_eq!(parsed(spelling), sys);
        }
        assert_eq!(sys.fixed_size(), Some(8));
        assert!(!sys.is_variable_length());
        assert_eq!(sys.to_string(), "SYSTEM_TIMESTAMP");
        assert!(!sys.accepts(&Value::String("2024-01-01".into())));
        assert!(sys.accepts(&Value::Integer(1_700_000_000)));
    }

    /// Canonical SQL type names map to the expected variants.
    #[test]
    fn parse_canonical() {
        let cases = [
            ("BIGINT", ColumnType::Int64),
            ("FLOAT64", ColumnType::Float64),
            ("TEXT", ColumnType::String),
            ("BOOL", ColumnType::Bool),
            ("TIMESTAMP", ColumnType::Timestamp),
            ("TIMESTAMPTZ", ColumnType::Timestamptz),
            ("TIMESTAMP WITH TIME ZONE", ColumnType::Timestamptz),
            ("GEOMETRY", ColumnType::Geometry),
            ("UUID", ColumnType::Uuid),
        ];
        for (text, want) in cases {
            assert_eq!(parsed(text), want, "parsing {:?}", text);
        }
    }

    /// A vector carries its dimension; a zero dimension is a parse error.
    #[test]
    fn parse_vector() {
        assert_eq!(parsed("VECTOR(768)"), ColumnType::Vector(768));
        assert!("VECTOR(0)".parse::<ColumnType>().is_err());
    }

    /// Rendering a type and parsing the result yields the same type.
    #[test]
    fn display_roundtrip() {
        let samples = [
            ColumnType::Int64,
            ColumnType::Float64,
            ColumnType::String,
            ColumnType::Timestamp,
            ColumnType::Timestamptz,
            ColumnType::Vector(768),
            SMALL_DECIMAL,
            WIDE_DECIMAL,
        ];
        for original in samples {
            let rendered = original.to_string();
            assert_eq!(parsed(&rendered), original);
        }
    }

    /// `NUMERIC` and `DECIMAL` parse with explicit parameters and fall back to
    /// `(38, 10)` when none are given.
    #[test]
    fn decimal_parse_with_params() {
        let cases = [
            ("NUMERIC(10,2)", SMALL_DECIMAL),
            ("DECIMAL(38,10)", WIDE_DECIMAL),
            ("NUMERIC", WIDE_DECIMAL),
            ("DECIMAL", WIDE_DECIMAL),
        ];
        for (text, want) in cases {
            assert_eq!(parsed(text), want, "parsing {:?}", text);
        }
    }

    /// Scale above precision, zero precision and precision above 38 are rejected.
    #[test]
    fn decimal_parse_invalid() {
        for text in ["DECIMAL(5,6)", "DECIMAL(0,0)", "DECIMAL(39,0)"] {
            assert!(
                text.parse::<ColumnType>().is_err(),
                "{:?} should not parse",
                text
            );
        }
    }

    /// Decimals occupy 16 bytes in fixed layout.
    #[test]
    fn decimal_fixed_size() {
        assert_eq!(SMALL_DECIMAL.fixed_size(), Some(16));
    }

    /// Decimals are reported with OID 1700.
    #[test]
    fn decimal_to_pg_oid_is_1700() {
        assert_eq!(SMALL_DECIMAL.to_pg_oid(), 1700);
    }

    /// Each type accepts its native `Value`, the two timestamp flavours do not
    /// accept each other's datetime value, and Null is accepted.
    #[test]
    fn accepts_native_values() {
        let native = [
            (ColumnType::Int64, Value::Integer(42)),
            (ColumnType::Float64, Value::Float(42.0)),
            (ColumnType::Float64, Value::Integer(42)),
            (ColumnType::String, Value::String("x".into())),
            (ColumnType::Bool, Value::Bool(true)),
            (ColumnType::Bytes, Value::Bytes(vec![1])),
            (
                ColumnType::Uuid,
                Value::Uuid("550e8400-e29b-41d4-a716-446655440000".into()),
            ),
            (WIDE_DECIMAL, Value::Decimal(rust_decimal::Decimal::ZERO)),
        ];
        for (ty, value) in &native {
            assert!(ty.accepts(value), "{:?} must accept its native value", ty);
        }

        let naive = Value::NaiveDateTime(epoch());
        let aware = Value::DateTime(epoch());
        assert!(ColumnType::Timestamp.accepts(&naive));
        assert!(!ColumnType::Timestamp.accepts(&aware));
        assert!(ColumnType::Timestamptz.accepts(&aware));
        assert!(!ColumnType::Timestamptz.accepts(&naive));

        assert!(ColumnType::Int64.accepts(&Value::Null));
        assert!(!ColumnType::Int64.accepts(&Value::String("x".into())));
        assert!(!ColumnType::Bool.accepts(&Value::Integer(1)));
    }

    /// SQL-input coercion sources (strings, integers, floats) are accepted
    /// where the column type allows them.
    #[test]
    fn accepts_coercion_sources() {
        let coercible = [
            (ColumnType::Timestamp, Value::String("2024-01-01".into())),
            (ColumnType::Timestamp, Value::Integer(1_700_000_000)),
            (
                ColumnType::Timestamptz,
                Value::String("2024-01-01T00:00:00Z".into()),
            ),
            (ColumnType::Timestamptz, Value::Integer(1_700_000_000)),
            (
                ColumnType::Uuid,
                Value::String("550e8400-e29b-41d4-a716-446655440000".into()),
            ),
            (SMALL_DECIMAL, Value::String("99.95".into())),
            (SMALL_DECIMAL, Value::Float(99.95)),
            (ColumnType::Geometry, Value::String("POINT(0 0)".into())),
        ];
        for (ty, value) in &coercible {
            assert!(ty.accepts(value), "{:?} must accept the coercion source", ty);
        }
    }

    /// A required primary-key column renders with its SQL type name.
    #[test]
    fn column_def_display() {
        use super::super::column_def::ColumnDef;

        let id_column = ColumnDef::required("id", ColumnType::Int64).with_primary_key();
        let rendered = id_column.to_string();
        assert_eq!(rendered, "id BIGINT NOT NULL PRIMARY KEY");
    }
}