Skip to main content

nodedb_sql/parser/
type_expr.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Parser and validator for type expression strings used by the typeguard system.
4//!
5//! Parses strings like `"STRING"`, `"INT|NULL"`, `"ARRAY<STRING>"` into a
6//! [`TypeExpr`] that can be used to validate [`nodedb_types::Value`] instances
7//! at write time.
8
9use nodedb_types::Value;
10
11use crate::error::SqlError;
12
/// A parsed type expression that can validate Values.
///
/// Produced by [`parse_type_expr`] and checked against a value with
/// [`value_matches_type`].
#[derive(Debug, Clone, PartialEq)]
pub enum TypeExpr {
    /// Matches `Value::Null` / absent.
    ///
    /// Written as the keyword `NULL` (case-insensitive).
    Null,
    /// Matches a specific Value variant.
    ///
    /// Some leaf types also accept a few coercible variants; see
    /// [`SimpleType`] for the per-type rules.
    Simple(SimpleType),
    /// Typed array: every element must match inner.
    ///
    /// Written as `ARRAY<inner>`. An empty array matches regardless of `inner`.
    TypedArray(Box<TypeExpr>),
    /// Typed set: every element must match inner.
    ///
    /// Written as `SET<inner>`. An empty set matches regardless of `inner`.
    TypedSet(Box<TypeExpr>),
    /// Union: value must match at least one variant.
    ///
    /// Written as `a|b|...`. The parser only builds a `Union` when there are
    /// two or more alternatives; a lone alternative is returned unwrapped.
    Union(Vec<TypeExpr>),
}
27
/// Leaf type variants that map to a single Value discriminant.
///
/// Each variant lists the (case-insensitive) keywords the parser accepts for
/// it and the `Value` variants the validator lets through.
#[derive(Debug, Clone, PartialEq)]
pub enum SimpleType {
    /// `INT`, `INTEGER`, `BIGINT`, `INT64`. Matches `Value::Integer`.
    Int,
    /// `FLOAT`, `DOUBLE`, `REAL`, `FLOAT64`. Matches `Value::Float` and
    /// `Value::Integer` (int→float widening).
    Float,
    /// `STRING`, `TEXT`, `VARCHAR`. Matches `Value::String`.
    String,
    /// `BOOL`, `BOOLEAN`. Matches `Value::Bool`.
    Bool,
    /// `BYTES`, `BYTEA`, `BLOB`. Matches `Value::Bytes`.
    Bytes,
    /// `TIMESTAMP`. Matches `Value::NaiveDateTime`, `Value::Integer` and
    /// `Value::String`.
    Timestamp,
    /// `TIMESTAMPTZ`. Matches `Value::DateTime`, `Value::Integer` and
    /// `Value::String`.
    Timestamptz,
    /// `DECIMAL`, `NUMERIC`. Matches `Value::Decimal`, `Value::Float`,
    /// `Value::Integer` and `Value::String`.
    Decimal,
    /// `UUID`. Matches `Value::Uuid` and `Value::String`.
    Uuid,
    /// `ULID`. Matches `Value::Ulid` and `Value::String`.
    Ulid,
    /// `GEOMETRY`. Matches `Value::Geometry` and `Value::String`.
    Geometry,
    /// `DURATION`. Matches `Value::Duration`.
    Duration,
    /// Untyped array (any element type).
    ///
    /// `ARRAY` with no `<...>` suffix. Matches `Value::Array`.
    Array,
    /// `OBJECT`. Matches `Value::Object`.
    Object,
    /// `JSON`. Matches every value, including `Value::Null`.
    Json,
    /// Untyped set (any element type).
    ///
    /// `SET` with no `<...>` suffix. Matches `Value::Set`.
    Set,
    /// `REGEX`. Matches `Value::Regex`.
    Regex,
    /// `RANGE`. Matches `Value::Range`.
    Range,
    /// `RECORD`. Matches `Value::Record`.
    Record,
    /// Fixed-dimension float32 vector.
    ///
    /// `VECTOR(N)` with `N > 0`. The validator in this module accepts any
    /// `Value::Array` or `Value::Bytes` and does not compare the payload
    /// length against `N`.
    Vector(u32),
}
55
56// ── Parser ───────────────────────────────────────────────────────────────────
57
58/// Parse a type expression string into a [`TypeExpr`].
59///
60/// # Examples
61///
62/// ```
63/// use nodedb_sql::parser::type_expr::{parse_type_expr, TypeExpr, SimpleType};
64///
65/// assert_eq!(parse_type_expr("STRING").unwrap(), TypeExpr::Simple(SimpleType::String));
66/// assert_eq!(
67///     parse_type_expr("STRING|NULL").unwrap(),
68///     TypeExpr::Union(vec![TypeExpr::Simple(SimpleType::String), TypeExpr::Null]),
69/// );
70/// ```
71pub fn parse_type_expr(s: &str) -> Result<TypeExpr, SqlError> {
72    let s = s.trim();
73    if s.is_empty() {
74        return Err(SqlError::Parse {
75            detail: "empty type expression".to_string(),
76        });
77    }
78    let mut pos = 0usize;
79    let chars: Vec<char> = s.chars().collect();
80    parse_union(&chars, &mut pos, false)
81}
82
83/// Parse `single_type ('|' single_type)*`.
84///
85/// When `stop_at_gt` is true the parser stops before a `>` character (used
86/// when parsing inside `ARRAY<...>` / `SET<...>`).
87fn parse_union(chars: &[char], pos: &mut usize, stop_at_gt: bool) -> Result<TypeExpr, SqlError> {
88    let mut variants: Vec<TypeExpr> = Vec::new();
89    variants.push(parse_single(chars, pos, stop_at_gt)?);
90
91    loop {
92        skip_ws(chars, pos);
93        if *pos >= chars.len() {
94            break;
95        }
96        if stop_at_gt && chars[*pos] == '>' {
97            break;
98        }
99        if chars[*pos] != '|' {
100            break;
101        }
102        *pos += 1; // consume '|'
103        skip_ws(chars, pos);
104        variants.push(parse_single(chars, pos, stop_at_gt)?);
105    }
106
107    if variants.len() == 1 {
108        Ok(variants.remove(0))
109    } else {
110        Ok(TypeExpr::Union(variants))
111    }
112}
113
114/// Parse a single type token (keyword, ARRAY<...>, SET<...>, VECTOR(N)).
115fn parse_single(chars: &[char], pos: &mut usize, stop_at_gt: bool) -> Result<TypeExpr, SqlError> {
116    skip_ws(chars, pos);
117    let keyword = read_keyword(chars, pos);
118    if keyword.is_empty() {
119        return Err(SqlError::Parse {
120            detail: format!(
121                "expected type keyword at position {pos}, found: {:?}",
122                chars.get(*pos)
123            ),
124        });
125    }
126
127    match keyword.as_str() {
128        "NULL" => Ok(TypeExpr::Null),
129
130        "INT" | "INTEGER" | "BIGINT" | "INT64" => Ok(TypeExpr::Simple(SimpleType::Int)),
131        "FLOAT" | "DOUBLE" | "REAL" | "FLOAT64" => Ok(TypeExpr::Simple(SimpleType::Float)),
132        "STRING" | "TEXT" | "VARCHAR" => Ok(TypeExpr::Simple(SimpleType::String)),
133        "BOOL" | "BOOLEAN" => Ok(TypeExpr::Simple(SimpleType::Bool)),
134        "BYTES" | "BYTEA" | "BLOB" => Ok(TypeExpr::Simple(SimpleType::Bytes)),
135        "TIMESTAMP" => Ok(TypeExpr::Simple(SimpleType::Timestamp)),
136        "TIMESTAMPTZ" => Ok(TypeExpr::Simple(SimpleType::Timestamptz)),
137        "DECIMAL" | "NUMERIC" => Ok(TypeExpr::Simple(SimpleType::Decimal)),
138        "UUID" => Ok(TypeExpr::Simple(SimpleType::Uuid)),
139        "ULID" => Ok(TypeExpr::Simple(SimpleType::Ulid)),
140        "GEOMETRY" => Ok(TypeExpr::Simple(SimpleType::Geometry)),
141        "DURATION" => Ok(TypeExpr::Simple(SimpleType::Duration)),
142        "OBJECT" => Ok(TypeExpr::Simple(SimpleType::Object)),
143        "JSON" => Ok(TypeExpr::Simple(SimpleType::Json)),
144        "REGEX" => Ok(TypeExpr::Simple(SimpleType::Regex)),
145        "RANGE" => Ok(TypeExpr::Simple(SimpleType::Range)),
146        "RECORD" => Ok(TypeExpr::Simple(SimpleType::Record)),
147
148        "VECTOR" => {
149            // Expect '(' digits ')'.
150            skip_ws(chars, pos);
151            if *pos >= chars.len() || chars[*pos] != '(' {
152                return Err(SqlError::Parse {
153                    detail: format!("expected '(' after VECTOR at position {pos}"),
154                });
155            }
156            *pos += 1; // consume '('
157            skip_ws(chars, pos);
158            let digits = read_digits(chars, pos);
159            if digits.is_empty() {
160                return Err(SqlError::Parse {
161                    detail: "expected dimension digits inside VECTOR(...)".to_string(),
162                });
163            }
164            let dim: u32 = digits.parse().map_err(|_| SqlError::Parse {
165                detail: format!("invalid VECTOR dimension: '{digits}'"),
166            })?;
167            if dim == 0 {
168                return Err(SqlError::Parse {
169                    detail: "VECTOR dimension must be > 0".to_string(),
170                });
171            }
172            skip_ws(chars, pos);
173            if *pos >= chars.len() || chars[*pos] != ')' {
174                return Err(SqlError::Parse {
175                    detail: format!("expected ')' to close VECTOR({dim} at position {pos}"),
176                });
177            }
178            *pos += 1; // consume ')'
179            Ok(TypeExpr::Simple(SimpleType::Vector(dim)))
180        }
181
182        "ARRAY" => {
183            // Optional typed variant: ARRAY<inner>
184            skip_ws(chars, pos);
185            if *pos < chars.len() && chars[*pos] == '<' {
186                *pos += 1; // consume '<'
187                skip_ws(chars, pos);
188                let inner = parse_union(chars, pos, true)?;
189                skip_ws(chars, pos);
190                if *pos >= chars.len() || chars[*pos] != '>' {
191                    return Err(SqlError::Parse {
192                        detail: format!("expected '>' to close ARRAY<...> at position {pos}"),
193                    });
194                }
195                *pos += 1; // consume '>'
196                Ok(TypeExpr::TypedArray(Box::new(inner)))
197            } else {
198                Ok(TypeExpr::Simple(SimpleType::Array))
199            }
200        }
201
202        "SET" => {
203            // Optional typed variant: SET<inner>
204            skip_ws(chars, pos);
205            if *pos < chars.len() && chars[*pos] == '<' && !stop_at_gt {
206                *pos += 1; // consume '<'
207                skip_ws(chars, pos);
208                let inner = parse_union(chars, pos, true)?;
209                skip_ws(chars, pos);
210                if *pos >= chars.len() || chars[*pos] != '>' {
211                    return Err(SqlError::Parse {
212                        detail: format!("expected '>' to close SET<...> at position {pos}"),
213                    });
214                }
215                *pos += 1; // consume '>'
216                Ok(TypeExpr::TypedSet(Box::new(inner)))
217            } else if *pos < chars.len() && chars[*pos] == '<' {
218                // Inside a nested context — consume the '<' and inner normally.
219                *pos += 1;
220                skip_ws(chars, pos);
221                let inner = parse_union(chars, pos, true)?;
222                skip_ws(chars, pos);
223                if *pos >= chars.len() || chars[*pos] != '>' {
224                    return Err(SqlError::Parse {
225                        detail: format!("expected '>' to close SET<...> at position {pos}"),
226                    });
227                }
228                *pos += 1;
229                Ok(TypeExpr::TypedSet(Box::new(inner)))
230            } else {
231                Ok(TypeExpr::Simple(SimpleType::Set))
232            }
233        }
234
235        other => Err(SqlError::Parse {
236            detail: format!("unknown type keyword: '{other}'"),
237        }),
238    }
239}
240
/// Advance `pos` past any run of ASCII whitespace starting at `pos`.
///
/// Leaves `pos` untouched when it already points at a non-whitespace
/// character or lies at/after the end of `chars`.
fn skip_ws(chars: &[char], pos: &mut usize) {
    let run = chars
        .iter()
        .skip(*pos)
        .take_while(|c| c.is_ascii_whitespace())
        .count();
    *pos += run;
}
246
/// Read a contiguous run of ASCII letters, digits and underscores starting at
/// `pos`, returning it uppercased.
///
/// `pos` is advanced past the run. An empty string is returned (and `pos` is
/// left unchanged) when the character at `pos` is not part of a keyword or
/// `pos` is at/after the end of `chars`.
fn read_keyword(chars: &[char], pos: &mut usize) -> String {
    let token: String = chars
        .iter()
        .skip(*pos)
        .take_while(|c| c.is_ascii_alphanumeric() || **c == '_')
        .map(|c| c.to_ascii_uppercase())
        .collect();
    // Every accepted character is ASCII, so byte length equals char count.
    *pos += token.len();
    token
}
261
/// Read the contiguous run of ASCII digits starting at `pos`.
///
/// `pos` is advanced past the run. Returns an empty string, leaving `pos`
/// unchanged, when there is no digit at `pos`.
fn read_digits(chars: &[char], pos: &mut usize) -> String {
    let digits: String = chars
        .iter()
        .skip(*pos)
        .copied()
        .take_while(char::is_ascii_digit)
        .collect();
    // Only ASCII digits were collected, so byte length equals char count.
    *pos += digits.len();
    digits
}
271
272// ── Validator ────────────────────────────────────────────────────────────────
273
274/// Check if a [`Value`] matches a [`TypeExpr`].
275///
276/// Coercion rules:
277/// - `Simple(Float)` accepts `Value::Integer` (int→float widening).
278/// - `Simple(Timestamp)` accepts `Value::DateTime`, `Value::Integer`, and
279///   `Value::String` (same rules as `ColumnType::Timestamp.accepts()`).
280/// - `Simple(Decimal)` accepts `Value::Decimal`, `Value::Float`, `Value::Integer`,
281///   and `Value::String`.
282/// - `Simple(Uuid)` accepts `Value::Uuid` and `Value::String`.
283/// - `Simple(Geometry)` accepts `Value::Geometry` and `Value::String`.
284/// - `TypedArray(inner)` matches `Value::Array` where every element matches `inner`.
285/// - `TypedSet(inner)` matches `Value::Set` where every element matches `inner`.
286/// - `Union(variants)` matches if any variant matches.
287pub fn value_matches_type(value: &Value, expr: &TypeExpr) -> bool {
288    match expr {
289        TypeExpr::Null => matches!(value, Value::Null),
290
291        TypeExpr::Simple(simple) => value_matches_simple(value, simple),
292
293        TypeExpr::TypedArray(inner) => match value {
294            Value::Array(items) => items.iter().all(|item| value_matches_type(item, inner)),
295            _ => false,
296        },
297
298        TypeExpr::TypedSet(inner) => match value {
299            Value::Set(items) => items.iter().all(|item| value_matches_type(item, inner)),
300            _ => false,
301        },
302
303        TypeExpr::Union(variants) => variants.iter().any(|v| value_matches_type(value, v)),
304    }
305}
306
307fn value_matches_simple(value: &Value, simple: &SimpleType) -> bool {
308    match simple {
309        SimpleType::Int => matches!(value, Value::Integer(_)),
310        SimpleType::Float => matches!(value, Value::Float(_) | Value::Integer(_)),
311        SimpleType::String => matches!(value, Value::String(_)),
312        SimpleType::Bool => matches!(value, Value::Bool(_)),
313        SimpleType::Bytes => matches!(value, Value::Bytes(_)),
314        SimpleType::Timestamp => matches!(
315            value,
316            Value::NaiveDateTime(_) | Value::Integer(_) | Value::String(_)
317        ),
318        SimpleType::Timestamptz => matches!(
319            value,
320            Value::DateTime(_) | Value::Integer(_) | Value::String(_)
321        ),
322        SimpleType::Decimal => matches!(
323            value,
324            Value::Decimal(_) | Value::Float(_) | Value::Integer(_) | Value::String(_)
325        ),
326        SimpleType::Uuid => matches!(value, Value::Uuid(_) | Value::String(_)),
327        SimpleType::Ulid => matches!(value, Value::Ulid(_) | Value::String(_)),
328        SimpleType::Geometry => matches!(value, Value::Geometry(_) | Value::String(_)),
329        SimpleType::Duration => matches!(value, Value::Duration(_)),
330        SimpleType::Array => matches!(value, Value::Array(_)),
331        SimpleType::Object => matches!(value, Value::Object(_)),
332        SimpleType::Json => true, // Json accepts any value (same as ColumnType::Json)
333        SimpleType::Set => matches!(value, Value::Set(_)),
334        SimpleType::Regex => matches!(value, Value::Regex(_)),
335        SimpleType::Range => matches!(value, Value::Range { .. }),
336        SimpleType::Record => matches!(value, Value::Record { .. }),
337        SimpleType::Vector(_) => matches!(value, Value::Array(_) | Value::Bytes(_)),
338    }
339}
340
341// ── Tests ────────────────────────────────────────────────────────────────────
342
/// Unit tests for the type-expression parser and the value validator.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Parsing ──────────────────────────────────────────────────────────────

    #[test]
    fn parse_simple() {
        assert_eq!(
            parse_type_expr("STRING").unwrap(),
            TypeExpr::Simple(SimpleType::String)
        );
    }

    #[test]
    fn parse_union() {
        assert_eq!(
            parse_type_expr("STRING|NULL").unwrap(),
            TypeExpr::Union(vec![TypeExpr::Simple(SimpleType::String), TypeExpr::Null])
        );
    }

    #[test]
    fn parse_typed_array() {
        assert_eq!(
            parse_type_expr("ARRAY<STRING>").unwrap(),
            TypeExpr::TypedArray(Box::new(TypeExpr::Simple(SimpleType::String)))
        );
    }

    #[test]
    fn parse_typed_array_union() {
        assert_eq!(
            parse_type_expr("ARRAY<INT|FLOAT>").unwrap(),
            TypeExpr::TypedArray(Box::new(TypeExpr::Union(vec![
                TypeExpr::Simple(SimpleType::Int),
                TypeExpr::Simple(SimpleType::Float),
            ])))
        );
    }

    #[test]
    fn parse_vector() {
        assert_eq!(
            parse_type_expr("VECTOR(384)").unwrap(),
            TypeExpr::Simple(SimpleType::Vector(384))
        );
    }

    #[test]
    fn parse_case_insensitive() {
        assert_eq!(
            parse_type_expr("string|null").unwrap(),
            TypeExpr::Union(vec![TypeExpr::Simple(SimpleType::String), TypeExpr::Null])
        );
    }

    #[test]
    fn parse_aliases() {
        // INT aliases
        assert_eq!(
            parse_type_expr("INTEGER").unwrap(),
            TypeExpr::Simple(SimpleType::Int)
        );
        assert_eq!(
            parse_type_expr("BIGINT").unwrap(),
            TypeExpr::Simple(SimpleType::Int)
        );
        assert_eq!(
            parse_type_expr("INT64").unwrap(),
            TypeExpr::Simple(SimpleType::Int)
        );
        // TEXT alias
        assert_eq!(
            parse_type_expr("TEXT").unwrap(),
            TypeExpr::Simple(SimpleType::String)
        );
        // BOOLEAN alias
        assert_eq!(
            parse_type_expr("BOOLEAN").unwrap(),
            TypeExpr::Simple(SimpleType::Bool)
        );
        // BYTEA alias
        assert_eq!(
            parse_type_expr("BYTEA").unwrap(),
            TypeExpr::Simple(SimpleType::Bytes)
        );
    }

    #[test]
    fn parse_typed_set() {
        assert_eq!(
            parse_type_expr("SET<INT>").unwrap(),
            TypeExpr::TypedSet(Box::new(TypeExpr::Simple(SimpleType::Int)))
        );
    }

    #[test]
    fn parse_untyped_array() {
        assert_eq!(
            parse_type_expr("ARRAY").unwrap(),
            TypeExpr::Simple(SimpleType::Array)
        );
    }

    #[test]
    fn parse_untyped_set() {
        assert_eq!(
            parse_type_expr("SET").unwrap(),
            TypeExpr::Simple(SimpleType::Set)
        );
    }

    #[test]
    fn parse_nested_containers() {
        // A typed set nested inside a typed array exercises the recursive path.
        assert_eq!(
            parse_type_expr("ARRAY<SET<INT>>").unwrap(),
            TypeExpr::TypedArray(Box::new(TypeExpr::TypedSet(Box::new(
                TypeExpr::Simple(SimpleType::Int)
            ))))
        );
        assert_eq!(
            parse_type_expr("SET<ARRAY<STRING>>").unwrap(),
            TypeExpr::TypedSet(Box::new(TypeExpr::TypedArray(Box::new(
                TypeExpr::Simple(SimpleType::String)
            ))))
        );
    }

    #[test]
    fn parse_tolerates_whitespace() {
        assert_eq!(
            parse_type_expr("  array < int | null >  ").unwrap(),
            TypeExpr::TypedArray(Box::new(TypeExpr::Union(vec![
                TypeExpr::Simple(SimpleType::Int),
                TypeExpr::Null,
            ])))
        );
        assert_eq!(
            parse_type_expr("VECTOR ( 3 )").unwrap(),
            TypeExpr::Simple(SimpleType::Vector(3))
        );
    }

    #[test]
    fn parse_error_unknown_keyword() {
        assert!(parse_type_expr("FOOBAR").is_err());
    }

    #[test]
    fn parse_error_empty() {
        assert!(parse_type_expr("").is_err());
        assert!(parse_type_expr("  ").is_err());
    }

    #[test]
    fn parse_error_vector_zero_dim() {
        assert!(parse_type_expr("VECTOR(0)").is_err());
    }

    #[test]
    fn parse_error_malformed_vector() {
        // Missing parentheses, missing digits, missing ')', and u32 overflow.
        assert!(parse_type_expr("VECTOR").is_err());
        assert!(parse_type_expr("VECTOR()").is_err());
        assert!(parse_type_expr("VECTOR(12").is_err());
        assert!(parse_type_expr("VECTOR(99999999999)").is_err());
    }

    #[test]
    fn parse_error_unclosed_container() {
        assert!(parse_type_expr("ARRAY<INT").is_err());
        assert!(parse_type_expr("SET<").is_err());
    }

    #[test]
    fn parse_error_dangling_pipe() {
        assert!(parse_type_expr("INT|").is_err());
        assert!(parse_type_expr("|INT").is_err());
    }

    // ── Matching ─────────────────────────────────────────────────────────────

    #[test]
    fn match_string() {
        let expr = parse_type_expr("STRING").unwrap();
        assert!(value_matches_type(&Value::String("hello".into()), &expr));
        assert!(!value_matches_type(&Value::Integer(1), &expr));
    }

    #[test]
    fn match_null_union() {
        let expr = parse_type_expr("STRING|NULL").unwrap();
        assert!(value_matches_type(&Value::Null, &expr));
        assert!(value_matches_type(&Value::String("x".into()), &expr));
        assert!(!value_matches_type(&Value::Integer(1), &expr));
    }

    #[test]
    fn match_typed_array() {
        let expr = parse_type_expr("ARRAY<INT>").unwrap();
        assert!(value_matches_type(
            &Value::Array(vec![Value::Integer(1), Value::Integer(2)]),
            &expr
        ));
    }

    #[test]
    fn match_typed_array_fail() {
        let expr = parse_type_expr("ARRAY<INT>").unwrap();
        // Mixed-type array should fail.
        assert!(!value_matches_type(
            &Value::Array(vec![Value::Integer(1), Value::String("x".into())]),
            &expr
        ));
    }

    #[test]
    fn match_typed_array_empty() {
        // With no elements to violate the element type, an empty array matches.
        let expr = parse_type_expr("ARRAY<INT>").unwrap();
        assert!(value_matches_type(&Value::Array(vec![]), &expr));
        // A non-array never matches a typed array.
        assert!(!value_matches_type(&Value::Integer(1), &expr));
    }

    #[test]
    fn match_typed_array_nullable_elements() {
        let expr = parse_type_expr("ARRAY<INT|NULL>").unwrap();
        assert!(value_matches_type(
            &Value::Array(vec![Value::Integer(1), Value::Null]),
            &expr
        ));
        assert!(!value_matches_type(
            &Value::Array(vec![Value::Bool(true)]),
            &expr
        ));
    }

    #[test]
    fn match_int_coercion() {
        // Integer should match Float (widening coercion).
        let expr = parse_type_expr("FLOAT").unwrap();
        assert!(value_matches_type(&Value::Integer(42), &expr));
    }

    #[test]
    fn no_match() {
        let expr = parse_type_expr("STRING").unwrap();
        assert!(!value_matches_type(&Value::Integer(99), &expr));
    }

    #[test]
    fn match_timestamp_coercions() {
        let expr = parse_type_expr("TIMESTAMP").unwrap();
        assert!(value_matches_type(
            &Value::String("2024-01-01".into()),
            &expr
        ));
        assert!(value_matches_type(&Value::Integer(1_700_000_000), &expr));
    }

    #[test]
    fn parse_timestamptz() {
        assert_eq!(
            parse_type_expr("TIMESTAMPTZ").unwrap(),
            TypeExpr::Simple(SimpleType::Timestamptz)
        );
    }

    #[test]
    fn match_timestamptz_coercions() {
        let expr = parse_type_expr("TIMESTAMPTZ").unwrap();
        let dt = nodedb_types::NdbDateTime::from_micros(1_700_000_000_000_000);
        assert!(value_matches_type(&Value::DateTime(dt), &expr));
        assert!(value_matches_type(
            &Value::String("2024-01-01T00:00:00Z".into()),
            &expr
        ));
        assert!(value_matches_type(&Value::Integer(1_700_000_000), &expr));
    }

    #[test]
    fn match_null_expr() {
        let expr = TypeExpr::Null;
        assert!(value_matches_type(&Value::Null, &expr));
        assert!(!value_matches_type(&Value::Integer(0), &expr));
    }

    #[test]
    fn match_json_accepts_any() {
        let expr = TypeExpr::Simple(SimpleType::Json);
        assert!(value_matches_type(&Value::Null, &expr));
        assert!(value_matches_type(&Value::Integer(1), &expr));
        assert!(value_matches_type(&Value::String("x".into()), &expr));
        assert!(value_matches_type(&Value::Bool(true), &expr));
    }

    #[test]
    fn match_vector_type() {
        let expr = parse_type_expr("VECTOR(128)").unwrap();
        // Accepts Array (float list) or Bytes (packed floats).
        assert!(value_matches_type(
            &Value::Array(vec![Value::Float(0.1)]),
            &expr
        ));
        assert!(value_matches_type(&Value::Bytes(vec![0u8; 512]), &expr));
        assert!(!value_matches_type(&Value::Integer(1), &expr));
    }
}