Skip to main content

nodedb_sql/parser/
type_expr.rs

1//! Parser and validator for type expression strings used by the typeguard system.
2//!
3//! Parses strings like `"STRING"`, `"INT|NULL"`, `"ARRAY<STRING>"` into a
4//! [`TypeExpr`] that can be used to validate [`nodedb_types::Value`] instances
5//! at write time.
6
7use nodedb_types::Value;
8
/// A parsed type expression that can validate Values.
///
/// Produced by [`parse_type_expr`] and checked against a value with
/// [`value_matches_type`]. Derives `Eq` and `Hash` so expressions can be
/// compared exactly and used as keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TypeExpr {
    /// Matches `Value::Null` / absent.
    Null,
    /// Matches a specific Value variant.
    Simple(SimpleType),
    /// Typed array: every element must match inner.
    TypedArray(Box<TypeExpr>),
    /// Typed set: every element must match inner.
    TypedSet(Box<TypeExpr>),
    /// Union: value must match at least one variant.
    Union(Vec<TypeExpr>),
}

/// Leaf type variants that map to a single Value discriminant.
///
/// The keywords listed on each variant are the spellings the parser accepts
/// (case-insensitively) for it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SimpleType {
    /// `INT`, `INTEGER`, `BIGINT`, `INT64`.
    Int,
    /// `FLOAT`, `DOUBLE`, `REAL`, `FLOAT64`.
    Float,
    /// `STRING`, `TEXT`, `VARCHAR`.
    String,
    /// `BOOL`, `BOOLEAN`.
    Bool,
    /// `BYTES`, `BYTEA`, `BLOB`.
    Bytes,
    /// `TIMESTAMP`, `TIMESTAMPTZ`.
    Timestamp,
    /// `DECIMAL`, `NUMERIC`.
    Decimal,
    /// `UUID`.
    Uuid,
    /// `ULID`.
    Ulid,
    /// `GEOMETRY`.
    Geometry,
    /// `DURATION`.
    Duration,
    /// Untyped array (any element type): `ARRAY` without `<...>`.
    Array,
    /// `OBJECT`.
    Object,
    /// `JSON`.
    Json,
    /// Untyped set (any element type): `SET` without `<...>`.
    Set,
    /// `REGEX`.
    Regex,
    /// `RANGE`.
    Range,
    /// `RECORD`.
    Record,
    /// Fixed-dimension float32 vector: `VECTOR(N)` with `N > 0`.
    Vector(u32),
}
50
51// ── Parser ───────────────────────────────────────────────────────────────────
52
53/// Parse a type expression string into a [`TypeExpr`].
54///
55/// # Examples
56///
57/// ```
58/// use nodedb_sql::parser::type_expr::{parse_type_expr, TypeExpr, SimpleType};
59///
60/// assert_eq!(parse_type_expr("STRING").unwrap(), TypeExpr::Simple(SimpleType::String));
61/// assert_eq!(
62///     parse_type_expr("STRING|NULL").unwrap(),
63///     TypeExpr::Union(vec![TypeExpr::Simple(SimpleType::String), TypeExpr::Null]),
64/// );
65/// ```
66pub fn parse_type_expr(s: &str) -> Result<TypeExpr, String> {
67    let s = s.trim();
68    if s.is_empty() {
69        return Err("empty type expression".to_string());
70    }
71    let mut pos = 0usize;
72    let chars: Vec<char> = s.chars().collect();
73    parse_union(&chars, &mut pos, false)
74}
75
76/// Parse `single_type ('|' single_type)*`.
77///
78/// When `stop_at_gt` is true the parser stops before a `>` character (used
79/// when parsing inside `ARRAY<...>` / `SET<...>`).
80fn parse_union(chars: &[char], pos: &mut usize, stop_at_gt: bool) -> Result<TypeExpr, String> {
81    let mut variants: Vec<TypeExpr> = Vec::new();
82    variants.push(parse_single(chars, pos, stop_at_gt)?);
83
84    loop {
85        skip_ws(chars, pos);
86        if *pos >= chars.len() {
87            break;
88        }
89        if stop_at_gt && chars[*pos] == '>' {
90            break;
91        }
92        if chars[*pos] != '|' {
93            break;
94        }
95        *pos += 1; // consume '|'
96        skip_ws(chars, pos);
97        variants.push(parse_single(chars, pos, stop_at_gt)?);
98    }
99
100    if variants.len() == 1 {
101        Ok(variants.remove(0))
102    } else {
103        Ok(TypeExpr::Union(variants))
104    }
105}
106
107/// Parse a single type token (keyword, ARRAY<...>, SET<...>, VECTOR(N)).
108fn parse_single(chars: &[char], pos: &mut usize, stop_at_gt: bool) -> Result<TypeExpr, String> {
109    skip_ws(chars, pos);
110    let keyword = read_keyword(chars, pos);
111    if keyword.is_empty() {
112        return Err(format!(
113            "expected type keyword at position {pos}, found: {:?}",
114            chars.get(*pos)
115        ));
116    }
117
118    match keyword.as_str() {
119        "NULL" => Ok(TypeExpr::Null),
120
121        "INT" | "INTEGER" | "BIGINT" | "INT64" => Ok(TypeExpr::Simple(SimpleType::Int)),
122        "FLOAT" | "DOUBLE" | "REAL" | "FLOAT64" => Ok(TypeExpr::Simple(SimpleType::Float)),
123        "STRING" | "TEXT" | "VARCHAR" => Ok(TypeExpr::Simple(SimpleType::String)),
124        "BOOL" | "BOOLEAN" => Ok(TypeExpr::Simple(SimpleType::Bool)),
125        "BYTES" | "BYTEA" | "BLOB" => Ok(TypeExpr::Simple(SimpleType::Bytes)),
126        "TIMESTAMP" | "TIMESTAMPTZ" => Ok(TypeExpr::Simple(SimpleType::Timestamp)),
127        "DECIMAL" | "NUMERIC" => Ok(TypeExpr::Simple(SimpleType::Decimal)),
128        "UUID" => Ok(TypeExpr::Simple(SimpleType::Uuid)),
129        "ULID" => Ok(TypeExpr::Simple(SimpleType::Ulid)),
130        "GEOMETRY" => Ok(TypeExpr::Simple(SimpleType::Geometry)),
131        "DURATION" => Ok(TypeExpr::Simple(SimpleType::Duration)),
132        "OBJECT" => Ok(TypeExpr::Simple(SimpleType::Object)),
133        "JSON" => Ok(TypeExpr::Simple(SimpleType::Json)),
134        "REGEX" => Ok(TypeExpr::Simple(SimpleType::Regex)),
135        "RANGE" => Ok(TypeExpr::Simple(SimpleType::Range)),
136        "RECORD" => Ok(TypeExpr::Simple(SimpleType::Record)),
137
138        "VECTOR" => {
139            // Expect '(' digits ')'.
140            skip_ws(chars, pos);
141            if *pos >= chars.len() || chars[*pos] != '(' {
142                return Err(format!("expected '(' after VECTOR at position {pos}"));
143            }
144            *pos += 1; // consume '('
145            skip_ws(chars, pos);
146            let digits = read_digits(chars, pos);
147            if digits.is_empty() {
148                return Err("expected dimension digits inside VECTOR(...)".to_string());
149            }
150            let dim: u32 = digits
151                .parse()
152                .map_err(|_| format!("invalid VECTOR dimension: '{digits}'"))?;
153            if dim == 0 {
154                return Err("VECTOR dimension must be > 0".to_string());
155            }
156            skip_ws(chars, pos);
157            if *pos >= chars.len() || chars[*pos] != ')' {
158                return Err(format!(
159                    "expected ')' to close VECTOR({dim} at position {pos}"
160                ));
161            }
162            *pos += 1; // consume ')'
163            Ok(TypeExpr::Simple(SimpleType::Vector(dim)))
164        }
165
166        "ARRAY" => {
167            // Optional typed variant: ARRAY<inner>
168            skip_ws(chars, pos);
169            if *pos < chars.len() && chars[*pos] == '<' {
170                *pos += 1; // consume '<'
171                skip_ws(chars, pos);
172                let inner = parse_union(chars, pos, true)?;
173                skip_ws(chars, pos);
174                if *pos >= chars.len() || chars[*pos] != '>' {
175                    return Err(format!(
176                        "expected '>' to close ARRAY<...> at position {pos}"
177                    ));
178                }
179                *pos += 1; // consume '>'
180                Ok(TypeExpr::TypedArray(Box::new(inner)))
181            } else {
182                Ok(TypeExpr::Simple(SimpleType::Array))
183            }
184        }
185
186        "SET" => {
187            // Optional typed variant: SET<inner>
188            skip_ws(chars, pos);
189            if *pos < chars.len() && chars[*pos] == '<' && !stop_at_gt {
190                *pos += 1; // consume '<'
191                skip_ws(chars, pos);
192                let inner = parse_union(chars, pos, true)?;
193                skip_ws(chars, pos);
194                if *pos >= chars.len() || chars[*pos] != '>' {
195                    return Err(format!("expected '>' to close SET<...> at position {pos}"));
196                }
197                *pos += 1; // consume '>'
198                Ok(TypeExpr::TypedSet(Box::new(inner)))
199            } else if *pos < chars.len() && chars[*pos] == '<' {
200                // Inside a nested context — consume the '<' and inner normally.
201                *pos += 1;
202                skip_ws(chars, pos);
203                let inner = parse_union(chars, pos, true)?;
204                skip_ws(chars, pos);
205                if *pos >= chars.len() || chars[*pos] != '>' {
206                    return Err(format!("expected '>' to close SET<...> at position {pos}"));
207                }
208                *pos += 1;
209                Ok(TypeExpr::TypedSet(Box::new(inner)))
210            } else {
211                Ok(TypeExpr::Simple(SimpleType::Set))
212            }
213        }
214
215        other => Err(format!("unknown type keyword: '{other}'")),
216    }
217}
218
/// Advance `pos` past any run of ASCII whitespace starting at `pos`.
///
/// `pos` is left untouched when it is already at (or beyond) the end of
/// `chars` or points at a non-whitespace character.
fn skip_ws(chars: &[char], pos: &mut usize) {
    while matches!(chars.get(*pos), Some(c) if c.is_ascii_whitespace()) {
        *pos += 1;
    }
}
224
/// Read a contiguous run of ASCII letters, digits and underscores starting at
/// `pos`, returning it uppercased and advancing `pos` past it.
///
/// Returns an empty string (and leaves `pos` unchanged) when the character at
/// `pos` is not part of a keyword or `pos` is at the end of input.
fn read_keyword(chars: &[char], pos: &mut usize) -> String {
    let keyword: String = chars
        .get(*pos..)
        .unwrap_or(&[])
        .iter()
        .take_while(|c| c.is_ascii_alphanumeric() || **c == '_')
        .map(|c| c.to_ascii_uppercase())
        .collect();
    // Every collected character is ASCII, so the byte length equals the
    // number of characters consumed.
    *pos += keyword.len();
    keyword
}
239
/// Read the contiguous run of ASCII digits starting at `pos`, advancing `pos`
/// past it.
///
/// Returns an empty string (and leaves `pos` unchanged) when there is no
/// digit at `pos`.
fn read_digits(chars: &[char], pos: &mut usize) -> String {
    let digits: String = chars
        .get(*pos..)
        .unwrap_or(&[])
        .iter()
        .take_while(|c| c.is_ascii_digit())
        .collect();
    // ASCII digits are one byte each, so this is the number of chars consumed.
    *pos += digits.len();
    digits
}
249
250// ── Validator ────────────────────────────────────────────────────────────────
251
252/// Check if a [`Value`] matches a [`TypeExpr`].
253///
254/// Coercion rules:
255/// - `Simple(Float)` accepts `Value::Integer` (int→float widening).
256/// - `Simple(Timestamp)` accepts `Value::DateTime`, `Value::Integer`, and
257///   `Value::String` (same rules as `ColumnType::Timestamp.accepts()`).
258/// - `Simple(Decimal)` accepts `Value::Decimal`, `Value::Float`, `Value::Integer`,
259///   and `Value::String`.
260/// - `Simple(Uuid)` accepts `Value::Uuid` and `Value::String`.
261/// - `Simple(Geometry)` accepts `Value::Geometry` and `Value::String`.
262/// - `TypedArray(inner)` matches `Value::Array` where every element matches `inner`.
263/// - `TypedSet(inner)` matches `Value::Set` where every element matches `inner`.
264/// - `Union(variants)` matches if any variant matches.
265pub fn value_matches_type(value: &Value, expr: &TypeExpr) -> bool {
266    match expr {
267        TypeExpr::Null => matches!(value, Value::Null),
268
269        TypeExpr::Simple(simple) => value_matches_simple(value, simple),
270
271        TypeExpr::TypedArray(inner) => match value {
272            Value::Array(items) => items.iter().all(|item| value_matches_type(item, inner)),
273            _ => false,
274        },
275
276        TypeExpr::TypedSet(inner) => match value {
277            Value::Set(items) => items.iter().all(|item| value_matches_type(item, inner)),
278            _ => false,
279        },
280
281        TypeExpr::Union(variants) => variants.iter().any(|v| value_matches_type(value, v)),
282    }
283}
284
285fn value_matches_simple(value: &Value, simple: &SimpleType) -> bool {
286    match simple {
287        SimpleType::Int => matches!(value, Value::Integer(_)),
288        SimpleType::Float => matches!(value, Value::Float(_) | Value::Integer(_)),
289        SimpleType::String => matches!(value, Value::String(_)),
290        SimpleType::Bool => matches!(value, Value::Bool(_)),
291        SimpleType::Bytes => matches!(value, Value::Bytes(_)),
292        SimpleType::Timestamp => matches!(
293            value,
294            Value::DateTime(_) | Value::Integer(_) | Value::String(_)
295        ),
296        SimpleType::Decimal => matches!(
297            value,
298            Value::Decimal(_) | Value::Float(_) | Value::Integer(_) | Value::String(_)
299        ),
300        SimpleType::Uuid => matches!(value, Value::Uuid(_) | Value::String(_)),
301        SimpleType::Ulid => matches!(value, Value::Ulid(_) | Value::String(_)),
302        SimpleType::Geometry => matches!(value, Value::Geometry(_) | Value::String(_)),
303        SimpleType::Duration => matches!(value, Value::Duration(_)),
304        SimpleType::Array => matches!(value, Value::Array(_)),
305        SimpleType::Object => matches!(value, Value::Object(_)),
306        SimpleType::Json => true, // Json accepts any value (same as ColumnType::Json)
307        SimpleType::Set => matches!(value, Value::Set(_)),
308        SimpleType::Regex => matches!(value, Value::Regex(_)),
309        SimpleType::Range => matches!(value, Value::Range { .. }),
310        SimpleType::Record => matches!(value, Value::Record { .. }),
311        SimpleType::Vector(_) => matches!(value, Value::Array(_) | Value::Bytes(_)),
312    }
313}
314
315// ── Tests ────────────────────────────────────────────────────────────────────
316
#[cfg(test)]
mod tests {
    use super::*;

    // ── Helpers ──────────────────────────────────────────────────────────────

    /// Parse `spec`, panicking at the caller's location if it is rejected.
    #[track_caller]
    fn parsed(spec: &str) -> TypeExpr {
        parse_type_expr(spec).unwrap()
    }

    /// Wrap a leaf type in `TypeExpr::Simple`.
    fn leaf(kind: SimpleType) -> TypeExpr {
        TypeExpr::Simple(kind)
    }

    /// Parse `spec` and report whether `value` satisfies it.
    #[track_caller]
    fn accepts(spec: &str, value: Value) -> bool {
        value_matches_type(&value, &parsed(spec))
    }

    // ── Parsing ──────────────────────────────────────────────────────────────

    #[test]
    fn parse_simple() {
        assert_eq!(parsed("STRING"), leaf(SimpleType::String));
    }

    #[test]
    fn parse_union() {
        let expected = TypeExpr::Union(vec![leaf(SimpleType::String), TypeExpr::Null]);
        assert_eq!(parsed("STRING|NULL"), expected);
    }

    #[test]
    fn parse_typed_array() {
        let expected = TypeExpr::TypedArray(Box::new(leaf(SimpleType::String)));
        assert_eq!(parsed("ARRAY<STRING>"), expected);
    }

    #[test]
    fn parse_typed_array_union() {
        let element = TypeExpr::Union(vec![leaf(SimpleType::Int), leaf(SimpleType::Float)]);
        assert_eq!(
            parsed("ARRAY<INT|FLOAT>"),
            TypeExpr::TypedArray(Box::new(element))
        );
    }

    #[test]
    fn parse_vector() {
        assert_eq!(parsed("VECTOR(384)"), leaf(SimpleType::Vector(384)));
    }

    #[test]
    fn parse_case_insensitive() {
        let expected = TypeExpr::Union(vec![leaf(SimpleType::String), TypeExpr::Null]);
        assert_eq!(parsed("string|null"), expected);
    }

    #[test]
    fn parse_aliases() {
        let cases = [
            // INT aliases
            ("INTEGER", SimpleType::Int),
            ("BIGINT", SimpleType::Int),
            ("INT64", SimpleType::Int),
            // TEXT alias
            ("TEXT", SimpleType::String),
            // BOOLEAN alias
            ("BOOLEAN", SimpleType::Bool),
            // BYTEA alias
            ("BYTEA", SimpleType::Bytes),
        ];
        for (spec, expected) in cases {
            assert_eq!(parsed(spec), leaf(expected));
        }
    }

    #[test]
    fn parse_typed_set() {
        let expected = TypeExpr::TypedSet(Box::new(leaf(SimpleType::Int)));
        assert_eq!(parsed("SET<INT>"), expected);
    }

    #[test]
    fn parse_untyped_array() {
        assert_eq!(parsed("ARRAY"), leaf(SimpleType::Array));
    }

    #[test]
    fn parse_untyped_set() {
        assert_eq!(parsed("SET"), leaf(SimpleType::Set));
    }

    #[test]
    fn parse_error_unknown_keyword() {
        assert!(parse_type_expr("FOOBAR").is_err());
    }

    #[test]
    fn parse_error_empty() {
        for blank in ["", "  "] {
            assert!(parse_type_expr(blank).is_err());
        }
    }

    #[test]
    fn parse_error_vector_zero_dim() {
        assert!(parse_type_expr("VECTOR(0)").is_err());
    }

    // ── Matching ─────────────────────────────────────────────────────────────

    #[test]
    fn match_string() {
        assert!(accepts("STRING", Value::String("hello".into())));
        assert!(!accepts("STRING", Value::Integer(1)));
    }

    #[test]
    fn match_null_union() {
        assert!(accepts("STRING|NULL", Value::Null));
        assert!(accepts("STRING|NULL", Value::String("x".into())));
        assert!(!accepts("STRING|NULL", Value::Integer(1)));
    }

    #[test]
    fn match_typed_array() {
        let ints = Value::Array(vec![Value::Integer(1), Value::Integer(2)]);
        assert!(accepts("ARRAY<INT>", ints));
    }

    #[test]
    fn match_typed_array_fail() {
        // Mixed-type array should fail.
        let mixed = Value::Array(vec![Value::Integer(1), Value::String("x".into())]);
        assert!(!accepts("ARRAY<INT>", mixed));
    }

    #[test]
    fn match_int_coercion() {
        // Integer should match Float (widening coercion).
        assert!(accepts("FLOAT", Value::Integer(42)));
    }

    #[test]
    fn no_match() {
        assert!(!accepts("STRING", Value::Integer(99)));
    }

    #[test]
    fn match_timestamp_coercions() {
        assert!(accepts("TIMESTAMP", Value::String("2024-01-01".into())));
        assert!(accepts("TIMESTAMP", Value::Integer(1_700_000_000)));
    }

    #[test]
    fn match_null_expr() {
        let expr = TypeExpr::Null;
        assert!(value_matches_type(&Value::Null, &expr));
        assert!(!value_matches_type(&Value::Integer(0), &expr));
    }

    #[test]
    fn match_json_accepts_any() {
        let expr = leaf(SimpleType::Json);
        let samples = [
            Value::Null,
            Value::Integer(1),
            Value::String("x".into()),
            Value::Bool(true),
        ];
        for sample in &samples {
            assert!(value_matches_type(sample, &expr));
        }
    }

    #[test]
    fn match_vector_type() {
        // Accepts Array (float list) or Bytes (packed floats).
        assert!(accepts("VECTOR(128)", Value::Array(vec![Value::Float(0.1)])));
        assert!(accepts("VECTOR(128)", Value::Bytes(vec![0u8; 512])));
        assert!(!accepts("VECTOR(128)", Value::Integer(1)));
    }
}