Skip to main content

rest_sql/parsing/
token.rs

1use crate::ast::Operator;
2use crate::parsing::span::Span;
3use std::collections::HashMap;
4use std::sync::OnceLock;
5
6/// Operator table — single source of truth for all supported operators.
7///
8/// Entries are ordered longest-match first so the lexer can scan top-to-bottom
9/// and stop at the first match without look-ahead. To add a new operator,
10/// insert it here (maintaining longest-first order). No other file needs to change.
11pub const OPERATORS: &[(&str, Operator)] = &[
12    ("=between=", Operator::Between),
13    ("=notnull=", Operator::NotNull),
14    ("=ilike=", Operator::Ilike),
15    ("=like=", Operator::Like),
16    ("=null=", Operator::Null),
17    ("=neq=", Operator::Neq),
18    ("=out=", Operator::Out),
19    ("=in=", Operator::In),
20    ("=eq=", Operator::Eq),
21    ("=le=", Operator::Lte),
22    ("=ge=", Operator::Gte),
23    ("=lt=", Operator::Lt),
24    ("=gt=", Operator::Gt),
25    ("<=", Operator::Lte),
26    (">=", Operator::Gte),
27    ("!=", Operator::Neq),
28    ("==", Operator::Eq),
29    ("<", Operator::Lt),
30    (">", Operator::Gt),
31];
32
33/// Lazily-initialized HashMap view of OPERATORS, keyed by operator string.
34///
35/// Built once on first call, then shared for the lifetime of the process.
36/// Keys are `&'static str` — same pointers as in the OPERATORS slice, no heap
37/// allocation for keys. Used by the lexer for O(1) lookup after delimiting a
38/// `=`-enclosed token whose exact bounds are already known.
39static OPERATOR_MAP: OnceLock<HashMap<&'static str, Operator>> = OnceLock::new();
40
41pub fn operator_map() -> &'static HashMap<&'static str, Operator> {
42    OPERATOR_MAP.get_or_init(|| OPERATORS.iter().map(|(k, v)| (*k, v.clone())).collect())
43}
44
45/// A single lexical token.
46///
47/// Literals are typed at lex time: `null`/`true`/`false` become their own
48/// variants; unquoted digit sequences become `Integer` or `Float`.
49/// This avoids re-scanning in the grammar layer.
50#[derive(Debug, Clone, PartialEq)]
51pub enum Token {
52    /// The bare keyword `null`.
53    Null,
54    /// The bare keyword `true` or `false`.
55    Bool(bool),
56    /// An unquoted integer literal (no `.`).
57    Integer(i64),
58    /// An unquoted floating-point literal (contains `.`).
59    Float(f64),
60    /// An unquoted date literal matching `YYYY-MM-DD`.
61    Date(String),
62    /// An unquoted datetime literal matching `YYYY-MM-DDTHH:MM:SSZ`.
63    DateTime(String),
64    /// A single- or double-quoted string (quotes stripped, no escape processing).
65    QuotedStr(String),
66    /// An unquoted identifier or bare-string value (not a keyword or number).
67    Word(String),
68    /// A matched operator from the OPERATORS table.
69    Op(Operator),
70    /// `(`
71    LParen,
72    /// `)`
73    RParen,
74    /// `,` — OR separator at expression level; element separator inside lists.
75    Comma,
76    /// `;` — AND separator.
77    Semi,
78}
79
80/// A token paired with the byte range it occupies in the source string.
81#[derive(Debug, Clone, PartialEq)]
82pub struct Spanned {
83    pub token: Token,
84    pub span: Span,
85}
86
87impl Spanned {
88    pub fn new(token: Token, span: Span) -> Self {
89        Spanned { token, span }
90    }
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// No entry of `OPERATORS` may be longer than the entry before it.
    #[test]
    fn operators_longest_first() {
        // Compare every adjacent pair; the first entry has no predecessor
        // and is therefore unconstrained.
        for pair in OPERATORS.windows(2) {
            let prev_len = pair[0].0.len();
            let s = pair[1].0;
            assert!(
                s.len() <= prev_len,
                "operator {s:?} is longer than its predecessor — order is wrong"
            );
        }
    }

    /// The table must open with `=between=`.
    #[test]
    fn longest_operator_is_between() {
        let (head, _) = &OPERATORS[0];
        assert_eq!(*head, "=between=");
    }

    /// The table must close with a one-character operator.
    #[test]
    fn single_char_operators_are_last() {
        let last = OPERATORS.last().map(|(symbol, _)| *symbol).unwrap();
        assert_eq!(
            last.len(),
            1,
            "last operator should be single-char, got {last:?}"
        );
    }

    /// `Spanned::new` stores both of its arguments unchanged.
    #[test]
    fn spanned_carries_span() {
        let expected = Span::new(0, 4);
        let word = Token::Word(String::from("name"));
        let Spanned { token, span } = Spanned::new(word.clone(), expected);
        assert_eq!(span, expected);
        assert_eq!(token, word);
    }
}