Skip to main content

kyu_parser/
token.rs

1use smol_str::SmolStr;
2use std::collections::HashMap;
3use std::sync::LazyLock;
4
5/// Lexical token for the Cypher query language.
6/// Produced by the hand-written lexer and consumed by the chumsky parser.
7#[derive(Clone, Debug, PartialEq, Eq, Hash)]
8pub enum Token {
9    // === Literals ===
10    Integer(i64),
11    Float(SmolStr), // stored as string to preserve exact representation; parsed to f64 later
12    StringLiteral(SmolStr),
13    True,
14    False,
15    Null,
16
17    // === Identifiers ===
18    Ident(SmolStr),
19    EscapedIdent(SmolStr), // backtick-escaped `identifier`
20    Parameter(SmolStr),    // $paramName
21
22    // === Punctuation ===
23    LeftParen,    // (
24    RightParen,   // )
25    LeftBracket,  // [
26    RightBracket, // ]
27    LeftBrace,    // {
28    RightBrace,   // }
29    Comma,        // ,
30    Dot,          // .
31    Colon,        // :
32    Semicolon,    // ;
33    Pipe,         // |
34    DoubleDot,    // ..
35    Arrow,        // ->
36    LeftArrow,    // <-
37    Dash,         // -
38    Underscore,   // _
39
40    // === Operators ===
41    Eq,         // =
42    Neq,        // <>
43    Lt,         // <
44    Le,         // <=
45    Gt,         // >
46    Ge,         // >=
47    Plus,       // +
48    Star,       // *
49    Slash,      // /
50    Percent,    // %
51    Caret,      // ^
52    Ampersand,  // &
53    Tilde,      // ~
54    RegexMatch, // =~
55    ShiftLeft,  // <<
56    ShiftRight, // >>
57    Exclaim,    // !
58    PlusEq,     // +=
59
60    // === Cypher Keywords ===
61    Match,
62    Optional,
63    Where,
64    Return,
65    With,
66    Unwind,
67    Create,
68    Merge,
69    Set,
70    Delete,
71    Detach,
72    Remove,
73    Order,
74    By,
75    Limit,
76    Skip,
77    Asc,
78    Desc,
79    Distinct,
80    As,
81    And,
82    Or,
83    Not,
84    Xor,
85    In,
86    Is,
87    Starts,
88    Ends,
89    Contains,
90    Case,
91    When,
92    Then,
93    Else,
94    End,
95    Union,
96    All,
97    Any,
98    None,
99    Single,
100    Exists,
101    Count,
102    Call,
103    Yield,
104    On,
105
106    // === DDL Keywords ===
107    Node,
108    Rel,
109    Table,
110    Group,
111    Rdf,
112    Graph,
113    From,
114    To,
115    Primary,
116    Key,
117    Drop,
118    Alter,
119    Add,
120    Column,
121    Rename,
122    Comment,
123    Default,
124    Copy,
125    Load,
126    Attach,
127    Use,
128    Database,
129    Export,
130    Import,
131    Install,
132    Extension,
133
134    // === Type Keywords ===
135    BoolType,
136    Int8Type,
137    Int16Type,
138    Int32Type,
139    Int64Type,
140    Int128Type,
141    UInt8Type,
142    UInt16Type,
143    UInt32Type,
144    UInt64Type,
145    FloatType,
146    DoubleType,
147    StringType,
148    DateType,
149    TimestampType,
150    IntervalType,
151    BlobType,
152    UuidType,
153    SerialType,
154    ListType,
155    MapType,
156    StructType,
157    UnionType,
158
159    // === Transaction Keywords ===
160    Begin,
161    Commit,
162    Rollback,
163    Transaction,
164    Read,
165    Write,
166    Only,
167
168    // === Special ===
169    If,
170    NotKw, // NOT as distinct from Not (logical op) — used in "IF NOT EXISTS" etc.
171    Macro,
172    Shortest,
173    Profile,
174    Explain,
175
176    // === EOF sentinel ===
177    Eof,
178}
179
180impl std::fmt::Display for Token {
181    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182        match self {
183            Self::Integer(n) => write!(f, "{n}"),
184            Self::Float(s) => write!(f, "{s}"),
185            Self::StringLiteral(s) => write!(f, "'{s}'"),
186            Self::True => write!(f, "TRUE"),
187            Self::False => write!(f, "FALSE"),
188            Self::Null => write!(f, "NULL"),
189            Self::Ident(s) => write!(f, "{s}"),
190            Self::EscapedIdent(s) => write!(f, "`{s}`"),
191            Self::Parameter(s) => write!(f, "${s}"),
192            Self::LeftParen => write!(f, "("),
193            Self::RightParen => write!(f, ")"),
194            Self::LeftBracket => write!(f, "["),
195            Self::RightBracket => write!(f, "]"),
196            Self::LeftBrace => write!(f, "{{"),
197            Self::RightBrace => write!(f, "}}"),
198            Self::Comma => write!(f, ","),
199            Self::Dot => write!(f, "."),
200            Self::Colon => write!(f, ":"),
201            Self::Semicolon => write!(f, ";"),
202            Self::Pipe => write!(f, "|"),
203            Self::DoubleDot => write!(f, ".."),
204            Self::Arrow => write!(f, "->"),
205            Self::LeftArrow => write!(f, "<-"),
206            Self::Dash => write!(f, "-"),
207            Self::Underscore => write!(f, "_"),
208            Self::Eq => write!(f, "="),
209            Self::Neq => write!(f, "<>"),
210            Self::Lt => write!(f, "<"),
211            Self::Le => write!(f, "<="),
212            Self::Gt => write!(f, ">"),
213            Self::Ge => write!(f, ">="),
214            Self::Plus => write!(f, "+"),
215            Self::Star => write!(f, "*"),
216            Self::Slash => write!(f, "/"),
217            Self::Percent => write!(f, "%"),
218            Self::Caret => write!(f, "^"),
219            Self::Ampersand => write!(f, "&"),
220            Self::Tilde => write!(f, "~"),
221            Self::RegexMatch => write!(f, "=~"),
222            Self::ShiftLeft => write!(f, "<<"),
223            Self::ShiftRight => write!(f, ">>"),
224            Self::Exclaim => write!(f, "!"),
225            Self::PlusEq => write!(f, "+="),
226            Self::Eof => write!(f, "<EOF>"),
227            other => write!(f, "{}", keyword_name(other)),
228        }
229    }
230}
231
232fn keyword_name(tok: &Token) -> &'static str {
233    match tok {
234        Token::Match => "MATCH",
235        Token::Optional => "OPTIONAL",
236        Token::Where => "WHERE",
237        Token::Return => "RETURN",
238        Token::With => "WITH",
239        Token::Unwind => "UNWIND",
240        Token::Create => "CREATE",
241        Token::Merge => "MERGE",
242        Token::Set => "SET",
243        Token::Delete => "DELETE",
244        Token::Detach => "DETACH",
245        Token::Remove => "REMOVE",
246        Token::Order => "ORDER",
247        Token::By => "BY",
248        Token::Limit => "LIMIT",
249        Token::Skip => "SKIP",
250        Token::Asc => "ASC",
251        Token::Desc => "DESC",
252        Token::Distinct => "DISTINCT",
253        Token::As => "AS",
254        Token::And => "AND",
255        Token::Or => "OR",
256        Token::Not => "NOT",
257        Token::Xor => "XOR",
258        Token::In => "IN",
259        Token::Is => "IS",
260        Token::Starts => "STARTS",
261        Token::Ends => "ENDS",
262        Token::Contains => "CONTAINS",
263        Token::Case => "CASE",
264        Token::When => "WHEN",
265        Token::Then => "THEN",
266        Token::Else => "ELSE",
267        Token::End => "END",
268        Token::Union => "UNION",
269        Token::All => "ALL",
270        Token::Any => "ANY",
271        Token::None => "NONE",
272        Token::Single => "SINGLE",
273        Token::Exists => "EXISTS",
274        Token::Count => "COUNT",
275        Token::Call => "CALL",
276        Token::Yield => "YIELD",
277        Token::On => "ON",
278        Token::Node => "NODE",
279        Token::Rel => "REL",
280        Token::Table => "TABLE",
281        Token::Group => "GROUP",
282        Token::Rdf => "RDF",
283        Token::Graph => "GRAPH",
284        Token::From => "FROM",
285        Token::To => "TO",
286        Token::Primary => "PRIMARY",
287        Token::Key => "KEY",
288        Token::Drop => "DROP",
289        Token::Alter => "ALTER",
290        Token::Add => "ADD",
291        Token::Column => "COLUMN",
292        Token::Rename => "RENAME",
293        Token::Comment => "COMMENT",
294        Token::Default => "DEFAULT",
295        Token::Copy => "COPY",
296        Token::Load => "LOAD",
297        Token::Attach => "ATTACH",
298        Token::Use => "USE",
299        Token::Database => "DATABASE",
300        Token::Export => "EXPORT",
301        Token::Import => "IMPORT",
302        Token::Install => "INSTALL",
303        Token::Extension => "EXTENSION",
304        Token::BoolType => "BOOL",
305        Token::Int8Type => "INT8",
306        Token::Int16Type => "INT16",
307        Token::Int32Type => "INT32",
308        Token::Int64Type => "INT64",
309        Token::Int128Type => "INT128",
310        Token::UInt8Type => "UINT8",
311        Token::UInt16Type => "UINT16",
312        Token::UInt32Type => "UINT32",
313        Token::UInt64Type => "UINT64",
314        Token::FloatType => "FLOAT",
315        Token::DoubleType => "DOUBLE",
316        Token::StringType => "STRING",
317        Token::DateType => "DATE",
318        Token::TimestampType => "TIMESTAMP",
319        Token::IntervalType => "INTERVAL",
320        Token::BlobType => "BLOB",
321        Token::UuidType => "UUID",
322        Token::SerialType => "SERIAL",
323        Token::ListType => "LIST",
324        Token::MapType => "MAP",
325        Token::StructType => "STRUCT",
326        Token::UnionType => "UNION",
327        Token::Begin => "BEGIN",
328        Token::Commit => "COMMIT",
329        Token::Rollback => "ROLLBACK",
330        Token::Transaction => "TRANSACTION",
331        Token::Read => "READ",
332        Token::Write => "WRITE",
333        Token::Only => "ONLY",
334        Token::If => "IF",
335        Token::NotKw => "NOT",
336        Token::Macro => "MACRO",
337        Token::Shortest => "SHORTEST",
338        Token::Profile => "PROFILE",
339        Token::Explain => "EXPLAIN",
340        _ => "<unknown>",
341    }
342}
343
344/// Case-insensitive keyword lookup table.
345static KEYWORDS: LazyLock<HashMap<&'static str, Token>> = LazyLock::new(|| {
346    let mut m = HashMap::new();
347    // Cypher keywords
348    m.insert("match", Token::Match);
349    m.insert("optional", Token::Optional);
350    m.insert("where", Token::Where);
351    m.insert("return", Token::Return);
352    m.insert("with", Token::With);
353    m.insert("unwind", Token::Unwind);
354    m.insert("create", Token::Create);
355    m.insert("merge", Token::Merge);
356    m.insert("set", Token::Set);
357    m.insert("delete", Token::Delete);
358    m.insert("detach", Token::Detach);
359    m.insert("remove", Token::Remove);
360    m.insert("order", Token::Order);
361    m.insert("by", Token::By);
362    m.insert("limit", Token::Limit);
363    m.insert("skip", Token::Skip);
364    m.insert("asc", Token::Asc);
365    m.insert("ascending", Token::Asc);
366    m.insert("desc", Token::Desc);
367    m.insert("descending", Token::Desc);
368    m.insert("distinct", Token::Distinct);
369    m.insert("as", Token::As);
370    m.insert("and", Token::And);
371    m.insert("or", Token::Or);
372    m.insert("not", Token::Not);
373    m.insert("xor", Token::Xor);
374    m.insert("in", Token::In);
375    m.insert("is", Token::Is);
376    m.insert("starts", Token::Starts);
377    m.insert("ends", Token::Ends);
378    m.insert("contains", Token::Contains);
379    m.insert("case", Token::Case);
380    m.insert("when", Token::When);
381    m.insert("then", Token::Then);
382    m.insert("else", Token::Else);
383    m.insert("end", Token::End);
384    m.insert("union", Token::Union);
385    m.insert("all", Token::All);
386    m.insert("any", Token::Any);
387    m.insert("none", Token::None);
388    m.insert("single", Token::Single);
389    m.insert("exists", Token::Exists);
390    m.insert("count", Token::Count);
391    m.insert("call", Token::Call);
392    m.insert("yield", Token::Yield);
393    m.insert("on", Token::On);
394    m.insert("true", Token::True);
395    m.insert("false", Token::False);
396    m.insert("null", Token::Null);
397
398    // DDL keywords
399    m.insert("node", Token::Node);
400    m.insert("rel", Token::Rel);
401    m.insert("table", Token::Table);
402    m.insert("group", Token::Group);
403    m.insert("rdf", Token::Rdf);
404    m.insert("graph", Token::Graph);
405    m.insert("from", Token::From);
406    m.insert("to", Token::To);
407    m.insert("primary", Token::Primary);
408    m.insert("key", Token::Key);
409    m.insert("drop", Token::Drop);
410    m.insert("alter", Token::Alter);
411    m.insert("add", Token::Add);
412    m.insert("column", Token::Column);
413    m.insert("rename", Token::Rename);
414    m.insert("comment", Token::Comment);
415    m.insert("default", Token::Default);
416    m.insert("copy", Token::Copy);
417    m.insert("load", Token::Load);
418    m.insert("attach", Token::Attach);
419    m.insert("use", Token::Use);
420    m.insert("database", Token::Database);
421    m.insert("export", Token::Export);
422    m.insert("import", Token::Import);
423    m.insert("install", Token::Install);
424    m.insert("extension", Token::Extension);
425
426    // Type keywords
427    m.insert("bool", Token::BoolType);
428    m.insert("boolean", Token::BoolType);
429    m.insert("int8", Token::Int8Type);
430    m.insert("int16", Token::Int16Type);
431    m.insert("int32", Token::Int32Type);
432    m.insert("int", Token::Int32Type);
433    m.insert("integer", Token::Int32Type);
434    m.insert("int64", Token::Int64Type);
435    m.insert("int128", Token::Int128Type);
436    m.insert("uint8", Token::UInt8Type);
437    m.insert("uint16", Token::UInt16Type);
438    m.insert("uint32", Token::UInt32Type);
439    m.insert("uint64", Token::UInt64Type);
440    m.insert("float", Token::FloatType);
441    m.insert("double", Token::DoubleType);
442    m.insert("string", Token::StringType);
443    m.insert("date", Token::DateType);
444    m.insert("timestamp", Token::TimestampType);
445    m.insert("interval", Token::IntervalType);
446    m.insert("blob", Token::BlobType);
447    m.insert("uuid", Token::UuidType);
448    m.insert("serial", Token::SerialType);
449    m.insert("list", Token::ListType);
450    m.insert("map", Token::MapType);
451    m.insert("struct", Token::StructType);
452
453    // Transaction keywords
454    m.insert("begin", Token::Begin);
455    m.insert("commit", Token::Commit);
456    m.insert("rollback", Token::Rollback);
457    m.insert("transaction", Token::Transaction);
458    m.insert("read", Token::Read);
459    m.insert("write", Token::Write);
460    m.insert("only", Token::Only);
461
462    // Special
463    m.insert("if", Token::If);
464    m.insert("macro", Token::Macro);
465    m.insert("shortest", Token::Shortest);
466    m.insert("profile", Token::Profile);
467    m.insert("explain", Token::Explain);
468
469    m
470});
471
472/// Look up whether an identifier is a keyword.
473/// Cypher keywords are case-insensitive.
474pub fn lookup_keyword(ident: &str) -> Option<Token> {
475    let lower = ident.to_ascii_lowercase();
476    KEYWORDS.get(lower.as_str()).cloned()
477}
478
#[cfg(test)]
mod tests {
    use super::*;

    /// Keyword lookup ignores the letter case of its input.
    #[test]
    fn keyword_case_insensitive() {
        for spelling in ["MATCH", "match", "Match"] {
            assert_eq!(lookup_keyword(spelling), Some(Token::Match));
        }
    }

    /// Ordinary identifiers are not reported as keywords.
    #[test]
    fn non_keyword_returns_none() {
        for word in ["foobar", "x"] {
            assert_eq!(lookup_keyword(word), None);
        }
    }

    /// Type names, including their aliases, resolve to the matching type token.
    #[test]
    fn type_keywords() {
        let cases = [
            ("INT64", Token::Int64Type),
            ("string", Token::StringType),
            ("BOOLEAN", Token::BoolType),
            ("INT", Token::Int32Type),
            ("INTEGER", Token::Int32Type),
        ];
        for (spelling, expected) in cases {
            assert_eq!(lookup_keyword(spelling), Some(expected));
        }
    }

    /// `Display` renders symbols, keywords, literals and the EOF sentinel as expected.
    #[test]
    fn display_tokens() {
        let cases = [
            (Token::LeftParen, "("),
            (Token::Arrow, "->"),
            (Token::Match, "MATCH"),
            (Token::Integer(42), "42"),
            (Token::StringLiteral(SmolStr::new("hi")), "'hi'"),
            (Token::Eof, "<EOF>"),
        ];
        for (token, rendered) in cases {
            assert_eq!(token.to_string(), rendered);
        }
    }
}