1use smol_str::SmolStr;
2use std::collections::HashMap;
3use std::sync::LazyLock;
4
5#[derive(Clone, Debug, PartialEq, Eq, Hash)]
8pub enum Token {
9 Integer(i64),
11 Float(SmolStr), StringLiteral(SmolStr),
13 True,
14 False,
15 Null,
16
17 Ident(SmolStr),
19 EscapedIdent(SmolStr), Parameter(SmolStr), LeftParen, RightParen, LeftBracket, RightBracket, LeftBrace, RightBrace, Comma, Dot, Colon, Semicolon, Pipe, DoubleDot, Arrow, LeftArrow, Dash, Underscore, Eq, Neq, Lt, Le, Gt, Ge, Plus, Star, Slash, Percent, Caret, Ampersand, Tilde, RegexMatch, ShiftLeft, ShiftRight, Exclaim, PlusEq, Match,
62 Optional,
63 Where,
64 Return,
65 With,
66 Unwind,
67 Create,
68 Merge,
69 Set,
70 Delete,
71 Detach,
72 Remove,
73 Order,
74 By,
75 Limit,
76 Skip,
77 Asc,
78 Desc,
79 Distinct,
80 As,
81 And,
82 Or,
83 Not,
84 Xor,
85 In,
86 Is,
87 Starts,
88 Ends,
89 Contains,
90 Case,
91 When,
92 Then,
93 Else,
94 End,
95 Union,
96 All,
97 Any,
98 None,
99 Single,
100 Exists,
101 Count,
102 Call,
103 Yield,
104 On,
105
106 Node,
108 Rel,
109 Table,
110 Group,
111 Rdf,
112 Graph,
113 From,
114 To,
115 Primary,
116 Key,
117 Drop,
118 Alter,
119 Add,
120 Column,
121 Rename,
122 Comment,
123 Default,
124 Copy,
125 Load,
126 Attach,
127 Use,
128 Database,
129 Export,
130 Import,
131 Install,
132 Extension,
133
134 BoolType,
136 Int8Type,
137 Int16Type,
138 Int32Type,
139 Int64Type,
140 Int128Type,
141 UInt8Type,
142 UInt16Type,
143 UInt32Type,
144 UInt64Type,
145 FloatType,
146 DoubleType,
147 StringType,
148 DateType,
149 TimestampType,
150 IntervalType,
151 BlobType,
152 UuidType,
153 SerialType,
154 ListType,
155 MapType,
156 StructType,
157 UnionType,
158
159 Begin,
161 Commit,
162 Rollback,
163 Transaction,
164 Read,
165 Write,
166 Only,
167
168 If,
170 NotKw, Macro,
172 Shortest,
173 Profile,
174 Explain,
175
176 Eof,
178}
179
180impl std::fmt::Display for Token {
181 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182 match self {
183 Self::Integer(n) => write!(f, "{n}"),
184 Self::Float(s) => write!(f, "{s}"),
185 Self::StringLiteral(s) => write!(f, "'{s}'"),
186 Self::True => write!(f, "TRUE"),
187 Self::False => write!(f, "FALSE"),
188 Self::Null => write!(f, "NULL"),
189 Self::Ident(s) => write!(f, "{s}"),
190 Self::EscapedIdent(s) => write!(f, "`{s}`"),
191 Self::Parameter(s) => write!(f, "${s}"),
192 Self::LeftParen => write!(f, "("),
193 Self::RightParen => write!(f, ")"),
194 Self::LeftBracket => write!(f, "["),
195 Self::RightBracket => write!(f, "]"),
196 Self::LeftBrace => write!(f, "{{"),
197 Self::RightBrace => write!(f, "}}"),
198 Self::Comma => write!(f, ","),
199 Self::Dot => write!(f, "."),
200 Self::Colon => write!(f, ":"),
201 Self::Semicolon => write!(f, ";"),
202 Self::Pipe => write!(f, "|"),
203 Self::DoubleDot => write!(f, ".."),
204 Self::Arrow => write!(f, "->"),
205 Self::LeftArrow => write!(f, "<-"),
206 Self::Dash => write!(f, "-"),
207 Self::Underscore => write!(f, "_"),
208 Self::Eq => write!(f, "="),
209 Self::Neq => write!(f, "<>"),
210 Self::Lt => write!(f, "<"),
211 Self::Le => write!(f, "<="),
212 Self::Gt => write!(f, ">"),
213 Self::Ge => write!(f, ">="),
214 Self::Plus => write!(f, "+"),
215 Self::Star => write!(f, "*"),
216 Self::Slash => write!(f, "/"),
217 Self::Percent => write!(f, "%"),
218 Self::Caret => write!(f, "^"),
219 Self::Ampersand => write!(f, "&"),
220 Self::Tilde => write!(f, "~"),
221 Self::RegexMatch => write!(f, "=~"),
222 Self::ShiftLeft => write!(f, "<<"),
223 Self::ShiftRight => write!(f, ">>"),
224 Self::Exclaim => write!(f, "!"),
225 Self::PlusEq => write!(f, "+="),
226 Self::Eof => write!(f, "<EOF>"),
227 other => write!(f, "{}", keyword_name(other)),
228 }
229 }
230}
231
232fn keyword_name(tok: &Token) -> &'static str {
233 match tok {
234 Token::Match => "MATCH",
235 Token::Optional => "OPTIONAL",
236 Token::Where => "WHERE",
237 Token::Return => "RETURN",
238 Token::With => "WITH",
239 Token::Unwind => "UNWIND",
240 Token::Create => "CREATE",
241 Token::Merge => "MERGE",
242 Token::Set => "SET",
243 Token::Delete => "DELETE",
244 Token::Detach => "DETACH",
245 Token::Remove => "REMOVE",
246 Token::Order => "ORDER",
247 Token::By => "BY",
248 Token::Limit => "LIMIT",
249 Token::Skip => "SKIP",
250 Token::Asc => "ASC",
251 Token::Desc => "DESC",
252 Token::Distinct => "DISTINCT",
253 Token::As => "AS",
254 Token::And => "AND",
255 Token::Or => "OR",
256 Token::Not => "NOT",
257 Token::Xor => "XOR",
258 Token::In => "IN",
259 Token::Is => "IS",
260 Token::Starts => "STARTS",
261 Token::Ends => "ENDS",
262 Token::Contains => "CONTAINS",
263 Token::Case => "CASE",
264 Token::When => "WHEN",
265 Token::Then => "THEN",
266 Token::Else => "ELSE",
267 Token::End => "END",
268 Token::Union => "UNION",
269 Token::All => "ALL",
270 Token::Any => "ANY",
271 Token::None => "NONE",
272 Token::Single => "SINGLE",
273 Token::Exists => "EXISTS",
274 Token::Count => "COUNT",
275 Token::Call => "CALL",
276 Token::Yield => "YIELD",
277 Token::On => "ON",
278 Token::Node => "NODE",
279 Token::Rel => "REL",
280 Token::Table => "TABLE",
281 Token::Group => "GROUP",
282 Token::Rdf => "RDF",
283 Token::Graph => "GRAPH",
284 Token::From => "FROM",
285 Token::To => "TO",
286 Token::Primary => "PRIMARY",
287 Token::Key => "KEY",
288 Token::Drop => "DROP",
289 Token::Alter => "ALTER",
290 Token::Add => "ADD",
291 Token::Column => "COLUMN",
292 Token::Rename => "RENAME",
293 Token::Comment => "COMMENT",
294 Token::Default => "DEFAULT",
295 Token::Copy => "COPY",
296 Token::Load => "LOAD",
297 Token::Attach => "ATTACH",
298 Token::Use => "USE",
299 Token::Database => "DATABASE",
300 Token::Export => "EXPORT",
301 Token::Import => "IMPORT",
302 Token::Install => "INSTALL",
303 Token::Extension => "EXTENSION",
304 Token::BoolType => "BOOL",
305 Token::Int8Type => "INT8",
306 Token::Int16Type => "INT16",
307 Token::Int32Type => "INT32",
308 Token::Int64Type => "INT64",
309 Token::Int128Type => "INT128",
310 Token::UInt8Type => "UINT8",
311 Token::UInt16Type => "UINT16",
312 Token::UInt32Type => "UINT32",
313 Token::UInt64Type => "UINT64",
314 Token::FloatType => "FLOAT",
315 Token::DoubleType => "DOUBLE",
316 Token::StringType => "STRING",
317 Token::DateType => "DATE",
318 Token::TimestampType => "TIMESTAMP",
319 Token::IntervalType => "INTERVAL",
320 Token::BlobType => "BLOB",
321 Token::UuidType => "UUID",
322 Token::SerialType => "SERIAL",
323 Token::ListType => "LIST",
324 Token::MapType => "MAP",
325 Token::StructType => "STRUCT",
326 Token::UnionType => "UNION",
327 Token::Begin => "BEGIN",
328 Token::Commit => "COMMIT",
329 Token::Rollback => "ROLLBACK",
330 Token::Transaction => "TRANSACTION",
331 Token::Read => "READ",
332 Token::Write => "WRITE",
333 Token::Only => "ONLY",
334 Token::If => "IF",
335 Token::NotKw => "NOT",
336 Token::Macro => "MACRO",
337 Token::Shortest => "SHORTEST",
338 Token::Profile => "PROFILE",
339 Token::Explain => "EXPLAIN",
340 _ => "<unknown>",
341 }
342}
343
344static KEYWORDS: LazyLock<HashMap<&'static str, Token>> = LazyLock::new(|| {
346 let mut m = HashMap::new();
347 m.insert("match", Token::Match);
349 m.insert("optional", Token::Optional);
350 m.insert("where", Token::Where);
351 m.insert("return", Token::Return);
352 m.insert("with", Token::With);
353 m.insert("unwind", Token::Unwind);
354 m.insert("create", Token::Create);
355 m.insert("merge", Token::Merge);
356 m.insert("set", Token::Set);
357 m.insert("delete", Token::Delete);
358 m.insert("detach", Token::Detach);
359 m.insert("remove", Token::Remove);
360 m.insert("order", Token::Order);
361 m.insert("by", Token::By);
362 m.insert("limit", Token::Limit);
363 m.insert("skip", Token::Skip);
364 m.insert("asc", Token::Asc);
365 m.insert("ascending", Token::Asc);
366 m.insert("desc", Token::Desc);
367 m.insert("descending", Token::Desc);
368 m.insert("distinct", Token::Distinct);
369 m.insert("as", Token::As);
370 m.insert("and", Token::And);
371 m.insert("or", Token::Or);
372 m.insert("not", Token::Not);
373 m.insert("xor", Token::Xor);
374 m.insert("in", Token::In);
375 m.insert("is", Token::Is);
376 m.insert("starts", Token::Starts);
377 m.insert("ends", Token::Ends);
378 m.insert("contains", Token::Contains);
379 m.insert("case", Token::Case);
380 m.insert("when", Token::When);
381 m.insert("then", Token::Then);
382 m.insert("else", Token::Else);
383 m.insert("end", Token::End);
384 m.insert("union", Token::Union);
385 m.insert("all", Token::All);
386 m.insert("any", Token::Any);
387 m.insert("none", Token::None);
388 m.insert("single", Token::Single);
389 m.insert("exists", Token::Exists);
390 m.insert("count", Token::Count);
391 m.insert("call", Token::Call);
392 m.insert("yield", Token::Yield);
393 m.insert("on", Token::On);
394 m.insert("true", Token::True);
395 m.insert("false", Token::False);
396 m.insert("null", Token::Null);
397
398 m.insert("node", Token::Node);
400 m.insert("rel", Token::Rel);
401 m.insert("table", Token::Table);
402 m.insert("group", Token::Group);
403 m.insert("rdf", Token::Rdf);
404 m.insert("graph", Token::Graph);
405 m.insert("from", Token::From);
406 m.insert("to", Token::To);
407 m.insert("primary", Token::Primary);
408 m.insert("key", Token::Key);
409 m.insert("drop", Token::Drop);
410 m.insert("alter", Token::Alter);
411 m.insert("add", Token::Add);
412 m.insert("column", Token::Column);
413 m.insert("rename", Token::Rename);
414 m.insert("comment", Token::Comment);
415 m.insert("default", Token::Default);
416 m.insert("copy", Token::Copy);
417 m.insert("load", Token::Load);
418 m.insert("attach", Token::Attach);
419 m.insert("use", Token::Use);
420 m.insert("database", Token::Database);
421 m.insert("export", Token::Export);
422 m.insert("import", Token::Import);
423 m.insert("install", Token::Install);
424 m.insert("extension", Token::Extension);
425
426 m.insert("bool", Token::BoolType);
428 m.insert("boolean", Token::BoolType);
429 m.insert("int8", Token::Int8Type);
430 m.insert("int16", Token::Int16Type);
431 m.insert("int32", Token::Int32Type);
432 m.insert("int", Token::Int32Type);
433 m.insert("integer", Token::Int32Type);
434 m.insert("int64", Token::Int64Type);
435 m.insert("int128", Token::Int128Type);
436 m.insert("uint8", Token::UInt8Type);
437 m.insert("uint16", Token::UInt16Type);
438 m.insert("uint32", Token::UInt32Type);
439 m.insert("uint64", Token::UInt64Type);
440 m.insert("float", Token::FloatType);
441 m.insert("double", Token::DoubleType);
442 m.insert("string", Token::StringType);
443 m.insert("date", Token::DateType);
444 m.insert("timestamp", Token::TimestampType);
445 m.insert("interval", Token::IntervalType);
446 m.insert("blob", Token::BlobType);
447 m.insert("uuid", Token::UuidType);
448 m.insert("serial", Token::SerialType);
449 m.insert("list", Token::ListType);
450 m.insert("map", Token::MapType);
451 m.insert("struct", Token::StructType);
452
453 m.insert("begin", Token::Begin);
455 m.insert("commit", Token::Commit);
456 m.insert("rollback", Token::Rollback);
457 m.insert("transaction", Token::Transaction);
458 m.insert("read", Token::Read);
459 m.insert("write", Token::Write);
460 m.insert("only", Token::Only);
461
462 m.insert("if", Token::If);
464 m.insert("macro", Token::Macro);
465 m.insert("shortest", Token::Shortest);
466 m.insert("profile", Token::Profile);
467 m.insert("explain", Token::Explain);
468
469 m
470});
471
472pub fn lookup_keyword(ident: &str) -> Option<Token> {
475 let lower = ident.to_ascii_lowercase();
476 KEYWORDS.get(lower.as_str()).cloned()
477}
478
#[cfg(test)]
mod tests {
    use super::*;

    /// Keyword lookup must not depend on the letter case of the input.
    #[test]
    fn keyword_case_insensitive() {
        for spelling in ["MATCH", "match", "Match"] {
            assert_eq!(lookup_keyword(spelling), Some(Token::Match));
        }
    }

    /// Ordinary identifiers are not reported as keywords.
    #[test]
    fn non_keyword_returns_none() {
        for ident in ["foobar", "x"] {
            assert_eq!(lookup_keyword(ident), None);
        }
    }

    /// Type names resolve to their type tokens, aliases included.
    #[test]
    fn type_keywords() {
        let cases = [
            ("INT64", Token::Int64Type),
            ("string", Token::StringType),
            ("BOOLEAN", Token::BoolType),
            ("INT", Token::Int32Type),
            ("INTEGER", Token::Int32Type),
        ];
        for (spelling, expected) in cases {
            assert_eq!(lookup_keyword(spelling), Some(expected));
        }
    }

    /// `Display` output for a sample of punctuation, keyword, literal and EOF tokens.
    #[test]
    fn display_tokens() {
        let cases = [
            (Token::LeftParen, "("),
            (Token::Arrow, "->"),
            (Token::Match, "MATCH"),
            (Token::Integer(42), "42"),
            (Token::StringLiteral(SmolStr::new("hi")), "'hi'"),
            (Token::Eof, "<EOF>"),
        ];
        for (token, rendered) in cases {
            assert_eq!(token.to_string(), rendered);
        }
    }
}