1use std::ops::Range;
16
17use logos::Lexer;
18use logos::Logos;
19
/// Streaming tokenizer over a SQL-like source string.
///
/// Wraps a [`logos`] lexer over [`TokenKind`] and, via its [`Iterator`]
/// impl, emits a single trailing [`TokenKind::EOI`] token once the
/// underlying input is exhausted.
#[derive(Debug)]
pub struct Tokenizer<'source> {
    // Underlying logos lexer that produces `TokenKind`s from the source.
    lexer: Lexer<'source, TokenKind>,
    // Set to `true` once the end-of-input token has been emitted, so
    // `EOI` is yielded exactly once.
    eoi: bool,
}
25
impl<'source> Tokenizer<'source> {
    /// Creates a tokenizer over the given source text.
    pub fn new(source: &'source str) -> Self {
        Self {
            lexer: TokenKind::lexer(source),
            eoi: false,
        }
    }

    /// Returns the slice of source text matched by the most recent token.
    pub fn slice(&self) -> &'source str {
        self.lexer.slice()
    }

    /// Returns the byte range within the source of the most recent token.
    pub fn span(&self) -> Range<usize> {
        self.lexer.span()
    }
}
42
43impl<'source> Iterator for Tokenizer<'source> {
44 type Item = Result<TokenKind, ()>;
45
46 fn next(&mut self) -> Option<Self::Item> {
47 match self.lexer.next() {
48 Some(Err(())) => Some(Err(())),
49 Some(Ok(kind)) => Some(Ok(kind)),
50 None => {
51 if self.eoi {
52 None
54 } else {
55 self.eoi = true;
57 Some(Ok(TokenKind::EOI))
58 }
59 }
60 }
61 }
62}
63
/// The set of tokens recognized by the [`Tokenizer`].
///
/// Derived with [`Logos`], so each variant's `#[token]` / `#[regex]`
/// attribute defines the text it matches. `EOI` carries no attribute:
/// it is never matched from input and is appended by the `Tokenizer`
/// iterator after the lexer runs out of input.
#[derive(Logos, Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
    /// End of input; emitted once by `Tokenizer`, not matched from text.
    EOI,

    /// Runs of spaces, tabs, carriage returns, newlines, and form feeds.
    #[regex(r"[ \t\r\n\f]+")]
    Whitespace,

    /// `--` line comments and `/* ... */` block comments.
    #[regex(r"--[^\r\n\f]*")]
    #[regex(r"/\*([^\*]|(\*[^/]))*\*/")]
    Comment,

    /// Unquoted identifier: underscore or letter, then underscores,
    /// letters, or digits.
    #[regex(r#"[_a-zA-Z][_a-zA-Z0-9]*"#)]
    Ident,

    /// Quoted string in single, double, or backtick quotes; permits
    /// backslash escapes and a doubled quote of the same kind.
    #[regex(r#"'([^'\\]|\\.|'')*'"#)]
    #[regex(r#""([^"\\]|\\.|"")*""#)]
    #[regex(r#"`([^`\\]|\\.|``)*`"#)]
    LiteralString,
    /// Hex/binary string literal such as `x'1F'` (empty body allowed).
    #[regex(r"[xX]'[a-fA-F0-9]*'")]
    LiteralHexBinaryString,

    /// Decimal integer; underscores allowed after the first digit.
    #[regex(r"[0-9]+(_|[0-9])*")]
    LiteralInteger,
    /// Hexadecimal integer with a `0x` / `0X` prefix.
    #[regex(r"0[xX][a-fA-F0-9]+")]
    LiteralHexInteger,
    /// Float: exponent form (`1e10`) or decimal-point form (`1.5`,
    /// optionally with an exponent such as `1.5e-3`).
    #[regex(r"[0-9]+[eE][+-]?[0-9]+")]
    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?")]
    LiteralFloat,

    // --- Operators and punctuation ---
    #[token("=")]
    Eq,
    #[token("<>")]
    #[token("!=")]
    NotEq,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("<=")]
    Lte,
    #[token(">=")]
    Gte,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Multiply,
    #[token("/")]
    Divide,
    #[token("%")]
    Modulo,
    #[token("||")]
    Concat,
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token(":")]
    Colon,
    #[token("::")]
    DoubleColon,
    #[token(";")]
    SemiColon,
    #[token("$")]
    Dollar,
    #[token("=>")]
    Arrow,

    // --- Keywords (all matched case-insensitively) ---
    #[token("ADD", ignore(case))]
    ADD,
    #[token("AGGREGATE", ignore(case))]
    AGGREGATE,
    #[token("ALL", ignore(case))]
    ALL,
    #[token("ALTER", ignore(case))]
    ALTER,
    #[token("ANALYZE", ignore(case))]
    ANALYZE,
    #[token("AND", ignore(case))]
    AND,
    #[token("ANY", ignore(case))]
    ANY,
    #[token("ARRAY", ignore(case))]
    ARRAY,
    #[token("AS", ignore(case))]
    AS,
    #[token("ASC", ignore(case))]
    ASC,
    #[token("BEGIN", ignore(case))]
    BEGIN,
    #[token("BETWEEN", ignore(case))]
    BETWEEN,
    #[token("BOOLEAN", ignore(case))]
    BOOLEAN,
    #[token("BY", ignore(case))]
    BY,
    #[token("CASE", ignore(case))]
    CASE,
    #[token("CAST", ignore(case))]
    CAST,
    #[token("CLUSTER", ignore(case))]
    CLUSTER,
    #[token("COLUMN", ignore(case))]
    COLUMN,
    // Keyword `COMMENT`; distinct from the `Comment` trivia variant above.
    #[token("COMMENT", ignore(case))]
    COMMENT,
    #[token("CREATE", ignore(case))]
    CREATE,
    #[token("DATABASES", ignore(case))]
    DATABASES,
    #[token("DATABASE", ignore(case))]
    DATABASE,
    #[token("DELETE", ignore(case))]
    DELETE,
    #[token("DESC", ignore(case))]
    DESC,
    #[token("DESCRIBE", ignore(case))]
    DESCRIBE,
    #[token("DISTINCT", ignore(case))]
    DISTINCT,
    #[token("DROP", ignore(case))]
    DROP,
    #[token("ELSE", ignore(case))]
    ELSE,
    #[token("END", ignore(case))]
    END,
    #[token("EQUALITY", ignore(case))]
    EQUALITY,
    #[token("EXCLUDE", ignore(case))]
    EXCLUDE,
    #[token("EXEC", ignore(case))]
    EXEC,
    #[token("EXISTS", ignore(case))]
    EXISTS,
    #[token("EXPLAIN", ignore(case))]
    EXPLAIN,
    #[token("FALSE", ignore(case))]
    FALSE,
    #[token("FIRST", ignore(case))]
    FIRST,
    #[token("FLOAT", ignore(case))]
    FLOAT,
    #[token("FROM", ignore(case))]
    FROM,
    #[token("FULL", ignore(case))]
    FULL,
    #[token("GROUP", ignore(case))]
    GROUP,
    #[token("IF", ignore(case))]
    IF,
    #[token("IN", ignore(case))]
    IN,
    #[token("INDEX", ignore(case))]
    INDEX,
    #[token("INNER", ignore(case))]
    INNER,
    #[token("INSERT", ignore(case))]
    INSERT,
    #[token("INT", ignore(case))]
    INT,
    #[token("INTERVAL", ignore(case))]
    INTERVAL,
    #[token("INTO", ignore(case))]
    INTO,
    #[token("IS", ignore(case))]
    IS,
    #[token("JOB", ignore(case))]
    JOB,
    #[token("JOBS", ignore(case))]
    JOBS,
    #[token("JOIN", ignore(case))]
    JOIN,
    #[token("KEY", ignore(case))]
    KEY,
    #[token("LAST", ignore(case))]
    LAST,
    #[token("LEFT", ignore(case))]
    LEFT,
    #[token("LIMIT", ignore(case))]
    LIMIT,
    #[token("MATERIALIZED", ignore(case))]
    MATERIALIZED,
    #[token("NODEGROUP", ignore(case))]
    NODEGROUP,
    #[token("NOT", ignore(case))]
    NOT,
    #[token("NULL", ignore(case))]
    NULL,
    #[token("NULLS", ignore(case))]
    NULLS,
    #[token("OBJECT", ignore(case))]
    OBJECT,
    #[token("OFFSET", ignore(case))]
    OFFSET,
    #[token("ON", ignore(case))]
    ON,
    #[token("OPTIMIZE", ignore(case))]
    OPTIMIZE,
    #[token("OR", ignore(case))]
    OR,
    #[token("ORDER", ignore(case))]
    ORDER,
    #[token("OUTER", ignore(case))]
    OUTER,
    #[token("PERCENT", ignore(case))]
    PERCENT,
    #[token("PLAN", ignore(case))]
    PLAN,
    #[token("RANGE", ignore(case))]
    RANGE,
    #[token("RENAME", ignore(case))]
    RENAME,
    #[token("REPLACE", ignore(case))]
    REPLACE,
    #[token("RESUME", ignore(case))]
    RESUME,
    #[token("RIGHT", ignore(case))]
    RIGHT,
    #[token("SAMPLE", ignore(case))]
    SAMPLE,
    #[token("SCHEDULE", ignore(case))]
    SCHEDULE,
    #[token("SCHEMAS", ignore(case))]
    SCHEMAS,
    #[token("SCHEMA", ignore(case))]
    SCHEMA,
    #[token("SEARCH", ignore(case))]
    SEARCH,
    #[token("SELECT", ignore(case))]
    SELECT,
    #[token("SET", ignore(case))]
    SET,
    #[token("SHOW", ignore(case))]
    SHOW,
    #[token("STATEMENTS", ignore(case))]
    STATEMENTS,
    #[token("STRING", ignore(case))]
    STRING,
    #[token("SUSPEND", ignore(case))]
    SUSPEND,
    #[token("TABLE", ignore(case))]
    TABLE,
    #[token("TABLES", ignore(case))]
    TABLES,
    #[token("THEN", ignore(case))]
    THEN,
    #[token("TIMESTAMP", ignore(case))]
    TIMESTAMP,
    #[token("TO", ignore(case))]
    TO,
    #[token("TRUE", ignore(case))]
    TRUE,
    #[token("UINT", ignore(case))]
    UINT,
    #[token("UNION", ignore(case))]
    UNION,
    #[token("UPDATE", ignore(case))]
    UPDATE,
    #[token("VACUUM", ignore(case))]
    VACUUM,
    #[token("VALUES", ignore(case))]
    VALUES,
    #[token("VIEW", ignore(case))]
    VIEW,
    #[token("VIEWS", ignore(case))]
    VIEWS,
    #[token("WHEN", ignore(case))]
    WHEN,
    #[token("WHERE", ignore(case))]
    WHERE,
    #[token("WINDOW", ignore(case))]
    WINDOW,
    #[token("WITH", ignore(case))]
    WITH,
    #[token("WITHIN", ignore(case))]
    WITHIN,
    #[token("XOR", ignore(case))]
    XOR,

    // --- Command-mode tokens (compiled only with the `command` feature) ---
    #[cfg(feature = "command")]
    #[token("\\")]
    BackSlash,
    #[cfg(feature = "command")]
    #[token("CANCEL", ignore(case))]
    CANCEL,
}
376
377impl TokenKind {
378 pub fn is_literal(&self) -> bool {
379 use TokenKind::*;
380
381 matches!(
382 self,
383 LiteralFloat
384 | LiteralInteger
385 | LiteralString
386 | LiteralHexBinaryString
387 | LiteralHexInteger
388 )
389 }
390
391 pub fn is_symbol(&self) -> bool {
392 use TokenKind::*;
393
394 #[cfg(feature = "command")]
395 if matches!(self, BackSlash) {
396 return true;
397 }
398
399 matches!(
400 self,
401 Eq | NotEq
402 | Lt
403 | Gt
404 | Lte
405 | Gte
406 | Plus
407 | Minus
408 | Multiply
409 | Divide
410 | Modulo
411 | Concat
412 | LParen
413 | RParen
414 | LBracket
415 | RBracket
416 | LBrace
417 | RBrace
418 | Comma
419 | Dot
420 | Colon
421 | DoubleColon
422 | SemiColon
423 | Dollar
424 | Arrow
425 )
426 }
427
428 pub fn is_keyword(&self) -> bool {
429 use TokenKind::*;
430
431 !self.is_literal()
432 && !self.is_symbol()
433 && !matches!(self, Ident | EOI | Whitespace | Comment)
434 }
435
436 pub fn is_reserved_keyword(&self) -> bool {
437 use TokenKind::*;
438
439 matches!(
440 self,
441 FROM | JOIN
442 | VALUES
443 | WHERE
444 | ORDER
445 | DISTINCT
446 | LIMIT
447 | SELECT
448 | AGGREGATE
449 | WINDOW
450 | WITHIN
451 | GROUP
452 | INSERT
453 | UNION
454 | SAMPLE
455 | NULL
456 | TRUE
457 | FALSE
458 | AS
459 | BY
460 | ON
461 | CASE
462 | WHEN
463 | THEN
464 | ELSE
465 | END
466 | CAST
467 | NOT
468 | IS
469 | IN
470 | BETWEEN
471 | AND
472 | OR
473 )
474 }
475}