1use std::ops::Range;
16
17use logos::Lexer;
18use logos::Logos;
19
/// Token stream over a source string.
///
/// Wraps a `logos` lexer for [`TokenKind`] together with a flag recording
/// whether the end-of-input token has already been handed out.
#[derive(Debug)]
pub struct Tokenizer<'source> {
    /// Underlying `logos` lexer that recognises `TokenKind`s in the source text.
    lexer: Lexer<'source, TokenKind>,
    /// `true` once the `Iterator` impl has yielded `TokenKind::EOI`.
    eoi: bool,
}
25
26impl<'source> Tokenizer<'source> {
27 pub fn new(source: &'source str) -> Self {
28 Self {
29 lexer: TokenKind::lexer(source),
30 eoi: false,
31 }
32 }
33
34 pub fn slice(&self) -> &'source str {
35 self.lexer.slice()
36 }
37
38 pub fn span(&self) -> Range<usize> {
39 self.lexer.span()
40 }
41}
42
43impl<'source> Iterator for Tokenizer<'source> {
44 type Item = Result<TokenKind, ()>;
45
46 fn next(&mut self) -> Option<Self::Item> {
47 match self.lexer.next() {
48 Some(Err(())) => Some(Err(())),
49 Some(Ok(kind)) => Some(Ok(kind)),
50 None => {
51 if self.eoi {
52 None
54 } else {
55 self.eoi = true;
57 Some(Ok(TokenKind::EOI))
58 }
59 }
60 }
61 }
62}
63
64#[derive(Logos, Clone, Copy, Debug, PartialEq, Eq)]
65pub enum TokenKind {
66 EOI,
68
69 #[regex(r"[ \t\r\n\f]+")]
72 Whitespace,
73
74 #[regex(r"--[^\r\n\f]*")]
76 #[regex(r"/\*([^\*]|(\*[^/]))*\*/")]
77 Comment,
78
79 #[regex(r#"[_a-zA-Z][_a-zA-Z0-9]*"#)]
84 Ident,
85
86 #[regex(r#"'([^'\\]|\\.|'')*'"#)]
87 #[regex(r#""([^"\\]|\\.|"")*""#)]
88 #[regex(r#"`([^`\\]|\\.|``)*`"#)]
89 LiteralString,
90 #[regex(r"[xX]'[a-fA-F0-9]*'")]
91 LiteralHexBinaryString,
92
93 #[regex(r"[0-9]+(_|[0-9])*")]
94 LiteralInteger,
95 #[regex(r"0[xX][a-fA-F0-9]+")]
97 LiteralHexInteger,
98 #[regex(r"[0-9]+[eE][+-]?[0-9]+")]
100 #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?")]
101 LiteralFloat,
102
103 #[token("=")]
105 Eq,
106 #[token("<>")]
107 #[token("!=")]
108 NotEq,
109 #[token("<")]
110 Lt,
111 #[token(">")]
112 Gt,
113 #[token("<=")]
114 Lte,
115 #[token(">=")]
116 Gte,
117 #[token("+")]
118 Plus,
119 #[token("-")]
120 Minus,
121 #[token("*")]
122 Multiply,
123 #[token("/")]
124 Divide,
125 #[token("%")]
126 Modulo,
127 #[token("||")]
128 Concat,
129 #[token("(")]
130 LParen,
131 #[token(")")]
132 RParen,
133 #[token("[")]
134 LBracket,
135 #[token("]")]
136 RBracket,
137 #[token("{")]
138 LBrace,
139 #[token("}")]
140 RBrace,
141 #[token(",")]
142 Comma,
143 #[token(".")]
144 Dot,
145 #[token(":")]
146 Colon,
147 #[token("::")]
148 DoubleColon,
149 #[token(";")]
150 SemiColon,
151 #[token("$")]
152 Dollar,
153 #[token("=>")]
154 Arrow,
155
156 #[token("ADD", ignore(case))]
158 ADD,
159 #[token("AGGREGATE", ignore(case))]
160 AGGREGATE,
161 #[token("ALL", ignore(case))]
162 ALL,
163 #[token("ALTER", ignore(case))]
164 ALTER,
165 #[token("ANALYZE", ignore(case))]
166 ANALYZE,
167 #[token("AND", ignore(case))]
168 AND,
169 #[token("ANY", ignore(case))]
170 ANY,
171 #[token("ARRAY", ignore(case))]
172 ARRAY,
173 #[token("AS", ignore(case))]
174 AS,
175 #[token("ASC", ignore(case))]
176 ASC,
177 #[token("BEGIN", ignore(case))]
178 BEGIN,
179 #[token("BETWEEN", ignore(case))]
180 BETWEEN,
181 #[token("BOOLEAN", ignore(case))]
182 BOOLEAN,
183 #[token("BY", ignore(case))]
184 BY,
185 #[token("CASE", ignore(case))]
186 CASE,
187 #[token("CAST", ignore(case))]
188 CAST,
189 #[token("CLUSTER", ignore(case))]
190 CLUSTER,
191 #[token("COLUMN", ignore(case))]
192 COLUMN,
193 #[token("COMMENT", ignore(case))]
194 COMMENT,
195 #[token("CREATE", ignore(case))]
196 CREATE,
197 #[token("DATABASES", ignore(case))]
198 DATABASES,
199 #[token("DATABASE", ignore(case))]
200 DATABASE,
201 #[token("DELETE", ignore(case))]
202 DELETE,
203 #[token("DESC", ignore(case))]
204 DESC,
205 #[token("DESCRIBE", ignore(case))]
206 DESCRIBE,
207 #[token("DISTINCT", ignore(case))]
208 DISTINCT,
209 #[token("DROP", ignore(case))]
210 DROP,
211 #[token("ELSE", ignore(case))]
212 ELSE,
213 #[token("END", ignore(case))]
214 END,
215 #[token("EXCLUDE", ignore(case))]
216 EXCLUDE,
217 #[token("EXEC", ignore(case))]
218 EXEC,
219 #[token("EXISTS", ignore(case))]
220 EXISTS,
221 #[token("EXPLAIN", ignore(case))]
222 EXPLAIN,
223 #[token("FALSE", ignore(case))]
224 FALSE,
225 #[token("FIRST", ignore(case))]
226 FIRST,
227 #[token("FLOAT", ignore(case))]
228 FLOAT,
229 #[token("FROM", ignore(case))]
230 FROM,
231 #[token("FULL", ignore(case))]
232 FULL,
233 #[token("GROUP", ignore(case))]
234 GROUP,
235 #[token("IF", ignore(case))]
236 IF,
237 #[token("IN", ignore(case))]
238 IN,
239 #[token("INDEX", ignore(case))]
240 INDEX,
241 #[token("INNER", ignore(case))]
242 INNER,
243 #[token("INSERT", ignore(case))]
244 INSERT,
245 #[token("INT", ignore(case))]
246 INT,
247 #[token("INTERVAL", ignore(case))]
248 INTERVAL,
249 #[token("INTO", ignore(case))]
250 INTO,
251 #[token("IS", ignore(case))]
252 IS,
253 #[token("JOB", ignore(case))]
254 JOB,
255 #[token("JOBS", ignore(case))]
256 JOBS,
257 #[token("JOIN", ignore(case))]
258 JOIN,
259 #[token("KEY", ignore(case))]
260 KEY,
261 #[token("LAST", ignore(case))]
262 LAST,
263 #[token("LEFT", ignore(case))]
264 LEFT,
265 #[token("LIMIT", ignore(case))]
266 LIMIT,
267 #[token("MATERIALIZED", ignore(case))]
268 MATERIALIZED,
269 #[token("NODEGROUP", ignore(case))]
270 NODEGROUP,
271 #[token("NOT", ignore(case))]
272 NOT,
273 #[token("NULL", ignore(case))]
274 NULL,
275 #[token("NULLS", ignore(case))]
276 NULLS,
277 #[token("OBJECT", ignore(case))]
278 OBJECT,
279 #[token("OFFSET", ignore(case))]
280 OFFSET,
281 #[token("ON", ignore(case))]
282 ON,
283 #[token("OPTIMIZE", ignore(case))]
284 OPTIMIZE,
285 #[token("OR", ignore(case))]
286 OR,
287 #[token("ORDER", ignore(case))]
288 ORDER,
289 #[token("OUTER", ignore(case))]
290 OUTER,
291 #[token("PARTITION", ignore(case))]
292 PARTITION,
293 #[token("PERCENT", ignore(case))]
294 PERCENT,
295 #[token("PLAN", ignore(case))]
296 PLAN,
297 #[token("POINT", ignore(case))]
298 POINT,
299 #[token("RANGE", ignore(case))]
300 RANGE,
301 #[token("RENAME", ignore(case))]
302 RENAME,
303 #[token("REPLACE", ignore(case))]
304 REPLACE,
305 #[token("RESUME", ignore(case))]
306 RESUME,
307 #[token("RIGHT", ignore(case))]
308 RIGHT,
309 #[token("SAMPLE", ignore(case))]
310 SAMPLE,
311 #[token("SCHEDULE", ignore(case))]
312 SCHEDULE,
313 #[token("SCHEMAS", ignore(case))]
314 SCHEMAS,
315 #[token("SCHEMA", ignore(case))]
316 SCHEMA,
317 #[token("SEARCH", ignore(case))]
318 SEARCH,
319 #[token("SELECT", ignore(case))]
320 SELECT,
321 #[token("SET", ignore(case))]
322 SET,
323 #[token("SHOW", ignore(case))]
324 SHOW,
325 #[token("STATEMENTS", ignore(case))]
326 STATEMENTS,
327 #[token("STRING", ignore(case))]
328 STRING,
329 #[token("SUSPEND", ignore(case))]
330 SUSPEND,
331 #[token("TABLE", ignore(case))]
332 TABLE,
333 #[token("TABLES", ignore(case))]
334 TABLES,
335 #[token("THEN", ignore(case))]
336 THEN,
337 #[token("TIMESTAMP", ignore(case))]
338 TIMESTAMP,
339 #[token("TO", ignore(case))]
340 TO,
341 #[token("TRUE", ignore(case))]
342 TRUE,
343 #[token("UINT", ignore(case))]
344 UINT,
345 #[token("UNION", ignore(case))]
346 UNION,
347 #[token("UPDATE", ignore(case))]
348 UPDATE,
349 #[token("VACUUM", ignore(case))]
350 VACUUM,
351 #[token("VALUES", ignore(case))]
352 VALUES,
353 #[token("VIEW", ignore(case))]
354 VIEW,
355 #[token("VIEWS", ignore(case))]
356 VIEWS,
357 #[token("WHEN", ignore(case))]
358 WHEN,
359 #[token("WHERE", ignore(case))]
360 WHERE,
361 #[token("WINDOW", ignore(case))]
362 WINDOW,
363 #[token("WITH", ignore(case))]
364 WITH,
365 #[token("WITHIN", ignore(case))]
366 WITHIN,
367 #[token("XOR", ignore(case))]
368 XOR,
369
370 #[cfg(feature = "command")]
372 #[token("\\")]
373 BackSlash,
374 #[cfg(feature = "command")]
375 #[token("CANCEL", ignore(case))]
376 CANCEL,
377}
378
379impl TokenKind {
380 pub fn is_literal(&self) -> bool {
381 use TokenKind::*;
382
383 matches!(
384 self,
385 LiteralFloat
386 | LiteralInteger
387 | LiteralString
388 | LiteralHexBinaryString
389 | LiteralHexInteger
390 )
391 }
392
393 pub fn is_symbol(&self) -> bool {
394 use TokenKind::*;
395
396 #[cfg(feature = "command")]
397 if matches!(self, BackSlash) {
398 return true;
399 }
400
401 matches!(
402 self,
403 Eq | NotEq
404 | Lt
405 | Gt
406 | Lte
407 | Gte
408 | Plus
409 | Minus
410 | Multiply
411 | Divide
412 | Modulo
413 | Concat
414 | LParen
415 | RParen
416 | LBracket
417 | RBracket
418 | LBrace
419 | RBrace
420 | Comma
421 | Dot
422 | Colon
423 | DoubleColon
424 | SemiColon
425 | Dollar
426 | Arrow
427 )
428 }
429
430 pub fn is_keyword(&self) -> bool {
431 use TokenKind::*;
432
433 !self.is_literal()
434 && !self.is_symbol()
435 && !matches!(self, Ident | EOI | Whitespace | Comment)
436 }
437
438 pub fn is_reserved_keyword(&self) -> bool {
439 use TokenKind::*;
440
441 matches!(
442 self,
443 FROM | JOIN
444 | VALUES
445 | WHERE
446 | ORDER
447 | DISTINCT
448 | LIMIT
449 | SELECT
450 | AGGREGATE
451 | WINDOW
452 | WITHIN
453 | GROUP
454 | INSERT
455 | UNION
456 | SAMPLE
457 | NULL
458 | TRUE
459 | FALSE
460 | AS
461 | BY
462 | ON
463 | CASE
464 | WHEN
465 | THEN
466 | ELSE
467 | END
468 | CAST
469 | NOT
470 | IS
471 | IN
472 | BETWEEN
473 | AND
474 | OR
475 )
476 }
477}