/// A single lexical token of the SQL dialect produced by [`Lexer`].
///
/// Keyword and punctuation variants carry no payload. Identifier and literal
/// variants carry the text read from the input, without surrounding quotes.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token {
    // ---- Keywords ----
    Select,
    From,
    Where,
    With,
    And,
    Or,
    In,
    Not,
    Between,
    Like,
    Is,
    Null,
    /// The two-word keyword `ORDER BY`, emitted by the lexer as one token.
    OrderBy,
    /// The two-word keyword `GROUP BY`, emitted by the lexer as one token.
    GroupBy,
    Having,
    As,
    Asc,
    Desc,
    Limit,
    Offset,
    /// The `DATETIME` keyword (recognised by the lexer only, not by
    /// [`Token::from_keyword`]).
    DateTime,
    Case,
    When,
    Then,
    Else,
    End,
    Distinct,
    Over,
    Partition,
    By,
    Rows,
    /// The `RANGE` keyword (produced by [`Token::from_keyword`] only; the
    /// lexer yields a plain identifier for `RANGE`).
    Range,
    Unbounded,
    Preceding,
    Following,
    Current,
    Row,
    Union,
    Intersect,
    Except,
    Web,
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    On,
    Cross,

    // ---- Identifiers and literals ----
    /// An unquoted word that is not a keyword. The lexer also uses this for
    /// any single character it does not otherwise recognise.
    Identifier(String),
    /// Text enclosed in double quotes, stored without the quotes.
    QuotedIdentifier(String),
    /// Text enclosed in single quotes, stored without the quotes.
    StringLiteral(String),
    /// The source text of a number, including a leading `-` when the lexer
    /// folded a sign into the literal.
    NumberLiteral(String),

    // ---- Punctuation ----
    /// `*`
    Star,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,

    // ---- Comparison operators ----
    /// `=`
    Equal,
    /// `<>` or `!=`
    NotEqual,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanOrEqual,
    /// `>=`
    GreaterThanOrEqual,

    // ---- Arithmetic operators ----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `/`
    Divide,
    /// `%`
    Modulo,

    /// `||`
    Concat,
    /// End of input.
    Eof,
}

impl Token {
    /// Maps a single keyword to its token, ignoring ASCII case.
    ///
    /// Only ASCII letters are case-folded. Full Unicode upper-casing maps
    /// look-alikes such as U+0131 (dotless i) to `I` and U+017F (long s) to
    /// `S`, which would turn an identifier like `ıs` into the keyword `IS`.
    ///
    /// `ORDER` and `GROUP` map straight to [`Token::OrderBy`] and
    /// [`Token::GroupBy`]; this function never looks for a following `BY`.
    /// Returns `None` for any other word.
    pub fn from_keyword(s: &str) -> Option<Token> {
        let token = match s.to_ascii_uppercase().as_str() {
            "SELECT" => Token::Select,
            "FROM" => Token::From,
            "WHERE" => Token::Where,
            "WITH" => Token::With,
            "AND" => Token::And,
            "OR" => Token::Or,
            "IN" => Token::In,
            "NOT" => Token::Not,
            "BETWEEN" => Token::Between,
            "LIKE" => Token::Like,
            "IS" => Token::Is,
            "NULL" => Token::Null,
            "ORDER" => Token::OrderBy,
            "GROUP" => Token::GroupBy,
            "HAVING" => Token::Having,
            "AS" => Token::As,
            "ASC" => Token::Asc,
            "DESC" => Token::Desc,
            "LIMIT" => Token::Limit,
            "OFFSET" => Token::Offset,
            "DISTINCT" => Token::Distinct,
            "CASE" => Token::Case,
            "WHEN" => Token::When,
            "THEN" => Token::Then,
            "ELSE" => Token::Else,
            "END" => Token::End,
            "OVER" => Token::Over,
            "PARTITION" => Token::Partition,
            "BY" => Token::By,
            "ROWS" => Token::Rows,
            "RANGE" => Token::Range,
            "UNBOUNDED" => Token::Unbounded,
            "PRECEDING" => Token::Preceding,
            "FOLLOWING" => Token::Following,
            "CURRENT" => Token::Current,
            "ROW" => Token::Row,
            "UNION" => Token::Union,
            "INTERSECT" => Token::Intersect,
            "EXCEPT" => Token::Except,
            "WEB" => Token::Web,
            "JOIN" => Token::Join,
            "INNER" => Token::Inner,
            "LEFT" => Token::Left,
            "RIGHT" => Token::Right,
            "FULL" => Token::Full,
            "OUTER" => Token::Outer,
            "ON" => Token::On,
            "CROSS" => Token::Cross,
            _ => return None,
        };
        Some(token)
    }

    /// Returns `true` for [`Token::And`] and [`Token::Or`].
    pub fn is_logical_operator(&self) -> bool {
        matches!(self, Token::And | Token::Or)
    }

    /// Returns `true` for the join qualifiers `INNER`, `LEFT`, `RIGHT`,
    /// `FULL` and `CROSS`. `JOIN` and `OUTER` themselves are not included.
    pub fn is_join_type(&self) -> bool {
        matches!(
            self,
            Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
        )
    }

    /// Returns `true` for `ORDER BY`, `GROUP BY`, `HAVING`, `LIMIT`,
    /// `OFFSET`, `UNION`, `INTERSECT` and `EXCEPT`.
    pub fn is_clause_terminator(&self) -> bool {
        matches!(
            self,
            Token::OrderBy
                | Token::GroupBy
                | Token::Having
                | Token::Limit
                | Token::Offset
                | Token::Union
                | Token::Intersect
                | Token::Except
        )
    }

    /// Returns the upper-case SQL spelling for a small set of keywords
    /// (`SELECT`, `FROM`, `WHERE`, `WITH`, `AND`, `OR`, `ORDER BY`,
    /// `GROUP BY`, `HAVING`) and `None` for every other token.
    pub fn as_keyword_str(&self) -> Option<&'static str> {
        match self {
            Token::Select => Some("SELECT"),
            Token::From => Some("FROM"),
            Token::Where => Some("WHERE"),
            Token::With => Some("WITH"),
            Token::And => Some("AND"),
            Token::Or => Some("OR"),
            Token::OrderBy => Some("ORDER BY"),
            Token::GroupBy => Some("GROUP BY"),
            Token::Having => Some("HAVING"),
            _ => None,
        }
    }
}

/// Character-level lexer that turns SQL text into [`Token`]s.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The input as characters, so positions count characters, not bytes.
    input: Vec<char>,
    /// Index into `input` of `current_char`.
    position: usize,
    /// The character at `position`, or `None` once the input is exhausted.
    current_char: Option<char>,
}

impl Lexer {
    /// Creates a lexer positioned on the first character of `input`.
    #[must_use]
    pub fn new(input: &str) -> Self {
        let input: Vec<char> = input.chars().collect();
        let current_char = input.first().copied();
        Self {
            input,
            position: 0,
            current_char,
        }
    }

    /// Moves one character forward and refreshes `current_char`.
    fn advance(&mut self) {
        self.position += 1;
        self.current_char = self.input.get(self.position).copied();
    }

    /// Returns the character `offset` positions ahead without consuming it.
    fn peek(&self, offset: usize) -> Option<char> {
        self.input.get(self.position + offset).copied()
    }

    /// Consumes `width` characters and returns `token`.
    fn consume(&mut self, width: usize, token: Token) -> Token {
        for _ in 0..width {
            self.advance();
        }
        token
    }

    /// Skips consecutive whitespace characters.
    fn skip_whitespace(&mut self) {
        while self.current_char.is_some_and(char::is_whitespace) {
            self.advance();
        }
    }

    /// Skips a `-- ...` comment, including the newline that ends it.
    fn skip_line_comment(&mut self) {
        self.advance();
        self.advance();
        while let Some(ch) = self.current_char {
            self.advance();
            if ch == '\n' {
                break;
            }
        }
    }

    /// Skips a `/* ... */` comment. An unterminated comment runs to the end
    /// of the input.
    fn skip_block_comment(&mut self) {
        self.advance();
        self.advance();
        while let Some(ch) = self.current_char {
            if ch == '*' && self.peek(1) == Some('/') {
                self.advance();
                self.advance();
                break;
            }
            self.advance();
        }
    }

    /// Skips any mix of whitespace, `--` line comments and `/* */` block
    /// comments. On return the current character starts neither whitespace
    /// nor a comment.
    fn skip_whitespace_and_comments(&mut self) {
        loop {
            self.skip_whitespace();
            match (self.current_char, self.peek(1)) {
                (Some('-'), Some('-')) => self.skip_line_comment(),
                (Some('/'), Some('*')) => self.skip_block_comment(),
                _ => break,
            }
        }
    }

    /// Reads a run of alphanumeric characters and underscores. Returns an
    /// empty string when the current character is not part of a word.
    fn read_identifier(&mut self) -> String {
        let mut result = String::new();
        while let Some(ch) = self.current_char {
            if ch.is_alphanumeric() || ch == '_' {
                result.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        result
    }

    /// Reads a quoted run. The current character is taken as the quote and
    /// everything up to the next occurrence of it is returned, with both
    /// quotes consumed.
    ///
    /// No escape sequences are recognised, and an unterminated run extends
    /// to the end of the input.
    fn read_string(&mut self) -> String {
        let mut result = String::new();
        let Some(quote_char) = self.current_char else {
            // Nothing to read at end of input; previously this panicked.
            return result;
        };
        self.advance();
        while let Some(ch) = self.current_char {
            self.advance();
            if ch == quote_char {
                break;
            }
            result.push(ch);
        }
        result
    }

    /// Reads the text of a numeric literal: digits and dots, optionally
    /// followed by an exponent (`e`/`E`, optional sign, digits).
    ///
    /// The text is not validated: several dots, or an exponent without
    /// digits, are returned as-is.
    fn read_number(&mut self) -> String {
        let mut result = String::new();

        while let Some(ch) = self.current_char {
            if ch.is_numeric() || ch == '.' {
                result.push(ch);
                self.advance();
            } else if (ch == 'e' || ch == 'E') && !result.is_empty() {
                result.push(ch);
                self.advance();

                if let Some(sign @ ('+' | '-')) = self.current_char {
                    result.push(sign);
                    self.advance();
                }

                while let Some(digit) = self.current_char {
                    if digit.is_numeric() {
                        result.push(digit);
                        self.advance();
                    } else {
                        break;
                    }
                }
                // The exponent always ends the literal.
                break;
            } else {
                break;
            }
        }
        result
    }

    /// Reads a word and classifies it as a keyword or an identifier.
    ///
    /// Keywords are matched ignoring ASCII case only, so non-ASCII
    /// look-alikes stay identifiers. `ORDER` and `GROUP` become
    /// [`Token::OrderBy`] / [`Token::GroupBy`] only when `BY` follows;
    /// otherwise they are plain identifiers.
    fn lex_word(&mut self) -> Token {
        let ident = self.read_identifier();
        let upper = ident.to_ascii_uppercase();
        match upper.as_str() {
            "SELECT" => Token::Select,
            "FROM" => Token::From,
            "WHERE" => Token::Where,
            "WITH" => Token::With,
            "AND" => Token::And,
            "OR" => Token::Or,
            "IN" => Token::In,
            "NOT" => Token::Not,
            "BETWEEN" => Token::Between,
            "LIKE" => Token::Like,
            "IS" => Token::Is,
            "NULL" => Token::Null,
            "ORDER" if self.peek_keyword("BY") => {
                self.consume_following_word();
                Token::OrderBy
            }
            "GROUP" if self.peek_keyword("BY") => {
                self.consume_following_word();
                Token::GroupBy
            }
            "HAVING" => Token::Having,
            "AS" => Token::As,
            "ASC" => Token::Asc,
            "DESC" => Token::Desc,
            "LIMIT" => Token::Limit,
            "OFFSET" => Token::Offset,
            "DATETIME" => Token::DateTime,
            "CASE" => Token::Case,
            "WHEN" => Token::When,
            "THEN" => Token::Then,
            "ELSE" => Token::Else,
            "END" => Token::End,
            "DISTINCT" => Token::Distinct,
            "OVER" => Token::Over,
            "PARTITION" => Token::Partition,
            "BY" => Token::By,
            "ROWS" => Token::Rows,
            "UNBOUNDED" => Token::Unbounded,
            "PRECEDING" => Token::Preceding,
            "FOLLOWING" => Token::Following,
            "CURRENT" => Token::Current,
            "ROW" => Token::Row,
            "UNION" => Token::Union,
            "INTERSECT" => Token::Intersect,
            "EXCEPT" => Token::Except,
            "WEB" => Token::Web,
            "JOIN" => Token::Join,
            "INNER" => Token::Inner,
            "LEFT" => Token::Left,
            "RIGHT" => Token::Right,
            "FULL" => Token::Full,
            "OUTER" => Token::Outer,
            "ON" => Token::On,
            "CROSS" => Token::Cross,
            _ => Token::Identifier(ident),
        }
    }

    /// Consumes the word that `peek_keyword` just matched.
    ///
    /// It must skip exactly what `peek_keyword` skips (whitespace AND
    /// comments); otherwise `ORDER /* c */ BY` would leave `BY` behind to be
    /// lexed as a second token.
    fn consume_following_word(&mut self) {
        self.skip_whitespace_and_comments();
        self.read_identifier();
    }

    /// Returns the next token, skipping leading whitespace and comments.
    ///
    /// Returns [`Token::Eof`] at end of input, and keeps returning it on
    /// later calls. A character that starts no known token is returned as a
    /// one-character [`Token::Identifier`].
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace_and_comments();

        let Some(ch) = self.current_char else {
            return Token::Eof;
        };
        let next = self.peek(1);

        // Comment openers (`--`, `/*`) never reach this match: the skip above
        // only returns once the current character starts neither.
        match ch {
            '*' => self.consume(1, Token::Star),
            '+' => self.consume(1, Token::Plus),
            '/' => self.consume(1, Token::Divide),
            '%' => self.consume(1, Token::Modulo),
            '.' => self.consume(1, Token::Dot),
            ',' => self.consume(1, Token::Comma),
            ':' => self.consume(1, Token::Colon),
            '(' => self.consume(1, Token::LeftParen),
            ')' => self.consume(1, Token::RightParen),
            '=' => self.consume(1, Token::Equal),
            '<' => match next {
                Some('=') => self.consume(2, Token::LessThanOrEqual),
                Some('>') => self.consume(2, Token::NotEqual),
                _ => self.consume(1, Token::LessThan),
            },
            '>' => match next {
                Some('=') => self.consume(2, Token::GreaterThanOrEqual),
                _ => self.consume(1, Token::GreaterThan),
            },
            '!' if next == Some('=') => self.consume(2, Token::NotEqual),
            '|' if next == Some('|') => self.consume(2, Token::Concat),
            '"' => Token::QuotedIdentifier(self.read_string()),
            '\'' => Token::StringLiteral(self.read_string()),
            // A `-` directly followed by a digit is folded into the literal.
            '-' if next.is_some_and(char::is_numeric) => {
                self.advance();
                let num = self.read_number();
                Token::NumberLiteral(format!("-{num}"))
            }
            '-' => self.consume(1, Token::Minus),
            _ if ch.is_numeric() => Token::NumberLiteral(self.read_number()),
            _ if ch.is_alphabetic() || ch == '_' => self.lex_word(),
            _ => self.consume(1, Token::Identifier(ch.to_string())),
        }
    }

    /// Reports whether the next word, after any whitespace and comments,
    /// equals `keyword` ignoring ASCII case. The lexer position is restored
    /// before returning.
    fn peek_keyword(&mut self, keyword: &str) -> bool {
        let saved_pos = self.position;
        let saved_char = self.current_char;

        self.skip_whitespace_and_comments();
        let matches = self.read_identifier().eq_ignore_ascii_case(keyword);

        self.position = saved_pos;
        self.current_char = saved_char;

        matches
    }

    /// Returns the current position as a character index into the input.
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.position
    }

    /// Lexes the rest of the input. The returned vector always ends with
    /// [`Token::Eof`].
    pub fn tokenize_all(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        loop {
            let token = self.next_token();
            let at_end = token == Token::Eof;
            tokens.push(token);
            if at_end {
                break;
            }
        }
        tokens
    }

    /// Lexes the rest of the input into `(start, end, token)` triples.
    ///
    /// `start` is the character index of the token's first character and
    /// `end` is the index just past its last. The final [`Token::Eof`] is
    /// not included.
    pub fn tokenize_all_with_positions(&mut self) -> Vec<(usize, usize, Token)> {
        let mut tokens = Vec::new();
        loop {
            // Skip first so `start_pos` points at the token itself.
            self.skip_whitespace_and_comments();
            let start_pos = self.position;
            let token = self.next_token();
            let end_pos = self.position;

            if token == Token::Eof {
                break;
            }
            tokens.push((start_pos, end_pos, token));
        }
        tokens
    }
}