/// Controls whether the lexer emits comment tokens or silently drops them.
///
/// `SkipComments` is the default and matches the historical behaviour of
/// `Lexer::new`; `PreserveComments` makes `next_token` yield
/// `Token::LineComment` / `Token::BlockComment` tokens as well.
//
// The manual `impl Default` was replaced with `#[derive(Default)]` +
// `#[default]`, the idiomatic form; `default()` still returns `SkipComments`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LexerMode {
    /// Comments are consumed like whitespace and never surface as tokens.
    #[default]
    SkipComments,
    /// Comments are returned as `LineComment` / `BlockComment` tokens.
    PreserveComments,
}
20
/// All tokens produced by the SQL lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    // --- Keywords ---
    Select,
    From,
    Where,
    With,
    And,
    Or,
    In,
    Not,
    Between,
    Like,
    Is,
    Null,
    OrderBy,
    GroupBy,
    Having,
    As,
    Asc,
    Desc,
    Limit,
    Offset,
    Into,
    DateTime,
    Case,
    When,
    Then,
    Else,
    End,
    Distinct,
    Over,
    Partition,
    By,
    Rows,
    Range,
    Unbounded,
    Preceding,
    Following,
    Current,
    Row,
    Union,
    Intersect,
    Except,
    Web,
    Unnest,
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    On,
    Cross,
    // --- Identifiers and literals (payload is the raw text) ---
    Identifier(String),
    QuotedIdentifier(String),
    StringLiteral(String),
    JsonBlock(String),
    NumberLiteral(String),
    // --- Punctuation and operators ---
    Star,
    Dot,
    Comma,
    Colon,
    LeftParen,
    RightParen,
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessThanOrEqual,
    GreaterThanOrEqual,
    Plus,
    Minus,
    Divide,
    Modulo,
    Concat,
    // --- Trivia (only emitted in `LexerMode::PreserveComments`) ---
    LineComment(String),
    BlockComment(String),
    /// End of input; `next_token` keeps returning this once reached.
    Eof,
}

impl Token {
    /// Maps a word (case-insensitively) to its keyword token, or `None` for
    /// non-reserved words.
    ///
    /// Note: "ORDER"/"GROUP" map to `OrderBy`/`GroupBy` on their own; the
    /// lexer is responsible for consuming the trailing "BY" of the two-word
    /// forms before consulting this table.
    pub fn from_keyword(s: &str) -> Option<Token> {
        match s.to_uppercase().as_str() {
            "SELECT" => Some(Token::Select),
            "FROM" => Some(Token::From),
            "WHERE" => Some(Token::Where),
            "WITH" => Some(Token::With),
            "AND" => Some(Token::And),
            "OR" => Some(Token::Or),
            "IN" => Some(Token::In),
            "NOT" => Some(Token::Not),
            "BETWEEN" => Some(Token::Between),
            "LIKE" => Some(Token::Like),
            "IS" => Some(Token::Is),
            "NULL" => Some(Token::Null),
            "ORDER" => Some(Token::OrderBy),
            "GROUP" => Some(Token::GroupBy),
            "HAVING" => Some(Token::Having),
            "AS" => Some(Token::As),
            "ASC" => Some(Token::Asc),
            "DESC" => Some(Token::Desc),
            "LIMIT" => Some(Token::Limit),
            "OFFSET" => Some(Token::Offset),
            "INTO" => Some(Token::Into),
            // BUG FIX: "DATETIME" was missing here even though the lexer's
            // skip-comments path recognised it, so the two lexer modes
            // disagreed on whether DATETIME is a keyword.
            "DATETIME" => Some(Token::DateTime),
            "DISTINCT" => Some(Token::Distinct),
            "CASE" => Some(Token::Case),
            "WHEN" => Some(Token::When),
            "THEN" => Some(Token::Then),
            "ELSE" => Some(Token::Else),
            "END" => Some(Token::End),
            "OVER" => Some(Token::Over),
            "PARTITION" => Some(Token::Partition),
            "BY" => Some(Token::By),
            "ROWS" => Some(Token::Rows),
            "RANGE" => Some(Token::Range),
            "UNBOUNDED" => Some(Token::Unbounded),
            "PRECEDING" => Some(Token::Preceding),
            "FOLLOWING" => Some(Token::Following),
            "CURRENT" => Some(Token::Current),
            "ROW" => Some(Token::Row),
            "UNION" => Some(Token::Union),
            "INTERSECT" => Some(Token::Intersect),
            "EXCEPT" => Some(Token::Except),
            "WEB" => Some(Token::Web),
            "UNNEST" => Some(Token::Unnest),
            "JOIN" => Some(Token::Join),
            "INNER" => Some(Token::Inner),
            "LEFT" => Some(Token::Left),
            "RIGHT" => Some(Token::Right),
            "FULL" => Some(Token::Full),
            "OUTER" => Some(Token::Outer),
            "ON" => Some(Token::On),
            "CROSS" => Some(Token::Cross),
            _ => None,
        }
    }

    /// True for the boolean connectives AND / OR.
    pub fn is_logical_operator(&self) -> bool {
        matches!(self, Token::And | Token::Or)
    }

    /// True for join-type modifiers (the words that may precede JOIN).
    pub fn is_join_type(&self) -> bool {
        matches!(
            self,
            Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
        )
    }

    /// True for tokens that end the current clause when encountered while
    /// parsing an expression list.
    pub fn is_clause_terminator(&self) -> bool {
        matches!(
            self,
            Token::OrderBy
                | Token::GroupBy
                | Token::Having
                | Token::Limit
                | Token::Offset
                | Token::Union
                | Token::Intersect
                | Token::Except
        )
    }

    /// The canonical upper-case spelling of a keyword token, or `None` for
    /// identifiers, literals, operators, and trivia.
    pub fn as_keyword_str(&self) -> Option<&'static str> {
        match self {
            Token::Select => Some("SELECT"),
            Token::From => Some("FROM"),
            Token::Where => Some("WHERE"),
            Token::With => Some("WITH"),
            Token::And => Some("AND"),
            Token::Or => Some("OR"),
            Token::In => Some("IN"),
            Token::Not => Some("NOT"),
            Token::Between => Some("BETWEEN"),
            Token::Like => Some("LIKE"),
            Token::Is => Some("IS"),
            Token::Null => Some("NULL"),
            Token::OrderBy => Some("ORDER BY"),
            Token::GroupBy => Some("GROUP BY"),
            Token::Having => Some("HAVING"),
            Token::As => Some("AS"),
            Token::Asc => Some("ASC"),
            Token::Desc => Some("DESC"),
            Token::Limit => Some("LIMIT"),
            Token::Offset => Some("OFFSET"),
            Token::Into => Some("INTO"),
            Token::Distinct => Some("DISTINCT"),
            Token::Case => Some("CASE"),
            Token::When => Some("WHEN"),
            Token::Then => Some("THEN"),
            Token::Else => Some("ELSE"),
            Token::End => Some("END"),
            Token::Join => Some("JOIN"),
            Token::Inner => Some("INNER"),
            Token::Left => Some("LEFT"),
            Token::Right => Some("RIGHT"),
            Token::Full => Some("FULL"),
            // BUG FIX: `Outer`, `Web`, and `Unnest` were recognised by
            // `from_keyword` but missing here, breaking round-tripping.
            Token::Outer => Some("OUTER"),
            Token::Cross => Some("CROSS"),
            Token::On => Some("ON"),
            Token::Union => Some("UNION"),
            Token::Intersect => Some("INTERSECT"),
            Token::Except => Some("EXCEPT"),
            Token::Web => Some("WEB"),
            Token::Unnest => Some("UNNEST"),
            Token::Over => Some("OVER"),
            Token::Partition => Some("PARTITION"),
            Token::By => Some("BY"),
            Token::Rows => Some("ROWS"),
            Token::Range => Some("RANGE"),
            Token::Preceding => Some("PRECEDING"),
            Token::Following => Some("FOLLOWING"),
            Token::Current => Some("CURRENT"),
            Token::Row => Some("ROW"),
            Token::Unbounded => Some("UNBOUNDED"),
            Token::DateTime => Some("DATETIME"),
            _ => None,
        }
    }
}
266
/// Hand-written character-at-a-time SQL lexer.
#[derive(Debug, Clone)]
pub struct Lexer {
    // Input decoded up front into chars so multi-byte UTF-8 indexes safely.
    input: Vec<char>,
    // Index (in chars, not bytes) of `current_char` within `input`.
    position: usize,
    // Cached `input[position]`; `None` once the input is exhausted.
    current_char: Option<char>,
    // Whether comments are emitted as tokens or skipped (see `LexerMode`).
    mode: LexerMode,
}
274
275impl Lexer {
    /// Creates a lexer in the default mode (`LexerMode::SkipComments`),
    /// which silently discards SQL comments.
    #[must_use]
    pub fn new(input: &str) -> Self {
        Self::with_mode(input, LexerMode::default())
    }
280
281 #[must_use]
283 pub fn with_mode(input: &str, mode: LexerMode) -> Self {
284 let chars: Vec<char> = input.chars().collect();
285 let current = chars.first().copied();
286 Self {
287 input: chars,
288 position: 0,
289 current_char: current,
290 mode,
291 }
292 }
293
    /// Moves the cursor one char forward and refreshes `current_char`
    /// (`None` once past the end of input).
    fn advance(&mut self) {
        self.position += 1;
        self.current_char = self.input.get(self.position).copied();
    }
298
    /// Looks `offset` chars ahead of the cursor without consuming anything.
    fn peek(&self, offset: usize) -> Option<char> {
        self.input.get(self.position + offset).copied()
    }
302
303 fn peek_string(&self, n: usize) -> String {
305 let mut result = String::new();
306 for i in 0..n {
307 if let Some(ch) = self.input.get(self.position + i) {
308 result.push(*ch);
309 } else {
310 break;
311 }
312 }
313 result
314 }
315
    /// Reads the body of a `$JSON$ ... $JSON$` delimited block.
    ///
    /// Assumes the cursor sits on the opening `$JSON$` (the caller verifies
    /// this via `peek_string` before dispatching here). Returns the raw text
    /// between the delimiters; both delimiters are consumed. An unterminated
    /// block consumes the rest of the input and returns what was gathered.
    fn read_json_block(&mut self) -> String {
        let mut result = String::new();

        // Skip the 6-char opening delimiter "$JSON$".
        for _ in 0..6 {
            self.advance();
        }

        while let Some(ch) = self.current_char {
            // `peek_string` starts at the cursor, so the '$' we are on is
            // included in the 6-char window being compared.
            if ch == '$' && self.peek_string(6) == "$JSON$" {
                // Skip the closing delimiter.
                for _ in 0..6 {
                    self.advance();
                }
                break;
            }
            result.push(ch);
            self.advance();
        }

        result
    }
342
343 fn skip_whitespace(&mut self) {
344 while let Some(ch) = self.current_char {
345 if ch.is_whitespace() {
346 self.advance();
347 } else {
348 break;
349 }
350 }
351 }
352
353 fn read_line_comment(&mut self) -> String {
355 let mut result = String::new();
356
357 self.advance();
359 self.advance();
360
361 while let Some(ch) = self.current_char {
363 if ch == '\n' {
364 self.advance(); break;
366 }
367 result.push(ch);
368 self.advance();
369 }
370
371 result
372 }
373
374 fn read_block_comment(&mut self) -> String {
376 let mut result = String::new();
377
378 self.advance();
380 self.advance();
381
382 while let Some(ch) = self.current_char {
384 if ch == '*' && self.peek(1) == Some('/') {
385 self.advance(); self.advance(); break;
388 }
389 result.push(ch);
390 self.advance();
391 }
392
393 result
394 }
395
396 fn skip_whitespace_and_comments(&mut self) {
399 loop {
400 while let Some(ch) = self.current_char {
402 if ch.is_whitespace() {
403 self.advance();
404 } else {
405 break;
406 }
407 }
408
409 match self.current_char {
411 Some('-') if self.peek(1) == Some('-') => {
412 self.advance(); self.advance(); while let Some(ch) = self.current_char {
416 self.advance();
417 if ch == '\n' {
418 break;
419 }
420 }
421 }
422 Some('/') if self.peek(1) == Some('*') => {
423 self.advance(); self.advance(); while let Some(ch) = self.current_char {
427 if ch == '*' && self.peek(1) == Some('/') {
428 self.advance(); self.advance(); break;
431 }
432 self.advance();
433 }
434 }
435 _ => {
436 break;
438 }
439 }
440 }
441 }
442
443 fn read_identifier(&mut self) -> String {
444 let mut result = String::new();
445 while let Some(ch) = self.current_char {
446 if ch.is_alphanumeric() || ch == '_' {
447 result.push(ch);
448 self.advance();
449 } else {
450 break;
451 }
452 }
453 result
454 }
455
456 fn read_string(&mut self) -> String {
457 let mut result = String::new();
458 let quote_char = self.current_char.unwrap(); self.advance(); while let Some(ch) = self.current_char {
462 if ch == quote_char {
463 self.advance(); break;
465 }
466 result.push(ch);
467 self.advance();
468 }
469 result
470 }
471
472 fn read_number(&mut self) -> String {
473 let mut result = String::new();
474 let has_e = false;
475
476 while let Some(ch) = self.current_char {
478 if !has_e && (ch.is_numeric() || ch == '.') {
479 result.push(ch);
480 self.advance();
481 } else if (ch == 'e' || ch == 'E') && !has_e && !result.is_empty() {
482 result.push(ch);
484 self.advance();
485 let _ = has_e; if let Some(sign) = self.current_char {
489 if sign == '+' || sign == '-' {
490 result.push(sign);
491 self.advance();
492 }
493 }
494
495 while let Some(digit) = self.current_char {
497 if digit.is_numeric() {
498 result.push(digit);
499 self.advance();
500 } else {
501 break;
502 }
503 }
504 break; } else {
506 break;
507 }
508 }
509 result
510 }
511
512 pub fn next_token_with_comments(&mut self) -> Token {
515 self.skip_whitespace();
517
518 match self.current_char {
519 None => Token::Eof,
520 Some('-') if self.peek(1) == Some('-') => {
522 let comment_text = self.read_line_comment();
523 Token::LineComment(comment_text)
524 }
525 Some('/') if self.peek(1) == Some('*') => {
526 let comment_text = self.read_block_comment();
527 Token::BlockComment(comment_text)
528 }
529 Some('*') => {
530 self.advance();
531 Token::Star
532 }
533 Some('+') => {
534 self.advance();
535 Token::Plus
536 }
537 Some('/') => {
538 self.advance();
540 Token::Divide
541 }
542 Some('%') => {
543 self.advance();
544 Token::Modulo
545 }
546 Some('.') => {
547 self.advance();
548 Token::Dot
549 }
550 Some(',') => {
551 self.advance();
552 Token::Comma
553 }
554 Some(':') => {
555 self.advance();
556 Token::Colon
557 }
558 Some('(') => {
559 self.advance();
560 Token::LeftParen
561 }
562 Some(')') => {
563 self.advance();
564 Token::RightParen
565 }
566 Some('=') => {
567 self.advance();
568 Token::Equal
569 }
570 Some('<') => {
571 self.advance();
572 if self.current_char == Some('=') {
573 self.advance();
574 Token::LessThanOrEqual
575 } else if self.current_char == Some('>') {
576 self.advance();
577 Token::NotEqual
578 } else {
579 Token::LessThan
580 }
581 }
582 Some('>') => {
583 self.advance();
584 if self.current_char == Some('=') {
585 self.advance();
586 Token::GreaterThanOrEqual
587 } else {
588 Token::GreaterThan
589 }
590 }
591 Some('!') if self.peek(1) == Some('=') => {
592 self.advance();
593 self.advance();
594 Token::NotEqual
595 }
596 Some('|') if self.peek(1) == Some('|') => {
597 self.advance();
598 self.advance();
599 Token::Concat
600 }
601 Some('"') => {
602 let ident_val = self.read_string();
603 Token::QuotedIdentifier(ident_val)
604 }
605 Some('$') => {
606 if self.peek_string(6) == "$JSON$" {
607 let json_content = self.read_json_block();
608 Token::JsonBlock(json_content)
609 } else {
610 let ident = self.read_identifier();
611 Token::Identifier(ident)
612 }
613 }
614 Some('\'') => {
615 let string_val = self.read_string();
616 Token::StringLiteral(string_val)
617 }
618 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
619 self.advance();
620 let num = self.read_number();
621 Token::NumberLiteral(format!("-{num}"))
622 }
623 Some('-') => {
624 self.advance();
625 Token::Minus
626 }
627 Some(ch) if ch.is_numeric() => {
628 let num = self.read_number();
629 Token::NumberLiteral(num)
630 }
631 Some('#') => {
632 self.advance();
633 let table_name = self.read_identifier();
634 if table_name.is_empty() {
635 Token::Identifier("#".to_string())
636 } else {
637 Token::Identifier(format!("#{}", table_name))
638 }
639 }
640 Some(ch) if ch.is_alphabetic() || ch == '_' => {
641 let ident = self.read_identifier();
642 match ident.to_uppercase().as_str() {
644 "ORDER" if self.peek_keyword("BY") => {
645 self.skip_whitespace();
646 self.read_identifier(); Token::OrderBy
648 }
649 "GROUP" if self.peek_keyword("BY") => {
650 self.skip_whitespace();
651 self.read_identifier(); Token::GroupBy
653 }
654 _ => Token::from_keyword(&ident).unwrap_or_else(|| Token::Identifier(ident)),
655 }
656 }
657 Some(ch) => {
658 self.advance();
659 Token::Identifier(ch.to_string())
660 }
661 }
662 }
663
    /// Produces the next token according to the configured `LexerMode`.
    pub fn next_token(&mut self) -> Token {
        match self.mode {
            LexerMode::SkipComments => self.next_token_skip_comments(),
            LexerMode::PreserveComments => self.next_token_with_comments(),
        }
    }
671
672 fn next_token_skip_comments(&mut self) -> Token {
674 self.skip_whitespace_and_comments();
675
676 match self.current_char {
677 None => Token::Eof,
678 Some('*') => {
679 self.advance();
680 Token::Star }
684 Some('+') => {
685 self.advance();
686 Token::Plus
687 }
688 Some('/') => {
689 if self.peek(1) == Some('*') {
691 self.skip_whitespace_and_comments();
694 return self.next_token();
695 }
696 self.advance();
697 Token::Divide
698 }
699 Some('%') => {
700 self.advance();
701 Token::Modulo
702 }
703 Some('.') => {
704 self.advance();
705 Token::Dot
706 }
707 Some(',') => {
708 self.advance();
709 Token::Comma
710 }
711 Some(':') => {
712 self.advance();
713 Token::Colon
714 }
715 Some('(') => {
716 self.advance();
717 Token::LeftParen
718 }
719 Some(')') => {
720 self.advance();
721 Token::RightParen
722 }
723 Some('=') => {
724 self.advance();
725 Token::Equal
726 }
727 Some('<') => {
728 self.advance();
729 if self.current_char == Some('=') {
730 self.advance();
731 Token::LessThanOrEqual
732 } else if self.current_char == Some('>') {
733 self.advance();
734 Token::NotEqual
735 } else {
736 Token::LessThan
737 }
738 }
739 Some('>') => {
740 self.advance();
741 if self.current_char == Some('=') {
742 self.advance();
743 Token::GreaterThanOrEqual
744 } else {
745 Token::GreaterThan
746 }
747 }
748 Some('!') if self.peek(1) == Some('=') => {
749 self.advance();
750 self.advance();
751 Token::NotEqual
752 }
753 Some('|') if self.peek(1) == Some('|') => {
754 self.advance();
755 self.advance();
756 Token::Concat
757 }
758 Some('"') => {
759 let ident_val = self.read_string();
761 Token::QuotedIdentifier(ident_val)
762 }
763 Some('$') => {
764 if self.peek_string(6) == "$JSON$" {
766 let json_content = self.read_json_block();
767 Token::JsonBlock(json_content)
768 } else {
769 let ident = self.read_identifier();
772 Token::Identifier(ident)
773 }
774 }
775 Some('\'') => {
776 let string_val = self.read_string();
778 Token::StringLiteral(string_val)
779 }
780 Some('-') if self.peek(1) == Some('-') => {
781 self.skip_whitespace_and_comments();
783 self.next_token()
784 }
785 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
786 self.advance(); let num = self.read_number();
789 Token::NumberLiteral(format!("-{num}"))
790 }
791 Some('-') => {
792 self.advance();
794 Token::Minus
795 }
796 Some(ch) if ch.is_numeric() => {
797 let num = self.read_number();
798 Token::NumberLiteral(num)
799 }
800 Some('#') => {
801 self.advance(); let table_name = self.read_identifier();
804 if table_name.is_empty() {
805 Token::Identifier("#".to_string())
807 } else {
808 Token::Identifier(format!("#{}", table_name))
810 }
811 }
812 Some(ch) if ch.is_alphabetic() || ch == '_' => {
813 let ident = self.read_identifier();
814 match ident.to_uppercase().as_str() {
815 "SELECT" => Token::Select,
816 "FROM" => Token::From,
817 "WHERE" => Token::Where,
818 "WITH" => Token::With,
819 "AND" => Token::And,
820 "OR" => Token::Or,
821 "IN" => Token::In,
822 "NOT" => Token::Not,
823 "BETWEEN" => Token::Between,
824 "LIKE" => Token::Like,
825 "IS" => Token::Is,
826 "NULL" => Token::Null,
827 "ORDER" if self.peek_keyword("BY") => {
828 self.skip_whitespace();
829 self.read_identifier(); Token::OrderBy
831 }
832 "GROUP" if self.peek_keyword("BY") => {
833 self.skip_whitespace();
834 self.read_identifier(); Token::GroupBy
836 }
837 "HAVING" => Token::Having,
838 "AS" => Token::As,
839 "ASC" => Token::Asc,
840 "DESC" => Token::Desc,
841 "LIMIT" => Token::Limit,
842 "OFFSET" => Token::Offset,
843 "INTO" => Token::Into,
844 "DATETIME" => Token::DateTime,
845 "CASE" => Token::Case,
846 "WHEN" => Token::When,
847 "THEN" => Token::Then,
848 "ELSE" => Token::Else,
849 "END" => Token::End,
850 "DISTINCT" => Token::Distinct,
851 "OVER" => Token::Over,
852 "PARTITION" => Token::Partition,
853 "BY" => Token::By,
854 "ROWS" => Token::Rows,
856 "UNBOUNDED" => Token::Unbounded,
859 "PRECEDING" => Token::Preceding,
860 "FOLLOWING" => Token::Following,
861 "CURRENT" => Token::Current,
862 "ROW" => Token::Row,
863 "UNION" => Token::Union,
865 "INTERSECT" => Token::Intersect,
866 "EXCEPT" => Token::Except,
867 "WEB" => Token::Web,
869 "UNNEST" => Token::Unnest,
871 "JOIN" => Token::Join,
873 "INNER" => Token::Inner,
874 "LEFT" => Token::Left,
875 "RIGHT" => Token::Right,
876 "FULL" => Token::Full,
877 "OUTER" => Token::Outer,
878 "ON" => Token::On,
879 "CROSS" => Token::Cross,
880 _ => Token::Identifier(ident),
881 }
882 }
883 Some(ch) => {
884 self.advance();
885 Token::Identifier(ch.to_string())
886 }
887 }
888 }
889
    /// Checks — without consuming input — whether the next word after any
    /// whitespace/comments matches `keyword` case-insensitively.
    ///
    /// Used to detect the two-word keywords ORDER BY / GROUP BY. The cursor
    /// state is saved up front and fully restored afterwards, so this is a
    /// pure lookahead.
    fn peek_keyword(&mut self, keyword: &str) -> bool {
        let saved_pos = self.position;
        let saved_char = self.current_char;

        self.skip_whitespace_and_comments();
        let next_word = self.read_identifier();
        let matches = next_word.to_uppercase() == keyword;

        // Rewind: lookahead must not consume anything.
        self.position = saved_pos;
        self.current_char = saved_char;

        matches
    }
904
    /// Current cursor position as a character (not byte) index into the input.
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.position
    }
909
910 pub fn tokenize_all(&mut self) -> Vec<Token> {
911 let mut tokens = Vec::new();
912 loop {
913 let token = self.next_token();
914 if matches!(token, Token::Eof) {
915 tokens.push(token);
916 break;
917 }
918 tokens.push(token);
919 }
920 tokens
921 }
922
923 pub fn tokenize_all_with_positions(&mut self) -> Vec<(usize, usize, Token)> {
924 let mut tokens = Vec::new();
925 loop {
926 self.skip_whitespace_and_comments();
927 let start_pos = self.position;
928 let token = self.next_token();
929 let end_pos = self.position;
930
931 if matches!(token, Token::Eof) {
932 break;
933 }
934 tokens.push((start_pos, end_pos, token));
935 }
936 tokens
937 }
938
939 pub fn tokenize_all_with_comments(&mut self) -> Vec<Token> {
942 let mut tokens = Vec::new();
943 loop {
944 let token = self.next_token_with_comments();
945 if matches!(token, Token::Eof) {
946 tokens.push(token);
947 break;
948 }
949 tokens.push(token);
950 }
951 tokens
952 }
953}
954
#[cfg(test)]
mod tests {
    use super::*;

    // Comment-preserving path surfaces `--` comments as LineComment tokens
    // carrying the text without the marker or newline.
    #[test]
    fn test_line_comment_tokenization() {
        let sql = "SELECT col1, -- this is a comment\ncol2 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let comment_token = tokens.iter().find(|t| matches!(t, Token::LineComment(_)));
        assert!(comment_token.is_some(), "Should find line comment token");

        if let Some(Token::LineComment(text)) = comment_token {
            assert_eq!(text.trim(), "this is a comment");
        }
    }

    // Comment-preserving path surfaces `/* */` comments as BlockComment
    // tokens carrying the interior text.
    #[test]
    fn test_block_comment_tokenization() {
        let sql = "SELECT /* block comment */ col1 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let comment_token = tokens.iter().find(|t| matches!(t, Token::BlockComment(_)));
        assert!(comment_token.is_some(), "Should find block comment token");

        if let Some(Token::BlockComment(text)) = comment_token {
            assert_eq!(text.trim(), "block comment");
        }
    }

    // Mixed input: counts of each comment kind must match the source.
    #[test]
    fn test_multiple_comments() {
        let sql = "-- First comment\nSELECT col1, /* inline */ col2\n-- Second comment\nFROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let line_comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t, Token::LineComment(_)))
            .collect();
        let block_comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t, Token::BlockComment(_)))
            .collect();

        assert_eq!(line_comments.len(), 2, "Should find 2 line comments");
        assert_eq!(block_comments.len(), 1, "Should find 1 block comment");
    }

    // Default-mode `tokenize_all` must never emit comment tokens.
    #[test]
    fn test_backwards_compatibility() {
        let sql = "SELECT -- comment\ncol1 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all();

        let has_comments = tokens
            .iter()
            .any(|t| matches!(t, Token::LineComment(_) | Token::BlockComment(_)));
        assert!(
            !has_comments,
            "next_token() should skip comments for backwards compatibility"
        );

        assert!(tokens.iter().any(|t| matches!(t, Token::Select)));
        assert!(tokens.iter().any(|t| matches!(t, Token::From)));
    }

    // Explicit SkipComments mode: token stream is exactly the non-comment
    // tokens in order.
    #[test]
    fn test_lexer_mode_skip_comments() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::SkipComments);

        assert_eq!(lexer.next_token(), Token::Select);
        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));
        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    // Explicit PreserveComments mode: the comment appears in stream order
    // between its neighbouring tokens.
    #[test]
    fn test_lexer_mode_preserve_comments() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::PreserveComments);

        assert_eq!(lexer.next_token(), Token::Select);
        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));

        let comment_tok = lexer.next_token();
        assert!(matches!(comment_tok, Token::LineComment(_)));
        if let Token::LineComment(text) = comment_tok {
            assert_eq!(text.trim(), "comment");
        }

        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    // `Lexer::new` must behave like SkipComments mode: 4 non-comment tokens.
    #[test]
    fn test_lexer_mode_default_is_skip() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::new(sql);

        let mut tok_count = 0;
        loop {
            let tok = lexer.next_token();
            if matches!(tok, Token::Eof) {
                break;
            }
            assert!(!matches!(
                tok,
                Token::LineComment(_) | Token::BlockComment(_)
            ));
            tok_count += 1;
        }

        assert_eq!(tok_count, 4);
    }

    // Block comments: dropped in skip mode, emitted in preserve mode.
    #[test]
    fn test_lexer_mode_block_comments() {
        let sql = "SELECT /* block */ id FROM table";

        let mut lexer_skip = Lexer::with_mode(sql, LexerMode::SkipComments);
        assert_eq!(lexer_skip.next_token(), Token::Select);
        assert_eq!(lexer_skip.next_token(), Token::Identifier("id".into()));
        assert_eq!(lexer_skip.next_token(), Token::From);

        let mut lexer_preserve = Lexer::with_mode(sql, LexerMode::PreserveComments);
        assert_eq!(lexer_preserve.next_token(), Token::Select);

        let comment_tok = lexer_preserve.next_token();
        assert!(matches!(comment_tok, Token::BlockComment(_)));
        if let Token::BlockComment(text) = comment_tok {
            assert_eq!(text.trim(), "block");
        }

        assert_eq!(lexer_preserve.next_token(), Token::Identifier("id".into()));
    }

    // Leading, inline, and trailing comments all appear in stream order.
    #[test]
    fn test_lexer_mode_mixed_comments() {
        let sql = "-- leading\nSELECT /* inline */ id -- trailing\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::PreserveComments);

        assert!(matches!(lexer.next_token(), Token::LineComment(_)));

        assert_eq!(lexer.next_token(), Token::Select);

        assert!(matches!(lexer.next_token(), Token::BlockComment(_)));

        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));

        assert!(matches!(lexer.next_token(), Token::LineComment(_)));

        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }
}