/// Controls whether the lexer surfaces SQL comments as tokens or
/// silently consumes them as whitespace.
///
/// The manual `impl Default` was replaced by the derived form with
/// `#[default]` (stable since Rust 1.62); the default remains
/// `SkipComments`, matching `Lexer::new`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LexerMode {
    /// Comments are skipped and never appear in the token stream (default).
    #[default]
    SkipComments,
    /// Comments are emitted as `Token::LineComment` / `Token::BlockComment`.
    PreserveComments,
}
20
/// A single lexical unit of the SQL dialect handled by [`Lexer`].
///
/// Keyword variants carry no data; identifier/literal/comment variants
/// carry their source text (delimiters stripped where applicable).
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // --- Statement keywords ---
    Select,
    From,
    Where,
    With,
    And,
    Or,
    In,
    Not,
    Between,
    Like,
    Is,
    Null,
    /// The compound keyword `ORDER BY`, folded into one token by the lexer.
    OrderBy,
    /// The compound keyword `GROUP BY`, folded into one token by the lexer.
    GroupBy,
    Having,
    Qualify,
    As,
    Asc,
    Desc,
    Limit,
    Offset,
    Into,
    DateTime,
    Case,
    When,
    Then,
    Else,
    End,
    Distinct,
    // --- Window-function keywords ---
    Over,
    Partition,
    By,
    Rows,
    Range,
    Unbounded,
    Preceding,
    Following,
    Current,
    Row,
    // --- Set operations ---
    Union,
    Intersect,
    Except,
    // --- Dialect-specific keywords ---
    Web,
    Unnest,
    // --- Join keywords ---
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    On,
    Cross,
    // --- Identifiers and literals (payload is the source text) ---
    Identifier(String),
    QuotedIdentifier(String),
    StringLiteral(String),
    JsonBlock(String),
    NumberLiteral(String),
    // --- Punctuation and operators ---
    Star,
    Dot,
    Comma,
    Colon,
    LeftParen,
    RightParen,
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessThanOrEqual,
    GreaterThanOrEqual,
    Plus,
    Minus,
    Divide,
    Modulo,
    /// The `||` string-concatenation operator.
    Concat,
    // --- Comments (text without delimiters) ---
    LineComment(String),
    BlockComment(String),
    /// End of input.
    Eof,
}
124
125impl Token {
126 pub fn from_keyword(s: &str) -> Option<Token> {
128 match s.to_uppercase().as_str() {
129 "SELECT" => Some(Token::Select),
130 "FROM" => Some(Token::From),
131 "WHERE" => Some(Token::Where),
132 "WITH" => Some(Token::With),
133 "AND" => Some(Token::And),
134 "OR" => Some(Token::Or),
135 "IN" => Some(Token::In),
136 "NOT" => Some(Token::Not),
137 "BETWEEN" => Some(Token::Between),
138 "LIKE" => Some(Token::Like),
139 "IS" => Some(Token::Is),
140 "NULL" => Some(Token::Null),
141 "ORDER" => Some(Token::OrderBy),
142 "GROUP" => Some(Token::GroupBy),
143 "HAVING" => Some(Token::Having),
144 "QUALIFY" => Some(Token::Qualify),
145 "AS" => Some(Token::As),
146 "ASC" => Some(Token::Asc),
147 "DESC" => Some(Token::Desc),
148 "LIMIT" => Some(Token::Limit),
149 "OFFSET" => Some(Token::Offset),
150 "INTO" => Some(Token::Into),
151 "DISTINCT" => Some(Token::Distinct),
152 "CASE" => Some(Token::Case),
153 "WHEN" => Some(Token::When),
154 "THEN" => Some(Token::Then),
155 "ELSE" => Some(Token::Else),
156 "END" => Some(Token::End),
157 "OVER" => Some(Token::Over),
158 "PARTITION" => Some(Token::Partition),
159 "BY" => Some(Token::By),
160 "ROWS" => Some(Token::Rows),
161 "RANGE" => Some(Token::Range),
162 "UNBOUNDED" => Some(Token::Unbounded),
163 "PRECEDING" => Some(Token::Preceding),
164 "FOLLOWING" => Some(Token::Following),
165 "CURRENT" => Some(Token::Current),
166 "ROW" => Some(Token::Row),
167 "UNION" => Some(Token::Union),
168 "INTERSECT" => Some(Token::Intersect),
169 "EXCEPT" => Some(Token::Except),
170 "WEB" => Some(Token::Web),
171 "UNNEST" => Some(Token::Unnest),
172 "JOIN" => Some(Token::Join),
173 "INNER" => Some(Token::Inner),
174 "LEFT" => Some(Token::Left),
175 "RIGHT" => Some(Token::Right),
176 "FULL" => Some(Token::Full),
177 "OUTER" => Some(Token::Outer),
178 "ON" => Some(Token::On),
179 "CROSS" => Some(Token::Cross),
180 _ => None,
181 }
182 }
183
184 pub fn is_logical_operator(&self) -> bool {
186 matches!(self, Token::And | Token::Or)
187 }
188
189 pub fn is_join_type(&self) -> bool {
191 matches!(
192 self,
193 Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
194 )
195 }
196
197 pub fn is_clause_terminator(&self) -> bool {
199 matches!(
200 self,
201 Token::OrderBy
202 | Token::GroupBy
203 | Token::Having
204 | Token::Limit
205 | Token::Offset
206 | Token::Union
207 | Token::Intersect
208 | Token::Except
209 )
210 }
211
212 pub fn as_keyword_str(&self) -> Option<&'static str> {
215 match self {
216 Token::Select => Some("SELECT"),
217 Token::From => Some("FROM"),
218 Token::Where => Some("WHERE"),
219 Token::With => Some("WITH"),
220 Token::And => Some("AND"),
221 Token::Or => Some("OR"),
222 Token::In => Some("IN"),
223 Token::Not => Some("NOT"),
224 Token::Between => Some("BETWEEN"),
225 Token::Like => Some("LIKE"),
226 Token::Is => Some("IS"),
227 Token::Null => Some("NULL"),
228 Token::OrderBy => Some("ORDER BY"),
229 Token::GroupBy => Some("GROUP BY"),
230 Token::Having => Some("HAVING"),
231 Token::Qualify => Some("QUALIFY"),
232 Token::As => Some("AS"),
233 Token::Asc => Some("ASC"),
234 Token::Desc => Some("DESC"),
235 Token::Limit => Some("LIMIT"),
236 Token::Offset => Some("OFFSET"),
237 Token::Into => Some("INTO"),
238 Token::Distinct => Some("DISTINCT"),
239 Token::Case => Some("CASE"),
240 Token::When => Some("WHEN"),
241 Token::Then => Some("THEN"),
242 Token::Else => Some("ELSE"),
243 Token::End => Some("END"),
244 Token::Join => Some("JOIN"),
245 Token::Inner => Some("INNER"),
246 Token::Left => Some("LEFT"),
247 Token::Right => Some("RIGHT"),
248 Token::Full => Some("FULL"),
249 Token::Cross => Some("CROSS"),
250 Token::On => Some("ON"),
251 Token::Union => Some("UNION"),
252 Token::Intersect => Some("INTERSECT"),
253 Token::Except => Some("EXCEPT"),
254 Token::Over => Some("OVER"),
255 Token::Partition => Some("PARTITION"),
256 Token::By => Some("BY"),
257 Token::Rows => Some("ROWS"),
258 Token::Range => Some("RANGE"),
259 Token::Preceding => Some("PRECEDING"),
260 Token::Following => Some("FOLLOWING"),
261 Token::Current => Some("CURRENT"),
262 Token::Row => Some("ROW"),
263 Token::Unbounded => Some("UNBOUNDED"),
264 Token::DateTime => Some("DATETIME"),
265 _ => None,
266 }
267 }
268}
269
/// Streaming tokenizer over a pre-decoded character buffer.
#[derive(Debug, Clone)]
pub struct Lexer {
    // Entire input decoded to `char`s up front so lookahead is O(1) per char.
    input: Vec<char>,
    // Index of the current character within `input` (a char index, not bytes).
    position: usize,
    // Cached `input[position]`; `None` once the end of input is reached.
    current_char: Option<char>,
    // Governs whether `next_token` skips or emits comment tokens.
    mode: LexerMode,
}
277
278impl Lexer {
279 #[must_use]
280 pub fn new(input: &str) -> Self {
281 Self::with_mode(input, LexerMode::default())
282 }
283
284 #[must_use]
286 pub fn with_mode(input: &str, mode: LexerMode) -> Self {
287 let chars: Vec<char> = input.chars().collect();
288 let current = chars.first().copied();
289 Self {
290 input: chars,
291 position: 0,
292 current_char: current,
293 mode,
294 }
295 }
296
297 fn advance(&mut self) {
298 self.position += 1;
299 self.current_char = self.input.get(self.position).copied();
300 }
301
302 fn peek(&self, offset: usize) -> Option<char> {
303 self.input.get(self.position + offset).copied()
304 }
305
306 fn peek_string(&self, n: usize) -> String {
308 let mut result = String::new();
309 for i in 0..n {
310 if let Some(ch) = self.input.get(self.position + i) {
311 result.push(*ch);
312 } else {
313 break;
314 }
315 }
316 result
317 }
318
319 fn read_json_block(&mut self) -> String {
322 let mut result = String::new();
323
324 for _ in 0..6 {
326 self.advance();
327 }
328
329 while let Some(ch) = self.current_char {
331 if ch == '$' && self.peek_string(6) == "$JSON$" {
333 for _ in 0..6 {
335 self.advance();
336 }
337 break;
338 }
339 result.push(ch);
340 self.advance();
341 }
342
343 result
344 }
345
346 fn skip_whitespace(&mut self) {
347 while let Some(ch) = self.current_char {
348 if ch.is_whitespace() {
349 self.advance();
350 } else {
351 break;
352 }
353 }
354 }
355
356 fn read_line_comment(&mut self) -> String {
358 let mut result = String::new();
359
360 self.advance();
362 self.advance();
363
364 while let Some(ch) = self.current_char {
366 if ch == '\n' {
367 self.advance(); break;
369 }
370 result.push(ch);
371 self.advance();
372 }
373
374 result
375 }
376
377 fn read_block_comment(&mut self) -> String {
379 let mut result = String::new();
380
381 self.advance();
383 self.advance();
384
385 while let Some(ch) = self.current_char {
387 if ch == '*' && self.peek(1) == Some('/') {
388 self.advance(); self.advance(); break;
391 }
392 result.push(ch);
393 self.advance();
394 }
395
396 result
397 }
398
399 fn skip_whitespace_and_comments(&mut self) {
402 loop {
403 while let Some(ch) = self.current_char {
405 if ch.is_whitespace() {
406 self.advance();
407 } else {
408 break;
409 }
410 }
411
412 match self.current_char {
414 Some('-') if self.peek(1) == Some('-') => {
415 self.advance(); self.advance(); while let Some(ch) = self.current_char {
419 self.advance();
420 if ch == '\n' {
421 break;
422 }
423 }
424 }
425 Some('/') if self.peek(1) == Some('*') => {
426 self.advance(); self.advance(); while let Some(ch) = self.current_char {
430 if ch == '*' && self.peek(1) == Some('/') {
431 self.advance(); self.advance(); break;
434 }
435 self.advance();
436 }
437 }
438 _ => {
439 break;
441 }
442 }
443 }
444 }
445
446 fn read_identifier(&mut self) -> String {
447 let mut result = String::new();
448 while let Some(ch) = self.current_char {
449 if ch.is_alphanumeric() || ch == '_' {
450 result.push(ch);
451 self.advance();
452 } else {
453 break;
454 }
455 }
456 result
457 }
458
459 fn read_string(&mut self) -> String {
460 let mut result = String::new();
461 let quote_char = self.current_char.unwrap(); self.advance(); while let Some(ch) = self.current_char {
465 if ch == quote_char {
466 self.advance(); break;
468 }
469 result.push(ch);
470 self.advance();
471 }
472 result
473 }
474
475 fn read_number(&mut self) -> String {
476 let mut result = String::new();
477 let has_e = false;
478
479 while let Some(ch) = self.current_char {
481 if !has_e && (ch.is_numeric() || ch == '.') {
482 result.push(ch);
483 self.advance();
484 } else if (ch == 'e' || ch == 'E') && !has_e && !result.is_empty() {
485 result.push(ch);
487 self.advance();
488 let _ = has_e; if let Some(sign) = self.current_char {
492 if sign == '+' || sign == '-' {
493 result.push(sign);
494 self.advance();
495 }
496 }
497
498 while let Some(digit) = self.current_char {
500 if digit.is_numeric() {
501 result.push(digit);
502 self.advance();
503 } else {
504 break;
505 }
506 }
507 break; } else {
509 break;
510 }
511 }
512 result
513 }
514
515 pub fn next_token_with_comments(&mut self) -> Token {
518 self.skip_whitespace();
520
521 match self.current_char {
522 None => Token::Eof,
523 Some('-') if self.peek(1) == Some('-') => {
525 let comment_text = self.read_line_comment();
526 Token::LineComment(comment_text)
527 }
528 Some('/') if self.peek(1) == Some('*') => {
529 let comment_text = self.read_block_comment();
530 Token::BlockComment(comment_text)
531 }
532 Some('*') => {
533 self.advance();
534 Token::Star
535 }
536 Some('+') => {
537 self.advance();
538 Token::Plus
539 }
540 Some('/') => {
541 self.advance();
543 Token::Divide
544 }
545 Some('%') => {
546 self.advance();
547 Token::Modulo
548 }
549 Some('.') => {
550 self.advance();
551 Token::Dot
552 }
553 Some(',') => {
554 self.advance();
555 Token::Comma
556 }
557 Some(':') => {
558 self.advance();
559 Token::Colon
560 }
561 Some('(') => {
562 self.advance();
563 Token::LeftParen
564 }
565 Some(')') => {
566 self.advance();
567 Token::RightParen
568 }
569 Some('=') => {
570 self.advance();
571 Token::Equal
572 }
573 Some('<') => {
574 self.advance();
575 if self.current_char == Some('=') {
576 self.advance();
577 Token::LessThanOrEqual
578 } else if self.current_char == Some('>') {
579 self.advance();
580 Token::NotEqual
581 } else {
582 Token::LessThan
583 }
584 }
585 Some('>') => {
586 self.advance();
587 if self.current_char == Some('=') {
588 self.advance();
589 Token::GreaterThanOrEqual
590 } else {
591 Token::GreaterThan
592 }
593 }
594 Some('!') if self.peek(1) == Some('=') => {
595 self.advance();
596 self.advance();
597 Token::NotEqual
598 }
599 Some('|') if self.peek(1) == Some('|') => {
600 self.advance();
601 self.advance();
602 Token::Concat
603 }
604 Some('"') => {
605 let ident_val = self.read_string();
606 Token::QuotedIdentifier(ident_val)
607 }
608 Some('$') => {
609 if self.peek_string(6) == "$JSON$" {
610 let json_content = self.read_json_block();
611 Token::JsonBlock(json_content)
612 } else {
613 let ident = self.read_identifier();
614 Token::Identifier(ident)
615 }
616 }
617 Some('\'') => {
618 let string_val = self.read_string();
619 Token::StringLiteral(string_val)
620 }
621 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
622 self.advance();
623 let num = self.read_number();
624 Token::NumberLiteral(format!("-{num}"))
625 }
626 Some('-') => {
627 self.advance();
628 Token::Minus
629 }
630 Some(ch) if ch.is_numeric() => {
631 let num = self.read_number();
632 Token::NumberLiteral(num)
633 }
634 Some('#') => {
635 self.advance();
636 let table_name = self.read_identifier();
637 if table_name.is_empty() {
638 Token::Identifier("#".to_string())
639 } else {
640 Token::Identifier(format!("#{}", table_name))
641 }
642 }
643 Some(ch) if ch.is_alphabetic() || ch == '_' => {
644 let ident = self.read_identifier();
645 match ident.to_uppercase().as_str() {
647 "ORDER" if self.peek_keyword("BY") => {
648 self.skip_whitespace();
649 self.read_identifier(); Token::OrderBy
651 }
652 "GROUP" if self.peek_keyword("BY") => {
653 self.skip_whitespace();
654 self.read_identifier(); Token::GroupBy
656 }
657 _ => Token::from_keyword(&ident).unwrap_or_else(|| Token::Identifier(ident)),
658 }
659 }
660 Some(ch) => {
661 self.advance();
662 Token::Identifier(ch.to_string())
663 }
664 }
665 }
666
667 pub fn next_token(&mut self) -> Token {
669 match self.mode {
670 LexerMode::SkipComments => self.next_token_skip_comments(),
671 LexerMode::PreserveComments => self.next_token_with_comments(),
672 }
673 }
674
675 fn next_token_skip_comments(&mut self) -> Token {
677 self.skip_whitespace_and_comments();
678
679 match self.current_char {
680 None => Token::Eof,
681 Some('*') => {
682 self.advance();
683 Token::Star }
687 Some('+') => {
688 self.advance();
689 Token::Plus
690 }
691 Some('/') => {
692 if self.peek(1) == Some('*') {
694 self.skip_whitespace_and_comments();
697 return self.next_token();
698 }
699 self.advance();
700 Token::Divide
701 }
702 Some('%') => {
703 self.advance();
704 Token::Modulo
705 }
706 Some('.') => {
707 self.advance();
708 Token::Dot
709 }
710 Some(',') => {
711 self.advance();
712 Token::Comma
713 }
714 Some(':') => {
715 self.advance();
716 Token::Colon
717 }
718 Some('(') => {
719 self.advance();
720 Token::LeftParen
721 }
722 Some(')') => {
723 self.advance();
724 Token::RightParen
725 }
726 Some('=') => {
727 self.advance();
728 Token::Equal
729 }
730 Some('<') => {
731 self.advance();
732 if self.current_char == Some('=') {
733 self.advance();
734 Token::LessThanOrEqual
735 } else if self.current_char == Some('>') {
736 self.advance();
737 Token::NotEqual
738 } else {
739 Token::LessThan
740 }
741 }
742 Some('>') => {
743 self.advance();
744 if self.current_char == Some('=') {
745 self.advance();
746 Token::GreaterThanOrEqual
747 } else {
748 Token::GreaterThan
749 }
750 }
751 Some('!') if self.peek(1) == Some('=') => {
752 self.advance();
753 self.advance();
754 Token::NotEqual
755 }
756 Some('|') if self.peek(1) == Some('|') => {
757 self.advance();
758 self.advance();
759 Token::Concat
760 }
761 Some('"') => {
762 let ident_val = self.read_string();
764 Token::QuotedIdentifier(ident_val)
765 }
766 Some('$') => {
767 if self.peek_string(6) == "$JSON$" {
769 let json_content = self.read_json_block();
770 Token::JsonBlock(json_content)
771 } else {
772 let ident = self.read_identifier();
775 Token::Identifier(ident)
776 }
777 }
778 Some('\'') => {
779 let string_val = self.read_string();
781 Token::StringLiteral(string_val)
782 }
783 Some('-') if self.peek(1) == Some('-') => {
784 self.skip_whitespace_and_comments();
786 self.next_token()
787 }
788 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
789 self.advance(); let num = self.read_number();
792 Token::NumberLiteral(format!("-{num}"))
793 }
794 Some('-') => {
795 self.advance();
797 Token::Minus
798 }
799 Some(ch) if ch.is_numeric() => {
800 let num = self.read_number();
801 Token::NumberLiteral(num)
802 }
803 Some('#') => {
804 self.advance(); let table_name = self.read_identifier();
807 if table_name.is_empty() {
808 Token::Identifier("#".to_string())
810 } else {
811 Token::Identifier(format!("#{}", table_name))
813 }
814 }
815 Some(ch) if ch.is_alphabetic() || ch == '_' => {
816 let ident = self.read_identifier();
817 match ident.to_uppercase().as_str() {
818 "SELECT" => Token::Select,
819 "FROM" => Token::From,
820 "WHERE" => Token::Where,
821 "WITH" => Token::With,
822 "AND" => Token::And,
823 "OR" => Token::Or,
824 "IN" => Token::In,
825 "NOT" => Token::Not,
826 "BETWEEN" => Token::Between,
827 "LIKE" => Token::Like,
828 "IS" => Token::Is,
829 "NULL" => Token::Null,
830 "ORDER" if self.peek_keyword("BY") => {
831 self.skip_whitespace();
832 self.read_identifier(); Token::OrderBy
834 }
835 "GROUP" if self.peek_keyword("BY") => {
836 self.skip_whitespace();
837 self.read_identifier(); Token::GroupBy
839 }
840 "HAVING" => Token::Having,
841 "QUALIFY" => Token::Qualify,
842 "AS" => Token::As,
843 "ASC" => Token::Asc,
844 "DESC" => Token::Desc,
845 "LIMIT" => Token::Limit,
846 "OFFSET" => Token::Offset,
847 "INTO" => Token::Into,
848 "DATETIME" => Token::DateTime,
849 "CASE" => Token::Case,
850 "WHEN" => Token::When,
851 "THEN" => Token::Then,
852 "ELSE" => Token::Else,
853 "END" => Token::End,
854 "DISTINCT" => Token::Distinct,
855 "OVER" => Token::Over,
856 "PARTITION" => Token::Partition,
857 "BY" => Token::By,
858 "ROWS" => Token::Rows,
860 "UNBOUNDED" => Token::Unbounded,
863 "PRECEDING" => Token::Preceding,
864 "FOLLOWING" => Token::Following,
865 "CURRENT" => Token::Current,
866 "ROW" => Token::Row,
867 "UNION" => Token::Union,
869 "INTERSECT" => Token::Intersect,
870 "EXCEPT" => Token::Except,
871 "WEB" => Token::Web,
873 "UNNEST" => Token::Unnest,
875 "JOIN" => Token::Join,
877 "INNER" => Token::Inner,
878 "LEFT" => Token::Left,
879 "RIGHT" => Token::Right,
880 "FULL" => Token::Full,
881 "OUTER" => Token::Outer,
882 "ON" => Token::On,
883 "CROSS" => Token::Cross,
884 _ => Token::Identifier(ident),
885 }
886 }
887 Some(ch) => {
888 self.advance();
889 Token::Identifier(ch.to_string())
890 }
891 }
892 }
893
894 fn peek_keyword(&mut self, keyword: &str) -> bool {
895 let saved_pos = self.position;
896 let saved_char = self.current_char;
897
898 self.skip_whitespace_and_comments();
899 let next_word = self.read_identifier();
900 let matches = next_word.to_uppercase() == keyword;
901
902 self.position = saved_pos;
904 self.current_char = saved_char;
905
906 matches
907 }
908
909 #[must_use]
910 pub fn get_position(&self) -> usize {
911 self.position
912 }
913
914 pub fn tokenize_all(&mut self) -> Vec<Token> {
915 let mut tokens = Vec::new();
916 loop {
917 let token = self.next_token();
918 if matches!(token, Token::Eof) {
919 tokens.push(token);
920 break;
921 }
922 tokens.push(token);
923 }
924 tokens
925 }
926
927 pub fn tokenize_all_with_positions(&mut self) -> Vec<(usize, usize, Token)> {
928 let mut tokens = Vec::new();
929 loop {
930 self.skip_whitespace_and_comments();
931 let start_pos = self.position;
932 let token = self.next_token();
933 let end_pos = self.position;
934
935 if matches!(token, Token::Eof) {
936 break;
937 }
938 tokens.push((start_pos, end_pos, token));
939 }
940 tokens
941 }
942
943 pub fn tokenize_all_with_comments(&mut self) -> Vec<Token> {
946 let mut tokens = Vec::new();
947 loop {
948 let token = self.next_token_with_comments();
949 if matches!(token, Token::Eof) {
950 tokens.push(token);
951 break;
952 }
953 tokens.push(token);
954 }
955 tokens
956 }
957}
958
#[cfg(test)]
mod tests {
    use super::*;

    // Preserve mode surfaces a `--` comment with its text (delimiter and
    // trailing newline stripped).
    #[test]
    fn test_line_comment_tokenization() {
        let sql = "SELECT col1, -- this is a comment\ncol2 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let comment_token = tokens.iter().find(|t| matches!(t, Token::LineComment(_)));
        assert!(comment_token.is_some(), "Should find line comment token");

        if let Some(Token::LineComment(text)) = comment_token {
            assert_eq!(text.trim(), "this is a comment");
        }
    }

    // Preserve mode surfaces a `/* */` comment with delimiters stripped.
    #[test]
    fn test_block_comment_tokenization() {
        let sql = "SELECT /* block comment */ col1 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let comment_token = tokens.iter().find(|t| matches!(t, Token::BlockComment(_)));
        assert!(comment_token.is_some(), "Should find block comment token");

        if let Some(Token::BlockComment(text)) = comment_token {
            assert_eq!(text.trim(), "block comment");
        }
    }

    // Mixed line and block comments are all preserved, each with its own kind.
    #[test]
    fn test_multiple_comments() {
        let sql = "-- First comment\nSELECT col1, /* inline */ col2\n-- Second comment\nFROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let line_comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t, Token::LineComment(_)))
            .collect();
        let block_comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t, Token::BlockComment(_)))
            .collect();

        assert_eq!(line_comments.len(), 2, "Should find 2 line comments");
        assert_eq!(block_comments.len(), 1, "Should find 1 block comment");
    }

    // Default-mode tokenize_all must never emit comment tokens.
    #[test]
    fn test_backwards_compatibility() {
        let sql = "SELECT -- comment\ncol1 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all();

        let has_comments = tokens
            .iter()
            .any(|t| matches!(t, Token::LineComment(_) | Token::BlockComment(_)));
        assert!(
            !has_comments,
            "next_token() should skip comments for backwards compatibility"
        );

        // The surrounding keywords still tokenize normally.
        assert!(tokens.iter().any(|t| matches!(t, Token::Select)));
        assert!(tokens.iter().any(|t| matches!(t, Token::From)));
    }

    // SkipComments mode: the comment disappears entirely from the stream.
    #[test]
    fn test_lexer_mode_skip_comments() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::SkipComments);

        assert_eq!(lexer.next_token(), Token::Select);
        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));
        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    // PreserveComments mode: the comment appears in stream order.
    #[test]
    fn test_lexer_mode_preserve_comments() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::PreserveComments);

        assert_eq!(lexer.next_token(), Token::Select);
        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));

        let comment_tok = lexer.next_token();
        assert!(matches!(comment_tok, Token::LineComment(_)));
        if let Token::LineComment(text) = comment_tok {
            assert_eq!(text.trim(), "comment");
        }

        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    // Lexer::new defaults to SkipComments: 4 tokens, no comments.
    #[test]
    fn test_lexer_mode_default_is_skip() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::new(sql);

        let mut tok_count = 0;
        loop {
            let tok = lexer.next_token();
            if matches!(tok, Token::Eof) {
                break;
            }
            assert!(!matches!(
                tok,
                Token::LineComment(_) | Token::BlockComment(_)
            ));
            tok_count += 1;
        }

        // SELECT, id, FROM, table.
        assert_eq!(tok_count, 4);
    }

    // Block comments follow the same mode rules as line comments.
    #[test]
    fn test_lexer_mode_block_comments() {
        let sql = "SELECT /* block */ id FROM table";

        let mut lexer_skip = Lexer::with_mode(sql, LexerMode::SkipComments);
        assert_eq!(lexer_skip.next_token(), Token::Select);
        assert_eq!(lexer_skip.next_token(), Token::Identifier("id".into()));
        assert_eq!(lexer_skip.next_token(), Token::From);

        let mut lexer_preserve = Lexer::with_mode(sql, LexerMode::PreserveComments);
        assert_eq!(lexer_preserve.next_token(), Token::Select);

        let comment_tok = lexer_preserve.next_token();
        assert!(matches!(comment_tok, Token::BlockComment(_)));
        if let Token::BlockComment(text) = comment_tok {
            assert_eq!(text.trim(), "block");
        }

        assert_eq!(lexer_preserve.next_token(), Token::Identifier("id".into()));
    }

    // Leading, inline, and trailing comments all interleave correctly.
    #[test]
    fn test_lexer_mode_mixed_comments() {
        let sql = "-- leading\nSELECT /* inline */ id -- trailing\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::PreserveComments);

        assert!(matches!(lexer.next_token(), Token::LineComment(_)));

        assert_eq!(lexer.next_token(), Token::Select);

        assert!(matches!(lexer.next_token(), Token::BlockComment(_)));

        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));

        assert!(matches!(lexer.next_token(), Token::LineComment(_)));

        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }
}