/// Controls whether the lexer discards SQL comments or surfaces them as
/// `LineComment` / `BlockComment` tokens.
// `Eq` added alongside `PartialEq`: the comparison is total for a fieldless
// enum (clippy: derive_partial_eq_without_eq).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerMode {
    /// Comments are consumed silently (the historical default behavior).
    SkipComments,
    /// Comments are emitted as tokens so callers can round-trip source text.
    PreserveComments,
}
14
15impl Default for LexerMode {
16 fn default() -> Self {
17 LexerMode::SkipComments
18 }
19}
20
/// A single lexical token produced by the SQL lexer.
// `Eq` added alongside `PartialEq`: all payloads are `String`, so equality is
// total (clippy: derive_partial_eq_without_eq).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    // --- Keywords ---
    Select,
    From,
    Where,
    With,
    And,
    Or,
    In,
    Not,
    Between,
    Like,
    ILike,
    Is,
    Null,
    /// The two-word keyword `ORDER BY`, folded into one token.
    OrderBy,
    /// The two-word keyword `GROUP BY`, folded into one token.
    GroupBy,
    Having,
    Qualify,
    As,
    Asc,
    Desc,
    Limit,
    Offset,
    Into,
    DateTime,
    Case,
    When,
    Then,
    Else,
    End,
    Distinct,
    Over,
    Partition,
    By,
    Exclude,
    Rows,
    Range,
    Unbounded,
    Preceding,
    Following,
    Current,
    Row,
    Union,
    Intersect,
    Except,
    Web,
    Unnest,
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    On,
    Cross,

    // --- Identifiers and literals (payload is the raw text, delimiters excluded) ---
    Identifier(String),
    QuotedIdentifier(String),
    StringLiteral(String),
    /// Contents of a `$JSON$ ... $JSON$` block, delimiters excluded.
    JsonBlock(String),
    NumberLiteral(String),

    // --- Punctuation and operators ---
    Star,
    Dot,
    Comma,
    Colon,
    LeftParen,
    RightParen,
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessThanOrEqual,
    GreaterThanOrEqual,
    Plus,
    Minus,
    Divide,
    Modulo,
    /// The `||` string-concatenation operator.
    Concat,

    // --- Comments (only emitted in `PreserveComments` mode) ---
    LineComment(String),
    BlockComment(String),

    /// End of input.
    Eof,
}

impl Token {
    /// Maps a single word to its keyword token, case-insensitively.
    ///
    /// Returns `None` for non-keywords. Note that the two-word keywords are
    /// mapped from their first word alone (`"ORDER"` -> [`Token::OrderBy`],
    /// `"GROUP"` -> [`Token::GroupBy`]); the lexer decides whether `BY`
    /// actually follows before calling this.
    pub fn from_keyword(s: &str) -> Option<Token> {
        match s.to_uppercase().as_str() {
            "SELECT" => Some(Token::Select),
            "FROM" => Some(Token::From),
            "WHERE" => Some(Token::Where),
            "WITH" => Some(Token::With),
            "AND" => Some(Token::And),
            "OR" => Some(Token::Or),
            "IN" => Some(Token::In),
            "NOT" => Some(Token::Not),
            "BETWEEN" => Some(Token::Between),
            "LIKE" => Some(Token::Like),
            "ILIKE" => Some(Token::ILike),
            "IS" => Some(Token::Is),
            "NULL" => Some(Token::Null),
            "ORDER" => Some(Token::OrderBy),
            "GROUP" => Some(Token::GroupBy),
            "HAVING" => Some(Token::Having),
            "QUALIFY" => Some(Token::Qualify),
            "AS" => Some(Token::As),
            "ASC" => Some(Token::Asc),
            "DESC" => Some(Token::Desc),
            "LIMIT" => Some(Token::Limit),
            "OFFSET" => Some(Token::Offset),
            "INTO" => Some(Token::Into),
            // BUG FIX: DATETIME was recognized by the skip-comments token path
            // and by `as_keyword_str`, but missing here, so the two lexer
            // modes disagreed on how `DATETIME` tokenizes.
            "DATETIME" => Some(Token::DateTime),
            "DISTINCT" => Some(Token::Distinct),
            "EXCLUDE" => Some(Token::Exclude),
            "CASE" => Some(Token::Case),
            "WHEN" => Some(Token::When),
            "THEN" => Some(Token::Then),
            "ELSE" => Some(Token::Else),
            "END" => Some(Token::End),
            "OVER" => Some(Token::Over),
            "PARTITION" => Some(Token::Partition),
            "BY" => Some(Token::By),
            "ROWS" => Some(Token::Rows),
            "RANGE" => Some(Token::Range),
            "UNBOUNDED" => Some(Token::Unbounded),
            "PRECEDING" => Some(Token::Preceding),
            "FOLLOWING" => Some(Token::Following),
            "CURRENT" => Some(Token::Current),
            "ROW" => Some(Token::Row),
            "UNION" => Some(Token::Union),
            "INTERSECT" => Some(Token::Intersect),
            "EXCEPT" => Some(Token::Except),
            "WEB" => Some(Token::Web),
            "UNNEST" => Some(Token::Unnest),
            "JOIN" => Some(Token::Join),
            "INNER" => Some(Token::Inner),
            "LEFT" => Some(Token::Left),
            "RIGHT" => Some(Token::Right),
            "FULL" => Some(Token::Full),
            "OUTER" => Some(Token::Outer),
            "ON" => Some(Token::On),
            "CROSS" => Some(Token::Cross),
            _ => None,
        }
    }

    /// Returns `true` for the boolean connectives `AND` / `OR`.
    pub fn is_logical_operator(&self) -> bool {
        matches!(self, Token::And | Token::Or)
    }

    /// Returns `true` for a join-type qualifier keyword
    /// (`INNER`/`LEFT`/`RIGHT`/`FULL`/`CROSS`).
    pub fn is_join_type(&self) -> bool {
        matches!(
            self,
            Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
        )
    }

    /// Returns `true` for keywords that terminate the current clause
    /// (trailing clauses and set operators).
    pub fn is_clause_terminator(&self) -> bool {
        matches!(
            self,
            Token::OrderBy
                | Token::GroupBy
                | Token::Having
                | Token::Limit
                | Token::Offset
                | Token::Union
                | Token::Intersect
                | Token::Except
        )
    }

    /// Renders a keyword token back to its canonical uppercase SQL spelling.
    ///
    /// Returns `None` for non-keyword tokens (identifiers, literals,
    /// punctuation, comments, `Eof`).
    pub fn as_keyword_str(&self) -> Option<&'static str> {
        match self {
            Token::Select => Some("SELECT"),
            Token::From => Some("FROM"),
            Token::Where => Some("WHERE"),
            Token::With => Some("WITH"),
            Token::And => Some("AND"),
            Token::Or => Some("OR"),
            Token::In => Some("IN"),
            Token::Not => Some("NOT"),
            Token::Between => Some("BETWEEN"),
            Token::Like => Some("LIKE"),
            Token::ILike => Some("ILIKE"),
            Token::Is => Some("IS"),
            Token::Null => Some("NULL"),
            Token::OrderBy => Some("ORDER BY"),
            Token::GroupBy => Some("GROUP BY"),
            Token::Having => Some("HAVING"),
            Token::Qualify => Some("QUALIFY"),
            Token::As => Some("AS"),
            Token::Asc => Some("ASC"),
            Token::Desc => Some("DESC"),
            Token::Limit => Some("LIMIT"),
            Token::Offset => Some("OFFSET"),
            Token::Into => Some("INTO"),
            Token::Distinct => Some("DISTINCT"),
            Token::Exclude => Some("EXCLUDE"),
            Token::Case => Some("CASE"),
            Token::When => Some("WHEN"),
            Token::Then => Some("THEN"),
            Token::Else => Some("ELSE"),
            Token::End => Some("END"),
            Token::Join => Some("JOIN"),
            Token::Inner => Some("INNER"),
            Token::Left => Some("LEFT"),
            Token::Right => Some("RIGHT"),
            Token::Full => Some("FULL"),
            // CONSISTENCY FIX: OUTER/WEB/UNNEST are produced by
            // `from_keyword` but previously fell into the `None` arm here,
            // breaking the keyword round-trip.
            Token::Outer => Some("OUTER"),
            Token::Cross => Some("CROSS"),
            Token::On => Some("ON"),
            Token::Union => Some("UNION"),
            Token::Intersect => Some("INTERSECT"),
            Token::Except => Some("EXCEPT"),
            Token::Web => Some("WEB"),
            Token::Unnest => Some("UNNEST"),
            Token::Over => Some("OVER"),
            Token::Partition => Some("PARTITION"),
            Token::By => Some("BY"),
            Token::Rows => Some("ROWS"),
            Token::Range => Some("RANGE"),
            Token::Preceding => Some("PRECEDING"),
            Token::Following => Some("FOLLOWING"),
            Token::Current => Some("CURRENT"),
            Token::Row => Some("ROW"),
            Token::Unbounded => Some("UNBOUNDED"),
            Token::DateTime => Some("DATETIME"),
            _ => None,
        }
    }
}
277
/// A character-based SQL lexer.
///
/// The input is eagerly decoded into a `Vec<char>` so lookahead is O(1);
/// `position` is therefore a character index, not a byte offset.
#[derive(Debug, Clone)]
pub struct Lexer {
    // Decoded input characters.
    input: Vec<char>,
    // Index of `current_char` within `input` (chars, not bytes).
    position: usize,
    // Cached `input.get(position)`; `None` once input is exhausted.
    current_char: Option<char>,
    // Comment-handling policy used by `next_token`.
    mode: LexerMode,
}
285
286impl Lexer {
287 #[must_use]
288 pub fn new(input: &str) -> Self {
289 Self::with_mode(input, LexerMode::default())
290 }
291
292 #[must_use]
294 pub fn with_mode(input: &str, mode: LexerMode) -> Self {
295 let chars: Vec<char> = input.chars().collect();
296 let current = chars.first().copied();
297 Self {
298 input: chars,
299 position: 0,
300 current_char: current,
301 mode,
302 }
303 }
304
305 fn advance(&mut self) {
306 self.position += 1;
307 self.current_char = self.input.get(self.position).copied();
308 }
309
310 fn peek(&self, offset: usize) -> Option<char> {
311 self.input.get(self.position + offset).copied()
312 }
313
314 fn peek_string(&self, n: usize) -> String {
316 let mut result = String::new();
317 for i in 0..n {
318 if let Some(ch) = self.input.get(self.position + i) {
319 result.push(*ch);
320 } else {
321 break;
322 }
323 }
324 result
325 }
326
327 fn read_json_block(&mut self) -> String {
330 let mut result = String::new();
331
332 for _ in 0..6 {
334 self.advance();
335 }
336
337 while let Some(ch) = self.current_char {
339 if ch == '$' && self.peek_string(6) == "$JSON$" {
341 for _ in 0..6 {
343 self.advance();
344 }
345 break;
346 }
347 result.push(ch);
348 self.advance();
349 }
350
351 result
352 }
353
354 fn skip_whitespace(&mut self) {
355 while let Some(ch) = self.current_char {
356 if ch.is_whitespace() {
357 self.advance();
358 } else {
359 break;
360 }
361 }
362 }
363
364 fn read_line_comment(&mut self) -> String {
366 let mut result = String::new();
367
368 self.advance();
370 self.advance();
371
372 while let Some(ch) = self.current_char {
374 if ch == '\n' {
375 self.advance(); break;
377 }
378 result.push(ch);
379 self.advance();
380 }
381
382 result
383 }
384
385 fn read_block_comment(&mut self) -> String {
387 let mut result = String::new();
388
389 self.advance();
391 self.advance();
392
393 while let Some(ch) = self.current_char {
395 if ch == '*' && self.peek(1) == Some('/') {
396 self.advance(); self.advance(); break;
399 }
400 result.push(ch);
401 self.advance();
402 }
403
404 result
405 }
406
407 fn skip_whitespace_and_comments(&mut self) {
410 loop {
411 while let Some(ch) = self.current_char {
413 if ch.is_whitespace() {
414 self.advance();
415 } else {
416 break;
417 }
418 }
419
420 match self.current_char {
422 Some('-') if self.peek(1) == Some('-') => {
423 self.advance(); self.advance(); while let Some(ch) = self.current_char {
427 self.advance();
428 if ch == '\n' {
429 break;
430 }
431 }
432 }
433 Some('/') if self.peek(1) == Some('*') => {
434 self.advance(); self.advance(); while let Some(ch) = self.current_char {
438 if ch == '*' && self.peek(1) == Some('/') {
439 self.advance(); self.advance(); break;
442 }
443 self.advance();
444 }
445 }
446 _ => {
447 break;
449 }
450 }
451 }
452 }
453
454 fn read_identifier(&mut self) -> String {
455 let mut result = String::new();
456 while let Some(ch) = self.current_char {
457 if ch.is_alphanumeric() || ch == '_' {
458 result.push(ch);
459 self.advance();
460 } else {
461 break;
462 }
463 }
464 result
465 }
466
467 fn read_string(&mut self) -> String {
468 let mut result = String::new();
469 let quote_char = self.current_char.unwrap(); self.advance(); while let Some(ch) = self.current_char {
473 if ch == quote_char {
474 self.advance(); break;
476 }
477 result.push(ch);
478 self.advance();
479 }
480 result
481 }
482
483 fn read_number(&mut self) -> String {
484 let mut result = String::new();
485 let has_e = false;
486
487 while let Some(ch) = self.current_char {
489 if !has_e && (ch.is_numeric() || ch == '.') {
490 result.push(ch);
491 self.advance();
492 } else if (ch == 'e' || ch == 'E') && !has_e && !result.is_empty() {
493 result.push(ch);
495 self.advance();
496 let _ = has_e; if let Some(sign) = self.current_char {
500 if sign == '+' || sign == '-' {
501 result.push(sign);
502 self.advance();
503 }
504 }
505
506 while let Some(digit) = self.current_char {
508 if digit.is_numeric() {
509 result.push(digit);
510 self.advance();
511 } else {
512 break;
513 }
514 }
515 break; } else {
517 break;
518 }
519 }
520 result
521 }
522
523 pub fn next_token_with_comments(&mut self) -> Token {
526 self.skip_whitespace();
528
529 match self.current_char {
530 None => Token::Eof,
531 Some('-') if self.peek(1) == Some('-') => {
533 let comment_text = self.read_line_comment();
534 Token::LineComment(comment_text)
535 }
536 Some('/') if self.peek(1) == Some('*') => {
537 let comment_text = self.read_block_comment();
538 Token::BlockComment(comment_text)
539 }
540 Some('*') => {
541 self.advance();
542 Token::Star
543 }
544 Some('+') => {
545 self.advance();
546 Token::Plus
547 }
548 Some('/') => {
549 self.advance();
551 Token::Divide
552 }
553 Some('%') => {
554 self.advance();
555 Token::Modulo
556 }
557 Some('.') => {
558 self.advance();
559 Token::Dot
560 }
561 Some(',') => {
562 self.advance();
563 Token::Comma
564 }
565 Some(':') => {
566 self.advance();
567 Token::Colon
568 }
569 Some('(') => {
570 self.advance();
571 Token::LeftParen
572 }
573 Some(')') => {
574 self.advance();
575 Token::RightParen
576 }
577 Some('=') => {
578 self.advance();
579 Token::Equal
580 }
581 Some('<') => {
582 self.advance();
583 if self.current_char == Some('=') {
584 self.advance();
585 Token::LessThanOrEqual
586 } else if self.current_char == Some('>') {
587 self.advance();
588 Token::NotEqual
589 } else {
590 Token::LessThan
591 }
592 }
593 Some('>') => {
594 self.advance();
595 if self.current_char == Some('=') {
596 self.advance();
597 Token::GreaterThanOrEqual
598 } else {
599 Token::GreaterThan
600 }
601 }
602 Some('!') if self.peek(1) == Some('=') => {
603 self.advance();
604 self.advance();
605 Token::NotEqual
606 }
607 Some('|') if self.peek(1) == Some('|') => {
608 self.advance();
609 self.advance();
610 Token::Concat
611 }
612 Some('"') => {
613 let ident_val = self.read_string();
614 Token::QuotedIdentifier(ident_val)
615 }
616 Some('$') => {
617 if self.peek_string(6) == "$JSON$" {
618 let json_content = self.read_json_block();
619 Token::JsonBlock(json_content)
620 } else {
621 let ident = self.read_identifier();
622 Token::Identifier(ident)
623 }
624 }
625 Some('\'') => {
626 let string_val = self.read_string();
627 Token::StringLiteral(string_val)
628 }
629 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
630 self.advance();
631 let num = self.read_number();
632 Token::NumberLiteral(format!("-{num}"))
633 }
634 Some('-') => {
635 self.advance();
636 Token::Minus
637 }
638 Some(ch) if ch.is_numeric() => {
639 let num = self.read_number();
640 Token::NumberLiteral(num)
641 }
642 Some('#') => {
643 self.advance();
644 let table_name = self.read_identifier();
645 if table_name.is_empty() {
646 Token::Identifier("#".to_string())
647 } else {
648 Token::Identifier(format!("#{}", table_name))
649 }
650 }
651 Some(ch) if ch.is_alphabetic() || ch == '_' => {
652 let ident = self.read_identifier();
653 match ident.to_uppercase().as_str() {
655 "ORDER" if self.peek_keyword("BY") => {
656 self.skip_whitespace();
657 self.read_identifier(); Token::OrderBy
659 }
660 "GROUP" if self.peek_keyword("BY") => {
661 self.skip_whitespace();
662 self.read_identifier(); Token::GroupBy
664 }
665 _ => Token::from_keyword(&ident).unwrap_or_else(|| Token::Identifier(ident)),
666 }
667 }
668 Some(ch) => {
669 self.advance();
670 Token::Identifier(ch.to_string())
671 }
672 }
673 }
674
675 pub fn next_token(&mut self) -> Token {
677 match self.mode {
678 LexerMode::SkipComments => self.next_token_skip_comments(),
679 LexerMode::PreserveComments => self.next_token_with_comments(),
680 }
681 }
682
683 fn next_token_skip_comments(&mut self) -> Token {
685 self.skip_whitespace_and_comments();
686
687 match self.current_char {
688 None => Token::Eof,
689 Some('*') => {
690 self.advance();
691 Token::Star }
695 Some('+') => {
696 self.advance();
697 Token::Plus
698 }
699 Some('/') => {
700 if self.peek(1) == Some('*') {
702 self.skip_whitespace_and_comments();
705 return self.next_token();
706 }
707 self.advance();
708 Token::Divide
709 }
710 Some('%') => {
711 self.advance();
712 Token::Modulo
713 }
714 Some('.') => {
715 self.advance();
716 Token::Dot
717 }
718 Some(',') => {
719 self.advance();
720 Token::Comma
721 }
722 Some(':') => {
723 self.advance();
724 Token::Colon
725 }
726 Some('(') => {
727 self.advance();
728 Token::LeftParen
729 }
730 Some(')') => {
731 self.advance();
732 Token::RightParen
733 }
734 Some('=') => {
735 self.advance();
736 Token::Equal
737 }
738 Some('<') => {
739 self.advance();
740 if self.current_char == Some('=') {
741 self.advance();
742 Token::LessThanOrEqual
743 } else if self.current_char == Some('>') {
744 self.advance();
745 Token::NotEqual
746 } else {
747 Token::LessThan
748 }
749 }
750 Some('>') => {
751 self.advance();
752 if self.current_char == Some('=') {
753 self.advance();
754 Token::GreaterThanOrEqual
755 } else {
756 Token::GreaterThan
757 }
758 }
759 Some('!') if self.peek(1) == Some('=') => {
760 self.advance();
761 self.advance();
762 Token::NotEqual
763 }
764 Some('|') if self.peek(1) == Some('|') => {
765 self.advance();
766 self.advance();
767 Token::Concat
768 }
769 Some('"') => {
770 let ident_val = self.read_string();
772 Token::QuotedIdentifier(ident_val)
773 }
774 Some('$') => {
775 if self.peek_string(6) == "$JSON$" {
777 let json_content = self.read_json_block();
778 Token::JsonBlock(json_content)
779 } else {
780 let ident = self.read_identifier();
783 Token::Identifier(ident)
784 }
785 }
786 Some('\'') => {
787 let string_val = self.read_string();
789 Token::StringLiteral(string_val)
790 }
791 Some('-') if self.peek(1) == Some('-') => {
792 self.skip_whitespace_and_comments();
794 self.next_token()
795 }
796 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
797 self.advance(); let num = self.read_number();
800 Token::NumberLiteral(format!("-{num}"))
801 }
802 Some('-') => {
803 self.advance();
805 Token::Minus
806 }
807 Some(ch) if ch.is_numeric() => {
808 let num = self.read_number();
809 Token::NumberLiteral(num)
810 }
811 Some('#') => {
812 self.advance(); let table_name = self.read_identifier();
815 if table_name.is_empty() {
816 Token::Identifier("#".to_string())
818 } else {
819 Token::Identifier(format!("#{}", table_name))
821 }
822 }
823 Some(ch) if ch.is_alphabetic() || ch == '_' => {
824 let ident = self.read_identifier();
825 match ident.to_uppercase().as_str() {
826 "SELECT" => Token::Select,
827 "FROM" => Token::From,
828 "WHERE" => Token::Where,
829 "WITH" => Token::With,
830 "AND" => Token::And,
831 "OR" => Token::Or,
832 "IN" => Token::In,
833 "NOT" => Token::Not,
834 "BETWEEN" => Token::Between,
835 "LIKE" => Token::Like,
836 "ILIKE" => Token::ILike,
837 "IS" => Token::Is,
838 "NULL" => Token::Null,
839 "ORDER" if self.peek_keyword("BY") => {
840 self.skip_whitespace();
841 self.read_identifier(); Token::OrderBy
843 }
844 "GROUP" if self.peek_keyword("BY") => {
845 self.skip_whitespace();
846 self.read_identifier(); Token::GroupBy
848 }
849 "HAVING" => Token::Having,
850 "QUALIFY" => Token::Qualify,
851 "AS" => Token::As,
852 "ASC" => Token::Asc,
853 "DESC" => Token::Desc,
854 "LIMIT" => Token::Limit,
855 "OFFSET" => Token::Offset,
856 "INTO" => Token::Into,
857 "DATETIME" => Token::DateTime,
858 "CASE" => Token::Case,
859 "WHEN" => Token::When,
860 "THEN" => Token::Then,
861 "ELSE" => Token::Else,
862 "END" => Token::End,
863 "DISTINCT" => Token::Distinct,
864 "EXCLUDE" => Token::Exclude,
865 "OVER" => Token::Over,
866 "PARTITION" => Token::Partition,
867 "BY" => Token::By,
868 "ROWS" => Token::Rows,
870 "UNBOUNDED" => Token::Unbounded,
873 "PRECEDING" => Token::Preceding,
874 "FOLLOWING" => Token::Following,
875 "CURRENT" => Token::Current,
876 "ROW" => Token::Row,
877 "UNION" => Token::Union,
879 "INTERSECT" => Token::Intersect,
880 "EXCEPT" => Token::Except,
881 "WEB" => Token::Web,
883 "UNNEST" => Token::Unnest,
885 "JOIN" => Token::Join,
887 "INNER" => Token::Inner,
888 "LEFT" => Token::Left,
889 "RIGHT" => Token::Right,
890 "FULL" => Token::Full,
891 "OUTER" => Token::Outer,
892 "ON" => Token::On,
893 "CROSS" => Token::Cross,
894 _ => Token::Identifier(ident),
895 }
896 }
897 Some(ch) => {
898 self.advance();
899 Token::Identifier(ch.to_string())
900 }
901 }
902 }
903
904 fn peek_keyword(&mut self, keyword: &str) -> bool {
905 let saved_pos = self.position;
906 let saved_char = self.current_char;
907
908 self.skip_whitespace_and_comments();
909 let next_word = self.read_identifier();
910 let matches = next_word.to_uppercase() == keyword;
911
912 self.position = saved_pos;
914 self.current_char = saved_char;
915
916 matches
917 }
918
919 #[must_use]
920 pub fn get_position(&self) -> usize {
921 self.position
922 }
923
924 pub fn tokenize_all(&mut self) -> Vec<Token> {
925 let mut tokens = Vec::new();
926 loop {
927 let token = self.next_token();
928 if matches!(token, Token::Eof) {
929 tokens.push(token);
930 break;
931 }
932 tokens.push(token);
933 }
934 tokens
935 }
936
937 pub fn tokenize_all_with_positions(&mut self) -> Vec<(usize, usize, Token)> {
938 let mut tokens = Vec::new();
939 loop {
940 self.skip_whitespace_and_comments();
941 let start_pos = self.position;
942 let token = self.next_token();
943 let end_pos = self.position;
944
945 if matches!(token, Token::Eof) {
946 break;
947 }
948 tokens.push((start_pos, end_pos, token));
949 }
950 tokens
951 }
952
953 pub fn tokenize_all_with_comments(&mut self) -> Vec<Token> {
956 let mut tokens = Vec::new();
957 loop {
958 let token = self.next_token_with_comments();
959 if matches!(token, Token::Eof) {
960 tokens.push(token);
961 break;
962 }
963 tokens.push(token);
964 }
965 tokens
966 }
967}
968
#[cfg(test)]
mod tests {
    use super::*;

    // Comments should be surfaced as LineComment tokens with their text.
    #[test]
    fn test_line_comment_tokenization() {
        let mut lx = Lexer::new("SELECT col1, -- this is a comment\ncol2 FROM table");
        let toks = lx.tokenize_all_with_comments();

        let comment = toks.iter().find_map(|t| match t {
            Token::LineComment(text) => Some(text.as_str()),
            _ => None,
        });
        assert!(comment.is_some(), "Should find line comment token");
        assert_eq!(comment.unwrap().trim(), "this is a comment");
    }

    // Block comments should be surfaced as BlockComment tokens.
    #[test]
    fn test_block_comment_tokenization() {
        let mut lx = Lexer::new("SELECT /* block comment */ col1 FROM table");
        let toks = lx.tokenize_all_with_comments();

        let comment = toks.iter().find_map(|t| match t {
            Token::BlockComment(text) => Some(text.as_str()),
            _ => None,
        });
        assert!(comment.is_some(), "Should find block comment token");
        assert_eq!(comment.unwrap().trim(), "block comment");
    }

    // Mixed comment styles should all be preserved, with correct counts.
    #[test]
    fn test_multiple_comments() {
        let sql = "-- First comment\nSELECT col1, /* inline */ col2\n-- Second comment\nFROM table";
        let toks = Lexer::new(sql).tokenize_all_with_comments();

        let line_total = toks
            .iter()
            .filter(|t| matches!(t, Token::LineComment(_)))
            .count();
        let block_total = toks
            .iter()
            .filter(|t| matches!(t, Token::BlockComment(_)))
            .count();

        assert_eq!(line_total, 2, "Should find 2 line comments");
        assert_eq!(block_total, 1, "Should find 1 block comment");
    }

    // The plain `next_token` path must keep dropping comments.
    #[test]
    fn test_backwards_compatibility() {
        let mut lx = Lexer::new("SELECT -- comment\ncol1 FROM table");
        let toks = lx.tokenize_all();

        let has_comments = toks
            .iter()
            .any(|t| matches!(t, Token::LineComment(_) | Token::BlockComment(_)));
        assert!(
            !has_comments,
            "next_token() should skip comments for backwards compatibility"
        );

        assert!(toks.contains(&Token::Select));
        assert!(toks.contains(&Token::From));
    }

    // SkipComments mode yields only the non-comment token stream.
    #[test]
    fn test_lexer_mode_skip_comments() {
        let mut lx = Lexer::with_mode("SELECT id -- comment\nFROM table", LexerMode::SkipComments);

        let expected = [
            Token::Select,
            Token::Identifier("id".into()),
            Token::From,
            Token::Identifier("table".into()),
            Token::Eof,
        ];
        for want in expected {
            assert_eq!(lx.next_token(), want);
        }
    }

    // PreserveComments mode interleaves the comment token in stream order.
    #[test]
    fn test_lexer_mode_preserve_comments() {
        let mut lx =
            Lexer::with_mode("SELECT id -- comment\nFROM table", LexerMode::PreserveComments);

        assert_eq!(lx.next_token(), Token::Select);
        assert_eq!(lx.next_token(), Token::Identifier("id".into()));

        match lx.next_token() {
            Token::LineComment(text) => assert_eq!(text.trim(), "comment"),
            other => assert!(matches!(other, Token::LineComment(_))),
        }

        assert_eq!(lx.next_token(), Token::From);
        assert_eq!(lx.next_token(), Token::Identifier("table".into()));
        assert_eq!(lx.next_token(), Token::Eof);
    }

    // `Lexer::new` must behave like SkipComments mode.
    #[test]
    fn test_lexer_mode_default_is_skip() {
        let mut lx = Lexer::new("SELECT id -- comment\nFROM table");

        let mut seen = 0;
        loop {
            match lx.next_token() {
                Token::Eof => break,
                tok => {
                    assert!(!matches!(
                        tok,
                        Token::LineComment(_) | Token::BlockComment(_)
                    ));
                    seen += 1;
                }
            }
        }

        assert_eq!(seen, 4);
    }

    // Both modes over the same input with a block comment.
    #[test]
    fn test_lexer_mode_block_comments() {
        let sql = "SELECT /* block */ id FROM table";

        let mut skipping = Lexer::with_mode(sql, LexerMode::SkipComments);
        assert_eq!(skipping.next_token(), Token::Select);
        assert_eq!(skipping.next_token(), Token::Identifier("id".into()));
        assert_eq!(skipping.next_token(), Token::From);

        let mut preserving = Lexer::with_mode(sql, LexerMode::PreserveComments);
        assert_eq!(preserving.next_token(), Token::Select);

        match preserving.next_token() {
            Token::BlockComment(text) => assert_eq!(text.trim(), "block"),
            other => assert!(matches!(other, Token::BlockComment(_))),
        }

        assert_eq!(preserving.next_token(), Token::Identifier("id".into()));
    }

    // Leading, inline and trailing comments, all preserved in order.
    #[test]
    fn test_lexer_mode_mixed_comments() {
        let mut lx = Lexer::with_mode(
            "-- leading\nSELECT /* inline */ id -- trailing\nFROM table",
            LexerMode::PreserveComments,
        );

        assert!(matches!(lx.next_token(), Token::LineComment(_)));
        assert_eq!(lx.next_token(), Token::Select);
        assert!(matches!(lx.next_token(), Token::BlockComment(_)));
        assert_eq!(lx.next_token(), Token::Identifier("id".into()));
        assert!(matches!(lx.next_token(), Token::LineComment(_)));
        assert_eq!(lx.next_token(), Token::From);
        assert_eq!(lx.next_token(), Token::Identifier("table".into()));
        assert_eq!(lx.next_token(), Token::Eof);
    }
}