/// Controls how the lexer treats SQL comments.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LexerMode {
    /// Comments are consumed silently; `next_token` never yields them. This
    /// is the default (see `Lexer::new`).
    #[default]
    SkipComments,
    /// Comments are surfaced as `Token::LineComment` / `Token::BlockComment`.
    PreserveComments,
}
20
21#[derive(Debug, Clone, PartialEq)]
22pub enum Token {
23 Select,
25 From,
26 Where,
27 With, And,
29 Or,
30 In,
31 Not,
32 Between,
33 Like,
34 Is,
35 Null,
36 OrderBy,
37 GroupBy,
38 Having,
39 As,
40 Asc,
41 Desc,
42 Limit,
43 Offset,
44 Into, DateTime, Case, When, Then, Else, End, Distinct, Over, Partition, By, Rows, Range, Unbounded, Preceding, Following, Current, Row, Union, Intersect, Except, Web, Unnest, Join, Inner, Left, Right, Full, Outer, On, Cross, Identifier(String),
88 QuotedIdentifier(String), StringLiteral(String),
90 JsonBlock(String), NumberLiteral(String),
92 Star,
93
94 Dot,
96 Comma,
97 Colon,
98 LeftParen,
99 RightParen,
100 Equal,
101 NotEqual,
102 LessThan,
103 GreaterThan,
104 LessThanOrEqual,
105 GreaterThanOrEqual,
106
107 Plus,
109 Minus,
110 Divide,
111 Modulo,
112
113 Concat, LineComment(String), BlockComment(String), Eof,
122}
123
124impl Token {
125 pub fn from_keyword(s: &str) -> Option<Token> {
127 match s.to_uppercase().as_str() {
128 "SELECT" => Some(Token::Select),
129 "FROM" => Some(Token::From),
130 "WHERE" => Some(Token::Where),
131 "WITH" => Some(Token::With),
132 "AND" => Some(Token::And),
133 "OR" => Some(Token::Or),
134 "IN" => Some(Token::In),
135 "NOT" => Some(Token::Not),
136 "BETWEEN" => Some(Token::Between),
137 "LIKE" => Some(Token::Like),
138 "IS" => Some(Token::Is),
139 "NULL" => Some(Token::Null),
140 "ORDER" => Some(Token::OrderBy),
141 "GROUP" => Some(Token::GroupBy),
142 "HAVING" => Some(Token::Having),
143 "AS" => Some(Token::As),
144 "ASC" => Some(Token::Asc),
145 "DESC" => Some(Token::Desc),
146 "LIMIT" => Some(Token::Limit),
147 "OFFSET" => Some(Token::Offset),
148 "INTO" => Some(Token::Into),
149 "DISTINCT" => Some(Token::Distinct),
150 "CASE" => Some(Token::Case),
151 "WHEN" => Some(Token::When),
152 "THEN" => Some(Token::Then),
153 "ELSE" => Some(Token::Else),
154 "END" => Some(Token::End),
155 "OVER" => Some(Token::Over),
156 "PARTITION" => Some(Token::Partition),
157 "BY" => Some(Token::By),
158 "ROWS" => Some(Token::Rows),
159 "RANGE" => Some(Token::Range),
160 "UNBOUNDED" => Some(Token::Unbounded),
161 "PRECEDING" => Some(Token::Preceding),
162 "FOLLOWING" => Some(Token::Following),
163 "CURRENT" => Some(Token::Current),
164 "ROW" => Some(Token::Row),
165 "UNION" => Some(Token::Union),
166 "INTERSECT" => Some(Token::Intersect),
167 "EXCEPT" => Some(Token::Except),
168 "WEB" => Some(Token::Web),
169 "UNNEST" => Some(Token::Unnest),
170 "JOIN" => Some(Token::Join),
171 "INNER" => Some(Token::Inner),
172 "LEFT" => Some(Token::Left),
173 "RIGHT" => Some(Token::Right),
174 "FULL" => Some(Token::Full),
175 "OUTER" => Some(Token::Outer),
176 "ON" => Some(Token::On),
177 "CROSS" => Some(Token::Cross),
178 _ => None,
179 }
180 }
181
182 pub fn is_logical_operator(&self) -> bool {
184 matches!(self, Token::And | Token::Or)
185 }
186
187 pub fn is_join_type(&self) -> bool {
189 matches!(
190 self,
191 Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
192 )
193 }
194
195 pub fn is_clause_terminator(&self) -> bool {
197 matches!(
198 self,
199 Token::OrderBy
200 | Token::GroupBy
201 | Token::Having
202 | Token::Limit
203 | Token::Offset
204 | Token::Union
205 | Token::Intersect
206 | Token::Except
207 )
208 }
209
210 pub fn as_keyword_str(&self) -> Option<&'static str> {
212 match self {
213 Token::Select => Some("SELECT"),
214 Token::From => Some("FROM"),
215 Token::Where => Some("WHERE"),
216 Token::With => Some("WITH"),
217 Token::And => Some("AND"),
218 Token::Or => Some("OR"),
219 Token::OrderBy => Some("ORDER BY"),
220 Token::GroupBy => Some("GROUP BY"),
221 Token::Having => Some("HAVING"),
222 _ => None,
224 }
225 }
226}
227
/// A SQL lexer over a fixed input string.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The full input, pre-split into characters for O(1) indexed lookahead.
    input: Vec<char>,
    /// Index of the current character in `input`.
    position: usize,
    /// Cached `input[position]`; `None` once past the end of input.
    current_char: Option<char>,
    /// Whether comments are skipped or emitted as tokens.
    mode: LexerMode,
}
235
236impl Lexer {
    /// Creates a lexer over `input` using the default mode
    /// (`LexerMode::SkipComments`).
    #[must_use]
    pub fn new(input: &str) -> Self {
        Self::with_mode(input, LexerMode::default())
    }

    /// Creates a lexer over `input` with an explicit comment-handling mode.
    #[must_use]
    pub fn with_mode(input: &str, mode: LexerMode) -> Self {
        let chars: Vec<char> = input.chars().collect();
        let current = chars.first().copied();
        Self {
            input: chars,
            position: 0,
            current_char: current,
            mode,
        }
    }
254
    /// Moves to the next character; `current_char` becomes `None` at EOF.
    fn advance(&mut self) {
        self.position += 1;
        self.current_char = self.input.get(self.position).copied();
    }

    /// Looks `offset` characters ahead of the current position without
    /// consuming anything (offset 0 is the current character).
    fn peek(&self, offset: usize) -> Option<char> {
        self.input.get(self.position + offset).copied()
    }

    /// Returns up to `n` characters starting at the current position
    /// (shorter if the input ends first), without consuming anything.
    fn peek_string(&self, n: usize) -> String {
        let mut result = String::new();
        for i in 0..n {
            if let Some(ch) = self.input.get(self.position + i) {
                result.push(*ch);
            } else {
                break;
            }
        }
        result
    }
276
    /// Reads the body of a `$JSON$ ... $JSON$` block.
    ///
    /// The caller has already verified that the next six characters are the
    /// opening `$JSON$` delimiter; this skips them, then consumes characters
    /// until the closing `$JSON$` (or end of input, if unterminated). The
    /// delimiters themselves are not included in the returned text.
    fn read_json_block(&mut self) -> String {
        let mut result = String::new();

        // Skip the opening "$JSON$" delimiter (6 characters).
        for _ in 0..6 {
            self.advance();
        }

        while let Some(ch) = self.current_char {
            // peek_string(6) starts at the current '$', so this matches the
            // closing delimiter exactly at the current position.
            if ch == '$' && self.peek_string(6) == "$JSON$" {
                for _ in 0..6 {
                    self.advance();
                }
                break;
            }
            result.push(ch);
            self.advance();
        }

        result
    }
303
304 fn skip_whitespace(&mut self) {
305 while let Some(ch) = self.current_char {
306 if ch.is_whitespace() {
307 self.advance();
308 } else {
309 break;
310 }
311 }
312 }
313
    /// Reads a `--` line comment, returning its text without the leading
    /// `--` or the terminating newline (the newline itself IS consumed).
    ///
    /// The caller guarantees the current and next characters are both '-'.
    fn read_line_comment(&mut self) -> String {
        let mut result = String::new();

        // Skip the two '-' characters.
        self.advance();
        self.advance();

        while let Some(ch) = self.current_char {
            if ch == '\n' {
                self.advance();
                break;
            }
            result.push(ch);
            self.advance();
        }

        result
    }

    /// Reads a `/* ... */` block comment, returning its text without the
    /// delimiters. An unterminated comment consumes the rest of the input.
    ///
    /// The caller guarantees the current characters are "/*".
    fn read_block_comment(&mut self) -> String {
        let mut result = String::new();

        // Skip the "/*" opener.
        self.advance();
        self.advance();

        while let Some(ch) = self.current_char {
            if ch == '*' && self.peek(1) == Some('/') {
                self.advance();
                self.advance();
                break;
            }
            result.push(ch);
            self.advance();
        }

        result
    }
356
    /// Advances past any mix of whitespace, `--` line comments, and
    /// `/* ... */` block comments, stopping at the next significant
    /// character (or EOF).
    fn skip_whitespace_and_comments(&mut self) {
        loop {
            // Whitespace run.
            while let Some(ch) = self.current_char {
                if ch.is_whitespace() {
                    self.advance();
                } else {
                    break;
                }
            }

            match self.current_char {
                // Line comment: consume through the newline (or to EOF).
                Some('-') if self.peek(1) == Some('-') => {
                    self.advance();
                    self.advance();
                    while let Some(ch) = self.current_char {
                        self.advance();
                        if ch == '\n' {
                            break;
                        }
                    }
                }
                // Block comment: consume through "*/" (or to EOF if
                // unterminated).
                Some('/') if self.peek(1) == Some('*') => {
                    self.advance();
                    self.advance();
                    while let Some(ch) = self.current_char {
                        if ch == '*' && self.peek(1) == Some('/') {
                            self.advance();
                            self.advance();
                            break;
                        }
                        self.advance();
                    }
                }
                // Neither whitespace nor a comment opener: done.
                _ => {
                    break;
                }
            }
        }
    }
403
404 fn read_identifier(&mut self) -> String {
405 let mut result = String::new();
406 while let Some(ch) = self.current_char {
407 if ch.is_alphanumeric() || ch == '_' {
408 result.push(ch);
409 self.advance();
410 } else {
411 break;
412 }
413 }
414 result
415 }
416
    /// Reads a quoted literal starting at the current quote character,
    /// returning its contents without the quotes.
    ///
    /// No escape sequences are recognized; an unterminated literal consumes
    /// the rest of the input. Callers guarantee `current_char` is the
    /// opening quote, so the `unwrap` cannot panic.
    fn read_string(&mut self) -> String {
        let mut result = String::new();
        let quote_char = self.current_char.unwrap();
        self.advance();
        while let Some(ch) = self.current_char {
            if ch == quote_char {
                self.advance();
                break;
            }
            result.push(ch);
            self.advance();
        }
        result
    }
432
433 fn read_number(&mut self) -> String {
434 let mut result = String::new();
435 let has_e = false;
436
437 while let Some(ch) = self.current_char {
439 if !has_e && (ch.is_numeric() || ch == '.') {
440 result.push(ch);
441 self.advance();
442 } else if (ch == 'e' || ch == 'E') && !has_e && !result.is_empty() {
443 result.push(ch);
445 self.advance();
446 let _ = has_e; if let Some(sign) = self.current_char {
450 if sign == '+' || sign == '-' {
451 result.push(sign);
452 self.advance();
453 }
454 }
455
456 while let Some(digit) = self.current_char {
458 if digit.is_numeric() {
459 result.push(digit);
460 self.advance();
461 } else {
462 break;
463 }
464 }
465 break; } else {
467 break;
468 }
469 }
470 result
471 }
472
473 pub fn next_token_with_comments(&mut self) -> Token {
476 self.skip_whitespace();
478
479 match self.current_char {
480 None => Token::Eof,
481 Some('-') if self.peek(1) == Some('-') => {
483 let comment_text = self.read_line_comment();
484 Token::LineComment(comment_text)
485 }
486 Some('/') if self.peek(1) == Some('*') => {
487 let comment_text = self.read_block_comment();
488 Token::BlockComment(comment_text)
489 }
490 Some('*') => {
491 self.advance();
492 Token::Star
493 }
494 Some('+') => {
495 self.advance();
496 Token::Plus
497 }
498 Some('/') => {
499 self.advance();
501 Token::Divide
502 }
503 Some('%') => {
504 self.advance();
505 Token::Modulo
506 }
507 Some('.') => {
508 self.advance();
509 Token::Dot
510 }
511 Some(',') => {
512 self.advance();
513 Token::Comma
514 }
515 Some(':') => {
516 self.advance();
517 Token::Colon
518 }
519 Some('(') => {
520 self.advance();
521 Token::LeftParen
522 }
523 Some(')') => {
524 self.advance();
525 Token::RightParen
526 }
527 Some('=') => {
528 self.advance();
529 Token::Equal
530 }
531 Some('<') => {
532 self.advance();
533 if self.current_char == Some('=') {
534 self.advance();
535 Token::LessThanOrEqual
536 } else if self.current_char == Some('>') {
537 self.advance();
538 Token::NotEqual
539 } else {
540 Token::LessThan
541 }
542 }
543 Some('>') => {
544 self.advance();
545 if self.current_char == Some('=') {
546 self.advance();
547 Token::GreaterThanOrEqual
548 } else {
549 Token::GreaterThan
550 }
551 }
552 Some('!') if self.peek(1) == Some('=') => {
553 self.advance();
554 self.advance();
555 Token::NotEqual
556 }
557 Some('|') if self.peek(1) == Some('|') => {
558 self.advance();
559 self.advance();
560 Token::Concat
561 }
562 Some('"') => {
563 let ident_val = self.read_string();
564 Token::QuotedIdentifier(ident_val)
565 }
566 Some('$') => {
567 if self.peek_string(6) == "$JSON$" {
568 let json_content = self.read_json_block();
569 Token::JsonBlock(json_content)
570 } else {
571 let ident = self.read_identifier();
572 Token::Identifier(ident)
573 }
574 }
575 Some('\'') => {
576 let string_val = self.read_string();
577 Token::StringLiteral(string_val)
578 }
579 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
580 self.advance();
581 let num = self.read_number();
582 Token::NumberLiteral(format!("-{num}"))
583 }
584 Some('-') => {
585 self.advance();
586 Token::Minus
587 }
588 Some(ch) if ch.is_numeric() => {
589 let num = self.read_number();
590 Token::NumberLiteral(num)
591 }
592 Some('#') => {
593 self.advance();
594 let table_name = self.read_identifier();
595 if table_name.is_empty() {
596 Token::Identifier("#".to_string())
597 } else {
598 Token::Identifier(format!("#{}", table_name))
599 }
600 }
601 Some(ch) if ch.is_alphabetic() || ch == '_' => {
602 let ident = self.read_identifier();
603 Token::from_keyword(&ident).unwrap_or_else(|| Token::Identifier(ident))
604 }
605 Some(ch) => {
606 self.advance();
607 Token::Identifier(ch.to_string())
608 }
609 }
610 }
611
    /// Returns the next token according to the lexer's configured mode.
    pub fn next_token(&mut self) -> Token {
        match self.mode {
            LexerMode::SkipComments => self.next_token_skip_comments(),
            LexerMode::PreserveComments => self.next_token_with_comments(),
        }
    }
619
620 fn next_token_skip_comments(&mut self) -> Token {
622 self.skip_whitespace_and_comments();
623
624 match self.current_char {
625 None => Token::Eof,
626 Some('*') => {
627 self.advance();
628 Token::Star }
632 Some('+') => {
633 self.advance();
634 Token::Plus
635 }
636 Some('/') => {
637 if self.peek(1) == Some('*') {
639 self.skip_whitespace_and_comments();
642 return self.next_token();
643 }
644 self.advance();
645 Token::Divide
646 }
647 Some('%') => {
648 self.advance();
649 Token::Modulo
650 }
651 Some('.') => {
652 self.advance();
653 Token::Dot
654 }
655 Some(',') => {
656 self.advance();
657 Token::Comma
658 }
659 Some(':') => {
660 self.advance();
661 Token::Colon
662 }
663 Some('(') => {
664 self.advance();
665 Token::LeftParen
666 }
667 Some(')') => {
668 self.advance();
669 Token::RightParen
670 }
671 Some('=') => {
672 self.advance();
673 Token::Equal
674 }
675 Some('<') => {
676 self.advance();
677 if self.current_char == Some('=') {
678 self.advance();
679 Token::LessThanOrEqual
680 } else if self.current_char == Some('>') {
681 self.advance();
682 Token::NotEqual
683 } else {
684 Token::LessThan
685 }
686 }
687 Some('>') => {
688 self.advance();
689 if self.current_char == Some('=') {
690 self.advance();
691 Token::GreaterThanOrEqual
692 } else {
693 Token::GreaterThan
694 }
695 }
696 Some('!') if self.peek(1) == Some('=') => {
697 self.advance();
698 self.advance();
699 Token::NotEqual
700 }
701 Some('|') if self.peek(1) == Some('|') => {
702 self.advance();
703 self.advance();
704 Token::Concat
705 }
706 Some('"') => {
707 let ident_val = self.read_string();
709 Token::QuotedIdentifier(ident_val)
710 }
711 Some('$') => {
712 if self.peek_string(6) == "$JSON$" {
714 let json_content = self.read_json_block();
715 Token::JsonBlock(json_content)
716 } else {
717 let ident = self.read_identifier();
720 Token::Identifier(ident)
721 }
722 }
723 Some('\'') => {
724 let string_val = self.read_string();
726 Token::StringLiteral(string_val)
727 }
728 Some('-') if self.peek(1) == Some('-') => {
729 self.skip_whitespace_and_comments();
731 self.next_token()
732 }
733 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
734 self.advance(); let num = self.read_number();
737 Token::NumberLiteral(format!("-{num}"))
738 }
739 Some('-') => {
740 self.advance();
742 Token::Minus
743 }
744 Some(ch) if ch.is_numeric() => {
745 let num = self.read_number();
746 Token::NumberLiteral(num)
747 }
748 Some('#') => {
749 self.advance(); let table_name = self.read_identifier();
752 if table_name.is_empty() {
753 Token::Identifier("#".to_string())
755 } else {
756 Token::Identifier(format!("#{}", table_name))
758 }
759 }
760 Some(ch) if ch.is_alphabetic() || ch == '_' => {
761 let ident = self.read_identifier();
762 match ident.to_uppercase().as_str() {
763 "SELECT" => Token::Select,
764 "FROM" => Token::From,
765 "WHERE" => Token::Where,
766 "WITH" => Token::With,
767 "AND" => Token::And,
768 "OR" => Token::Or,
769 "IN" => Token::In,
770 "NOT" => Token::Not,
771 "BETWEEN" => Token::Between,
772 "LIKE" => Token::Like,
773 "IS" => Token::Is,
774 "NULL" => Token::Null,
775 "ORDER" if self.peek_keyword("BY") => {
776 self.skip_whitespace();
777 self.read_identifier(); Token::OrderBy
779 }
780 "GROUP" if self.peek_keyword("BY") => {
781 self.skip_whitespace();
782 self.read_identifier(); Token::GroupBy
784 }
785 "HAVING" => Token::Having,
786 "AS" => Token::As,
787 "ASC" => Token::Asc,
788 "DESC" => Token::Desc,
789 "LIMIT" => Token::Limit,
790 "OFFSET" => Token::Offset,
791 "INTO" => Token::Into,
792 "DATETIME" => Token::DateTime,
793 "CASE" => Token::Case,
794 "WHEN" => Token::When,
795 "THEN" => Token::Then,
796 "ELSE" => Token::Else,
797 "END" => Token::End,
798 "DISTINCT" => Token::Distinct,
799 "OVER" => Token::Over,
800 "PARTITION" => Token::Partition,
801 "BY" => Token::By,
802 "ROWS" => Token::Rows,
804 "UNBOUNDED" => Token::Unbounded,
807 "PRECEDING" => Token::Preceding,
808 "FOLLOWING" => Token::Following,
809 "CURRENT" => Token::Current,
810 "ROW" => Token::Row,
811 "UNION" => Token::Union,
813 "INTERSECT" => Token::Intersect,
814 "EXCEPT" => Token::Except,
815 "WEB" => Token::Web,
817 "UNNEST" => Token::Unnest,
819 "JOIN" => Token::Join,
821 "INNER" => Token::Inner,
822 "LEFT" => Token::Left,
823 "RIGHT" => Token::Right,
824 "FULL" => Token::Full,
825 "OUTER" => Token::Outer,
826 "ON" => Token::On,
827 "CROSS" => Token::Cross,
828 _ => Token::Identifier(ident),
829 }
830 }
831 Some(ch) => {
832 self.advance();
833 Token::Identifier(ch.to_string())
834 }
835 }
836 }
837
    /// Looks ahead (past whitespace and comments) to test whether the next
    /// word equals `keyword` (case-insensitive, `keyword` must be given in
    /// uppercase). The lexer position is saved and restored, so nothing is
    /// consumed.
    fn peek_keyword(&mut self, keyword: &str) -> bool {
        let saved_pos = self.position;
        let saved_char = self.current_char;

        self.skip_whitespace_and_comments();
        let next_word = self.read_identifier();
        let matches = next_word.to_uppercase() == keyword;

        // Backtrack: this is a pure lookahead.
        self.position = saved_pos;
        self.current_char = saved_char;

        matches
    }
852
    /// Returns the current character offset into the input.
    #[must_use]
    pub fn get_position(&self) -> usize {
        self.position
    }
857
858 pub fn tokenize_all(&mut self) -> Vec<Token> {
859 let mut tokens = Vec::new();
860 loop {
861 let token = self.next_token();
862 if matches!(token, Token::Eof) {
863 tokens.push(token);
864 break;
865 }
866 tokens.push(token);
867 }
868 tokens
869 }
870
    /// Tokenizes the remaining input, pairing each token with its
    /// `(start, end)` character offsets. `Token::Eof` is not included.
    ///
    /// NOTE(review): whitespace AND comments are skipped before recording
    /// `start_pos`, regardless of the lexer's mode — so in
    /// `PreserveComments` mode comment tokens are still dropped here.
    /// Confirm this asymmetry with `next_token()` is intended.
    pub fn tokenize_all_with_positions(&mut self) -> Vec<(usize, usize, Token)> {
        let mut tokens = Vec::new();
        loop {
            self.skip_whitespace_and_comments();
            let start_pos = self.position;
            let token = self.next_token();
            let end_pos = self.position;

            if matches!(token, Token::Eof) {
                break;
            }
            tokens.push((start_pos, end_pos, token));
        }
        tokens
    }
886
887 pub fn tokenize_all_with_comments(&mut self) -> Vec<Token> {
890 let mut tokens = Vec::new();
891 loop {
892 let token = self.next_token_with_comments();
893 if matches!(token, Token::Eof) {
894 tokens.push(token);
895 break;
896 }
897 tokens.push(token);
898 }
899 tokens
900 }
901}
902
#[cfg(test)]
mod tests {
    //! Tests for comment handling across both lexer modes.
    use super::*;

    // PreserveComments output: a `--` comment becomes one LineComment token
    // whose text excludes the `--` marker and trailing newline.
    #[test]
    fn test_line_comment_tokenization() {
        let sql = "SELECT col1, -- this is a comment\ncol2 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let comment_token = tokens.iter().find(|t| matches!(t, Token::LineComment(_)));
        assert!(comment_token.is_some(), "Should find line comment token");

        if let Some(Token::LineComment(text)) = comment_token {
            assert_eq!(text.trim(), "this is a comment");
        }
    }

    // PreserveComments output: a `/* */` comment becomes one BlockComment
    // token whose text excludes the delimiters.
    #[test]
    fn test_block_comment_tokenization() {
        let sql = "SELECT /* block comment */ col1 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let comment_token = tokens.iter().find(|t| matches!(t, Token::BlockComment(_)));
        assert!(comment_token.is_some(), "Should find block comment token");

        if let Some(Token::BlockComment(text)) = comment_token {
            assert_eq!(text.trim(), "block comment");
        }
    }

    // Mixed line and block comments are all surfaced, each as its own token.
    #[test]
    fn test_multiple_comments() {
        let sql = "-- First comment\nSELECT col1, /* inline */ col2\n-- Second comment\nFROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all_with_comments();

        let line_comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t, Token::LineComment(_)))
            .collect();
        let block_comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t, Token::BlockComment(_)))
            .collect();

        assert_eq!(line_comments.len(), 2, "Should find 2 line comments");
        assert_eq!(block_comments.len(), 1, "Should find 1 block comment");
    }

    // The default-mode `tokenize_all()` must never emit comment tokens.
    #[test]
    fn test_backwards_compatibility() {
        let sql = "SELECT -- comment\ncol1 FROM table";
        let mut lexer = Lexer::new(sql);
        let tokens = lexer.tokenize_all();

        let has_comments = tokens
            .iter()
            .any(|t| matches!(t, Token::LineComment(_) | Token::BlockComment(_)));
        assert!(
            !has_comments,
            "next_token() should skip comments for backwards compatibility"
        );

        assert!(tokens.iter().any(|t| matches!(t, Token::Select)));
        assert!(tokens.iter().any(|t| matches!(t, Token::From)));
    }

    // SkipComments mode: the token stream contains only significant tokens.
    #[test]
    fn test_lexer_mode_skip_comments() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::SkipComments);

        assert_eq!(lexer.next_token(), Token::Select);
        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));
        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    // PreserveComments mode: the comment appears in stream order, between
    // the tokens that surround it in the source.
    #[test]
    fn test_lexer_mode_preserve_comments() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::PreserveComments);

        assert_eq!(lexer.next_token(), Token::Select);
        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));

        let comment_tok = lexer.next_token();
        assert!(matches!(comment_tok, Token::LineComment(_)));
        if let Token::LineComment(text) = comment_tok {
            assert_eq!(text.trim(), "comment");
        }

        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    // Lexer::new must behave like SkipComments: 4 tokens, no comments.
    #[test]
    fn test_lexer_mode_default_is_skip() {
        let sql = "SELECT id -- comment\nFROM table";

        let mut lexer = Lexer::new(sql);

        let mut tok_count = 0;
        loop {
            let tok = lexer.next_token();
            if matches!(tok, Token::Eof) {
                break;
            }
            assert!(!matches!(
                tok,
                Token::LineComment(_) | Token::BlockComment(_)
            ));
            tok_count += 1;
        }

        // SELECT, id, FROM, table
        assert_eq!(tok_count, 4);
    }

    // Block comments: dropped in SkipComments, surfaced in PreserveComments.
    #[test]
    fn test_lexer_mode_block_comments() {
        let sql = "SELECT /* block */ id FROM table";

        let mut lexer_skip = Lexer::with_mode(sql, LexerMode::SkipComments);
        assert_eq!(lexer_skip.next_token(), Token::Select);
        assert_eq!(lexer_skip.next_token(), Token::Identifier("id".into()));
        assert_eq!(lexer_skip.next_token(), Token::From);

        let mut lexer_preserve = Lexer::with_mode(sql, LexerMode::PreserveComments);
        assert_eq!(lexer_preserve.next_token(), Token::Select);

        let comment_tok = lexer_preserve.next_token();
        assert!(matches!(comment_tok, Token::BlockComment(_)));
        if let Token::BlockComment(text) = comment_tok {
            assert_eq!(text.trim(), "block");
        }

        assert_eq!(lexer_preserve.next_token(), Token::Identifier("id".into()));
    }

    // Leading, inline, and trailing comments all appear in source order.
    #[test]
    fn test_lexer_mode_mixed_comments() {
        let sql = "-- leading\nSELECT /* inline */ id -- trailing\nFROM table";

        let mut lexer = Lexer::with_mode(sql, LexerMode::PreserveComments);

        assert!(matches!(lexer.next_token(), Token::LineComment(_)));

        assert_eq!(lexer.next_token(), Token::Select);

        assert!(matches!(lexer.next_token(), Token::BlockComment(_)));

        assert_eq!(lexer.next_token(), Token::Identifier("id".into()));

        assert!(matches!(lexer.next_token(), Token::LineComment(_)));

        assert_eq!(lexer.next_token(), Token::From);
        assert_eq!(lexer.next_token(), Token::Identifier("table".into()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }
}
1090}