/// Controls how the lexer treats SQL comments.
///
/// In `SkipComments` mode (the default) `--` line comments and `/* */`
/// block comments are consumed silently; in `PreserveComments` mode they
/// are surfaced as `Token::LineComment` / `Token::BlockComment`.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum LexerMode {
    /// Comments are skipped; `next_token` never yields comment tokens.
    #[default]
    SkipComments,
    /// Comments are emitted as `LineComment` / `BlockComment` tokens.
    PreserveComments,
}
20
/// A single lexical unit of the SQL dialect understood by this lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // --- Keywords ---
    Select,
    From,
    Where,
    With,
    And,
    Or,
    In,
    Not,
    Between,
    Like,
    ILike,
    Is,
    Null,
    /// Fused two-word keyword: `ORDER BY`.
    OrderBy,
    /// Fused two-word keyword: `GROUP BY`.
    GroupBy,
    Having,
    Qualify,
    As,
    Asc,
    Desc,
    Limit,
    Offset,
    Into,
    DateTime,
    Case,
    When,
    Then,
    Else,
    End,
    Distinct,
    Over,
    Partition,
    By,
    Exclude,
    Pivot,
    Unpivot,
    For,
    Rows,
    Range,
    Unbounded,
    Preceding,
    Following,
    Current,
    Row,
    Union,
    Intersect,
    Except,
    Web,
    File,
    Unnest,
    Join,
    Inner,
    Left,
    Right,
    Full,
    Outer,
    On,
    Cross,

    // --- Identifiers and literals (payload is the raw text) ---
    Identifier(String),
    QuotedIdentifier(String),
    StringLiteral(String),
    JsonBlock(String),
    NumberLiteral(String),

    // --- Punctuation and operators ---
    Star,
    Dot,
    Comma,
    Colon,
    LeftParen,
    RightParen,
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessThanOrEqual,
    GreaterThanOrEqual,
    Plus,
    Minus,
    Divide,
    Modulo,
    /// String concatenation operator `||`.
    Concat,

    // --- Comments (emitted only in `LexerMode::PreserveComments`) ---
    LineComment(String),
    BlockComment(String),

    /// End of input.
    Eof,
}
134
135impl Token {
136 pub fn from_keyword(s: &str) -> Option<Token> {
138 match s.to_uppercase().as_str() {
139 "SELECT" => Some(Token::Select),
140 "FROM" => Some(Token::From),
141 "WHERE" => Some(Token::Where),
142 "WITH" => Some(Token::With),
143 "AND" => Some(Token::And),
144 "OR" => Some(Token::Or),
145 "IN" => Some(Token::In),
146 "NOT" => Some(Token::Not),
147 "BETWEEN" => Some(Token::Between),
148 "LIKE" => Some(Token::Like),
149 "ILIKE" => Some(Token::ILike),
150 "IS" => Some(Token::Is),
151 "NULL" => Some(Token::Null),
152 "ORDER" => Some(Token::OrderBy),
153 "GROUP" => Some(Token::GroupBy),
154 "HAVING" => Some(Token::Having),
155 "QUALIFY" => Some(Token::Qualify),
156 "AS" => Some(Token::As),
157 "ASC" => Some(Token::Asc),
158 "DESC" => Some(Token::Desc),
159 "LIMIT" => Some(Token::Limit),
160 "OFFSET" => Some(Token::Offset),
161 "INTO" => Some(Token::Into),
162 "DISTINCT" => Some(Token::Distinct),
163 "EXCLUDE" => Some(Token::Exclude),
164 "PIVOT" => Some(Token::Pivot),
165 "UNPIVOT" => Some(Token::Unpivot),
166 "FOR" => Some(Token::For),
167 "CASE" => Some(Token::Case),
168 "WHEN" => Some(Token::When),
169 "THEN" => Some(Token::Then),
170 "ELSE" => Some(Token::Else),
171 "END" => Some(Token::End),
172 "OVER" => Some(Token::Over),
173 "PARTITION" => Some(Token::Partition),
174 "BY" => Some(Token::By),
175 "ROWS" => Some(Token::Rows),
176 "RANGE" => Some(Token::Range),
177 "UNBOUNDED" => Some(Token::Unbounded),
178 "PRECEDING" => Some(Token::Preceding),
179 "FOLLOWING" => Some(Token::Following),
180 "CURRENT" => Some(Token::Current),
181 "ROW" => Some(Token::Row),
182 "UNION" => Some(Token::Union),
183 "INTERSECT" => Some(Token::Intersect),
184 "EXCEPT" => Some(Token::Except),
185 "WEB" => Some(Token::Web),
186 "FILE" => Some(Token::File),
187 "UNNEST" => Some(Token::Unnest),
188 "JOIN" => Some(Token::Join),
189 "INNER" => Some(Token::Inner),
190 "LEFT" => Some(Token::Left),
191 "RIGHT" => Some(Token::Right),
192 "FULL" => Some(Token::Full),
193 "OUTER" => Some(Token::Outer),
194 "ON" => Some(Token::On),
195 "CROSS" => Some(Token::Cross),
196 _ => None,
197 }
198 }
199
200 pub fn is_logical_operator(&self) -> bool {
202 matches!(self, Token::And | Token::Or)
203 }
204
205 pub fn is_join_type(&self) -> bool {
207 matches!(
208 self,
209 Token::Inner | Token::Left | Token::Right | Token::Full | Token::Cross
210 )
211 }
212
213 pub fn is_clause_terminator(&self) -> bool {
215 matches!(
216 self,
217 Token::OrderBy
218 | Token::GroupBy
219 | Token::Having
220 | Token::Limit
221 | Token::Offset
222 | Token::Union
223 | Token::Intersect
224 | Token::Except
225 )
226 }
227
228 pub fn as_keyword_str(&self) -> Option<&'static str> {
231 match self {
232 Token::Select => Some("SELECT"),
233 Token::From => Some("FROM"),
234 Token::Where => Some("WHERE"),
235 Token::With => Some("WITH"),
236 Token::And => Some("AND"),
237 Token::Or => Some("OR"),
238 Token::In => Some("IN"),
239 Token::Not => Some("NOT"),
240 Token::Between => Some("BETWEEN"),
241 Token::Like => Some("LIKE"),
242 Token::ILike => Some("ILIKE"),
243 Token::Is => Some("IS"),
244 Token::Null => Some("NULL"),
245 Token::OrderBy => Some("ORDER BY"),
246 Token::GroupBy => Some("GROUP BY"),
247 Token::Having => Some("HAVING"),
248 Token::Qualify => Some("QUALIFY"),
249 Token::As => Some("AS"),
250 Token::Asc => Some("ASC"),
251 Token::Desc => Some("DESC"),
252 Token::Limit => Some("LIMIT"),
253 Token::Offset => Some("OFFSET"),
254 Token::Into => Some("INTO"),
255 Token::Distinct => Some("DISTINCT"),
256 Token::Exclude => Some("EXCLUDE"),
257 Token::Pivot => Some("PIVOT"),
258 Token::Unpivot => Some("UNPIVOT"),
259 Token::For => Some("FOR"),
260 Token::Case => Some("CASE"),
261 Token::When => Some("WHEN"),
262 Token::Then => Some("THEN"),
263 Token::Else => Some("ELSE"),
264 Token::End => Some("END"),
265 Token::Join => Some("JOIN"),
266 Token::Inner => Some("INNER"),
267 Token::Left => Some("LEFT"),
268 Token::Right => Some("RIGHT"),
269 Token::Full => Some("FULL"),
270 Token::Cross => Some("CROSS"),
271 Token::On => Some("ON"),
272 Token::Union => Some("UNION"),
273 Token::Intersect => Some("INTERSECT"),
274 Token::Except => Some("EXCEPT"),
275 Token::Over => Some("OVER"),
276 Token::Partition => Some("PARTITION"),
277 Token::By => Some("BY"),
278 Token::Rows => Some("ROWS"),
279 Token::Range => Some("RANGE"),
280 Token::Preceding => Some("PRECEDING"),
281 Token::Following => Some("FOLLOWING"),
282 Token::Current => Some("CURRENT"),
283 Token::Row => Some("ROW"),
284 Token::Unbounded => Some("UNBOUNDED"),
285 Token::DateTime => Some("DATETIME"),
286 _ => None,
287 }
288 }
289}
290
/// Hand-rolled SQL lexer over a pre-decoded character buffer.
#[derive(Debug, Clone)]
pub struct Lexer {
    // Input decoded to chars up front so multi-byte UTF-8 input is
    // addressed uniformly by character index.
    input: Vec<char>,
    // Index (in characters, not bytes) of `current_char` within `input`.
    position: usize,
    // Character under the cursor; `None` once the input is exhausted.
    current_char: Option<char>,
    // Whether comments are emitted as tokens or silently skipped.
    mode: LexerMode,
}
298
299impl Lexer {
300 #[must_use]
301 pub fn new(input: &str) -> Self {
302 Self::with_mode(input, LexerMode::default())
303 }
304
305 #[must_use]
307 pub fn with_mode(input: &str, mode: LexerMode) -> Self {
308 let chars: Vec<char> = input.chars().collect();
309 let current = chars.first().copied();
310 Self {
311 input: chars,
312 position: 0,
313 current_char: current,
314 mode,
315 }
316 }
317
318 fn advance(&mut self) {
319 self.position += 1;
320 self.current_char = self.input.get(self.position).copied();
321 }
322
323 fn peek(&self, offset: usize) -> Option<char> {
324 self.input.get(self.position + offset).copied()
325 }
326
327 fn peek_string(&self, n: usize) -> String {
329 let mut result = String::new();
330 for i in 0..n {
331 if let Some(ch) = self.input.get(self.position + i) {
332 result.push(*ch);
333 } else {
334 break;
335 }
336 }
337 result
338 }
339
340 fn read_json_block(&mut self) -> String {
343 let mut result = String::new();
344
345 for _ in 0..6 {
347 self.advance();
348 }
349
350 while let Some(ch) = self.current_char {
352 if ch == '$' && self.peek_string(6) == "$JSON$" {
354 for _ in 0..6 {
356 self.advance();
357 }
358 break;
359 }
360 result.push(ch);
361 self.advance();
362 }
363
364 result
365 }
366
367 fn skip_whitespace(&mut self) {
368 while let Some(ch) = self.current_char {
369 if ch.is_whitespace() {
370 self.advance();
371 } else {
372 break;
373 }
374 }
375 }
376
377 fn read_line_comment(&mut self) -> String {
379 let mut result = String::new();
380
381 self.advance();
383 self.advance();
384
385 while let Some(ch) = self.current_char {
387 if ch == '\n' {
388 self.advance(); break;
390 }
391 result.push(ch);
392 self.advance();
393 }
394
395 result
396 }
397
398 fn read_block_comment(&mut self) -> String {
400 let mut result = String::new();
401
402 self.advance();
404 self.advance();
405
406 while let Some(ch) = self.current_char {
408 if ch == '*' && self.peek(1) == Some('/') {
409 self.advance(); self.advance(); break;
412 }
413 result.push(ch);
414 self.advance();
415 }
416
417 result
418 }
419
420 fn skip_whitespace_and_comments(&mut self) {
423 loop {
424 while let Some(ch) = self.current_char {
426 if ch.is_whitespace() {
427 self.advance();
428 } else {
429 break;
430 }
431 }
432
433 match self.current_char {
435 Some('-') if self.peek(1) == Some('-') => {
436 self.advance(); self.advance(); while let Some(ch) = self.current_char {
440 self.advance();
441 if ch == '\n' {
442 break;
443 }
444 }
445 }
446 Some('/') if self.peek(1) == Some('*') => {
447 self.advance(); self.advance(); while let Some(ch) = self.current_char {
451 if ch == '*' && self.peek(1) == Some('/') {
452 self.advance(); self.advance(); break;
455 }
456 self.advance();
457 }
458 }
459 _ => {
460 break;
462 }
463 }
464 }
465 }
466
467 fn read_identifier(&mut self) -> String {
468 let mut result = String::new();
469 while let Some(ch) = self.current_char {
470 if ch.is_alphanumeric() || ch == '_' {
471 result.push(ch);
472 self.advance();
473 } else {
474 break;
475 }
476 }
477 result
478 }
479
480 fn read_string(&mut self) -> String {
481 let mut result = String::new();
482 let quote_char = self.current_char.unwrap(); self.advance(); while let Some(ch) = self.current_char {
486 if ch == quote_char {
487 self.advance(); break;
489 }
490 result.push(ch);
491 self.advance();
492 }
493 result
494 }
495
496 fn read_number(&mut self) -> String {
497 let mut result = String::new();
498 let has_e = false;
499
500 while let Some(ch) = self.current_char {
502 if !has_e && (ch.is_numeric() || ch == '.') {
503 result.push(ch);
504 self.advance();
505 } else if (ch == 'e' || ch == 'E') && !has_e && !result.is_empty() {
506 result.push(ch);
508 self.advance();
509 let _ = has_e; if let Some(sign) = self.current_char {
513 if sign == '+' || sign == '-' {
514 result.push(sign);
515 self.advance();
516 }
517 }
518
519 while let Some(digit) = self.current_char {
521 if digit.is_numeric() {
522 result.push(digit);
523 self.advance();
524 } else {
525 break;
526 }
527 }
528 break; } else {
530 break;
531 }
532 }
533 result
534 }
535
536 pub fn next_token_with_comments(&mut self) -> Token {
539 self.skip_whitespace();
541
542 match self.current_char {
543 None => Token::Eof,
544 Some('-') if self.peek(1) == Some('-') => {
546 let comment_text = self.read_line_comment();
547 Token::LineComment(comment_text)
548 }
549 Some('/') if self.peek(1) == Some('*') => {
550 let comment_text = self.read_block_comment();
551 Token::BlockComment(comment_text)
552 }
553 Some('*') => {
554 self.advance();
555 Token::Star
556 }
557 Some('+') => {
558 self.advance();
559 Token::Plus
560 }
561 Some('/') => {
562 self.advance();
564 Token::Divide
565 }
566 Some('%') => {
567 self.advance();
568 Token::Modulo
569 }
570 Some('.') => {
571 self.advance();
572 Token::Dot
573 }
574 Some(',') => {
575 self.advance();
576 Token::Comma
577 }
578 Some(':') => {
579 self.advance();
580 Token::Colon
581 }
582 Some('(') => {
583 self.advance();
584 Token::LeftParen
585 }
586 Some(')') => {
587 self.advance();
588 Token::RightParen
589 }
590 Some('=') => {
591 self.advance();
592 Token::Equal
593 }
594 Some('<') => {
595 self.advance();
596 if self.current_char == Some('=') {
597 self.advance();
598 Token::LessThanOrEqual
599 } else if self.current_char == Some('>') {
600 self.advance();
601 Token::NotEqual
602 } else {
603 Token::LessThan
604 }
605 }
606 Some('>') => {
607 self.advance();
608 if self.current_char == Some('=') {
609 self.advance();
610 Token::GreaterThanOrEqual
611 } else {
612 Token::GreaterThan
613 }
614 }
615 Some('!') if self.peek(1) == Some('=') => {
616 self.advance();
617 self.advance();
618 Token::NotEqual
619 }
620 Some('|') if self.peek(1) == Some('|') => {
621 self.advance();
622 self.advance();
623 Token::Concat
624 }
625 Some('"') => {
626 let ident_val = self.read_string();
627 Token::QuotedIdentifier(ident_val)
628 }
629 Some('$') => {
630 if self.peek_string(6) == "$JSON$" {
631 let json_content = self.read_json_block();
632 Token::JsonBlock(json_content)
633 } else {
634 let ident = self.read_identifier();
635 Token::Identifier(ident)
636 }
637 }
638 Some('\'') => {
639 let string_val = self.read_string();
640 Token::StringLiteral(string_val)
641 }
642 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
643 self.advance();
644 let num = self.read_number();
645 Token::NumberLiteral(format!("-{num}"))
646 }
647 Some('-') => {
648 self.advance();
649 Token::Minus
650 }
651 Some(ch) if ch.is_numeric() => {
652 let num = self.read_number();
653 Token::NumberLiteral(num)
654 }
655 Some('#') => {
656 self.advance();
657 let table_name = self.read_identifier();
658 if table_name.is_empty() {
659 Token::Identifier("#".to_string())
660 } else {
661 Token::Identifier(format!("#{}", table_name))
662 }
663 }
664 Some(ch) if ch.is_alphabetic() || ch == '_' => {
665 let ident = self.read_identifier();
666 match ident.to_uppercase().as_str() {
668 "ORDER" if self.peek_keyword("BY") => {
669 self.skip_whitespace();
670 self.read_identifier(); Token::OrderBy
672 }
673 "GROUP" if self.peek_keyword("BY") => {
674 self.skip_whitespace();
675 self.read_identifier(); Token::GroupBy
677 }
678 _ => Token::from_keyword(&ident).unwrap_or_else(|| Token::Identifier(ident)),
679 }
680 }
681 Some(ch) => {
682 self.advance();
683 Token::Identifier(ch.to_string())
684 }
685 }
686 }
687
688 pub fn next_token(&mut self) -> Token {
690 match self.mode {
691 LexerMode::SkipComments => self.next_token_skip_comments(),
692 LexerMode::PreserveComments => self.next_token_with_comments(),
693 }
694 }
695
696 fn next_token_skip_comments(&mut self) -> Token {
698 self.skip_whitespace_and_comments();
699
700 match self.current_char {
701 None => Token::Eof,
702 Some('*') => {
703 self.advance();
704 Token::Star }
708 Some('+') => {
709 self.advance();
710 Token::Plus
711 }
712 Some('/') => {
713 if self.peek(1) == Some('*') {
715 self.skip_whitespace_and_comments();
718 return self.next_token();
719 }
720 self.advance();
721 Token::Divide
722 }
723 Some('%') => {
724 self.advance();
725 Token::Modulo
726 }
727 Some('.') => {
728 self.advance();
729 Token::Dot
730 }
731 Some(',') => {
732 self.advance();
733 Token::Comma
734 }
735 Some(':') => {
736 self.advance();
737 Token::Colon
738 }
739 Some('(') => {
740 self.advance();
741 Token::LeftParen
742 }
743 Some(')') => {
744 self.advance();
745 Token::RightParen
746 }
747 Some('=') => {
748 self.advance();
749 Token::Equal
750 }
751 Some('<') => {
752 self.advance();
753 if self.current_char == Some('=') {
754 self.advance();
755 Token::LessThanOrEqual
756 } else if self.current_char == Some('>') {
757 self.advance();
758 Token::NotEqual
759 } else {
760 Token::LessThan
761 }
762 }
763 Some('>') => {
764 self.advance();
765 if self.current_char == Some('=') {
766 self.advance();
767 Token::GreaterThanOrEqual
768 } else {
769 Token::GreaterThan
770 }
771 }
772 Some('!') if self.peek(1) == Some('=') => {
773 self.advance();
774 self.advance();
775 Token::NotEqual
776 }
777 Some('|') if self.peek(1) == Some('|') => {
778 self.advance();
779 self.advance();
780 Token::Concat
781 }
782 Some('"') => {
783 let ident_val = self.read_string();
785 Token::QuotedIdentifier(ident_val)
786 }
787 Some('$') => {
788 if self.peek_string(6) == "$JSON$" {
790 let json_content = self.read_json_block();
791 Token::JsonBlock(json_content)
792 } else {
793 let ident = self.read_identifier();
796 Token::Identifier(ident)
797 }
798 }
799 Some('\'') => {
800 let string_val = self.read_string();
802 Token::StringLiteral(string_val)
803 }
804 Some('-') if self.peek(1) == Some('-') => {
805 self.skip_whitespace_and_comments();
807 self.next_token()
808 }
809 Some('-') if self.peek(1).is_some_and(char::is_numeric) => {
810 self.advance(); let num = self.read_number();
813 Token::NumberLiteral(format!("-{num}"))
814 }
815 Some('-') => {
816 self.advance();
818 Token::Minus
819 }
820 Some(ch) if ch.is_numeric() => {
821 let num = self.read_number();
822 Token::NumberLiteral(num)
823 }
824 Some('#') => {
825 self.advance(); let table_name = self.read_identifier();
828 if table_name.is_empty() {
829 Token::Identifier("#".to_string())
831 } else {
832 Token::Identifier(format!("#{}", table_name))
834 }
835 }
836 Some(ch) if ch.is_alphabetic() || ch == '_' => {
837 let ident = self.read_identifier();
838 match ident.to_uppercase().as_str() {
839 "SELECT" => Token::Select,
840 "FROM" => Token::From,
841 "WHERE" => Token::Where,
842 "WITH" => Token::With,
843 "AND" => Token::And,
844 "OR" => Token::Or,
845 "IN" => Token::In,
846 "NOT" => Token::Not,
847 "BETWEEN" => Token::Between,
848 "LIKE" => Token::Like,
849 "ILIKE" => Token::ILike,
850 "IS" => Token::Is,
851 "NULL" => Token::Null,
852 "ORDER" if self.peek_keyword("BY") => {
853 self.skip_whitespace();
854 self.read_identifier(); Token::OrderBy
856 }
857 "GROUP" if self.peek_keyword("BY") => {
858 self.skip_whitespace();
859 self.read_identifier(); Token::GroupBy
861 }
862 "HAVING" => Token::Having,
863 "QUALIFY" => Token::Qualify,
864 "AS" => Token::As,
865 "ASC" => Token::Asc,
866 "DESC" => Token::Desc,
867 "LIMIT" => Token::Limit,
868 "OFFSET" => Token::Offset,
869 "INTO" => Token::Into,
870 "DATETIME" => Token::DateTime,
871 "CASE" => Token::Case,
872 "WHEN" => Token::When,
873 "THEN" => Token::Then,
874 "ELSE" => Token::Else,
875 "END" => Token::End,
876 "DISTINCT" => Token::Distinct,
877 "EXCLUDE" => Token::Exclude,
878 "PIVOT" => Token::Pivot,
879 "UNPIVOT" => Token::Unpivot,
880 "FOR" => Token::For,
881 "OVER" => Token::Over,
882 "PARTITION" => Token::Partition,
883 "BY" => Token::By,
884 "ROWS" => Token::Rows,
886 "UNBOUNDED" => Token::Unbounded,
889 "PRECEDING" => Token::Preceding,
890 "FOLLOWING" => Token::Following,
891 "CURRENT" => Token::Current,
892 "ROW" => Token::Row,
893 "UNION" => Token::Union,
895 "INTERSECT" => Token::Intersect,
896 "EXCEPT" => Token::Except,
897 "WEB" => Token::Web,
899 "FILE" => Token::File,
900 "UNNEST" => Token::Unnest,
902 "JOIN" => Token::Join,
904 "INNER" => Token::Inner,
905 "LEFT" => Token::Left,
906 "RIGHT" => Token::Right,
907 "FULL" => Token::Full,
908 "OUTER" => Token::Outer,
909 "ON" => Token::On,
910 "CROSS" => Token::Cross,
911 _ => Token::Identifier(ident),
912 }
913 }
914 Some(ch) => {
915 self.advance();
916 Token::Identifier(ch.to_string())
917 }
918 }
919 }
920
921 fn peek_keyword(&mut self, keyword: &str) -> bool {
922 let saved_pos = self.position;
923 let saved_char = self.current_char;
924
925 self.skip_whitespace_and_comments();
926 let next_word = self.read_identifier();
927 let matches = next_word.to_uppercase() == keyword;
928
929 self.position = saved_pos;
931 self.current_char = saved_char;
932
933 matches
934 }
935
936 #[must_use]
937 pub fn get_position(&self) -> usize {
938 self.position
939 }
940
941 pub fn tokenize_all(&mut self) -> Vec<Token> {
942 let mut tokens = Vec::new();
943 loop {
944 let token = self.next_token();
945 if matches!(token, Token::Eof) {
946 tokens.push(token);
947 break;
948 }
949 tokens.push(token);
950 }
951 tokens
952 }
953
954 pub fn tokenize_all_with_positions(&mut self) -> Vec<(usize, usize, Token)> {
955 let mut tokens = Vec::new();
956 loop {
957 self.skip_whitespace_and_comments();
958 let start_pos = self.position;
959 let token = self.next_token();
960 let end_pos = self.position;
961
962 if matches!(token, Token::Eof) {
963 break;
964 }
965 tokens.push((start_pos, end_pos, token));
966 }
967 tokens
968 }
969
970 pub fn tokenize_all_with_comments(&mut self) -> Vec<Token> {
973 let mut tokens = Vec::new();
974 loop {
975 let token = self.next_token_with_comments();
976 if matches!(token, Token::Eof) {
977 tokens.push(token);
978 break;
979 }
980 tokens.push(token);
981 }
982 tokens
983 }
984}
985
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_line_comment_tokenization() {
        let sql = "SELECT col1, -- this is a comment\ncol2 FROM table";
        let toks = Lexer::new(sql).tokenize_all_with_comments();

        let found = toks.iter().find(|t| matches!(t, Token::LineComment(_)));
        assert!(found.is_some(), "Should find line comment token");
        if let Some(Token::LineComment(text)) = found {
            assert_eq!(text.trim(), "this is a comment");
        }
    }

    #[test]
    fn test_block_comment_tokenization() {
        let sql = "SELECT /* block comment */ col1 FROM table";
        let toks = Lexer::new(sql).tokenize_all_with_comments();

        let found = toks.iter().find(|t| matches!(t, Token::BlockComment(_)));
        assert!(found.is_some(), "Should find block comment token");
        if let Some(Token::BlockComment(text)) = found {
            assert_eq!(text.trim(), "block comment");
        }
    }

    #[test]
    fn test_multiple_comments() {
        let sql = "-- First comment\nSELECT col1, /* inline */ col2\n-- Second comment\nFROM table";
        let toks = Lexer::new(sql).tokenize_all_with_comments();

        let line_count = toks
            .iter()
            .filter(|t| matches!(t, Token::LineComment(_)))
            .count();
        let block_count = toks
            .iter()
            .filter(|t| matches!(t, Token::BlockComment(_)))
            .count();

        assert_eq!(line_count, 2, "Should find 2 line comments");
        assert_eq!(block_count, 1, "Should find 1 block comment");
    }

    #[test]
    fn test_backwards_compatibility() {
        let toks = Lexer::new("SELECT -- comment\ncol1 FROM table").tokenize_all();

        assert!(
            !toks
                .iter()
                .any(|t| matches!(t, Token::LineComment(_) | Token::BlockComment(_))),
            "next_token() should skip comments for backwards compatibility"
        );
        assert!(toks.iter().any(|t| matches!(t, Token::Select)));
        assert!(toks.iter().any(|t| matches!(t, Token::From)));
    }

    #[test]
    fn test_lexer_mode_skip_comments() {
        let mut lx = Lexer::with_mode("SELECT id -- comment\nFROM table", LexerMode::SkipComments);

        assert_eq!(lx.next_token(), Token::Select);
        assert_eq!(lx.next_token(), Token::Identifier("id".into()));
        assert_eq!(lx.next_token(), Token::From);
        assert_eq!(lx.next_token(), Token::Identifier("table".into()));
        assert_eq!(lx.next_token(), Token::Eof);
    }

    #[test]
    fn test_lexer_mode_preserve_comments() {
        let mut lx =
            Lexer::with_mode("SELECT id -- comment\nFROM table", LexerMode::PreserveComments);

        assert_eq!(lx.next_token(), Token::Select);
        assert_eq!(lx.next_token(), Token::Identifier("id".into()));

        match lx.next_token() {
            Token::LineComment(text) => assert_eq!(text.trim(), "comment"),
            other => panic!("expected a line comment, got {other:?}"),
        }

        assert_eq!(lx.next_token(), Token::From);
        assert_eq!(lx.next_token(), Token::Identifier("table".into()));
        assert_eq!(lx.next_token(), Token::Eof);
    }

    #[test]
    fn test_lexer_mode_default_is_skip() {
        let mut lx = Lexer::new("SELECT id -- comment\nFROM table");
        let mut count = 0;

        loop {
            let tok = lx.next_token();
            if matches!(tok, Token::Eof) {
                break;
            }
            assert!(!matches!(
                tok,
                Token::LineComment(_) | Token::BlockComment(_)
            ));
            count += 1;
        }

        assert_eq!(count, 4);
    }

    #[test]
    fn test_lexer_mode_block_comments() {
        let sql = "SELECT /* block */ id FROM table";

        let mut skipping = Lexer::with_mode(sql, LexerMode::SkipComments);
        assert_eq!(skipping.next_token(), Token::Select);
        assert_eq!(skipping.next_token(), Token::Identifier("id".into()));
        assert_eq!(skipping.next_token(), Token::From);

        let mut preserving = Lexer::with_mode(sql, LexerMode::PreserveComments);
        assert_eq!(preserving.next_token(), Token::Select);
        match preserving.next_token() {
            Token::BlockComment(text) => assert_eq!(text.trim(), "block"),
            other => panic!("expected a block comment, got {other:?}"),
        }
        assert_eq!(preserving.next_token(), Token::Identifier("id".into()));
    }

    #[test]
    fn test_lexer_mode_mixed_comments() {
        let mut lx = Lexer::with_mode(
            "-- leading\nSELECT /* inline */ id -- trailing\nFROM table",
            LexerMode::PreserveComments,
        );

        assert!(matches!(lx.next_token(), Token::LineComment(_)));
        assert_eq!(lx.next_token(), Token::Select);
        assert!(matches!(lx.next_token(), Token::BlockComment(_)));
        assert_eq!(lx.next_token(), Token::Identifier("id".into()));
        assert!(matches!(lx.next_token(), Token::LineComment(_)));
        assert_eq!(lx.next_token(), Token::From);
        assert_eq!(lx.next_token(), Token::Identifier("table".into()));
        assert_eq!(lx.next_token(), Token::Eof);
    }

    #[test]
    fn test_pivot_keywords() {
        let mut lx = Lexer::new("PIVOT (MAX(amount) FOR month IN (val1, val2)) UNPIVOT");

        assert_eq!(lx.next_token(), Token::Pivot, "First token should be PIVOT");
        assert_eq!(lx.next_token(), Token::LeftParen);
        assert!(matches!(lx.next_token(), Token::Identifier(_)));
        assert_eq!(lx.next_token(), Token::LeftParen);
        assert!(matches!(lx.next_token(), Token::Identifier(_)));
        assert_eq!(lx.next_token(), Token::RightParen);
        assert_eq!(lx.next_token(), Token::For, "Should tokenize FOR");
        assert!(matches!(lx.next_token(), Token::Identifier(_)));
        assert_eq!(lx.next_token(), Token::In, "Should tokenize IN");
        assert_eq!(lx.next_token(), Token::LeftParen);
        assert!(matches!(lx.next_token(), Token::Identifier(_)));
        assert_eq!(lx.next_token(), Token::Comma);
        assert!(matches!(lx.next_token(), Token::Identifier(_)));
        assert_eq!(lx.next_token(), Token::RightParen);
        assert_eq!(lx.next_token(), Token::RightParen);
        assert_eq!(lx.next_token(), Token::Unpivot, "Should tokenize UNPIVOT");
        assert_eq!(lx.next_token(), Token::Eof);
    }
}