1#![allow(dead_code)]
2use std::fmt::Write;
3use std::iter::Iterator;
4
5#[derive(Debug)]
7pub struct Tokenizer<'a> {
8 input: &'a str,
9 chars: std::str::Chars<'a>,
10 c: Option<char>,
11 p: usize,
12 backend: TokenizerBackend,
13 next_single_quote_uses_backslash_escape: bool,
14}
15
/// A lexical unit produced by the tokenizer.
///
/// Every variant wraps the exact slice of the input it was read from,
/// including any delimiters.
#[derive(Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Token<'a> {
    /// Text enclosed in `` ` ``, `[` `]`, `'` or `"`, delimiters included.
    Quoted(&'a str),
    /// A run of alphanumeric characters, optionally continued by `_` or `$`.
    Unquoted(&'a str),
    /// A run of spaces, tabs, carriage returns and line feeds.
    Space(&'a str),
    /// A single character that is neither whitespace nor alphanumeric.
    Punctuation(&'a str),
    /// A `-- ...` line comment or a `/* ... */` block comment.
    Comment(&'a str),
}
25
/// SQL dialect the tokenizer should follow.
///
/// The dialect only influences how a backslash inside a single-quoted string
/// is interpreted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TokenizerBackend {
    /// A backslash always escapes the next character inside `'...'`.
    Mysql,
    /// A backslash escapes inside `'...'` only when the string directly
    /// follows an `E` prefix (e.g. `E'a\'b'`).
    Postgres,
    /// A backslash never escapes inside `'...'`.
    Sqlite,
}
32
33impl TokenizerBackend {
34 pub(crate) fn from_query_builder(query_builder: &impl crate::QueryBuilder) -> Self {
35 let (_, numbered) = query_builder.placeholder();
36 if numbered {
37 Self::Postgres
38 } else if query_builder.quote().left() == '`' {
39 Self::Mysql
40 } else {
41 Self::Sqlite
42 }
43 }
44}
45
46impl<'a> Tokenizer<'a> {
47 pub fn new(string: &'a str) -> Self {
48 let mut chars = string.chars();
49 let c = chars.next();
50 Self {
51 input: string,
52 chars,
53 c,
54 p: 0,
55 backend: TokenizerBackend::Mysql,
56 next_single_quote_uses_backslash_escape: false,
57 }
58 }
59
60 pub(crate) fn for_backend(mut self, backend: TokenizerBackend) -> Self {
61 self.backend = backend;
62 self
63 }
64
65 pub(crate) fn for_query_builder(self, query_builder: &impl crate::QueryBuilder) -> Self {
66 self.for_backend(TokenizerBackend::from_query_builder(query_builder))
67 }
68
69 pub fn iter(self) -> impl Iterator<Item = Token<'a>> {
70 self
71 }
72
73 fn get(&self) -> char {
74 self.c.unwrap()
75 }
76
77 fn peek(&self) -> char {
78 self.c.unwrap_or('\0')
79 }
80
81 fn inc(&mut self) {
82 let c = self.get();
83 self.c = self.chars.next();
84 self.p += c.len_utf8();
85 }
86
87 fn end(&self) -> bool {
88 self.c.is_none()
89 }
90
91 fn p_c(&self, c: char) -> usize {
92 self.p + c.len_utf8()
93 }
94
95 fn space(&mut self) -> Option<Token<'a>> {
96 let a = self.p;
97 let mut b = a;
98
99 while !self.end() {
100 let c = self.get();
101 if Self::is_space(c) {
102 b = self.p_c(c);
103 } else {
104 break;
105 }
106 self.inc();
107 }
108
109 if a != b {
110 Some(Token::Space(&self.input[a..b]))
111 } else {
112 None
113 }
114 }
115
116 fn unquoted(&mut self) -> Option<Token<'a>> {
117 let a = self.p;
118 let mut b = a;
119
120 let mut first = true;
121 while !self.end() {
122 let c = self.get();
123 if Self::is_alphanumeric(c) {
124 b = self.p_c(c);
125 first = false;
126 self.inc();
127 } else if !first && Self::is_identifier(c) {
128 b = self.p_c(c);
129 self.inc();
130 } else {
131 break;
132 }
133 }
134
135 if a != b {
136 let string = &self.input[a..b];
137 self.next_single_quote_uses_backslash_escape =
138 self.next_single_quote_is_postgres_escape_string(string);
139 Some(Token::Unquoted(string))
140 } else {
141 None
142 }
143 }
144
145 fn quoted(&mut self) -> Option<Token<'a>> {
146 let a = self.p;
147 let mut b = a;
148
149 let mut first = true;
150 let mut escape = false;
151 let mut start = ' ';
152 let mut uses_backslash_escape = false;
153 while !self.end() {
154 let c = self.get();
155 if first && Self::is_string_delimiter_start(c) {
156 b = self.p_c(c);
157 first = false;
158 start = c;
159 uses_backslash_escape = self.uses_backslash_escape_for(start);
160 self.next_single_quote_uses_backslash_escape = false;
161 self.inc();
162 } else if !first && !escape && Self::is_string_delimiter_end_for(start, c) {
163 b = self.p_c(c);
164 self.inc();
165 if self.end() {
166 break;
167 }
168 if !Self::is_string_escape_for(start, self.get()) {
169 break;
170 } else {
171 b = self.p_c(c);
172 self.inc();
173 }
174 } else if !first {
175 escape = !escape && Self::is_escape_char_for(start, c, uses_backslash_escape);
176 b = self.p_c(c);
177 self.inc();
178 } else {
179 break;
180 }
181 }
182 if a != b {
183 Some(Token::Quoted(&self.input[a..b]))
184 } else {
185 None
186 }
187 }
188
189 fn unquote(mut self) -> String {
191 let mut string = String::new();
192 let mut first = true;
193 let mut escape = false;
194 let mut start = ' ';
195 let mut uses_backslash_escape = false;
196 while !self.end() {
197 let c = self.get();
198 if first && Self::is_string_delimiter_start(c) {
199 first = false;
200 start = c;
201 uses_backslash_escape = self.uses_backslash_escape_for(start);
202 self.next_single_quote_uses_backslash_escape = false;
203 self.inc();
204 } else if !first && !escape && Self::is_string_delimiter_end_for(start, c) {
205 self.inc();
206 if self.end() {
207 break;
208 }
209 if !Self::is_string_escape_for(start, self.get()) {
210 break;
211 } else {
212 string.write_char(c).unwrap();
213 self.inc();
214 }
215 } else if !first {
216 escape = !escape && Self::is_escape_char_for(start, c, uses_backslash_escape);
217 string.write_char(c).unwrap();
218 self.inc();
219 } else {
220 break;
221 }
222 }
223 string
224 }
225
226 fn punctuation(&mut self) -> Option<Token<'a>> {
227 let a = self.p;
228 let mut b = a;
229
230 if !self.end() {
231 let c = self.get();
232 if !Self::is_space(c) && !Self::is_alphanumeric(c) {
233 b = self.p_c(c);
234 self.inc();
235 }
236 }
237
238 if a != b {
239 let string = &self.input[a..b];
240 self.next_single_quote_uses_backslash_escape = false;
241 if string == "-" && self.peek() == '-' {
242 b = self.p_c('-');
243 self.inc();
244 while !self.end() {
245 let c = self.get();
246 if c == '\n' {
247 break;
248 } else {
249 b = self.p_c(c);
250 }
251 self.inc();
252 }
253 let string = &self.input[a..b];
254 return Some(Token::Comment(string));
255 } else if string == "/" && self.peek() == '*' {
256 b = self.p_c('*');
257 self.inc();
258 while !self.end() {
259 let c = self.get();
260 b = self.p_c(c);
261 self.inc();
262 if c == '*' && self.peek() == '/' {
263 b = self.p_c('/');
264 self.inc();
265 break;
266 }
267 }
268 let string = &self.input[a..b];
269 return Some(Token::Comment(string));
270 }
271 Some(Token::Punctuation(string))
272 } else {
273 None
274 }
275 }
276
277 fn is_space(c: char) -> bool {
278 matches!(c, ' ' | '\t' | '\r' | '\n')
279 }
280
281 fn is_identifier(c: char) -> bool {
282 matches!(c, '_' | '$')
283 }
284
285 fn is_alphanumeric(c: char) -> bool {
286 c.is_alphabetic() || c.is_ascii_digit()
287 }
288
289 fn is_string_delimiter_start(c: char) -> bool {
290 matches!(c, '`' | '[' | '\'' | '"')
291 }
292
293 fn is_string_escape_for(start: char, c: char) -> bool {
294 match start {
295 '`' => c == '`',
296 '\'' => c == '\'',
297 '"' => c == '"',
298 _ => false,
299 }
300 }
301
302 fn is_string_delimiter_end_for(start: char, c: char) -> bool {
303 match start {
304 '`' => c == '`',
305 '[' => c == ']',
306 '\'' => c == '\'',
307 '"' => c == '"',
308 _ => false,
309 }
310 }
311
312 fn uses_backslash_escape_for(&self, start: char) -> bool {
313 if start != '\'' {
314 return true;
315 }
316 self.next_single_quote_uses_backslash_escape || self.backend == TokenizerBackend::Mysql
317 }
318
319 fn next_single_quote_is_postgres_escape_string(&self, prefix: &str) -> bool {
320 self.backend == TokenizerBackend::Postgres
321 && prefix.eq_ignore_ascii_case("E")
322 && !self.end()
323 && self.get() == '\''
324 }
325
326 fn is_escape_char_for(start: char, c: char, uses_backslash_escape: bool) -> bool {
327 (start != '\'' || uses_backslash_escape) && c == '\\'
328 }
329}
330
331impl<'a> Iterator for Tokenizer<'a> {
332 type Item = Token<'a>;
333
334 fn next(&mut self) -> Option<Self::Item> {
335 if let Some(space) = self.space() {
336 return Some(space);
337 }
338 if let Some(unquoted) = self.unquoted() {
339 return Some(unquoted);
340 }
341 if let Some(quoted) = self.quoted() {
342 return Some(quoted);
343 }
344 if let Some(punctuation) = self.punctuation() {
345 return Some(punctuation);
346 }
347 None
348 }
349}
350
351impl Token<'_> {
352 pub fn is_quoted(&self) -> bool {
353 matches!(self, Self::Quoted(_))
354 }
355
356 pub fn is_unquoted(&self) -> bool {
357 matches!(self, Self::Unquoted(_))
358 }
359
360 pub fn is_space(&self) -> bool {
361 matches!(self, Self::Space(_))
362 }
363
364 pub fn is_punctuation(&self) -> bool {
365 matches!(self, Self::Punctuation(_))
366 }
367
368 pub fn as_str(&self) -> &str {
369 match self {
370 Self::Quoted(string) => string,
371 Self::Unquoted(string) => string,
372 Self::Space(string) => string,
373 Self::Punctuation(string) => string,
374 Self::Comment(string) => string,
375 }
376 }
377
378 pub fn unquote(&self) -> Option<String> {
379 if self.is_quoted() {
380 let tokenizer = Tokenizer::new(self.as_str());
381 Some(tokenizer.unquote())
382 } else {
383 None
384 }
385 }
386}
387
388impl std::fmt::Display for Token<'_> {
389 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
390 f.write_str(self.as_str())
391 }
392}
393
#[cfg(test)]
mod tests {
    use super::Token::{Comment, Punctuation, Quoted, Space, Unquoted};
    use super::*;

    /// Drains `tokenizer`, checks the tokens against `expected`, and checks
    /// that concatenating the tokens reproduces `sql`.
    #[track_caller]
    fn assert_lexes<'a>(tokenizer: Tokenizer<'a>, sql: &'a str, expected: Vec<Token<'a>>) {
        let tokens: Vec<Token> = tokenizer.iter().collect();
        assert_eq!(tokens, expected);
        assert_eq!(
            sql,
            tokens.iter().map(|token| token.as_str()).collect::<String>()
        );
    }

    /// Checks `sql` against `expected` using the default backend.
    #[track_caller]
    fn assert_tokens<'a>(sql: &'a str, expected: Vec<Token<'a>>) {
        assert_lexes(Tokenizer::new(sql), sql, expected);
    }

    /// Checks `sql` against `expected` using an explicit backend.
    #[track_caller]
    fn assert_tokens_for<'a>(
        backend: TokenizerBackend,
        sql: &'a str,
        expected: Vec<Token<'a>>,
    ) {
        assert_lexes(Tokenizer::new(sql).for_backend(backend), sql, expected);
    }

    #[test]
    fn test_0() {
        assert_tokens("", vec![]);
    }

    #[test]
    fn test_1() {
        assert_tokens(
            "SELECT * FROM `character`",
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Punctuation("*"),
                Space(" "),
                Unquoted("FROM"),
                Space(" "),
                Quoted("`character`"),
            ],
        );
    }

    #[test]
    fn test_2() {
        assert_tokens(
            "SELECT * FROM `character` WHERE id = ?",
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Punctuation("*"),
                Space(" "),
                Unquoted("FROM"),
                Space(" "),
                Quoted("`character`"),
                Space(" "),
                Unquoted("WHERE"),
                Space(" "),
                Unquoted("id"),
                Space(" "),
                Punctuation("="),
                Space(" "),
                Punctuation("?"),
            ],
        );
    }

    #[test]
    fn test_3() {
        assert_tokens(
            r#"? = "?" "#,
            vec![
                Punctuation("?"),
                Space(" "),
                Punctuation("="),
                Space(" "),
                Quoted(r#""?""#),
                Space(" "),
            ],
        );
    }

    #[test]
    fn test_4() {
        assert_tokens(r#""a\"bc""#, vec![Quoted(r#""a\"bc""#)]);
    }

    #[test]
    fn test_5() {
        assert_tokens("abc123", vec![Unquoted("abc123")]);
    }

    #[test]
    fn test_6() {
        assert_tokens(
            "2.3*4/5",
            vec![
                Unquoted("2"),
                Punctuation("."),
                Unquoted("3"),
                Punctuation("*"),
                Unquoted("4"),
                Punctuation("/"),
                Unquoted("5"),
            ],
        );
    }

    #[test]
    fn test_7() {
        assert_tokens(
            r#""a\\" B"#,
            vec![Quoted(r#""a\\""#), Space(" "), Unquoted("B")],
        );
    }

    #[test]
    fn test_8() {
        assert_tokens(r#"`a"b` "#, vec![Quoted(r#"`a"b`"#), Space(" ")]);
    }

    #[test]
    fn test_9() {
        assert_tokens(r"[ab] ", vec![Quoted("[ab]"), Space(" ")]);
    }

    #[test]
    fn test_10() {
        assert_tokens(
            r#" 'a"b' "#,
            vec![Space(" "), Quoted(r#"'a"b'"#), Space(" ")],
        );
    }

    #[test]
    fn test_10_single_quoted_backslash_does_not_escape_quote() {
        assert_tokens_for(
            TokenizerBackend::Postgres,
            r#"ESCAPE '\' OR id = $1"#,
            vec![
                Unquoted("ESCAPE"),
                Space(" "),
                Quoted(r"'\'"),
                Space(" "),
                Unquoted("OR"),
                Space(" "),
                Unquoted("id"),
                Space(" "),
                Punctuation("="),
                Space(" "),
                Punctuation("$"),
                Unquoted("1"),
            ],
        );
    }

    #[test]
    fn test_10_mysql_single_quoted_backslash_escapes_quote() {
        assert_tokens_for(
            TokenizerBackend::Mysql,
            r#"'a\'b' OR id = ?"#,
            vec![
                Quoted(r"'a\'b'"),
                Space(" "),
                Unquoted("OR"),
                Space(" "),
                Unquoted("id"),
                Space(" "),
                Punctuation("="),
                Space(" "),
                Punctuation("?"),
            ],
        );
    }

    #[test]
    fn test_10_postgres_escape_string_backslash_escapes_quote() {
        assert_tokens_for(
            TokenizerBackend::Postgres,
            r#"E'a\'b' OR id = $1"#,
            vec![
                Unquoted("E"),
                Quoted(r"'a\'b'"),
                Space(" "),
                Unquoted("OR"),
                Space(" "),
                Unquoted("id"),
                Space(" "),
                Punctuation("="),
                Space(" "),
                Punctuation("$"),
                Unquoted("1"),
            ],
        );
    }

    #[test]
    fn test_10_sqlite_does_not_treat_e_prefix_as_escape_string() {
        // Without backslash escapes the string ends at the second quote and
        // the trailing quote opens an unterminated string.
        assert_tokens_for(
            TokenizerBackend::Sqlite,
            r#"E'a\'b' OR id = ?"#,
            vec![
                Unquoted("E"),
                Quoted(r"'a\'"),
                Unquoted("b"),
                Quoted("' OR id = ?"),
            ],
        );
    }

    #[test]
    fn test_11() {
        assert_tokens(
            r" `a``b` ",
            vec![Space(" "), Quoted("`a``b`"), Space(" ")],
        );
    }

    #[test]
    fn test_12() {
        assert_tokens(
            r" 'a''b' ",
            vec![Space(" "), Quoted("'a''b'"), Space(" ")],
        );
    }

    #[test]
    fn test_13() {
        assert_tokens(
            r"(?)",
            vec![Punctuation("("), Punctuation("?"), Punctuation(")")],
        );
    }

    #[test]
    fn test_14() {
        assert_tokens(
            r"($1 = $2)",
            vec![
                Punctuation("("),
                Punctuation("$"),
                Unquoted("1"),
                Space(" "),
                Punctuation("="),
                Space(" "),
                Punctuation("$"),
                Unquoted("2"),
                Punctuation(")"),
            ],
        );
    }

    #[test]
    fn test_15() {
        assert_tokens(
            r#" "Hello World" "#,
            vec![Space(" "), Quoted(r#""Hello World""#), Space(" ")],
        );
    }

    #[test]
    fn test_16() {
        assert_tokens("abc_$123", vec![Unquoted("abc_$123")]);
    }

    #[test]
    fn test_17() {
        assert_tokens("$abc$123", vec![Punctuation("$"), Unquoted("abc$123")]);
    }

    #[test]
    fn test_18() {
        assert_tokens(
            "_$abc_123$",
            vec![Punctuation("_"), Punctuation("$"), Unquoted("abc_123$")],
        );
    }

    #[test]
    fn test_19() {
        let tokenizer = Tokenizer::new(r#""a\"bc""#);
        assert_eq!(tokenizer.unquote(), r#"a\"bc"#);
    }

    #[test]
    fn test_20() {
        let tokenizer = Tokenizer::new(r#""a""bc""#);
        assert_eq!(tokenizer.unquote(), r#"a"bc"#);
    }

    #[test]
    fn test_21() {
        assert_eq!(Quoted(r"'a\nb'").unquote().unwrap(), r"a\nb");
    }

    #[test]
    fn test_22() {
        assert_tokens(
            r#" "Hello\nWorld" "#,
            vec![Space(" "), Quoted(r#""Hello\nWorld""#), Space(" ")],
        );
    }

    #[test]
    fn test_23() {
        assert_tokens(
            "{ab} '{cd}'",
            vec![
                Punctuation("{"),
                Unquoted("ab"),
                Punctuation("}"),
                Space(" "),
                Quoted("'{cd}'"),
            ],
        );
    }

    #[test]
    fn test_24() {
        assert_tokens(
            r#"新"老虎","#,
            vec![Unquoted("新"), Quoted(r#""老虎""#), Punctuation(",")],
        );
    }

    #[test]
    fn test_25() {
        assert_tokens(
            r#"{a.1:2}"#,
            vec![
                Punctuation("{"),
                Unquoted("a"),
                Punctuation("."),
                Unquoted("1"),
                Punctuation(":"),
                Unquoted("2"),
                Punctuation("}"),
            ],
        );
    }

    #[test]
    fn test_26() {
        assert_tokens(
            r#"{..(a.1:2)}"#,
            vec![
                Punctuation("{"),
                Punctuation("."),
                Punctuation("."),
                Punctuation("("),
                Unquoted("a"),
                Punctuation("."),
                Unquoted("1"),
                Punctuation(":"),
                Unquoted("2"),
                Punctuation(")"),
                Punctuation("}"),
            ],
        );
    }

    #[test]
    fn test_single_line_comment() {
        // Multi-line inputs use explicit escapes so that the significant
        // whitespace (including the space after `hello`) is visible.
        assert_tokens(
            "SELECT\n -- hello \n 1",
            vec![
                Unquoted("SELECT"),
                Space("\n "),
                Comment("-- hello "),
                Space("\n "),
                Unquoted("1"),
            ],
        );

        assert_tokens(
            "SELECT -- hello\n 1",
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Comment("-- hello"),
                Space("\n "),
                Unquoted("1"),
            ],
        );

        assert_tokens(
            r#"SELECT 1 -- hello"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Space(" "),
                Comment("-- hello"),
            ],
        );

        assert_tokens(
            r#"SELECT 1 --"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Space(" "),
                Comment("--"),
            ],
        );

        // A lone dash is ordinary punctuation.
        assert_tokens(
            r#"SELECT 1 -"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Space(" "),
                Punctuation("-"),
            ],
        );
    }

    #[test]
    fn test_block_comment() {
        assert_tokens(
            r#"SELECT /* hello */ 1"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Comment("/* hello */"),
                Space(" "),
                Unquoted("1"),
            ],
        );

        assert_tokens(
            r#"SELECT /*hello*/ 1"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Comment("/*hello*/"),
                Space(" "),
                Unquoted("1"),
            ],
        );

        // A line-comment marker inside a block comment has no effect.
        assert_tokens(
            r#"SELECT /* --hello */ 1"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Comment("/* --hello */"),
                Space(" "),
                Unquoted("1"),
            ],
        );

        assert_tokens(
            "SELECT\n /* hello */\n 1",
            vec![
                Unquoted("SELECT"),
                Space("\n "),
                Comment("/* hello */"),
                Space("\n "),
                Unquoted("1"),
            ],
        );

        // Block comments may span lines.
        assert_tokens(
            "SELECT /*\n -- hello */\n 1",
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Comment("/*\n -- hello */"),
                Space("\n "),
                Unquoted("1"),
            ],
        );

        assert_tokens(
            r#"SELECT 1/*hello*/"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Comment("/*hello*/"),
            ],
        );

        // Unterminated block comments run to the end of the input.
        assert_tokens(
            r#"SELECT 1/*hello*"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Comment("/*hello*"),
            ],
        );

        assert_tokens(
            r#"SELECT 1/*hello"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Comment("/*hello"),
            ],
        );

        assert_tokens(
            r#"SELECT 1 /*"#,
            vec![
                Unquoted("SELECT"),
                Space(" "),
                Unquoted("1"),
                Space(" "),
                Comment("/*"),
            ],
        );
    }
}