1mod cursor;
2mod token;
3use cursor::{Cursor, EOF_CHAR};
4pub use token::{Base, LiteralKind, Token, TokenKind};
5
/// Returns `true` if `c` may begin an identifier: an ASCII letter, an
/// underscore, or any character at or above U+0080.
const fn is_ident_start(c: char) -> bool {
    match c {
        '_' => true,
        // Every non-ASCII character (U+0080 and up) is accepted.
        _ => c.is_ascii_alphabetic() || !c.is_ascii(),
    }
}
11
/// Returns `true` if `c` may appear after the first character of an
/// identifier: an ASCII letter or digit, `_`, `$`, or any character at or
/// above U+0080.
const fn is_ident_cont(c: char) -> bool {
    match c {
        '_' | '$' => true,
        // Every non-ASCII character (U+0080 and up) is accepted.
        _ => c.is_ascii_alphanumeric() || !c.is_ascii(),
    }
}
16
/// Returns `true` for the characters the lexer treats as whitespace: space,
/// tab, line feed, vertical tab, form feed and carriage return.
const fn is_whitespace(c: char) -> bool {
    // `'\t'..='\r'` is the contiguous run U+0009..=U+000D:
    // tab, line feed, vertical tab, form feed, carriage return.
    matches!(c, '\t'..='\r' | ' ')
}
31
32impl Cursor<'_> {
33 pub(crate) fn advance_token(&mut self) -> Token {
35 let Some(first_char) = self.bump() else {
36 return Token::new(TokenKind::Eof, 0);
37 };
38 let token_kind = match first_char {
39 '/' => match self.first() {
41 '*' => self.block_comment(),
42 _ => TokenKind::Slash,
43 },
44 '-' => match self.first() {
45 '-' => self.line_comment(),
46 _ => TokenKind::Minus,
47 },
48
49 c if is_whitespace(c) => self.whitespace(),
51
52 'u' | 'U' => {
54 if self.first() == '&' && matches!(self.second(), '\'' | '"') {
55 self.bump();
56 self.prefixed_string(
57 |terminated| LiteralKind::UnicodeEscStr { terminated },
58 true,
59 false,
60 )
61 } else {
62 self.ident()
63 }
64 }
65 'e' | 'E' => {
67 self.prefixed_string(|terminated| LiteralKind::EscStr { terminated }, false, true)
68 }
69
70 'b' | 'B' => self.prefixed_string(
72 |terminated| LiteralKind::BitStr { terminated },
73 false,
74 false,
75 ),
76
77 'x' | 'X' => self.prefixed_string(
79 |terminated| LiteralKind::ByteStr { terminated },
80 false,
81 false,
82 ),
83
84 'n' | 'N' => match self.first() {
86 '\'' => {
87 self.bump();
88 let terminated = self.single_quoted_string(false);
89 TokenKind::Literal {
90 kind: LiteralKind::NationalStr { terminated },
91 }
92 }
93 _ => self.ident(),
94 },
95
96 c if is_ident_start(c) => self.ident(),
99
100 c @ '0'..='9' => {
103 let literal_kind = self.number(c);
104 TokenKind::Literal { kind: literal_kind }
105 }
106 '.' => match self.first() {
107 '0'..='9' => {
108 let literal_kind = self.number('.');
109 TokenKind::Literal { kind: literal_kind }
110 }
111 _ => TokenKind::Dot,
112 },
113 ';' => TokenKind::Semi,
115 ',' => TokenKind::Comma,
116 '(' => TokenKind::OpenParen,
117 ')' => TokenKind::CloseParen,
118 '[' => TokenKind::OpenBracket,
119 ']' => TokenKind::CloseBracket,
120 '{' => TokenKind::OpenCurly,
121 '}' => TokenKind::CloseCurly,
122 '@' => TokenKind::At,
123 '#' => TokenKind::Pound,
124 '~' => TokenKind::Tilde,
125 '?' => TokenKind::Question,
126 ':' => TokenKind::Colon,
127 '$' => {
128 if self.is_dollar_quote_start() {
129 self.dollar_quoted_string()
130 } else {
131 while self.first().is_ascii_digit() {
133 self.bump();
134 }
135 let trailing_junk_start = self.pos_within_token();
136 self.eat_identifier();
137 TokenKind::PositionalParam {
138 trailing_junk_start,
139 }
140 }
141 }
142 '`' => TokenKind::Backtick,
143 '=' => TokenKind::Eq,
144 '!' => TokenKind::Bang,
145 '<' => TokenKind::Lt,
146 '>' => TokenKind::Gt,
147 '&' => TokenKind::And,
148 '|' => TokenKind::Or,
149 '+' => TokenKind::Plus,
150 '*' => TokenKind::Star,
151 '^' => TokenKind::Caret,
152 '%' => TokenKind::Percent,
153
154 '\'' => {
156 let terminated = self.single_quoted_string(false);
157 let kind = LiteralKind::Str { terminated };
158 TokenKind::Literal { kind }
159 }
160
161 '"' => {
163 let terminated = self.double_quoted_string();
164 TokenKind::QuotedIdent {
165 terminated,
166 uescape: false,
167 }
168 }
169 _ => TokenKind::Unknown,
170 };
171 let res = Token::new(token_kind, self.pos_within_token());
172 self.reset_pos_within_token();
173 res
174 }
    /// Consumes the remaining identifier characters and returns
    /// [`TokenKind::Ident`].
    ///
    /// The caller has already consumed the identifier's first character.
    pub(crate) fn ident(&mut self) -> TokenKind {
        self.eat_while(is_ident_cont);
        TokenKind::Ident
    }
179
    /// Consumes a run of whitespace characters and returns
    /// [`TokenKind::Whitespace`].
    ///
    /// The caller has already consumed the first whitespace character.
    pub(crate) fn whitespace(&mut self) -> TokenKind {
        self.eat_while(is_whitespace);
        TokenKind::Whitespace
    }
184
185 pub(crate) fn line_comment(&mut self) -> TokenKind {
188 self.bump();
189
190 self.eat_while(|c| c != '\n' && c != '\r');
191 TokenKind::LineComment
192 }
193
194 pub(crate) fn block_comment(&mut self) -> TokenKind {
196 self.bump();
197
198 let mut depth = 1usize;
199 while let Some(c) = self.bump() {
200 match c {
201 '/' if self.first() == '*' => {
202 self.bump();
203 depth += 1;
204 }
205 '*' if self.first() == '/' => {
206 self.bump();
207 depth -= 1;
208 if depth == 0 {
209 break;
213 }
214 }
215 _ => (),
216 }
217 }
218
219 TokenKind::BlockComment {
220 terminated: depth == 0,
221 }
222 }
223
224 fn prefixed_string(
225 &mut self,
226 mk_kind: fn(bool) -> LiteralKind,
227 allows_double: bool,
228 backslash_escapes: bool,
229 ) -> TokenKind {
230 match self.first() {
231 '\'' => {
232 self.bump();
233 let terminated = self.single_quoted_string(backslash_escapes);
234 let kind = mk_kind(terminated);
235 TokenKind::Literal { kind }
236 }
237 '"' if allows_double => {
238 self.bump();
239 let terminated = self.double_quoted_string();
240 TokenKind::QuotedIdent {
241 terminated,
242 uescape: true,
243 }
244 }
245 _ => self.ident(),
246 }
247 }
248
249 fn number(&mut self, first_digit: char) -> LiteralKind {
250 let mut base = Base::Decimal;
251 if first_digit == '.' {
252 return self.eat_fractional();
253 }
254 if first_digit == '0' {
255 match self.first() {
257 'b' | 'B' => {
259 base = Base::Binary;
260 self.bump();
261 let has_digits = self.eat_decimal_digits();
262 return self.finish_base_prefixed_int(base, has_digits);
263 }
264 'o' | 'O' => {
266 base = Base::Octal;
267 self.bump();
268 let has_digits = self.eat_decimal_digits();
269 return self.finish_base_prefixed_int(base, has_digits);
270 }
271 'x' | 'X' => {
273 base = Base::Hexadecimal;
274 self.bump();
275 let has_digits = self.eat_hexadecimal_digits();
276 return self.finish_base_prefixed_int(base, has_digits);
277 }
278 '0'..='9' | '_' => {
280 self.eat_decimal_digits();
281 }
282
283 '.' | 'e' | 'E' => {}
285
286 _ => {
288 let trailing_junk_start = self.pos_within_token();
289 self.eat_identifier();
290 return LiteralKind::Int {
291 base,
292 empty_int: false,
293 trailing_junk_start,
294 };
295 }
296 }
297 } else {
298 self.eat_decimal_digits();
300 };
301
302 match self.first() {
303 '.' => {
304 self.bump();
305 self.eat_fractional()
306 }
307 'e' | 'E' => {
308 let exponent_start = self.pos_within_token();
309 self.bump();
310 let empty_exponent_start = (!self.eat_numeric_exponent()).then_some(exponent_start);
311 let trailing_junk_start = self.pos_within_token();
312 self.eat_identifier();
313 LiteralKind::Numeric {
314 empty_exponent_start,
315 trailing_junk_start,
316 }
317 }
318 _ => {
319 let trailing_junk_start = self.pos_within_token();
320 self.eat_identifier();
321 LiteralKind::Int {
322 base,
323 empty_int: false,
324 trailing_junk_start,
325 }
326 }
327 }
328 }
329
330 fn single_quoted_string(&mut self, backslash_escapes: bool) -> bool {
331 loop {
333 match self.first() {
334 '\\' if backslash_escapes => {
335 self.bump();
337 self.bump();
339 }
340 '\'' => {
342 self.bump();
343
344 match self.first() {
345 '\'' => {
347 self.bump();
348 }
349 _ => return true,
351 }
352 }
353 EOF_CHAR if self.is_eof() => break,
355 _ => {
357 self.bump();
358 }
359 }
360 }
361 false
363 }
364
365 fn double_quoted_string(&mut self) -> bool {
368 while let Some(c) = self.bump() {
369 match c {
370 '"' if self.first() == '"' => {
371 self.bump();
373 }
374 '"' => {
375 return true;
376 }
377 _ => (),
378 }
379 }
380 false
382 }
383
384 fn is_dollar_quote_start(&self) -> bool {
386 let mut chars = self.chars();
387 match chars.next() {
388 Some('$') => true,
390 Some(c) if is_ident_start(c) => {
392 for c in chars {
393 if c == '$' {
394 return true;
395 }
396 if !is_ident_cont(c) {
397 return false;
398 }
399 }
400 false
401 }
402 _ => false,
403 }
404 }
405
406 fn dollar_quoted_string(&mut self) -> TokenKind {
408 let mut start = vec![];
411 while let Some(c) = self.bump() {
412 match c {
413 '$' => {
414 break;
415 }
416 _ => {
417 start.push(c);
418 }
419 }
420 }
421
422 if start.is_empty() {
424 loop {
425 self.eat_while(|c| c != '$');
426 if self.is_eof() {
427 return TokenKind::Literal {
428 kind: LiteralKind::DollarQuotedString { terminated: false },
429 };
430 }
431 self.bump();
433 if self.first() == '$' {
434 self.bump();
435 return TokenKind::Literal {
436 kind: LiteralKind::DollarQuotedString { terminated: true },
437 };
438 }
439 }
440 } else {
441 loop {
442 self.eat_while(|c| c != '$');
443 if self.is_eof() {
444 return TokenKind::Literal {
445 kind: LiteralKind::DollarQuotedString { terminated: false },
446 };
447 }
448
449 self.bump();
451
452 let mut matches_tag = true;
453 for start_char in &start {
454 if self.first() == *start_char {
455 self.bump();
456 } else {
457 matches_tag = false;
458 break;
459 }
460 }
461
462 if matches_tag && self.first() == '$' {
463 self.bump();
464 return TokenKind::Literal {
465 kind: LiteralKind::DollarQuotedString { terminated: true },
466 };
467 }
468 }
469 }
470 }
471
472 fn eat_decimal_digits(&mut self) -> bool {
473 let mut has_digits = false;
474 loop {
475 match self.first() {
476 '_' if self.second().is_ascii_digit() => {
477 self.bump();
478 }
479 '0'..='9' => {
480 has_digits = true;
481 self.bump();
482 }
483 _ => break,
484 }
485 }
486 has_digits
487 }
488
489 fn finish_base_prefixed_int(&mut self, base: Base, has_digits: bool) -> LiteralKind {
490 let trailing_junk_start = self.pos_within_token();
491 self.eat_identifier();
492 let has_trailing_junk = self.pos_within_token() > trailing_junk_start;
493 LiteralKind::Int {
494 base,
495 empty_int: !has_digits && !has_trailing_junk,
496 trailing_junk_start,
497 }
498 }
499
500 fn eat_hexadecimal_digits(&mut self) -> bool {
501 let mut has_digits = false;
502 loop {
503 match self.first() {
504 '_' if self.second().is_ascii_hexdigit() => {
505 self.bump();
506 }
507 '0'..='9' | 'a'..='f' | 'A'..='F' => {
508 has_digits = true;
509 self.bump();
510 }
511 _ => break,
512 }
513 }
514 has_digits
515 }
516
517 fn eat_numeric_exponent(&mut self) -> bool {
520 if self.first() == '-' || self.first() == '+' {
521 if !self.second().is_ascii_digit() {
522 return false;
523 }
524 self.bump();
525 } else if !self.first().is_ascii_digit() {
526 return false;
527 }
528 self.eat_decimal_digits()
529 }
530
531 fn eat_identifier(&mut self) {
532 if is_ident_start(self.first()) {
533 self.eat_while(is_ident_cont);
534 }
535 }
536
537 pub(crate) fn eat_fractional(&mut self) -> crate::LiteralKind {
538 let mut empty_exponent_start = None;
539 if self.first().is_ascii_digit() {
540 self.eat_decimal_digits();
541 }
542 match self.first() {
543 'e' | 'E' => {
544 let exponent_start = self.pos_within_token();
545 self.bump();
546 if !self.eat_numeric_exponent() {
547 empty_exponent_start = Some(exponent_start);
548 }
549 }
550 _ => (),
551 }
552 let trailing_junk_start = self.pos_within_token();
553 self.eat_identifier();
554 LiteralKind::Numeric {
555 empty_exponent_start,
556 trailing_junk_start,
557 }
558 }
559}
560
561pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
563 let mut cursor = Cursor::new(input);
564 std::iter::from_fn(move || {
565 let token = cursor.advance_token();
566 if token.kind != TokenKind::Eof {
567 Some(token)
568 } else {
569 None
570 }
571 })
572}
573
// Snapshot tests for the lexer. Each test lexes a small input and compares the
// debug rendering of the resulting tokens against an `insta` snapshot, either
// inline (`@r#"..."#`) or stored by insta outside this file.
#[cfg(test)]
mod tests {
    use std::fmt;

    use super::*;
    use insta::assert_debug_snapshot;

    /// A token paired with the slice of input it covers, for readable
    /// snapshots.
    struct TokenDebug<'a> {
        content: &'a str,
        token: Token,
    }
    impl fmt::Debug for TokenDebug<'_> {
        /// Renders as `"<text>" @ <kind>`.
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "{:?} @ {:?}", self.content, self.token.kind)
        }
    }

    impl<'a> TokenDebug<'a> {
        /// Pairs `token` with the `token.len` bytes of `input` starting at
        /// byte offset `start`.
        fn new(token: Token, input: &'a str, start: u32) -> TokenDebug<'a> {
            TokenDebug {
                token,
                content: &input[start as usize..(start + token.len) as usize],
            }
        }
    }

    /// Lexes `input` and attaches to each token the text it spans, tracking
    /// the running byte offset from the token lengths.
    fn lex(input: &str) -> Vec<TokenDebug<'_>> {
        let mut tokens = vec![];
        let mut start = 0;

        for token in tokenize(input) {
            let length = token.len;
            tokens.push(TokenDebug::new(token, input, start));
            start += length;
        }
        tokens
    }
    #[test]
    fn lex_statement() {
        let result = lex("select 1;");
        assert_debug_snapshot!(result);
    }

    #[test]
    fn block_comment() {
        let result = lex(r#"
/*
 * foo
 * bar
*/"#);
        assert_debug_snapshot!(result);
    }

    // The nested `/*` is never closed, so the outer comment stays open.
    #[test]
    fn block_comment_unterminated() {
        let result = lex(r#"
/*
 * foo
 * bar
 /*
*/"#);
        assert_debug_snapshot!(result);
    }

    #[test]
    fn line_comment() {
        let result = lex(r#"
-- foooooooooooo bar buzz
"#);
        assert_debug_snapshot!(result);
    }

    // A bare `\r` ends a line comment just like `\n` does.
    #[test]
    fn line_comment_cr_newline() {
        assert_debug_snapshot!(lex("select 1; -- comment\rselect 2;"), @r#"
        [
            "select" @ Ident,
            " " @ Whitespace,
            "1" @ Literal { kind: Int { base: Decimal, empty_int: false, trailing_junk_start: 1 } },
            ";" @ Semi,
            " " @ Whitespace,
            "-- comment" @ LineComment,
            "\r" @ Whitespace,
            "select" @ Ident,
            " " @ Whitespace,
            "2" @ Literal { kind: Int { base: Decimal, empty_int: false, trailing_junk_start: 1 } },
            ";" @ Semi,
        ]
        "#);
    }

    #[test]
    fn line_comment_whitespace() {
        assert_debug_snapshot!(lex(r#"
select 'Hello' -- This is a comment
' World';"#))
    }

    #[test]
    fn dollar_quoting() {
        assert_debug_snapshot!(lex(r#"
$$Dianne's horse$$
$SomeTag$Dianne's horse$SomeTag$

-- with dollar inside and matching tags
$foo$hello$world$bar$
"#))
    }

    #[test]
    fn dollar_strings_part2() {
        assert_debug_snapshot!(lex(r#"
DO $doblock$
end
$doblock$;"#))
    }

    #[test]
    fn dollar_quote_mismatch_tags_simple() {
        assert_debug_snapshot!(lex(r#"
-- dollar quoting with mismatched tags
$foo$hello world$bar$
"#));
    }

    #[test]
    fn dollar_quote_mismatch_tags_complex() {
        assert_debug_snapshot!(lex(r#"
-- with dollar inside but mismatched tags
$foo$hello$world$bar$
"#));
    }

    #[test]
    fn numeric() {
        assert_debug_snapshot!(lex(r#"
42
3.5
4.
.001
.123e10
5e2
1.925e-3
1e-10
1e+10
1e10
4664.E+5
"#))
    }

    #[test]
    fn numeric_non_decimal() {
        assert_debug_snapshot!(lex(r#"
0b100101
0B10011001
0o273
0O755
0x42f
0XFFFF
"#))
    }

    // `$` is not an identifier start, so it must not be absorbed as trailing
    // junk of the preceding number.
    #[test]
    fn numeric_base_prefix_does_not_swallow_dollar_tokens() {
        assert_debug_snapshot!(lex("123$abc 0b101$2 0o12$abc 0x12$abc 0xFF$1 0x1$$foo$$ 123$$foo$$"), @r#"
        [
            "123" @ Literal { kind: Int { base: Decimal, empty_int: false, trailing_junk_start: 3 } },
            "$abc" @ PositionalParam { trailing_junk_start: 1 },
            " " @ Whitespace,
            "0b101" @ Literal { kind: Int { base: Binary, empty_int: false, trailing_junk_start: 5 } },
            "$2" @ PositionalParam { trailing_junk_start: 2 },
            " " @ Whitespace,
            "0o12" @ Literal { kind: Int { base: Octal, empty_int: false, trailing_junk_start: 4 } },
            "$abc" @ PositionalParam { trailing_junk_start: 1 },
            " " @ Whitespace,
            "0x12" @ Literal { kind: Int { base: Hexadecimal, empty_int: false, trailing_junk_start: 4 } },
            "$abc" @ PositionalParam { trailing_junk_start: 1 },
            " " @ Whitespace,
            "0xFF" @ Literal { kind: Int { base: Hexadecimal, empty_int: false, trailing_junk_start: 4 } },
            "$1" @ PositionalParam { trailing_junk_start: 2 },
            " " @ Whitespace,
            "0x1" @ Literal { kind: Int { base: Hexadecimal, empty_int: false, trailing_junk_start: 3 } },
            "$$foo$$" @ Literal { kind: DollarQuotedString { terminated: true } },
            " " @ Whitespace,
            "123" @ Literal { kind: Int { base: Decimal, empty_int: false, trailing_junk_start: 3 } },
            "$$foo$$" @ Literal { kind: DollarQuotedString { terminated: true } },
        ]
        "#);
    }

    // NOTE(review): "seperators" is misspelled, but insta derives the stored
    // snapshot's name from the test name, so renaming requires moving the
    // snapshot as well.
    #[test]
    fn numeric_with_seperators() {
        assert_debug_snapshot!(lex(r#"
1_500_000_000
0b10001000_00000000
0o_1_755
0xFFFF_FFFF
1.618_034
"#))
    }

    #[test]
    fn numeric_leading_dot_with_separators() {
        assert_debug_snapshot!(lex(".1_2 .5_5 .1_2e3"), @r#"
        [
            ".1_2" @ Literal { kind: Numeric { empty_exponent_start: None, trailing_junk_start: 4 } },
            " " @ Whitespace,
            ".5_5" @ Literal { kind: Numeric { empty_exponent_start: None, trailing_junk_start: 4 } },
            " " @ Whitespace,
            ".1_2e3" @ Literal { kind: Numeric { empty_exponent_start: None, trailing_junk_start: 6 } },
        ]
        "#)
    }

    // A sign must be followed directly by a digit; otherwise the exponent is
    // empty and the sign is lexed as its own token.
    #[test]
    fn numeric_exponent_underscore_after_sign() {
        assert_debug_snapshot!(lex("1e+_2 1e-_2 1.0e+_2 .1e+_2"), @r#"
        [
            "1e" @ Literal { kind: Numeric { empty_exponent_start: Some(1), trailing_junk_start: 2 } },
            "+" @ Plus,
            "_2" @ Ident,
            " " @ Whitespace,
            "1e" @ Literal { kind: Numeric { empty_exponent_start: Some(1), trailing_junk_start: 2 } },
            "-" @ Minus,
            "_2" @ Ident,
            " " @ Whitespace,
            "1.0e" @ Literal { kind: Numeric { empty_exponent_start: Some(3), trailing_junk_start: 4 } },
            "+" @ Plus,
            "_2" @ Ident,
            " " @ Whitespace,
            ".1e" @ Literal { kind: Numeric { empty_exponent_start: Some(2), trailing_junk_start: 3 } },
            "+" @ Plus,
            "_2" @ Ident,
        ]
        "#)
    }

    #[test]
    fn select_with_period() {
        assert_debug_snapshot!(lex(r#"
select public.users;
"#))
    }

    #[test]
    fn bitstring() {
        assert_debug_snapshot!(lex(r#"
B'1001'
b'1001'
X'1FF'
x'1FF'
"#))
    }

    // Only `n` / `N` directly followed by a quote is a national string.
    #[test]
    fn national_character_string() {
        assert_debug_snapshot!(lex("N'foo' n'bar' numeric'1'"), @r#"
        [
            "N'foo'" @ Literal { kind: NationalStr { terminated: true } },
            " " @ Whitespace,
            "n'bar'" @ Literal { kind: NationalStr { terminated: true } },
            " " @ Whitespace,
            "numeric" @ Ident,
            "'1'" @ Literal { kind: Str { terminated: true } },
        ]
        "#);
    }

    // A prefix letter followed by more identifier characters is an identifier,
    // and the quote after it starts a plain string.
    #[test]
    fn ident_prefix_then_string_is_consistent() {
        assert_debug_snapshot!(
            lex("N1'foo' E1'foo' B1'foo' X1'foo' U1'foo' uuid'00000000'"),
            @r#"
        [
            "N1" @ Ident,
            "'foo'" @ Literal { kind: Str { terminated: true } },
            " " @ Whitespace,
            "E1" @ Ident,
            "'foo'" @ Literal { kind: Str { terminated: true } },
            " " @ Whitespace,
            "B1" @ Ident,
            "'foo'" @ Literal { kind: Str { terminated: true } },
            " " @ Whitespace,
            "X1" @ Ident,
            "'foo'" @ Literal { kind: Str { terminated: true } },
            " " @ Whitespace,
            "U1" @ Ident,
            "'foo'" @ Literal { kind: Str { terminated: true } },
            " " @ Whitespace,
            "uuid" @ Ident,
            "'00000000'" @ Literal { kind: Str { terminated: true } },
        ]
        "#);
    }

    #[test]
    fn string() {
        assert_debug_snapshot!(lex(r#"
'Dianne''s horse'

select 'foo ''
bar';

select 'foooo'
 'bar';


'foo \\ \n \tbar'

'forgot to close the string
"#))
    }

    #[test]
    fn params() {
        assert_debug_snapshot!(lex(r#"
select $1 + $2;

select $1123123123123;

select $;
"#))
    }

    #[test]
    fn string_with_escapes() {
        assert_debug_snapshot!(lex(r#"
E'foo'

e'bar'

e'\b\f\n\r\t'

e'\0\11\777'

e'\x0\x11\xFF'

e'\uAAAA \UFFFFFFFF'

"#))
    }

    // In an `E'...'` string a backslash escapes the quote that follows it.
    #[test]
    fn escape_string_with_backslash_escaped_quote() {
        assert_debug_snapshot!(lex(r"E'foo\'bar'"), @r#"
        [
            "E'foo\\'bar'" @ Literal { kind: EscStr { terminated: true } },
        ]
        "#);
    }

    #[test]
    fn escape_string_with_escaped_terminal_quote_is_unterminated() {
        assert_debug_snapshot!(lex(r"E'foo\';"), @r#"
        [
            "E'foo\\';" @ Literal { kind: EscStr { terminated: false } },
        ]
        "#);
    }

    #[test]
    fn escape_string_with_even_backslashes_before_quote_is_terminated() {
        assert_debug_snapshot!(lex(r"E'foo\\'"), @r#"
        [
            "E'foo\\\\'" @ Literal { kind: EscStr { terminated: true } },
        ]
        "#);
    }

    #[test]
    fn string_unicode_escape() {
        assert_debug_snapshot!(lex(r#"
U&"d\0061t\+000061"

U&"\0441\043B\043E\043D"

u&'\0441\043B'

U&"d!0061t!+000061" UESCAPE '!'
"#))
    }

    #[test]
    fn quoted_ident() {
        assert_debug_snapshot!(lex(r#"
"hello &1 -world";


"hello-world
"#))
    }

    #[test]
    fn quoted_ident_with_escape_quote() {
        assert_debug_snapshot!(lex(r#"
"foo "" bar"
"#))
    }

    // An empty dollar-quoted string with an empty tag.
    #[test]
    fn dollar_quoted_string() {
        assert_debug_snapshot!(lex("$$$$"), @r#"
        [
            "$$$$" @ Literal { kind: DollarQuotedString { terminated: true } },
        ]
        "#);
    }

    // `foo$` inside the body is not a closing delimiter; only `$foo$` is.
    #[test]
    fn tagged_dollar_quote_requires_leading_dollar() {
        assert_debug_snapshot!(lex("select $foo$abcfoo$def$foo$;"), @r#"
        [
            "select" @ Ident,
            " " @ Whitespace,
            "$foo$abcfoo$def$foo$" @ Literal { kind: DollarQuotedString { terminated: true } },
            ";" @ Semi,
        ]
        "#);
    }

    // `$x` without a closing `$` for the tag is not a dollar-quote opener.
    #[test]
    fn unclosed_dollar_tag_does_not_swallow_rest_of_input() {
        assert_debug_snapshot!(lex("select $x;\ndrop table users;"), @r#"
        [
            "select" @ Ident,
            " " @ Whitespace,
            "$x" @ PositionalParam { trailing_junk_start: 1 },
            ";" @ Semi,
            "\n" @ Whitespace,
            "drop" @ Ident,
            " " @ Whitespace,
            "table" @ Ident,
            " " @ Whitespace,
            "users" @ Ident,
            ";" @ Semi,
        ]
        "#);
    }

    #[test]
    fn ident_non_ascii_above_latin1() {
        assert_debug_snapshot!(lex("ẞ Ā 漢字 𐐷"), @r#"
        [
            "ẞ" @ Ident,
            " " @ Whitespace,
            "Ā" @ Ident,
            " " @ Whitespace,
            "漢字" @ Ident,
            " " @ Whitespace,
            "𐐷" @ Ident,
        ]
        "#);
    }
}