1use crate::{
31 ast::bigint::BigInt,
32 soft_keywords::SoftKeywordTransformer,
33 string::FStringErrorType,
34 text_size::{TextLen, TextRange, TextSize},
35 token::{StringKind, Tok},
36 Mode,
37};
38use log::trace;
39use num_traits::{Num, Zero};
40use std::{char, cmp::Ordering, ops::Index, slice::SliceIndex, str::FromStr};
41use unic_emoji_char::is_emoji_presentation;
42use unic_ucd_ident::{is_xid_continue, is_xid_start};
43
/// The indentation at the start of a logical line: the number of tabs and
/// spaces, tracked separately so an ambiguous mix can be rejected
/// (see `compare_strict`).
#[derive(Clone, Copy, PartialEq, Debug, Default)]
struct IndentationLevel {
    tabs: u32,
    spaces: u32,
}
51
52impl IndentationLevel {
53 fn compare_strict(
54 &self,
55 other: &IndentationLevel,
56 location: TextSize,
57 ) -> Result<Ordering, LexicalError> {
58 match self.tabs.cmp(&other.tabs) {
62 Ordering::Less => {
63 if self.spaces <= other.spaces {
64 Ok(Ordering::Less)
65 } else {
66 Err(LexicalError {
67 location,
68 error: LexicalErrorType::TabError,
69 })
70 }
71 }
72 Ordering::Greater => {
73 if self.spaces >= other.spaces {
74 Ok(Ordering::Greater)
75 } else {
76 Err(LexicalError {
77 location,
78 error: LexicalErrorType::TabError,
79 })
80 }
81 }
82 Ordering::Equal => Ok(self.spaces.cmp(&other.spaces)),
83 }
84 }
85}
86
/// A stack of indentation levels, one entry per currently-open indented
/// block. The first entry is the implicit zero-width level of the module
/// scope (see the `Default` impl), so the stack is never truly empty.
#[derive(Debug)]
struct Indentations {
    indent_stack: Vec<IndentationLevel>,
}
94
95impl Indentations {
96 fn is_empty(&self) -> bool {
97 self.indent_stack.len() == 1
98 }
99
100 fn push(&mut self, indent: IndentationLevel) {
101 self.indent_stack.push(indent);
102 }
103
104 fn pop(&mut self) -> Option<IndentationLevel> {
105 if self.is_empty() {
106 return None;
107 }
108 self.indent_stack.pop()
109 }
110
111 fn current(&self) -> &IndentationLevel {
112 self.indent_stack
113 .last()
114 .expect("Indentations must have at least one level")
115 }
116}
117
118impl Default for Indentations {
119 fn default() -> Self {
120 Self {
121 indent_stack: vec![IndentationLevel::default()],
122 }
123 }
124}
125
/// A fixed-size window of `N` lookahead characters pulled from `source`.
/// `window[0]` is the next character to be consumed; `slide` shifts every
/// slot left and pulls one more character from the underlying iterator.
struct CharWindow<T: Iterator<Item = char>, const N: usize> {
    source: T,
    window: [Option<char>; N],
}
132
133impl<T, const N: usize> CharWindow<T, N>
134where
135 T: Iterator<Item = char>,
136{
137 fn new(source: T) -> Self {
138 Self {
139 source,
140 window: [None; N],
141 }
142 }
143
144 fn slide(&mut self) -> Option<char> {
145 self.window.rotate_left(1);
146 let next = self.source.next();
147 *self.window.last_mut().expect("never empty") = next;
148 next
149 }
150}
151
/// Index into the lookahead window with any slice index (`window[0]`,
/// `window[..2]`, ...) over the `Option<char>` slots.
impl<T, const N: usize, Idx> Index<Idx> for CharWindow<T, N>
where
    T: Iterator<Item = char>,
    Idx: SliceIndex<[Option<char>]>,
{
    type Output = Idx::Output;

    fn index(&self, index: Idx) -> &Self::Output {
        &self.window[index]
    }
}
163
/// A lexer over a stream of characters; construct via [`lex`] or
/// [`lex_starts_at`].
pub struct Lexer<T: Iterator<Item = char>> {
    // Three-character lookahead window over the source.
    window: CharWindow<T, 3>,
    // True at the start of a logical line, where indentation must be handled
    // before the next token.
    at_begin_of_line: bool,
    // Number of currently-open parentheses/brackets/braces; newlines inside
    // them are not logical newlines.
    nesting: usize,
    // Stack of indentation levels seen so far.
    indentations: Indentations,
    // Tokens already lexed but not yet returned by `inner_next`.
    pending: Vec<Spanned>,
    // Current byte offset of the lexer within the source text.
    location: TextSize,
}
179
/// Python keywords mapped to their tokens; the table is generated at build
/// time into `OUT_DIR/keywords.rs`.
pub static KEYWORDS: phf::Map<&'static str, Tok> =
    include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
184
/// A token together with its source range.
pub type Spanned = (Tok, TextRange);
/// The result of lexing one token: a spanned token or a lexical error.
pub type LexResult = Result<Spanned, LexicalError>;
189
/// Lex `source` from its beginning in the given `mode`.
#[inline]
pub fn lex(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
    lex_starts_at(source, mode, TextSize::default())
}
208
/// Lex `source` in the given `mode`, with token offsets reported relative to
/// `start_offset`. The lexer is wrapped in a `SoftKeywordTransformer`, which
/// resolves soft keywords (e.g. `match`) according to context.
pub fn lex_starts_at(
    source: &str,
    mode: Mode,
    start_offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars<'_>>> {
    SoftKeywordTransformer::new(Lexer::new(source.chars(), start_offset), mode)
}
218
219impl<T> Lexer<T>
220where
221 T: Iterator<Item = char>,
222{
223 pub fn new(input: T, start: TextSize) -> Self {
226 let mut lxr = Lexer {
227 at_begin_of_line: true,
228 nesting: 0,
229 indentations: Indentations::default(),
230 pending: Vec::with_capacity(5),
232 location: start,
233 window: CharWindow::new(input),
234 };
235 lxr.window.slide();
237 lxr.window.slide();
238 lxr.window.slide();
239 if let Some('\u{feff}') = lxr.window[0] {
242 lxr.window.slide();
243 lxr.location += '\u{feff}'.text_len();
244 }
245 lxr
246 }
247
    /// Lex an identifier, a keyword, or a prefixed string literal
    /// (e.g. `r"..."`, `f'...'`), deciding from the lookahead window.
    fn lex_identifier(&mut self) -> LexResult {
        // A one- or two-character prefix followed directly by a quote starts
        // a string literal, not an identifier.
        match self.window[..3] {
            [Some(c), Some('"' | '\''), ..] => {
                if let Ok(kind) = StringKind::try_from(c) {
                    return self.lex_string(kind);
                }
            }
            [Some(c1), Some(c2), Some('"' | '\'')] => {
                if let Ok(kind) = StringKind::try_from([c1, c2]) {
                    return self.lex_string(kind);
                }
            }
            _ => {}
        };

        let start_pos = self.get_pos();
        let mut name = String::with_capacity(8);
        while self.is_identifier_continuation() {
            name.push(self.next_char().unwrap());
        }
        let end_pos = self.get_pos();

        // Keywords take precedence over plain names.
        if let Some(tok) = KEYWORDS.get(&name) {
            Ok((tok.clone(), TextRange::new(start_pos, end_pos)))
        } else {
            Ok((Tok::Name { name }, TextRange::new(start_pos, end_pos)))
        }
    }
278
279 fn lex_number(&mut self) -> LexResult {
281 let start_pos = self.get_pos();
282 match self.window[..2] {
283 [Some('0'), Some('x' | 'X')] => {
284 self.next_char();
286 self.next_char();
287 self.lex_number_radix(start_pos, 16)
288 }
289 [Some('0'), Some('o' | 'O')] => {
290 self.next_char();
292 self.next_char();
293 self.lex_number_radix(start_pos, 8)
294 }
295 [Some('0'), Some('b' | 'B')] => {
296 self.next_char();
298 self.next_char();
299 self.lex_number_radix(start_pos, 2)
300 }
301 _ => self.lex_normal_number(),
302 }
303 }
304
    /// Lex a hex/octal/binary literal whose `0x`/`0o`/`0b` prefix has already
    /// been consumed; `start_pos` is the offset of that prefix.
    fn lex_number_radix(&mut self, start_pos: TextSize, radix: u32) -> LexResult {
        let value_text = self.radix_run(radix);
        let end_pos = self.get_pos();
        let value = BigInt::from_str_radix(&value_text, radix).map_err(|e| LexicalError {
            error: LexicalErrorType::OtherError(format!("{e:?}")),
            location: start_pos,
        })?;
        Ok((Tok::Int { value }, TextRange::new(start_pos, end_pos)))
    }
315
    /// Lex a decimal integer, float, or imaginary (`j`-suffixed) literal.
    fn lex_normal_number(&mut self) -> LexResult {
        let start_pos = self.get_pos();
        let start_is_zero = self.window[0] == Some('0');
        // Integer part.
        let mut value_text = self.radix_run(10);

        if self.window[0] == Some('.') || self.at_exponent() {
            // Fractional part.
            if self.window[0] == Some('.') {
                // An underscore may not directly follow the decimal point.
                if self.window[1] == Some('_') {
                    return Err(LexicalError {
                        error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
                        location: self.get_pos(),
                    });
                }
                value_text.push(self.next_char().unwrap());
                value_text.push_str(&self.radix_run(10));
            }

            // Exponent part.
            if let Some('e' | 'E') = self.window[0] {
                // An underscore may not directly follow `e`/`E`.
                if self.window[1] == Some('_') {
                    return Err(LexicalError {
                        error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
                        location: self.get_pos(),
                    });
                }
                value_text.push(self.next_char().unwrap().to_ascii_lowercase());
                if matches!(self.window[0], Some('-' | '+')) {
                    // An underscore may not directly follow the sign.
                    if self.window[1] == Some('_') {
                        return Err(LexicalError {
                            error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
                            location: self.get_pos(),
                        });
                    }
                    value_text.push(self.next_char().unwrap());
                }

                value_text.push_str(&self.radix_run(10));
            }

            let value = f64::from_str(&value_text).map_err(|_| LexicalError {
                error: LexicalErrorType::OtherError("Invalid decimal literal".to_owned()),
                location: self.get_pos(),
            })?;

            // A trailing `j`/`J` makes the float literal imaginary.
            if matches!(self.window[0], Some('j' | 'J')) {
                self.next_char();
                let end_pos = self.get_pos();
                Ok((
                    Tok::Complex {
                        real: 0.0,
                        imag: value,
                    },
                    TextRange::new(start_pos, end_pos),
                ))
            } else {
                let end_pos = self.get_pos();
                Ok((Tok::Float { value }, TextRange::new(start_pos, end_pos)))
            }
        } else {
            // Not a float: an imaginary integer or a plain integer.
            if matches!(self.window[0], Some('j' | 'J')) {
                self.next_char();
                let end_pos = self.get_pos();
                let imag = f64::from_str(&value_text).unwrap();
                Ok((
                    Tok::Complex { real: 0.0, imag },
                    TextRange::new(start_pos, end_pos),
                ))
            } else {
                let end_pos = self.get_pos();
                let value = value_text.parse::<BigInt>().unwrap();
                // Non-zero integers with a leading zero (e.g. `012`) are
                // invalid.
                if start_is_zero && !value.is_zero() {
                    return Err(LexicalError {
                        error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
                        location: self.get_pos(),
                    });
                }
                Ok((Tok::Int { value }, TextRange::new(start_pos, end_pos)))
            }
        }
    }
404
405 fn radix_run(&mut self, radix: u32) -> String {
409 let mut value_text = String::new();
410
411 loop {
412 if let Some(c) = self.take_number(radix) {
413 value_text.push(c);
414 } else if self.window[0] == Some('_')
415 && Lexer::<T>::is_digit_of_radix(self.window[1], radix)
416 {
417 self.next_char();
418 } else {
419 break;
420 }
421 }
422 value_text
423 }
424
425 fn take_number(&mut self, radix: u32) -> Option<char> {
427 let take_char = Lexer::<T>::is_digit_of_radix(self.window[0], radix);
428
429 take_char.then(|| self.next_char().unwrap())
430 }
431
432 fn is_digit_of_radix(c: Option<char>, radix: u32) -> bool {
434 match radix {
435 2 => matches!(c, Some('0'..='1')),
436 8 => matches!(c, Some('0'..='7')),
437 10 => matches!(c, Some('0'..='9')),
438 16 => matches!(c, Some('0'..='9') | Some('a'..='f') | Some('A'..='F')),
439 other => unimplemented!("Radix not implemented: {}", other),
440 }
441 }
442
443 fn at_exponent(&self) -> bool {
445 match self.window[..2] {
446 [Some('e' | 'E'), Some('+' | '-')] => matches!(self.window[2], Some('0'..='9')),
447 [Some('e' | 'E'), Some('0'..='9')] => true,
448 _ => false,
449 }
450 }
451
452 #[cfg(feature = "full-lexer")]
454 fn lex_comment(&mut self) -> LexResult {
455 let start_pos = self.get_pos();
456 let mut value = String::new();
457 loop {
458 match self.window[0] {
459 Some('\n' | '\r') | None => {
460 let end_pos = self.get_pos();
461 return Ok((Tok::Comment(value), TextRange::new(start_pos, end_pos)));
462 }
463 Some(_) => {}
464 }
465 value.push(self.next_char().unwrap());
466 }
467 }
468
469 #[cfg(feature = "full-lexer")]
470 fn lex_and_emit_comment(&mut self) -> Result<(), LexicalError> {
471 let comment = self.lex_comment()?;
472 self.emit(comment);
473 Ok(())
474 }
475
476 #[cfg(not(feature = "full-lexer"))]
478 fn lex_comment(&mut self) {
479 loop {
480 match self.window[0] {
481 Some('\n' | '\r') | None => {
482 return;
483 }
484 Some(_) => {}
485 }
486 self.next_char().unwrap();
487 }
488 }
489
    /// With the full lexer disabled, comments are consumed but no token is
    /// emitted.
    #[cfg(not(feature = "full-lexer"))]
    #[inline]
    fn lex_and_emit_comment(&mut self) -> Result<(), LexicalError> {
        self.lex_comment();
        Ok(())
    }
496
    /// Lex a string literal of the given `kind`; the prefix (e.g. `r`, `f`)
    /// and quotes are consumed here. Escape sequences are kept verbatim
    /// (backslash plus the following character) for later interpretation.
    fn lex_string(&mut self, kind: StringKind) -> LexResult {
        let start_pos = self.get_pos();
        // Skip the prefix characters.
        for _ in 0..u32::from(kind.prefix_len()) {
            self.next_char();
        }
        let quote_char = self.next_char().unwrap();
        let mut string_content = String::with_capacity(5);

        // Two more identical quotes mean a triple-quoted literal.
        let triple_quoted = if self.window[..2] == [Some(quote_char); 2] {
            self.next_char();
            self.next_char();
            true
        } else {
            false
        };

        loop {
            match self.next_char() {
                Some(c) => {
                    if c == '\\' {
                        // Preserve the escape verbatim; it is decoded later.
                        if let Some(next_c) = self.next_char() {
                            string_content.push('\\');
                            string_content.push(next_c);
                            continue;
                        }
                    }
                    // Only triple-quoted strings may span lines.
                    if c == '\n' && !triple_quoted {
                        return Err(LexicalError {
                            error: LexicalErrorType::OtherError(
                                "EOL while scanning string literal".to_owned(),
                            ),
                            location: self.get_pos(),
                        });
                    }

                    if c == quote_char {
                        if triple_quoted {
                            // A triple-quoted string closes only on a full
                            // run of three quotes.
                            if self.window[..2] == [Some(quote_char); 2] {
                                self.next_char();
                                self.next_char();
                                break;
                            }
                        } else {
                            break;
                        }
                    }
                    string_content.push(c);
                }
                None => {
                    return Err(LexicalError {
                        error: if triple_quoted {
                            LexicalErrorType::Eof
                        } else {
                            LexicalErrorType::StringError
                        },
                        location: self.get_pos(),
                    });
                }
            }
        }
        let end_pos = self.get_pos();
        let tok = Tok::String {
            value: string_content,
            kind,
            triple_quoted,
        };
        Ok((tok, TextRange::new(start_pos, end_pos)))
    }
571
572 fn is_identifier_start(&self, c: char) -> bool {
575 match c {
576 'a'..='z' | 'A'..='Z' | '_' => true,
577 _ => is_xid_start(c),
578 }
579 }
580
581 fn is_identifier_continuation(&self) -> bool {
584 match self.window[0] {
585 Some('a'..='z' | 'A'..='Z' | '_' | '0'..='9') => true,
586 Some(c) => is_xid_continue(c),
587 _ => false,
588 }
589 }
590
    /// Produce the next token, lexing more of the source as needed.
    fn inner_next(&mut self) -> LexResult {
        // One consume step may queue several tokens (e.g. a run of dedents).
        while self.pending.is_empty() {
            if self.at_begin_of_line {
                self.handle_indentations()?;
            }

            self.consume_normal()?;
        }

        // Return queued tokens in FIFO order.
        Ok(self.pending.remove(0))
    }
606
    /// Consume the leading whitespace, comments and blank lines at the start
    /// of a line and compute the indentation level of the first code line.
    fn eat_indentation(&mut self) -> Result<IndentationLevel, LexicalError> {
        let mut spaces: u32 = 0;
        let mut tabs: u32 = 0;
        loop {
            match self.window[0] {
                Some(' ') => {
                    self.next_char();
                    spaces += 1;
                }
                Some('\t') => {
                    // Tabs may not follow spaces in the same indentation run.
                    if spaces != 0 {
                        return Err(LexicalError {
                            error: LexicalErrorType::TabsAfterSpaces,
                            location: self.get_pos(),
                        });
                    }
                    self.next_char();
                    tabs += 1;
                }
                Some('#') => {
                    // A comment-only line does not define an indentation
                    // level; reset and keep scanning.
                    self.lex_and_emit_comment()?;
                    spaces = 0;
                    tabs = 0;
                }
                Some('\x0C') => {
                    // A form feed resets the indentation count.
                    self.next_char();
                    spaces = 0;
                    tabs = 0;
                }
                Some('\n' | '\r') => {
                    // A blank line; its indentation is irrelevant.
                    #[cfg(feature = "full-lexer")]
                    let tok_start = self.get_pos();
                    self.next_char();
                    #[cfg(feature = "full-lexer")]
                    let tok_end = self.get_pos();
                    #[cfg(feature = "full-lexer")]
                    self.emit((Tok::NonLogicalNewline, TextRange::new(tok_start, tok_end)));
                    spaces = 0;
                    tabs = 0;
                }
                None => {
                    spaces = 0;
                    tabs = 0;
                    break;
                }
                _ => {
                    // Reached the first real token of the line.
                    self.at_begin_of_line = false;
                    break;
                }
            }
        }

        Ok(IndentationLevel { tabs, spaces })
    }
680
    /// Handle indentation at the start of a logical line, emitting
    /// `Indent`/`Dedent` tokens as the level changes.
    fn handle_indentations(&mut self) -> Result<(), LexicalError> {
        let indentation_level = self.eat_indentation()?;

        // Indentation is insignificant inside brackets.
        if self.nesting != 0 {
            return Ok(());
        }

        let current_indentation = self.indentations.current();
        let ordering = indentation_level.compare_strict(current_indentation, self.get_pos())?;
        match ordering {
            Ordering::Equal => {
                // Same level: nothing to emit.
            }
            Ordering::Greater => {
                // Deeper: open a level; the Indent token spans the consumed
                // whitespace.
                self.indentations.push(indentation_level);
                let tok_pos = self.get_pos();
                self.emit((
                    Tok::Indent,
                    TextRange::new(
                        tok_pos
                            - TextSize::new(indentation_level.spaces)
                            - TextSize::new(indentation_level.tabs),
                        tok_pos,
                    ),
                ));
            }
            Ordering::Less => {
                // Shallower: pop levels (one Dedent each) until a matching
                // level is found; failing to match is an indentation error.
                loop {
                    let current_indentation = self.indentations.current();
                    let ordering =
                        indentation_level.compare_strict(current_indentation, self.get_pos())?;
                    match ordering {
                        Ordering::Less => {
                            self.indentations.pop();
                            let tok_pos = self.get_pos();
                            self.emit((Tok::Dedent, TextRange::empty(tok_pos)));
                        }
                        Ordering::Equal => {
                            break;
                        }
                        Ordering::Greater => {
                            return Err(LexicalError {
                                error: LexicalErrorType::IndentationError,
                                location: self.get_pos(),
                            });
                        }
                    }
                }
            }
        }

        Ok(())
    }
741
    /// Consume one token from the current position, or — at end of input —
    /// emit the trailing `Newline`, `Dedent`s and `EndOfFile`.
    fn consume_normal(&mut self) -> Result<(), LexicalError> {
        if let Some(c) = self.window[0] {
            if self.is_identifier_start(c) {
                let identifier = self.lex_identifier()?;
                self.emit(identifier);
            } else {
                self.consume_character(c)?;
            }
        } else {
            // End of input.
            let tok_pos = self.get_pos();

            // An unclosed bracket at EOF is an error.
            if self.nesting > 0 {
                return Err(LexicalError {
                    error: LexicalErrorType::Eof,
                    location: tok_pos,
                });
            }

            // Terminate a trailing logical line that lacks a newline.
            if !self.at_begin_of_line {
                self.at_begin_of_line = true;
                self.emit((Tok::Newline, TextRange::empty(tok_pos)));
            }

            // Close every still-open indentation level.
            while !self.indentations.is_empty() {
                self.indentations.pop();
                self.emit((Tok::Dedent, TextRange::empty(tok_pos)));
            }

            self.emit((Tok::EndOfFile, TextRange::empty(tok_pos)));
        }

        Ok(())
    }
781
    /// Tokenize one non-identifier character: a number or string start, an
    /// operator/delimiter (with maximal-munch lookahead for compound forms),
    /// whitespace, a newline, or a line continuation.
    fn consume_character(&mut self, c: char) -> Result<(), LexicalError> {
        match c {
            '0'..='9' => {
                let number = self.lex_number()?;
                self.emit(number);
            }
            '#' => {
                self.lex_and_emit_comment()?;
            }
            '"' | '\'' => {
                let string = self.lex_string(StringKind::String)?;
                self.emit(string);
            }
            '=' => {
                // `=` or `==`.
                let tok_start = self.get_pos();
                self.next_char();
                match self.window[0] {
                    Some('=') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::EqEqual, TextRange::new(tok_start, tok_end)));
                    }
                    _ => {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Equal, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            '+' => {
                // `+` or `+=`.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::PlusEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::Plus, TextRange::new(tok_start, tok_end)));
                }
            }
            '*' => {
                // `*`, `*=`, `**` or `**=`.
                let tok_start = self.get_pos();
                self.next_char();
                match self.window[0] {
                    Some('=') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::StarEqual, TextRange::new(tok_start, tok_end)));
                    }
                    Some('*') => {
                        self.next_char();
                        match self.window[0] {
                            Some('=') => {
                                self.next_char();
                                let tok_end = self.get_pos();
                                self.emit((
                                    Tok::DoubleStarEqual,
                                    TextRange::new(tok_start, tok_end),
                                ));
                            }
                            _ => {
                                let tok_end = self.get_pos();
                                self.emit((Tok::DoubleStar, TextRange::new(tok_start, tok_end)));
                            }
                        }
                    }
                    _ => {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Star, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            '/' => {
                // `/`, `/=`, `//` or `//=`.
                let tok_start = self.get_pos();
                self.next_char();
                match self.window[0] {
                    Some('=') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::SlashEqual, TextRange::new(tok_start, tok_end)));
                    }
                    Some('/') => {
                        self.next_char();
                        match self.window[0] {
                            Some('=') => {
                                self.next_char();
                                let tok_end = self.get_pos();
                                self.emit((
                                    Tok::DoubleSlashEqual,
                                    TextRange::new(tok_start, tok_end),
                                ));
                            }
                            _ => {
                                let tok_end = self.get_pos();
                                self.emit((Tok::DoubleSlash, TextRange::new(tok_start, tok_end)));
                            }
                        }
                    }
                    _ => {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Slash, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            '%' => {
                // `%` or `%=`.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::PercentEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::Percent, TextRange::new(tok_start, tok_end)));
                }
            }
            '|' => {
                // `|` or `|=`.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::VbarEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::Vbar, TextRange::new(tok_start, tok_end)));
                }
            }
            '^' => {
                // `^` or `^=`.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::CircumflexEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::CircumFlex, TextRange::new(tok_start, tok_end)));
                }
            }
            '&' => {
                // `&` or `&=`.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::AmperEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::Amper, TextRange::new(tok_start, tok_end)));
                }
            }
            '-' => {
                // `-`, `-=` or `->`.
                let tok_start = self.get_pos();
                self.next_char();
                match self.window[0] {
                    Some('=') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::MinusEqual, TextRange::new(tok_start, tok_end)));
                    }
                    Some('>') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::Rarrow, TextRange::new(tok_start, tok_end)));
                    }
                    _ => {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Minus, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            '@' => {
                // `@` or `@=`.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::AtEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::At, TextRange::new(tok_start, tok_end)));
                }
            }
            '!' => {
                // Only `!=` is valid; a lone `!` is an error.
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::NotEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    return Err(LexicalError {
                        error: LexicalErrorType::UnrecognizedToken { tok: '!' },
                        location: tok_start,
                    });
                }
            }
            '~' => {
                self.eat_single_char(Tok::Tilde);
            }
            '(' => {
                self.eat_single_char(Tok::Lpar);
                self.nesting += 1;
            }
            ')' => {
                self.eat_single_char(Tok::Rpar);
                // Closing brackets must match an open one.
                if self.nesting == 0 {
                    return Err(LexicalError {
                        error: LexicalErrorType::NestingError,
                        location: self.get_pos(),
                    });
                }
                self.nesting -= 1;
            }
            '[' => {
                self.eat_single_char(Tok::Lsqb);
                self.nesting += 1;
            }
            ']' => {
                self.eat_single_char(Tok::Rsqb);
                if self.nesting == 0 {
                    return Err(LexicalError {
                        error: LexicalErrorType::NestingError,
                        location: self.get_pos(),
                    });
                }
                self.nesting -= 1;
            }
            '{' => {
                self.eat_single_char(Tok::Lbrace);
                self.nesting += 1;
            }
            '}' => {
                self.eat_single_char(Tok::Rbrace);
                if self.nesting == 0 {
                    return Err(LexicalError {
                        error: LexicalErrorType::NestingError,
                        location: self.get_pos(),
                    });
                }
                self.nesting -= 1;
            }
            ':' => {
                // `:` or `:=` (walrus).
                let tok_start = self.get_pos();
                self.next_char();
                if let Some('=') = self.window[0] {
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((Tok::ColonEqual, TextRange::new(tok_start, tok_end)));
                } else {
                    let tok_end = self.get_pos();
                    self.emit((Tok::Colon, TextRange::new(tok_start, tok_end)));
                }
            }
            ';' => {
                self.eat_single_char(Tok::Semi);
            }
            '<' => {
                // `<`, `<=`, `<<` or `<<=`.
                let tok_start = self.get_pos();
                self.next_char();
                match self.window[0] {
                    Some('<') => {
                        self.next_char();
                        match self.window[0] {
                            Some('=') => {
                                self.next_char();
                                let tok_end = self.get_pos();
                                self.emit((
                                    Tok::LeftShiftEqual,
                                    TextRange::new(tok_start, tok_end),
                                ));
                            }
                            _ => {
                                let tok_end = self.get_pos();
                                self.emit((Tok::LeftShift, TextRange::new(tok_start, tok_end)));
                            }
                        }
                    }
                    Some('=') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::LessEqual, TextRange::new(tok_start, tok_end)));
                    }
                    _ => {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Less, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            '>' => {
                // `>`, `>=`, `>>` or `>>=`.
                let tok_start = self.get_pos();
                self.next_char();
                match self.window[0] {
                    Some('>') => {
                        self.next_char();
                        match self.window[0] {
                            Some('=') => {
                                self.next_char();
                                let tok_end = self.get_pos();
                                self.emit((
                                    Tok::RightShiftEqual,
                                    TextRange::new(tok_start, tok_end),
                                ));
                            }
                            _ => {
                                let tok_end = self.get_pos();
                                self.emit((Tok::RightShift, TextRange::new(tok_start, tok_end)));
                            }
                        }
                    }
                    Some('=') => {
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::GreaterEqual, TextRange::new(tok_start, tok_end)));
                    }
                    _ => {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Greater, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            ',' => {
                self.eat_single_char(Tok::Comma);
            }
            '.' => {
                // `.`, `...`, or the start of a float like `.5`.
                if let Some('0'..='9') = self.window[1] {
                    let number = self.lex_number()?;
                    self.emit(number);
                } else {
                    let tok_start = self.get_pos();
                    self.next_char();
                    if self.window[..2] == [Some('.'); 2] {
                        self.next_char();
                        self.next_char();
                        let tok_end = self.get_pos();
                        self.emit((Tok::Ellipsis, TextRange::new(tok_start, tok_end)));
                    } else {
                        let tok_end = self.get_pos();
                        self.emit((Tok::Dot, TextRange::new(tok_start, tok_end)));
                    }
                }
            }
            '\n' | '\r' => {
                let tok_start = self.get_pos();
                self.next_char();
                let tok_end = self.get_pos();

                // A newline at nesting level 0 ends a logical line; inside
                // brackets it is non-logical (emitted only by the full lexer).
                if self.nesting == 0 {
                    self.at_begin_of_line = true;
                    self.emit((Tok::Newline, TextRange::new(tok_start, tok_end)));
                } else {
                    #[cfg(feature = "full-lexer")]
                    self.emit((Tok::NonLogicalNewline, TextRange::new(tok_start, tok_end)));
                }
            }
            ' ' | '\t' | '\x0C' => {
                // Skip a run of insignificant whitespace.
                self.next_char();
                while let Some(' ' | '\t' | '\x0C') = self.window[0] {
                    self.next_char();
                }
            }
            '\\' => {
                // A backslash must be followed by a newline (line
                // continuation) and must not end the file.
                self.next_char();
                match self.window[0] {
                    Some('\n' | '\r') => {
                        self.next_char();
                    }
                    _ => {
                        return Err(LexicalError {
                            error: LexicalErrorType::LineContinuationError,
                            location: self.get_pos(),
                        });
                    }
                }

                if self.window[0].is_none() {
                    return Err(LexicalError {
                        error: LexicalErrorType::Eof,
                        location: self.get_pos(),
                    });
                }
            }
            _ => {
                // Emoji presentation characters are accepted as names;
                // anything else is an unrecognized token.
                if is_emoji_presentation(c) {
                    let tok_start = self.get_pos();
                    self.next_char();
                    let tok_end = self.get_pos();
                    self.emit((
                        Tok::Name {
                            name: c.to_string(),
                        },
                        TextRange::new(tok_start, tok_end),
                    ));
                } else {
                    let c = self.next_char();
                    return Err(LexicalError {
                        error: LexicalErrorType::UnrecognizedToken { tok: c.unwrap() },
                        location: self.get_pos(),
                    });
                }
            }
        }

        Ok(())
    }
1192
1193 fn eat_single_char(&mut self, ty: Tok) {
1195 let tok_start = self.get_pos();
1196 self.next_char().unwrap_or_else(|| unsafe {
1197 std::hint::unreachable_unchecked()
1200 });
1201 let tok_end = self.get_pos();
1202 self.emit((ty, TextRange::new(tok_start, tok_end)));
1203 }
1204
    /// Consume the next character, normalizing `\r` and `\r\n` to `\n` and
    /// advancing `location` by the width actually consumed.
    fn next_char(&mut self) -> Option<char> {
        let mut c = self.window[0];
        self.window.slide();
        match c {
            Some('\r') => {
                // `\r\n` is a single logical newline but two characters wide.
                if self.window[0] == Some('\n') {
                    self.location += TextSize::from(1);
                    self.window.slide();
                }

                self.location += TextSize::from(1);
                c = Some('\n');
            }
            #[allow(unused_variables)]
            Some(c) => {
                self.location += c.text_len();
            }
            _ => {}
        }
        c
    }
1227
    /// The current byte offset of the lexer within the source.
    fn get_pos(&self) -> TextSize {
        self.location
    }
1232
    /// Queue a token to be returned by `inner_next`.
    fn emit(&mut self, spanned: Spanned) {
        self.pending.push(spanned);
    }
1237}
1238
1239impl<T> Iterator for Lexer<T>
1243where
1244 T: Iterator<Item = char>,
1245{
1246 type Item = LexResult;
1247
1248 fn next(&mut self) -> Option<Self::Item> {
1249 let token = self.inner_next();
1250 trace!(
1251 "Lex token {:?}, nesting={:?}, indent stack: {:?}",
1252 token,
1253 self.nesting,
1254 self.indentations,
1255 );
1256
1257 match token {
1258 Ok((Tok::EndOfFile, _)) => None,
1259 r => Some(r),
1260 }
1261 }
1262}
1263
/// An error produced during lexing.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
    /// The kind of error that occurred.
    pub error: LexicalErrorType,
    /// The offset in the source where the error was detected.
    pub location: TextSize,
}
1276
impl LexicalError {
    /// Create a new `LexicalError` of kind `error` at `location`.
    pub fn new(error: LexicalErrorType, location: TextSize) -> Self {
        Self { error, location }
    }
}
1283
/// The kind of a [`LexicalError`]; the `Display` impl below renders the
/// user-facing message for each variant.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
    // An unexpected string was encountered; hidden from the public docs.
    #[doc(hidden)]
    StringError,
    /// Unexpected unicode was encountered.
    UnicodeError,
    /// Brackets are unbalanced (e.g. a closing bracket with none open).
    NestingError,
    /// An unindent does not match any outer indentation level.
    IndentationError,
    /// Inconsistent use of tabs and spaces in indentation.
    TabError,
    /// Tabs appeared in indentation after spaces.
    TabsAfterSpaces,
    /// A non-default argument follows a default argument.
    DefaultArgumentError,
    /// A duplicate argument name in a function definition.
    DuplicateArgumentError(String),
    /// A positional argument follows a keyword argument.
    PositionalArgumentError,
    /// Iterable argument unpacking follows keyword argument unpacking.
    UnpackedArgumentError,
    /// A keyword argument was repeated.
    DuplicateKeywordArgumentError(String),
    /// An unexpected character was encountered.
    UnrecognizedToken { tok: char },
    /// An error inside an f-string.
    FStringError(FStringErrorType),
    /// An unexpected character after a line continuation (`\`).
    LineContinuationError,
    /// Unexpected end of file.
    Eof,
    /// Any other error, carrying its message.
    OtherError(String),
}
1323
impl std::fmt::Display for LexicalErrorType {
    // Messages deliberately mirror CPython's wording where applicable, so
    // they must not be reworded casually.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
            LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
            LexicalErrorType::IndentationError => {
                write!(f, "unindent does not match any outer indentation level")
            }
            LexicalErrorType::TabError => {
                write!(f, "inconsistent use of tabs and spaces in indentation")
            }
            LexicalErrorType::TabsAfterSpaces => {
                write!(f, "Tabs not allowed as part of indentation after spaces")
            }
            LexicalErrorType::DefaultArgumentError => {
                write!(f, "non-default argument follows default argument")
            }
            LexicalErrorType::DuplicateArgumentError(arg_name) => {
                write!(f, "duplicate argument '{arg_name}' in function definition")
            }
            LexicalErrorType::DuplicateKeywordArgumentError(arg_name) => {
                write!(f, "keyword argument repeated: {arg_name}")
            }
            LexicalErrorType::PositionalArgumentError => {
                write!(f, "positional argument follows keyword argument")
            }
            LexicalErrorType::UnpackedArgumentError => {
                write!(
                    f,
                    "iterable argument unpacking follows keyword argument unpacking"
                )
            }
            LexicalErrorType::UnrecognizedToken { tok } => {
                write!(f, "Got unexpected token {tok}")
            }
            LexicalErrorType::LineContinuationError => {
                write!(f, "unexpected character after line continuation character")
            }
            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
        }
    }
}
1369
1370#[cfg(test)]
1371mod tests {
1372 use super::*;
1373 use crate::ast::bigint::BigInt;
1374
    // End-of-line markers for the three newline conventions under test.
    const WINDOWS_EOL: &str = "\r\n";
    const MAC_EOL: &str = "\r";
    const UNIX_EOL: &str = "\n";
1378
1379 pub fn lex_source(source: &str) -> Vec<Tok> {
1380 let lexer = lex(source, Mode::Module);
1381 lexer.map(|x| x.unwrap().0).collect()
1382 }
1383
    /// Build an ordinary (non-raw, non-triple-quoted) string token.
    fn str_tok(s: &str) -> Tok {
        Tok::String {
            value: s.to_owned(),
            kind: StringKind::String,
            triple_quoted: false,
        }
    }
1391
    /// Build a raw (non-triple-quoted) string token.
    fn raw_str_tok(s: &str) -> Tok {
        Tok::String {
            value: s.to_owned(),
            kind: StringKind::RawString,
            triple_quoted: false,
        }
    }
1399
    // Covers hex/octal/binary prefixes, underscores, floats, exponents, and
    // imaginary literals in one pass.
    #[test]
    fn test_numbers() {
        let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j";
        let tokens = lex_source(source);
        assert_eq!(
            tokens,
            vec![
                Tok::Int {
                    value: BigInt::from(47),
                },
                Tok::Int {
                    value: BigInt::from(10)
                },
                Tok::Int {
                    value: BigInt::from(13),
                },
                Tok::Int {
                    value: BigInt::from(0),
                },
                Tok::Int {
                    value: BigInt::from(123),
                },
                Tok::Int {
                    value: BigInt::from(1234567890),
                },
                Tok::Float { value: 0.2 },
                Tok::Float { value: 100.0 },
                Tok::Float { value: 2100.0 },
                Tok::Complex {
                    real: 0.0,
                    imag: 2.0,
                },
                Tok::Complex {
                    real: 0.0,
                    imag: 2.2,
                },
                Tok::Newline,
            ]
        );
    }
1440
    // Generates one test per comment body, checking that a trailing comment
    // becomes a Comment token followed by the logical Newline.
    macro_rules! test_line_comment {
        ($($name:ident: $eol:expr,)*) => {
            $(
                #[test]
                #[cfg(feature = "full-lexer")]
                fn $name() {
                    let source = format!(r"99232  # {}", $eol);
                    let tokens = lex_source(&source);
                    assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }, Tok::Comment(format!("# {}", $eol)), Tok::Newline]);
                }
            )*
        }
    }

    test_line_comment! {
        test_line_comment_long: " foo",
        test_line_comment_whitespace: "   ",
        test_line_comment_single_whitespace: " ",
        test_line_comment_empty: "",
    }
1461
1462 macro_rules! test_comment_until_eol {
1463 ($($name:ident: $eol:expr,)*) => {
1464 $(
1465 #[test]
1466 #[cfg(feature = "full-lexer")]
1467 fn $name() {
1468 let source = format!("123 # Foo{}456", $eol);
1469 let tokens = lex_source(&source);
1470 assert_eq!(
1471 tokens,
1472 vec![
1473 Tok::Int { value: BigInt::from(123) },
1474 Tok::Comment("# Foo".to_string()),
1475 Tok::Newline,
1476 Tok::Int { value: BigInt::from(456) },
1477 Tok::Newline,
1478 ]
1479 )
1480 }
1481 )*
1482 }
1483 }
1484
    // Run the comment-termination test under all three EOL conventions.
    test_comment_until_eol! {
        test_comment_until_windows_eol: WINDOWS_EOL,
        test_comment_until_mac_eol: MAC_EOL,
        test_comment_until_unix_eol: UNIX_EOL,
    }
1490
1491 #[test]
1492 fn test_assignment() {
1493 let source = r"a_variable = 99 + 2-0";
1494 let tokens = lex_source(source);
1495 assert_eq!(
1496 tokens,
1497 vec![
1498 Tok::Name {
1499 name: String::from("a_variable"),
1500 },
1501 Tok::Equal,
1502 Tok::Int {
1503 value: BigInt::from(99)
1504 },
1505 Tok::Plus,
1506 Tok::Int {
1507 value: BigInt::from(2)
1508 },
1509 Tok::Minus,
1510 Tok::Int {
1511 value: BigInt::from(0)
1512 },
1513 Tok::Newline,
1514 ]
1515 );
1516 }
1517
1518 macro_rules! test_indentation_with_eol {
1519 ($($name:ident: $eol:expr,)*) => {
1520 $(
1521 #[test]
1522 #[cfg(feature = "full-lexer")]
1523 fn $name() {
1524 let source = format!("def foo():{} return 99{}{}", $eol, $eol, $eol);
1525 let tokens = lex_source(&source);
1526 assert_eq!(
1527 tokens,
1528 vec![
1529 Tok::Def,
1530 Tok::Name {
1531 name: String::from("foo"),
1532 },
1533 Tok::Lpar,
1534 Tok::Rpar,
1535 Tok::Colon,
1536 Tok::Newline,
1537 Tok::Indent,
1538 Tok::Return,
1539 Tok::Int { value: BigInt::from(99) },
1540 Tok::Newline,
1541 Tok::NonLogicalNewline,
1542 Tok::Dedent,
1543 ]
1544 );
1545 }
1546 )*
1547 };
1548 }
1549
    // Run the single-indent test under all three EOL conventions.
    test_indentation_with_eol! {
        test_indentation_windows_eol: WINDOWS_EOL,
        test_indentation_mac_eol: MAC_EOL,
        test_indentation_unix_eol: UNIX_EOL,
    }
1555
1556 macro_rules! test_double_dedent_with_eol {
1557 ($($name:ident: $eol:expr,)*) => {
1558 $(
1559 #[test]
1560 #[cfg(feature = "full-lexer")]
1561 fn $name() {
1562 let source = format!("def foo():{} if x:{}{} return 99{}{}", $eol, $eol, $eol, $eol, $eol);
1563 let tokens = lex_source(&source);
1564 assert_eq!(
1565 tokens,
1566 vec![
1567 Tok::Def,
1568 Tok::Name {
1569 name: String::from("foo"),
1570 },
1571 Tok::Lpar,
1572 Tok::Rpar,
1573 Tok::Colon,
1574 Tok::Newline,
1575 Tok::Indent,
1576 Tok::If,
1577 Tok::Name {
1578 name: String::from("x"),
1579 },
1580 Tok::Colon,
1581 Tok::Newline,
1582 Tok::NonLogicalNewline,
1583 Tok::Indent,
1584 Tok::Return,
1585 Tok::Int { value: BigInt::from(99) },
1586 Tok::Newline,
1587 Tok::NonLogicalNewline,
1588 Tok::Dedent,
1589 Tok::Dedent,
1590 ]
1591 );
1592 }
1593 )*
1594 }
1595 }
1596
1597 macro_rules! test_double_dedent_with_tabs {
1598 ($($name:ident: $eol:expr,)*) => {
1599 $(
1600 #[test]
1601 #[cfg(feature = "full-lexer")]
1602 fn $name() {
1603 let source = format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol);
1604 let tokens = lex_source(&source);
1605 assert_eq!(
1606 tokens,
1607 vec![
1608 Tok::Def,
1609 Tok::Name {
1610 name: String::from("foo"),
1611 },
1612 Tok::Lpar,
1613 Tok::Rpar,
1614 Tok::Colon,
1615 Tok::Newline,
1616 Tok::Indent,
1617 Tok::If,
1618 Tok::Name {
1619 name: String::from("x"),
1620 },
1621 Tok::Colon,
1622 Tok::Newline,
1623 Tok::NonLogicalNewline,
1624 Tok::Indent,
1625 Tok::Return,
1626 Tok::Int { value: BigInt::from(99) },
1627 Tok::Newline,
1628 Tok::NonLogicalNewline,
1629 Tok::Dedent,
1630 Tok::Dedent,
1631 ]
1632 );
1633 }
1634 )*
1635 }
1636 }
1637
    // Run the double-dedent tests (space-indented variant) under all three
    // EOL conventions.
    test_double_dedent_with_eol! {
        test_double_dedent_windows_eol: WINDOWS_EOL,
        test_double_dedent_mac_eol: MAC_EOL,
        test_double_dedent_unix_eol: UNIX_EOL,
    }

    // Same, for the tab-indented variant.
    test_double_dedent_with_tabs! {
        test_double_dedent_tabs_windows_eol: WINDOWS_EOL,
        test_double_dedent_tabs_mac_eol: MAC_EOL,
        test_double_dedent_tabs_unix_eol: UNIX_EOL,
    }
1649
    // Generates one test per EOL style: line breaks inside (), [] and {}
    // are emitted as NonLogicalNewline and never produce Indent/Dedent,
    // even though `    1,2` is indented in the input.
    macro_rules! test_newline_in_brackets {
        ($($name:ident: $eol:expr,)*) => {
            $(
                #[test]
                #[cfg(feature = "full-lexer")]
                fn $name() {
                    // NOTE: the literal is deliberately unindented — its exact
                    // bytes (after EOL substitution) are the lexer input.
                    let source = r"x = [

    1,2
,(3,
4,
), {
5,
6,\
7}]
".replace("\n", $eol);
                    let tokens = lex_source(&source);
                    assert_eq!(
                        tokens,
                        vec![
                            Tok::Name {
                                name: String::from("x"),
                            },
                            Tok::Equal,
                            Tok::Lsqb,
                            Tok::NonLogicalNewline,
                            Tok::NonLogicalNewline,
                            Tok::Int { value: BigInt::from(1) },
                            Tok::Comma,
                            Tok::Int { value: BigInt::from(2) },
                            Tok::NonLogicalNewline,
                            Tok::Comma,
                            Tok::Lpar,
                            Tok::Int { value: BigInt::from(3) },
                            Tok::Comma,
                            Tok::NonLogicalNewline,
                            Tok::Int { value: BigInt::from(4) },
                            Tok::Comma,
                            Tok::NonLogicalNewline,
                            Tok::Rpar,
                            Tok::Comma,
                            Tok::Lbrace,
                            Tok::NonLogicalNewline,
                            Tok::Int { value: BigInt::from(5) },
                            Tok::Comma,
                            Tok::NonLogicalNewline,
                            Tok::Int { value: BigInt::from(6) },
                            Tok::Comma,
                            // `6,\` ends in an explicit line join, so no
                            // NonLogicalNewline is emitted before the 7.
                            Tok::Int { value: BigInt::from(7) },
                            Tok::Rbrace,
                            Tok::Rsqb,
                            Tok::Newline,
                        ]
                    );
                }
            )*
        };
    }
1709
    // Run the bracketed-newline test under all three EOL conventions.
    test_newline_in_brackets! {
        test_newline_in_brackets_windows_eol: WINDOWS_EOL,
        test_newline_in_brackets_mac_eol: MAC_EOL,
        test_newline_in_brackets_unix_eol: UNIX_EOL,
    }
1715
    #[test]
    #[cfg(feature = "full-lexer")]
    fn test_non_logical_newline_in_string_continuation() {
        // Adjacent string literals on separate lines inside parentheses:
        // each line break becomes a NonLogicalNewline, except after the
        // trailing backslash (`'c' \`), which joins lines without emitting
        // any newline token between 'c' and 'd'.
        let source = r"(
    'a'
    'b'

    'c' \
    'd'
)";
        let tokens = lex_source(source);
        assert_eq!(
            tokens,
            vec![
                Tok::Lpar,
                Tok::NonLogicalNewline,
                str_tok("a"),
                Tok::NonLogicalNewline,
                str_tok("b"),
                Tok::NonLogicalNewline,
                Tok::NonLogicalNewline, // the blank line
                str_tok("c"),
                str_tok("d"),
                Tok::NonLogicalNewline,
                Tok::Rpar,
                Tok::Newline,
            ]
        );
    }
1745
1746 #[test]
1747 #[cfg(feature = "full-lexer")]
1748 fn test_logical_newline_line_comment() {
1749 let source = "#Hello\n#World\n";
1750 let tokens = lex_source(source);
1751 assert_eq!(
1752 tokens,
1753 vec![
1754 Tok::Comment("#Hello".to_owned()),
1755 Tok::NonLogicalNewline,
1756 Tok::Comment("#World".to_owned()),
1757 Tok::NonLogicalNewline,
1758 ]
1759 );
1760 }
1761
1762 #[test]
1763 fn test_operators() {
1764 let source = "//////=/ /";
1765 let tokens = lex_source(source);
1766 assert_eq!(
1767 tokens,
1768 vec![
1769 Tok::DoubleSlash,
1770 Tok::DoubleSlash,
1771 Tok::DoubleSlashEqual,
1772 Tok::Slash,
1773 Tok::Slash,
1774 Tok::Newline,
1775 ]
1776 );
1777 }
1778
    #[test]
    fn test_string() {
        // The token values keep every backslash sequence exactly as written
        // in the source — escape sequences are not decoded by the lexer.
        let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\'' '\420' '\200\0a'"#;
        let tokens = lex_source(source);
        assert_eq!(
            tokens,
            vec![
                str_tok("double"),
                str_tok("single"),
                str_tok(r"can\'t"), // escaped quote kept verbatim
                str_tok(r#"\\\""#),
                str_tok(r"\t\r\n"),
                str_tok(r"\g"), // unrecognized escape kept verbatim
                raw_str_tok(r"raw\'"),
                str_tok(r"\420"),
                str_tok(r"\200\0a"),
                Tok::Newline,
            ]
        );
    }
1799
1800 macro_rules! test_string_continuation {
1801 ($($name:ident: $eol:expr,)*) => {
1802 $(
1803 #[test]
1804 fn $name() {
1805 let source = format!("\"abc\\{}def\"", $eol);
1806 let tokens = lex_source(&source);
1807 assert_eq!(
1808 tokens,
1809 vec![
1810 str_tok("abc\\\ndef"),
1811 Tok::Newline,
1812 ]
1813 )
1814 }
1815 )*
1816 }
1817 }
1818
    // Run the string-continuation test under all three EOL conventions.
    test_string_continuation! {
        test_string_continuation_windows_eol: WINDOWS_EOL,
        test_string_continuation_mac_eol: MAC_EOL,
        test_string_continuation_unix_eol: UNIX_EOL,
    }
1824
1825 #[test]
1826 fn test_escape_unicode_name() {
1827 let source = r#""\N{EN SPACE}""#;
1828 let tokens = lex_source(source);
1829 assert_eq!(tokens, vec![str_tok(r"\N{EN SPACE}"), Tok::Newline])
1830 }
1831
1832 macro_rules! test_triple_quoted {
1833 ($($name:ident: $eol:expr,)*) => {
1834 $(
1835 #[test]
1836 fn $name() {
1837 let source = format!("\"\"\"{0} test string{0} \"\"\"", $eol);
1838 let tokens = lex_source(&source);
1839 assert_eq!(
1840 tokens,
1841 vec![
1842 Tok::String {
1843 value: "\n test string\n ".to_owned(),
1844 kind: StringKind::String,
1845 triple_quoted: true,
1846 },
1847 Tok::Newline,
1848 ]
1849 )
1850 }
1851 )*
1852 }
1853 }
1854
    // Run the triple-quoted-string test under all three EOL conventions.
    test_triple_quoted! {
        test_triple_quoted_windows_eol: WINDOWS_EOL,
        test_triple_quoted_mac_eol: MAC_EOL,
        test_triple_quoted_unix_eol: UNIX_EOL,
    }
1860}