use std::char;
use std::collections::VecDeque;
use std::fmt;
use std::fmt::Display;

use logos::Logos;
use num_bigint::BigInt;
use num_traits::Num;
use thiserror::Error;

use crate::codemap::CodeMap;
use crate::codemap::Pos;
use crate::codemap::Span;
use crate::cursors::CursorBytes;
use crate::cursors::CursorChars;
use crate::dialect::Dialect;
use crate::eval_exception::EvalException;

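/// Errors that can be raised while lexing; each is reported as a parse error
/// carrying a span into the source.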
#[derive(Error, Debug)]
pub enum LexemeError {
    #[error("Parse error: incorrect indentation")]
    Indentation,
    #[error("Parse error: invalid input `{0}`")]
    InvalidInput(String),
    #[error("Parse error: tabs are not allowed")]
    InvalidTab,
    #[error("Parse error: unfinished string literal")]
    UnfinishedStringLiteral,
    #[error("Parse error: invalid string escape sequence `{0}`")]
    InvalidEscapeSequence(String),
    #[error("Parse error: missing string escape sequence, only saw `\\`")]
    EmptyEscapeSequence,
    #[error("Parse error: cannot use reserved keyword `{0}`")]
    ReservedKeyword(String),
    #[error("Parse error: integer cannot have leading 0, got `{0}`")]
    StartsZero(String),
    #[error("Parse error: failed to parse integer: `{0}`")]
    IntParse(String),
    #[error("Comment span is computed incorrectly (internal error)")]
    CommentSpanComputedIncorrectly,
    #[error("Cannot parse `{0}` as an integer in base {1}")]
    CannotParse(String, u32),
}

impl From<LexemeError> for crate::error::Error {
    fn from(e: LexemeError) -> Self {
        crate::error::Error::new_kind(crate::error::ErrorKind::Parser(anyhow::Error::new(e)))
    }
}
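
/// A lexeme is `(start, token, end)`, with byte offsets into the input,
/// or a positioned error.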
type LexemeT<T> = Result<(usize, T, usize), EvalException>;
type Lexeme = LexemeT<Token>;

fn map_lexeme_t<T1, T2>(lexeme: LexemeT<T1>, f: impl FnOnce(T1) -> T2) -> LexemeT<T2> {
    lexeme.map(|(l, t, r)| (l, f(t), r))
}

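/// A wrapper around the `logos`-generated lexer that adds the context
/// sensitivity the grammar needs: indentation tracking, bracket-aware
/// newlines, string/integer post-processing, and errors carrying spans.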
pub struct Lexer<'a> {
    // The source being lexed, used to slice out comments and to attach spans to errors.
    codemap: CodeMap,
    // Stack of indentation levels currently open.
    indent_levels: Vec<usize>,
    // Lexemes queued up to be returned before pulling more from the underlying lexer.
    buffer: VecDeque<Lexeme>,
    // Nesting depth of `(`, `[` and `{`; newlines are insignificant inside brackets.
    parens: isize,
    lexer: logos::Lexer<'a, Token>,
    // Set once the underlying lexer has been exhausted.
    done: bool,
}

impl<'a> Lexer<'a> {
    pub fn new(input: &'a str, _dialect: &Dialect, codemap: CodeMap) -> Self {
        let lexer = Token::lexer(input);
        let mut lexer2 = Self {
            codemap,
            indent_levels: Vec::with_capacity(20),
            buffer: VecDeque::with_capacity(10),
            lexer,
            parens: 0,
            done: false,
        };
        if let Err(e) = lexer2.calculate_indent() {
            lexer2.buffer.push_back(Err(e));
        }
        lexer2
    }

    fn err_pos<T>(&self, msg: LexemeError, pos: usize) -> Result<T, EvalException> {
        self.err_span(msg, pos, pos)
    }

    fn err_span<T>(&self, msg: LexemeError, start: usize, end: usize) -> Result<T, EvalException> {
        Err(EvalException::new(
            msg.into(),
            Span::new(Pos::new(start as u32), Pos::new(end as u32)),
            &self.codemap,
        ))
    }

    fn err_now<T>(&self, msg: fn(String) -> LexemeError) -> Result<T, EvalException> {
        self.err_span(
            msg(self.lexer.slice().to_owned()),
            self.lexer.span().start,
            self.lexer.span().end,
        )
    }

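    /// Build a `Comment` token from `source[start..end]`, stripping the
    /// leading `#` and any trailing `\r`.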
    #[allow(clippy::manual_strip)]
    fn make_comment(&self, start: usize, end: usize) -> Lexeme {
        let comment = &self.codemap.source()[start..end];
        if !comment.starts_with('#') {
            return self.err_pos(LexemeError::CommentSpanComputedIncorrectly, start);
        }
        let comment = &comment[1..];
        if comment.ends_with('\r') {
            let end = end - 1;
            let comment = &comment[..comment.len() - 1];
            Ok((start, Token::Comment(comment.to_owned()), end))
        } else {
            Ok((start, Token::Comment(comment.to_owned()), end))
        }
    }

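    /// Consume the whitespace (and any comment-only lines) after a newline,
    /// then push `Indent`/`Dedent` tokens into the buffer according to how the
    /// new indentation compares with the current stack of levels.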
    fn calculate_indent(&mut self) -> Result<(), EvalException> {
        let mut it = CursorBytes::new(self.lexer.remainder());
        let mut spaces = 0;
        let mut tabs = 0;
        let mut indent_start = self.lexer.span().end;
        loop {
            match it.next_char() {
                None => {
                    self.lexer.bump(it.pos());
                    return Ok(());
                }
                Some(' ') => {
                    spaces += 1;
                }
                Some('\t') => {
                    tabs += 1;
                }
                Some('\n') => {
                    // A blank line: no indentation change.
                    self.lexer.bump(it.pos() - 1);
                    return Ok(());
                }
                Some('\r') => {
                    // Ignore `\r`; the following `\n` is handled above.
                }
                Some('#') => {
                    // A comment-only line: emit the comment, then keep scanning
                    // for the real indentation on the next line.
                    spaces = 0;
                    tabs = 0;
                    let start = self.lexer.span().end + it.pos() - 1;
                    loop {
                        match it.next_char() {
                            None => {
                                let end = self.lexer.span().end + it.pos();
                                self.buffer.push_back(self.make_comment(start, end));
                                self.lexer.bump(it.pos());
                                return Ok(());
                            }
                            Some('\n') => break,
                            Some(_) => {}
                        }
                    }
                    let end = self.lexer.span().end + it.pos() - 1;
                    self.buffer.push_back(self.make_comment(start, end));
                    indent_start = self.lexer.span().end + it.pos();
                }
                _ => break,
            }
        }
        // Don't consume the first character of the next token.
        self.lexer.bump(it.pos() - 1);
        let indent = spaces + tabs * 8;
        if tabs > 0 {
            return self.err_pos(LexemeError::InvalidTab, self.lexer.span().start);
        }
        let now = self.indent_levels.last().copied().unwrap_or(0);

        if indent > now {
            self.indent_levels.push(indent);
            let span = self.lexer.span();
            self.buffer
                .push_back(Ok((indent_start, Token::Indent, span.end)));
        } else if indent < now {
            let mut dedents = 1;
            self.indent_levels.pop().unwrap();
            loop {
                let now = self.indent_levels.last().copied().unwrap_or(0);
                if now == indent {
                    break;
                } else if now > indent {
                    dedents += 1;
                    self.indent_levels.pop().unwrap();
                } else {
                    // The new indentation doesn't match any enclosing level.
                    let pos = self.lexer.span();
                    return self.err_span(LexemeError::Indentation, pos.start, pos.end);
                }
            }
            for _ in 0..dedents {
                self.buffer
                    .push_back(Ok((indent_start, Token::Dedent, indent_start)))
            }
        }
        Ok(())
    }

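    /// Wrap `token` in the span of the lexeme just matched.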
    fn wrap(&mut self, token: Token) -> Option<Lexeme> {
        let span = self.lexer.span();
        Some(Ok((span.start, token, span.end)))
    }

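    /// Read between `min` and `max` digits in `radix` and decode them as a
    /// `char`, e.g. the `NN` of `\xNN` or the `NNNN` of `\uNNNN`.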
    fn escape_char(it: &mut CursorChars, min: usize, max: usize, radix: u32) -> Result<char, ()> {
        let mut value = 0u32;
        let mut count = 0;
        while count < max {
            match it.next() {
                None => {
                    if count >= min {
                        break;
                    } else {
                        return Err(());
                    }
                }
                Some(c) => match c.to_digit(radix) {
                    None => {
                        if count >= min {
                            it.unnext(c);
                            break;
                        } else {
                            return Err(());
                        }
                    }
                    Some(v) => {
                        count += 1;
                        value = (value * radix) + v;
                    }
                },
            }
        }
        char::from_u32(value).ok_or(())
    }

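    /// Decode the escape sequence following a `\` in a non-raw string,
    /// appending the result (if any) to `res`.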
    fn escape(it: &mut CursorChars, res: &mut String) -> Result<(), ()> {
        match it.next() {
            Some('n') => res.push('\n'),
            Some('r') => res.push('\r'),
            Some('t') => res.push('\t'),
            Some('a') => res.push('\x07'),
            Some('b') => res.push('\x08'),
            Some('f') => res.push('\x0C'),
            Some('v') => res.push('\x0B'),
            Some('\n') => {}
            Some('\r') => {
                // A `\` before `\r\n` is a line continuation; a bare `\r` is not.
                if it.next() != Some('\n') {
                    return Err(());
                }
            }
            Some('x') => res.push(Self::escape_char(it, 2, 2, 16)?),
            Some('u') => res.push(Self::escape_char(it, 4, 4, 16)?),
            Some('U') => res.push(Self::escape_char(it, 8, 8, 16)?),
            Some(c) => match c {
                '0'..='7' => {
                    // Octal escape: put the digit back and read up to three.
                    it.unnext(c);
                    res.push(Self::escape_char(it, 1, 3, 8)?)
                }
                '"' | '\'' | '\\' => res.push(c),
                _ => {
                    // Unknown escapes are kept verbatim.
                    res.push('\\');
                    res.push(c);
                }
            },
            None => {
                return Err(());
            }
        };
        Ok(())
    }

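    /// Lex the remainder of a string literal whose opening delimiter was
    /// already matched by `logos`. `stop` reports when the closing delimiter
    /// has been seen. Returns the decoded contents together with their offset
    /// within the remaining input.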
    fn string(
        &mut self,
        triple: bool,
        raw: bool,
        mut stop: impl FnMut(char) -> bool,
    ) -> LexemeT<(String, usize)> {
        let string_start = self.lexer.span().start;
        let mut string_end = self.lexer.span().end;

        let mut it = CursorBytes::new(self.lexer.remainder());
        let it2;

        if triple {
            // Skip the two remaining quotes of the opening delimiter.
            it.next();
            it.next();
        }
        let contents_start = it.pos();

        let mut res;
        // Fast path: if the string contains no escapes or carriage returns,
        // its contents can be copied wholesale rather than built up char by char.
        loop {
            match it.next_char() {
                None => {
                    return self.err_span(
                        LexemeError::UnfinishedStringLiteral,
                        string_start,
                        string_end + it.pos(),
                    );
                }
                Some(c) => {
                    if stop(c) {
                        let contents_end = it.pos() - if triple { 3 } else { 1 };
                        let contents = &self.lexer.remainder()[contents_start..contents_end];
                        self.lexer.bump(it.pos());
                        return Ok((
                            string_start,
                            (contents.to_owned(), contents_start),
                            string_end + it.pos(),
                        ));
                    } else if c == '\\' || c == '\r' || (c == '\n' && !triple) {
                        // Slow path: switch to building the string up character by character.
                        res = String::with_capacity(it.pos() + 10);
                        res.push_str(&self.lexer.remainder()[contents_start..it.pos() - 1]);
                        it2 = CursorChars::new_offset(self.lexer.remainder(), it.pos() - 1);
                        break;
                    }
                }
            }
        }

        let mut it = it2;
        while let Some(c) = it.next() {
            if stop(c) {
                self.lexer.bump(it.pos());
                if triple {
                    // Drop the two quotes of the closing delimiter already pushed.
                    res.truncate(res.len() - 2);
                }
                return Ok((string_start, (res, contents_start), string_end + it.pos()));
            }
            match c {
                '\n' if !triple => {
                    // A single-quoted string may not span lines.
                    string_end -= 1;
                    break;
                }
                '\r' => {
                    // Normalize line endings by dropping `\r`.
                }
                '\\' => {
                    if raw {
                        match it.next() {
                            Some(c) => {
                                // In a raw string a backslash only escapes quotes.
                                if c != '\'' && c != '"' {
                                    res.push('\\');
                                }
                                res.push(c);
                            }
                            _ => break, // Trailing backslash: unfinished string.
                        }
                    } else {
                        let pos = it.pos();
                        if Self::escape(&mut it, &mut res).is_err() {
                            let bad = self.lexer.remainder()[pos..it.pos()].to_owned();
                            return self.err_span(
                                if bad.is_empty() {
                                    LexemeError::EmptyEscapeSequence
                                } else {
                                    LexemeError::InvalidEscapeSequence(bad)
                                },
                                string_end + pos - 1,
                                string_end + it.pos(),
                            );
                        }
                    }
                }
                c => res.push(c),
            }
        }

        self.err_span(
            LexemeError::UnfinishedStringLiteral,
            string_start,
            string_end + it.pos(),
        )
    }

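    /// Parse the digits of an integer literal in the given radix into an
    /// `Int` token spanning the current lexeme.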
    fn int(&self, s: &str, radix: u32) -> Lexeme {
        let span = self.lexer.span();
        match TokenInt::from_str_radix(s, radix) {
            Ok(i) => Ok((span.start, Token::Int(i), span.end)),
            Err(_) => self.err_now(LexemeError::IntParse),
        }
    }

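    /// Return the next lexeme, draining the buffer first and otherwise
    /// post-processing what the underlying `logos` lexer produces.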
    pub fn next(&mut self) -> Option<Lexeme> {
        loop {
            // Serve anything already queued up (comments, indents, errors)
            // before asking the underlying lexer for more.
            return match self.buffer.pop_front() {
                Some(x) => Some(x),
                _ => {
                    if self.done {
                        None
                    } else {
                        match self.lexer.next() {
                            None => {
                                // End of input: close any open indentation blocks.
                                self.done = true;
                                let pos = self.lexer.span().end;
                                for _ in 0..self.indent_levels.len() {
                                    self.buffer.push_back(Ok((pos, Token::Dedent, pos)))
                                }
                                self.indent_levels.clear();
                                self.wrap(Token::Newline)
                            }
                            Some(Ok(token)) => match token {
                                Token::Tabs => {
                                    self.buffer.push_back(
                                        self.err_pos(
                                            LexemeError::InvalidTab,
                                            self.lexer.span().start,
                                        ),
                                    );
                                    continue;
                                }
                                Token::Newline => {
                                    if self.parens == 0 {
                                        let span = self.lexer.span();
                                        if let Err(e) = self.calculate_indent() {
                                            return Some(Err(e));
                                        }
                                        Some(Ok((span.start, Token::Newline, span.end)))
                                    } else {
                                        // Newlines inside brackets are insignificant.
                                        continue;
                                    }
                                }
                                Token::Reserved | Token::Match | Token::Case => {
                                    // Reserved words and the soft keywords `match`/`case`
                                    // are handed to the parser as plain identifiers.
                                    self.wrap(Token::Identifier(self.lexer.slice().to_owned()))
                                }
                                Token::RawDecInt => {
                                    let s = self.lexer.slice();
                                    if s.len() > 1 && &s[0..1] == "0" {
                                        return Some(self.err_now(LexemeError::StartsZero));
                                    }
                                    Some(self.int(s, 10))
                                }
                                Token::RawOctInt => {
                                    let s = self.lexer.slice();
                                    assert!(s.starts_with("0o") || s.starts_with("0O"));
                                    Some(self.int(&s[2..], 8))
                                }
                                Token::RawHexInt => {
                                    let s = self.lexer.slice();
                                    assert!(s.starts_with("0x") || s.starts_with("0X"));
                                    Some(self.int(&s[2..], 16))
                                }
                                Token::RawBinInt => {
                                    let s = self.lexer.slice();
                                    assert!(s.starts_with("0b") || s.starts_with("0B"));
                                    Some(self.int(&s[2..], 2))
                                }
                                Token::Int(..) => unreachable!("Lexer does not produce Int tokens"),
                                Token::RawDoubleQuote => {
                                    // A span of length 2 means the `r` prefix was present.
                                    let raw = self.lexer.span().len() == 2;
                                    self.parse_double_quoted_string(raw).map(|lex| {
                                        map_lexeme_t(lex, |(s, _offset)| Token::String(s))
                                    })
                                }
                                Token::RawSingleQuote => {
                                    let raw = self.lexer.span().len() == 2;
                                    self.parse_single_quoted_string(raw).map(|lex| {
                                        map_lexeme_t(lex, |(s, _offset)| Token::String(s))
                                    })
                                }
                                Token::String(_) => {
                                    unreachable!("The lexer does not produce String")
                                }
                                Token::RawFStringDoubleQuote => {
                                    // A span of length 3 means the `fr` prefix was present.
                                    let span_len = self.lexer.span().len();
                                    let raw = span_len == 3;
                                    self.parse_double_quoted_string(raw).map(|lex| {
                                        map_lexeme_t(lex, |(content, content_start_offset)| {
                                            Token::FString(TokenFString {
                                                content,
                                                content_start_offset: content_start_offset
                                                    + span_len,
                                            })
                                        })
                                    })
                                }
                                Token::RawFStringSingleQuote => {
                                    let span_len = self.lexer.span().len();
                                    let raw = span_len == 3;
                                    self.parse_single_quoted_string(raw).map(|lex| {
                                        map_lexeme_t(lex, |(content, content_start_offset)| {
                                            Token::FString(TokenFString {
                                                content,
                                                content_start_offset: content_start_offset
                                                    + span_len,
                                            })
                                        })
                                    })
                                }
                                Token::FString(_) => {
                                    unreachable!("The lexer does not produce FString")
                                }
                                Token::RawByteDoubleQuote => {
                                    let raw = self.lexer.span().len() == 3;
                                    self.parse_double_quoted_string(raw).map(|lex| {
                                        map_lexeme_t(lex, |(s, _offset)| {
                                            Token::ByteString(s.into_bytes())
                                        })
                                    })
                                }
                                Token::RawByteSingleQuote => {
                                    let raw = self.lexer.span().len() == 3;
                                    self.parse_single_quoted_string(raw).map(|lex| {
                                        map_lexeme_t(lex, |(s, _offset)| {
                                            Token::ByteString(s.into_bytes())
                                        })
                                    })
                                }
                                Token::ByteString(_) => {
                                    unreachable!("The lexer does not produce ByteString")
                                }
                                Token::OpeningCurly
                                | Token::OpeningRound
                                | Token::OpeningSquare => {
                                    self.parens += 1;
                                    self.wrap(token)
                                }
                                Token::ClosingCurly
                                | Token::ClosingRound
                                | Token::ClosingSquare => {
                                    self.parens -= 1;
                                    self.wrap(token)
                                }
                                _ => self.wrap(token),
                            },
                            Some(Err(_)) => Some(self.err_now(LexemeError::InvalidInput)),
                        }
                    }
                }
            };
        }
    }

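    /// Lex a double-quoted string; two more `"` in the remainder after the
    /// opening one means the string is triple-quoted.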
    fn parse_double_quoted_string(&mut self, raw: bool) -> Option<LexemeT<(String, usize)>> {
        if self.lexer.remainder().starts_with("\"\"") {
            let mut qs = 0;
            Some(self.string(true, raw, |c| {
                if c == '\"' {
                    qs += 1;
                    qs == 3
                } else {
                    qs = 0;
                    false
                }
            }))
        } else {
            Some(self.string(false, raw, |c| c == '\"'))
        }
    }

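    /// Lex a single-quoted string; two more `'` in the remainder after the
    /// opening one means the string is triple-quoted.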
    fn parse_single_quoted_string(&mut self, raw: bool) -> Option<LexemeT<(String, usize)>> {
        if self.lexer.remainder().starts_with("''") {
            let mut qs = 0;
            Some(self.string(true, raw, |c| {
                if c == '\'' {
                    qs += 1;
                    qs == 3
                } else {
                    qs = 0;
                    false
                }
            }))
        } else {
            Some(self.string(false, raw, |c| c == '\''))
        }
    }
}

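/// An integer literal, stored as an `i32` when it fits and as a `BigInt` otherwise.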
#[derive(Debug, Clone, Eq, PartialEq, derive_more::Display)]
pub enum TokenInt {
    I32(i32),
    BigInt(BigInt),
}

impl TokenInt {
    pub fn from_str_radix(s: &str, base: u32) -> crate::Result<TokenInt> {
        if let Ok(i) = i32::from_str_radix(s, base) {
            Ok(TokenInt::I32(i))
        } else {
            match BigInt::from_str_radix(s, base) {
                Ok(i) => Ok(TokenInt::BigInt(i)),
                Err(_) => Err(LexemeError::CannotParse(s.to_owned(), base).into()),
            }
        }
    }
}

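/// The raw payload of an f-string literal token.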
#[derive(Debug, Clone, PartialEq)]
pub struct TokenFString {
    /// The contents of the literal.
    pub content: String,
    /// Offset of `content` from the start of the token.
    pub content_start_offset: usize,
}

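/// The tokens produced by lexing. The `Raw*` variants come straight from
/// `logos` and are rewritten into their cooked counterparts by `Lexer::next`.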
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r" +")] // Spaces between tokens are insignificant.
#[logos(skip r"\\\n")] // A `\` at the end of a line is a line continuation.
#[logos(skip r"\\\r\n")]
pub enum Token {
    #[regex(r#"#[^\r\n]*"#, |lex| lex.slice()[1..].to_owned())]
    Comment(String),

    #[regex("\t+")]
    Tabs, // Tabs are lexed only so we can report that they are not allowed.

    Indent, // New indentation block.
    Dedent, // End of indentation block.
    #[regex(r"(\r)?\n")]
    Newline,

    #[token("'")]
    #[token("r'")]
    RawSingleQuote,
    #[token("\"")]
    #[token("r\"")]
    RawDoubleQuote,

    #[token("f'")]
    #[token("fr'")]
    RawFStringSingleQuote,
    #[token("f\"")]
    #[token("fr\"")]
    RawFStringDoubleQuote,

    #[token("b'")]
    #[token("br'")]
    RawByteSingleQuote,
    #[token("b\"")]
    #[token("br\"")]
    RawByteDoubleQuote,

    #[regex(
        "as|\
        async|\
        await|\
        class|\
        del|\
        except|\
        finally|\
        from|\
        global|\
        import|\
        is|\
        nonlocal|\
        raise|\
        try|\
        while|\
        with"
    )]
    Reserved, // One of the reserved keywords above.

    #[regex("[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
    Identifier(String),

    #[regex("[0-9]+")]
    RawDecInt,
    #[regex("0[xX][A-Fa-f0-9]+")]
    RawHexInt,
    #[regex("0[bB][01]+")]
    RawBinInt,
    #[regex("0[oO][0-7]+")]
    RawOctInt,

    Int(TokenInt), // Produced from the `Raw*Int` tokens, never by `logos` itself.

    #[regex("[0-9]+\\.[0-9]*([eE][-+]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
    #[regex("[0-9]+[eE][-+]?[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    #[regex("\\.[0-9]+([eE][-+]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
    Float(f64),

    String(String), // Produced from the raw quote tokens, never by `logos` itself.
    FString(TokenFString),
    ByteString(Vec<u8>),

    #[token("and")]
    And,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("def")]
    Def,
    #[token("elif")]
    Elif,
    #[token("else")]
    Else,
    #[token("for")]
    For,
    #[token("if")]
    If,
    #[token("in")]
    In,
    #[token("lambda")]
    Lambda,
    #[token("load")]
    Load,
    #[token("not")]
    Not,
    #[token("or")]
    Or,
    #[token("pass")]
    Pass,
    #[token("return")]
    Return,
    #[token("struct")]
    Struct,
    #[token("yield")]
    Yield,
    #[token("match")]
    Match,
    #[token("case")]
    Case,

    #[token(",")]
    Comma,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("+=")]
    PlusEqual,
    #[token("-=")]
    MinusEqual,
    #[token("*=")]
    StarEqual,
    #[token("/=")]
    SlashEqual,
    #[token("//=")]
    SlashSlashEqual,
    #[token("%=")]
    PercentEqual,
    #[token("==")]
    EqualEqual,
    #[token("!=")]
    BangEqual,
    #[token("<=")]
    LessEqual,
    #[token(">=")]
    GreaterEqual,
    #[token("**")]
    StarStar,
    #[token("->")]
    MinusGreater,
    #[token("=")]
    Equal,
    #[token("<")]
    LessThan,
    #[token(">")]
    GreaterThan,
    #[token("-")]
    Minus,
    #[token("+")]
    Plus,
    #[token("*")]
    Star,
    #[token("%")]
    Percent,
    #[token("/")]
    Slash,
    #[token("//")]
    SlashSlash,
    #[token(".")]
    Dot,
    #[token("&")]
    Ampersand,
    #[token("|")]
    Pipe,
    #[token("^")]
    Caret,
    #[token("<<")]
    LessLess,
    #[token(">>")]
    GreaterGreater,
    #[token("~")]
    Tilde,
    #[token("&=")]
    AmpersandEqual,
    #[token("|=")]
    PipeEqual,
    #[token("^=")]
    CaretEqual,
    #[token("<<=")]
    LessLessEqual,
    #[token(">>=")]
    GreaterGreaterEqual,
    #[token("...")]
    Ellipsis,

    #[token("[")]
    OpeningSquare,
    #[token("{")]
    OpeningCurly,
    #[token("(")]
    OpeningRound,
    #[token("]")]
    ClosingSquare,
    #[token("}")]
    ClosingCurly,
    #[token(")")]
    ClosingRound,
}

impl Token {
    // Convert a token back to (roughly) the text it was lexed from, for testing.
    #[cfg(test)]
    pub fn unlex(&self) -> String {
        use std::io::Write;
        match self {
            Token::Indent => "\t".to_owned(),
            Token::Newline => "\n".to_owned(),
            Token::Dedent => "#dedent".to_owned(),
            Token::String(x) => {
                // JSON string escaping is a close enough approximation of a string literal.
                serde_json::to_string(x).unwrap()
            }
            Token::FString(x) => {
                let mut buff = Vec::new();
                write!(&mut buff, "f").unwrap();
                serde_json::to_writer(&mut buff, &x.content).unwrap();
                String::from_utf8(buff).unwrap()
            }
            Token::ByteString(b) => {
                let mut buff = Vec::new();
                write!(&mut buff, "b").unwrap();
                serde_json::to_writer(&mut buff, &String::from_utf8_lossy(b).to_string()).unwrap();
                String::from_utf8(buff).unwrap()
            }
            _ => {
                // Strip the `keyword '...'` / `symbol '...'` wrapper that
                // `Display` puts around the underlying text.
                let s = self.to_string();
                let first = s.find('\'');
                match first {
                    Some(first) if s.ends_with('\'') && first != s.len() - 1 => {
                        s[first + 1..s.len() - 1].to_owned()
                    }
                    _ => s,
                }
            }
        }
    }
}

impl Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Indent => write!(f, "new indentation block"),
            Token::Dedent => write!(f, "end of indentation block"),
            Token::Newline => write!(f, "new line"),
            Token::And => write!(f, "keyword 'and'"),
            Token::Else => write!(f, "keyword 'else'"),
            Token::Load => write!(f, "keyword 'load'"),
            Token::Break => write!(f, "keyword 'break'"),
            Token::For => write!(f, "keyword 'for'"),
            Token::Not => write!(f, "keyword 'not'"),
            Token::Continue => write!(f, "keyword 'continue'"),
            Token::If => write!(f, "keyword 'if'"),
            Token::Or => write!(f, "keyword 'or'"),
            Token::Def => write!(f, "keyword 'def'"),
            Token::In => write!(f, "keyword 'in'"),
            Token::Pass => write!(f, "keyword 'pass'"),
            Token::Elif => write!(f, "keyword 'elif'"),
            Token::Return => write!(f, "keyword 'return'"),
            Token::Struct => write!(f, "keyword 'struct'"),
            Token::Yield => write!(f, "keyword 'yield'"),
            Token::Match => write!(f, "keyword 'match'"),
            Token::Case => write!(f, "keyword 'case'"),
            Token::Lambda => write!(f, "keyword 'lambda'"),
            Token::Comma => write!(f, "symbol ','"),
            Token::Semicolon => write!(f, "symbol ';'"),
            Token::Colon => write!(f, "symbol ':'"),
            Token::PlusEqual => write!(f, "symbol '+='"),
            Token::MinusEqual => write!(f, "symbol '-='"),
            Token::StarEqual => write!(f, "symbol '*='"),
            Token::SlashEqual => write!(f, "symbol '/='"),
            Token::SlashSlashEqual => write!(f, "symbol '//='"),
            Token::PercentEqual => write!(f, "symbol '%='"),
            Token::EqualEqual => write!(f, "symbol '=='"),
            Token::BangEqual => write!(f, "symbol '!='"),
            Token::LessEqual => write!(f, "symbol '<='"),
            Token::GreaterEqual => write!(f, "symbol '>='"),
            Token::StarStar => write!(f, "symbol '**'"),
            Token::MinusGreater => write!(f, "symbol '->'"),
            Token::Equal => write!(f, "symbol '='"),
            Token::LessThan => write!(f, "symbol '<'"),
            Token::GreaterThan => write!(f, "symbol '>'"),
            Token::Minus => write!(f, "symbol '-'"),
            Token::Plus => write!(f, "symbol '+'"),
            Token::Star => write!(f, "symbol '*'"),
            Token::Percent => write!(f, "symbol '%'"),
            Token::Slash => write!(f, "symbol '/'"),
            Token::SlashSlash => write!(f, "symbol '//'"),
            Token::Dot => write!(f, "symbol '.'"),
            Token::Ampersand => write!(f, "symbol '&'"),
            Token::Pipe => write!(f, "symbol '|'"),
            Token::Caret => write!(f, "symbol '^'"),
            Token::LessLess => write!(f, "symbol '<<'"),
            Token::GreaterGreater => write!(f, "symbol '>>'"),
            Token::Tilde => write!(f, "symbol '~'"),
            Token::AmpersandEqual => write!(f, "symbol '&='"),
            Token::PipeEqual => write!(f, "symbol '|='"),
            Token::CaretEqual => write!(f, "symbol '^='"),
            Token::LessLessEqual => write!(f, "symbol '<<='"),
            Token::GreaterGreaterEqual => write!(f, "symbol '>>='"),
            Token::Ellipsis => write!(f, "symbol '...'"),
            Token::OpeningSquare => write!(f, "symbol '['"),
            Token::OpeningCurly => write!(f, "symbol '{{'"),
            Token::OpeningRound => write!(f, "symbol '('"),
            Token::ClosingSquare => write!(f, "symbol ']'"),
            Token::ClosingCurly => write!(f, "symbol '}}'"),
            Token::ClosingRound => write!(f, "symbol ')'"),
            Token::Reserved => write!(f, "reserved keyword"),
            Token::Identifier(s) => write!(f, "identifier '{s}'"),
            Token::Int(i) => write!(f, "integer literal '{i}'"),
            Token::RawDecInt => write!(f, "decimal integer literal"),
            Token::RawHexInt => write!(f, "hexadecimal integer literal"),
            Token::RawOctInt => write!(f, "octal integer literal"),
            Token::RawBinInt => write!(f, "binary integer literal"),
            Token::Float(n) => write!(f, "float literal '{n}'"),
            Token::String(s) => write!(f, "string literal {s:?}"),
            Token::RawSingleQuote => write!(f, "starting '"),
            Token::RawDoubleQuote => write!(f, "starting \""),
            Token::RawFStringDoubleQuote => write!(f, "starting f\""),
            Token::RawFStringSingleQuote => write!(f, "starting f'"),
            Token::FString(s) => write!(f, "f-string {:?}", &s.content),
            Token::RawByteSingleQuote => write!(f, "starting b'"),
            Token::RawByteDoubleQuote => write!(f, "starting b\""),
            Token::ByteString(b) => write!(f, "byte string literal ({} bytes)", b.len()),
            Token::Comment(c) => write!(f, "comment '{c}'"),
            Token::Tabs => Ok(()),
        }
    }
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Lexeme;

    fn next(&mut self) -> Option<Self::Item> {
        // Delegates to the inherent `next`, which does the buffering.
        self.next()
    }
}

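/// If `s` lexes to exactly one identifier token, return it.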
pub fn lex_exactly_one_identifier(s: &str) -> Option<String> {
    let mut lexer = Token::lexer(s);
    match (lexer.next(), lexer.next()) {
        (Some(Ok(Token::Identifier(ident))), None) => Some(ident),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use crate::lexer::lex_exactly_one_identifier;

    #[test]
    fn test_is_valid_identifier() {
        assert_eq!(lex_exactly_one_identifier("foo").as_deref(), Some("foo"));
        assert_eq!(lex_exactly_one_identifier(" foo ").as_deref(), Some("foo"));
        assert_eq!(lex_exactly_one_identifier("foo bar"), None);
        assert_eq!(lex_exactly_one_identifier("not"), None);
        assert_eq!(lex_exactly_one_identifier("123"), None);
    }
}