1use std::char;
19use std::collections::VecDeque;
20use std::fmt;
21use std::fmt::Display;
22
23use logos::Logos;
24use num_bigint::BigInt;
25use num_traits::Num;
26use thiserror::Error;
27
28use crate::codemap::CodeMap;
29use crate::codemap::Pos;
30use crate::codemap::Span;
31use crate::cursors::CursorBytes;
32use crate::cursors::CursorChars;
33use crate::dialect::Dialect;
34use crate::eval_exception::EvalException;
35
/// Errors that can be produced while lexing.
///
/// Each variant carries a user-facing message via `thiserror`; errors are
/// converted into `crate::error::Error` (see the `From` impl below) and then
/// wrapped in an `EvalException` with a source span.
#[derive(Error, Debug)]
pub enum LexemeError {
    #[error("Parse error: incorrect indentation")]
    Indentation,
    #[error("Parse error: invalid input `{0}`")]
    InvalidInput(String),
    #[error("Parse error: tabs are not allowed")]
    InvalidTab,
    #[error("Parse error: unfinished string literal")]
    UnfinishedStringLiteral,
    #[error("Parse error: invalid string escape sequence `{0}`")]
    InvalidEscapeSequence(String),
    #[error("Parse error: missing string escape sequence, only saw `\\`")]
    EmptyEscapeSequence,
    #[error("Parse error: cannot use reserved keyword `{0}`")]
    ReservedKeyword(String),
    #[error("Parse error: integer cannot have leading 0, got `{0}`")]
    StartsZero(String),
    #[error("Parse error: failed to parse integer: `{0}`")]
    IntParse(String),
    // Internal invariant violation, not a user error: the comment span handed
    // to `make_comment` did not start with `#`.
    #[error("Comment span is computed incorrectly (internal error)")]
    CommentSpanComputedIncorrectly,
    #[error("Cannot parse `{0}` as an integer in base {1}")]
    CannotParse(String, u32),
}
61
// Lexer errors surface as parser errors; `anyhow::Error` preserves the
// original `LexemeError` as the error source.
impl From<LexemeError> for crate::error::Error {
    fn from(e: LexemeError) -> Self {
        crate::error::Error::new_kind(crate::error::ErrorKind::Parser(anyhow::Error::new(e)))
    }
}
67
/// A lexed item with its byte span: `(start, payload, end)`, or a positioned error.
type LexemeT<T> = Result<(usize, T, usize), EvalException>;
/// The common case: a spanned `Token`.
type Lexeme = LexemeT<Token>;
70
71fn map_lexeme_t<T1, T2>(lexeme: LexemeT<T1>, f: impl FnOnce(T1) -> T2) -> LexemeT<T2> {
72 lexeme.map(|(l, t, r)| (l, f(t), r))
73}
74
/// A lexer over a Starlark-like source string, layering indentation handling
/// (Indent/Dedent/Newline synthesis) and string-literal parsing on top of a
/// raw `logos` token stream.
pub struct Lexer<'a> {
    // Used to attach spans to errors and to slice comment text.
    codemap: CodeMap,
    // Stack of currently-open indentation widths (outermost first).
    indent_levels: Vec<usize>,
    // Tokens synthesized ahead of time (indents, dedents, comments, errors),
    // drained before pulling from the underlying lexer.
    buffer: VecDeque<Lexeme>,
    // Nesting depth of (), [] and {} — newlines inside brackets are ignored.
    parens: isize,
    lexer: logos::Lexer<'a, Token>,
    // Set once the underlying lexer is exhausted and final dedents are queued.
    done: bool,
}
86
impl<'a> Lexer<'a> {
    /// Create a lexer and immediately compute the indentation of the first
    /// line, buffering any resulting tokens or error.
    pub fn new(input: &'a str, _dialect: &Dialect, codemap: CodeMap) -> Self {
        let lexer = Token::lexer(input);
        let mut lexer2 = Self {
            codemap,
            indent_levels: Vec::with_capacity(20),
            buffer: VecDeque::with_capacity(10),
            lexer,
            parens: 0,
            done: false,
        };
        if let Err(e) = lexer2.calculate_indent() {
            lexer2.buffer.push_back(Err(e));
        }
        lexer2
    }

    /// Error at a single position (zero-width span).
    fn err_pos<T>(&self, msg: LexemeError, pos: usize) -> Result<T, EvalException> {
        self.err_span(msg, pos, pos)
    }

    /// Error covering the byte range `start..end`.
    fn err_span<T>(&self, msg: LexemeError, start: usize, end: usize) -> Result<T, EvalException> {
        Err(EvalException::new(
            msg.into(),
            Span::new(Pos::new(start as u32), Pos::new(end as u32)),
            &self.codemap,
        ))
    }

    /// Error spanning the token the underlying lexer just matched, with its
    /// slice as the message payload.
    fn err_now<T>(&self, msg: fn(String) -> LexemeError) -> Result<T, EvalException> {
        self.err_span(
            msg(self.lexer.slice().to_owned()),
            self.lexer.span().start,
            self.lexer.span().end,
        )
    }

    /// Build a `Token::Comment` from the source bytes `start..end`, which must
    /// begin with `#`. A trailing `\r` (CRLF line ending) is stripped and the
    /// span shortened accordingly.
    #[allow(clippy::manual_strip)]
    fn make_comment(&self, start: usize, end: usize) -> Lexeme {
        let comment = &self.codemap.source()[start..end];
        if !comment.starts_with('#') {
            // Internal invariant: callers compute the span so it starts at `#`.
            return self.err_pos(LexemeError::CommentSpanComputedIncorrectly, start);
        }
        let comment = &comment[1..];
        if comment.ends_with('\r') {
            let end = end - 1;
            let comment = &comment[..comment.len() - 1];
            Ok((start, Token::Comment(comment.to_owned()), end))
        } else {
            Ok((start, Token::Comment(comment.to_owned()), end))
        }
    }

    /// Called after a newline: scan the leading whitespace (and any
    /// comment-only lines) of the next logical line, advance the underlying
    /// lexer past what was consumed, and buffer Indent/Dedent tokens as the
    /// indentation level changes.
    fn calculate_indent(&mut self) -> Result<(), EvalException> {
        let mut it = CursorBytes::new(self.lexer.remainder());
        let mut spaces = 0;
        let mut tabs = 0;
        // Byte offset where the indentation of the current line begins.
        let mut indent_start = self.lexer.span().end;
        loop {
            match it.next_char() {
                None => {
                    // End of input: consume what we scanned, no indent change.
                    self.lexer.bump(it.pos());
                    return Ok(());
                }
                Some(' ') => {
                    spaces += 1;
                }
                Some('\t') => {
                    tabs += 1;
                }
                Some('\n') => {
                    // Blank line: leave the `\n` for the lexer to re-emit as
                    // Newline, which re-triggers indentation calculation.
                    self.lexer.bump(it.pos() - 1);
                    return Ok(());
                }
                Some('\r') => {
                }
                Some('#') => {
                    // Comment-only line: its indentation does not count.
                    spaces = 0;
                    tabs = 0;
                    let start = self.lexer.span().end + it.pos() - 1;
                    loop {
                        match it.next_char() {
                            None => {
                                // Comment runs to end of input.
                                let end = self.lexer.span().end + it.pos();
                                self.buffer.push_back(self.make_comment(start, end));
                                self.lexer.bump(it.pos());
                                return Ok(());
                            }
                            Some('\n') => break,
                            Some(_) => {}
                        }
                    }
                    let end = self.lexer.span().end + it.pos() - 1;
                    self.buffer.push_back(self.make_comment(start, end));
                    indent_start = self.lexer.span().end + it.pos();
                }
                _ => break,
            }
        }
        // Leave the first non-indent character for the lexer to match.
        self.lexer.bump(it.pos() - 1);
        // NOTE(review): tabs are weighted as 8 here but any tab is rejected
        // just below, so the weighting only matters if that check changes.
        let indent = spaces + tabs * 8;
        if tabs > 0 {
            return self.err_pos(LexemeError::InvalidTab, self.lexer.span().start);
        }
        let now = self.indent_levels.last().copied().unwrap_or(0);

        if indent > now {
            self.indent_levels.push(indent);
            let span = self.lexer.span();
            self.buffer
                .push_back(Ok((indent_start, Token::Indent, span.end)));
        } else if indent < now {
            // Pop levels until we land exactly on `indent`; anything between
            // two recorded levels is an indentation error.
            let mut dedents = 1;
            self.indent_levels.pop().unwrap();
            loop {
                let now = self.indent_levels.last().copied().unwrap_or(0);
                if now == indent {
                    break;
                } else if now > indent {
                    dedents += 1;
                    self.indent_levels.pop().unwrap();
                } else {
                    let pos = self.lexer.span();
                    return self.err_span(LexemeError::Indentation, pos.start, pos.end);
                }
            }
            for _ in 0..dedents {
                self.buffer
                    .push_back(Ok((indent_start, Token::Dedent, indent_start)))
            }
        }
        Ok(())
    }

    /// Attach the current lexer span to `token`.
    fn wrap(&mut self, token: Token) -> Option<Lexeme> {
        let span = self.lexer.span();
        Some(Ok((span.start, token, span.end)))
    }

    /// Read between `min` and `max` digits in `radix` from `it` and convert
    /// the accumulated value to a `char`. A non-digit after at least `min`
    /// digits is pushed back; fewer than `min` digits, or a value that is not
    /// a valid scalar, is an error.
    fn escape_char(it: &mut CursorChars, min: usize, max: usize, radix: u32) -> Result<char, ()> {
        let mut value = 0u32;
        let mut count = 0;
        while count < max {
            match it.next() {
                None => {
                    if count >= min {
                        break;
                    } else {
                        return Err(());
                    }
                }
                Some(c) => match c.to_digit(radix) {
                    None => {
                        if count >= min {
                            // Not part of the escape; leave it for the caller.
                            it.unnext(c);
                            break;
                        } else {
                            return Err(());
                        }
                    }
                    Some(v) => {
                        count += 1;
                        value = (value * radix) + v;
                    }
                },
            }
        }
        char::from_u32(value).ok_or(())
    }

    /// Process one escape sequence (the `\` has already been consumed),
    /// appending the decoded character(s) to `res`.
    fn escape(it: &mut CursorChars, res: &mut String) -> Result<(), ()> {
        match it.next() {
            Some('n') => res.push('\n'),
            Some('r') => res.push('\r'),
            Some('t') => res.push('\t'),
            Some('a') => res.push('\x07'),
            Some('b') => res.push('\x08'),
            Some('f') => res.push('\x0C'),
            Some('v') => res.push('\x0B'),
            // Backslash-newline is a line continuation: emit nothing.
            Some('\n') => {}
            Some('\r') => {
                // Only `\<CR><LF>` is a valid continuation; a lone `\<CR>` fails.
                if it.next() != Some('\n') {
                    return Err(());
                }
            }
            Some('x') => res.push(Self::escape_char(it, 2, 2, 16)?),
            Some('u') => res.push(Self::escape_char(it, 4, 4, 16)?),
            Some('U') => res.push(Self::escape_char(it, 8, 8, 16)?),
            Some(c) => match c {
                '0'..='7' => {
                    // Octal escape: the digit itself is part of the value.
                    it.unnext(c);
                    res.push(Self::escape_char(it, 1, 3, 8)?)
                }
                '"' | '\'' | '\\' => res.push(c),
                _ => {
                    // Unknown escapes are kept literally, backslash included.
                    res.push('\\');
                    res.push(c);
                }
            },
            None => {
                return Err(());
            }
        };
        Ok(())
    }

    /// Lex a string literal body. The opening quote token has already been
    /// matched; `stop` reports when the closing delimiter has been seen.
    /// Returns the decoded contents plus the offset of the contents within
    /// the lexer remainder. Fast path: scan bytes and borrow the slice
    /// directly; on the first `\\`, `\r`, or (non-triple) `\n`, fall back to
    /// a char cursor that builds the string with escape processing.
    fn string(
        &mut self,
        triple: bool,
        raw: bool,
        mut stop: impl FnMut(char) -> bool,
    ) -> LexemeT<(String, usize)> {
        let string_start = self.lexer.span().start;
        let mut string_end = self.lexer.span().end;

        let mut it = CursorBytes::new(self.lexer.remainder());
        let it2;

        if triple {
            // Skip the remaining two quotes of the opening triple delimiter.
            it.next();
            it.next();
        }
        let contents_start = it.pos();

        let mut res;
        loop {
            match it.next_char() {
                None => {
                    return self.err_span(
                        LexemeError::UnfinishedStringLiteral,
                        string_start,
                        string_end + it.pos(),
                    );
                }
                Some(c) => {
                    if stop(c) {
                        // No escapes encountered: contents is a direct slice.
                        let contents_end = it.pos() - if triple { 3 } else { 1 };
                        let contents = &self.lexer.remainder()[contents_start..contents_end];
                        self.lexer.bump(it.pos());
                        return Ok((
                            string_start,
                            (contents.to_owned(), contents_start),
                            string_end + it.pos(),
                        ));
                    } else if c == '\\' || c == '\r' || (c == '\n' && !triple) {
                        // Switch to the slow path, seeded with what we have so far.
                        res = String::with_capacity(it.pos() + 10);
                        res.push_str(&self.lexer.remainder()[contents_start..it.pos() - 1]);
                        it2 = CursorChars::new_offset(self.lexer.remainder(), it.pos() - 1);
                        break;
                    }
                }
            }
        }

        let mut it = it2;
        while let Some(c) = it.next() {
            if stop(c) {
                self.lexer.bump(it.pos());
                if triple {
                    // Drop the first two closing quotes, pushed before the
                    // third satisfied `stop`.
                    res.truncate(res.len() - 2);
                }
                return Ok((string_start, (res, contents_start), string_end + it.pos()));
            }
            match c {
                '\n' if !triple => {
                    // Unescaped newline in a single-line string: unfinished.
                    string_end -= 1;
                    break;
                }
                // `\r` is dropped so CRLF sources decode like LF sources.
                '\r' => {
                }
                '\\' => {
                    if raw {
                        // Raw strings keep the backslash, except before quotes.
                        match it.next() {
                            Some(c) => {
                                if c != '\'' && c != '"' {
                                    res.push('\\');
                                }
                                res.push(c);
                            }
                            _ => break,
                        }
                    } else {
                        let pos = it.pos();
                        if Self::escape(&mut it, &mut res).is_err() {
                            let bad = self.lexer.remainder()[pos..it.pos()].to_owned();
                            return self.err_span(
                                if bad.is_empty() {
                                    LexemeError::EmptyEscapeSequence
                                } else {
                                    LexemeError::InvalidEscapeSequence(bad)
                                },
                                string_end + pos - 1,
                                string_end + it.pos(),
                            );
                        }
                    }
                }
                c => res.push(c),
            }
        }

        self.err_span(
            LexemeError::UnfinishedStringLiteral,
            string_start,
            string_end + it.pos(),
        )
    }

    /// Parse `s` in `radix` into an `Int` token spanning the current lexeme.
    fn int(&self, s: &str, radix: u32) -> Lexeme {
        let span = self.lexer.span();
        match TokenInt::from_str_radix(s, radix) {
            Ok(i) => Ok((span.start, Token::Int(i), span.end)),
            Err(_) => self.err_now(LexemeError::IntParse),
        }
    }

    /// Produce the next lexeme: drain the buffer first, then pull from the
    /// raw lexer, translating raw tokens (strings, ints, newlines…) into
    /// final tokens and synthesizing Indent/Dedent as needed.
    pub fn next(&mut self) -> Option<Lexeme> {
        loop {
            return if let Some(x) = self.buffer.pop_front() {
                Some(x)
            } else if self.done {
                None
            } else {
                match self.lexer.next() {
                    None => {
                        // End of input: emit a final Newline, then close all
                        // open indentation blocks.
                        self.done = true;
                        let pos = self.lexer.span().end;
                        for _ in 0..self.indent_levels.len() {
                            self.buffer.push_back(Ok((pos, Token::Dedent, pos)))
                        }
                        self.indent_levels.clear();
                        self.wrap(Token::Newline)
                    }
                    Some(token) => match token {
                        Token::Tabs => {
                            self.buffer.push_back(
                                self.err_pos(LexemeError::InvalidTab, self.lexer.span().start),
                            );
                            continue;
                        }
                        Token::Newline => {
                            // Newlines inside brackets are not significant.
                            if self.parens == 0 {
                                let span = self.lexer.span();
                                if let Err(e) = self.calculate_indent() {
                                    return Some(Err(e));
                                }
                                Some(Ok((span.start, Token::Newline, span.end)))
                            } else {
                                continue;
                            }
                        }
                        Token::Reserved => Some(self.err_now(LexemeError::ReservedKeyword)),
                        Token::Error => Some(self.err_now(LexemeError::InvalidInput)),
                        Token::RawDecInt => {
                            let s = self.lexer.slice();
                            // Leading zeros are rejected (e.g. `0755` is not octal here).
                            if s.len() > 1 && &s[0..1] == "0" {
                                return Some(self.err_now(LexemeError::StartsZero));
                            }
                            Some(self.int(s, 10))
                        }
                        Token::RawOctInt => {
                            let s = self.lexer.slice();
                            assert!(s.starts_with("0o") || s.starts_with("0O"));
                            Some(self.int(&s[2..], 8))
                        }
                        Token::RawHexInt => {
                            let s = self.lexer.slice();
                            assert!(s.starts_with("0x") || s.starts_with("0X"));
                            Some(self.int(&s[2..], 16))
                        }
                        Token::RawBinInt => {
                            let s = self.lexer.slice();
                            assert!(s.starts_with("0b") || s.starts_with("0B"));
                            Some(self.int(&s[2..], 2))
                        }
                        Token::Int(..) => unreachable!("Lexer does not produce Int tokens"),
                        Token::RawDoubleQuote => {
                            // Span length 2 means the token was `r"` (raw string).
                            let raw = self.lexer.span().len() == 2;
                            self.parse_double_quoted_string(raw)
                                .map(|lex| map_lexeme_t(lex, |(s, _offset)| Token::String(s)))
                        }
                        Token::RawSingleQuote => {
                            let raw = self.lexer.span().len() == 2;
                            self.parse_single_quoted_string(raw)
                                .map(|lex| map_lexeme_t(lex, |(s, _offset)| Token::String(s)))
                        }
                        Token::String(_) => {
                            unreachable!("The lexer does not produce String")
                        }
                        Token::RawFStringDoubleQuote => {
                            // Span length 3 means `fr"` (raw f-string).
                            let span_len = self.lexer.span().len();
                            let raw = span_len == 3;
                            self.parse_double_quoted_string(raw).map(|lex| {
                                map_lexeme_t(lex, |(content, content_start_offset)| {
                                    Token::FString(TokenFString {
                                        content,
                                        content_start_offset: content_start_offset + span_len,
                                    })
                                })
                            })
                        }
                        Token::RawFStringSingleQuote => {
                            let span_len = self.lexer.span().len();
                            let raw = span_len == 3;
                            self.parse_single_quoted_string(raw).map(|lex| {
                                map_lexeme_t(lex, |(content, content_start_offset)| {
                                    Token::FString(TokenFString {
                                        content,
                                        content_start_offset: content_start_offset + span_len,
                                    })
                                })
                            })
                        }
                        Token::FString(_) => {
                            unreachable!("The lexer does not produce FString")
                        }
                        Token::OpeningCurly | Token::OpeningRound | Token::OpeningSquare => {
                            self.parens += 1;
                            self.wrap(token)
                        }
                        Token::ClosingCurly | Token::ClosingRound | Token::ClosingSquare => {
                            self.parens -= 1;
                            self.wrap(token)
                        }
                        _ => self.wrap(token),
                    },
                }
            };
        }
    }

    /// After a `"` opener: if two more `"` follow it is a triple-quoted
    /// string (closing on the third consecutive `"`), otherwise single-line.
    fn parse_double_quoted_string(&mut self, raw: bool) -> Option<LexemeT<(String, usize)>> {
        if self.lexer.remainder().starts_with("\"\"") {
            let mut qs = 0;
            Some(self.string(true, raw, |c| {
                if c == '\"' {
                    qs += 1;
                    qs == 3
                } else {
                    qs = 0;
                    false
                }
            }))
        } else {
            Some(self.string(false, raw, |c| c == '\"'))
        }
    }

    /// Single-quote counterpart of `parse_double_quoted_string`.
    fn parse_single_quoted_string(&mut self, raw: bool) -> Option<LexemeT<(String, usize)>> {
        if self.lexer.remainder().starts_with("''") {
            let mut qs = 0;
            Some(self.string(true, raw, |c| {
                if c == '\'' {
                    qs += 1;
                    qs == 3
                } else {
                    qs = 0;
                    false
                }
            }))
        } else {
            Some(self.string(false, raw, |c| c == '\''))
        }
    }
}
591
/// An integer literal value: kept as `i32` when it fits, otherwise promoted
/// to an arbitrary-precision `BigInt`.
#[derive(Debug, Clone, Eq, PartialEq, derive_more::Display)]
pub enum TokenInt {
    I32(i32),
    BigInt(BigInt),
}
598
599impl TokenInt {
600 pub fn from_str_radix(s: &str, base: u32) -> crate::Result<TokenInt> {
601 if let Ok(i) = i32::from_str_radix(s, base) {
602 Ok(TokenInt::I32(i))
603 } else {
604 match BigInt::from_str_radix(s, base) {
605 Ok(i) => Ok(TokenInt::BigInt(i)),
606 Err(_) => Err(LexemeError::CannotParse(s.to_owned(), base).into()),
607 }
608 }
609 }
610}
611
/// The raw body of an f-string literal, before interpolation is parsed.
#[derive(Debug, Clone, PartialEq)]
pub struct TokenFString {
    // Literal contents between the quotes, escapes already decoded.
    pub content: String,
    // Byte offset of `content` from the start of the whole literal token
    // (i.e. past the `f"`/`fr"` prefix); used for span arithmetic downstream.
    pub content_start_offset: usize,
}
619
/// All tokens produced by the lexer. Variants prefixed `Raw…` are matched by
/// `logos` and rewritten by `Lexer::next` into their final forms (`Int`,
/// `String`, `FString`); `Indent`/`Dedent` are synthesized from indentation
/// and never matched directly.
#[derive(Logos, Debug, Clone, PartialEq)]
pub enum Token {
    // Spaces and backslash-newline continuations are skipped entirely.
    #[regex(" +", logos::skip)]
    #[token("\\\n", logos::skip)]
    #[token("\\\r\n", logos::skip)]
    #[error]
    Error,

    // Comment text without the leading `#`.
    #[regex(r#"#[^\r\n]*"#, |lex| lex.slice()[1..].to_owned())]
    Comment(String),

    // Tabs are matched so `Lexer::next` can report them as errors.
    #[regex("\t+")]
    Tabs,

    Indent,
    Dedent,
    #[regex(r"(\r)?\n")]
    Newline,

    // String openers; the `r` variants mark raw strings (detected by span length).
    #[token("'")]
    #[token("r'")]
    RawSingleQuote,
    #[token("\"")]
    #[token("r\"")]
    RawDoubleQuote,

    #[token("f'")]
    #[token("fr'")]
    RawFStringSingleQuote,
    #[token("f\"")]
    #[token("fr\"")]
    RawFStringDoubleQuote,

    // Keywords reserved for future/compatibility use; using one is an error.
    #[regex(
        "as|\
        assert|\
        async|\
        await|\
        class|\
        del|\
        except|\
        finally|\
        from|\
        global|\
        import|\
        is|\
        nonlocal|\
        raise|\
        try|\
        while|\
        with|\
        yield"
    )]
    Reserved,

    #[regex(
        "[a-zA-Z_][a-zA-Z0-9_]*"
    , |lex| lex.slice().to_owned())]
    Identifier(String),

    // Integer literal spellings; converted to `Int` by the lexer driver.
    #[regex("[0-9]+")]
    RawDecInt,
    #[regex("0[xX][A-Fa-f0-9]+")]
    RawHexInt,
    #[regex("0[bB][01]+")]
    RawBinInt,
    #[regex("0[oO][0-7]+")]
    RawOctInt,

    Int(TokenInt),

    #[regex("[0-9]+\\.[0-9]*([eE][-+]?[0-9]+)?", |lex| lex.slice().parse::<f64>())]
    #[regex("[0-9]+[eE][-+]?[0-9]+", |lex| lex.slice().parse::<f64>())]
    #[regex("\\.[0-9]+([eE][-+]?[0-9]+)?", |lex| lex.slice().parse::<f64>())]
    Float(f64),
    String(String),
    FString(TokenFString),

    // Keywords.
    #[token("and")]
    And,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("def")]
    Def,
    #[token("elif")]
    Elif,
    #[token("else")]
    Else,
    #[token("for")]
    For,
    #[token("if")]
    If,
    #[token("in")]
    In,
    #[token("lambda")]
    Lambda,
    #[token("load")]
    Load,
    #[token("not")]
    Not,
    #[token("or")]
    Or,
    #[token("pass")]
    Pass,
    #[token("return")]
    Return,

    // Punctuation and operators.
    #[token(",")]
    Comma,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("+=")]
    PlusEqual,
    #[token("-=")]
    MinusEqual,
    #[token("*=")]
    StarEqual,
    #[token("/=")]
    SlashEqual,
    #[token("//=")]
    SlashSlashEqual,
    #[token("%=")]
    PercentEqual,
    #[token("==")]
    EqualEqual,
    #[token("!=")]
    BangEqual,
    #[token("<=")]
    LessEqual,
    #[token(">=")]
    GreaterEqual,
    #[token("**")]
    StarStar,
    #[token("->")]
    MinusGreater,
    #[token("=")]
    Equal,
    #[token("<")]
    LessThan,
    #[token(">")]
    GreaterThan,
    #[token("-")]
    Minus,
    #[token("+")]
    Plus,
    #[token("*")]
    Star,
    #[token("%")]
    Percent,
    #[token("/")]
    Slash,
    #[token("//")]
    SlashSlash,
    #[token(".")]
    Dot,
    #[token("&")]
    Ampersand,
    #[token("|")]
    Pipe,
    #[token("^")]
    Caret,
    #[token("<<")]
    LessLess,
    #[token(">>")]
    GreaterGreater,
    #[token("~")]
    Tilde,
    #[token("&=")]
    AmpersandEqual,
    #[token("|=")]
    PipeEqual,
    #[token("^=")]
    CaretEqual,
    #[token("<<=")]
    LessLessEqual,
    #[token(">>=")]
    GreaterGreaterEqual,
    #[token("...")]
    Ellipsis,

    // Brackets — tracked by `Lexer::parens` to suppress newlines inside them.
    #[token("[")]
    OpeningSquare,
    #[token("{")]
    OpeningCurly,
    #[token("(")]
    OpeningRound,
    #[token("]")]
    ClosingSquare,
    #[token("}")]
    ClosingCurly,
    #[token(")")]
    ClosingRound,
}
829
impl Token {
    /// Test-only inverse of lexing: render a token back to (approximate)
    /// source text. For most tokens this extracts the quoted part of the
    /// `Display` message (e.g. `symbol '+='` -> `+=`).
    #[cfg(test)]
    pub fn unlex(&self) -> String {
        use std::io::Write;
        match self {
            Token::Indent => "\t".to_owned(),
            Token::Newline => "\n".to_owned(),
            // Dedent has no textual form; a comment keeps round-trips lexable.
            Token::Dedent => "#dedent".to_owned(),
            Token::String(x) => {
                // JSON string syntax doubles as valid quoted/escaped source.
                serde_json::to_string(x).unwrap()
            }
            Token::FString(x) => {
                let mut buff = Vec::new();
                write!(&mut buff, "f").unwrap();
                serde_json::to_writer(&mut buff, &x.content).unwrap();
                String::from_utf8(buff).unwrap()
            }
            _ => {
                // Pull the text between the first `'` and a trailing `'` of
                // the Display form; otherwise return the message verbatim.
                let s = self.to_string();
                let first = s.find('\'');
                match first {
                    Some(first) if s.ends_with('\'') && first != s.len() - 1 => {
                        s[first + 1..s.len() - 1].to_owned()
                    }
                    _ => s,
                }
            }
        }
    }
}
866
867impl Display for Token {
868 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
869 match self {
870 Token::Error => write!(f, "lexical error"),
871 Token::Indent => write!(f, "new indentation block"),
872 Token::Dedent => write!(f, "end of indentation block"),
873 Token::Newline => write!(f, "new line"),
874 Token::And => write!(f, "keyword 'and'"),
875 Token::Else => write!(f, "keyword 'else'"),
876 Token::Load => write!(f, "keyword 'load'"),
877 Token::Break => write!(f, "keyword 'break'"),
878 Token::For => write!(f, "keyword 'for'"),
879 Token::Not => write!(f, "keyword 'not'"),
880 Token::Continue => write!(f, "keyword 'continue'"),
881 Token::If => write!(f, "keyword 'if'"),
882 Token::Or => write!(f, "keyword 'or'"),
883 Token::Def => write!(f, "keyword 'def'"),
884 Token::In => write!(f, "keyword 'in'"),
885 Token::Pass => write!(f, "keyword 'pass'"),
886 Token::Elif => write!(f, "keyword 'elif'"),
887 Token::Return => write!(f, "keyword 'return'"),
888 Token::Lambda => write!(f, "keyword 'lambda'"),
889 Token::Comma => write!(f, "symbol ','"),
890 Token::Semicolon => write!(f, "symbol ';'"),
891 Token::Colon => write!(f, "symbol ':'"),
892 Token::PlusEqual => write!(f, "symbol '+='"),
893 Token::MinusEqual => write!(f, "symbol '-='"),
894 Token::StarEqual => write!(f, "symbol '*='"),
895 Token::SlashEqual => write!(f, "symbol '/='"),
896 Token::SlashSlashEqual => write!(f, "symbol '//='"),
897 Token::PercentEqual => write!(f, "symbol '%='"),
898 Token::EqualEqual => write!(f, "symbol '=='"),
899 Token::BangEqual => write!(f, "symbol '!='"),
900 Token::LessEqual => write!(f, "symbol '<='"),
901 Token::GreaterEqual => write!(f, "symbol '>='"),
902 Token::StarStar => write!(f, "symbol '**'"),
903 Token::MinusGreater => write!(f, "symbol '->'"),
904 Token::Equal => write!(f, "symbol '='"),
905 Token::LessThan => write!(f, "symbol '<'"),
906 Token::GreaterThan => write!(f, "symbol '>'"),
907 Token::Minus => write!(f, "symbol '-'"),
908 Token::Plus => write!(f, "symbol '+'"),
909 Token::Star => write!(f, "symbol '*'"),
910 Token::Percent => write!(f, "symbol '%'"),
911 Token::Slash => write!(f, "symbol '/'"),
912 Token::SlashSlash => write!(f, "symbol '//'"),
913 Token::Dot => write!(f, "symbol '.'"),
914 Token::Ampersand => write!(f, "symbol '&'"),
915 Token::Pipe => write!(f, "symbol '|'"),
916 Token::Caret => write!(f, "symbol '^'"),
917 Token::LessLess => write!(f, "symbol '<<'"),
918 Token::GreaterGreater => write!(f, "symbol '>>'"),
919 Token::Tilde => write!(f, "symbol '~'"),
920 Token::AmpersandEqual => write!(f, "symbol '&='"),
921 Token::PipeEqual => write!(f, "symbol '|='"),
922 Token::CaretEqual => write!(f, "symbol '^='"),
923 Token::LessLessEqual => write!(f, "symbol '<<='"),
924 Token::GreaterGreaterEqual => write!(f, "symbol '>>='"),
925 Token::Ellipsis => write!(f, "symbol '...'"),
926 Token::OpeningSquare => write!(f, "symbol '['"),
927 Token::OpeningCurly => write!(f, "symbol '{{'"),
928 Token::OpeningRound => write!(f, "symbol '('"),
929 Token::ClosingSquare => write!(f, "symbol ']'"),
930 Token::ClosingCurly => write!(f, "symbol '}}'"),
931 Token::ClosingRound => write!(f, "symbol ')'"),
932 Token::Reserved => write!(f, "reserved keyword"),
933 Token::Identifier(s) => write!(f, "identifier '{}'", s),
934 Token::Int(i) => write!(f, "integer literal '{}'", i),
935 Token::RawDecInt => write!(f, "decimal integer literal"),
936 Token::RawHexInt => write!(f, "hexadecimal integer literal"),
937 Token::RawOctInt => write!(f, "octal integer literal"),
938 Token::RawBinInt => write!(f, "binary integer literal"),
939 Token::Float(n) => write!(f, "float literal '{}'", n),
940 Token::String(s) => write!(f, "string literal {:?}", s),
941 Token::RawSingleQuote => write!(f, "starting '"),
942 Token::RawDoubleQuote => write!(f, "starting \""),
943 Token::RawFStringDoubleQuote => write!(f, "starting f'"),
944 Token::RawFStringSingleQuote => write!(f, "starting f\""),
945 Token::FString(s) => write!(f, "f-string {:?}", &s.content),
946 Token::Comment(c) => write!(f, "comment '{}'", c),
947 Token::Tabs => Ok(()),
948 }
949 }
950}
951
// Adapter so the lexer can drive iterator-based parsers; delegates to the
// inherent `Lexer::next`.
impl<'a> Iterator for Lexer<'a> {
    type Item = Lexeme;

    fn next(&mut self) -> Option<Self::Item> {
        self.next()
    }
}
959
960pub fn lex_exactly_one_identifier(s: &str) -> Option<String> {
961 let mut lexer = Token::lexer(s);
962 match (lexer.next(), lexer.next()) {
963 (Some(Token::Identifier(ident)), None) => Some(ident),
964 _ => None,
965 }
966}
967
#[cfg(test)]
mod tests {
    use crate::lexer::lex_exactly_one_identifier;

    /// `lex_exactly_one_identifier` accepts a lone identifier (whitespace
    /// around it is skipped) and rejects multiple tokens, keywords, and
    /// non-identifier input.
    #[test]
    fn test_is_valid_identifier() {
        assert_eq!(lex_exactly_one_identifier("foo").as_deref(), Some("foo"));
        assert_eq!(lex_exactly_one_identifier(" foo ").as_deref(), Some("foo"));
        assert_eq!(lex_exactly_one_identifier("foo bar"), None);
        assert_eq!(lex_exactly_one_identifier("not"), None);
        assert_eq!(lex_exactly_one_identifier("123"), None);
    }
}