Skip to main content

steel_parser/
lexer.rs

1use super::parser::SourceId;
2use crate::interner::InternedString;
3use crate::tokens::{IntLiteral, Token, TokenLike, TokenType};
4use crate::tokens::{NumberLiteral, Paren, ParenMod, RealLiteral};
5use alloc::borrow::Cow;
6use core::char;
7use core::iter::Iterator;
8use core::ops::Range;
9use core::{iter::Peekable, str::Chars};
10use num_bigint::BigInt;
11use smallvec::{smallvec, SmallVec};
12
13pub const INFINITY: &str = "+inf.0";
14pub const NEG_INFINITY: &str = "-inf.0";
15pub const NAN: &str = "+nan.0";
16pub const NEG_NAN: &str = "-nan.0";
17
/// Marker type whose [`ToOwnedString`] implementation produces plain [`String`]s.
pub struct OwnedString;

impl ToOwnedString<String> for OwnedString {
    /// Converts `s` into an owned `String`.
    ///
    /// An already-owned `Cow` hands over its buffer as-is; only a borrowed one
    /// is copied.
    fn own(&self, s: Cow<str>) -> String {
        s.into_owned()
    }
}

/// Strategy for turning a borrowed-or-owned string into a value of type `T`.
pub trait ToOwnedString<T> {
    /// Converts `s` into a `T`.
    fn own(&self, s: Cow<str>) -> T;
}
29
/// Byte range into the source text, as produced by [`Lexer::span`].
pub type Span = core::ops::Range<usize>;
31
/// Streaming tokenizer over a borrowed source string.
///
/// Positions are tracked as byte offsets: `eat` advances `token_end` by the
/// UTF-8 length of every character it consumes, and `slice` uses
/// `token_start..token_end` to index back into `source`.
pub struct Lexer<'a> {
    /// The source of the lexer.
    source: &'a str,
    /// An iterator over the characters.
    chars: Peekable<Chars<'a>>,
    /// The next token to return or `None` if it should be parsed.
    queued: Option<TokenType<InternedString>>,
    /// Byte offset at which the current token starts.
    token_start: u32,
    /// Byte offset one past the last character consumed so far.
    token_end: u32,
    /// Byte range blamed for the most recent error; left empty when the
    /// failing code path did not set a more precise range.
    error: Range<u32>,

    /// Scratch buffer reused by `read_word` to build escaped identifiers.
    ident_buffer: String,
}
45
46impl<'a> Lexer<'a> {
47    pub fn new(source: &'a str) -> Self {
48        Self {
49            source,
50            chars: source.chars().peekable(),
51            queued: None,
52            token_start: 0,
53            token_end: 0,
54            error: Default::default(),
55            ident_buffer: String::new(),
56        }
57    }
58
59    fn eat(&mut self) -> Option<char> {
60        if let Some(c) = self.chars.next() {
61            self.token_end += c.len_utf8() as u32;
62            Some(c)
63        } else {
64            None
65        }
66    }
67
68    // Consume characters until the next non whitespace input
69    fn consume_whitespace(&mut self) {
70        while let Some(&c) = self.chars.peek() {
71            if c.is_whitespace() {
72                self.eat();
73
74                self.token_start = self.token_end;
75            } else {
76                break;
77            }
78        }
79    }
80
81    fn read_string(&mut self) -> Result<TokenType<InternedString>> {
82        // Skip the opening quote.
83        self.eat();
84
85        let mut buf = String::new();
86
87        while let Some(&c) = self.chars.peek() {
88            self.eat();
89            match c {
90                '"' => return Ok(TokenType::StringLiteral(buf.into())),
91                '\\' => {
92                    if let Some(c) = self.read_string_escape(TokenError::IncompleteString, '"')? {
93                        buf.push(c);
94                    }
95                }
96                _ => buf.push(c),
97            }
98        }
99
100        Err(TokenError::IncompleteString)
101    }
102
    /// Decodes one escape sequence; the leading backslash must already have
    /// been consumed by the caller.
    ///
    /// * `incomplete` - error returned when the input ends inside the escape.
    /// * `delim` - delimiter of the enclosing literal; meeting it inside a hex
    ///   escape makes the escape unclosed.
    ///
    /// Returns `Ok(Some(c))` for an escape that denotes a character and
    /// `Ok(None)` for a backslash followed by whitespace that spans a line
    /// break, which contributes nothing. On the more specific errors,
    /// `self.error` is set to the byte range of the offending text.
    fn read_string_escape(&mut self, incomplete: TokenError, delim: char) -> Result<Option<char>> {
        let c = match self.chars.peek() {
            Some('"') => {
                self.eat();
                '"'
            }

            // alarm (BEL)
            Some('a') => {
                self.eat();
                '\x07'
            }

            // backspace
            Some('b') => {
                self.eat();
                '\x08'
            }

            Some('\\') => {
                self.eat();
                '\\'
            }

            Some('|') => {
                self.eat();
                '|'
            }

            Some('t') => {
                self.eat();
                '\t'
            }

            Some('n') => {
                self.eat();
                '\n'
            }

            Some('r') => {
                self.eat();
                '\r'
            }

            Some('0') => {
                self.eat();
                '\0'
            }

            // Hex escapes: `\x<hex>;`, `\u<hex>;` or `\u{<hex>}`.
            Some(&code @ ('x' | 'u')) => {
                self.eat();
                // Both the backslash and `x`/`u` are one byte wide, so this is
                // the offset of the backslash.
                let start = self.token_end - 2;

                let mut digits = String::new();

                // Only `\u` supports the braced form; everything else is
                // terminated by `;`.
                let escape_end = match self.chars.peek().copied() {
                    Some('{') if code == 'u' => {
                        self.eat();
                        '}'
                    }
                    _ => ';',
                };

                let valid = loop {
                    let Some(c) = self.eat() else {
                        return Err(incomplete);
                    };

                    match c {
                        c if c == escape_end => break true,
                        // note that this overlaps partially with `escape_end`
                        ';' | '\\' | '\n' | '(' | ')' | '[' | ']' | '{' | '}' => break false,
                        c if c == delim => break false,
                        _ => digits.push(c),
                    }
                };

                if !valid {
                    // Exclude the character that aborted the escape.
                    self.error = start..self.token_end - 1;

                    return Err(TokenError::UnclosedHexEscape(escape_end));
                }

                let error = start..self.token_end;

                let codepoint = u32::from_str_radix(&digits, 16)
                    .map_err(TokenError::InvalidHexEscapeLiteral)
                    .inspect_err(|_| self.error = error.clone())?;

                char::from_u32(codepoint)
                    .ok_or(TokenError::InvalidHexCodePoint(codepoint))
                    .inspect_err(|_| self.error = error)?
            }

            // Backslash followed by whitespace: skip spaces/tabs, exactly one
            // newline, and then any spaces/tabs that follow it.
            Some(&start @ (' ' | '\t' | '\n')) => {
                self.eat();

                // `trimming` becomes true once the newline has been seen.
                let mut trimming = start == '\n';

                loop {
                    let Some(c) = self.chars.peek() else {
                        return Err(incomplete);
                    };

                    match c {
                        ' ' | '\t' => {
                            self.eat();
                        }
                        '\n' if !trimming => {
                            self.eat();
                            trimming = true;
                        }
                        // First other character after the newline: the escape
                        // is done and that character is left unconsumed.
                        _ if trimming => return Ok(None),

                        // Anything else before the newline is an error.
                        c => {
                            self.error = self.token_end..(self.token_end + c.len_utf8() as u32);
                            return Err(TokenError::InvalidWhitespace);
                        }
                    }
                }
            }

            // Unknown escape: blame the backslash plus the unconsumed character.
            Some(c) => {
                self.error = (self.token_end - 1)..(self.token_end + c.len_utf8() as u32);
                return Err(TokenError::InvalidStringEscape(*c));
            }

            None => return Err(incomplete),
        };

        Ok(Some(c))
    }
233
    /// Reads a token that starts with `#`: booleans, `#'`-style syntax
    /// quotes, `#:` keywords, `#\` character literals and the `#(` / `#u8(`
    /// openers. Anything else is handed to `read_word`.
    ///
    /// The text is matched against `self.slice()`, so the current token is
    /// expected to start at the `#`.
    fn read_hash_value(&mut self) -> Result<TokenType<InternedString>> {
        /// Parses the text of a character literal; `slice` includes the
        /// leading `#\`, which is why the name starts at byte 2.
        fn parse_char(slice: &str) -> Result<char> {
            use core::str::FromStr;

            debug_assert!(slice.len() > 2);

            match &slice[2..] {
                s if s.eq_ignore_ascii_case("alarm") => Ok('\x07'),
                s if s.eq_ignore_ascii_case("backspace") => Ok('\x08'),
                s if s.eq_ignore_ascii_case("delete") => Ok('\x7F'),
                s if s.eq_ignore_ascii_case("escape") => Ok('\x1B'),
                s if s.eq_ignore_ascii_case("newline") => Ok('\n'),
                s if s.eq_ignore_ascii_case("null") => Ok('\0'),
                s if s.eq_ignore_ascii_case("return") => Ok('\r'),
                s if s.eq_ignore_ascii_case("space") => Ok(' '),
                s if s.eq_ignore_ascii_case("tab") => Ok('\t'),

                character => {
                    let first = character.as_bytes()[0];

                    // A lone `u` or `x` is the literal character; only longer
                    // text starting with them is treated as a hex escape.
                    let escape = (first == b'u' || first == b'x') && slice.len() > 3;

                    if !escape {
                        return char::from_str(character).map_err(|_| TokenError::InvalidCharName);
                    }

                    // `u{...}` carries its digits between the braces; the
                    // other forms carry them right after the prefix letter.
                    let payload = if first == b'u' && character.as_bytes().get(1) == Some(&b'{') {
                        if character.as_bytes().last() != Some(&b'}') {
                            return Err(TokenError::UnclosedHexEscape('}'));
                        }

                        &character[2..(character.len() - 1)]
                    } else {
                        &character[1..]
                    };

                    let code = u32::from_str_radix(payload, 16)
                        .map_err(TokenError::InvalidHexEscapeLiteral)?;

                    char::from_u32(code).ok_or(TokenError::InvalidHexCodePoint(code))
                }
            }
        }

        // Extend the token up to the next delimiter.
        while let Some(&c) = self.chars.peek() {
            match c {
                // A backslash takes the following character with it.
                '\\' => {
                    self.eat();
                    self.eat();
                }
                '\'' | '`' => {
                    self.eat();
                    break;
                }

                // `,` optionally followed by `@`; the token ends either way.
                ',' => {
                    self.eat();
                    if Some('@') == self.chars.peek().copied() {
                        self.eat();
                        break;
                    } else {
                        break;
                    }
                }

                '(' | '[' | ')' | ']' => break,
                c if c.is_whitespace() => break,
                _ => {
                    self.eat();
                }
            };
        }

        match self.slice() {
            "#true" | "#t" => Ok(TokenType::BooleanLiteral(true)),
            "#false" | "#f" => Ok(TokenType::BooleanLiteral(false)),

            "#'" => Ok(TokenType::QuoteSyntax),
            "#`" => Ok(TokenType::QuasiQuoteSyntax),
            "#," => Ok(TokenType::UnquoteSyntax),
            "#,@" => Ok(TokenType::UnquoteSpliceSyntax),

            keyword if keyword.starts_with("#:") => Ok(TokenType::Keyword(self.slice().into())),

            character if character.starts_with("#\\") => {
                if character.len() <= 2 {
                    return Err(TokenError::InvalidCharacter);
                }

                let parsed = match parse_char(character) {
                    Ok(it) => it,
                    Err(err) => {
                        // Blame the whole literal.
                        self.error = self.token_start..self.token_end;
                        return Err(err);
                    }
                };

                Ok(TokenType::CharacterLiteral(parsed))
            }

            // The scan above stops in front of `(`, so it is consumed here.
            "#" if self.chars.peek() == Some(&'(') => {
                self.eat();
                Ok(TokenType::OpenParen(Paren::Round, Some(ParenMod::Vector)))
            }

            "#u8" if self.chars.peek() == Some(&'(') => {
                self.eat();
                Ok(TokenType::OpenParen(Paren::Round, Some(ParenMod::Bytes)))
            }

            _ => self.read_word(),
        }
    }
347
348    fn read_number(&mut self) -> Result<TokenType<InternedString>> {
349        while let Some(&c) = self.chars.peek() {
350            match c {
351                c if c.is_ascii_digit() => {
352                    self.eat();
353                }
354                '+' | '-' | '.' | '/' | '@' | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
355                | 'e' | 'E' | 'f' | 'F' | 'i' | 'n' => {
356                    self.eat();
357                }
358                '(' | ')' | '[' | ']' => {
359                    return if let Some(t) = try_parse_number(self.slice(), None)? {
360                        Ok(t.into())
361                    } else {
362                        self.read_word()
363                    }
364                }
365                c if c.is_whitespace() => {
366                    return if let Some(t) = try_parse_number(self.slice(), None)? {
367                        Ok(t.into())
368                    } else {
369                        self.read_word()
370                    }
371                }
372                _ => return self.read_word(),
373            }
374        }
375        match try_parse_number(self.slice(), None)? {
376            Some(n) => Ok(n.into()),
377            None => self.read_word(),
378        }
379    }
380
381    fn read_rest_of_line(&mut self) {
382        while let Some(c) = self.eat() {
383            if c == '\n' {
384                break;
385            }
386        }
387    }
388
389    fn read_word(&mut self) -> Result<TokenType<InternedString>> {
390        let escaped_identifier = self.chars.peek().copied() == Some('|');
391
392        if escaped_identifier {
393            self.eat();
394        }
395
396        let mut buffer = core::mem::take(&mut self.ident_buffer);
397        buffer.clear();
398
399        let mut ident_buffer = IdentBuffer::new(self.chars.clone(), &mut buffer);
400
401        while let Some(&c) = self.chars.peek() {
402            match c {
403                '|' if escaped_identifier => {
404                    self.eat();
405
406                    break;
407                }
408                '\\' if escaped_identifier => {
409                    self.eat();
410
411                    let escaped = self.read_string_escape(TokenError::IncompleteIdentifier, '|')?;
412
413                    ident_buffer.push_escape(escaped);
414                }
415                c if escaped_identifier => {
416                    ident_buffer.push(c);
417                    self.eat();
418                }
419                '(' | '[' | ')' | ']' | '{' | '}' => break,
420                c if c.is_whitespace() => break,
421                '\'' | '"' | '`' | ';' | ',' => {
422                    break;
423                }
424                // Could be a quote within a word, we should handle escaping it accordingly
425                // (even though its a bit odd)
426                '\\' => {
427                    self.eat();
428                    self.eat();
429                }
430
431                _ => {
432                    self.eat();
433                }
434            };
435        }
436
437        let token = match self.slice() {
438            "." => TokenType::Dot,
439            "if" => TokenType::If,
440            "let" => TokenType::Let,
441            "define" | "defn" | "#%define" => TokenType::Define,
442            "%plain-let" => TokenType::TestLet,
443            "return!" => TokenType::Return,
444            "begin" => TokenType::Begin,
445            "lambda" | "fn" | "#%plain-lambda" | "λ" => TokenType::Lambda,
446            "quote" => TokenType::Quote,
447            "syntax-rules" => TokenType::SyntaxRules,
448            "define-syntax" => TokenType::DefineSyntax,
449            "..." => TokenType::Ellipses,
450            "set!" => TokenType::Set,
451            "require" => TokenType::Require,
452            identifier => {
453                debug_assert!(!identifier.is_empty());
454
455                match identifier.as_bytes() {
456                    [b'+', _, ..] if self.queued.is_none() => {
457                        self.queued = Some(TokenType::Identifier((&identifier[1..]).into()));
458                        TokenType::Identifier("+".into())
459                    }
460                    [b'|', .., b'|'] if escaped_identifier => {
461                        if ident_buffer.ident.is_empty() {
462                            TokenType::Identifier((&identifier[1..identifier.len() - 1]).into())
463                        } else {
464                            TokenType::Identifier(ident_buffer.ident.as_str().into())
465                        }
466                    }
467                    _ if escaped_identifier => {
468                        ident_buffer.ident.clear();
469                        return Err(TokenError::IncompleteIdentifier);
470                    }
471                    _ => TokenType::Identifier(identifier.into()),
472                }
473            }
474        };
475
476        ident_buffer.ident.clear();
477        self.ident_buffer = buffer;
478
479        Ok(token)
480    }
481
482    fn read_nestable_comment(&mut self) -> Result<TokenType<InternedString>> {
483        self.eat();
484
485        let mut depth = 1;
486
487        while let Some(c) = self.eat() {
488            match c {
489                '|' => {
490                    if self.chars.peek().copied() == Some('#') {
491                        self.eat();
492                        depth -= 1;
493
494                        if depth == 0 {
495                            return Ok(TokenType::Comment);
496                        }
497                    }
498                }
499                '#' => {
500                    if self.chars.peek().copied() == Some('|') {
501                        self.eat();
502                        depth += 1;
503                    }
504                }
505                _ => {}
506            }
507        }
508
509        Err(TokenError::IncompleteComment)
510    }
511}
512
/// Lazily materialised text of an escaped identifier.
///
/// While no escape has been seen, only the number of pushed characters is
/// tracked. The first escape copies that many characters from `chars` into
/// `ident`, and from then on every character is appended to `ident` directly.
struct IdentBuffer<'b, 'a: 'b> {
    /// Iterator positioned at the first character of the identifier body.
    chars: Peekable<Chars<'a>>,
    /// Destination for the decoded identifier text.
    ident: &'b mut String,
    // works as Either:
    //  - Ok: saw a non-trivial escape, buffering into ident
    //  - Err: "trivial" string, keeping count of its len
    mode: core::result::Result<(), usize>,
}

impl<'b, 'a: 'b> IdentBuffer<'b, 'a> {
    /// Starts in the counting mode with zero characters seen.
    fn new(chars: Peekable<Chars<'a>>, buffer: &'b mut String) -> Self {
        let mode = Err(0);

        Self {
            chars,
            ident: buffer,
            mode,
        }
    }

    /// Records one plain character: counted while no escape has been seen,
    /// appended to the buffer afterwards.
    fn push(&mut self, c: char) {
        match &mut self.mode {
            Err(seen) => *seen += 1,
            Ok(()) => self.ident.push(c),
        }
    }

    /// Records the result of an escape sequence, first switching to the
    /// buffering mode if needed. `None` adds no character.
    fn push_escape(&mut self, c: Option<char>) {
        if let Err(seen) = self.mode {
            // Replay the plain characters that were only counted so far.
            let prefix = self.chars.clone().take(seen);
            self.ident.extend(prefix);
            self.mode = Ok(());
        }

        self.ident.extend(c);
    }
}
550
/// Measures a leading `#!` line.
///
/// Returns `(chars, bytes)` for the first line, excluding its `'\n'`, when
/// `input` starts with `#!`, and `(0, 0)` otherwise.
fn strip_shebang_line(input: &str) -> (usize, usize) {
    if !input.starts_with("#!") {
        return (0, 0);
    }

    // Without a newline the whole input is the shebang line.
    let line_end = input.find('\n').unwrap_or(input.len());
    let shebang = &input[..line_end];

    (shebang.chars().count(), line_end)
}
560
561impl<'a> Lexer<'a> {
562    #[inline]
563    pub fn span(&self) -> Span {
564        self.token_start as _..self.token_end as _
565    }
566
567    pub fn small_span(&self) -> core::ops::Range<u32> {
568        self.token_start..self.token_end
569    }
570
571    #[inline]
572    pub fn slice(&self) -> &'a str {
573        self.source.get(self.span()).unwrap()
574    }
575}
576
/// Iterator adapter over [`Lexer`] that wraps each token type together with
/// its source text, span and source id.
pub struct TokenStream<'a> {
    /// The underlying lexer.
    pub(crate) lexer: Lexer<'a>,
    /// When `true`, `Comment` tokens are dropped instead of being yielded.
    pub(crate) skip_comments: bool,
    /// Source id passed along to every produced token and error.
    source_id: Option<SourceId>,
}
582
583impl<'a> TokenStream<'a> {
584    pub fn new(input: &'a str, skip_comments: bool, source_id: Option<SourceId>) -> Self {
585        let (char_offset, bytes_offset) = strip_shebang_line(input);
586
587        let mut res = Self {
588            lexer: Lexer::new(input),
589            skip_comments,
590            source_id, // skip_doc_comments,
591        };
592
593        res.lexer.token_start += bytes_offset as u32;
594        res.lexer.token_end += bytes_offset as u32;
595
596        for _ in 0..char_offset {
597            res.lexer.chars.next();
598        }
599
600        res
601    }
602
603    pub fn into_owned(self) -> OwnedTokenStream<'a> {
604        OwnedTokenStream { stream: self }
605    }
606}
607
608pub struct OwnedTokenStream<'a> {
609    pub(crate) stream: TokenStream<'a>,
610}
611
612impl<'a> Iterator for OwnedTokenStream<'a> {
613    type Item = core::result::Result<Token<'a, InternedString>, TokenLike<'a, TokenError>>;
614
615    fn next(&mut self) -> Option<Self::Item> {
616        self.stream.next()
617    }
618}
619
620impl<'a> OwnedTokenStream<'a> {
621    pub fn offset(&self) -> usize {
622        self.stream.lexer.span().end
623    }
624}
625impl<'a> Iterator for TokenStream<'a> {
626    type Item = core::result::Result<Token<'a, InternedString>, TokenLike<'a, TokenError>>;
627
628    fn next(&mut self) -> Option<Self::Item> {
629        self.lexer.next().and_then(|token| {
630            let token = match token {
631                Ok(token) => token,
632                Err(err) => {
633                    return Some(Err(TokenLike::new(
634                        err,
635                        self.lexer.slice(),
636                        if self.lexer.error.is_empty() {
637                            self.lexer.small_span()
638                        } else {
639                            self.lexer.error.clone()
640                        },
641                        self.source_id,
642                    )))
643                }
644            };
645
646            let token = Token::new(
647                token,
648                self.lexer.slice(),
649                self.lexer.small_span(),
650                self.source_id,
651            );
652            match token.ty {
653                // TokenType::Space => self.next(),
654                TokenType::Comment if self.skip_comments => self.next(),
655                // TokenType::DocComment if self.skip_doc_comments => self.next(),
656                _ => Some(Ok(token)),
657            }
658        })
659    }
660}
661
/// Result type used throughout the lexer, with [`TokenError`] as the error.
pub type Result<T> = core::result::Result<T, TokenError>;
663
/// Errors the lexer can report while tokenizing.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenError {
    /// A character that cannot start or continue a token here.
    UnexpectedChar(char),
    /// Input ended inside a string literal.
    IncompleteString,
    /// An escaped identifier was not completed.
    IncompleteIdentifier,
    /// Input ended inside a block comment.
    IncompleteComment,
    /// A non-whitespace character where whitespace or a newline was required.
    InvalidWhitespace,
    /// A backslash followed by a character that is not a known escape.
    InvalidStringEscape(char),
    /// A `#\` character literal with no character after it.
    InvalidCharacter,
    /// A rational literal whose denominator is zero.
    ZeroDenominator,
    /// A hex escape missing its terminator (carried in the variant).
    UnclosedHexEscape(char),
    /// A character literal name that is not recognised.
    InvalidCharName,
    /// The digits of a hex escape failed to parse.
    InvalidHexEscapeLiteral(core::num::ParseIntError),
    /// A hex escape naming a value that is not a valid `char`.
    InvalidHexCodePoint(u32),
}

impl core::fmt::Display for TokenError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Variants carrying data are formatted individually; the rest map to
        // a fixed message.
        let fixed = match self {
            Self::UnexpectedChar(c) => return write!(f, "unexpected char {c:?}"),
            Self::InvalidStringEscape(c) => return write!(f, "invalid escape {c:?}"),
            Self::UnclosedHexEscape(close) => {
                return write!(f, "unclosed hex escape, expected {close:?}")
            }
            Self::InvalidHexEscapeLiteral(error) => {
                return write!(f, "invalid hex escape literal, {error}")
            }
            Self::InvalidHexCodePoint(code) => return write!(f, "invalid code point {code:x}"),

            Self::IncompleteString => "incomplete string",
            Self::IncompleteIdentifier => "incomplete identifier",
            Self::IncompleteComment => "incomplete comment",
            Self::InvalidWhitespace => "unexpected character, expected whitespace or newline",
            Self::InvalidCharacter => "invalid character",
            Self::ZeroDenominator => "division by zero is not allowed in rational literals",
            Self::InvalidCharName => "invalid character name",
        };

        f.write_str(fixed)
    }
}
706
707impl<'a> Iterator for Lexer<'a> {
708    type Item = Result<TokenType<InternedString>>;
709
710    fn next(&mut self) -> Option<Self::Item> {
711        if let Some(t) = self.queued.take() {
712            return Some(Ok(t));
713        }
714        // Crunch until the next input
715        self.consume_whitespace();
716
717        self.token_start = self.token_end;
718
719        match self.chars.peek() {
720            Some(';') => {
721                self.eat();
722                self.read_rest_of_line();
723                Some(Ok(TokenType::Comment))
724            }
725
726            Some('"') => Some(self.read_string()),
727
728            Some(&paren @ ('(' | '[' | '{')) => {
729                self.eat();
730                let kind = match paren {
731                    '[' => Paren::Square,
732                    '{' => Paren::Curly,
733                    _ => Paren::Round,
734                };
735                Some(Ok(TokenType::OpenParen(kind, None)))
736            }
737
738            Some(&paren @ (')' | ']' | '}')) => {
739                self.eat();
740                let kind = match paren {
741                    ']' => Paren::Square,
742                    '}' => Paren::Curly,
743                    _ => Paren::Round,
744                };
745                Some(Ok(TokenType::CloseParen(kind)))
746            }
747
748            // Handle Quotes
749            Some('\'') => {
750                self.eat();
751                Some(Ok(TokenType::QuoteTick))
752            }
753
754            Some('`') => {
755                self.eat();
756                Some(Ok(TokenType::QuasiQuote))
757            }
758
759            Some(',') => {
760                self.eat();
761
762                if let Some('@') = self.chars.peek() {
763                    self.eat();
764
765                    Some(Ok(TokenType::UnquoteSplice))
766                } else {
767                    Some(Ok(TokenType::Unquote))
768                }
769            }
770            Some('+') | Some('-') | Some('.') => {
771                self.eat();
772                Some(self.read_number())
773            }
774            Some('#') => {
775                self.eat();
776                let next = self.chars.peek().copied();
777
778                let token = match next {
779                    Some('x' | 'X' | 'd' | 'D' | 'o' | 'O' | 'b' | 'B') => {
780                        self.eat();
781                        self.read_number()
782                    }
783                    Some('|') => self.read_nestable_comment(),
784                    Some(';') => {
785                        self.eat();
786                        Ok(TokenType::DatumComment)
787                    }
788                    Some('#') => {
789                        self.eat();
790                        Err(TokenError::UnexpectedChar('#'))
791                    }
792                    _ => self.read_hash_value(),
793                };
794
795                Some(token)
796            }
797
798            Some(c) if !c.is_whitespace() && !c.is_ascii_digit() || *c == '_' => {
799                Some(self.read_word())
800            }
801            Some(c) if c.is_ascii_digit() => Some(self.read_number()),
802            Some(_) => {
803                // this is very much unexpected
804                debug_assert!(false);
805
806                self.eat().map(|e| Err(TokenError::UnexpectedChar(e)))
807            }
808            None => None,
809        }
810    }
811}
812
813// Split the string by + and -. Returns at most 2 elements or `None` if there were more than 2.
814fn split_into_complex<'a>(s: &'a str) -> Option<SmallVec<[NumPart<'a>; 2]>> {
815    let classify_num_part = |s: &'a str| -> NumPart<'a> {
816        match s.as_bytes().last() {
817            Some(b'i') => NumPart::Imaginary(&s[..s.len() - 1]),
818            _ => NumPart::Real(s),
819        }
820    };
821
822    let mut idxs = SmallVec::<[usize; 3]>::new();
823
824    let mut chars = s.char_indices();
825    while let Some((idx, ch)) = chars.next() {
826        if ch == '+' || ch == '-' {
827            if idxs.len() == 2 {
828                return None;
829            } else {
830                idxs.push(idx);
831            }
832        } else if ch == 'e' || ch == 'E' {
833            // ignore any + or - after an e
834            let _ = chars.next();
835        }
836    }
837
838    let parts = match idxs.as_slice() {
839        [] | [0] => smallvec![classify_num_part(s)],
840        [idx] | [0, idx] => smallvec![
841            classify_num_part(&s[0..*idx]),
842            classify_num_part(&s[*idx..])
843        ],
844        _ => return None,
845    };
846    Some(parts)
847}
848
/// One piece of a numeric literal as produced by `split_into_complex`.
#[derive(Debug)]
enum NumPart<'a> {
    /// Text that does not end in `i`, kept unchanged.
    Real(&'a str),
    /// Text that ended in `i`, stored without that trailing `i`.
    Imaginary(&'a str),
}
854
855fn parse_real(s: &str, radix: u32) -> Option<RealLiteral> {
856    if s == NEG_INFINITY {
857        return Some(RealLiteral::Float(f64::NEG_INFINITY.into()));
858    } else if s == INFINITY {
859        return Some(RealLiteral::Float(f64::INFINITY.into()));
860    } else if s == NAN || s == NEG_NAN {
861        return Some(RealLiteral::Float(f64::NAN.into()));
862    }
863
864    let mut has_dot = false;
865    let mut has_exponent = false;
866    let mut frac_position = None;
867    for (idx, ch) in s.chars().enumerate() {
868        match ch {
869            'e' | 'E' if radix < 15 => {
870                if has_exponent {
871                    return None;
872                };
873                has_exponent = true;
874            }
875            '/' => {
876                frac_position = match frac_position {
877                    Some(_) => return None,
878                    None => Some(idx),
879                }
880            }
881            '.' => {
882                if has_dot {
883                    return None;
884                }
885                has_dot = true
886            }
887            _ => {}
888        }
889    }
890
891    if has_exponent || has_dot {
892        if radix != 10 {
893            // radix for floating points not yet supported
894            return None;
895        }
896
897        s.parse().map(RealLiteral::Float).ok()
898    } else if let Some(p) = frac_position {
899        let (n_str, d_str) = s.split_at(p);
900        let d_str = &d_str[1..];
901        let n = IntLiteral::from_str_radix(n_str, radix).ok()?;
902        let d = IntLiteral::from_str_radix(d_str, radix).ok()?;
903        Some(RealLiteral::Rational(n, d))
904    } else {
905        let int = IntLiteral::from_str_radix(s, radix).ok()?;
906        Some(RealLiteral::Int(int))
907    }
908}
909
910fn try_parse_number(s: &str, radix: Option<u32>) -> Result<Option<NumberLiteral>> {
911    let Some(n) = parse_number(s, radix) else {
912        return Ok(None);
913    };
914
915    fn validate_real_literal(lit: &RealLiteral) -> Result<()> {
916        let RealLiteral::Rational(_, int) = lit else {
917            return Ok(());
918        };
919
920        match int {
921            IntLiteral::Small(n) if *n == 0 => Err(TokenError::ZeroDenominator),
922            IntLiteral::Big(big_int) if **big_int == BigInt::ZERO => {
923                Err(TokenError::ZeroDenominator)
924            }
925            _ => Ok(()),
926        }
927    }
928
929    match &n {
930        NumberLiteral::Real(real) => validate_real_literal(real)?,
931        NumberLiteral::Complex(r, i) => {
932            validate_real_literal(r)?;
933            validate_real_literal(i)?;
934        }
935        NumberLiteral::Polar(r, theta) => {
936            validate_real_literal(r)?;
937            validate_real_literal(theta)?;
938        }
939    }
940
941    Ok(Some(n))
942}
943
944pub fn parse_number(s: &str, radix: Option<u32>) -> Option<NumberLiteral> {
945    let (s, radix) = match s.get(0..2) {
946        Some("#x" | "#X") => (&s[2..], 16),
947        Some("#d" | "#D") => (&s[2..], 10),
948        Some("#o" | "#O") => (&s[2..], 8),
949        Some("#b" | "#B") => (&s[2..], 2),
950        _ => (s, radix.unwrap_or(10)),
951    };
952
953    if let Some((r, theta)) = s.split_once('@') {
954        let r = parse_real(r, radix)?;
955        let theta = parse_real(theta, radix)?;
956        return Some(NumberLiteral::Polar(r, theta));
957    }
958
959    match split_into_complex(s)?.as_slice() {
960        [NumPart::Real(x)] => parse_real(x, radix).map(NumberLiteral::from),
961        [NumPart::Imaginary(x)] => {
962            if !matches!(x.as_bytes().first(), Some(b'+') | Some(b'-')) {
963                return None;
964            };
965
966            let imaginary = if *x == "+" {
967                IntLiteral::Small(1).into()
968            } else if *x == "-" {
969                IntLiteral::Small(-1).into()
970            } else {
971                parse_real(x, radix)?
972            };
973            Some(NumberLiteral::Complex(
974                IntLiteral::Small(0).into(),
975                imaginary,
976            ))
977        }
978        [NumPart::Real(re), NumPart::Imaginary(im)] => Some(NumberLiteral::Complex(
979            parse_real(re, radix)?,
980            if *im == "+" {
981                IntLiteral::Small(1).into()
982            } else if *im == "-" {
983                IntLiteral::Small(-1).into()
984            } else {
985                parse_real(im, radix)?
986            },
987        )),
988        _ => None,
989    }
990}
991
992#[cfg(test)]
993mod lexer_tests {
994    use core::str::FromStr;
995
996    use super::*;
997    use crate::span::Span;
998    use crate::tokens::{IntLiteral, TokenType::*};
999    use pretty_assertions::assert_eq;
1000
1001    fn identifier(ident: &str) -> TokenType<InternedString> {
1002        Identifier(ident.into())
1003    }
1004
1005    fn token_stream(source: &str) -> impl Iterator<Item = Token<'_, InternedString>> {
1006        TokenStream::new(source, true, None).map(|t| t.expect("unexpected parsing error"))
1007    }
1008
1009    // TODO: Figure out why this just cause an infinite loop when parsing it?
1010    #[test]
1011    fn test_identifier_with_quote_end() {
1012        let s = TokenStream::new(
1013            "        (define (stream-cdr stream)
1014            ((stream-cdr' stream)))
1015",
1016            true,
1017            SourceId::none(),
1018        );
1019
1020        for token in s {
1021            println!("{:?}", token);
1022        }
1023    }
1024
1025    #[test]
1026    fn test_bracket_characters() {
1027        let s = TokenStream::new(
1028            "[(equal? #\\[ (car chars)) (b (cdr chars) (+ sum 1))]",
1029            true,
1030            SourceId::none(),
1031        );
1032
1033        for token in s {
1034            println!("{:?}", token);
1035        }
1036    }
1037
1038    #[test]
1039    fn test_escape_in_string() {
1040        let s = TokenStream::new(r#"(display "}\n")"#, true, SourceId::none());
1041
1042        for token in s {
1043            println!("{:?}", token);
1044        }
1045    }
1046
1047    #[test]
1048    fn test_quote_within_word() {
1049        let mut s = TokenStream::new("'foo\\'a", true, SourceId::none());
1050
1051        println!("{:?}", s.next());
1052        println!("{:?}", s.next());
1053        println!("{:?}", s.next());
1054    }
1055
1056    #[test]
1057    fn test_single_period() {
1058        let mut s = TokenStream::new(".", true, SourceId::none());
1059
1060        println!("{:?}", s.next());
1061    }
1062
1063    #[test]
1064    fn test_chars() {
1065        let mut s = token_stream("#\\a #\\b #\\λ");
1066
1067        assert_eq!(
1068            s.next(),
1069            Some(Token {
1070                ty: CharacterLiteral('a'),
1071                source: "#\\a",
1072                span: Span::new(0, 3, SourceId::none())
1073            })
1074        );
1075        assert_eq!(
1076            s.next(),
1077            Some(Token {
1078                ty: CharacterLiteral('b'),
1079                source: "#\\b",
1080                span: Span::new(4, 7, SourceId::none())
1081            })
1082        );
1083        assert_eq!(
1084            s.next(),
1085            Some(Token {
1086                ty: CharacterLiteral('λ'),
1087                source: "#\\λ",
1088                span: Span::new(8, 12, SourceId::none())
1089            })
1090        );
1091    }
1092
1093    #[test]
1094    fn test_unicode_escapes() {
1095        let mut s = token_stream(r#"  #\xAb #\u{0D300} #\u0540 "\x00D;" "\u1044;" "\u{045}"  "#);
1096
1097        assert_eq!(
1098            s.next().unwrap(),
1099            Token {
1100                ty: CharacterLiteral('«'),
1101                source: r#"#\xAb"#,
1102                span: Span::new(2, 7, SourceId::none())
1103            }
1104        );
1105
1106        assert_eq!(
1107            s.next().unwrap(),
1108            Token {
1109                ty: CharacterLiteral('팀'),
1110                source: r#"#\u{0D300}"#,
1111                span: Span::new(8, 18, SourceId::none())
1112            }
1113        );
1114
1115        assert_eq!(
1116            s.next().unwrap(),
1117            Token {
1118                ty: CharacterLiteral('Հ'),
1119                source: r#"#\u0540"#,
1120                span: Span::new(19, 26, SourceId::none())
1121            }
1122        );
1123
1124        assert_eq!(
1125            s.next().unwrap(),
1126            Token {
1127                ty: StringLiteral("\r".into()),
1128                source: r#""\x00D;""#,
1129                span: Span::new(27, 35, SourceId::none())
1130            }
1131        );
1132
1133        assert_eq!(
1134            s.next().unwrap(),
1135            Token {
1136                ty: StringLiteral("၄".into()),
1137                source: r#""\u1044;""#,
1138                span: Span::new(36, 45, SourceId::none())
1139            }
1140        );
1141
1142        assert_eq!(
1143            s.next().unwrap(),
1144            Token {
1145                ty: StringLiteral("E".into()),
1146                source: r#""\u{045}""#,
1147                span: Span::new(46, 55, SourceId::none())
1148            }
1149        );
1150    }
1151
1152    #[test]
1153    fn test_invalid_unicode_escapes() {
1154        let tokens = [
1155            r#" #\xd820 "#,
1156            r#" #\u{1 "#,
1157            r#" "\xabx" "#,
1158            r#" "\u0045" "#,
1159            r#" #\xaaaaaaaa " "#,
1160            r#" "\u{ffffffff}" "#,
1161            r#" #\u{} "#,
1162        ];
1163
1164        for token in tokens {
1165            let mut s = TokenStream::new(token, true, None);
1166
1167            // FIXME: concrete errors
1168            assert!(s.next().unwrap().is_err(), "{token:?} should be invalid");
1169        }
1170    }
1171
1172    #[test]
1173    fn test_string_newlines() {
1174        let mut s = token_stream(" \"foo\nbar\" \"foo \\  \n   bar\" ");
1175
1176        assert_eq!(
1177            s.next().unwrap(),
1178            Token {
1179                ty: StringLiteral("foo\nbar".into()),
1180                source: "\"foo\nbar\"",
1181                span: Span::new(1, 10, SourceId::none())
1182            }
1183        );
1184
1185        assert_eq!(
1186            s.next().unwrap(),
1187            Token {
1188                ty: StringLiteral("foo bar".into()),
1189                source: "\"foo \\  \n   bar\"",
1190                span: Span::new(11, 27, SourceId::none())
1191            }
1192        );
1193    }
1194
1195    #[test]
1196    fn test_unexpected_char() {
1197        let mut s = token_stream("($)");
1198        assert_eq!(
1199            s.next(),
1200            Some(Token {
1201                ty: OpenParen(Paren::Round, None),
1202                source: "(",
1203                span: Span::new(0, 1, SourceId::none())
1204            })
1205        );
1206        assert_eq!(
1207            s.next(),
1208            Some(Token {
1209                ty: identifier("$"),
1210                source: "$",
1211                span: Span::new(1, 2, SourceId::none())
1212            })
1213        );
1214        assert_eq!(
1215            s.next(),
1216            Some(Token {
1217                ty: CloseParen(Paren::Round),
1218                source: ")",
1219                span: Span::new(2, 3, SourceId::none())
1220            })
1221        );
1222    }
1223
1224    #[test]
1225    fn test_words() {
1226        let mut s = token_stream("foo FOO _123_ Nil #f #t");
1227
1228        assert_eq!(
1229            s.next(),
1230            Some(Token {
1231                ty: identifier("foo"),
1232                source: "foo",
1233                span: Span::new(0, 3, SourceId::none())
1234            })
1235        );
1236
1237        assert_eq!(
1238            s.next(),
1239            Some(Token {
1240                ty: identifier("FOO"),
1241                source: "FOO",
1242                span: Span::new(4, 7, SourceId::none())
1243            })
1244        );
1245
1246        assert_eq!(
1247            s.next(),
1248            Some(Token {
1249                ty: identifier("_123_"),
1250                source: "_123_",
1251                span: Span::new(8, 13, SourceId::none())
1252            })
1253        );
1254
1255        assert_eq!(
1256            s.next(),
1257            Some(Token {
1258                ty: identifier("Nil"),
1259                source: "Nil",
1260                span: Span::new(14, 17, SourceId::none())
1261            })
1262        );
1263
1264        assert_eq!(
1265            s.next(),
1266            Some(Token {
1267                ty: BooleanLiteral(false),
1268                source: "#f",
1269                span: Span::new(18, 20, SourceId::none())
1270            })
1271        );
1272
1273        assert_eq!(
1274            s.next(),
1275            Some(Token {
1276                ty: BooleanLiteral(true),
1277                source: "#t",
1278                span: Span::new(21, 23, SourceId::none())
1279            })
1280        );
1281
1282        assert_eq!(s.next(), None);
1283    }
1284
1285    #[test]
1286    fn test_almost_literals() {
1287        let got: Vec<_> = token_stream("1e 1ee 1.2e5.4 1E10/4 1.45# 3- e10").collect();
1288        assert_eq!(
1289            got.as_slice(),
1290            &[
1291                Token {
1292                    ty: identifier("1e"),
1293                    source: "1e",
1294                    span: Span::new(0, 2, SourceId::none()),
1295                },
1296                Token {
1297                    ty: identifier("1ee"),
1298                    source: "1ee",
1299                    span: Span::new(3, 6, SourceId::none()),
1300                },
1301                Token {
1302                    ty: identifier("1.2e5.4"),
1303                    source: "1.2e5.4",
1304                    span: Span::new(7, 14, SourceId::none()),
1305                },
1306                Token {
1307                    ty: identifier("1E10/4"),
1308                    source: "1E10/4",
1309                    span: Span::new(15, 21, SourceId::none()),
1310                },
1311                Token {
1312                    ty: identifier("1.45#"),
1313                    source: "1.45#",
1314                    span: Span::new(22, 27, SourceId::none()),
1315                },
1316                Token {
1317                    ty: identifier("3-"),
1318                    source: "3-",
1319                    span: Span::new(28, 30, SourceId::none()),
1320                },
1321                Token {
1322                    ty: identifier("e10"),
1323                    source: "e10",
1324                    span: Span::new(31, 34, SourceId::none()),
1325                },
1326            ]
1327        );
1328    }
1329
1330    #[test]
1331    fn test_real_numbers() {
1332        let got: Vec<_> =
1333            token_stream("0 -0 -1.2 +2.3 999 1. 1e2 1E2 1.2e2 1.2E2 +inf.0 -inf.0 2e-4 2e+10")
1334                .collect();
1335        assert_eq!(
1336            got.as_slice(),
1337            &[
1338                Token {
1339                    ty: IntLiteral::Small(0).into(),
1340                    source: "0",
1341                    span: Span::new(0, 1, SourceId::none()),
1342                },
1343                Token {
1344                    ty: IntLiteral::Small(0).into(),
1345                    source: "-0",
1346                    span: Span::new(2, 4, SourceId::none()),
1347                },
1348                Token {
1349                    ty: RealLiteral::Float((-1.2).into()).into(),
1350                    source: "-1.2",
1351                    span: Span::new(5, 9, SourceId::none()),
1352                },
1353                Token {
1354                    ty: RealLiteral::Float(2.3.into()).into(),
1355                    source: "+2.3",
1356                    span: Span::new(10, 14, SourceId::none()),
1357                },
1358                Token {
1359                    ty: IntLiteral::Small(999).into(),
1360                    source: "999",
1361                    span: Span::new(15, 18, SourceId::none()),
1362                },
1363                Token {
1364                    ty: RealLiteral::Float(1.0.into()).into(),
1365                    source: "1.",
1366                    span: Span::new(19, 21, SourceId::none()),
1367                },
1368                Token {
1369                    ty: RealLiteral::Float(100.0.into()).into(),
1370                    source: "1e2",
1371                    span: Span::new(22, 25, SourceId::none()),
1372                },
1373                Token {
1374                    ty: RealLiteral::Float(100.0.into()).into(),
1375                    source: "1E2",
1376                    span: Span::new(26, 29, SourceId::none()),
1377                },
1378                Token {
1379                    ty: RealLiteral::Float(120.0.into()).into(),
1380                    source: "1.2e2",
1381                    span: Span::new(30, 35, SourceId::none()),
1382                },
1383                Token {
1384                    ty: RealLiteral::Float(120.0.into()).into(),
1385                    source: "1.2E2",
1386                    span: Span::new(36, 41, SourceId::none()),
1387                },
1388                Token {
1389                    ty: RealLiteral::Float(f64::INFINITY.into()).into(),
1390                    source: "+inf.0",
1391                    span: Span::new(42, 48, SourceId::none()),
1392                },
1393                Token {
1394                    ty: RealLiteral::Float(f64::NEG_INFINITY.into()).into(),
1395                    source: "-inf.0",
1396                    span: Span::new(49, 55, SourceId::none()),
1397                },
1398                Token {
1399                    ty: RealLiteral::Float((2e-4).into()).into(),
1400                    source: "2e-4",
1401                    span: Span::new(56, 60, SourceId::none()),
1402                },
1403                Token {
1404                    ty: RealLiteral::Float((2e+10).into()).into(),
1405                    source: "2e+10",
1406                    span: Span::new(61, 66, SourceId::none())
1407                }
1408            ]
1409        );
1410    }
1411
1412    #[test]
1413    fn test_nan() {
1414        // nan does not equal nan so we have to run the is_nan predicate.
1415        let got = token_stream("+nan.0").next().unwrap();
1416
1417        match got.ty {
1418            TokenType::Number(n) => {
1419                assert!(
1420                    matches!(n.resolve(), NumberLiteral::Real(RealLiteral::Float(x)) if x.is_nan())
1421                )
1422            }
1423
1424            _ => panic!("Didn't match"),
1425        }
1426
1427        let got = token_stream("-nan.0").next().unwrap();
1428
1429        match got.ty {
1430            TokenType::Number(n) => {
1431                assert!(
1432                    matches!(n.resolve(), NumberLiteral::Real(RealLiteral::Float(x)) if x.is_nan())
1433                )
1434            }
1435
1436            _ => panic!("Didn't match"),
1437        }
1438    }
1439
1440    #[test]
1441    fn test_rationals() {
1442        let got: Vec<_> = token_stream(
1443            r#"
1444                1/4
1445                (1/4 1/3)
1446                11111111111111111111/22222222222222222222
1447                /
1448                1/
1449                1/4.0
1450                1//4
1451                1 / 4
1452                .2
1453"#,
1454        )
1455        .collect();
1456        assert_eq!(
1457            got.as_slice(),
1458            &[
1459                Token {
1460                    ty: RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(4)).into(),
1461                    source: "1/4",
1462                    span: Span::new(17, 20, SourceId::none()),
1463                },
1464                Token {
1465                    ty: OpenParen(Paren::Round, None),
1466                    source: "(",
1467                    span: Span::new(37, 38, SourceId::none()),
1468                },
1469                Token {
1470                    ty: RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(4)).into(),
1471                    source: "1/4",
1472                    span: Span::new(38, 41, SourceId::none()),
1473                },
1474                Token {
1475                    ty: RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(3)).into(),
1476                    source: "1/3",
1477                    span: Span::new(42, 45, SourceId::none()),
1478                },
1479                Token {
1480                    ty: CloseParen(Paren::Round),
1481                    source: ")",
1482                    span: Span::new(45, 46, SourceId::none()),
1483                },
1484                Token {
1485                    ty: RealLiteral::Rational(
1486                        IntLiteral::from_str("11111111111111111111").unwrap(),
1487                        IntLiteral::from_str("22222222222222222222").unwrap(),
1488                    )
1489                    .into(),
1490                    source: "11111111111111111111/22222222222222222222",
1491                    span: Span::new(63, 104, SourceId::none()),
1492                },
1493                Token {
1494                    ty: identifier("/"),
1495                    source: "/",
1496                    span: Span::new(121, 122, SourceId::none()),
1497                },
1498                Token {
1499                    ty: identifier("1/"),
1500                    source: "1/",
1501                    span: Span::new(139, 141, SourceId::none()),
1502                },
1503                Token {
1504                    ty: identifier("1/4.0"),
1505                    source: "1/4.0",
1506                    span: Span::new(158, 163, SourceId::none()),
1507                },
1508                Token {
1509                    ty: identifier("1//4"),
1510                    source: "1//4",
1511                    span: Span::new(180, 184, SourceId::none()),
1512                },
1513                Token {
1514                    ty: IntLiteral::Small(1).into(),
1515                    source: "1",
1516                    span: Span::new(201, 202, SourceId::none()),
1517                },
1518                Token {
1519                    ty: identifier("/"),
1520                    source: "/",
1521                    span: Span::new(203, 204, SourceId::none()),
1522                },
1523                Token {
1524                    ty: IntLiteral::Small(4).into(),
1525                    source: "4",
1526                    span: Span::new(205, 206, SourceId::none()),
1527                },
1528                Token {
1529                    ty: RealLiteral::Float((0.2).into()).into(),
1530                    source: ".2",
1531                    span: Span::new(223, 225, SourceId::none())
1532                }
1533            ]
1534        );
1535    }
1536
1537    #[test]
1538    fn test_complex_numbers() {
1539        let got: Vec<_> = token_stream(
1540            "1+2i 3-4i +5+6i +1i 1.0+2.0i 3-4.0i +1.0i 2e+4+inf.0i -inf.0-2e-4i 1/2@0 -3/2@1 +i -i 4+i",
1541        )
1542        .collect();
1543        assert_eq!(
1544            got.as_slice(),
1545            &[
1546                Token {
1547                    ty: NumberLiteral::Complex(
1548                        IntLiteral::Small(1).into(),
1549                        IntLiteral::Small(2).into()
1550                    )
1551                    .into(),
1552                    source: "1+2i",
1553                    span: Span::new(0, 4, SourceId::none()),
1554                },
1555                Token {
1556                    ty: NumberLiteral::Complex(
1557                        IntLiteral::Small(3).into(),
1558                        IntLiteral::Small(-4).into()
1559                    )
1560                    .into(),
1561                    source: "3-4i",
1562                    span: Span::new(5, 9, SourceId::none()),
1563                },
1564                Token {
1565                    ty: NumberLiteral::Complex(
1566                        IntLiteral::Small(5).into(),
1567                        IntLiteral::Small(6).into()
1568                    )
1569                    .into(),
1570                    source: "+5+6i",
1571                    span: Span::new(10, 15, SourceId::none()),
1572                },
1573                Token {
1574                    ty: NumberLiteral::Complex(
1575                        IntLiteral::Small(0).into(),
1576                        IntLiteral::Small(1).into()
1577                    )
1578                    .into(),
1579                    source: "+1i",
1580                    span: Span::new(16, 19, SourceId::none()),
1581                },
1582                Token {
1583                    ty: NumberLiteral::Complex(
1584                        RealLiteral::Float((1.0).into()).into(),
1585                        RealLiteral::Float((2.0).into()).into()
1586                    )
1587                    .into(),
1588                    source: "1.0+2.0i",
1589                    span: Span::new(20, 28, SourceId::none()),
1590                },
1591                Token {
1592                    ty: NumberLiteral::Complex(
1593                        IntLiteral::Small(3).into(),
1594                        RealLiteral::Float((-4.0).into()).into()
1595                    )
1596                    .into(),
1597                    source: "3-4.0i",
1598                    span: Span::new(29, 35, SourceId::none()),
1599                },
1600                Token {
1601                    ty: NumberLiteral::Complex(
1602                        IntLiteral::Small(0).into(),
1603                        RealLiteral::Float((1.0).into()).into()
1604                    )
1605                    .into(),
1606                    source: "+1.0i",
1607                    span: Span::new(36, 41, SourceId::none()),
1608                },
1609                Token {
1610                    ty: NumberLiteral::Complex(
1611                        RealLiteral::Float((2e+4).into()),
1612                        RealLiteral::Float(f64::INFINITY.into()),
1613                    )
1614                    .into(),
1615                    source: "2e+4+inf.0i",
1616                    span: Span::new(42, 53, SourceId::none()),
1617                },
1618                Token {
1619                    ty: NumberLiteral::Complex(
1620                        RealLiteral::Float(f64::NEG_INFINITY.into()),
1621                        RealLiteral::Float((-2e-4).into()),
1622                    )
1623                    .into(),
1624                    source: "-inf.0-2e-4i",
1625                    span: Span::new(54, 66, SourceId::none()),
1626                },
1627                Token {
1628                    ty: NumberLiteral::Polar(
1629                        RealLiteral::Rational(IntLiteral::Small(1), IntLiteral::Small(2)),
1630                        IntLiteral::Small(0).into()
1631                    )
1632                    .into(),
1633                    source: "1/2@0",
1634                    span: Span::new(67, 72, SourceId::none()),
1635                },
1636                Token {
1637                    ty: NumberLiteral::Polar(
1638                        RealLiteral::Rational(IntLiteral::Small(-3), IntLiteral::Small(2)),
1639                        IntLiteral::Small(1).into()
1640                    )
1641                    .into(),
1642                    source: "-3/2@1",
1643                    span: Span::new(73, 79, SourceId::none()),
1644                },
1645                Token {
1646                    ty: NumberLiteral::Complex(
1647                        IntLiteral::Small(0).into(),
1648                        IntLiteral::Small(1).into(),
1649                    )
1650                    .into(),
1651                    source: "+i",
1652                    span: Span::new(80, 82, SourceId::none()),
1653                },
1654                Token {
1655                    ty: NumberLiteral::Complex(
1656                        IntLiteral::Small(0).into(),
1657                        IntLiteral::Small(-1).into()
1658                    )
1659                    .into(),
1660                    source: "-i",
1661                    span: Span::new(83, 85, SourceId::none()),
1662                },
1663                Token {
1664                    ty: NumberLiteral::Complex(
1665                        IntLiteral::Small(4).into(),
1666                        IntLiteral::Small(1).into()
1667                    )
1668                    .into(),
1669                    source: "4+i",
1670                    span: Span::new(86, 89, SourceId::none()),
1671                },
1672            ]
1673        );
1674    }
1675
1676    #[test]
1677    fn test_numbers_with_radix() {
1678        let got = token_stream("#xff #xce #o777 #o1/20 #b1/10 #x10+ffi #d1.0").collect::<Vec<_>>();
1679
1680        assert_eq!(
1681            &*got,
1682            &[
1683                Token {
1684                    ty: NumberLiteral::Real(IntLiteral::Small(255).into()).into(),
1685                    source: "#xff",
1686                    span: Span::new(0, 4, SourceId::none()),
1687                },
1688                Token {
1689                    ty: NumberLiteral::Real(IntLiteral::Small(206).into()).into(),
1690                    source: "#xce",
1691                    span: Span::new(5, 9, SourceId::none()),
1692                },
1693                Token {
1694                    ty: NumberLiteral::Real(IntLiteral::Small(511).into()).into(),
1695                    source: "#o777",
1696                    span: Span::new(10, 15, SourceId::none()),
1697                },
1698                Token {
1699                    ty: NumberLiteral::Real(RealLiteral::Rational(
1700                        IntLiteral::Small(1),
1701                        IntLiteral::Small(16)
1702                    ))
1703                    .into(),
1704                    source: "#o1/20",
1705                    span: Span::new(16, 22, SourceId::none()),
1706                },
1707                Token {
1708                    ty: NumberLiteral::Real(RealLiteral::Rational(
1709                        IntLiteral::Small(1),
1710                        IntLiteral::Small(2)
1711                    ))
1712                    .into(),
1713                    source: "#b1/10",
1714                    span: Span::new(23, 29, SourceId::none()),
1715                },
1716                Token {
1717                    ty: NumberLiteral::Complex(
1718                        IntLiteral::Small(16).into(),
1719                        IntLiteral::Small(255).into(),
1720                    )
1721                    .into(),
1722                    source: "#x10+ffi",
1723                    span: Span::new(30, 38, SourceId::none()),
1724                },
1725                Token {
1726                    ty: NumberLiteral::Real(RealLiteral::Float((1.0).into())).into(),
1727                    source: "#d1.0",
1728                    span: Span::new(39, 44, SourceId::none()),
1729                }
1730            ]
1731        );
1732    }
1733
1734    #[test]
1735    fn test_malformed_complex_numbers_are_identifiers() {
1736        let got: Vec<_> = token_stream("i 1i+1i -4+-2i").collect();
1737        assert_eq!(
1738            got.as_slice(),
1739            &[
1740                Token {
1741                    ty: identifier("i"),
1742                    source: "i",
1743                    span: Span::new(0, 1, SourceId::none()),
1744                },
1745                Token {
1746                    ty: identifier("1i+1i"),
1747                    source: "1i+1i",
1748                    span: Span::new(2, 7, SourceId::none()),
1749                },
1750                Token {
1751                    ty: identifier("-4+-2i"),
1752                    source: "-4+-2i",
1753                    span: Span::new(8, 14, SourceId::none()),
1754                },
1755            ]
1756        );
1757    }
1758
1759    #[test]
1760    fn test_string() {
1761        let got: Vec<_> = token_stream(r#" "" "Foo bar" "\"\\" "#).collect();
1762        assert_eq!(
1763            got.as_slice(),
1764            &[
1765                Token {
1766                    ty: StringLiteral(r#""#.into()),
1767                    source: r#""""#,
1768                    span: Span::new(1, 3, SourceId::none()),
1769                },
1770                Token {
1771                    ty: StringLiteral(r#"Foo bar"#.into()),
1772                    source: r#""Foo bar""#,
1773                    span: Span::new(4, 13, SourceId::none()),
1774                },
1775                Token {
1776                    ty: StringLiteral(r#""\"#.into()),
1777                    source: r#""\"\\""#,
1778                    span: Span::new(14, 20, SourceId::none()),
1779                },
1780            ]
1781        );
1782    }
1783
1784    #[test]
1785    fn test_comment() {
1786        let mut s = token_stream(";!/usr/bin/gate\n   ; foo\n");
1787        assert_eq!(s.next(), None);
1788    }
1789
1790    #[test]
1791    fn function_definition() {
1792        let s = token_stream("(define odd-rec? (lambda (x) (if (= x 0) #f (even-rec? (- x 1)))))");
1793        let res: Vec<_> = s.collect();
1794
1795        println!("{:#?}", res);
1796    }
1797
1798    #[test]
1799    fn lex_string_with_escape_chars() {
1800        let s = token_stream("\"\0\0\0\"");
1801        let res: Vec<_> = s.collect();
1802        println!("{:#?}", res);
1803    }
1804
1805    #[test]
1806    fn scheme_statement() {
1807        let s = token_stream("(apples (function a b) (+ a b))");
1808        let res: Vec<_> = s.collect();
1809
1810        let expected: Vec<Token<InternedString>> = vec![
1811            Token {
1812                ty: OpenParen(Paren::Round, None),
1813                source: "(",
1814                span: Span::new(0, 1, SourceId::none()),
1815            },
1816            Token {
1817                ty: identifier("apples"),
1818                source: "apples",
1819                span: Span::new(1, 7, SourceId::none()),
1820            },
1821            Token {
1822                ty: OpenParen(Paren::Round, None),
1823                source: "(",
1824                span: Span::new(8, 9, SourceId::none()),
1825            },
1826            Token {
1827                ty: identifier("function"),
1828                source: "function",
1829                span: Span::new(9, 17, SourceId::none()),
1830            },
1831            Token {
1832                ty: identifier("a"),
1833                source: "a",
1834                span: Span::new(18, 19, SourceId::none()),
1835            },
1836            Token {
1837                ty: identifier("b"),
1838                source: "b",
1839                span: Span::new(20, 21, SourceId::none()),
1840            },
1841            Token {
1842                ty: CloseParen(Paren::Round),
1843                source: ")",
1844                span: Span::new(21, 22, SourceId::none()),
1845            },
1846            Token {
1847                ty: OpenParen(Paren::Round, None),
1848                source: "(",
1849                span: Span::new(23, 24, SourceId::none()),
1850            },
1851            Token {
1852                ty: identifier("+"),
1853                source: "+",
1854                span: Span::new(24, 25, SourceId::none()),
1855            },
1856            Token {
1857                ty: identifier("a"),
1858                source: "a",
1859                span: Span::new(26, 27, SourceId::none()),
1860            },
1861            Token {
1862                ty: identifier("b"),
1863                source: "b",
1864                span: Span::new(28, 29, SourceId::none()),
1865            },
1866            Token {
1867                ty: CloseParen(Paren::Round),
1868                source: ")",
1869                span: Span::new(29, 30, SourceId::none()),
1870            },
1871            Token {
1872                ty: CloseParen(Paren::Round),
1873                source: ")",
1874                span: Span::new(30, 31, SourceId::none()),
1875            },
1876        ];
1877
1878        assert_eq!(res, expected);
1879    }
1880
1881    #[test]
1882    fn test_bigint() {
1883        let s = token_stream("9223372036854775808"); // isize::MAX + 1
1884        let res: Vec<_> = s.collect();
1885
1886        let expected_bigint = Box::new("9223372036854775808".parse().unwrap());
1887
1888        let expected: Vec<Token<InternedString>> = vec![Token {
1889            ty: IntLiteral::Big(expected_bigint).into(),
1890            source: "9223372036854775808",
1891            span: Span::new(0, 19, SourceId::none()),
1892        }];
1893
1894        assert_eq!(res, expected);
1895    }
1896
1897    #[test]
1898    fn negative_test_bigint() {
1899        let s = token_stream("-9223372036854775809"); // isize::MIN - 1
1900        let res: Vec<_> = s.collect();
1901
1902        let expected_bigint = Box::new("-9223372036854775809".parse().unwrap());
1903
1904        let expected: Vec<Token<InternedString>> = vec![Token {
1905            ty: IntLiteral::Big(expected_bigint).into(),
1906            source: "-9223372036854775809",
1907            span: Span::new(0, 20, SourceId::none()),
1908        }];
1909
1910        assert_eq!(res, expected);
1911    }
1912
1913    #[test]
1914    fn identifier_test() {
1915        let s = token_stream("a b(c`d'e\"www\"f,g;");
1916
1917        let tokens: Vec<(TokenType<InternedString>, &str)> =
1918            s.map(|token| (token.ty, token.source)).collect();
1919
1920        assert_eq!(tokens[0], (identifier("a"), "a"));
1921        assert_eq!(tokens[1], (identifier("b"), "b"));
1922        assert_eq!(tokens[3], (identifier("c"), "c"));
1923        assert_eq!(tokens[5], (identifier("d"), "d"));
1924        assert_eq!(tokens[7], (identifier("e"), "e"));
1925        assert_eq!(tokens[9], (identifier("f"), "f"));
1926        assert_eq!(tokens[11], (identifier("g"), "g"));
1927    }
1928
1929    #[test]
1930    fn vector_test() {
1931        let tokens: Vec<_> = token_stream("a b #(c d)")
1932            .map(|token| (token.ty, token.source))
1933            .collect();
1934
1935        assert_eq!(tokens[0], (identifier("a"), "a"));
1936        assert_eq!(tokens[1], (identifier("b"), "b"));
1937        assert_eq!(
1938            tokens[2],
1939            (
1940                TokenType::OpenParen(Paren::Round, Some(ParenMod::Vector)),
1941                "#("
1942            )
1943        );
1944        assert_eq!(tokens[3], (identifier("c"), "c"));
1945        assert_eq!(tokens[4], (identifier("d"), "d"));
1946    }
1947
1948    #[test]
1949    fn bytevector_test() {
1950        let tokens: Vec<_> = token_stream("a b #u8(1 2)")
1951            .map(|token| (token.ty, token.source))
1952            .collect();
1953
1954        assert_eq!(tokens[0], (identifier("a"), "a"));
1955        assert_eq!(tokens[1], (identifier("b"), "b"));
1956        assert_eq!(
1957            tokens[2],
1958            (
1959                TokenType::OpenParen(Paren::Round, Some(ParenMod::Bytes)),
1960                "#u8("
1961            )
1962        );
1963        assert_eq!(tokens[5], (TokenType::CloseParen(Paren::Round), ")"));
1964    }
1965
1966    #[test]
1967    fn special_comments_test() {
1968        let mut lexer = Lexer::new("#| f(\n [ |#");
1969        assert_eq!(lexer.next(), Some(Ok(TokenType::Comment)));
1970
1971        let mut lexer = Lexer::new("#| a #| ( |# |#");
1972        assert_eq!(lexer.next(), Some(Ok(TokenType::Comment)));
1973
1974        let mut lexer = Lexer::new("#;(a b)");
1975        assert_eq!(lexer.next(), Some(Ok(TokenType::DatumComment)));
1976
1977        let mut lexer = Lexer::new("#; #(#true 3)");
1978        assert_eq!(lexer.next(), Some(Ok(TokenType::DatumComment)));
1979
1980        let mut lexer = Lexer::new("#; #; 3 5");
1981        assert_eq!(lexer.next(), Some(Ok(TokenType::DatumComment)));
1982    }
1983
1984    #[test]
1985    fn comment_error_test() {
1986        let mut lexer = Lexer::new("#|");
1987
1988        assert_eq!(lexer.next().unwrap(), Err(TokenError::IncompleteComment));
1989    }
1990
1991    #[test]
1992    fn escaped_identifier_test() {
1993        let mut s = token_stream(r#"|a| |a b| |\x61;| |.|"#);
1994
1995        assert_eq!(
1996            s.next().unwrap(),
1997            Token {
1998                ty: identifier("a"),
1999                source: "|a|",
2000                span: Span::new(0, 3, None),
2001            },
2002        );
2003
2004        assert_eq!(
2005            s.next().unwrap(),
2006            Token {
2007                ty: identifier("a b"),
2008                source: "|a b|",
2009                span: Span::new(4, 9, None),
2010            },
2011        );
2012
2013        assert_eq!(
2014            s.next().unwrap(),
2015            Token {
2016                ty: identifier("a"),
2017                source: r#"|\x61;|"#,
2018                span: Span::new(10, 17, None),
2019            },
2020        );
2021
2022        assert_eq!(
2023            s.next().unwrap(),
2024            Token {
2025                ty: identifier("."),
2026                source: "|.|",
2027                span: Span::new(18, 21, None),
2028            },
2029        );
2030
2031        let mut s = token_stream("|a\\\nb|");
2032
2033        assert_eq!(
2034            s.next().unwrap(),
2035            Token {
2036                ty: identifier("ab"),
2037                source: "|a\\\nb|",
2038                span: Span::new(0, 6, None),
2039            },
2040        );
2041    }
2042}