1use super::parser::SourceId;
2use crate::interner::InternedString;
3use crate::tokens::{IntLiteral, Token, TokenLike, TokenType};
4use crate::tokens::{NumberLiteral, Paren, ParenMod, RealLiteral};
5use alloc::borrow::Cow;
6use core::char;
7use core::iter::Iterator;
8use core::ops::Range;
9use core::{iter::Peekable, str::Chars};
10use num_bigint::BigInt;
11use smallvec::{smallvec, SmallVec};
12
/// Source spelling of positive floating-point infinity.
pub const INFINITY: &str = "+inf.0";
/// Source spelling of negative floating-point infinity.
pub const NEG_INFINITY: &str = "-inf.0";
/// Source spelling of NaN with a leading `+`.
pub const NAN: &str = "+nan.0";
/// Source spelling of NaN with a leading `-`.
pub const NEG_NAN: &str = "-nan.0";
17
/// Zero-sized [`ToOwnedString`] strategy that produces plain `String`s.
pub struct OwnedString;

impl ToOwnedString<String> for OwnedString {
    /// Returns the text as an owned `String`.
    fn own(&self, s: Cow<str>) -> String {
        s.into_owned()
    }
}

/// Strategy for converting possibly-borrowed text into an owned representation `T`.
pub trait ToOwnedString<T> {
    /// Converts `s` into the owned representation.
    fn own(&self, s: Cow<str>) -> T;
}
29
/// Half-open `usize` range; `Lexer::span` returns the current token's byte offsets in this form.
pub type Span = core::ops::Range<usize>;
31
/// Character-level lexer over a source string; iterating it yields one token type per step.
pub struct Lexer<'a> {
    /// The complete input text; token slices are taken from it by byte range.
    source: &'a str,
    /// Character cursor over `source` with one character of lookahead.
    chars: Peekable<Chars<'a>>,
    /// A token produced ahead of time; the next call to `next` returns it before lexing further.
    queued: Option<TokenType<InternedString>>,
    /// Byte offset in `source` where the current token starts.
    token_start: u32,
    /// Byte offset in `source` just past the last character consumed so far.
    token_end: u32,
    /// Byte range recorded by the lexing routines to pinpoint an error; empty when none was set.
    error: Range<u32>,

    /// Scratch buffer that `read_word` takes and hands back to build escaped identifiers.
    ident_buffer: String,
}
45
46impl<'a> Lexer<'a> {
47 pub fn new(source: &'a str) -> Self {
48 Self {
49 source,
50 chars: source.chars().peekable(),
51 queued: None,
52 token_start: 0,
53 token_end: 0,
54 error: Default::default(),
55 ident_buffer: String::new(),
56 }
57 }
58
59 fn eat(&mut self) -> Option<char> {
60 if let Some(c) = self.chars.next() {
61 self.token_end += c.len_utf8() as u32;
62 Some(c)
63 } else {
64 None
65 }
66 }
67
68 fn consume_whitespace(&mut self) {
70 while let Some(&c) = self.chars.peek() {
71 if c.is_whitespace() {
72 self.eat();
73
74 self.token_start = self.token_end;
75 } else {
76 break;
77 }
78 }
79 }
80
81 fn read_string(&mut self) -> Result<TokenType<InternedString>> {
82 self.eat();
84
85 let mut buf = String::new();
86
87 while let Some(&c) = self.chars.peek() {
88 self.eat();
89 match c {
90 '"' => return Ok(TokenType::StringLiteral(buf.into())),
91 '\\' => {
92 if let Some(c) = self.read_string_escape(TokenError::IncompleteString, '"')? {
93 buf.push(c);
94 }
95 }
96 _ => buf.push(c),
97 }
98 }
99
100 Err(TokenError::IncompleteString)
101 }
102
103 fn read_string_escape(&mut self, incomplete: TokenError, delim: char) -> Result<Option<char>> {
104 let c = match self.chars.peek() {
105 Some('"') => {
106 self.eat();
107 '"'
108 }
109
110 Some('a') => {
111 self.eat();
112 '\x07'
113 }
114
115 Some('b') => {
116 self.eat();
117 '\x08'
118 }
119
120 Some('\\') => {
121 self.eat();
122 '\\'
123 }
124
125 Some('|') => {
126 self.eat();
127 '|'
128 }
129
130 Some('t') => {
131 self.eat();
132 '\t'
133 }
134
135 Some('n') => {
136 self.eat();
137 '\n'
138 }
139
140 Some('r') => {
141 self.eat();
142 '\r'
143 }
144
145 Some('0') => {
146 self.eat();
147 '\0'
148 }
149
150 Some(&code @ ('x' | 'u')) => {
151 self.eat();
152 let start = self.token_end - 2;
153
154 let mut digits = String::new();
155
156 let escape_end = match self.chars.peek().copied() {
157 Some('{') if code == 'u' => {
158 self.eat();
159 '}'
160 }
161 _ => ';',
162 };
163
164 let valid = loop {
165 let Some(c) = self.eat() else {
166 return Err(incomplete);
167 };
168
169 match c {
170 c if c == escape_end => break true,
171 ';' | '\\' | '\n' | '(' | ')' | '[' | ']' | '{' | '}' => break false,
173 c if c == delim => break false,
174 _ => digits.push(c),
175 }
176 };
177
178 if !valid {
179 self.error = start..self.token_end - 1;
180
181 return Err(TokenError::UnclosedHexEscape(escape_end));
182 }
183
184 let error = start..self.token_end;
185
186 let codepoint = u32::from_str_radix(&digits, 16)
187 .map_err(TokenError::InvalidHexEscapeLiteral)
188 .inspect_err(|_| self.error = error.clone())?;
189
190 char::from_u32(codepoint)
191 .ok_or(TokenError::InvalidHexCodePoint(codepoint))
192 .inspect_err(|_| self.error = error)?
193 }
194
195 Some(&start @ (' ' | '\t' | '\n')) => {
196 self.eat();
197
198 let mut trimming = start == '\n';
199
200 loop {
201 let Some(c) = self.chars.peek() else {
202 return Err(incomplete);
203 };
204
205 match c {
206 ' ' | '\t' => {
207 self.eat();
208 }
209 '\n' if !trimming => {
210 self.eat();
211 trimming = true;
212 }
213 _ if trimming => return Ok(None),
214
215 c => {
216 self.error = self.token_end..(self.token_end + c.len_utf8() as u32);
217 return Err(TokenError::InvalidWhitespace);
218 }
219 }
220 }
221 }
222
223 Some(c) => {
224 self.error = (self.token_end - 1)..(self.token_end + c.len_utf8() as u32);
225 return Err(TokenError::InvalidStringEscape(*c));
226 }
227
228 None => return Err(incomplete),
229 };
230
231 Ok(Some(c))
232 }
233
234 fn read_hash_value(&mut self) -> Result<TokenType<InternedString>> {
235 fn parse_char(slice: &str) -> Result<char> {
236 use core::str::FromStr;
237
238 debug_assert!(slice.len() > 2);
239
240 match &slice[2..] {
241 s if s.eq_ignore_ascii_case("alarm") => Ok('\x07'),
242 s if s.eq_ignore_ascii_case("backspace") => Ok('\x08'),
243 s if s.eq_ignore_ascii_case("delete") => Ok('\x7F'),
244 s if s.eq_ignore_ascii_case("escape") => Ok('\x1B'),
245 s if s.eq_ignore_ascii_case("newline") => Ok('\n'),
246 s if s.eq_ignore_ascii_case("null") => Ok('\0'),
247 s if s.eq_ignore_ascii_case("return") => Ok('\r'),
248 s if s.eq_ignore_ascii_case("space") => Ok(' '),
249 s if s.eq_ignore_ascii_case("tab") => Ok('\t'),
250
251 character => {
252 let first = character.as_bytes()[0];
253
254 let escape = (first == b'u' || first == b'x') && slice.len() > 3;
255
256 if !escape {
257 return char::from_str(character).map_err(|_| TokenError::InvalidCharName);
258 }
259
260 let payload = if first == b'u' && character.as_bytes().get(1) == Some(&b'{') {
261 if character.as_bytes().last() != Some(&b'}') {
262 return Err(TokenError::UnclosedHexEscape('}'));
263 }
264
265 &character[2..(character.len() - 1)]
266 } else {
267 &character[1..]
268 };
269
270 let code = u32::from_str_radix(payload, 16)
271 .map_err(TokenError::InvalidHexEscapeLiteral)?;
272
273 char::from_u32(code).ok_or(TokenError::InvalidHexCodePoint(code))
274 }
275 }
276 }
277
278 while let Some(&c) = self.chars.peek() {
279 match c {
280 '\\' => {
281 self.eat();
282 self.eat();
283 }
284 '\'' | '`' => {
285 self.eat();
286 break;
287 }
288
289 ',' => {
290 self.eat();
291 if Some('@') == self.chars.peek().copied() {
292 self.eat();
293 break;
294 } else {
295 break;
296 }
297 }
298
299 '(' | '[' | ')' | ']' => break,
300 c if c.is_whitespace() => break,
301 _ => {
302 self.eat();
303 }
304 };
305 }
306
307 match self.slice() {
308 "#true" | "#t" => Ok(TokenType::BooleanLiteral(true)),
309 "#false" | "#f" => Ok(TokenType::BooleanLiteral(false)),
310
311 "#'" => Ok(TokenType::QuoteSyntax),
312 "#`" => Ok(TokenType::QuasiQuoteSyntax),
313 "#," => Ok(TokenType::UnquoteSyntax),
314 "#,@" => Ok(TokenType::UnquoteSpliceSyntax),
315
316 keyword if keyword.starts_with("#:") => Ok(TokenType::Keyword(self.slice().into())),
317
318 character if character.starts_with("#\\") => {
319 if character.len() <= 2 {
320 return Err(TokenError::InvalidCharacter);
321 }
322
323 let parsed = match parse_char(character) {
324 Ok(it) => it,
325 Err(err) => {
326 self.error = self.token_start..self.token_end;
327 return Err(err);
328 }
329 };
330
331 Ok(TokenType::CharacterLiteral(parsed))
332 }
333
334 "#" if self.chars.peek() == Some(&'(') => {
335 self.eat();
336 Ok(TokenType::OpenParen(Paren::Round, Some(ParenMod::Vector)))
337 }
338
339 "#u8" if self.chars.peek() == Some(&'(') => {
340 self.eat();
341 Ok(TokenType::OpenParen(Paren::Round, Some(ParenMod::Bytes)))
342 }
343
344 _ => self.read_word(),
345 }
346 }
347
348 fn read_number(&mut self) -> Result<TokenType<InternedString>> {
349 while let Some(&c) = self.chars.peek() {
350 match c {
351 c if c.is_ascii_digit() => {
352 self.eat();
353 }
354 '+' | '-' | '.' | '/' | '@' | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
355 | 'e' | 'E' | 'f' | 'F' | 'i' | 'n' => {
356 self.eat();
357 }
358 '(' | ')' | '[' | ']' => {
359 return if let Some(t) = try_parse_number(self.slice(), None)? {
360 Ok(t.into())
361 } else {
362 self.read_word()
363 }
364 }
365 c if c.is_whitespace() => {
366 return if let Some(t) = try_parse_number(self.slice(), None)? {
367 Ok(t.into())
368 } else {
369 self.read_word()
370 }
371 }
372 _ => return self.read_word(),
373 }
374 }
375 match try_parse_number(self.slice(), None)? {
376 Some(n) => Ok(n.into()),
377 None => self.read_word(),
378 }
379 }
380
381 fn read_rest_of_line(&mut self) {
382 while let Some(c) = self.eat() {
383 if c == '\n' {
384 break;
385 }
386 }
387 }
388
389 fn read_word(&mut self) -> Result<TokenType<InternedString>> {
390 let escaped_identifier = self.chars.peek().copied() == Some('|');
391
392 if escaped_identifier {
393 self.eat();
394 }
395
396 let mut buffer = core::mem::take(&mut self.ident_buffer);
397 buffer.clear();
398
399 let mut ident_buffer = IdentBuffer::new(self.chars.clone(), &mut buffer);
400
401 while let Some(&c) = self.chars.peek() {
402 match c {
403 '|' if escaped_identifier => {
404 self.eat();
405
406 break;
407 }
408 '\\' if escaped_identifier => {
409 self.eat();
410
411 let escaped = self.read_string_escape(TokenError::IncompleteIdentifier, '|')?;
412
413 ident_buffer.push_escape(escaped);
414 }
415 c if escaped_identifier => {
416 ident_buffer.push(c);
417 self.eat();
418 }
419 '(' | '[' | ')' | ']' | '{' | '}' => break,
420 c if c.is_whitespace() => break,
421 '\'' | '"' | '`' | ';' | ',' => {
422 break;
423 }
424 '\\' => {
427 self.eat();
428 self.eat();
429 }
430
431 _ => {
432 self.eat();
433 }
434 };
435 }
436
437 let token = match self.slice() {
438 "." => TokenType::Dot,
439 "if" => TokenType::If,
440 "let" => TokenType::Let,
441 "define" | "defn" | "#%define" => TokenType::Define,
442 "%plain-let" => TokenType::TestLet,
443 "return!" => TokenType::Return,
444 "begin" => TokenType::Begin,
445 "lambda" | "fn" | "#%plain-lambda" | "λ" => TokenType::Lambda,
446 "quote" => TokenType::Quote,
447 "syntax-rules" => TokenType::SyntaxRules,
448 "define-syntax" => TokenType::DefineSyntax,
449 "..." => TokenType::Ellipses,
450 "set!" => TokenType::Set,
451 "require" => TokenType::Require,
452 identifier => {
453 debug_assert!(!identifier.is_empty());
454
455 match identifier.as_bytes() {
456 [b'+', _, ..] if self.queued.is_none() => {
457 self.queued = Some(TokenType::Identifier((&identifier[1..]).into()));
458 TokenType::Identifier("+".into())
459 }
460 [b'|', .., b'|'] if escaped_identifier => {
461 if ident_buffer.ident.is_empty() {
462 TokenType::Identifier((&identifier[1..identifier.len() - 1]).into())
463 } else {
464 TokenType::Identifier(ident_buffer.ident.as_str().into())
465 }
466 }
467 _ if escaped_identifier => {
468 ident_buffer.ident.clear();
469 return Err(TokenError::IncompleteIdentifier);
470 }
471 _ => TokenType::Identifier(identifier.into()),
472 }
473 }
474 };
475
476 ident_buffer.ident.clear();
477 self.ident_buffer = buffer;
478
479 Ok(token)
480 }
481
482 fn read_nestable_comment(&mut self) -> Result<TokenType<InternedString>> {
483 self.eat();
484
485 let mut depth = 1;
486
487 while let Some(c) = self.eat() {
488 match c {
489 '|' => {
490 if self.chars.peek().copied() == Some('#') {
491 self.eat();
492 depth -= 1;
493
494 if depth == 0 {
495 return Ok(TokenType::Comment);
496 }
497 }
498 }
499 '#' => {
500 if self.chars.peek().copied() == Some('|') {
501 self.eat();
502 depth += 1;
503 }
504 }
505 _ => {}
506 }
507 }
508
509 Err(TokenError::IncompleteComment)
510 }
511}
512
/// Lazily materialised text of an identifier.
///
/// While no escape has been seen, pushed characters are only counted. The first escape
/// copies that many characters from `chars` into `ident`, and from then on characters are
/// appended directly.
struct IdentBuffer<'b, 'a: 'b> {
    /// Character iterator positioned at the first character of the identifier body.
    chars: Peekable<Chars<'a>>,
    /// Destination for the identifier text.
    ident: &'b mut String,
    /// `Err(n)`: `n` characters counted but not yet copied; `Ok(())`: copying directly.
    mode: core::result::Result<(), usize>,
}

impl<'b, 'a: 'b> IdentBuffer<'b, 'a> {
    /// Creates a buffer in counting mode with nothing counted yet.
    fn new(chars: Peekable<Chars<'a>>, buffer: &'b mut String) -> Self {
        IdentBuffer {
            chars,
            ident: buffer,
            mode: Err(0),
        }
    }

    /// Records an ordinary character: counted in counting mode, appended otherwise.
    fn push(&mut self, c: char) {
        match &mut self.mode {
            Err(pending) => *pending += 1,
            Ok(()) => self.ident.push(c),
        }
    }

    /// Records the result of an escape sequence, switching to direct mode if needed.
    ///
    /// `None` (an escape that denotes no character) still triggers the switch.
    fn push_escape(&mut self, c: Option<char>) {
        if let Err(pending) = self.mode {
            let prefix = self.chars.clone().take(pending);
            self.ident.extend(prefix);
            self.mode = Ok(());
        }

        self.ident.extend(c);
    }
}
550
/// Measures a leading `#!` line so that a caller can skip it.
///
/// Returns `(character count, byte length)` of the first line, excluding its newline, when
/// `input` starts with `#!`; otherwise `(0, 0)`.
fn strip_shebang_line(input: &str) -> (usize, usize) {
    if !input.starts_with("#!") {
        return (0, 0);
    }

    let line_end = input.find('\n').unwrap_or(input.len());
    let shebang = &input[..line_end];

    (shebang.chars().count(), shebang.len())
}
560
561impl<'a> Lexer<'a> {
562 #[inline]
563 pub fn span(&self) -> Span {
564 self.token_start as _..self.token_end as _
565 }
566
567 pub fn small_span(&self) -> core::ops::Range<u32> {
568 self.token_start..self.token_end
569 }
570
571 #[inline]
572 pub fn slice(&self) -> &'a str {
573 self.source.get(self.span()).unwrap()
574 }
575}
576
/// Iterator that wraps a [`Lexer`] and yields full tokens (type, source text and span).
pub struct TokenStream<'a> {
    /// The underlying lexer.
    pub(crate) lexer: Lexer<'a>,
    /// When true, `Comment` tokens are dropped instead of being yielded.
    pub(crate) skip_comments: bool,
    /// Source identifier attached to every token and error produced.
    source_id: Option<SourceId>,
}
582
583impl<'a> TokenStream<'a> {
584 pub fn new(input: &'a str, skip_comments: bool, source_id: Option<SourceId>) -> Self {
585 let (char_offset, bytes_offset) = strip_shebang_line(input);
586
587 let mut res = Self {
588 lexer: Lexer::new(input),
589 skip_comments,
590 source_id, };
592
593 res.lexer.token_start += bytes_offset as u32;
594 res.lexer.token_end += bytes_offset as u32;
595
596 for _ in 0..char_offset {
597 res.lexer.chars.next();
598 }
599
600 res
601 }
602
603 pub fn into_owned(self) -> OwnedTokenStream<'a> {
604 OwnedTokenStream { stream: self }
605 }
606}
607
/// Thin wrapper around a [`TokenStream`] that forwards iteration to it.
pub struct OwnedTokenStream<'a> {
    /// The wrapped stream.
    pub(crate) stream: TokenStream<'a>,
}

impl<'a> Iterator for OwnedTokenStream<'a> {
    type Item = core::result::Result<Token<'a, InternedString>, TokenLike<'a, TokenError>>;

    /// Yields exactly what the wrapped stream yields.
    fn next(&mut self) -> Option<Self::Item> {
        self.stream.next()
    }
}

impl<'a> OwnedTokenStream<'a> {
    /// Byte offset just past the most recently lexed token.
    pub fn offset(&self) -> usize {
        self.stream.lexer.span().end
    }
}
625impl<'a> Iterator for TokenStream<'a> {
626 type Item = core::result::Result<Token<'a, InternedString>, TokenLike<'a, TokenError>>;
627
628 fn next(&mut self) -> Option<Self::Item> {
629 self.lexer.next().and_then(|token| {
630 let token = match token {
631 Ok(token) => token,
632 Err(err) => {
633 return Some(Err(TokenLike::new(
634 err,
635 self.lexer.slice(),
636 if self.lexer.error.is_empty() {
637 self.lexer.small_span()
638 } else {
639 self.lexer.error.clone()
640 },
641 self.source_id,
642 )))
643 }
644 };
645
646 let token = Token::new(
647 token,
648 self.lexer.slice(),
649 self.lexer.small_span(),
650 self.source_id,
651 );
652 match token.ty {
653 TokenType::Comment if self.skip_comments => self.next(),
655 _ => Some(Ok(token)),
657 }
658 })
659 }
660}
661
/// Result type used by the lexing routines; the error is always a [`TokenError`].
pub type Result<T> = core::result::Result<T, TokenError>;
663
/// Errors the lexer can report.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenError {
    /// A character that cannot start or continue a token at this position.
    UnexpectedChar(char),
    /// Input ended inside a string literal.
    IncompleteString,
    /// A `|…|` identifier was not closed.
    IncompleteIdentifier,
    /// Input ended inside a `#| … |#` comment.
    IncompleteComment,
    /// A non-whitespace character appeared before the newline of a line-continuation escape.
    InvalidWhitespace,
    /// A backslash was followed by a character that is not a known escape.
    InvalidStringEscape(char),
    /// A `#\` character literal with no character after it.
    InvalidCharacter,
    /// A rational literal whose denominator is zero.
    ZeroDenominator,
    /// A hex escape that was not terminated; carries the terminator that was expected.
    UnclosedHexEscape(char),
    /// A `#\name` literal whose name is neither a known name nor a single character.
    InvalidCharName,
    /// The digits of a hex escape could not be parsed as a hexadecimal `u32`.
    InvalidHexEscapeLiteral(core::num::ParseIntError),
    /// A hex escape whose value is not a valid Unicode scalar value.
    InvalidHexCodePoint(u32),
}

impl core::fmt::Display for TokenError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            // Messages that interpolate a payload.
            TokenError::UnexpectedChar(c) => write!(f, "unexpected char {c:?}"),
            TokenError::InvalidStringEscape(c) => write!(f, "invalid escape {c:?}"),
            TokenError::UnclosedHexEscape(close) => {
                write!(f, "unclosed hex escape, expected {close:?}")
            }
            TokenError::InvalidHexEscapeLiteral(error) => {
                write!(f, "invalid hex escape literal, {error}")
            }
            TokenError::InvalidHexCodePoint(code) => write!(f, "invalid code point {code:x}"),

            // Fixed messages.
            TokenError::IncompleteString => f.write_str("incomplete string"),
            TokenError::IncompleteIdentifier => f.write_str("incomplete identifier"),
            TokenError::IncompleteComment => f.write_str("incomplete comment"),
            TokenError::InvalidWhitespace => {
                f.write_str("unexpected character, expected whitespace or newline")
            }
            TokenError::InvalidCharacter => f.write_str("invalid character"),
            TokenError::ZeroDenominator => {
                f.write_str("division by zero is not allowed in rational literals")
            }
            TokenError::InvalidCharName => f.write_str("invalid character name"),
        }
    }
}
706
707impl<'a> Iterator for Lexer<'a> {
708 type Item = Result<TokenType<InternedString>>;
709
710 fn next(&mut self) -> Option<Self::Item> {
711 if let Some(t) = self.queued.take() {
712 return Some(Ok(t));
713 }
714 self.consume_whitespace();
716
717 self.token_start = self.token_end;
718
719 match self.chars.peek() {
720 Some(';') => {
721 self.eat();
722 self.read_rest_of_line();
723 Some(Ok(TokenType::Comment))
724 }
725
726 Some('"') => Some(self.read_string()),
727
728 Some(&paren @ ('(' | '[' | '{')) => {
729 self.eat();
730 let kind = match paren {
731 '[' => Paren::Square,
732 '{' => Paren::Curly,
733 _ => Paren::Round,
734 };
735 Some(Ok(TokenType::OpenParen(kind, None)))
736 }
737
738 Some(&paren @ (')' | ']' | '}')) => {
739 self.eat();
740 let kind = match paren {
741 ']' => Paren::Square,
742 '}' => Paren::Curly,
743 _ => Paren::Round,
744 };
745 Some(Ok(TokenType::CloseParen(kind)))
746 }
747
748 Some('\'') => {
750 self.eat();
751 Some(Ok(TokenType::QuoteTick))
752 }
753
754 Some('`') => {
755 self.eat();
756 Some(Ok(TokenType::QuasiQuote))
757 }
758
759 Some(',') => {
760 self.eat();
761
762 if let Some('@') = self.chars.peek() {
763 self.eat();
764
765 Some(Ok(TokenType::UnquoteSplice))
766 } else {
767 Some(Ok(TokenType::Unquote))
768 }
769 }
770 Some('+') | Some('-') | Some('.') => {
771 self.eat();
772 Some(self.read_number())
773 }
774 Some('#') => {
775 self.eat();
776 let next = self.chars.peek().copied();
777
778 let token = match next {
779 Some('x' | 'X' | 'd' | 'D' | 'o' | 'O' | 'b' | 'B') => {
780 self.eat();
781 self.read_number()
782 }
783 Some('|') => self.read_nestable_comment(),
784 Some(';') => {
785 self.eat();
786 Ok(TokenType::DatumComment)
787 }
788 Some('#') => {
789 self.eat();
790 Err(TokenError::UnexpectedChar('#'))
791 }
792 _ => self.read_hash_value(),
793 };
794
795 Some(token)
796 }
797
798 Some(c) if !c.is_whitespace() && !c.is_ascii_digit() || *c == '_' => {
799 Some(self.read_word())
800 }
801 Some(c) if c.is_ascii_digit() => Some(self.read_number()),
802 Some(_) => {
803 debug_assert!(false);
805
806 self.eat().map(|e| Err(TokenError::UnexpectedChar(e)))
807 }
808 None => None,
809 }
810 }
811}
812
813fn split_into_complex<'a>(s: &'a str) -> Option<SmallVec<[NumPart<'a>; 2]>> {
815 let classify_num_part = |s: &'a str| -> NumPart<'a> {
816 match s.as_bytes().last() {
817 Some(b'i') => NumPart::Imaginary(&s[..s.len() - 1]),
818 _ => NumPart::Real(s),
819 }
820 };
821
822 let mut idxs = SmallVec::<[usize; 3]>::new();
823
824 let mut chars = s.char_indices();
825 while let Some((idx, ch)) = chars.next() {
826 if ch == '+' || ch == '-' {
827 if idxs.len() == 2 {
828 return None;
829 } else {
830 idxs.push(idx);
831 }
832 } else if ch == 'e' || ch == 'E' {
833 let _ = chars.next();
835 }
836 }
837
838 let parts = match idxs.as_slice() {
839 [] | [0] => smallvec![classify_num_part(s)],
840 [idx] | [0, idx] => smallvec![
841 classify_num_part(&s[0..*idx]),
842 classify_num_part(&s[*idx..])
843 ],
844 _ => return None,
845 };
846 Some(parts)
847}
848
/// One component of a numeric literal as produced by `split_into_complex`.
#[derive(Debug)]
enum NumPart<'a> {
    /// A part that does not end in `i`, kept verbatim.
    Real(&'a str),
    /// A part that ended in `i`; the stored text excludes that trailing `i`.
    Imaginary(&'a str),
}
854
855fn parse_real(s: &str, radix: u32) -> Option<RealLiteral> {
856 if s == NEG_INFINITY {
857 return Some(RealLiteral::Float(f64::NEG_INFINITY.into()));
858 } else if s == INFINITY {
859 return Some(RealLiteral::Float(f64::INFINITY.into()));
860 } else if s == NAN || s == NEG_NAN {
861 return Some(RealLiteral::Float(f64::NAN.into()));
862 }
863
864 let mut has_dot = false;
865 let mut has_exponent = false;
866 let mut frac_position = None;
867 for (idx, ch) in s.chars().enumerate() {
868 match ch {
869 'e' | 'E' if radix < 15 => {
870 if has_exponent {
871 return None;
872 };
873 has_exponent = true;
874 }
875 '/' => {
876 frac_position = match frac_position {
877 Some(_) => return None,
878 None => Some(idx),
879 }
880 }
881 '.' => {
882 if has_dot {
883 return None;
884 }
885 has_dot = true
886 }
887 _ => {}
888 }
889 }
890
891 if has_exponent || has_dot {
892 if radix != 10 {
893 return None;
895 }
896
897 s.parse().map(RealLiteral::Float).ok()
898 } else if let Some(p) = frac_position {
899 let (n_str, d_str) = s.split_at(p);
900 let d_str = &d_str[1..];
901 let n = IntLiteral::from_str_radix(n_str, radix).ok()?;
902 let d = IntLiteral::from_str_radix(d_str, radix).ok()?;
903 Some(RealLiteral::Rational(n, d))
904 } else {
905 let int = IntLiteral::from_str_radix(s, radix).ok()?;
906 Some(RealLiteral::Int(int))
907 }
908}
909
910fn try_parse_number(s: &str, radix: Option<u32>) -> Result<Option<NumberLiteral>> {
911 let Some(n) = parse_number(s, radix) else {
912 return Ok(None);
913 };
914
915 fn validate_real_literal(lit: &RealLiteral) -> Result<()> {
916 let RealLiteral::Rational(_, int) = lit else {
917 return Ok(());
918 };
919
920 match int {
921 IntLiteral::Small(n) if *n == 0 => Err(TokenError::ZeroDenominator),
922 IntLiteral::Big(big_int) if **big_int == BigInt::ZERO => {
923 Err(TokenError::ZeroDenominator)
924 }
925 _ => Ok(()),
926 }
927 }
928
929 match &n {
930 NumberLiteral::Real(real) => validate_real_literal(real)?,
931 NumberLiteral::Complex(r, i) => {
932 validate_real_literal(r)?;
933 validate_real_literal(i)?;
934 }
935 NumberLiteral::Polar(r, theta) => {
936 validate_real_literal(r)?;
937 validate_real_literal(theta)?;
938 }
939 }
940
941 Ok(Some(n))
942}
943
944pub fn parse_number(s: &str, radix: Option<u32>) -> Option<NumberLiteral> {
945 let (s, radix) = match s.get(0..2) {
946 Some("#x" | "#X") => (&s[2..], 16),
947 Some("#d" | "#D") => (&s[2..], 10),
948 Some("#o" | "#O") => (&s[2..], 8),
949 Some("#b" | "#B") => (&s[2..], 2),
950 _ => (s, radix.unwrap_or(10)),
951 };
952
953 if let Some((r, theta)) = s.split_once('@') {
954 let r = parse_real(r, radix)?;
955 let theta = parse_real(theta, radix)?;
956 return Some(NumberLiteral::Polar(r, theta));
957 }
958
959 match split_into_complex(s)?.as_slice() {
960 [NumPart::Real(x)] => parse_real(x, radix).map(NumberLiteral::from),
961 [NumPart::Imaginary(x)] => {
962 if !matches!(x.as_bytes().first(), Some(b'+') | Some(b'-')) {
963 return None;
964 };
965
966 let imaginary = if *x == "+" {
967 IntLiteral::Small(1).into()
968 } else if *x == "-" {
969 IntLiteral::Small(-1).into()
970 } else {
971 parse_real(x, radix)?
972 };
973 Some(NumberLiteral::Complex(
974 IntLiteral::Small(0).into(),
975 imaginary,
976 ))
977 }
978 [NumPart::Real(re), NumPart::Imaginary(im)] => Some(NumberLiteral::Complex(
979 parse_real(re, radix)?,
980 if *im == "+" {
981 IntLiteral::Small(1).into()
982 } else if *im == "-" {
983 IntLiteral::Small(-1).into()
984 } else {
985 parse_real(im, radix)?
986 },
987 )),
988 _ => None,
989 }
990}
991
992#[cfg(test)]
993mod lexer_tests {
994 use core::str::FromStr;
995
996 use super::*;
997 use crate::span::Span;
998 use crate::tokens::{IntLiteral, TokenType::*};
999 use pretty_assertions::assert_eq;
1000
1001 fn identifier(ident: &str) -> TokenType<InternedString> {
1002 Identifier(ident.into())
1003 }
1004
1005 fn token_stream(source: &str) -> impl Iterator<Item = Token<'_, InternedString>> {
1006 TokenStream::new(source, true, None).map(|t| t.expect("unexpected parsing error"))
1007 }
1008
1009 #[test]
1011 fn test_identifier_with_quote_end() {
1012 let s = TokenStream::new(
1013 " (define (stream-cdr stream)
1014 ((stream-cdr' stream)))
1015",
1016 true,
1017 SourceId::none(),
1018 );
1019
1020 for token in s {
1021 println!("{:?}", token);
1022 }
1023 }
1024
1025 #[test]
1026 fn test_bracket_characters() {
1027 let s = TokenStream::new(
1028 "[(equal? #\\[ (car chars)) (b (cdr chars) (+ sum 1))]",
1029 true,
1030 SourceId::none(),
1031 );
1032
1033 for token in s {
1034 println!("{:?}", token);
1035 }
1036 }
1037
1038 #[test]
1039 fn test_escape_in_string() {
1040 let s = TokenStream::new(r#"(display "}\n")"#, true, SourceId::none());
1041
1042 for token in s {
1043 println!("{:?}", token);
1044 }
1045 }
1046
1047 #[test]
1048 fn test_quote_within_word() {
1049 let mut s = TokenStream::new("'foo\\'a", true, SourceId::none());
1050
1051 println!("{:?}", s.next());
1052 println!("{:?}", s.next());
1053 println!("{:?}", s.next());
1054 }
1055
1056 #[test]
1057 fn test_single_period() {
1058 let mut s = TokenStream::new(".", true, SourceId::none());
1059
1060 println!("{:?}", s.next());
1061 }
1062
1063 #[test]
1064 fn test_chars() {
1065 let mut s = token_stream("#\\a #\\b #\\λ");
1066
1067 assert_eq!(
1068 s.next(),
1069 Some(Token {
1070 ty: CharacterLiteral('a'),
1071 source: "#\\a",
1072 span: Span::new(0, 3, SourceId::none())
1073 })
1074 );
1075 assert_eq!(
1076 s.next(),
1077 Some(Token {
1078 ty: CharacterLiteral('b'),
1079 source: "#\\b",
1080 span: Span::new(4, 7, SourceId::none())
1081 })
1082 );
1083 assert_eq!(
1084 s.next(),
1085 Some(Token {
1086 ty: CharacterLiteral('λ'),
1087 source: "#\\λ",
1088 span: Span::new(8, 12, SourceId::none())
1089 })
1090 );
1091 }
1092
1093 #[test]
1094 fn test_unicode_escapes() {
1095 let mut s = token_stream(r#" #\xAb #\u{0D300} #\u0540 "\x00D;" "\u1044;" "\u{045}" "#);
1096
1097 assert_eq!(
1098 s.next().unwrap(),
1099 Token {
1100 ty: CharacterLiteral('«'),
1101 source: r#"#\xAb"#,
1102 span: Span::new(2, 7, SourceId::none())
1103 }
1104 );
1105
1106 assert_eq!(
1107 s.next().unwrap(),
1108 Token {
1109 ty: CharacterLiteral('팀'),
1110 source: r#"#\u{0D300}"#,
1111 span: Span::new(8, 18, SourceId::none())
1112 }
1113 );
1114
1115 assert_eq!(
1116 s.next().unwrap(),
1117 Token {
1118 ty: CharacterLiteral('Հ'),
1119 source: r#"#\u0540"#,
1120 span: Span::new(19, 26, SourceId::none())
1121 }
1122 );
1123
1124 assert_eq!(
1125 s.next().unwrap(),
1126 Token {
1127 ty: StringLiteral("\r".into()),
1128 source: r#""\x00D;""#,
1129 span: Span::new(27, 35, SourceId::none())
1130 }
1131 );
1132
1133 assert_eq!(
1134 s.next().unwrap(),
1135 Token {
1136 ty: StringLiteral("၄".into()),
1137 source: r#""\u1044;""#,
1138 span: Span::new(36, 45, SourceId::none())
1139 }
1140 );
1141
1142 assert_eq!(
1143 s.next().unwrap(),
1144 Token {
1145 ty: StringLiteral("E".into()),
1146 source: r#""\u{045}""#,
1147 span: Span::new(46, 55, SourceId::none())
1148 }
1149 );
1150 }
1151
1152 #[test]
1153 fn test_invalid_unicode_escapes() {
1154 let tokens = [
1155 r#" #\xd820 "#,
1156 r#" #\u{1 "#,
1157 r#" "\xabx" "#,
1158 r#" "\u0045" "#,
1159 r#" #\xaaaaaaaa " "#,
1160 r#" "\u{ffffffff}" "#,
1161 r#" #\u{} "#,
1162 ];
1163
1164 for token in tokens {
1165 let mut s = TokenStream::new(token, true, None);
1166
1167 assert!(s.next().unwrap().is_err(), "{token:?} should be invalid");
1169 }
1170 }
1171
1172 #[test]
1173 fn test_string_newlines() {
1174 let mut s = token_stream(" \"foo\nbar\" \"foo \\ \n bar\" ");
1175
1176 assert_eq!(
1177 s.next().unwrap(),
1178 Token {
1179 ty: StringLiteral("foo\nbar".into()),
1180 source: "\"foo\nbar\"",
1181 span: Span::new(1, 10, SourceId::none())
1182 }
1183 );
1184
1185 assert_eq!(
1186 s.next().unwrap(),
1187 Token {
1188 ty: StringLiteral("foo bar".into()),
1189 source: "\"foo \\ \n bar\"",
1190 span: Span::new(11, 27, SourceId::none())
1191 }
1192 );
1193 }
1194
1195 #[test]
1196 fn test_unexpected_char() {
1197 let mut s = token_stream("($)");
1198 assert_eq!(
1199 s.next(),
1200 Some(Token {
1201 ty: OpenParen(Paren::Round, None),
1202 source: "(",
1203 span: Span::new(0, 1, SourceId::none())
1204 })
1205 );
1206 assert_eq!(
1207 s.next(),
1208 Some(Token {
1209 ty: identifier("$"),
1210 source: "$",
1211 span: Span::new(1, 2, SourceId::none())
1212 })
1213 );
1214 assert_eq!(
1215 s.next(),
1216 Some(Token {
1217 ty: CloseParen(Paren::Round),
1218 source: ")",
1219 span: Span::new(2, 3, SourceId::none())
1220 })
1221 );
1222 }
1223
1224 #[test]
1225 fn test_words() {
1226 let mut s = token_stream("foo FOO _123_ Nil #f #t");
1227
1228 assert_eq!(
1229 s.next(),
1230 Some(Token {
1231 ty: identifier("foo"),
1232 source: "foo",
1233 span: Span::new(0, 3, SourceId::none())
1234 })
1235 );
1236
1237 assert_eq!(
1238 s.next(),
1239 Some(Token {
1240 ty: identifier("FOO"),
1241 source: "FOO",
1242 span: Span::new(4, 7, SourceId::none())
1243 })
1244 );
1245
1246 assert_eq!(
1247 s.next(),
1248 Some(Token {
1249 ty: identifier("_123_"),
1250 source: "_123_",
1251 span: Span::new(8, 13, SourceId::none())
1252 })
1253 );
1254
1255 assert_eq!(
1256 s.next(),
1257 Some(Token {
1258 ty: identifier("Nil"),
1259 source: "Nil",
1260 span: Span::new(14, 17, SourceId::none())
1261 })
1262 );
1263
1264 assert_eq!(
1265 s.next(),
1266 Some(Token {
1267 ty: BooleanLiteral(false),
1268 source: "#f",
1269 span: Span::new(18, 20, SourceId::none())
1270 })
1271 );
1272
1273 assert_eq!(
1274 s.next(),
1275 Some(Token {
1276 ty: BooleanLiteral(true),
1277 source: "#t",
1278 span: Span::new(21, 23, SourceId::none())
1279 })
1280 );
1281
1282 assert_eq!(s.next(), None);
1283 }
1284
1285 #[test]
1286 fn test_almost_literals() {
1287 let got: Vec<_> = token_stream("1e 1ee 1.2e5.4 1E10/4 1.45# 3- e10").collect();
1288 assert_eq!(
1289 got.as_slice(),
1290 &[
1291 Token {
1292 ty: identifier("1e"),
1293 source: "1e",
1294 span: Span::new(0, 2, SourceId::none()),
1295 },
1296 Token {
1297 ty: identifier("1ee"),
1298 source: "1ee",
1299 span: Span::new(3, 6, SourceId::none()),
1300 },
1301 Token {
1302 ty: identifier("1.2e5.4"),
1303 source: "1.2e5.4",
1304 span: Span::new(7, 14, SourceId::none()),
1305 },
1306 Token {
1307 ty: identifier("1E10/4"),
1308 source: "1E10/4",
1309 span: Span::new(15, 21, SourceId::none()),
1310 },
1311 Token {
1312 ty: identifier("1.45#"),
1313 source: "1.45#",
1314 span: Span::new(22, 27, SourceId::none()),
1315 },
1316 Token {
1317 ty: identifier("3-"),
1318 source: "3-",
1319 span: Span::new(28, 30, SourceId::none()),
1320 },
1321 Token {
1322 ty: identifier("e10"),
1323 source: "e10",
1324 span: Span::new(31, 34, SourceId::none()),
1325 },
1326 ]
1327 );
1328 }
1329
#[test]
fn test_real_numbers() {
    // Expected token whose text is `source`, spanning bytes `start..end` of the input.
    let tok = |ty: TokenType<InternedString>, source: &'static str, start, end| Token {
        ty,
        source,
        span: Span::new(start, end, SourceId::none()),
    };
    let int = |value| -> TokenType<InternedString> { IntLiteral::Small(value).into() };
    let float =
        |value: f64| -> TokenType<InternedString> { RealLiteral::Float(value.into()).into() };

    let lexed: Vec<_> =
        token_stream("0 -0 -1.2 +2.3 999 1. 1e2 1E2 1.2e2 1.2E2 +inf.0 -inf.0 2e-4 2e+10")
            .collect();

    let expected = [
        tok(int(0), "0", 0, 1),
        // `-0` is expected to lex as the plain integer zero.
        tok(int(0), "-0", 2, 4),
        tok(float(-1.2), "-1.2", 5, 9),
        tok(float(2.3), "+2.3", 10, 14),
        tok(int(999), "999", 15, 18),
        // A trailing dot or an exponent makes the literal a float.
        tok(float(1.0), "1.", 19, 21),
        tok(float(100.0), "1e2", 22, 25),
        tok(float(100.0), "1E2", 26, 29),
        tok(float(120.0), "1.2e2", 30, 35),
        tok(float(120.0), "1.2E2", 36, 41),
        tok(float(f64::INFINITY), "+inf.0", 42, 48),
        tok(float(f64::NEG_INFINITY), "-inf.0", 49, 55),
        // Signed exponents.
        tok(float(2e-4), "2e-4", 56, 60),
        tok(float(2e+10), "2e+10", 61, 66),
    ];

    assert_eq!(lexed.as_slice(), &expected[..]);
}
1411
#[test]
fn test_nan() {
    // Both spellings must lex to a float NaN. NaN never compares equal to
    // itself, so the payload is checked with `is_nan` rather than `assert_eq!`.
    for source in ["+nan.0", "-nan.0"] {
        let token = token_stream(source).next().unwrap();

        match token.ty {
            TokenType::Number(number) => assert!(matches!(
                number.resolve(),
                NumberLiteral::Real(RealLiteral::Float(x)) if x.is_nan()
            )),

            _ => panic!("Didn't match"),
        }
    }
}
1439
#[test]
fn test_rationals() {
    // Expected token whose text is `source`, spanning bytes `start..end` of the input.
    let tok = |ty: TokenType<InternedString>, source: &'static str, start, end| Token {
        ty,
        source,
        span: Span::new(start, end, SourceId::none()),
    };
    let ident = |name: &'static str, start, end| tok(identifier(name), name, start, end);
    let ratio = |numer, denom| -> TokenType<InternedString> {
        RealLiteral::Rational(IntLiteral::Small(numer), IntLiteral::Small(denom)).into()
    };

    // Every line of the raw string below is indented by exactly 16 spaces.
    // The expected spans are byte offsets into this string, so that
    // indentation is significant and must not be reformatted.
    let lexed: Vec<_> = token_stream(
        r#"
                1/4
                (1/4 1/3)
                11111111111111111111/22222222222222222222
                /
                1/
                1/4.0
                1//4
                1 / 4
                .2
"#,
    )
    .collect();

    let expected = [
        tok(ratio(1, 4), "1/4", 17, 20),
        tok(OpenParen(Paren::Round, None), "(", 37, 38),
        tok(ratio(1, 4), "1/4", 38, 41),
        tok(ratio(1, 3), "1/3", 42, 45),
        tok(CloseParen(Paren::Round), ")", 45, 46),
        // Numerator and denominator are too large for `IntLiteral::Small`,
        // so both sides are built through `IntLiteral::from_str`.
        tok(
            RealLiteral::Rational(
                IntLiteral::from_str("11111111111111111111").unwrap(),
                IntLiteral::from_str("22222222222222222222").unwrap(),
            )
            .into(),
            "11111111111111111111/22222222222222222222",
            63,
            104,
        ),
        // Incomplete or malformed rationals are expected to lex as identifiers.
        ident("/", 121, 122),
        ident("1/", 139, 141),
        ident("1/4.0", 158, 163),
        ident("1//4", 180, 184),
        // With whitespace around the slash, three separate tokens are expected.
        tok(IntLiteral::Small(1).into(), "1", 201, 202),
        ident("/", 203, 204),
        tok(IntLiteral::Small(4).into(), "4", 205, 206),
        tok(RealLiteral::Float((0.2).into()).into(), ".2", 223, 225),
    ];

    assert_eq!(lexed.as_slice(), &expected[..]);
}
1536
#[test]
fn test_complex_numbers() {
    // Expected token whose text is `source`, spanning bytes `start..end` of the input.
    let tok = |ty: TokenType<InternedString>, source: &'static str, start, end| Token {
        ty,
        source,
        span: Span::new(start, end, SourceId::none()),
    };
    // Builders for the real-valued components of a complex literal.
    let int = |value| -> RealLiteral { IntLiteral::Small(value).into() };
    let float = |value: f64| RealLiteral::Float(value.into());
    let ratio =
        |numer, denom| RealLiteral::Rational(IntLiteral::Small(numer), IntLiteral::Small(denom));
    // Builders for the two complex notations exercised here.
    let complex = |re: RealLiteral, im: RealLiteral| -> TokenType<InternedString> {
        NumberLiteral::Complex(re, im).into()
    };
    let polar = |magnitude: RealLiteral, angle: RealLiteral| -> TokenType<InternedString> {
        NumberLiteral::Polar(magnitude, angle).into()
    };

    let lexed: Vec<_> = token_stream(
        "1+2i 3-4i +5+6i +1i 1.0+2.0i 3-4.0i +1.0i 2e+4+inf.0i -inf.0-2e-4i 1/2@0 -3/2@1 +i -i 4+i",
    )
    .collect();

    let expected = [
        tok(complex(int(1), int(2)), "1+2i", 0, 4),
        tok(complex(int(3), int(-4)), "3-4i", 5, 9),
        tok(complex(int(5), int(6)), "+5+6i", 10, 15),
        // A lone imaginary part gets a zero real part.
        tok(complex(int(0), int(1)), "+1i", 16, 19),
        tok(complex(float(1.0), float(2.0)), "1.0+2.0i", 20, 28),
        tok(complex(int(3), float(-4.0)), "3-4.0i", 29, 35),
        tok(complex(int(0), float(1.0)), "+1.0i", 36, 41),
        tok(
            complex(float(2e+4), float(f64::INFINITY)),
            "2e+4+inf.0i",
            42,
            53,
        ),
        tok(
            complex(float(f64::NEG_INFINITY), float(-2e-4)),
            "-inf.0-2e-4i",
            54,
            66,
        ),
        // `@` separates the two parts of a polar literal.
        tok(polar(ratio(1, 2), int(0)), "1/2@0", 67, 72),
        tok(polar(ratio(-3, 2), int(1)), "-3/2@1", 73, 79),
        // A bare signed `i` is the imaginary unit.
        tok(complex(int(0), int(1)), "+i", 80, 82),
        tok(complex(int(0), int(-1)), "-i", 83, 85),
        tok(complex(int(4), int(1)), "4+i", 86, 89),
    ];

    assert_eq!(lexed.as_slice(), &expected[..]);
}
1675
#[test]
fn test_numbers_with_radix() {
    // Expected token whose text is `source`, spanning bytes `start..end` of the input.
    let tok = |ty: TokenType<InternedString>, source: &'static str, start, end| Token {
        ty,
        source,
        span: Span::new(start, end, SourceId::none()),
    };
    let int = |value| -> TokenType<InternedString> {
        NumberLiteral::Real(IntLiteral::Small(value).into()).into()
    };
    let ratio = |numer, denom| -> TokenType<InternedString> {
        NumberLiteral::Real(RealLiteral::Rational(
            IntLiteral::Small(numer),
            IntLiteral::Small(denom),
        ))
        .into()
    };

    let lexed: Vec<_> =
        token_stream("#xff #xce #o777 #o1/20 #b1/10 #x10+ffi #d1.0").collect();

    // Expected values are written in decimal; the comments give the radix
    // arithmetic behind each one.
    let expected = [
        tok(int(255), "#xff", 0, 4),  // 0xff
        tok(int(206), "#xce", 5, 9),  // 0xce
        tok(int(511), "#o777", 10, 15), // 0o777
        tok(ratio(1, 16), "#o1/20", 16, 22), // octal 20 == 16
        tok(ratio(1, 2), "#b1/10", 23, 29),  // binary 10 == 2
        // The radix prefix applies to both parts of a complex literal.
        tok(
            NumberLiteral::Complex(IntLiteral::Small(16).into(), IntLiteral::Small(255).into())
                .into(),
            "#x10+ffi",
            30,
            38,
        ),
        tok(
            NumberLiteral::Real(RealLiteral::Float((1.0).into())).into(),
            "#d1.0",
            39,
            44,
        ),
    ];

    assert_eq!(&*lexed, &expected[..]);
}
1733
#[test]
fn test_malformed_complex_numbers_are_identifiers() {
    // Expected identifier token whose name and source text are both `name`.
    let ident = |name: &'static str, start, end| Token {
        ty: identifier(name),
        source: name,
        span: Span::new(start, end, SourceId::none()),
    };

    let lexed: Vec<_> = token_stream("i 1i+1i -4+-2i").collect();

    // None of these is a well-formed complex literal, so each whitespace
    // separated chunk is expected to come back as a single identifier.
    let expected = [
        ident("i", 0, 1),
        ident("1i+1i", 2, 7),
        ident("-4+-2i", 8, 14),
    ];

    assert_eq!(lexed.as_slice(), &expected[..]);
}
1758
#[test]
fn test_string() {
    // Expected string-literal token: `value` is the unescaped contents,
    // `source` is the raw text including the surrounding quotes.
    let string = |value: &str, source: &'static str, start, end| Token {
        ty: StringLiteral(value.into()),
        source,
        span: Span::new(start, end, SourceId::none()),
    };

    let lexed: Vec<_> = token_stream(r#" "" "Foo bar" "\"\\" "#).collect();

    let expected = [
        string(r#""#, r#""""#, 1, 3),
        string(r#"Foo bar"#, r#""Foo bar""#, 4, 13),
        // The escapes `\"` and `\\` decode to a quote followed by a backslash.
        string(r#""\"#, r#""\"\\""#, 14, 20),
    ];

    assert_eq!(lexed.as_slice(), &expected[..]);
}
1783
#[test]
fn test_comment() {
    // The input consists solely of `;` line comments, so the token stream
    // is expected to be empty.
    let first = token_stream(";!/usr/bin/gate\n ; foo\n").next();
    assert_eq!(first, None);
}
1789
#[test]
fn function_definition() {
    // Lexes a small but realistic definition and checks that the stream is
    // split into the expected lexemes. Only the source text of each token is
    // compared, so the test does not depend on how keywords such as `define`
    // or `lambda` are classified.
    let source = "(define odd-rec? (lambda (x) (if (= x 0) #f (even-rec? (- x 1)))))";

    let lexemes: Vec<&str> = token_stream(source).map(|token| token.source).collect();

    assert_eq!(
        lexemes,
        [
            "(", "define", "odd-rec?", "(", "lambda", "(", "x", ")", "(", "if", "(", "=", "x",
            "0", ")", "#f", "(", "even-rec?", "(", "-", "x", "1", ")", ")", ")", ")", ")",
        ]
    );
}
1797
#[test]
fn lex_string_with_escape_chars() {
    // Smoke test: the input is a quoted string whose body is three NUL
    // characters. Nothing is asserted about the tokens; the test only fails
    // if lexing panics.
    let lexed = token_stream("\"\0\0\0\"").collect::<Vec<_>>();
    println!("{:#?}", lexed);
}
1804
#[test]
fn scheme_statement() {
    // Expected token whose text is `source`, spanning bytes `start..end` of the input.
    let tok = |ty: TokenType<InternedString>, source: &'static str, start, end| Token {
        ty,
        source,
        span: Span::new(start, end, SourceId::none()),
    };
    let open = |start, end| tok(OpenParen(Paren::Round, None), "(", start, end);
    let close = |start, end| tok(CloseParen(Paren::Round), ")", start, end);
    let ident = |name: &'static str, start, end| tok(identifier(name), name, start, end);

    let lexed: Vec<_> = token_stream("(apples (function a b) (+ a b))").collect();

    let expected: Vec<Token<InternedString>> = vec![
        open(0, 1),
        ident("apples", 1, 7),
        // (function a b)
        open(8, 9),
        ident("function", 9, 17),
        ident("a", 18, 19),
        ident("b", 20, 21),
        close(21, 22),
        // (+ a b)
        open(23, 24),
        ident("+", 24, 25),
        ident("a", 26, 27),
        ident("b", 28, 29),
        close(29, 30),
        // Closes the outermost form.
        close(30, 31),
    ];

    assert_eq!(lexed, expected);
}
1880
#[test]
fn test_bigint() {
    // One more than `i64::MAX`; the lexer is expected to produce an
    // `IntLiteral::Big` for it.
    let source = "9223372036854775808";
    let lexed: Vec<_> = token_stream(source).collect();

    let big = Box::new(source.parse().unwrap());
    let expected: Vec<Token<InternedString>> = vec![Token {
        ty: IntLiteral::Big(big).into(),
        source,
        span: Span::new(0, 19, SourceId::none()),
    }];

    assert_eq!(lexed, expected);
}
1896
#[test]
fn negative_test_bigint() {
    // One less than `i64::MIN`; the lexer is expected to produce an
    // `IntLiteral::Big` for it, sign included.
    let source = "-9223372036854775809";
    let lexed: Vec<_> = token_stream(source).collect();

    let big = Box::new(source.parse().unwrap());
    let expected: Vec<Token<InternedString>> = vec![Token {
        ty: IntLiteral::Big(big).into(),
        source,
        span: Span::new(0, 20, SourceId::none()),
    }];

    assert_eq!(lexed, expected);
}
1912
#[test]
fn identifier_test() {
    // Identifiers must end at each delimiter: whitespace, `(`, a backquote,
    // `'`, `"`, `,` and `;`.
    let tokens: Vec<(TokenType<InternedString>, &str)> = token_stream("a b(c`d'e\"www\"f,g;")
        .map(|token| (token.ty, token.source))
        .collect();

    // (position in the token list, identifier expected there). The skipped
    // positions hold the delimiter tokens, which are not checked here.
    let identifiers = [
        (0, "a"),
        (1, "b"),
        (3, "c"),
        (5, "d"),
        (7, "e"),
        (9, "f"),
        (11, "g"),
    ];

    for (index, name) in identifiers {
        assert_eq!(tokens[index], (identifier(name), name));
    }
}
1928
#[test]
fn vector_test() {
    let lexed = token_stream("a b #(c d)").map(|token| (token.ty, token.source));
    let tokens: Vec<_> = lexed.collect();

    // `#(` must come back as one open-paren token tagged as a vector.
    let expected_prefix = [
        (identifier("a"), "a"),
        (identifier("b"), "b"),
        (
            TokenType::OpenParen(Paren::Round, Some(ParenMod::Vector)),
            "#(",
        ),
        (identifier("c"), "c"),
        (identifier("d"), "d"),
    ];

    // Only the first five tokens are checked; the closing paren is not.
    assert_eq!(&tokens[..5], &expected_prefix[..]);
}
1947
#[test]
fn bytevector_test() {
    let lexed = token_stream("a b #u8(1 2)").map(|token| (token.ty, token.source));
    let tokens: Vec<_> = lexed.collect();

    // (position in the token list, token expected there). Positions 3 and 4
    // hold the two numbers, which are not checked here.
    let checks = [
        (0, (identifier("a"), "a")),
        (1, (identifier("b"), "b")),
        // `#u8(` must come back as one open-paren token tagged as bytes.
        (
            2,
            (
                TokenType::OpenParen(Paren::Round, Some(ParenMod::Bytes)),
                "#u8(",
            ),
        ),
        (5, (TokenType::CloseParen(Paren::Round), ")")),
    ];

    for (index, expected) in checks {
        assert_eq!(tokens[index], expected);
    }
}
1965
#[test]
fn special_comments_test() {
    // (input, first token the raw lexer is expected to yield for it).
    let cases = [
        // Block comments, including one that spans a newline and one that nests.
        ("#| f(\n [ |#", TokenType::Comment),
        ("#| a #| ( |# |#", TokenType::Comment),
        // Datum comments: only the leading `#;` marker is checked.
        ("#;(a b)", TokenType::DatumComment),
        ("#; #(#true 3)", TokenType::DatumComment),
        ("#; #; 3 5", TokenType::DatumComment),
    ];

    for (source, expected) in cases {
        let mut lexer = Lexer::new(source);
        assert_eq!(lexer.next(), Some(Ok(expected)));
    }
}
1983
#[test]
fn comment_error_test() {
    // A block comment that is opened but never closed must be reported as
    // an error instead of silently consuming the rest of the input.
    let first = Lexer::new("#|").next().unwrap();

    assert_eq!(first, Err(TokenError::IncompleteComment));
}
1990
#[test]
fn escaped_identifier_test() {
    // Expected identifier token: `name` is the identifier after the
    // surrounding bars and any escapes are removed, `source` is the raw
    // text. The span uses `SourceId::none()` like the other tests in this
    // module rather than a bare `None`.
    let tok = |name: &str, source: &'static str, start, end| Token {
        ty: identifier(name),
        source,
        span: Span::new(start, end, SourceId::none()),
    };

    let mut s = token_stream(r#"|a| |a b| |\x61;| |.|"#);

    let expected = [
        tok("a", "|a|", 0, 3),
        // Whitespace between the bars is part of the identifier.
        tok("a b", "|a b|", 4, 9),
        // `\x61;` is a hex escape for `a`.
        tok("a", r#"|\x61;|"#, 10, 17),
        tok(".", "|.|", 18, 21),
    ];

    for want in expected {
        assert_eq!(s.next().unwrap(), want);
    }

    // A backslash followed by a newline contributes nothing to the name.
    let mut s = token_stream("|a\\\nb|");

    assert_eq!(s.next().unwrap(), tok("ab", "|a\\\nb|", 0, 6));
}
2042}