Skip to main content

scheme_rs/syntax/
lex.rs

1//! Lexical analysis of symbolic expressions
2
3use std::sync::Arc;
4
5use super::Span;
6use malachite::{Integer, base::num::conversion::traits::*, rational::Rational};
7use scheme_rs_macros::{maybe_async, maybe_await};
8use unicode_categories::UnicodeCategories;
9
10#[cfg(feature = "async")]
11use futures::future::BoxFuture;
12
13use crate::{
14    exceptions::Exception,
15    num::{self, SimpleNumber},
16    ports::{PortData, PortInfo},
17};
18
/// Tokenizer that reads characters from a port and produces [`Token`]s.
///
/// The lexer looks ahead speculatively: `pos` advances as characters are
/// matched and may be rewound when a match attempt fails.
pub struct Lexer<'a> {
    /// Port state the characters are read from.
    port_data: &'a mut PortData,
    /// Port description passed along with every read on `port_data`.
    port_info: &'a PortInfo,
    /// Number of characters matched since the last `consume_chars` call.
    pos: usize,
    /// Characters already pulled from the port by `peek` so that a rewound
    /// `pos` can be re-read.
    buff: Vec<char>,
    /// Line/column bookkeeping for the character at `pos`.
    curr_span: Span,
}
26
27impl<'a> Lexer<'a> {
28    pub(crate) fn new(port_data: &'a mut PortData, port_info: &'a PortInfo, span: Span) -> Self {
29        Self {
30            port_data,
31            port_info,
32            pos: 0,
33            buff: Vec::new(),
34            curr_span: span,
35        }
36    }
37
38    pub(crate) fn curr_span(&self) -> Span {
39        Span {
40            line: self.curr_span.line,
41            column: self.curr_span.column,
42            offset: self.curr_span.offset + self.pos,
43            file: self.curr_span.file.clone(),
44        }
45    }
46
47    #[maybe_async]
48    fn peek(&mut self) -> Result<Option<char>, Exception> {
49        if self.buff.len() > self.pos {
50            return Ok(Some(self.buff[self.pos]));
51        }
52        while self.buff.len() < self.pos {
53            let Some(chr) = maybe_await!(self.port_data.read_char(self.port_info))? else {
54                return Ok(None);
55            };
56            self.buff.push(chr);
57        }
58        maybe_await!(self.port_data.peekn_chars(self.port_info, 0))
59    }
60
61    #[maybe_async]
62    fn skip(&mut self) -> Result<(), Exception> {
63        maybe_await!(self.take())?;
64        Ok(())
65    }
66
67    #[maybe_async]
68    pub(crate) fn take(&mut self) -> Result<Option<char>, Exception> {
69        let Some(chr) = maybe_await!(self.peek())? else {
70            return Ok(None);
71        };
72        if chr == '\n' {
73            self.curr_span.line += 1;
74            self.curr_span.column = 0;
75        } else {
76            self.curr_span.column += 1;
77        }
78        self.pos += 1;
79        Ok(Some(chr))
80    }
81
82    #[maybe_async]
83    fn match_char(&mut self, chr: char) -> Result<bool, Exception> {
84        Ok(maybe_await!(self.match_pred(|peek| peek == chr))?.is_some())
85    }
86
87    #[maybe_async]
88    fn match_pred(&mut self, pred: impl FnOnce(char) -> bool) -> Result<Option<char>, Exception> {
89        let chr = maybe_await!(self.peek())?;
90        if let Some(chr) = chr
91            && pred(chr)
92        {
93            if chr == '\n' {
94                self.curr_span.line += 1;
95                self.curr_span.column = 0;
96            } else {
97                self.curr_span.column += 1;
98            }
99            self.pos += 1;
100            Ok(Some(chr))
101        } else {
102            Ok(None)
103        }
104    }
105
106    #[maybe_async]
107    fn match_tag(&mut self, tag: &str) -> Result<bool, Exception> {
108        let pos = self.pos;
109        for chr in tag.chars() {
110            if !maybe_await!(self.match_char(chr))? {
111                self.pos = pos;
112                return Ok(false);
113            }
114        }
115        // tag cannot contain newlines
116        self.curr_span.column += pos;
117        Ok(true)
118    }
119
120    #[maybe_async]
121    fn consume_chars(&mut self) -> Result<(), Exception> {
122        // Consume all the characters we need to
123        if self.pos > self.buff.len() {
124            maybe_await!(
125                self.port_data
126                    .consume_chars(self.port_info, self.pos - self.buff.len())
127            )?;
128        }
129        self.pos = 0;
130        self.buff.clear();
131        Ok(())
132    }
133
    /// Lexes the next token from the port.
    ///
    /// Interlexeme space (comments, whitespace, `#!r6rs`) is skipped first.
    /// A number is tried, then an identifier, then the punctuation and
    /// `#`-prefixed forms. Returns `Ok(None)` when no character is left or
    /// a NUL character is read. The returned span is the position taken
    /// after skipping interlexeme space.
    ///
    /// # Errors
    ///
    /// Returns [`LexerError::UnexpectedCharacter`] for a character that
    /// starts no lexeme, [`LexerError::UnexpectedEof`] for a lone `#` at the
    /// end of input, and any error raised by the sub-lexers or the port.
    #[maybe_async]
    pub fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
        // TODO: Check if the port is empty

        // Check for any interlexeme space:
        maybe_await!(self.interlexeme_space())?;

        // self.consume_chars()?;

        // Get the current span:
        let span = self.curr_span();

        // Check for various special characters:
        let lexeme = if let Some(number) = maybe_await!(self.number(10))? {
            Lexeme::Number(number)
        } else if let Some(identifier) = maybe_await!(self.identifier())? {
            Lexeme::Identifier(identifier)
        } else if let Some(chr) = maybe_await!(self.take())? {
            // Guard order matters below: longer tags must be tried before
            // their prefixes (`,@` before `,`, `#,@` before `#,`).
            match chr {
                '.' => Lexeme::Period,
                '\'' => Lexeme::Quote,
                '`' => Lexeme::Backquote,
                ',' if maybe_await!(self.match_tag("@"))? => Lexeme::CommaAt,
                ',' => Lexeme::Comma,
                '(' => Lexeme::LParen,
                ')' => Lexeme::RParen,
                '[' => Lexeme::LBracket,
                ']' => Lexeme::RBracket,
                '"' => Lexeme::String(maybe_await!(self.string())?),
                '#' if maybe_await!(self.match_tag(";"))? => Lexeme::DatumComment,
                '#' if maybe_await!(self.match_tag("\\"))? => {
                    Lexeme::Character(maybe_await!(self.character())?)
                }
                '#' if maybe_await!(self.match_tag("F"))? || maybe_await!(self.match_tag("f"))? => {
                    Lexeme::Boolean(false)
                }
                '#' if maybe_await!(self.match_tag("T"))? || maybe_await!(self.match_tag("t"))? => {
                    Lexeme::Boolean(true)
                }
                '#' if maybe_await!(self.match_tag("("))? => Lexeme::HashParen,
                '#' if maybe_await!(self.match_tag("vu8("))? => Lexeme::Vu8Paren,
                '#' if maybe_await!(self.match_tag("'"))? => Lexeme::HashQuote,
                '#' if maybe_await!(self.match_tag("`"))? => Lexeme::HashBackquote,
                '#' if maybe_await!(self.match_tag(",@"))? => Lexeme::HashCommaAt,
                '#' if maybe_await!(self.match_tag(","))? => Lexeme::HashComma,
                // A `#` that starts none of the forms above: report the
                // character after it, or end of input if there is none.
                '#' => {
                    let next_chr = maybe_await!(self.take())?;
                    if let Some(chr) = next_chr {
                        return Err(LexerError::UnexpectedCharacter {
                            chr,
                            span: self.curr_span(),
                        });
                    } else {
                        return Err(LexerError::UnexpectedEof);
                    }
                }
                // A NUL character is treated like the end of input.
                '\0' => return Ok(None),
                chr => return Err(LexerError::UnexpectedCharacter { chr, span }),
            }
        } else {
            return Ok(None);
        };

        // Commit the characters that make up this token.
        maybe_await!(self.consume_chars())?;

        Ok(Some(Token { lexeme, span }))
    }
201
202    #[maybe_async]
203    fn interlexeme_space(&mut self) -> Result<(), Exception> {
204        loop {
205            if maybe_await!(self.match_char(';'))? {
206                maybe_await!(self.comment())?;
207            } else if maybe_await!(self.match_tag("#|"))? {
208                maybe_await!(self.nested_comment())?;
209            } else if !maybe_await!(self.match_tag("#!r6rs"))?
210                && maybe_await!(self.match_pred(is_whitespace))?.is_none()
211            {
212                break;
213            }
214        }
215        Ok(())
216    }
217
218    #[maybe_async]
219    fn comment(&mut self) -> Result<(), Exception> {
220        while maybe_await!(self.match_pred(|chr| chr != '\n'))?.is_some() {}
221        Ok(())
222    }
223
    /// Skips a nested comment body (async build).
    ///
    /// Returns a boxed future around `nested_comment_inner`, which calls
    /// back into this function for inner comments; presumably the boxing is
    /// what lets that recursion compile as an async function.
    #[cfg(feature = "async")]
    fn nested_comment(&mut self) -> BoxFuture<'_, Result<(), Exception>> {
        Box::pin(self.nested_comment_inner())
    }

    /// Skips a nested comment body (sync build); forwards directly to
    /// `nested_comment_inner`.
    #[cfg(not(feature = "async"))]
    fn nested_comment(&mut self) -> Result<(), Exception> {
        self.nested_comment_inner()
    }
233
234    #[maybe_async]
235    fn nested_comment_inner(&mut self) -> Result<(), Exception> {
236        while !maybe_await!(self.match_tag("|#"))? {
237            if maybe_await!(self.match_tag("#|"))? {
238                maybe_await!(self.nested_comment())?;
239            } else {
240                maybe_await!(self.skip())?;
241            }
242        }
243        Ok(())
244    }
245
246    #[maybe_async]
247    fn character(&mut self) -> Result<Character, LexerError> {
248        let chr = if maybe_await!(self.match_tag("alarm"))? {
249            Character::Escaped(EscapedCharacter::Alarm)
250        } else if maybe_await!(self.match_tag("backspace"))? {
251            Character::Escaped(EscapedCharacter::Backspace)
252        } else if maybe_await!(self.match_tag("delete"))? {
253            Character::Escaped(EscapedCharacter::Delete)
254        } else if maybe_await!(self.match_tag("esc"))? {
255            Character::Escaped(EscapedCharacter::Escape)
256        } else if maybe_await!(self.match_tag("newline"))?
257            || maybe_await!(self.match_tag("linefeed"))?
258        {
259            Character::Escaped(EscapedCharacter::Newline)
260        } else if maybe_await!(self.match_tag("nul"))? {
261            Character::Escaped(EscapedCharacter::Nul)
262        } else if maybe_await!(self.match_tag("return"))? {
263            Character::Escaped(EscapedCharacter::Return)
264        } else if maybe_await!(self.match_tag("space"))? {
265            Character::Escaped(EscapedCharacter::Space)
266        } else if maybe_await!(self.match_tag("tab"))? {
267            Character::Escaped(EscapedCharacter::Tab)
268        } else if maybe_await!(self.match_tag("vtab"))? {
269            Character::Escaped(EscapedCharacter::VTab)
270        } else if maybe_await!(self.match_tag("page"))? {
271            Character::Escaped(EscapedCharacter::Page)
272        } else if maybe_await!(self.match_char('x'))? {
273            if is_delimiter(maybe_await!(self.peek())?.ok_or(LexerError::UnexpectedEof)?) {
274                Character::Literal('x')
275            } else {
276                let mut unicode = String::new();
277                while let Some(chr) = maybe_await!(self.match_pred(|c| c.is_ascii_hexdigit()))? {
278                    unicode.push(chr);
279                }
280                Character::Unicode(unicode)
281            }
282        } else {
283            Character::Literal(maybe_await!(self.take())?.ok_or(LexerError::UnexpectedEof)?)
284        };
285        let peeked = maybe_await!(self.peek())?;
286        if let Some(peeked) = peeked
287            && !is_delimiter(peeked)
288        {
289            let span = self.curr_span();
290            Err(LexerError::UnexpectedCharacter { chr: peeked, span })
291        } else {
292            Ok(chr)
293        }
294    }
295
296    #[maybe_async]
297    pub(crate) fn number(&mut self, default_radix: u32) -> Result<Option<Number>, Exception> {
298        let saved_pos = self.pos;
299        let saved_span = self.curr_span.clone();
300
301        let (radix, exactness) = maybe_await!(self.radix_and_exactness())?;
302
303        let radix = radix.unwrap_or(default_radix);
304
305        // Need this because "10i" is not a valid number.
306        let has_sign = {
307            let peeked = maybe_await!(self.peek())?;
308            peeked == Some('+') || peeked == Some('-')
309        };
310
311        let first_part = maybe_await!(self.part(radix))?;
312
313        if first_part.is_none() {
314            self.pos = saved_pos;
315            self.curr_span = saved_span;
316            return Ok(None);
317        }
318
319        let number = if maybe_await!(self.match_char('i'))? {
320            if !has_sign {
321                self.pos = saved_pos;
322                self.curr_span = saved_span;
323                return Ok(None);
324            }
325            Number {
326                radix,
327                exactness,
328                real_part: None,
329                imag_part: first_part,
330                is_polar: false,
331            }
332        } else {
333            let matched_at = maybe_await!(self.match_char('@'))?;
334            let imag_part = if matched_at || {
335                let peeked = maybe_await!(self.peek())?;
336                peeked == Some('+') || peeked == Some('-')
337            } {
338                let second_part = maybe_await!(self.part(radix))?;
339                if second_part.is_none() || !matched_at && !maybe_await!(self.match_char('i'))? {
340                    self.pos = saved_pos;
341                    self.curr_span = saved_span;
342                    return Ok(None);
343                }
344                second_part
345            } else {
346                None
347            };
348            Number {
349                radix,
350                exactness,
351                real_part: first_part,
352                imag_part,
353                is_polar: matched_at,
354            }
355        };
356
357        match maybe_await!(self.peek()) {
358            Ok(Some(chr)) if is_subsequent(chr) => {
359                self.pos = saved_pos;
360                self.curr_span = saved_span;
361                return Ok(None);
362            }
363            Err(err) => return Err(err),
364            Ok(_) => (),
365        }
366
367        Ok(Some(number))
368    }
369
370    #[maybe_async]
371    fn part(&mut self, radix: u32) -> Result<Option<Part>, Exception> {
372        let neg = !maybe_await!(self.match_char('+'))? && maybe_await!(self.match_char('-'))?;
373        let mut mantissa_width = None;
374
375        // Check for special nan/inf
376        let real = if maybe_await!(self.match_tag("nan.0"))? {
377            Real::Nan
378        } else if maybe_await!(self.match_tag("inf.0"))? {
379            Real::Inf
380        } else {
381            let mut num = String::new();
382            while let Some(ch) = maybe_await!(self.match_pred(|chr| chr.is_digit(radix)))? {
383                num.push(ch);
384            }
385            if !num.is_empty() && maybe_await!(self.match_char('/'))? {
386                // Rational number
387                let mut denom = String::new();
388                while let Some(ch) = maybe_await!(self.match_pred(|chr| chr.is_digit(radix)))? {
389                    denom.push(ch);
390                }
391                if denom.is_empty() {
392                    return Ok(None);
393                }
394                Real::Rational(num, denom)
395            } else if radix == 10 {
396                let mut fractional = String::new();
397                if maybe_await!(self.match_char('.'))? {
398                    while let Some(ch) = maybe_await!(self.match_pred(|chr| chr.is_digit(radix)))? {
399                        fractional.push(ch);
400                    }
401                }
402                if num.is_empty() && fractional.is_empty() {
403                    return Ok(None);
404                }
405                let suffix = maybe_await!(self.suffix())?;
406                if maybe_await!(self.match_char('|'))? {
407                    let mut width = 0;
408                    while let Some(chr) = maybe_await!(self.match_pred(|chr| chr.is_ascii_digit()))?
409                    {
410                        width = width * 10 + chr.to_digit(10).unwrap() as usize;
411                    }
412                    mantissa_width = Some(width);
413                }
414                Real::Decimal(num, fractional, suffix)
415            } else if num.is_empty() {
416                return Ok(None);
417            } else {
418                Real::Num(num)
419            }
420        };
421
422        Ok(Some(Part {
423            neg,
424            real,
425            mantissa_width,
426        }))
427    }
428
429    #[maybe_async]
430    fn exactness(&mut self) -> Result<Option<Exactness>, Exception> {
431        Ok(
432            if maybe_await!(self.match_tag("#i"))? || maybe_await!(self.match_tag("#I"))? {
433                Some(Exactness::Inexact)
434            } else if maybe_await!(self.match_tag("#e"))? || maybe_await!(self.match_tag("#E"))? {
435                Some(Exactness::Exact)
436            } else {
437                None
438            },
439        )
440    }
441
442    #[maybe_async]
443    fn radix(&mut self) -> Result<Option<u32>, Exception> {
444        Ok(
445            if maybe_await!(self.match_tag("#b"))? || maybe_await!(self.match_tag("#B"))? {
446                Some(2)
447            } else if maybe_await!(self.match_tag("#o"))? || maybe_await!(self.match_tag("#O"))? {
448                Some(8)
449            } else if maybe_await!(self.match_tag("#x"))? || maybe_await!(self.match_tag("#X"))? {
450                Some(16)
451            } else if maybe_await!(self.match_tag("#d"))? || maybe_await!(self.match_tag("#D"))? {
452                Some(10)
453            } else {
454                None
455            },
456        )
457    }
458
459    #[maybe_async]
460    fn radix_and_exactness(&mut self) -> Result<(Option<u32>, Option<Exactness>), Exception> {
461        let exactness = maybe_await!(self.exactness())?;
462        let radix = maybe_await!(self.radix())?;
463        if exactness.is_some() {
464            Ok((radix, exactness))
465        } else {
466            Ok((radix, maybe_await!(self.exactness())?))
467        }
468    }
469
470    #[maybe_async]
471    fn suffix(&mut self) -> Result<Option<isize>, Exception> {
472        let pos = self.pos;
473        if maybe_await!(
474            self.match_pred(|chr| matches!(chr.to_ascii_lowercase(), 'e' | 's' | 'f' | 'd' | 'l'))
475        )?
476        .is_some()
477        {
478            let neg = !maybe_await!(self.match_char('+'))? && maybe_await!(self.match_char('-'))?;
479            let mut suffix = String::new();
480            while let Some(chr) = maybe_await!(self.match_pred(|chr| chr.is_ascii_digit()))? {
481                suffix.push(chr);
482            }
483            if !suffix.is_empty() {
484                let val: isize = suffix.parse().unwrap();
485                if neg {
486                    return Ok(Some(-val));
487                } else {
488                    return Ok(Some(val));
489                }
490            }
491        }
492        self.pos = pos;
493        Ok(None)
494    }
495
496    #[maybe_async]
497    fn string(&mut self) -> Result<String, LexerError> {
498        let mut output = String::new();
499        while let Some(chr) = maybe_await!(self.match_pred(|chr| chr != '"'))? {
500            if chr == '\\' {
501                let escaped = match maybe_await!(self.take())?.ok_or(LexerError::UnexpectedEof)? {
502                    'x' => {
503                        let escaped = maybe_await!(self.inline_hex_escape())?;
504                        output.push_str(&escaped);
505                        continue;
506                    }
507                    'a' => '\u{07}',
508                    'b' => '\u{08}',
509                    't' => '\t',
510                    'n' => '\n',
511                    'r' => '\r',
512                    'v' => '\u{0B}',
513                    'f' => '\u{0C}',
514                    '"' => '"',
515                    '\\' => '\\',
516                    '\n' => {
517                        while maybe_await!(self.match_pred(is_intraline_whitespace))?.is_some() {}
518                        continue;
519                    }
520                    chr if is_intraline_whitespace(chr) => {
521                        while maybe_await!(
522                            self.match_pred(|chr| chr != '\n' && is_intraline_whitespace(chr))
523                        )?
524                        .is_some()
525                        {}
526                        let chr = maybe_await!(self.take())?.ok_or(LexerError::UnexpectedEof)?;
527                        if chr != '\n' {
528                            let span = self.curr_span();
529                            return Err(LexerError::UnexpectedCharacter { chr, span });
530                        }
531                        while maybe_await!(self.match_pred(is_intraline_whitespace))?.is_some() {}
532                        continue;
533                    }
534                    chr => {
535                        let span = self.curr_span();
536                        return Err(LexerError::BadEscapeCharacter { chr, span });
537                    }
538                };
539                output.push(escaped);
540            } else {
541                output.push(chr);
542            }
543        }
544        // Skip the terminating quote
545        maybe_await!(self.skip())?;
546        Ok(output)
547    }
548
549    #[maybe_async]
550    fn identifier(&mut self) -> Result<Option<String>, LexerError> {
551        let mut ident = if maybe_await!(self.match_tag("\\x"))? {
552            maybe_await!(self.inline_hex_escape())?
553        } else if maybe_await!(self.match_tag("..."))? {
554            String::from("...")
555        } else if maybe_await!(self.match_tag("->"))? {
556            String::from("->")
557        } else if let Some(initial) =
558            maybe_await!(self.match_pred(|chr| is_initial(chr) || is_peculiar_initial(chr)))?
559        {
560            String::from(initial)
561        } else {
562            return Ok(None);
563        };
564
565        loop {
566            if maybe_await!(self.match_tag("\\x"))? {
567                ident.push_str(&maybe_await!(self.inline_hex_escape())?);
568            } else if let Some(next) = maybe_await!(self.match_pred(is_subsequent))? {
569                ident.push(next);
570            } else {
571                break;
572            }
573        }
574
575        Ok(Some(ident))
576    }
577
578    #[maybe_async]
579    fn inline_hex_escape(&mut self) -> Result<String, LexerError> {
580        let mut escaped = String::new();
581        let mut buff = String::with_capacity(2);
582        while let Some(chr) = maybe_await!(self.match_pred(|chr| chr != ';'))? {
583            if !chr.is_ascii_hexdigit() {
584                return Err(LexerError::InvalidCharacterInHexEscape {
585                    chr,
586                    span: self.curr_span(),
587                });
588            }
589            buff.push(chr);
590            if buff.len() == 2 {
591                escaped.push(u8::from_str_radix(&buff, 16).unwrap() as char);
592                buff.clear();
593            }
594        }
595        if !buff.is_empty() {
596            escaped.push(u8::from_str_radix(&buff, 16).unwrap() as char);
597        }
598        maybe_await!(self.take())?;
599        Ok(escaped)
600    }
601}
602
/// Errors produced while lexing.
#[derive(Debug)]
pub enum LexerError {
    /// The input ended in the middle of a lexeme.
    UnexpectedEof,
    /// A non-hexadecimal character appeared inside a `\x...;` escape.
    InvalidCharacterInHexEscape { chr: char, span: Span },
    /// A character appeared where it cannot start or continue a lexeme.
    UnexpectedCharacter { chr: char, span: Span },
    /// A backslash in a string was followed by an unsupported character.
    BadEscapeCharacter { chr: char, span: Span },
    /// An exception converted via the `From<Exception>` impl; in this file
    /// that happens when a port operation fails.
    ReadError(Exception),
}
611
612impl From<Exception> for LexerError {
613    fn from(error: Exception) -> Self {
614        Self::ReadError(error)
615    }
616}
617
618fn is_delimiter(chr: char) -> bool {
619    is_whitespace(chr) || matches!(chr, '(' | ')' | '[' | ']' | '"' | ';' | '#')
620}
621
622fn is_whitespace(chr: char) -> bool {
623    chr.is_separator() || matches!(chr, '\t' | '\n' | '\r')
624}
625
626fn is_intraline_whitespace(chr: char) -> bool {
627    chr == '\t' || chr.is_separator()
628}
629
630fn is_initial(chr: char) -> bool {
631    is_constituent(chr) || is_special_initial(chr)
632}
633
634fn is_constituent(c: char) -> bool {
635    c.is_ascii_alphabetic()
636        || (c as u32 > 127
637            && (c.is_letter()
638                || c.is_mark_nonspacing()
639                || c.is_number_letter()
640                || c.is_number_other()
641                || c.is_punctuation_dash()
642                || c.is_punctuation_connector()
643                || c.is_punctuation_other()
644                || c.is_symbol()
645                || c.is_other_private_use()))
646}
647
/// Returns whether `chr` is one of the punctuation characters allowed at
/// the start of an identifier.
fn is_special_initial(chr: char) -> bool {
    const SPECIAL_INITIALS: [char; 15] = [
        '!', '$', '%', '&', '*', '/', ':', '<', '=', '>', '?', '^', '_', '~', '@',
    ];
    SPECIAL_INITIALS.contains(&chr)
}
654
/// Returns whether `chr` is a sign character (`+` or `-`), which may start
/// a peculiar identifier.
fn is_peculiar_initial(chr: char) -> bool {
    chr == '+' || chr == '-'
}
658
/// Returns whether `chr` is one of the extra punctuation characters allowed
/// after the first character of an identifier: `+`, `-`, `.` or `@`.
fn is_special_subsequent(chr: char) -> bool {
    ['+', '-', '.', '@'].contains(&chr)
}
662
663fn is_subsequent(chr: char) -> bool {
664    is_initial(chr)
665        || chr.is_ascii_digit()
666        || chr.is_number_decimal_digit()
667        || chr.is_mark_spacing_combining()
668        || chr.is_mark_enclosing()
669        || is_special_subsequent(chr)
670}
671
/// A lexeme together with the source position it was read at.
#[derive(Clone, Debug)]
pub struct Token {
    /// What was lexed.
    pub lexeme: Lexeme,
    /// Span captured by the lexer before the lexeme was read.
    pub span: super::Span,
}
677
/// The kinds of lexeme the lexer produces.
#[derive(Clone, Debug, PartialEq)]
pub enum Lexeme {
    /// An identifier, with inline hex escapes already resolved.
    Identifier(String),
    /// `#t` / `#f` (either letter case).
    Boolean(bool),
    /// A numeric literal, kept in textual form until converted.
    Number(Number),
    /// A `#\...` character literal.
    Character(Character),
    /// A string literal, with escapes already resolved.
    String(String),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `#(`
    HashParen,
    /// `#vu8(`
    Vu8Paren,
    /// `'`
    Quote,
    /// `` ` ``
    Backquote,
    /// `,`
    Comma,
    /// `,@`
    CommaAt,
    /// `.`
    Period,
    /// `#'`
    HashQuote,
    /// ``#` ``
    HashBackquote,
    /// `#,`
    HashComma,
    /// `#,@`
    HashCommaAt,
    /// `#;`
    DatumComment,
}
702
/// A numeric literal as lexed, before conversion to a runtime number.
#[derive(Clone, Debug, PartialEq)]
pub struct Number {
    /// Radix the digits are written in (from the prefix, or the default).
    radix: u32,
    /// Explicit exactness prefix, if one was given.
    exactness: Option<Exactness>,
    /// Real component; `None` for a pure imaginary literal such as `+2i`.
    real_part: Option<Part>,
    /// Second component: the imaginary part, or the angle when `is_polar`.
    imag_part: Option<Part>,
    /// Whether the literal used the polar `magnitude@angle` form.
    is_polar: bool,
}
711
712impl TryFrom<(Part, u32)> for SimpleNumber {
713    type Error = ParseNumberError;
714
715    fn try_from((part, radix): (Part, u32)) -> Result<Self, Self::Error> {
716        part.try_into_i64(radix)
717            .map(SimpleNumber::FixedInteger)
718            .or_else(|| part.try_into_integer(radix).map(SimpleNumber::BigInteger))
719            .or_else(|| part.try_into_rational(radix).map(SimpleNumber::Rational))
720            .or_else(|| part.try_into_f64(radix).map(SimpleNumber::Real))
721            .ok_or(ParseNumberError::NoValidRepresentation)
722    }
723}
724
725impl TryFrom<Number> for num::Number {
726    type Error = ParseNumberError;
727
728    fn try_from(value: Number) -> Result<Self, Self::Error> {
729        // Ignore exactness for now
730        if let Some(imag_part) = value.imag_part {
731            // This is a complex number:
732            let imag_part: SimpleNumber = (imag_part, value.radix).try_into()?;
733            let real_part: SimpleNumber = if let Some(real_part) = value.real_part {
734                (real_part, value.radix).try_into()?
735            } else {
736                SimpleNumber::Real(0.0)
737            };
738            return Ok(num::Number(Arc::new(num::NumberInner::Complex(
739                if value.is_polar {
740                    num::ComplexNumber::from_polar(real_part, imag_part)
741                } else {
742                    num::ComplexNumber::new(real_part, imag_part)
743                },
744            ))));
745        }
746
747        let part = value
748            .real_part
749            .ok_or(ParseNumberError::NoValidRepresentation)?;
750
751        Ok(num::Number(Arc::new(num::NumberInner::Simple(
752            (part, value.radix).try_into()?,
753        ))))
754    }
755}
756
/// Error returned when a lexed number cannot be converted to a runtime
/// number.
#[derive(Debug)]
pub enum ParseNumberError {
    /// None of the integer, rational or floating-point conversions accepted
    /// the literal.
    NoValidRepresentation,
}
761
/// One signed real component of a numeric literal.
#[derive(Clone, Debug, PartialEq)]
struct Part {
    /// Whether the component was written with a leading `-`.
    neg: bool,
    /// The unsigned magnitude, in textual form.
    real: Real,
    /// Value of the `|width` mantissa-width annotation, if present. It is
    /// recorded by the lexer but not read by the conversions in this file.
    mantissa_width: Option<usize>,
}
768
769impl Part {
770    fn try_into_i64(&self, radix: u32) -> Option<i64> {
771        let num = match &self.real {
772            Real::Num(num) => i64::from_str_radix(num, radix).ok()?,
773            Real::Decimal(base, fract, None) if fract.is_empty() => base.parse().ok()?,
774            Real::Decimal(base, fract, Some(exp)) if fract.is_empty() && !exp.is_negative() => {
775                let base: i64 = base.parse().ok()?;
776                let exp = 10_i64.checked_pow((*exp).try_into().ok()?)?;
777                base.checked_mul(exp)?
778            }
779            _ => return None,
780        };
781        Some(if self.neg { -num } else { num })
782    }
783
784    fn try_into_integer(&self, radix: u32) -> Option<Integer> {
785        let num = match &self.real {
786            Real::Num(num) => Integer::from_string_base(radix as u8, num)?,
787            Real::Decimal(base, fract, None) if fract.is_empty() => {
788                Integer::from_string_base(10, base)?
789            }
790            Real::Decimal(base, fract, Some(exp)) if fract.is_empty() && !exp.is_negative() => {
791                Integer::from_sci_string(&format!("{base}e{exp}"))?
792            }
793            _ => return None,
794        };
795        Some(if self.neg { -num } else { num })
796    }
797
798    fn try_into_rational(&self, radix: u32) -> Option<Rational> {
799        let num = match &self.real {
800            Real::Rational(num, denom) => {
801                let num = Integer::from_string_base(radix as u8, num)?;
802                let den = Integer::from_string_base(radix as u8, denom)?;
803                if den == 0 {
804                    return None;
805                }
806                Rational::from_integers(num, den)
807            }
808            _ => return None,
809        };
810        Some(if self.neg { -num } else { num })
811    }
812
813    fn try_into_f64(&self, radix: u32) -> Option<f64> {
814        match &self.real {
815            Real::Nan => Some(f64::NAN),
816            Real::Inf if !self.neg => Some(f64::INFINITY),
817            Real::Inf if self.neg => Some(f64::NEG_INFINITY),
818            Real::Num(s) if radix == 10 => {
819                let num: f64 = s.parse().ok()?;
820                Some(if self.neg { -num } else { num })
821            }
822            Real::Rational(num, den) if radix == 10 => {
823                let num: f64 = num.parse().ok()?;
824                let den: f64 = den.parse().ok()?;
825                if den == 0.0 {
826                    return None;
827                }
828                let num = num / den;
829                Some(if self.neg { -num } else { num })
830            }
831            Real::Decimal(base, fract, None) => {
832                let num: f64 = format!("{base}.{fract}").parse().ok()?;
833                Some(if self.neg { -num } else { num })
834            }
835            Real::Decimal(base, fract, Some(exp)) => {
836                let num: f64 = format!("{base}.{fract}e{exp}").parse().ok()?;
837                Some(if self.neg { -num } else { num })
838            }
839            _ => None,
840        }
841    }
842}
843
/// Explicit exactness prefix on a numeric literal.
#[derive(Clone, Debug, PartialEq)]
enum Exactness {
    /// `#e` / `#E`
    Exact,
    /// `#i` / `#I`
    Inexact,
}
849
/// Unsigned magnitude of a numeric component, kept as the digit strings
/// that were lexed.
#[derive(Clone, Debug, PartialEq)]
enum Real {
    /// `nan.0`
    Nan,
    /// `inf.0`
    Inf,
    /// Integer digits in a non-decimal radix.
    Num(String),
    /// Numerator and denominator digits of `n/d`.
    Rational(String, String),
    /// Radix-10 number: integer digits, fractional digits (either may be
    /// empty, but not both), and the optional exponent.
    Decimal(String, String, Option<isize>),
}
858
/// A lexed `#\...` character literal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Character {
    /// `#\a` characters
    Literal(char),
    /// `#\foo` characters
    Escaped(EscapedCharacter),
    /// `#\xcafe` characters; holds the hex digits as written, not yet
    /// decoded.
    Unicode(String),
}
868
/// Named characters usable in `#\name` literals.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EscapedCharacter {
    /// `#\nul`
    Nul,
    /// `#\alarm`
    Alarm,
    /// `#\backspace`
    Backspace,
    /// `#\tab`
    Tab,
    /// `#\newline` or `#\linefeed`
    Newline,
    /// `#\vtab`
    VTab,
    /// `#\page`
    Page,
    /// `#\return`
    Return,
    /// `#\esc`
    Escape,
    /// `#\space`
    Space,
    /// `#\delete`
    Delete,
}
883
884impl From<EscapedCharacter> for char {
885    fn from(c: EscapedCharacter) -> char {
886        // from r7rs 6.6
887        match c {
888            EscapedCharacter::Nul => '\u{0000}',
889            EscapedCharacter::Alarm => '\u{0007}',
890            EscapedCharacter::Backspace => '\u{0008}',
891            EscapedCharacter::Tab => '\u{0009}',
892            EscapedCharacter::Newline => '\u{000A}',
893            EscapedCharacter::VTab => '\u{000B}',
894            EscapedCharacter::Page => '\u{000C}',
895            EscapedCharacter::Return => '\u{000D}',
896            EscapedCharacter::Escape => '\u{001B}',
897            EscapedCharacter::Space => ' ',
898            EscapedCharacter::Delete => '\u{007F}',
899        }
900    }
901}
902
#[cfg(test)]
mod test {
    use super::*;

    /// `#` must not be able to start an identifier, otherwise `#`-prefixed
    /// lexemes would be swallowed by the identifier rule.
    #[test]
    fn is_hash_identifier_char() {
        assert!(!is_initial('#'));
        assert!(!is_peculiar_initial('#'));
    }
}
911}