// steel_parser/tokens.rs

1use crate::interner::InternedString;
2use crate::lexer;
3use crate::parser::SourceId;
4use crate::span::Span;
5use alloc::borrow::Cow;
6use core::fmt::{self, Display};
7use core::ops;
8use core::str::FromStr;
9use core::sync::atomic::AtomicU32;
10use dashmap::DashMap;
11use num_bigint::{BigInt, ParseBigIntError};
12use num_rational::Rational32;
13use num_traits::{Num, Signed};
14use ordered_float::OrderedFloat;
15use serde::{Deserialize, Serialize};
16use std::sync::LazyLock;
17use TokenType::*;
18
19#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
20pub enum Paren {
21    Round,
22    Square,
23    Curly,
24}
25
26#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
27pub enum ParenMod {
28    Vector,
29    Bytes,
30}
31
32impl ParenMod {
33    pub(crate) fn as_str(&self) -> &'static str {
34        match self {
35            ParenMod::Vector => "#",
36            ParenMod::Bytes => "#u8",
37        }
38    }
39}
40
41impl Display for ParenMod {
42    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43        self.as_str().fmt(f)
44    }
45}
46
47impl Paren {
48    pub fn open(&self) -> char {
49        match self {
50            Paren::Round => '(',
51            Paren::Square => '[',
52            Paren::Curly => '{',
53        }
54    }
55
56    pub fn close(&self) -> char {
57        match self {
58            Paren::Round => ')',
59            Paren::Square => ']',
60            Paren::Curly => '}',
61        }
62    }
63}
64
65#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
66pub enum TokenType<S> {
67    OpenParen(Paren, Option<ParenMod>),
68    CloseParen(Paren),
69    QuoteTick,
70    QuasiQuote,
71    Unquote,
72    UnquoteSplice,
73    QuoteSyntax,
74    QuasiQuoteSyntax,
75    UnquoteSyntax,
76    UnquoteSpliceSyntax,
77    If,
78    Define,
79    Let,
80    TestLet,
81    Return,
82    Begin,
83    Lambda,
84    Quote,
85    SyntaxRules,
86    DefineSyntax,
87    Ellipses,
88    Set,
89    Require,
90    CharacterLiteral(char),
91    DatumComment,
92    Comment,
93    BooleanLiteral(bool),
94    Identifier(S),
95    Keyword(S),
96    Number(InternedNumber),
97    StringLiteral(InternedString),
98    Dot,
99}
100
101impl<T> TokenType<T> {
102    pub fn identifier_mut(&mut self) -> Option<&mut T> {
103        if let Self::Identifier(i) = self {
104            Some(i)
105        } else {
106            None
107        }
108    }
109
110    pub fn identifier(&self) -> Option<&T> {
111        if let Self::Identifier(i) = self {
112            Some(i)
113        } else {
114            None
115        }
116    }
117}
118
119#[derive(Default)]
120struct NumberLiteralInterner {
121    keys: DashMap<NumberLiteral, u32>,
122    values: DashMap<u32, NumberLiteral>,
123    key: AtomicU32,
124}
125
126static NUMBER_INTERNER: LazyLock<NumberLiteralInterner> =
127    LazyLock::new(NumberLiteralInterner::default);
128
129impl NumberLiteralInterner {
130    pub fn add(&self, n: NumberLiteral) -> InternedNumber {
131        if let Some(value) = self.keys.get(&n) {
132            return InternedNumber(*value);
133        }
134
135        let value = self.key.fetch_add(1, core::sync::atomic::Ordering::Acquire);
136        self.keys.insert(n.clone(), value);
137        self.values.insert(value, n.clone());
138        InternedNumber(value)
139    }
140}
141
/// A cheap, copyable handle to a [`NumberLiteral`] held by the module's
/// number interner.
///
/// The wrapped `u32` is the id assigned when the literal was interned; use
/// [`InternedNumber::resolve`] to get the literal back.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct InternedNumber(u32);
144
145impl std::fmt::Display for InternedNumber {
146    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147        write!(f, "{}", self.resolve())
148    }
149}
150
151impl InternedNumber {
152    pub fn resolve(&self) -> NumberLiteral {
153        NUMBER_INTERNER.values.get(&self.0).unwrap().clone()
154    }
155}
156
157impl From<NumberLiteral> for InternedNumber {
158    fn from(value: NumberLiteral) -> Self {
159        NUMBER_INTERNER.add(value)
160    }
161}
162
163impl Serialize for InternedNumber {
164    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
165    where
166        S: serde::Serializer,
167    {
168        self.resolve().serialize(serializer)
169    }
170}
171
172impl<'de> Deserialize<'de> for InternedNumber {
173    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
174    where
175        D: serde::Deserializer<'de>,
176    {
177        let key = <NumberLiteral>::deserialize(deserializer)?;
178
179        Ok(InternedNumber::from(key))
180    }
181}
182
183#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, PartialOrd, Hash, Eq)]
184pub enum NumberLiteral {
185    Real(RealLiteral),
186    Complex(RealLiteral, RealLiteral),
187    Polar(RealLiteral, RealLiteral),
188}
189
190impl Display for NumberLiteral {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        match self {
193            NumberLiteral::Real(r) => r.fmt(f),
194            NumberLiteral::Complex(re, im) => {
195                if im.is_negative() || !im.is_finite() {
196                    write!(f, "{re}{im}i")
197                } else {
198                    write!(f, "{re}+{im}i")
199                }
200            }
201            NumberLiteral::Polar(r, theta) => {
202                write!(f, "{r}@{theta}")
203            }
204        }
205    }
206}
207
208impl<S> From<NumberLiteral> for TokenType<S> {
209    fn from(n: NumberLiteral) -> Self {
210        TokenType::Number(n.into())
211    }
212}
213
214#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, PartialOrd, Hash, Eq)]
215pub enum RealLiteral {
216    Int(IntLiteral),
217    Rational(IntLiteral, IntLiteral),
218    Float(OrderedFloat<f64>),
219}
220
221impl RealLiteral {
222    fn is_negative(&self) -> bool {
223        match self {
224            RealLiteral::Int(i) => i.is_negative(),
225            RealLiteral::Rational(n, _) => n.is_negative(),
226            RealLiteral::Float(f) => f.is_sign_negative(),
227        }
228    }
229
230    fn is_finite(&self) -> bool {
231        match self {
232            RealLiteral::Int(_) => true,
233            RealLiteral::Rational(_, _) => true,
234            RealLiteral::Float(f) => f.is_finite(),
235        }
236    }
237}
238
239impl From<RealLiteral> for NumberLiteral {
240    fn from(value: RealLiteral) -> Self {
241        NumberLiteral::Real(value)
242    }
243}
244
245impl<S> From<RealLiteral> for TokenType<S> {
246    fn from(value: RealLiteral) -> Self {
247        NumberLiteral::Real(value).into()
248    }
249}
250
251impl From<f64> for RealLiteral {
252    fn from(value: f64) -> RealLiteral {
253        RealLiteral::Float(value.into())
254    }
255}
256
257impl From<isize> for RealLiteral {
258    fn from(value: isize) -> RealLiteral {
259        RealLiteral::Int(IntLiteral::Small(value))
260    }
261}
262
263impl From<Rational32> for RealLiteral {
264    fn from(value: Rational32) -> RealLiteral {
265        RealLiteral::Rational(
266            (*value.numer() as isize).into(),
267            (*value.denom() as isize).into(),
268        )
269    }
270}
271
272impl Display for RealLiteral {
273    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
274        match self {
275            RealLiteral::Int(i) => i.fmt(f),
276            RealLiteral::Rational(n, d) => write!(f, "{n}/{d}"),
277            RealLiteral::Float(x) => {
278                if x.is_nan() {
279                    write!(f, "{}", lexer::NAN)
280                } else if x.is_infinite() && x.is_sign_negative() {
281                    write!(f, "{}", lexer::NEG_INFINITY)
282                } else if x.is_infinite() {
283                    write!(f, "{}", lexer::INFINITY)
284                } else {
285                    write!(f, "{x:?}")
286                }
287            }
288        }
289    }
290}
291
292#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
293pub enum IntLiteral {
294    Small(isize),
295    Big(Box<BigInt>),
296}
297
298impl IntLiteral {
299    pub fn from_str_radix(src: &str, radix: u32) -> Result<IntLiteral, ParseBigIntError> {
300        isize::from_str_radix(src, radix)
301            .map(IntLiteral::Small)
302            .or_else(|_| {
303                BigInt::from_str_radix(src, radix)
304                    .map(Box::new)
305                    .map(IntLiteral::Big)
306            })
307    }
308
309    fn is_negative(&self) -> bool {
310        match self {
311            IntLiteral::Small(i) => i.is_negative(),
312            IntLiteral::Big(i) => i.is_negative(),
313        }
314    }
315}
316
317impl FromStr for IntLiteral {
318    type Err = <num_bigint::BigInt as FromStr>::Err;
319
320    fn from_str(s: &str) -> Result<Self, Self::Err> {
321        s.parse::<isize>().map(IntLiteral::Small).or_else(|_| {
322            s.parse::<num_bigint::BigInt>()
323                .map(|b| IntLiteral::Big(Box::new(b)))
324        })
325    }
326}
327
328impl Display for IntLiteral {
329    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
330        match self {
331            Self::Small(s) => write!(f, "{s}"),
332            Self::Big(b) => write!(f, "{b}"),
333        }
334    }
335}
336
337impl<S> From<IntLiteral> for TokenType<S> {
338    fn from(value: IntLiteral) -> Self {
339        RealLiteral::Int(value).into()
340    }
341}
342
343impl From<IntLiteral> for RealLiteral {
344    fn from(value: IntLiteral) -> Self {
345        RealLiteral::Int(value)
346    }
347}
348
349impl From<IntLiteral> for BigInt {
350    fn from(v: IntLiteral) -> BigInt {
351        match v {
352            IntLiteral::Small(x) => x.into(),
353            IntLiteral::Big(x) => *x,
354        }
355    }
356}
357
358impl From<isize> for IntLiteral {
359    fn from(value: isize) -> Self {
360        IntLiteral::Small(value)
361    }
362}
363
364impl From<BigInt> for IntLiteral {
365    fn from(value: BigInt) -> Self {
366        IntLiteral::Big(Box::new(value))
367    }
368}
369
370impl<'a> TokenType<Cow<'a, str>> {
371    pub fn open_span(mut span: Span, paren_mod: Option<ParenMod>) -> Span {
372        let offset = match paren_mod {
373            Some(ParenMod::Vector) => 1,
374            Some(ParenMod::Bytes) => 3,
375            None => 0,
376        };
377
378        span.start += offset;
379
380        span
381    }
382
383    pub fn to_owned<T: From<Cow<'a, str>>>(self) -> TokenType<T> {
384        match self {
385            TokenType::Identifier(i) => TokenType::Identifier(i.into()),
386            TokenType::Keyword(i) => TokenType::Keyword(i.into()),
387            OpenParen(p, m) => OpenParen(p, m),
388            CloseParen(p) => CloseParen(p),
389            CharacterLiteral(x) => CharacterLiteral(x),
390            BooleanLiteral(x) => BooleanLiteral(x),
391            Number(x) => Number(x),
392            StringLiteral(x) => StringLiteral(x),
393            QuoteTick => QuoteTick,
394            Unquote => Unquote,
395            QuasiQuote => QuasiQuote,
396            UnquoteSplice => UnquoteSplice,
397            Comment => Comment,
398            DatumComment => DatumComment,
399            If => If,
400            Define => Define,
401            Let => Let,
402            TestLet => TestLet,
403            Return => Return,
404            Begin => Begin,
405            Lambda => Lambda,
406            Quote => Quote,
407            DefineSyntax => DefineSyntax,
408            SyntaxRules => SyntaxRules,
409            Ellipses => Ellipses,
410            Set => Set,
411            Require => Require,
412            QuasiQuoteSyntax => QuasiQuoteSyntax,
413            UnquoteSyntax => UnquoteSyntax,
414            QuoteSyntax => QuoteSyntax,
415            UnquoteSpliceSyntax => UnquoteSpliceSyntax,
416            Dot => Dot,
417        }
418    }
419
420    pub fn map<T>(self, mut func: impl FnMut(Cow<'a, str>) -> T) -> TokenType<T> {
421        match self {
422            TokenType::Identifier(i) => TokenType::Identifier(func(i)),
423            TokenType::Keyword(i) => TokenType::Keyword(func(i)),
424            OpenParen(p, m) => OpenParen(p, m),
425            CloseParen(p) => CloseParen(p),
426            CharacterLiteral(x) => CharacterLiteral(x),
427            BooleanLiteral(x) => BooleanLiteral(x),
428            Number(x) => Number(x),
429            StringLiteral(x) => StringLiteral(x),
430            QuoteTick => QuoteTick,
431            Unquote => Unquote,
432            QuasiQuote => QuasiQuote,
433            UnquoteSplice => UnquoteSplice,
434            Comment => Comment,
435            DatumComment => DatumComment,
436            If => If,
437            Define => Define,
438            Let => Let,
439            TestLet => TestLet,
440            Return => Return,
441            Begin => Begin,
442            Lambda => Lambda,
443            Quote => Quote,
444            DefineSyntax => DefineSyntax,
445            SyntaxRules => SyntaxRules,
446            Ellipses => Ellipses,
447            Set => Set,
448            Require => Require,
449            QuasiQuoteSyntax => QuasiQuoteSyntax,
450            UnquoteSyntax => UnquoteSyntax,
451            QuoteSyntax => QuoteSyntax,
452            UnquoteSpliceSyntax => UnquoteSpliceSyntax,
453            Dot => Dot,
454        }
455    }
456}
457
/// Writes `c` as a character literal in `#\` notation.
///
/// Space, NUL, tab, newline and carriage return use their names
/// (`#\space`, `#\null`, `#\tab`, `#\newline`, `#\return`). Any other
/// character is written verbatim after `#\` unless its `escape_debug` form is
/// longer than two characters, in which case it is written as `#\u` followed
/// by its code point in lowercase hex, zero-padded to at least four digits.
fn character_special_display(c: char, f: &mut fmt::Formatter) -> fmt::Result {
    match c {
        ' ' => return f.write_str("#\\space"),
        '\0' => return f.write_str("#\\null"),
        '\t' => return f.write_str("#\\tab"),
        '\n' => return f.write_str("#\\newline"),
        '\r' => return f.write_str("#\\return"),
        _ => {}
    }

    // An escape of one or two characters (the char itself, or a simple
    // backslash escape such as `\'`) means the char can be shown as-is.
    let needs_code_point = c.escape_debug().len() > 2;
    if needs_code_point {
        write!(f, "#\\u{:04x}", c as u32)
    } else {
        write!(f, "#\\{}", c)
    }
}
477
478impl<T: Display> fmt::Display for TokenType<T> {
479    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
480        match self {
481            OpenParen(p, m) => {
482                if let Some(m) = m {
483                    m.fmt(f)?;
484                }
485
486                write!(f, "{}", p.open())
487            }
488            CloseParen(p) => write!(f, "{}", p.close()),
489            CharacterLiteral(x) => character_special_display(*x, f),
490            BooleanLiteral(x) => write!(f, "#{x}"),
491            Identifier(x) => write!(f, "{x}"),
492            Number(x) => write!(f, "{x}"),
493            StringLiteral(x) => write!(f, "\"{x}\""),
494            Keyword(x) => write!(f, "{x}"),
495            QuoteTick => write!(f, "'"),
496            Unquote => write!(f, ","),
497            QuasiQuote => write!(f, "`"),
498            UnquoteSplice => write!(f, ",@"),
499            QuoteSyntax => write!(f, "#'"),
500            QuasiQuoteSyntax => write!(f, "#`"),
501            UnquoteSyntax => write!(f, "#,"),
502            UnquoteSpliceSyntax => write!(f, "#,@"),
503            DatumComment => write!(f, "#;"),
504            Comment => write!(f, ""),
505            If => write!(f, "if"),
506            Define => write!(f, "define"),
507            Let => write!(f, "let"),
508            TestLet => write!(f, "%plain-let"),
509            Return => write!(f, "return!"),
510            Begin => write!(f, "begin"),
511            Lambda => write!(f, "lambda"),
512            Quote => write!(f, "quote"),
513            DefineSyntax => write!(f, "define-syntax"),
514            SyntaxRules => write!(f, "syntax-rules"),
515            Ellipses => write!(f, "..."),
516            Set => write!(f, "set!"),
517            Require => write!(f, "require"),
518            Dot => write!(f, "."),
519        }
520    }
521}
522
523#[derive(Debug, Clone, PartialEq)]
524pub struct TokenLike<'a, TY> {
525    pub ty: TY,
526    pub source: &'a str,
527    pub span: Span,
528}
529
530impl<'a, TY> TokenLike<'a, TY> {
531    pub const fn new(
532        ty: TY,
533        source: &'a str,
534        range: ops::Range<u32>,
535        source_id: Option<SourceId>,
536    ) -> Self {
537        Self {
538            ty,
539            source,
540            span: Span::new(range.start, range.end, source_id),
541        }
542    }
543}
544
545pub type Token<'a, T> = TokenLike<'a, TokenType<T>>;
546
547impl<'a, T> Token<'a, T> {
548    pub fn typ(&self) -> &TokenType<T> {
549        &self.ty
550    }
551
552    pub const fn span(&self) -> Span {
553        self.span
554    }
555
556    pub const fn range(&self) -> ops::Range<u32> {
557        self.span.start()..self.span.end()
558    }
559
560    pub const fn source(&self) -> &'a str {
561        self.source
562    }
563}
564
565impl<T> From<Token<'_, T>> for Span {
566    fn from(token: Token<'_, T>) -> Self {
567        token.span()
568    }
569}
570
571impl<T> From<&Token<'_, T>> for Span {
572    fn from(token: &Token<'_, T>) -> Self {
573        token.span()
574    }
575}
576
577impl<T> From<Token<'_, T>> for ops::Range<u32> {
578    fn from(token: Token<'_, T>) -> Self {
579        token.span().into()
580    }
581}
582
583impl<T> From<&Token<'_, T>> for ops::Range<u32> {
584    fn from(token: &Token<'_, T>) -> Self {
585        token.span().into()
586    }
587}
588
589impl<T> From<Token<'_, T>> for (u32, u32) {
590    fn from(token: Token<'_, T>) -> Self {
591        token.span().into()
592    }
593}
594
595impl<T> From<&Token<'_, T>> for (u32, u32) {
596    fn from(token: &Token<'_, T>) -> Self {
597        token.span().into()
598    }
599}
600
601impl<T> From<Token<'_, T>> for [u32; 2] {
602    fn from(token: Token<'_, T>) -> Self {
603        token.span().into()
604    }
605}
606
607impl<T> From<&Token<'_, T>> for [u32; 2] {
608    fn from(token: &Token<'_, T>) -> Self {
609        token.span().into()
610    }
611}
612
613impl<T> Display for Token<'_, T> {
614    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
615        write!(f, "{} @ {:?}", self.source, self.span)
616    }
617}