1use crate::interner::InternedString;
2use crate::lexer;
3use crate::parser::SourceId;
4use crate::span::Span;
5use alloc::borrow::Cow;
6use core::fmt::{self, Display};
7use core::ops;
8use core::str::FromStr;
9use core::sync::atomic::AtomicU32;
10use dashmap::DashMap;
11use num_bigint::{BigInt, ParseBigIntError};
12use num_rational::Rational32;
13use num_traits::{Num, Signed};
14use ordered_float::OrderedFloat;
15use serde::{Deserialize, Serialize};
16use std::sync::LazyLock;
17use TokenType::*;
18
/// The bracket family of a paren token.
///
/// Each variant maps to an opening and a closing character through
/// `Paren::open` and `Paren::close`.
#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Paren {
    /// `(` and `)`.
    Round,
    /// `[` and `]`.
    Square,
    /// `{` and `}`.
    Curly,
}
25
/// A prefix that can be attached to an opening paren.
///
/// The textual form of each variant is given by `ParenMod::as_str`.
#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum ParenMod {
    /// The `#` prefix.
    Vector,
    /// The `#u8` prefix.
    Bytes,
}
31
32impl ParenMod {
33 pub(crate) fn as_str(&self) -> &'static str {
34 match self {
35 ParenMod::Vector => "#",
36 ParenMod::Bytes => "#u8",
37 }
38 }
39}
40
41impl Display for ParenMod {
42 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43 self.as_str().fmt(f)
44 }
45}
46
47impl Paren {
48 pub fn open(&self) -> char {
49 match self {
50 Paren::Round => '(',
51 Paren::Square => '[',
52 Paren::Curly => '{',
53 }
54 }
55
56 pub fn close(&self) -> char {
57 match self {
58 Paren::Round => ')',
59 Paren::Square => ']',
60 Paren::Curly => '}',
61 }
62 }
63}
64
/// The kind of a lexed token.
///
/// `S` is the payload type carried by the `Identifier` and `Keyword`
/// variants. The textual forms quoted below are the ones produced by this
/// type's `Display` implementation.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum TokenType<S> {
    /// An opening bracket, optionally preceded by a `#` / `#u8` modifier.
    OpenParen(Paren, Option<ParenMod>),
    /// A closing bracket.
    CloseParen(Paren),
    /// Displayed as `'`.
    QuoteTick,
    /// Displayed as a single backtick.
    QuasiQuote,
    /// Displayed as `,`.
    Unquote,
    /// Displayed as `,@`.
    UnquoteSplice,
    /// Displayed as `#'`.
    QuoteSyntax,
    /// Displayed as `#` followed by a backtick.
    QuasiQuoteSyntax,
    /// Displayed as `#,`.
    UnquoteSyntax,
    /// Displayed as `#,@`.
    UnquoteSpliceSyntax,
    /// Displayed as `if`.
    If,
    /// Displayed as `define`.
    Define,
    /// Displayed as `let`.
    Let,
    /// Displayed as `%plain-let`.
    TestLet,
    /// Displayed as `return!`.
    Return,
    /// Displayed as `begin`.
    Begin,
    /// Displayed as `lambda`.
    Lambda,
    /// Displayed as `quote`.
    Quote,
    /// Displayed as `syntax-rules`.
    SyntaxRules,
    /// Displayed as `define-syntax`.
    DefineSyntax,
    /// Displayed as `...`.
    Ellipses,
    /// Displayed as `set!`.
    Set,
    /// Displayed as `require`.
    Require,
    /// A character literal; displayed in `#\...` notation.
    CharacterLiteral(char),
    /// Displayed as `#;`.
    DatumComment,
    /// A comment; displayed as the empty string.
    Comment,
    /// A boolean literal; displayed as `#true` or `#false`.
    BooleanLiteral(bool),
    /// An identifier carrying its name.
    Identifier(S),
    /// A keyword carrying its text.
    Keyword(S),
    /// A numeric literal, stored as a handle into the number interner.
    Number(InternedNumber),
    /// A string literal; displayed wrapped in double quotes.
    StringLiteral(InternedString),
    /// Displayed as `.`.
    Dot,
}
100
101impl<T> TokenType<T> {
102 pub fn identifier_mut(&mut self) -> Option<&mut T> {
103 if let Self::Identifier(i) = self {
104 Some(i)
105 } else {
106 None
107 }
108 }
109
110 pub fn identifier(&self) -> Option<&T> {
111 if let Self::Identifier(i) = self {
112 Some(i)
113 } else {
114 None
115 }
116 }
117}
118
/// A bidirectional table between number literals and `u32` ids.
#[derive(Default)]
struct NumberLiteralInterner {
    /// Forward map: literal -> id.
    keys: DashMap<NumberLiteral, u32>,
    /// Reverse map: id -> literal, used when resolving an `InternedNumber`.
    values: DashMap<u32, NumberLiteral>,
    /// Counter that supplies the next id.
    key: AtomicU32,
}
125
/// The interner shared by every `InternedNumber`; built with
/// `NumberLiteralInterner::default` on first access.
static NUMBER_INTERNER: LazyLock<NumberLiteralInterner> =
    LazyLock::new(NumberLiteralInterner::default);
128
129impl NumberLiteralInterner {
130 pub fn add(&self, n: NumberLiteral) -> InternedNumber {
131 if let Some(value) = self.keys.get(&n) {
132 return InternedNumber(*value);
133 }
134
135 let value = self.key.fetch_add(1, core::sync::atomic::Ordering::Acquire);
136 self.keys.insert(n.clone(), value);
137 self.values.insert(value, n.clone());
138 InternedNumber(value)
139 }
140}
141
/// A copyable handle to a `NumberLiteral` held in the number interner.
///
/// The wrapped `u32` is the id under which the literal is stored.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct InternedNumber(u32);
144
145impl std::fmt::Display for InternedNumber {
146 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147 write!(f, "{}", self.resolve())
148 }
149}
150
151impl InternedNumber {
152 pub fn resolve(&self) -> NumberLiteral {
153 NUMBER_INTERNER.values.get(&self.0).unwrap().clone()
154 }
155}
156
157impl From<NumberLiteral> for InternedNumber {
158 fn from(value: NumberLiteral) -> Self {
159 NUMBER_INTERNER.add(value)
160 }
161}
162
163impl Serialize for InternedNumber {
164 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
165 where
166 S: serde::Serializer,
167 {
168 self.resolve().serialize(serializer)
169 }
170}
171
172impl<'de> Deserialize<'de> for InternedNumber {
173 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
174 where
175 D: serde::Deserializer<'de>,
176 {
177 let key = <NumberLiteral>::deserialize(deserializer)?;
178
179 Ok(InternedNumber::from(key))
180 }
181}
182
/// A numeric literal.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, PartialOrd, Hash, Eq)]
pub enum NumberLiteral {
    /// A single real component.
    Real(RealLiteral),
    /// Two components, displayed as `{re}+{im}i` (the `+` is omitted when the
    /// second component is negative or not finite).
    Complex(RealLiteral, RealLiteral),
    /// Two components, displayed as `{r}@{theta}`.
    Polar(RealLiteral, RealLiteral),
}
189
190impl Display for NumberLiteral {
191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192 match self {
193 NumberLiteral::Real(r) => r.fmt(f),
194 NumberLiteral::Complex(re, im) => {
195 if im.is_negative() || !im.is_finite() {
196 write!(f, "{re}{im}i")
197 } else {
198 write!(f, "{re}+{im}i")
199 }
200 }
201 NumberLiteral::Polar(r, theta) => {
202 write!(f, "{r}@{theta}")
203 }
204 }
205 }
206}
207
208impl<S> From<NumberLiteral> for TokenType<S> {
209 fn from(n: NumberLiteral) -> Self {
210 TokenType::Number(n.into())
211 }
212}
213
/// A real-valued numeric literal.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, PartialOrd, Hash, Eq)]
pub enum RealLiteral {
    /// An integer.
    Int(IntLiteral),
    /// Two integers, displayed as `{n}/{d}`.
    Rational(IntLiteral, IntLiteral),
    /// A 64-bit float.
    Float(OrderedFloat<f64>),
}
220
221impl RealLiteral {
222 fn is_negative(&self) -> bool {
223 match self {
224 RealLiteral::Int(i) => i.is_negative(),
225 RealLiteral::Rational(n, _) => n.is_negative(),
226 RealLiteral::Float(f) => f.is_sign_negative(),
227 }
228 }
229
230 fn is_finite(&self) -> bool {
231 match self {
232 RealLiteral::Int(_) => true,
233 RealLiteral::Rational(_, _) => true,
234 RealLiteral::Float(f) => f.is_finite(),
235 }
236 }
237}
238
239impl From<RealLiteral> for NumberLiteral {
240 fn from(value: RealLiteral) -> Self {
241 NumberLiteral::Real(value)
242 }
243}
244
245impl<S> From<RealLiteral> for TokenType<S> {
246 fn from(value: RealLiteral) -> Self {
247 NumberLiteral::Real(value).into()
248 }
249}
250
251impl From<f64> for RealLiteral {
252 fn from(value: f64) -> RealLiteral {
253 RealLiteral::Float(value.into())
254 }
255}
256
257impl From<isize> for RealLiteral {
258 fn from(value: isize) -> RealLiteral {
259 RealLiteral::Int(IntLiteral::Small(value))
260 }
261}
262
263impl From<Rational32> for RealLiteral {
264 fn from(value: Rational32) -> RealLiteral {
265 RealLiteral::Rational(
266 (*value.numer() as isize).into(),
267 (*value.denom() as isize).into(),
268 )
269 }
270}
271
272impl Display for RealLiteral {
273 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
274 match self {
275 RealLiteral::Int(i) => i.fmt(f),
276 RealLiteral::Rational(n, d) => write!(f, "{n}/{d}"),
277 RealLiteral::Float(x) => {
278 if x.is_nan() {
279 write!(f, "{}", lexer::NAN)
280 } else if x.is_infinite() && x.is_sign_negative() {
281 write!(f, "{}", lexer::NEG_INFINITY)
282 } else if x.is_infinite() {
283 write!(f, "{}", lexer::INFINITY)
284 } else {
285 write!(f, "{x:?}")
286 }
287 }
288 }
289 }
290}
291
/// An integer literal of arbitrary size.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum IntLiteral {
    /// A value held in an `isize`.
    Small(isize),
    /// A boxed arbitrary-precision value; the parsers in this file produce it
    /// when parsing as `isize` fails.
    Big(Box<BigInt>),
}
297
298impl IntLiteral {
299 pub fn from_str_radix(src: &str, radix: u32) -> Result<IntLiteral, ParseBigIntError> {
300 isize::from_str_radix(src, radix)
301 .map(IntLiteral::Small)
302 .or_else(|_| {
303 BigInt::from_str_radix(src, radix)
304 .map(Box::new)
305 .map(IntLiteral::Big)
306 })
307 }
308
309 fn is_negative(&self) -> bool {
310 match self {
311 IntLiteral::Small(i) => i.is_negative(),
312 IntLiteral::Big(i) => i.is_negative(),
313 }
314 }
315}
316
317impl FromStr for IntLiteral {
318 type Err = <num_bigint::BigInt as FromStr>::Err;
319
320 fn from_str(s: &str) -> Result<Self, Self::Err> {
321 s.parse::<isize>().map(IntLiteral::Small).or_else(|_| {
322 s.parse::<num_bigint::BigInt>()
323 .map(|b| IntLiteral::Big(Box::new(b)))
324 })
325 }
326}
327
328impl Display for IntLiteral {
329 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
330 match self {
331 Self::Small(s) => write!(f, "{s}"),
332 Self::Big(b) => write!(f, "{b}"),
333 }
334 }
335}
336
337impl<S> From<IntLiteral> for TokenType<S> {
338 fn from(value: IntLiteral) -> Self {
339 RealLiteral::Int(value).into()
340 }
341}
342
343impl From<IntLiteral> for RealLiteral {
344 fn from(value: IntLiteral) -> Self {
345 RealLiteral::Int(value)
346 }
347}
348
349impl From<IntLiteral> for BigInt {
350 fn from(v: IntLiteral) -> BigInt {
351 match v {
352 IntLiteral::Small(x) => x.into(),
353 IntLiteral::Big(x) => *x,
354 }
355 }
356}
357
358impl From<isize> for IntLiteral {
359 fn from(value: isize) -> Self {
360 IntLiteral::Small(value)
361 }
362}
363
364impl From<BigInt> for IntLiteral {
365 fn from(value: BigInt) -> Self {
366 IntLiteral::Big(Box::new(value))
367 }
368}
369
370impl<'a> TokenType<Cow<'a, str>> {
371 pub fn open_span(mut span: Span, paren_mod: Option<ParenMod>) -> Span {
372 let offset = match paren_mod {
373 Some(ParenMod::Vector) => 1,
374 Some(ParenMod::Bytes) => 3,
375 None => 0,
376 };
377
378 span.start += offset;
379
380 span
381 }
382
383 pub fn to_owned<T: From<Cow<'a, str>>>(self) -> TokenType<T> {
384 match self {
385 TokenType::Identifier(i) => TokenType::Identifier(i.into()),
386 TokenType::Keyword(i) => TokenType::Keyword(i.into()),
387 OpenParen(p, m) => OpenParen(p, m),
388 CloseParen(p) => CloseParen(p),
389 CharacterLiteral(x) => CharacterLiteral(x),
390 BooleanLiteral(x) => BooleanLiteral(x),
391 Number(x) => Number(x),
392 StringLiteral(x) => StringLiteral(x),
393 QuoteTick => QuoteTick,
394 Unquote => Unquote,
395 QuasiQuote => QuasiQuote,
396 UnquoteSplice => UnquoteSplice,
397 Comment => Comment,
398 DatumComment => DatumComment,
399 If => If,
400 Define => Define,
401 Let => Let,
402 TestLet => TestLet,
403 Return => Return,
404 Begin => Begin,
405 Lambda => Lambda,
406 Quote => Quote,
407 DefineSyntax => DefineSyntax,
408 SyntaxRules => SyntaxRules,
409 Ellipses => Ellipses,
410 Set => Set,
411 Require => Require,
412 QuasiQuoteSyntax => QuasiQuoteSyntax,
413 UnquoteSyntax => UnquoteSyntax,
414 QuoteSyntax => QuoteSyntax,
415 UnquoteSpliceSyntax => UnquoteSpliceSyntax,
416 Dot => Dot,
417 }
418 }
419
420 pub fn map<T>(self, mut func: impl FnMut(Cow<'a, str>) -> T) -> TokenType<T> {
421 match self {
422 TokenType::Identifier(i) => TokenType::Identifier(func(i)),
423 TokenType::Keyword(i) => TokenType::Keyword(func(i)),
424 OpenParen(p, m) => OpenParen(p, m),
425 CloseParen(p) => CloseParen(p),
426 CharacterLiteral(x) => CharacterLiteral(x),
427 BooleanLiteral(x) => BooleanLiteral(x),
428 Number(x) => Number(x),
429 StringLiteral(x) => StringLiteral(x),
430 QuoteTick => QuoteTick,
431 Unquote => Unquote,
432 QuasiQuote => QuasiQuote,
433 UnquoteSplice => UnquoteSplice,
434 Comment => Comment,
435 DatumComment => DatumComment,
436 If => If,
437 Define => Define,
438 Let => Let,
439 TestLet => TestLet,
440 Return => Return,
441 Begin => Begin,
442 Lambda => Lambda,
443 Quote => Quote,
444 DefineSyntax => DefineSyntax,
445 SyntaxRules => SyntaxRules,
446 Ellipses => Ellipses,
447 Set => Set,
448 Require => Require,
449 QuasiQuoteSyntax => QuasiQuoteSyntax,
450 UnquoteSyntax => UnquoteSyntax,
451 QuoteSyntax => QuoteSyntax,
452 UnquoteSpliceSyntax => UnquoteSpliceSyntax,
453 Dot => Dot,
454 }
455 }
456}
457
/// Writes `c` as a character literal.
///
/// Space, NUL, tab, newline and carriage return use their names
/// (`#\space`, `#\null`, `#\tab`, `#\newline`, `#\return`). Any other
/// character is written verbatim after `#\` when its `escape_debug` form is
/// at most two characters long, and as `#\u` followed by its code point in
/// at least four lowercase hex digits otherwise.
fn character_special_display(c: char, f: &mut fmt::Formatter) -> fmt::Result {
    let named = match c {
        ' ' => Some("#\\space"),
        '\0' => Some("#\\null"),
        '\t' => Some("#\\tab"),
        '\n' => Some("#\\newline"),
        '\r' => Some("#\\return"),
        _ => None,
    };
    if let Some(name) = named {
        return f.write_str(name);
    }

    // A long escape sequence means the character has no short printable
    // form, so fall back to the numeric notation.
    if c.escape_debug().len() > 2 {
        write!(f, "#\\u{:04x}", c as u32)
    } else {
        write!(f, "#\\{}", c)
    }
}
477
478impl<T: Display> fmt::Display for TokenType<T> {
479 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
480 match self {
481 OpenParen(p, m) => {
482 if let Some(m) = m {
483 m.fmt(f)?;
484 }
485
486 write!(f, "{}", p.open())
487 }
488 CloseParen(p) => write!(f, "{}", p.close()),
489 CharacterLiteral(x) => character_special_display(*x, f),
490 BooleanLiteral(x) => write!(f, "#{x}"),
491 Identifier(x) => write!(f, "{x}"),
492 Number(x) => write!(f, "{x}"),
493 StringLiteral(x) => write!(f, "\"{x}\""),
494 Keyword(x) => write!(f, "{x}"),
495 QuoteTick => write!(f, "'"),
496 Unquote => write!(f, ","),
497 QuasiQuote => write!(f, "`"),
498 UnquoteSplice => write!(f, ",@"),
499 QuoteSyntax => write!(f, "#'"),
500 QuasiQuoteSyntax => write!(f, "#`"),
501 UnquoteSyntax => write!(f, "#,"),
502 UnquoteSpliceSyntax => write!(f, "#,@"),
503 DatumComment => write!(f, "#;"),
504 Comment => write!(f, ""),
505 If => write!(f, "if"),
506 Define => write!(f, "define"),
507 Let => write!(f, "let"),
508 TestLet => write!(f, "%plain-let"),
509 Return => write!(f, "return!"),
510 Begin => write!(f, "begin"),
511 Lambda => write!(f, "lambda"),
512 Quote => write!(f, "quote"),
513 DefineSyntax => write!(f, "define-syntax"),
514 SyntaxRules => write!(f, "syntax-rules"),
515 Ellipses => write!(f, "..."),
516 Set => write!(f, "set!"),
517 Require => write!(f, "require"),
518 Dot => write!(f, "."),
519 }
520 }
521}
522
/// A token payload of type `TY` together with its source text and span.
#[derive(Debug, Clone, PartialEq)]
pub struct TokenLike<'a, TY> {
    /// The token payload.
    pub ty: TY,
    /// The borrowed source text associated with the token.
    pub source: &'a str,
    /// The span associated with the token.
    pub span: Span,
}
529
530impl<'a, TY> TokenLike<'a, TY> {
531 pub const fn new(
532 ty: TY,
533 source: &'a str,
534 range: ops::Range<u32>,
535 source_id: Option<SourceId>,
536 ) -> Self {
537 Self {
538 ty,
539 source,
540 span: Span::new(range.start, range.end, source_id),
541 }
542 }
543}
544
/// A `TokenLike` whose payload is a `TokenType<T>`.
pub type Token<'a, T> = TokenLike<'a, TokenType<T>>;
546
547impl<'a, T> Token<'a, T> {
548 pub fn typ(&self) -> &TokenType<T> {
549 &self.ty
550 }
551
552 pub const fn span(&self) -> Span {
553 self.span
554 }
555
556 pub const fn range(&self) -> ops::Range<u32> {
557 self.span.start()..self.span.end()
558 }
559
560 pub const fn source(&self) -> &'a str {
561 self.source
562 }
563}
564
565impl<T> From<Token<'_, T>> for Span {
566 fn from(token: Token<'_, T>) -> Self {
567 token.span()
568 }
569}
570
571impl<T> From<&Token<'_, T>> for Span {
572 fn from(token: &Token<'_, T>) -> Self {
573 token.span()
574 }
575}
576
577impl<T> From<Token<'_, T>> for ops::Range<u32> {
578 fn from(token: Token<'_, T>) -> Self {
579 token.span().into()
580 }
581}
582
583impl<T> From<&Token<'_, T>> for ops::Range<u32> {
584 fn from(token: &Token<'_, T>) -> Self {
585 token.span().into()
586 }
587}
588
589impl<T> From<Token<'_, T>> for (u32, u32) {
590 fn from(token: Token<'_, T>) -> Self {
591 token.span().into()
592 }
593}
594
595impl<T> From<&Token<'_, T>> for (u32, u32) {
596 fn from(token: &Token<'_, T>) -> Self {
597 token.span().into()
598 }
599}
600
601impl<T> From<Token<'_, T>> for [u32; 2] {
602 fn from(token: Token<'_, T>) -> Self {
603 token.span().into()
604 }
605}
606
607impl<T> From<&Token<'_, T>> for [u32; 2] {
608 fn from(token: &Token<'_, T>) -> Self {
609 token.span().into()
610 }
611}
612
613impl<T> Display for Token<'_, T> {
614 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
615 write!(f, "{} @ {:?}", self.source, self.span)
616 }
617}