// abyss_core/parser/tokens.rs

1use chumsky::{error::Rich, extra, prelude::*, span::SimpleSpan as ChumskySpan};
2use ordered_float::OrderedFloat;
3use std::fmt;
4
5use crate::ast::Type;
6
7use super::SimpleSpan;
8
9#[derive(Debug, Clone, PartialEq, Eq, Hash)]
10pub enum Token {
11    Forge,
12    Morph,
13    Core,
14    Oracle,
15    Orbit,
16    Resume,
17    Eject,
18    Engrave,
19    Reveal,
20    Artifact,
21    Ward,
22    Identifier(String),
23    Type(Type),
24    OmenLiteral(bool),
25    Arcana(i64),
26    Aether(OrderedFloat<f64>),
27    Rune(String),
28    Semicolon,
29    Colon,
30    Comma,
31    Arrow,
32    FatArrow,
33    DoubleColon,
34    Assign,
35    AddAssign,
36    SubAssign,
37    MulAssign,
38    DivAssign,
39    ModAssign,
40    PowArcanaAssign,
41    PowAetherAssign,
42    Equal,
43    NotEqual,
44    LessThan,
45    LessThanOrEqual,
46    GreaterThan,
47    GreaterThanOrEqual,
48    Plus,
49    Minus,
50    Star,
51    Slash,
52    Percent,
53    Caret,
54    DoubleStar,
55    DoublePipe,
56    DoubleAmpersand,
57    Bang,
58    OpenParen,
59    CloseParen,
60    OpenBrace,
61    CloseBrace,
62    OpenBracket,
63    CloseBracket,
64    RangeInclusive,
65    RangeExclusive,
66    Dot,
67}
68
69pub type SpannedToken = (Token, SimpleSpan<usize>);
70
71impl fmt::Display for Token {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        match self {
74            Token::Forge => write!(f, "forge"),
75            Token::Morph => write!(f, "morph"),
76            Token::Core => write!(f, "core"),
77            Token::Oracle => write!(f, "oracle"),
78            Token::Orbit => write!(f, "orbit"),
79            Token::Resume => write!(f, "resume"),
80            Token::Eject => write!(f, "eject"),
81            Token::Engrave => write!(f, "engrave"),
82            Token::Reveal => write!(f, "reveal"),
83            Token::Artifact => write!(f, "artifact"),
84            Token::Ward => write!(f, "ward"),
85            Token::Identifier(name) => write!(f, "identifier `{name}`"),
86            Token::Type(ty) => write!(f, "type `{ty:?}`"),
87            Token::OmenLiteral(true) => write!(f, "boon"),
88            Token::OmenLiteral(false) => write!(f, "hex"),
89            Token::Arcana(value) => write!(f, "arcana literal {value}"),
90            Token::Aether(value) => write!(f, "aether literal {value}"),
91            Token::Rune(value) => write!(f, "rune literal \"{value}\""),
92            Token::Semicolon => write!(f, ";"),
93            Token::Colon => write!(f, ":"),
94            Token::Comma => write!(f, ","),
95            Token::Arrow => write!(f, "->"),
96            Token::FatArrow => write!(f, "=>"),
97            Token::DoubleColon => write!(f, "::"),
98            Token::Assign => write!(f, "="),
99            Token::AddAssign => write!(f, "+="),
100            Token::SubAssign => write!(f, "-="),
101            Token::MulAssign => write!(f, "*="),
102            Token::DivAssign => write!(f, "/="),
103            Token::ModAssign => write!(f, "%="),
104            Token::PowArcanaAssign => write!(f, "^="),
105            Token::PowAetherAssign => write!(f, "**="),
106            Token::Equal => write!(f, "=="),
107            Token::NotEqual => write!(f, "!="),
108            Token::LessThan => write!(f, "<"),
109            Token::LessThanOrEqual => write!(f, "<="),
110            Token::GreaterThan => write!(f, ">"),
111            Token::GreaterThanOrEqual => write!(f, ">="),
112            Token::Plus => write!(f, "+"),
113            Token::Minus => write!(f, "-"),
114            Token::Star => write!(f, "*"),
115            Token::Slash => write!(f, "/"),
116            Token::Percent => write!(f, "%"),
117            Token::Caret => write!(f, "^"),
118            Token::DoubleStar => write!(f, "**"),
119            Token::DoublePipe => write!(f, "||"),
120            Token::DoubleAmpersand => write!(f, "&&"),
121            Token::Bang => write!(f, "!"),
122            Token::OpenParen => write!(f, "("),
123            Token::CloseParen => write!(f, ")"),
124            Token::OpenBrace => write!(f, "{{"),
125            Token::CloseBrace => write!(f, "}}"),
126            Token::OpenBracket => write!(f, "["),
127            Token::CloseBracket => write!(f, "]"),
128            Token::RangeInclusive => write!(f, "..="),
129            Token::RangeExclusive => write!(f, ".."),
130            Token::Dot => write!(f, "."),
131        }
132    }
133}
134
135type LexerExtra<'src> = extra::Err<Rich<'src, char, ChumskySpan<usize>>>;
136
137pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<SpannedToken>, LexerExtra<'src>> {
138    use chumsky::text;
139
140    let sign = just::<&str, _, LexerExtra<'src>>("-")
141        .to(String::from("-"))
142        .or_not();
143
144    let digits = |radix| text::digits::<_, LexerExtra<'src>>(radix).collect::<String>();
145
146    let aether = sign
147        .clone()
148        .then(digits(10))
149        .then_ignore(just::<&str, _, LexerExtra<'src>>("."))
150        .then(digits(10))
151        .map(|((sign, int_part), frac_part)| {
152            let mut number = String::new();
153            if let Some(sign) = sign {
154                number.push_str(&sign);
155            }
156            number.push_str(&int_part);
157            number.push('.');
158            number.push_str(&frac_part);
159            let value = number
160                .parse::<f64>()
161                .expect("parser should only construct valid f64 literals");
162            Token::Aether(OrderedFloat(value))
163        });
164
165    let arcana = sign.then(digits(10)).map(|(sign, value)| {
166        let mut number = String::new();
167        if let Some(sign) = sign {
168            number.push_str(&sign);
169        }
170        number.push_str(&value);
171        Token::Arcana(
172            number
173                .parse::<i64>()
174                .expect("parser should only construct valid i64 literals"),
175        )
176    });
177
178    let escape = just::<char, _, LexerExtra<'src>>('\\').ignore_then(
179        one_of::<_, _, LexerExtra<'src>>(r#""ntr\"#).map(|c| match c {
180            '"' => '"',
181            'n' => '\n',
182            't' => '\t',
183            'r' => '\r',
184            '\\' => '\\',
185            other => other, // fallback: just use the char as is
186        }),
187    );
188    let rune_char =
189        escape.or(any::<_, LexerExtra<'src>>().filter(|c: &char| *c != '"' && *c != '\\'));
190    let rune = just::<char, _, LexerExtra<'src>>('"')
191        .ignore_then(rune_char.repeated().collect::<String>())
192        .then_ignore(just::<char, _, LexerExtra<'src>>('"'))
193        .map(Token::Rune);
194
195    let ident = text::ident::<_, LexerExtra<'src>>().map(|ident: &'src str| match ident {
196        "forge" => Token::Forge,
197        "morph" => Token::Morph,
198        "core" => Token::Core,
199        "oracle" => Token::Oracle,
200        "orbit" => Token::Orbit,
201        "resume" => Token::Resume,
202        "eject" => Token::Eject,
203        "engrave" => Token::Engrave,
204        "reveal" => Token::Reveal,
205        "trans" => Token::Identifier("trans".to_string()),
206        "as" => Token::Identifier("as".to_string()),
207        "artifact" => Token::Artifact,
208        "ward" => Token::Ward,
209        "arcana" => Token::Type(Type::Arcana),
210        "aether" => Token::Type(Type::Aether),
211        "rune" => Token::Type(Type::Rune),
212        "omen" => Token::Type(Type::Omen),
213        "abyss" => Token::Type(Type::Abyss),
214        "scroll" => Token::Type(Type::Scroll),
215        "lexicon" => Token::Type(Type::Lexicon),
216        "materia" => Token::Type(Type::Materia),
217        "glyph" => Token::Type(Type::Glyph),
218        "boon" => Token::OmenLiteral(true),
219        "hex" => Token::OmenLiteral(false),
220        _ => Token::Identifier(ident.to_string()),
221    });
222
223    let multi_char_symbols = choice((
224        just::<&str, _, LexerExtra<'src>>("**=").to(Token::PowAetherAssign),
225        just::<&str, _, LexerExtra<'src>>("**").to(Token::DoubleStar),
226        just::<&str, _, LexerExtra<'src>>("^=").to(Token::PowArcanaAssign),
227        just::<&str, _, LexerExtra<'src>>("+=").to(Token::AddAssign),
228        just::<&str, _, LexerExtra<'src>>("-=").to(Token::SubAssign),
229        just::<&str, _, LexerExtra<'src>>("*=").to(Token::MulAssign),
230        just::<&str, _, LexerExtra<'src>>("/=").to(Token::DivAssign),
231        just::<&str, _, LexerExtra<'src>>("%=").to(Token::ModAssign),
232        just::<&str, _, LexerExtra<'src>>("=>").to(Token::FatArrow),
233        just::<&str, _, LexerExtra<'src>>("::").to(Token::DoubleColon),
234        just::<&str, _, LexerExtra<'src>>("->").to(Token::Arrow),
235        just::<&str, _, LexerExtra<'src>>("||").to(Token::DoublePipe),
236        just::<&str, _, LexerExtra<'src>>("&&").to(Token::DoubleAmpersand),
237        just::<&str, _, LexerExtra<'src>>("==").to(Token::Equal),
238        just::<&str, _, LexerExtra<'src>>("!=").to(Token::NotEqual),
239        just::<&str, _, LexerExtra<'src>>("<=").to(Token::LessThanOrEqual),
240        just::<&str, _, LexerExtra<'src>>(">=").to(Token::GreaterThanOrEqual),
241        just::<&str, _, LexerExtra<'src>>("..=").to(Token::RangeInclusive),
242        just::<&str, _, LexerExtra<'src>>("..").to(Token::RangeExclusive),
243    ));
244
245    let single_char_symbols = choice((
246        just::<char, _, LexerExtra<'src>>('=').to(Token::Assign),
247        just::<char, _, LexerExtra<'src>>('+').to(Token::Plus),
248        just::<char, _, LexerExtra<'src>>('-').to(Token::Minus),
249        just::<char, _, LexerExtra<'src>>('*').to(Token::Star),
250        just::<char, _, LexerExtra<'src>>('/').to(Token::Slash),
251        just::<char, _, LexerExtra<'src>>('%').to(Token::Percent),
252        just::<char, _, LexerExtra<'src>>('^').to(Token::Caret),
253        just::<char, _, LexerExtra<'src>>('<').to(Token::LessThan),
254        just::<char, _, LexerExtra<'src>>('>').to(Token::GreaterThan),
255        just::<char, _, LexerExtra<'src>>('!').to(Token::Bang),
256        just::<char, _, LexerExtra<'src>>(';').to(Token::Semicolon),
257        just::<char, _, LexerExtra<'src>>(':').to(Token::Colon),
258        just::<char, _, LexerExtra<'src>>(',').to(Token::Comma),
259        just::<char, _, LexerExtra<'src>>('(').to(Token::OpenParen),
260        just::<char, _, LexerExtra<'src>>(')').to(Token::CloseParen),
261        just::<char, _, LexerExtra<'src>>('{').to(Token::OpenBrace),
262        just::<char, _, LexerExtra<'src>>('}').to(Token::CloseBrace),
263        just::<char, _, LexerExtra<'src>>('[').to(Token::OpenBracket),
264        just::<char, _, LexerExtra<'src>>(']').to(Token::CloseBracket),
265        just::<char, _, LexerExtra<'src>>('.').to(Token::Dot),
266    ));
267
268    let token = choice((
269        aether,
270        arcana,
271        rune,
272        ident,
273        multi_char_symbols,
274        single_char_symbols,
275    ))
276    .map_with(|tok, extra| {
277        let span: ChumskySpan<usize> = extra.span();
278        (tok, SimpleSpan::new(span.start(), span.end()))
279    });
280
281    token
282        .padded_by(crate::parser::helpers::abyss_whitespace())
283        .repeated()
284        .collect()
285        .then_ignore(end())
286}