august_build/
lexer.rs

1use std::{fmt::Display, ops::Range};
2
3use chumsky::{
4    prelude::*,
5    text::{ident, Character},
6};
7
8#[derive(Debug, Clone, Hash, PartialEq, Eq)]
9pub enum Token {
10    String(String),
11    Ident(String),
12    RawIdent(String),
13
14    Unit,
15    Expose,
16    As,
17
18    Attr,
19    Tilde,
20    DoubleColon,
21    DoubleArrow,
22    Comma,
23
24    OpenDelim(Delim),
25    CloseDelim(Delim),
26
27    Err(char),
28}
29
/// The kinds of bracketing delimiter; used as the payload of
/// `Token::OpenDelim` and `Token::CloseDelim`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Delim {
    /// `(` and `)`.
    Round,
    /// `[` and `]`.
    Square,
    /// `<` and `>`.
    Arrow,
    /// `{` and `}`.
    Curly,
}
37
38impl Display for Token {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        use Delim::{Arrow, Curly, Round, Square};
41        use Token::{
42            As, Attr, CloseDelim, Comma, DoubleArrow, DoubleColon, Err, Expose, Ident, OpenDelim,
43            RawIdent, String, Tilde, Unit,
44        };
45
46        match self {
47            String(s) => f.write_fmt(format_args!("\"{s}\"")),
48            Ident(i) | RawIdent(i) => i.fmt(f),
49            Unit => "unit".fmt(f),
50            Expose => "expose".fmt(f),
51            As => "as".fmt(f),
52            Attr => "@".fmt(f),
53            Tilde => "~".fmt(f),
54            Comma => ",".fmt(f),
55            DoubleColon => "::".fmt(f),
56            DoubleArrow => "=>".fmt(f),
57            OpenDelim(Round) => "(".fmt(f),
58            OpenDelim(Square) => "[".fmt(f),
59            OpenDelim(Arrow) => "<".fmt(f),
60            OpenDelim(Curly) => "{".fmt(f),
61            CloseDelim(Round) => ")".fmt(f),
62            CloseDelim(Square) => "]".fmt(f),
63            CloseDelim(Arrow) => ">".fmt(f),
64            CloseDelim(Curly) => "}".fmt(f),
65            Err(c) => c.fmt(f),
66        }
67    }
68}
69
70pub fn lexer() -> impl Parser<char, Vec<(Token, Range<usize>)>, Error = Simple<char>> {
71    let escape = just('\\').ignore_then(
72        just('\\')
73            .or(just('/'))
74            .or(just('"'))
75            .or(just('b').to('\x08'))
76            .or(just('f').to('\x0C'))
77            .or(just('n').to('\n'))
78            .or(just('r').to('\r'))
79            .or(just('t').to('\t')),
80    );
81
82    let str_lit = filter(|c| *c != '\\' && *c != '"')
83        .or(escape)
84        .repeated()
85        .delimited_by(just('"'), just('"'))
86        .collect();
87
88    let keywords = choice((
89        just("expose").to(Token::Expose),
90        just("as").to(Token::As),
91        just("unit").to(Token::Unit),
92        just('@').to(Token::Attr),
93        just('~').to(Token::Tilde),
94        just("::").to(Token::DoubleColon),
95        just("=>").to(Token::DoubleArrow),
96        just(',').to(Token::Comma),
97    ));
98
99    let delim = choice((
100        just('(').to(Token::OpenDelim(Delim::Round)),
101        just('[').to(Token::OpenDelim(Delim::Square)),
102        just('<').to(Token::OpenDelim(Delim::Arrow)),
103        just('{').to(Token::OpenDelim(Delim::Curly)),
104        just(')').to(Token::CloseDelim(Delim::Round)),
105        just(']').to(Token::CloseDelim(Delim::Square)),
106        just('>').to(Token::CloseDelim(Delim::Arrow)),
107        just('}').to(Token::CloseDelim(Delim::Curly)),
108    ));
109
110    let raw_ident = filter(|c: &char| {
111        matches!(c.to_char(),
112        '!'..='&' | '*'..='+' | '-'..='.' | '0'..=';' | '=' | '?'..='Z' | '^'..='z' | '|')
113    })
114    .repeated()
115    .at_least(1)
116    .collect::<String>();
117
118    let token = choice((
119        keywords,
120        delim,
121        str_lit.map(Token::String),
122        ident().map(Token::Ident),
123        raw_ident.map(Token::RawIdent),
124    ))
125    .padded()
126    .map_with_span(|t, span| (t, span));
127
128    token
129        .clone()
130        .recover_with(skip_parser(
131            token
132                .not()
133                .repeated()
134                .ignore_then(any().rewind())
135                .map_with_span(|c, span| (Token::Err(c), span)),
136        ))
137        .repeated()
138        .padded()
139        .then_ignore(end())
140        .labelled("tokens")
141}