// august_build/lexer.rs
use std::{fmt::Display, ops::Range};

use chumsky::{
    prelude::*,
    text::{ident, Character},
};

/// A single lexical token produced by [`lexer`].
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum Token {
    /// Double-quoted string literal; escape sequences are already resolved.
    String(String),
    /// C-style identifier (letters, digits, underscore).
    Ident(String),
    /// "Raw" identifier: a run of miscellaneous printable symbol characters
    /// that is not matched by any other token rule.
    RawIdent(String),

    /// `unit` keyword.
    Unit,
    /// `expose` keyword.
    Expose,
    /// `as` keyword.
    As,

    /// `@`
    Attr,
    /// `~`
    Tilde,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `,`
    Comma,

    /// Opening bracket of the given [`Delim`] kind.
    OpenDelim(Delim),
    /// Closing bracket of the given [`Delim`] kind.
    CloseDelim(Delim),

    /// A character the lexer could not match; emitted during error recovery.
    Err(char),
}

/// The kind of a paired bracket delimiter.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum Delim {
    /// `(` / `)`
    Round,
    /// `[` / `]`
    Square,
    /// `<` / `>`
    Arrow,
    /// `{` / `}`
    Curly,
}

impl Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use Delim::{Arrow, Curly, Round, Square};
        use Token::{
            As, Attr, CloseDelim, Comma, DoubleArrow, DoubleColon, Err, Expose, Ident, OpenDelim,
            RawIdent, String, Tilde, Unit,
        };

        match self {
            String(s) => f.write_fmt(format_args!("\"{s}\"")),
            Ident(i) | RawIdent(i) => i.fmt(f),
            Unit => "unit".fmt(f),
            Expose => "expose".fmt(f),
            As => "as".fmt(f),
            Attr => "@".fmt(f),
            Tilde => "~".fmt(f),
            Comma => ",".fmt(f),
            DoubleColon => "::".fmt(f),
            DoubleArrow => "=>".fmt(f),
            OpenDelim(Round) => "(".fmt(f),
            OpenDelim(Square) => "[".fmt(f),
            OpenDelim(Arrow) => "<".fmt(f),
            OpenDelim(Curly) => "{".fmt(f),
            CloseDelim(Round) => ")".fmt(f),
            CloseDelim(Square) => "]".fmt(f),
            CloseDelim(Arrow) => ">".fmt(f),
            CloseDelim(Curly) => "}".fmt(f),
            Err(c) => c.fmt(f),
        }
    }
}

/// Builds the lexer: turns a `char` stream into spanned [`Token`]s.
///
/// Unmatched characters are recovered as [`Token::Err`] so parsing can
/// continue past lexical errors. Trailing input must be fully consumed
/// (`end()`), otherwise a `Simple<char>` error is reported.
pub fn lexer() -> impl Parser<char, Vec<(Token, Range<usize>)>, Error = Simple<char>> {
    // `\x`-style escapes inside string literals, mapped to their real chars.
    let escape = just('\\').ignore_then(
        just('\\')
            .or(just('/'))
            .or(just('"'))
            .or(just('b').to('\x08'))
            .or(just('f').to('\x0C'))
            .or(just('n').to('\n'))
            .or(just('r').to('\r'))
            .or(just('t').to('\t')),
    );

    // Double-quoted string literal with escapes resolved at lex time.
    let str_lit = filter(|c| *c != '\\' && *c != '"')
        .or(escape)
        .repeated()
        .delimited_by(just('"'), just('"'))
        .collect();

    // Punctuation tokens. Unlike word keywords, these cannot be a prefix of
    // an identifier, so matching them eagerly is safe.
    let symbol = choice((
        just('@').to(Token::Attr),
        just('~').to(Token::Tilde),
        just("::").to(Token::DoubleColon),
        just("=>").to(Token::DoubleArrow),
        just(',').to(Token::Comma),
    ));

    let delim = choice((
        just('(').to(Token::OpenDelim(Delim::Round)),
        just('[').to(Token::OpenDelim(Delim::Square)),
        just('<').to(Token::OpenDelim(Delim::Arrow)),
        just('{').to(Token::OpenDelim(Delim::Curly)),
        just(')').to(Token::CloseDelim(Delim::Round)),
        just(']').to(Token::CloseDelim(Delim::Square)),
        just('>').to(Token::CloseDelim(Delim::Arrow)),
        just('}').to(Token::CloseDelim(Delim::Curly)),
    ));

    // Word keywords are recognised only after a *complete* identifier has
    // been consumed. Matching them with `just("as")` etc. before the
    // identifier rule would wrongly split e.g. "asdf" into `as` + `df`.
    let word = ident().map(|s: String| match s.as_str() {
        "expose" => Token::Expose,
        "as" => Token::As,
        "unit" => Token::Unit,
        _ => Token::Ident(s),
    });

    // Fallback identifier made of miscellaneous printable symbols; tried
    // last so it never shadows strings, delimiters, or punctuation that
    // happen to share characters with its ranges.
    let raw_ident = filter(|c: &char| {
        matches!(c.to_char(),
        '!'..='&' | '*'..='+' | '-'..='.' | '0'..=';' | '=' | '?'..='Z' | '^'..='z' | '|')
    })
    .repeated()
    .at_least(1)
    .collect::<String>();

    let token = choice((
        symbol,
        delim,
        str_lit.map(Token::String),
        word,
        raw_ident.map(Token::RawIdent),
    ))
    .padded()
    .map_with_span(|t, span| (t, span));

    // Error recovery: skip characters no token rule accepts, emitting each
    // skipped run's next char as a spanned `Token::Err` so the parser can
    // keep going and report every lexical problem in one pass.
    token
        .clone()
        .recover_with(skip_parser(
            token
                .not()
                .repeated()
                .ignore_then(any().rewind())
                .map_with_span(|c, span| (Token::Err(c), span)),
        ))
        .repeated()
        .padded()
        .then_ignore(end())
        .labelled("tokens")
}