bril_frontend/
lexer.rs

// Copyright (C) 2024 Ethan Uppal.
//
// This Source Code Form is subject to the terms of the Mozilla Public License,
// v. 2.0. If a copy of the MPL was not distributed with this file, You can
// obtain one at https://mozilla.org/MPL/2.0/.
6
7use logos::Logos;
8
/// Strips the first and last byte (the delimiters) from a quoted token slice.
///
/// Returns the text between the delimiters, or `None` when `slice` is too
/// short to contain both delimiters or when removing one byte from either end
/// would not land on a UTF-8 character boundary. No escape sequences are
/// processed; the inner text is returned verbatim.
pub fn extract_string_from_token(slice: &str) -> Option<&str> {
    // `checked_sub` guards the empty input; `get` guards a one-byte input
    // (range `1..0`) and non-boundary cuts, all of which used to panic.
    let end = slice.len().checked_sub(1)?;
    slice.get(1..end)
}
12
/// Returns the second character of a token slice, i.e. the character that
/// follows the opening delimiter.
///
/// Yields `None` when `slice` holds fewer than two characters.
pub fn extract_character_from_token(slice: &str) -> Option<char> {
    let mut remaining = slice.chars();
    // Discard the leading delimiter; bail out if there is not even that.
    remaining.next()?;
    remaining.next()
}
16
17#[derive(Logos, Debug)]
18#[logos(skip r"[ \t\f]+")]
19pub enum Token<'a> {
20    #[regex(r"#[^\n]*", |lexer| lexer.slice().trim())]
21    Comment(&'a str),
22    #[token("\n")]
23    Newline,
24
25    #[token("import")]
26    Import,
27    #[token("from")]
28    From,
29    #[token("as")]
30    As,
31
32    #[regex(r"@[\p{XID_Start}_]\p{XID_Continue}*")]
33    FunctionName(&'a str),
34    #[regex(r"[\p{XID_Start}_]\p{XID_Continue}*")]
35    Identifier(&'a str),
36    #[regex(r"\.[\p{XID_Start}_][\p{XID_Continue}\.]*")]
37    Label(&'a str),
38    #[regex(r#""(?:[^"]|\\")*""#, |lexer| extract_string_from_token(lexer.slice()))]
39    Path(&'a str),
40
41    #[token("{")]
42    LeftBrace,
43    #[token("}")]
44    RightBrace,
45    #[token("(")]
46    LeftPar,
47    #[token(")")]
48    RightPar,
49    #[token(",")]
50    Comma,
51    #[token(":")]
52    Colon,
53    #[token("<")]
54    LeftAngle,
55    #[token(">")]
56    RightAngle,
57    #[token(";")]
58    Semi,
59    #[token("=")]
60    Equals,
61
62    #[regex("-?[0-9][0-9]*", |lexer| lexer.slice().parse().ok())]
63    Integer(i64),
64    #[regex(r"-?[0-9]*\.[0-9][0-9]*", |lexer| lexer.slice().parse().ok())]
65    Float(f64),
66    #[regex("'.'", |lexer| extract_character_from_token(lexer.slice()))]
67    Character(char),
68    #[token("true")]
69    True,
70    #[token("false")]
71    False,
72}
73
74impl Token<'_> {
75    pub fn pattern_name(&self) -> &'static str {
76        match self {
77            Self::Comment(_) => "<comment>",
78            Self::Newline => "<newline>",
79            Self::Import => "import",
80            Self::From => "from",
81            Self::As => "as",
82            Self::FunctionName(_) => "<function name>",
83            Self::Identifier(_) => "<identifier>",
84            Self::Label(_) => "<label>",
85            Self::Path(_) => "<path>",
86            Self::LeftBrace => "(",
87            Self::RightBrace => "}",
88            Self::LeftPar => "(",
89            Self::RightPar => ")",
90            Self::Comma => ",",
91            Self::Colon => ":",
92            Self::LeftAngle => "<",
93            Self::RightAngle => ">",
94            Self::Semi => ";",
95            Self::Equals => "=",
96            Self::Integer(_) => "<integer>",
97            Self::Float(_) => "<float>",
98            Self::Character(_) => "<character>",
99            Self::True => "true",
100            Self::False => "false",
101        }
102    }
103}
104
105impl<'a> Token<'a> {
106    pub fn matches_against(&self, pattern: Token<'a>) -> bool {
107        matches!(
108            (self, pattern),
109            (Self::Comment(_), Self::Comment(_))
110                | (Self::Newline, Self::Newline)
111                | (Self::Import, Self::Import)
112                | (Self::From, Self::From)
113                | (Self::As, Self::As)
114                | (Self::FunctionName(_), Self::FunctionName(_))
115                | (Self::Identifier(_), Self::Identifier(_))
116                | (Self::Label(_), Self::Label(_))
117                | (Self::Path(_), Self::Path(_))
118                | (Self::LeftBrace, Self::LeftBrace)
119                | (Self::RightBrace, Self::RightBrace)
120                | (Self::LeftPar, Self::LeftPar)
121                | (Self::RightPar, Self::RightPar)
122                | (Self::Comma, Self::Comma)
123                | (Self::Colon, Self::Colon)
124                | (Self::LeftAngle, Self::LeftAngle)
125                | (Self::RightAngle, Self::RightAngle)
126                | (Self::Semi, Self::Semi)
127                | (Self::Equals, Self::Equals)
128                | (Self::Integer(_), Self::Integer(_))
129                | (Self::Float(_), Self::Float(_))
130                | (Self::Character(_), Self::Character(_))
131                | (Self::True, Self::True)
132                | (Self::False, Self::False)
133        )
134    }
135
136    pub fn assume_comment(self) -> &'a str {
137        let Self::Comment(comment) = self else {
138            panic!("Expected comment");
139        };
140        comment
141    }
142
143    pub fn assume_function_name(self) -> &'a str {
144        let Self::FunctionName(function_name) = self else {
145            panic!("Expected function name");
146        };
147        function_name
148    }
149
150    pub fn assume_identifier(self) -> &'a str {
151        let Self::Identifier(identifier) = self else {
152            panic!("Expected identifier");
153        };
154        identifier
155    }
156
157    pub fn assume_identifier_like(self) -> &'a str {
158        match self {
159            Token::Import => "import",
160            Token::From => "from",
161            Token::As => "as",
162            Token::True => "true",
163            Token::False => "false",
164            Token::Identifier(identifier) => identifier,
165            _ => panic!("Expected identifier or keyword"),
166        }
167    }
168
169    pub fn assume_label(self) -> &'a str {
170        let Self::Label(label) = self else {
171            panic!("Expected label");
172        };
173        label
174    }
175
176    pub fn assume_path(self) -> &'a str {
177        let Self::Path(path) = self else {
178            panic!("Expected path");
179        };
180        path
181    }
182
183    pub fn assume_integer(self) -> i64 {
184        let Self::Integer(integer) = self else {
185            panic!("Expected integer");
186        };
187        integer
188    }
189
190    pub fn assume_float(self) -> f64 {
191        let Self::Float(float) = self else {
192            panic!("Expected float");
193        };
194        float
195    }
196
197    pub fn assume_character(self) -> char {
198        let Self::Character(character) = self else {
199            panic!("Expected character");
200        };
201        character
202    }
203}
204
205impl Clone for Token<'_> {
206    fn clone(&self) -> Self {
207        match self {
208            Self::Comment(comment) => Self::Comment(comment),
209            Self::Newline => Self::Newline,
210            Self::Import => Self::Import,
211            Self::From => Self::From,
212            Self::As => Self::As,
213            Self::FunctionName(function_name) => Self::FunctionName(function_name),
214            Self::Identifier(identifier) => Self::Identifier(identifier),
215            Self::Label(label) => Self::Label(label),
216            Self::Path(path) => Self::Path(path),
217            Self::LeftBrace => Self::LeftBrace,
218            Self::RightBrace => Self::RightBrace,
219            Self::LeftPar => Self::LeftPar,
220            Self::RightPar => Self::RightPar,
221            Self::Comma => Self::Comma,
222            Self::Colon => Self::Colon,
223            Self::LeftAngle => Self::LeftAngle,
224            Self::RightAngle => Self::RightAngle,
225            Self::Semi => Self::Semi,
226            Self::Equals => Self::Equals,
227            Self::Integer(integer) => Self::Integer(*integer),
228            Self::Float(float) => Self::Float(*float),
229            Self::Character(character) => Self::Character(*character),
230            Self::True => Self::True,
231            Self::False => Self::False,
232        }
233    }
234}
235
236pub trait TokenPattern<'a> {
237    fn matches(self) -> impl Iterator<Item = Token<'a>>;
238}
239
240impl<'a, T: IntoIterator<Item = Token<'a>>> TokenPattern<'a> for T {
241    fn matches(self) -> impl Iterator<Item = Token<'a>> {
242        self.into_iter()
243    }
244}
245
246impl<'a> TokenPattern<'a> for Token<'a> {
247    fn matches(self) -> impl Iterator<Item = Token<'a>> {
248        [self].into_iter()
249    }
250}
251
252pub const KEYWORD_LIKE: [Token<'static>; 6] = [
253    Token::Identifier(""),
254    Token::Import,
255    Token::As,
256    Token::From,
257    Token::True,
258    Token::False,
259];