bril_frontend/
lexer.rs

1// Copyright (C) 2024 Ethan Uppal.
2//
3// This Source Code Form is subject to the terms of the Mozilla Public License,
4// v. 2.0. If a copy of the MPL was not distributed with this file, You can
5// obtain one at https://mozilla.org/MPL/2.0/.
6
7use logos::Logos;
8
/// Strips the first and last byte (the delimiters) from the text of a quoted token.
///
/// `slice` is expected to be the complete text of a token such as `"foo/bar"`;
/// the returned slice borrows from it and excludes the two delimiters.
///
/// Returns `None` rather than panicking when `slice` is shorter than two
/// bytes, or when removing one byte from either end would split a multi-byte
/// UTF-8 character.
pub fn extract_string_from_token(slice: &str) -> Option<&str> {
    // `checked_sub` guards the empty-slice case where `len() - 1` would underflow.
    let end = slice.len().checked_sub(1)?;
    // `get` rejects `1..end` when `end < 1` or when either bound is not a char boundary.
    slice.get(1..end)
}
12
/// Returns the second character of `slice`, i.e. the payload of a `'x'`-shaped token.
///
/// Yields `None` when `slice` contains fewer than two characters.
pub fn extract_character_from_token(slice: &str) -> Option<char> {
    let mut characters = slice.chars();
    // Step over the opening delimiter; give up if the slice is empty.
    characters.next()?;
    characters.next()
}
16
17#[derive(Logos, Debug)]
18#[logos(skip r"[ \t\n\f]+")]
19pub enum Token<'a> {
20    #[regex(r"#[^\n]*\n", logos::skip)]
21    Comment,
22
23    #[token("import")]
24    Import,
25    #[token("from")]
26    From,
27    #[token("as")]
28    As,
29
30    #[regex(r"@[\p{XID_Start}_]\p{XID_Continue}*")]
31    FunctionName(&'a str),
32    #[regex(r"[\p{XID_Start}_]\p{XID_Continue}*")]
33    Identifier(&'a str),
34    #[regex(r"\.[\p{XID_Start}_][\p{XID_Continue}\.]*")]
35    Label(&'a str),
36    #[regex(r#""(?:[^"]|\\")*""#, |lexer| extract_string_from_token(lexer.slice()))]
37    Path(&'a str),
38
39    #[token("{")]
40    LeftBrace,
41    #[token("}")]
42    RightBrace,
43    #[token("(")]
44    LeftPar,
45    #[token(")")]
46    RightPar,
47    #[token(",")]
48    Comma,
49    #[token(":")]
50    Colon,
51    #[token("<")]
52    LeftAngle,
53    #[token(">")]
54    RightAngle,
55    #[token(";")]
56    Semi,
57    #[token("=")]
58    Equals,
59
60    #[regex("-?[0-9][0-9]*", |lexer| lexer.slice().parse().ok())]
61    Integer(i64),
62    #[regex(r"-?[0-9][0-9]*\.[0-9][0-9]*", |lexer| lexer.slice().parse().ok())]
63    Float(f64),
64    #[regex("'.'", |lexer| extract_character_from_token(lexer.slice()))]
65    Character(char),
66    #[token("true")]
67    True,
68    #[token("false")]
69    False,
70}
71
72impl Token<'_> {
73    pub fn pattern_name(&self) -> &'static str {
74        match self {
75            Self::Comment => unreachable!(),
76            Self::Import => "import",
77            Self::From => "from",
78            Self::As => "as",
79            Self::FunctionName(_) => "<function name>",
80            Self::Identifier(_) => "<identifier>",
81            Self::Label(_) => "<label>",
82            Self::Path(_) => "<path>",
83            Self::LeftBrace => "(",
84            Self::RightBrace => "}",
85            Self::LeftPar => "(",
86            Self::RightPar => ")",
87            Self::Comma => ",",
88            Self::Colon => ":",
89            Self::LeftAngle => "<",
90            Self::RightAngle => ">",
91            Self::Semi => ";",
92            Self::Equals => "=",
93            Self::Integer(_) => "<integer>",
94            Self::Float(_) => "<float>",
95            Self::Character(_) => "<character>",
96            Self::True => "true",
97            Self::False => "false",
98        }
99    }
100}
101
102impl<'a> Token<'a> {
103    pub fn matches_against(&self, pattern: Token<'a>) -> bool {
104        matches!(
105            (self, pattern),
106            (Self::Import, Self::Import)
107                | (Self::From, Self::From)
108                | (Self::As, Self::As)
109                | (Self::FunctionName(_), Self::FunctionName(_))
110                | (Self::Identifier(_), Self::Identifier(_))
111                | (Self::Label(_), Self::Label(_))
112                | (Self::Path(_), Self::Path(_))
113                | (Self::LeftBrace, Self::LeftBrace)
114                | (Self::RightBrace, Self::RightBrace)
115                | (Self::LeftPar, Self::LeftPar)
116                | (Self::RightPar, Self::RightPar)
117                | (Self::Comma, Self::Comma)
118                | (Self::Colon, Self::Colon)
119                | (Self::LeftAngle, Self::LeftAngle)
120                | (Self::RightAngle, Self::RightAngle)
121                | (Self::Semi, Self::Semi)
122                | (Self::Equals, Self::Equals)
123                | (Self::Integer(_), Self::Integer(_))
124                | (Self::Float(_), Self::Float(_))
125                | (Self::Character(_), Self::Character(_))
126                | (Self::True, Self::True)
127                | (Self::False, Self::False)
128        )
129    }
130
131    pub fn assume_function_name(self) -> &'a str {
132        let Self::FunctionName(function_name) = self else {
133            panic!("Expected function name");
134        };
135        function_name
136    }
137
138    pub fn assume_identifier(self) -> &'a str {
139        let Self::Identifier(identifier) = self else {
140            panic!("Expected identifier");
141        };
142        identifier
143    }
144
145    pub fn assume_identifier_like(self) -> &'a str {
146        match self {
147            Token::Import => "import",
148            Token::From => "from",
149            Token::As => "as",
150            Token::True => "true",
151            Token::False => "false",
152            Token::Identifier(identifier) => identifier,
153            _ => panic!("Expected identifier or keyword"),
154        }
155    }
156
157    pub fn assume_label(self) -> &'a str {
158        let Self::Label(label) = self else {
159            panic!("Expected label");
160        };
161        label
162    }
163
164    pub fn assume_path(self) -> &'a str {
165        let Self::Path(path) = self else {
166            panic!("Expected path");
167        };
168        path
169    }
170
171    pub fn assume_integer(self) -> i64 {
172        let Self::Integer(integer) = self else {
173            panic!("Expected integer");
174        };
175        integer
176    }
177
178    pub fn assume_float(self) -> f64 {
179        let Self::Float(float) = self else {
180            panic!("Expected float");
181        };
182        float
183    }
184
185    pub fn assume_character(self) -> char {
186        let Self::Character(character) = self else {
187            panic!("Expected character");
188        };
189        character
190    }
191}
192
193impl Clone for Token<'_> {
194    fn clone(&self) -> Self {
195        match self {
196            Self::Comment => unreachable!(),
197            Self::Import => Self::Import,
198            Self::From => Self::From,
199            Self::As => Self::As,
200            Self::FunctionName(function_name) => Self::FunctionName(function_name),
201            Self::Identifier(identifier) => Self::Identifier(identifier),
202            Self::Label(label) => Self::Label(label),
203            Self::Path(path) => Self::Path(path),
204            Self::LeftBrace => Self::LeftBrace,
205            Self::RightBrace => Self::RightBrace,
206            Self::LeftPar => Self::LeftPar,
207            Self::RightPar => Self::RightPar,
208            Self::Comma => Self::Comma,
209            Self::Colon => Self::Colon,
210            Self::LeftAngle => Self::LeftAngle,
211            Self::RightAngle => Self::RightAngle,
212            Self::Semi => Self::Semi,
213            Self::Equals => Self::Equals,
214            Self::Integer(integer) => Self::Integer(*integer),
215            Self::Float(float) => Self::Float(*float),
216            Self::Character(character) => Self::Character(*character),
217            Self::True => Self::True,
218            Self::False => Self::False,
219        }
220    }
221}
222
223pub trait TokenPattern<'a> {
224    fn matches(self) -> impl Iterator<Item = Token<'a>>;
225}
226
227impl<'a, T: IntoIterator<Item = Token<'a>>> TokenPattern<'a> for T {
228    fn matches(self) -> impl Iterator<Item = Token<'a>> {
229        self.into_iter()
230    }
231}
232
233impl<'a> TokenPattern<'a> for Token<'a> {
234    fn matches(self) -> impl Iterator<Item = Token<'a>> {
235        [self].into_iter()
236    }
237}
238
239pub const KEYWORD_LIKE: [Token<'static>; 6] = [
240    Token::Identifier(""),
241    Token::Import,
242    Token::As,
243    Token::From,
244    Token::True,
245    Token::False,
246];