// oak_nix/lexer/mod.rs

1#![doc = include_str!("readme.md")]
2/// Token types for the Nix language.
3pub mod token_type;
4
5use crate::{language::NixLanguage, lexer::token_type::NixTokenType};
6use oak_core::{
7    Source,
8    lexer::{LexOutput, Lexer, LexerCache, LexerState},
9    source::TextEdit,
10};
11
/// Shorthand for the `oak_core` lexer state specialized to the Nix language.
pub(crate) type State<'a, S> = LexerState<'a, S, NixLanguage>;
13
/// Lexer for the Nix language.
///
/// Borrows a [`NixLanguage`] configuration for the `'config` lifetime.
#[derive(Clone, Debug)]
pub struct NixLexer<'config> {
    // Language configuration supplied at construction.
    // NOTE(review): no lexing method in this module currently reads `config`
    // — presumably reserved for future configurable behavior; confirm.
    config: &'config NixLanguage,
}
19
20impl<'config> NixLexer<'config> {
21    /// Creates a new `NixLexer` with the given configuration.
22    pub fn new(config: &'config NixLanguage) -> Self {
23        Self { config }
24    }
25}
26
27impl NixLexer<'_> {
28    /// Runs the lexer on the given state, tokenizing the entire source.
29    pub fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
30        while state.not_at_end() {
31            if self.skip_whitespace(state) {
32                continue;
33            }
34            if self.lex_newline(state) {
35                continue;
36            }
37            if self.lex_comment(state) {
38                continue;
39            }
40            if self.lex_string(state) {
41                continue;
42            }
43            if self.lex_number(state) {
44                continue;
45            }
46            if self.lex_identifier(state) {
47                continue;
48            }
49            if self.lex_operator(state) {
50                continue;
51            }
52
53            // If no pattern matches, add error kind
54            let start_pos = state.get_position();
55            if let Some(ch) = state.peek() {
56                state.advance(ch.len_utf8());
57                state.add_token(NixTokenType::Error, start_pos, state.get_position());
58            }
59        }
60        Ok(())
61    }
62}
63
/// Wires `NixLexer` into the generic `oak_core` lexer interface.
impl<'config> Lexer<NixLanguage> for NixLexer<'config> {
    // NOTE(review): `_edits` is ignored, so every call re-lexes the full
    // source from scratch — presumably incremental re-lexing is not
    // implemented yet; confirm against oak_core's expectations.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<NixLanguage>) -> LexOutput<NixLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        // Append the EOF marker only on success; on error the partial token
        // stream is handed to the cache as-is.
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}
74
75impl NixLexer<'_> {
76    /// Skips whitespace characters
77    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78        let start_pos = state.get_position();
79
80        while let Some(ch) = state.peek() {
81            if ch == ' ' || ch == '\t' {
82                state.advance(ch.len_utf8());
83            }
84            else {
85                break;
86            }
87        }
88
89        if state.get_position() > start_pos {
90            state.add_token(NixTokenType::Whitespace, start_pos, state.get_position());
91            true
92        }
93        else {
94            false
95        }
96    }
97
98    /// Handles newlines
99    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
100        let start_pos = state.get_position();
101
102        if let Some('\n') = state.peek() {
103            state.advance(1);
104            state.add_token(NixTokenType::Newline, start_pos, state.get_position());
105            true
106        }
107        else if let Some('\r') = state.peek() {
108            state.advance(1);
109            if let Some('\n') = state.peek() {
110                state.advance(1);
111            }
112            state.add_token(NixTokenType::Newline, start_pos, state.get_position());
113            true
114        }
115        else {
116            false
117        }
118    }
119
120    /// Handles comments
121    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122        let start_pos = state.get_position();
123
124        if let Some('#') = state.peek() {
125            state.advance(1);
126
127            // Read to the end of the line
128            while let Some(ch) = state.peek() {
129                if ch == '\n' || ch == '\r' {
130                    break;
131                }
132                state.advance(ch.len_utf8());
133            }
134
135            state.add_token(NixTokenType::Comment, start_pos, state.get_position());
136            true
137        }
138        else {
139            false
140        }
141    }
142
143    /// Handles string literals
144    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
145        let start_pos = state.get_position();
146
147        if let Some('"') = state.peek() {
148            state.advance(1);
149
150            while let Some(ch) = state.peek() {
151                if ch == '"' {
152                    state.advance(1);
153                    break;
154                }
155                else if ch == '\\' {
156                    state.advance(1);
157                    if let Some(_) = state.peek() {
158                        state.advance(1);
159                    }
160                }
161                else {
162                    state.advance(ch.len_utf8());
163                }
164            }
165
166            state.add_token(NixTokenType::String, start_pos, state.get_position());
167            true
168        }
169        else {
170            false
171        }
172    }
173
174    /// Handles number literals
175    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176        let start_pos = state.get_position();
177
178        if let Some(ch) = state.peek() {
179            if ch.is_ascii_digit() {
180                state.advance(1);
181                while let Some(ch) = state.peek() {
182                    if ch.is_ascii_digit() || ch == '.' {
183                        state.advance(1);
184                    }
185                    else {
186                        break;
187                    }
188                }
189                state.add_token(NixTokenType::Number, start_pos, state.get_position());
190                return true;
191            }
192        }
193        false
194    }
195
196    /// Handles identifiers and keywords
197    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
198        let start_pos = state.get_position();
199
200        if let Some(ch) = state.peek() {
201            if ch.is_alphabetic() || ch == '_' {
202                state.advance(ch.len_utf8());
203                while let Some(ch) = state.peek() {
204                    if ch.is_alphanumeric() || ch == '_' || ch == '-' || ch == '\'' {
205                        state.advance(ch.len_utf8());
206                    }
207                    else {
208                        break;
209                    }
210                }
211
212                let text = state.get_text_in((start_pos..state.get_position()).into());
213                let kind = match &*text {
214                    "let" => NixTokenType::Let,
215                    "in" => NixTokenType::In,
216                    "if" => NixTokenType::If,
217                    "then" => NixTokenType::Then,
218                    "else" => NixTokenType::Else,
219                    "with" => NixTokenType::With,
220                    "inherit" => NixTokenType::Inherit,
221                    "rec" => NixTokenType::Rec,
222                    "import" => NixTokenType::Import,
223                    "true" | "false" => NixTokenType::Boolean,
224                    "null" => NixTokenType::Null,
225                    _ => NixTokenType::Identifier,
226                };
227
228                state.add_token(kind, start_pos, state.get_position());
229                true
230            }
231            else {
232                false
233            }
234        }
235        else {
236            false
237        }
238    }
239
240    /// Handles operators
241    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
242        let start_pos = state.get_position();
243
244        if let Some(ch) = state.peek() {
245            let kind = match ch {
246                '+' => {
247                    state.advance(1);
248                    if let Some('+') = state.peek() {
249                        state.advance(1);
250                        NixTokenType::Concatenation
251                    }
252                    else {
253                        NixTokenType::Plus
254                    }
255                }
256                '-' => {
257                    state.advance(1);
258                    if let Some('>') = state.peek() {
259                        state.advance(1);
260                        NixTokenType::Implication
261                    }
262                    else {
263                        NixTokenType::Minus
264                    }
265                }
266                '*' => {
267                    state.advance(1);
268                    NixTokenType::Star
269                }
270                '/' => {
271                    state.advance(1);
272                    if let Some('/') = state.peek() {
273                        state.advance(1);
274                        NixTokenType::Update
275                    }
276                    else {
277                        NixTokenType::Slash
278                    }
279                }
280                '%' => {
281                    state.advance(1);
282                    NixTokenType::Percent
283                }
284                '=' => {
285                    state.advance(1);
286                    if let Some('=') = state.peek() {
287                        state.advance(1);
288                        NixTokenType::Equal
289                    }
290                    else {
291                        NixTokenType::Assign
292                    }
293                }
294                '!' => {
295                    state.advance(1);
296                    if let Some('=') = state.peek() {
297                        state.advance(1);
298                        NixTokenType::NotEqual
299                    }
300                    else {
301                        return false;
302                    }
303                }
304                '<' => {
305                    state.advance(1);
306                    if let Some('=') = state.peek() {
307                        state.advance(1);
308                        NixTokenType::LessEqual
309                    }
310                    else {
311                        NixTokenType::Less
312                    }
313                }
314                '>' => {
315                    state.advance(1);
316                    if let Some('=') = state.peek() {
317                        state.advance(1);
318                        NixTokenType::GreaterEqual
319                    }
320                    else {
321                        NixTokenType::Greater
322                    }
323                }
324                '&' => {
325                    state.advance(1);
326                    if let Some('&') = state.peek() {
327                        state.advance(1);
328                        NixTokenType::LogicalAnd
329                    }
330                    else {
331                        return false;
332                    }
333                }
334                '|' => {
335                    state.advance(1);
336                    if let Some('|') = state.peek() {
337                        state.advance(1);
338                        NixTokenType::LogicalOr
339                    }
340                    else {
341                        return false;
342                    }
343                }
344                '?' => {
345                    state.advance(1);
346                    NixTokenType::Question
347                }
348                '(' => {
349                    state.advance(1);
350                    NixTokenType::LeftParen
351                }
352                ')' => {
353                    state.advance(1);
354                    NixTokenType::RightParen
355                }
356                '{' => {
357                    state.advance(1);
358                    NixTokenType::LeftBrace
359                }
360                '}' => {
361                    state.advance(1);
362                    NixTokenType::RightBrace
363                }
364                '[' => {
365                    state.advance(1);
366                    NixTokenType::LeftBracket
367                }
368                ']' => {
369                    state.advance(1);
370                    NixTokenType::RightBracket
371                }
372                ';' => {
373                    state.advance(1);
374                    NixTokenType::Semicolon
375                }
376                ':' => {
377                    state.advance(1);
378                    NixTokenType::Colon
379                }
380                ',' => {
381                    state.advance(1);
382                    NixTokenType::Comma
383                }
384                '.' => {
385                    state.advance(1);
386                    NixTokenType::Dot
387                }
388                '@' => {
389                    state.advance(1);
390                    NixTokenType::At
391                }
392                '$' => {
393                    state.advance(1);
394                    NixTokenType::Dollar
395                }
396                '#' => {
397                    state.advance(1);
398                    NixTokenType::Hash
399                }
400                _ => return false,
401            };
402
403            state.add_token(kind, start_pos, state.get_position());
404            true
405        }
406        else {
407            false
408        }
409    }
410}