luau_lexer/
lexer.rs

1//! The actual lexer.
2
3use smol_str::SmolStr;
4use std::{
5    borrow::Cow,
6    ops::{Deref, DerefMut},
7};
8
9use crate::{
10    error::ParseError,
11    state::State,
12    token::{Comment, Token, TokenType, Trivia},
13    utils::can_be_identifier,
14};
15
16/// The main component of this crate, the lexer.
17#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
18#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
19pub struct Lexer<'a> {
20    /// The input text
21    pub(crate) input: Cow<'a, str>,
22
23    /// The characters in the input
24    pub(crate) chars: Vec<char>,
25
26    /// The errors met during lexing. They are added when [`Lexer::next_token`] is
27    /// called and gets emptied before any new tokens are lexed.
28    pub(crate) errors: Vec<ParseError>,
29
30    /// The current state of the lexer.
31    pub(crate) state: State,
32}
33
34impl<'a> Lexer<'a> {
35    /// Create a new [`Lexer`].
36    #[inline]
37    pub fn new(input: impl Into<Cow<'a, str>>) -> Self {
38        Self::default().with_input(input)
39    }
40
41    /// Set the lexer's input. Meant to be chained.
42    #[inline]
43    pub fn with_input(mut self, input: impl Into<Cow<'a, str>>) -> Self {
44        self.set_input(input);
45        self
46    }
47
48    /// Set the lexer's input.
49    #[inline]
50    pub fn set_input(&mut self, input: impl Into<Cow<'a, str>>) {
51        self.input = input.into();
52        self.chars = self.input.chars().collect();
53        self.last_trivia = self.skip_trivia();
54    }
55
56    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
57    #[inline]
58    pub fn save_state(&self) -> State {
59        self.state.clone()
60    }
61
62    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
63    #[inline]
64    pub fn set_state(&mut self, state: State) {
65        self.state = state;
66    }
67
68    /// Lex the next token. This will return any errors met while parsing the
69    /// *previous* token before lexing a new one.
70    pub fn next_token(&mut self) -> Token {
71        if !self.errors.is_empty() {
72            let error = self.errors.remove(0);
73            let start = error.start();
74
75            return TokenType::Error(error).into_token(
76                start,
77                self.lexer_position,
78                Vec::new(),
79                Vec::new(),
80            );
81        }
82
83        let start = self.lexer_position;
84
85        let token_type = TokenType::try_lex(self).unwrap_or(TokenType::EndOfFile);
86
87        let trivia = self.skip_trivia();
88        let leading_trivia = self.last_trivia.clone();
89        let trailing_trivia = trivia.clone();
90
91        self.last_trivia = trivia;
92
93        token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
94    }
95
96    /// Get the current character.
97    #[inline]
98    pub fn current_char(&self) -> Option<char> {
99        self.chars.get(self.position).copied()
100    }
101
102    /// Get the next character.
103    #[inline]
104    pub fn next_char(&self) -> Option<char> {
105        self.chars.get(self.position + 1).copied()
106    }
107
108    /// Move the lexer after the current character if it matches the passed one,
109    /// and return if it did so.
110    #[inline]
111    pub fn consume(&mut self, character: char) -> bool {
112        if self.current_char() == Some(character) {
113            self.increment_position_by_char(character);
114
115            true
116        } else {
117            false
118        }
119    }
120
121    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
122    /// the lexer after both the current and next character.
123    #[inline]
124    pub fn consume_with_next(&mut self, character: char) -> bool {
125        if self.next_char() == Some(character) {
126            let current_char = self.current_char().unwrap();
127
128            self.increment_position_by_char(current_char);
129            self.increment_position_by_char(character);
130
131            true
132        } else {
133            false
134        }
135    }
136
137    /// Consume the next identifier and return it. This assumes there's at least
138    /// one character to form a valid identifier at the current position,
139    pub fn consume_identifier(&mut self) -> SmolStr {
140        let start = self.byte_position;
141        while let Some(character) = self.current_char() {
142            if can_be_identifier(character) {
143                self.increment_position_by_char(character);
144            } else {
145                break;
146            }
147        }
148
149        self.input[start..self.byte_position].into()
150    }
151
152    /// Get the trivia after the current position and move the lexer to after them.
153    pub fn skip_trivia(&mut self) -> Vec<Trivia> {
154        let mut trivia = Vec::new();
155
156        loop {
157            let spaces = self.skip_whitespace();
158
159            if !spaces.is_empty() {
160                trivia.push(Trivia::Spaces(spaces));
161            } else if self.current_char() == Some('-') && self.consume_with_next('-') {
162                trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
163            } else {
164                break;
165            }
166        }
167
168        trivia
169    }
170
171    /// Get the whitespaces after the current positive and move the lexer to after
172    /// them.
173    pub fn skip_whitespace(&mut self) -> SmolStr {
174        let start = self.byte_position;
175        while let Some(character) = self.current_char() {
176            if character.is_whitespace() {
177                self.increment_position_by_char(character);
178            } else {
179                break;
180            }
181        }
182
183        (start != self.byte_position)
184            .then(|| self.input[start..self.byte_position].into())
185            .unwrap_or_default()
186    }
187}
188
189impl Deref for Lexer<'_> {
190    type Target = State;
191
192    fn deref(&self) -> &Self::Target {
193        &self.state
194    }
195}
196
197impl DerefMut for Lexer<'_> {
198    fn deref_mut(&mut self) -> &mut Self::Target {
199        &mut self.state
200    }
201}
202
203/// A trait which means this item can be lexed.
204pub trait Lexable: Sized {
205    /// Try lexing the item.
206    fn try_lex(lexer: &mut Lexer) -> Option<Self>;
207}