luau_lexer/
lexer.rs

//! The lexer itself: the [`Lexer`] type and the [`Lexable`] trait.
2
3use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7    error::Error,
8    state::State,
9    token::{Comment, Token, TokenType, Trivia},
10    utils::can_be_identifier,
11};
12
13/// The main component of this crate, the lexer.
14#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
15#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
16pub struct Lexer {
17    /// The characters in the input
18    pub(crate) chars: Vec<char>,
19
20    /// The errors met during lexing. They are added when [`Lexer::next_token`] is
21    /// called and gets emptied before any new tokens are lexed.
22    pub(crate) errors: Vec<Error>,
23
24    /// The current state of the lexer.
25    pub(crate) state: State,
26}
27
28impl Lexer {
29    /// Create a new [`Lexer`].
30    #[inline]
31    pub fn new(input: &str) -> Self {
32        Self::default().with_input(input)
33    }
34
35    /// Set the lexer's input. Meant to be chained.
36    #[inline]
37    pub fn with_input(mut self, input: &str) -> Self {
38        self.set_input(input);
39        self
40    }
41
42    /// Set the lexer's input.
43    #[inline]
44    pub fn set_input(&mut self, input: &str) {
45        self.chars = input.chars().collect();
46        self.last_trivia = self.skip_trivia();
47    }
48
49    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
50    #[inline]
51    pub fn save_state(&self) -> State {
52        self.state.clone()
53    }
54
55    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
56    #[inline]
57    pub fn set_state(&mut self, state: State) {
58        self.state = state;
59    }
60
61    /// Lex the next token. This will return any errors met while parsing the
62    /// *previous* token before lexing a new one.
63    pub fn next_token(&mut self) -> Token {
64        if !self.errors.is_empty() {
65            let error = self.errors.remove(0);
66            let start = error.start();
67
68            return TokenType::Error(error).into_token(
69                start,
70                self.lexer_position,
71                Vec::new(),
72                Vec::new(),
73            );
74        }
75
76        let start = self.lexer_position;
77
78        let token_type = TokenType::try_lex(self).unwrap_or(TokenType::EndOfFile);
79
80        let trivia = self.skip_trivia();
81        let leading_trivia = self.last_trivia.clone();
82        let trailing_trivia = trivia.clone();
83
84        self.last_trivia = trivia;
85
86        token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
87    }
88
89    /// Get the current character.
90    #[inline]
91    pub fn current_char(&self) -> Option<char> {
92        self.chars.get(self.position).copied()
93    }
94
95    /// Get the next character.
96    #[inline]
97    pub fn next_char(&self) -> Option<char> {
98        self.chars.get(self.position + 1).copied()
99    }
100
101    /// Move the lexer after the current character if it matches the passed one,
102    /// and return if it did so.
103    #[inline]
104    pub fn consume(&mut self, character: char) -> bool {
105        if self.current_char() == Some(character) {
106            self.increment_position_by_char(character);
107
108            true
109        } else {
110            false
111        }
112    }
113
114    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
115    /// the lexer after both the current and next character.
116    #[inline]
117    #[allow(clippy::missing_panics_doc)] // SAFETY: Will never actually panic.
118    pub fn consume_with_next(&mut self, character: char) -> bool {
119        if self.next_char() == Some(character) {
120            // SAFETY: `self.current_char()` is guaranteed Some(_) due to above line
121            #[allow(clippy::unwrap_used)]
122            let current_char = self.current_char().unwrap();
123
124            self.increment_position_by_char(current_char);
125            self.increment_position_by_char(character);
126
127            true
128        } else {
129            false
130        }
131    }
132
133    /// Consume the next identifier and return it. This assumes there's at least
134    /// one character to form a valid identifier at the current position,
135    pub fn consume_identifier(&mut self) -> SmolStr {
136        let start = self.position;
137        while let Some(character) = self.current_char() {
138            if can_be_identifier(character) {
139                self.increment_position_by_char(character);
140            } else {
141                break;
142            }
143        }
144
145        SmolStr::from_iter(self.chars[start..self.position].to_vec())
146    }
147
148    /// Get the trivia after the current position and move the lexer to after them.
149    #[allow(clippy::missing_panics_doc)] // SAFETY: Will never actually panic.
150    pub fn skip_trivia(&mut self) -> Vec<Trivia> {
151        let mut trivia = Vec::new();
152
153        loop {
154            let spaces = self.skip_whitespace();
155
156            if !spaces.is_empty() {
157                trivia.push(Trivia::Spaces(spaces));
158            } else if self.current_char() == Some('-') && self.consume_with_next('-') {
159                // SAFETY: Will always return `Some(_)`. It's just the trait definition.
160                #[allow(clippy::unwrap_used)]
161                trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
162            } else {
163                break;
164            }
165        }
166
167        trivia
168    }
169
170    /// Get the whitespaces after the current positive and move the lexer to after
171    /// them.
172    pub fn skip_whitespace(&mut self) -> SmolStr {
173        let start = self.position;
174        while let Some(character) = self.current_char() {
175            if character.is_whitespace() {
176                self.increment_position_by_char(character);
177            } else {
178                break;
179            }
180        }
181
182        (start != self.position)
183            .then(|| SmolStr::from_iter(self.chars[start..self.position].to_vec()))
184            .unwrap_or_default()
185    }
186}
187
188impl Deref for Lexer {
189    type Target = State;
190
191    fn deref(&self) -> &Self::Target {
192        &self.state
193    }
194}
195
196impl DerefMut for Lexer {
197    fn deref_mut(&mut self) -> &mut Self::Target {
198        &mut self.state
199    }
200}
201
202/// A trait which means this item can be lexed.
203pub trait Lexable: Sized {
204    /// Try lexing the item.
205    fn try_lex(lexer: &mut Lexer) -> Option<Self>;
206}