luau_lexer/
lexer.rs

1//! The actual lexer.
2
3use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7    error::ParseError,
8    state::State,
9    token::{Comment, Token, TokenType, Trivia},
10    utils::can_be_identifier,
11};
12
/// The main component of this crate, the lexer.
///
/// Holds the source text, a per-character copy of it, the pending lexing
/// errors and the current [`State`]. A [`Lexer`] dereferences to its
/// [`State`], so the state's fields and methods can be reached directly on
/// the lexer.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Lexer<'a> {
    /// The input text.
    pub(crate) input: &'a str,

    /// The characters of the input, collected from `input` whenever the input
    /// is set.
    pub(crate) chars: Vec<char>,

    /// The errors met during lexing. While this list is not empty,
    /// [`Lexer::next_token`] hands back its first entry as an error token
    /// instead of lexing a new token.
    pub(crate) errors: Vec<ParseError>,

    /// The current state of the lexer.
    pub(crate) state: State,
}
30
31impl<'a> Lexer<'a> {
32    /// Create a new [`Lexer`].
33    #[inline]
34    pub fn new(input: &'a str) -> Self {
35        Self::default().with_input(input)
36    }
37
38    /// Set the lexer's input. Meant to be chained.
39    #[inline]
40    pub fn with_input(mut self, input: &'a str) -> Self {
41        self.set_input(input);
42        self
43    }
44
45    /// Set the lexer's input.
46    #[inline]
47    pub fn set_input(&mut self, input: &'a str) {
48        self.input = input;
49        self.chars = input.chars().collect();
50        self.last_trivia = self.skip_trivia();
51    }
52
53    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
54    #[inline]
55    pub fn save_state(&self) -> State {
56        self.state.clone()
57    }
58
59    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
60    #[inline]
61    pub fn set_state(&mut self, state: State) {
62        self.state = state;
63    }
64
65    /// Lex the next token. This will return any errors met while parsing the
66    /// *previous* token before lexing a new one.
67    pub fn next_token(&mut self) -> Token {
68        if !self.errors.is_empty() {
69            let error = self.errors.remove(0);
70            let start = error.start();
71
72            return TokenType::Error(error).into_token(
73                start,
74                self.lexer_position,
75                Vec::new(),
76                Vec::new(),
77            );
78        }
79
80        let start = self.lexer_position;
81
82        TokenType::try_lex(self)
83            .map(|token_type| {
84                let trivia = self.skip_trivia();
85                let leading_trivia = self.last_trivia.clone();
86                let trailing_trivia = trivia.clone();
87
88                self.last_trivia = trivia;
89
90                token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
91            })
92            .unwrap_or_else(|| Token::END_OF_FILE)
93    }
94
95    /// Get the current character.
96    #[inline]
97    pub fn current_char(&self) -> Option<char> {
98        self.chars.get(self.position).copied()
99    }
100
101    /// Get the next character.
102    #[inline]
103    pub fn next_char(&self) -> Option<char> {
104        self.chars.get(self.position + 1).copied()
105    }
106
107    /// Move the lexer after the current character if it matches the passed one,
108    /// and return if it did so.
109    #[inline]
110    pub fn consume(&mut self, character: char) -> bool {
111        if self.current_char() == Some(character) {
112            self.increment_position_by_char(character);
113
114            true
115        } else {
116            false
117        }
118    }
119
120    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
121    /// the lexer after both the current and next character.
122    #[inline]
123    pub fn consume_with_next(&mut self, character: char) -> bool {
124        if self.next_char() == Some(character) {
125            let current_char = self.current_char().unwrap();
126
127            self.increment_position_by_char(current_char);
128            self.increment_position_by_char(character);
129
130            true
131        } else {
132            false
133        }
134    }
135
136    /// Consume the next identifier and return it. This assumes there's at least
137    /// one character to form a valid identifier at the current position,
138    pub fn consume_identifier(&mut self) -> SmolStr {
139        let start = self.position;
140        while let Some(character) = self.current_char() {
141            if can_be_identifier(character) {
142                self.increment_position_by_char(character);
143            } else {
144                break;
145            }
146        }
147
148        self.input[start..self.position].into()
149    }
150
151    /// Get the trivia after the current position and move the lexer to after them.
152    pub fn skip_trivia(&mut self) -> Vec<Trivia> {
153        let mut trivia = Vec::new();
154
155        loop {
156            let spaces = self.skip_whitespace();
157
158            if !spaces.is_empty() {
159                trivia.push(Trivia::Spaces(spaces));
160            } else if self.current_char() == Some('-') && self.consume_with_next('-') {
161                trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
162            } else {
163                break;
164            }
165        }
166
167        trivia
168    }
169
170    /// Get the whitespaces after the current positive and move the lexer to after
171    /// them.
172    pub fn skip_whitespace(&mut self) -> SmolStr {
173        let start = self.position;
174        while let Some(character) = self.current_char() {
175            if character.is_whitespace() {
176                self.increment_position_by_char(character);
177            } else {
178                break;
179            }
180        }
181
182        (start != self.position)
183            .then(|| self.input[start..self.position].into())
184            .unwrap_or_default()
185    }
186}
187
188impl Deref for Lexer<'_> {
189    type Target = State;
190
191    fn deref(&self) -> &Self::Target {
192        &self.state
193    }
194}
195
196impl DerefMut for Lexer<'_> {
197    fn deref_mut(&mut self) -> &mut Self::Target {
198        &mut self.state
199    }
200}
201
/// A trait which means this item can be lexed.
///
/// Implementors are built straight from a [`Lexer`]; the `Sized` bound is
/// there because [`Lexable::try_lex`] returns `Self` by value.
pub trait Lexable: Sized {
    /// Try lexing the item from `lexer`.
    ///
    /// The lexer is passed mutably, so an implementation may advance it.
    /// Returns `None` when no item was produced — presumably because the
    /// input at the current position does not form one; confirm against the
    /// implementations.
    fn try_lex(lexer: &mut Lexer) -> Option<Self>;
}