luau_lexer/
lexer.rs

1//! The actual lexer.
2
3use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7    error::ParseError,
8    state::State,
9    token::{Comment, Token, TokenType, Trivia},
10    utils::can_be_identifier,
11};
12
13/// The main component of this crate, the lexer.
14#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
15#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
16pub struct Lexer<'a> {
17    /// The input text
18    pub(crate) input: &'a str,
19
20    /// The characters in the input
21    pub(crate) chars: Vec<char>,
22
23    /// The errors met during lexing. They are added when [`Lexer::next_token`] is
24    /// called and gets emptied before any new tokens are lexed.
25    pub(crate) errors: Vec<ParseError>,
26
27    /// The current state of the lexer.
28    pub(crate) state: State,
29}
30
31impl<'a> Lexer<'a> {
32    /// Create a new [`Lexer`].
33    #[inline]
34    pub fn new(input: &'a str) -> Self {
35        Self::default().with_input(input)
36    }
37
38    /// Set the lexer's input. Meant to be chained.
39    #[inline]
40    pub fn with_input(mut self, input: &'a str) -> Self {
41        self.set_input(input);
42        self
43    }
44
45    /// Set the lexer's input.
46    #[inline]
47    pub fn set_input(&mut self, input: &'a str) {
48        self.input = input;
49        self.chars = input.chars().collect();
50        self.last_trivia = self.skip_trivia();
51    }
52
53    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
54    #[inline]
55    pub fn save_state(&self) -> State {
56        self.state.clone()
57    }
58
59    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
60    #[inline]
61    pub fn set_state(&mut self, state: State) {
62        self.state = state;
63    }
64
65    /// Lex the next token. This will return any errors met while parsing the
66    /// *previous* token before lexing a new one.
67    pub fn next_token(&mut self) -> Token {
68        if !self.errors.is_empty() {
69            let error = self.errors.remove(0);
70            let start = error.start();
71
72            return TokenType::Error(error).into_token(
73                start,
74                self.lexer_position,
75                Vec::new(),
76                Vec::new(),
77            );
78        }
79
80        let start = self.lexer_position;
81
82        let token_type = TokenType::try_lex(self).unwrap_or(TokenType::EndOfFile);
83
84        let trivia = self.skip_trivia();
85        let leading_trivia = self.last_trivia.clone();
86        let trailing_trivia = trivia.clone();
87
88        self.last_trivia = trivia;
89
90        token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
91    }
92
93    /// Get the current character.
94    #[inline]
95    pub fn current_char(&self) -> Option<char> {
96        self.chars.get(self.position).copied()
97    }
98
99    /// Get the next character.
100    #[inline]
101    pub fn next_char(&self) -> Option<char> {
102        self.chars.get(self.position + 1).copied()
103    }
104
105    /// Move the lexer after the current character if it matches the passed one,
106    /// and return if it did so.
107    #[inline]
108    pub fn consume(&mut self, character: char) -> bool {
109        if self.current_char() == Some(character) {
110            self.increment_position_by_char(character);
111
112            true
113        } else {
114            false
115        }
116    }
117
118    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
119    /// the lexer after both the current and next character.
120    #[inline]
121    pub fn consume_with_next(&mut self, character: char) -> bool {
122        if self.next_char() == Some(character) {
123            let current_char = self.current_char().unwrap();
124
125            self.increment_position_by_char(current_char);
126            self.increment_position_by_char(character);
127
128            true
129        } else {
130            false
131        }
132    }
133
134    /// Consume the next identifier and return it. This assumes there's at least
135    /// one character to form a valid identifier at the current position,
136    pub fn consume_identifier(&mut self) -> SmolStr {
137        let start = self.byte_position;
138        while let Some(character) = self.current_char() {
139            if can_be_identifier(character) {
140                self.increment_position_by_char(character);
141            } else {
142                break;
143            }
144        }
145
146        self.input[start..self.byte_position].into()
147    }
148
149    /// Get the trivia after the current position and move the lexer to after them.
150    pub fn skip_trivia(&mut self) -> Vec<Trivia> {
151        let mut trivia = Vec::new();
152
153        loop {
154            let spaces = self.skip_whitespace();
155
156            if !spaces.is_empty() {
157                trivia.push(Trivia::Spaces(spaces));
158            } else if self.current_char() == Some('-') && self.consume_with_next('-') {
159                trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
160            } else {
161                break;
162            }
163        }
164
165        trivia
166    }
167
168    /// Get the whitespaces after the current positive and move the lexer to after
169    /// them.
170    pub fn skip_whitespace(&mut self) -> SmolStr {
171        let start = self.byte_position;
172        while let Some(character) = self.current_char() {
173            if character.is_whitespace() {
174                self.increment_position_by_char(character);
175            } else {
176                break;
177            }
178        }
179
180        (start != self.byte_position)
181            .then(|| self.input[start..self.byte_position].into())
182            .unwrap_or_default()
183    }
184}
185
186impl Deref for Lexer<'_> {
187    type Target = State;
188
189    fn deref(&self) -> &Self::Target {
190        &self.state
191    }
192}
193
194impl DerefMut for Lexer<'_> {
195    fn deref_mut(&mut self) -> &mut Self::Target {
196        &mut self.state
197    }
198}
199
200/// A trait which means this item can be lexed.
201pub trait Lexable: Sized {
202    /// Try lexing the item.
203    fn try_lex(lexer: &mut Lexer) -> Option<Self>;
204}