luau_lexer/
lexer.rs

1//! The actual lexer.
2
3use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7    error::ParseError,
8    state::State,
9    token::{Comment, Token, TokenType, Trivia},
10    utils::can_be_identifier,
11};
12
13/// The main component of this crate, the lexer.
14#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
15#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
16pub struct Lexer {
17    /// The characters in the input
18    pub(crate) chars: Vec<char>,
19
20    /// The errors met during lexing. They are added when [`Lexer::next_token`] is
21    /// called and gets emptied before any new tokens are lexed.
22    pub(crate) errors: Vec<ParseError>,
23
24    /// The current state of the lexer.
25    pub(crate) state: State,
26}
27
28impl Lexer {
29    /// Create a new [`Lexer`].
30    #[inline]
31    pub fn new(input: &str) -> Self {
32        Self::default().with_input(input)
33    }
34
35    /// Set the lexer's input. Meant to be chained.
36    #[inline]
37    pub fn with_input(mut self, input: &str) -> Self {
38        self.set_input(input);
39        self
40    }
41
42    /// Set the lexer's input.
43    #[inline]
44    pub fn set_input(&mut self, input: &str) {
45        self.chars = input.chars().collect();
46        self.last_trivia = self.skip_trivia();
47    }
48
49    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
50    #[inline]
51    pub fn save_state(&self) -> State {
52        self.state.clone()
53    }
54
55    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
56    #[inline]
57    pub fn set_state(&mut self, state: State) {
58        self.state = state;
59    }
60
61    /// Lex the next token. This will return any errors met while parsing the
62    /// *previous* token before lexing a new one.
63    pub fn next_token(&mut self) -> Token {
64        if !self.errors.is_empty() {
65            let error = self.errors.remove(0);
66            let start = error.start();
67
68            return TokenType::Error(error).into_token(
69                start,
70                self.lexer_position,
71                Vec::new(),
72                Vec::new(),
73            );
74        }
75
76        let start = self.lexer_position;
77
78        let token_type = TokenType::try_lex(self).unwrap_or(TokenType::EndOfFile);
79
80        let trivia = self.skip_trivia();
81        let leading_trivia = self.last_trivia.clone();
82        let trailing_trivia = trivia.clone();
83
84        self.last_trivia = trivia;
85
86        token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
87    }
88
89    /// Get the current character.
90    #[inline]
91    pub fn current_char(&self) -> Option<char> {
92        self.chars.get(self.position).copied()
93    }
94
95    /// Get the next character.
96    #[inline]
97    pub fn next_char(&self) -> Option<char> {
98        self.chars.get(self.position + 1).copied()
99    }
100
101    /// Move the lexer after the current character if it matches the passed one,
102    /// and return if it did so.
103    #[inline]
104    pub fn consume(&mut self, character: char) -> bool {
105        if self.current_char() == Some(character) {
106            self.increment_position_by_char(character);
107
108            true
109        } else {
110            false
111        }
112    }
113
114    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
115    /// the lexer after both the current and next character.
116    #[inline]
117    pub fn consume_with_next(&mut self, character: char) -> bool {
118        if self.next_char() == Some(character) {
119            let current_char = self.current_char().unwrap();
120
121            self.increment_position_by_char(current_char);
122            self.increment_position_by_char(character);
123
124            true
125        } else {
126            false
127        }
128    }
129
130    /// Consume the next identifier and return it. This assumes there's at least
131    /// one character to form a valid identifier at the current position,
132    pub fn consume_identifier(&mut self) -> SmolStr {
133        let start = self.position;
134        while let Some(character) = self.current_char() {
135            if can_be_identifier(character) {
136                self.increment_position_by_char(character);
137            } else {
138                break;
139            }
140        }
141
142        SmolStr::from_iter(self.chars[start..self.position].to_vec())
143    }
144
145    /// Get the trivia after the current position and move the lexer to after them.
146    pub fn skip_trivia(&mut self) -> Vec<Trivia> {
147        let mut trivia = Vec::new();
148
149        loop {
150            let spaces = self.skip_whitespace();
151
152            if !spaces.is_empty() {
153                trivia.push(Trivia::Spaces(spaces));
154            } else if self.current_char() == Some('-') && self.consume_with_next('-') {
155                trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
156            } else {
157                break;
158            }
159        }
160
161        trivia
162    }
163
164    /// Get the whitespaces after the current positive and move the lexer to after
165    /// them.
166    pub fn skip_whitespace(&mut self) -> SmolStr {
167        let start = self.position;
168        while let Some(character) = self.current_char() {
169            if character.is_whitespace() {
170                self.increment_position_by_char(character);
171            } else {
172                break;
173            }
174        }
175
176        (start != self.position)
177            .then(|| SmolStr::from_iter(self.chars[start..self.position].to_vec()))
178            .unwrap_or_default()
179    }
180}
181
182impl Deref for Lexer {
183    type Target = State;
184
185    fn deref(&self) -> &Self::Target {
186        &self.state
187    }
188}
189
190impl DerefMut for Lexer {
191    fn deref_mut(&mut self) -> &mut Self::Target {
192        &mut self.state
193    }
194}
195
196/// A trait which means this item can be lexed.
197pub trait Lexable: Sized {
198    /// Try lexing the item.
199    fn try_lex(lexer: &mut Lexer) -> Option<Self>;
200}