// luau_lexer/lexer.rs

1//! The actual lexer.
2
3use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7    error::ParseError,
8    state::State,
9    token::{Token, TokenType},
10    utils::can_be_identifier,
11};
12
/// The main component of this crate, the lexer.
///
/// Construct one with [`Lexer::new`] and pull tokens with [`Lexer::next_token`].
/// Position and whitespace bookkeeping live in the inner [`State`], which the
/// lexer [`Deref`](std::ops::Deref)s to.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Lexer<'a> {
    /// The input text
    pub(crate) input: &'a str,

    /// The characters in the input
    // NOTE(review): this duplicates `input` in memory. `state`'s position is
    // used both to index this Vec (a char index) and to slice `input` (a byte
    // index) — those only coincide for ASCII input; confirm `State` keeps them
    // in sync for multi-byte characters.
    pub(crate) chars: Vec<char>,

    /// The errors met during lexing. They are added when [`Lexer::next_token`] is
    /// called and gets emptied before any new tokens are lexed.
    pub(crate) errors: Vec<ParseError>,

    /// The current state of the lexer.
    pub(crate) state: State,
}
30
31impl<'a> Lexer<'a> {
32    /// Create a new [`Lexer`].
33    #[inline]
34    pub fn new(input: &'a str) -> Self {
35        Self::default().with_input(input)
36    }
37
38    /// Set the lexer's input. Meant to be chained.
39    #[inline]
40    pub fn with_input(mut self, input: &'a str) -> Self {
41        self.set_input(input);
42        self
43    }
44
45    /// Set the lexer's input.
46    #[inline]
47    pub fn set_input(&mut self, input: &'a str) {
48        self.input = input;
49        self.chars = input.chars().collect();
50        self.last_whitespace = self.skip_whitespace();
51    }
52
53    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
54    #[inline]
55    pub fn save_state(&self) -> State {
56        self.state.clone()
57    }
58
59    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
60    #[inline]
61    pub fn set_state(&mut self, state: State) {
62        self.state = state;
63    }
64
65    /// Lex the next token. This will return any errors met while parsing the
66    /// *previous* token before lexing a new one.
67    pub fn next_token(&mut self) -> Token {
68        if !self.errors.is_empty() {
69            let error = self.errors.remove(0);
70            let start = error.start();
71
72            return TokenType::Error(error).into_token(start, self.lexer_position, "", "");
73        }
74
75        let start = self.lexer_position;
76
77        TokenType::try_lex(self)
78            .map(|token_type| {
79                let whitespaces = self.skip_whitespace();
80                let spaces_before = self.last_whitespace.clone();
81                let spaces_after = whitespaces.clone();
82
83                self.last_whitespace = whitespaces;
84
85                token_type.into_token(start, self.lexer_position, spaces_before, spaces_after)
86            })
87            .unwrap_or_else(|| Token::END_OF_FILE)
88    }
89
90    /// Get the current character.
91    #[inline]
92    pub fn current_char(&self) -> Option<char> {
93        self.chars.get(self.position).copied()
94    }
95
96    /// Get the next character.
97    #[inline]
98    pub fn next_char(&self) -> Option<char> {
99        self.chars.get(self.position + 1).copied()
100    }
101
102    /// Move the lexer after the current character if it matches the passed one,
103    /// and return if it did so.
104    #[inline]
105    pub fn consume(&mut self, character: char) -> bool {
106        if self.current_char() == Some(character) {
107            self.increment_position_by_char(character);
108
109            true
110        } else {
111            false
112        }
113    }
114
115    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
116    /// the lexer after both the current and next character.
117    #[inline]
118    pub fn consume_with_next(&mut self, character: char) -> bool {
119        if self.next_char() == Some(character) {
120            let current_char = self.current_char().unwrap();
121
122            self.increment_position_by_char(current_char);
123            self.increment_position_by_char(character);
124
125            true
126        } else {
127            false
128        }
129    }
130
131    /// Consume the next identifier and return it. This assumes there's at least
132    /// one character to form a valid identifier at the current position,
133    pub fn consume_identifier(&mut self) -> SmolStr {
134        let start = self.position;
135        while let Some(character) = self.current_char() {
136            if can_be_identifier(character) {
137                self.increment_position_by_char(character);
138            } else {
139                break;
140            }
141        }
142
143        self.input[start..self.position].into()
144    }
145
146    /// Get the whitespaces after the current positive and move the lexer to after
147    /// them.
148    pub fn skip_whitespace(&mut self) -> SmolStr {
149        let start = self.position;
150        while let Some(character) = self.current_char() {
151            if character.is_whitespace() {
152                self.increment_position_by_char(character);
153            } else {
154                break;
155            }
156        }
157
158        (start != self.position)
159            .then(|| self.input[start..self.position].into())
160            .unwrap_or_default()
161    }
162}
163
164impl Deref for Lexer<'_> {
165    type Target = State;
166
167    fn deref(&self) -> &Self::Target {
168        &self.state
169    }
170}
171
172impl DerefMut for Lexer<'_> {
173    fn deref_mut(&mut self) -> &mut Self::Target {
174        &mut self.state
175    }
176}
177
/// A trait which means this item can be lexed.
pub trait Lexable: Sized {
    /// Try lexing the item from the lexer's current position.
    ///
    /// Returns `None` when the item could not be lexed — for the top-level
    /// [`TokenType::try_lex`] call, [`Lexer::next_token`] maps `None` to the
    /// end-of-file token.
    fn try_lex(lexer: &mut Lexer) -> Option<Self>;
}