1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
//! The actual lexer.
use smol_str::SmolStr;
use std::ops::{Deref, DerefMut};
use crate::{
error::ParseError,
state::State,
token::{Comment, Token, TokenType, Trivia},
utils::can_be_identifier,
};
/// The main component of this crate, the lexer.
///
/// It holds the text being lexed, a per-character copy of that text, the
/// errors waiting to be reported and the current [`State`]. The lexer
/// dereferences to its [`State`], so the state's fields and methods can be
/// used directly on a `Lexer`.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Lexer<'a> {
/// The input text, borrowed for the lifetime of the lexer.
pub(crate) input: &'a str,
/// The characters of the input, collected once in [`Lexer::set_input`].
/// [`Lexer::current_char`] and [`Lexer::next_char`] index into this buffer
/// with the state's position.
pub(crate) chars: Vec<char>,
/// The errors met during lexing. They are added when [`Lexer::next_token`] is
/// called and are handed back, one per call and oldest first, as error
/// tokens before any new token is lexed.
pub(crate) errors: Vec<ParseError>,
/// The current state of the lexer. It can be copied out with
/// [`Lexer::save_state`] and put back with [`Lexer::set_state`].
pub(crate) state: State,
}
impl<'a> Lexer<'a> {
    /// Create a new [`Lexer`] over `input`.
    ///
    /// Equivalent to taking the default lexer and calling
    /// [`Lexer::with_input`] on it.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self::default().with_input(input)
    }

    /// Set the lexer's input. Meant to be chained.
    ///
    /// Consumes the lexer, forwards to [`Lexer::set_input`] and returns it.
    #[inline]
    pub fn with_input(mut self, input: &'a str) -> Self {
        self.set_input(input);
        self
    }

    /// Set the lexer's input.
    ///
    /// Stores the text, rebuilds the character buffer and then skips the
    /// trivia found at the current position, remembering it as the leading
    /// trivia of the next token.
    ///
    /// Note that this does not reset the current [`State`] nor the pending
    /// errors; lexing resumes from whatever position the state holds.
    #[inline]
    pub fn set_input(&mut self, input: &'a str) {
        self.input = input;
        self.chars = input.chars().collect();

        let leading_trivia = self.skip_trivia();
        self.last_trivia = leading_trivia;
    }

    /// Save the current [`State`]. To be used with [`Lexer::set_state`].
    ///
    /// Returns a clone, so the lexer itself is left untouched.
    #[inline]
    pub fn save_state(&self) -> State {
        self.state.clone()
    }

    /// Set the current [`State`]. To be paired with [`Lexer::save_state`].
    #[inline]
    pub fn set_state(&mut self, state: State) {
        self.state = state;
    }

    /// Lex the next token. This will return any errors met while parsing the
    /// *previous* token before lexing a new one.
    ///
    /// * If errors are pending, the oldest one is removed and returned as a
    ///   [`TokenType::Error`] token without any trivia; nothing new is lexed.
    /// * Otherwise a token type is lexed and the trivia following it is
    ///   skipped. The token gets the previously remembered trivia as leading
    ///   trivia and the freshly skipped trivia as trailing trivia; the latter
    ///   is also remembered for the next token.
    /// * If no token type can be lexed, [`Token::END_OF_FILE`] is returned.
    ///
    /// The end position handed to the token is the lexer position *after*
    /// the trailing trivia has been skipped.
    pub fn next_token(&mut self) -> Token {
        if !self.errors.is_empty() {
            let error = self.errors.remove(0);
            let start = error.start();

            return TokenType::Error(error).into_token(
                start,
                self.lexer_position,
                Vec::new(),
                Vec::new(),
            );
        }

        let start = self.lexer_position;

        match TokenType::try_lex(self) {
            Some(token_type) => {
                let trailing_trivia = self.skip_trivia();
                // Swap the remembered trivia for the new one in a single step;
                // only one copy of the trailing trivia is needed.
                let leading_trivia =
                    std::mem::replace(&mut self.last_trivia, trailing_trivia.clone());

                token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
            }
            None => Token::END_OF_FILE,
        }
    }

    /// Get the current character, or `None` when the position is past the
    /// end of the input.
    #[inline]
    pub fn current_char(&self) -> Option<char> {
        self.chars.get(self.position).copied()
    }

    /// Get the next character, i.e. the one right after the current one, or
    /// `None` if there is no such character.
    #[inline]
    pub fn next_char(&self) -> Option<char> {
        self.chars.get(self.position + 1).copied()
    }

    /// Move the lexer after the current character if it matches the passed one,
    /// and return whether it did so.
    #[inline]
    pub fn consume(&mut self, character: char) -> bool {
        let matched = self.current_char() == Some(character);
        if matched {
            self.increment_position_by_char(character);
        }

        matched
    }

    /// Like [`Lexer::consume`] but checks for the next character instead. Moves
    /// the lexer after both the current and next character.
    ///
    /// Returns `false`, without moving, when the next character is missing or
    /// different from `character`.
    #[inline]
    pub fn consume_with_next(&mut self, character: char) -> bool {
        // A next character can only exist if a current one does, so matching
        // on both replaces the unwrap without changing the outcome.
        match (self.current_char(), self.next_char()) {
            (Some(current), Some(next)) if next == character => {
                self.increment_position_by_char(current);
                self.increment_position_by_char(character);
                true
            }
            _ => false,
        }
    }

    /// Consume the next identifier and return it. This assumes there's at least
    /// one character to form a valid identifier at the current position.
    ///
    /// Characters are consumed for as long as [`can_be_identifier`] accepts
    /// them. An empty string is returned if the very first one is rejected.
    pub fn consume_identifier(&mut self) -> SmolStr {
        let start = self.position;

        while let Some(character) = self.current_char() {
            if !can_be_identifier(character) {
                break;
            }
            self.increment_position_by_char(character);
        }

        // The position is used as an index into `chars` (see
        // `current_char`), not as a byte offset into `input`, so the text is
        // rebuilt from the character buffer. Slicing `input` with it would
        // yield the wrong text, or panic, after any multi-byte character.
        self.chars[start..self.position]
            .iter()
            .collect::<String>()
            .into()
    }

    /// Get the trivia after the current position and move the lexer to after them.
    ///
    /// Runs of whitespace become [`Trivia::Spaces`] and everything introduced
    /// by `--` becomes [`Trivia::Comment`]. Scanning stops at the first
    /// position that starts neither.
    ///
    /// # Panics
    ///
    /// Panics if [`Comment::try_lex`] returns `None` after `--` was consumed.
    pub fn skip_trivia(&mut self) -> Vec<Trivia> {
        let mut trivia = Vec::new();

        loop {
            let spaces = self.skip_whitespace();
            if !spaces.is_empty() {
                trivia.push(Trivia::Spaces(spaces));
                continue;
            }

            // Both dashes are consumed here; the comment lexer takes over
            // right after them.
            let starts_comment = self.current_char() == Some('-') && self.consume_with_next('-');
            if !starts_comment {
                break;
            }

            trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
        }

        trivia
    }

    /// Get the whitespaces after the current position and move the lexer to after
    /// them.
    ///
    /// Returns an empty string when the current character is not whitespace.
    pub fn skip_whitespace(&mut self) -> SmolStr {
        let start = self.position;

        while let Some(character) = self.current_char() {
            if !character.is_whitespace() {
                break;
            }
            self.increment_position_by_char(character);
        }

        // Built from `chars` because the position indexes characters, not
        // bytes of `input`. An empty range naturally gives an empty string.
        self.chars[start..self.position]
            .iter()
            .collect::<String>()
            .into()
    }
}
/// Read-only access to the lexer's [`State`].
///
/// This is what lets the lexer's own methods read state data such as
/// `self.position` or `self.lexer_position` without spelling out
/// `self.state`.
impl Deref for Lexer<'_> {
type Target = State;
/// Borrow the lexer's current [`State`].
fn deref(&self) -> &Self::Target {
&self.state
}
}
/// Mutable access to the lexer's [`State`].
///
/// This is what lets the lexer's own methods update state data, for example
/// assigning `self.last_trivia` or calling `increment_position_by_char`,
/// without spelling out `self.state`.
impl DerefMut for Lexer<'_> {
/// Mutably borrow the lexer's current [`State`].
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.state
}
}
/// A trait which means this item can be lexed.
///
/// In this file it is called as `TokenType::try_lex` from
/// [`Lexer::next_token`] and as `Comment::try_lex` from
/// [`Lexer::skip_trivia`].
pub trait Lexable: Sized {
/// Try lexing the item.
///
/// The lexer is passed mutably, so an implementation may move it forward
/// while reading. Returns the lexed item, or `None` when there is none;
/// [`Lexer::next_token`] turns a `None` token type into the end-of-file
/// token.
fn try_lex(lexer: &mut Lexer) -> Option<Self>;
}