1use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7 error::ParseError,
8 state::State,
9 token::{Comment, Token, TokenType, Trivia},
10 utils::can_be_identifier,
11};
12
13#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
15#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
16pub struct Lexer<'a> {
17 pub(crate) input: &'a str,
19
20 pub(crate) chars: Vec<char>,
22
23 pub(crate) errors: Vec<ParseError>,
26
27 pub(crate) state: State,
29}
30
31impl<'a> Lexer<'a> {
32 #[inline]
34 pub fn new(input: &'a str) -> Self {
35 Self::default().with_input(input)
36 }
37
38 #[inline]
40 pub fn with_input(mut self, input: &'a str) -> Self {
41 self.set_input(input);
42 self
43 }
44
45 #[inline]
47 pub fn set_input(&mut self, input: &'a str) {
48 self.input = input;
49 self.chars = input.chars().collect();
50 self.last_trivia = self.skip_trivia();
51 }
52
53 #[inline]
55 pub fn save_state(&self) -> State {
56 self.state.clone()
57 }
58
59 #[inline]
61 pub fn set_state(&mut self, state: State) {
62 self.state = state;
63 }
64
65 pub fn next_token(&mut self) -> Token {
68 if !self.errors.is_empty() {
69 let error = self.errors.remove(0);
70 let start = error.start();
71
72 return TokenType::Error(error).into_token(
73 start,
74 self.lexer_position,
75 Vec::new(),
76 Vec::new(),
77 );
78 }
79
80 let start = self.lexer_position;
81
82 let token_type = TokenType::try_lex(self).unwrap_or(TokenType::EndOfFile);
83
84 let trivia = self.skip_trivia();
85 let leading_trivia = self.last_trivia.clone();
86 let trailing_trivia = trivia.clone();
87
88 self.last_trivia = trivia;
89
90 token_type.into_token(start, self.lexer_position, leading_trivia, trailing_trivia)
91 }
92
93 #[inline]
95 pub fn current_char(&self) -> Option<char> {
96 self.chars.get(self.position).copied()
97 }
98
99 #[inline]
101 pub fn next_char(&self) -> Option<char> {
102 self.chars.get(self.position + 1).copied()
103 }
104
105 #[inline]
108 pub fn consume(&mut self, character: char) -> bool {
109 if self.current_char() == Some(character) {
110 self.increment_position_by_char(character);
111
112 true
113 } else {
114 false
115 }
116 }
117
118 #[inline]
121 pub fn consume_with_next(&mut self, character: char) -> bool {
122 if self.next_char() == Some(character) {
123 let current_char = self.current_char().unwrap();
124
125 self.increment_position_by_char(current_char);
126 self.increment_position_by_char(character);
127
128 true
129 } else {
130 false
131 }
132 }
133
134 pub fn consume_identifier(&mut self) -> SmolStr {
137 let start = self.byte_position;
138 while let Some(character) = self.current_char() {
139 if can_be_identifier(character) {
140 self.increment_position_by_char(character);
141 } else {
142 break;
143 }
144 }
145
146 self.input[start..self.byte_position].into()
147 }
148
149 pub fn skip_trivia(&mut self) -> Vec<Trivia> {
151 let mut trivia = Vec::new();
152
153 loop {
154 let spaces = self.skip_whitespace();
155
156 if !spaces.is_empty() {
157 trivia.push(Trivia::Spaces(spaces));
158 } else if self.current_char() == Some('-') && self.consume_with_next('-') {
159 trivia.push(Trivia::Comment(Comment::try_lex(self).unwrap()));
160 } else {
161 break;
162 }
163 }
164
165 trivia
166 }
167
168 pub fn skip_whitespace(&mut self) -> SmolStr {
171 let start = self.byte_position;
172 while let Some(character) = self.current_char() {
173 if character.is_whitespace() {
174 self.increment_position_by_char(character);
175 } else {
176 break;
177 }
178 }
179
180 (start != self.byte_position)
181 .then(|| self.input[start..self.byte_position].into())
182 .unwrap_or_default()
183 }
184}
185
186impl Deref for Lexer<'_> {
187 type Target = State;
188
189 fn deref(&self) -> &Self::Target {
190 &self.state
191 }
192}
193
194impl DerefMut for Lexer<'_> {
195 fn deref_mut(&mut self) -> &mut Self::Target {
196 &mut self.state
197 }
198}
199
200pub trait Lexable: Sized {
202 fn try_lex(lexer: &mut Lexer) -> Option<Self>;
204}