1use crate::{error::Error, source::Source, token::Token};
10
/// A single tokenization rule tried by [`Lexer::get_token`].
pub trait LexerRule<'a, T> {
    /// Attempts to produce a token at the lexer's current position.
    ///
    /// Returns `Ok(Some(token))` on a match and `Ok(None)` when the rule does
    /// not match; a rule may have advanced the lexer while trying, and relies
    /// on `Lexer::get_token` to rewind on failure (see `generates_token`).
    /// Returns `Err` on a lexing error.
    fn get_token(&self, lexer: &mut Lexer<'a, T>) -> Result<Option<Token<T>>, Error>;

    /// Whether `Ok(None)` from `get_token` means "failed to match" (`true`,
    /// the default) — in which case `Lexer::get_token` rewinds the lexer to
    /// the position before this rule ran — or "deliberately consumed input
    /// without producing a token" (`false`, e.g. whitespace skipping), in
    /// which case the consumed input stays consumed.
    fn generates_token(&self) -> bool {
        true
    }
}
33
/// A rule-driven lexer over a borrowed [`Source`].
///
/// `T` is the token-kind type carried by the produced [`Token`]s.
pub struct Lexer<'a, T> {
    /// The source text being tokenized.
    pub source: &'a Source<'a>,
    /// Current byte offset into `source.code` (used to slice `code[position..]`).
    pub position: usize,
    /// The character at `position`, or `None` at end of input.
    pub current_char: Option<char>,
    /// Rules tried in declaration order by `get_token`; first match wins.
    rules: Vec<Box<dyn LexerRule<'a, T>>>,
}
45
46impl<'a, T> Lexer<'a, T> {
47 pub fn new(source: &'a Source<'a>, rules: Vec<Box<dyn LexerRule<'a, T>>>) -> Self {
49 let mut lexer = Lexer {
50 source,
51 position: 0,
52 current_char: None,
53 rules,
54 };
55
56 if lexer.position < lexer.source.code.len() {
57 lexer.current_char = Some(lexer.source.code[lexer.position..].chars().next().unwrap());
58 } else {
59 lexer.current_char = None;
60 }
61
62 lexer
63 }
64
65 pub fn advance(&mut self) {
67 if self.position < self.source.code.len() - 1 {
68 self.position += 1;
69 self.current_char = Some(self.source.code[self.position..].chars().next().unwrap());
70 } else {
71 self.current_char = None;
72 }
73 }
74
75 pub fn jump_to(&mut self, position: usize) {
77 if position < self.source.code.len() {
78 self.position = position;
79 self.current_char = Some(self.source.code[self.position..].chars().next().unwrap());
80 } else {
81 self.position = self.source.code.len() + 1;
82 self.current_char = None;
83 }
84 }
85
86 pub fn get_token(&mut self) -> Result<Option<Token<T>>, Error> {
92 let self_ptr = self as *mut Self;
95
96 for rule in &self.rules {
97 let prev_position = self.position;
98 let token = unsafe { rule.get_token(&mut *self_ptr) }?;
99
100 if let Some(token) = token {
101 return Ok(Some(token));
102 } else if rule.generates_token() {
103 unsafe {
104 (*self_ptr).jump_to(prev_position);
105 }
106 }
107 }
108
109 Ok(None)
110 }
111}
112
113pub mod utils {
115 use crate::lexer::LexerRule;
116
117 mod macros {
118 #[macro_export]
128 macro_rules! rules_vec {
129 ($($rule:expr),* $(,)?) => {
130 vec![$(Box::new($rule) as Box<dyn $crate::lexer::LexerRule<'_, _>>),*]
131 };
132 }
133
134 #[macro_export]
144 macro_rules! match_string {
145 ($string:expr, $token_type:ty, $token_value:expr, $rule_name:ident) => {
146 struct $rule_name;
147 impl<'a> $crate::lexer::LexerRule<'a, $token_type> for $rule_name {
148 fn get_token(
149 &self,
150 lexer: &mut $crate::lexer::Lexer<'a, $token_type>,
151 ) -> Result<Option<$crate::token::Token<$token_type>>, $crate::error::Error>
152 {
153 let start_pos = lexer.position;
154 let mut matched = true;
155
156 for c in $string.chars() {
157 if lexer.current_char == Some(c) {
158 lexer.advance();
159 } else {
160 matched = false;
161 break;
162 }
163 }
164
165 if matched {
166 Ok(Some($crate::token::Token::new(
167 $token_value,
168 $crate::span::Span::new(start_pos, lexer.position),
169 )))
170 } else {
171 Ok(None)
172 }
173 }
174 }
175 };
176 }
177
178 #[macro_export]
192 macro_rules! match_word {
193 ($word:expr, $token_type:ty, $token_value:expr, $rule_name:ident) => {
194 struct $rule_name;
195 impl<'a> $crate::lexer::LexerRule<'a, $token_type> for $rule_name {
196 fn get_token(
197 &self,
198 lexer: &mut $crate::lexer::Lexer<'a, $token_type>,
199 ) -> Result<Option<$crate::token::Token<$token_type>>, $crate::error::Error>
200 {
201 let start_pos = lexer.position;
202 let mut matched = true;
203
204 for c in $word.chars() {
205 if lexer.current_char == Some(c) {
206 lexer.advance();
207 } else {
208 matched = false;
209 break;
210 }
211 }
212
213 if matched
214 && (lexer.current_char == Some(' ') || lexer.current_char.is_none())
215 {
216 Ok(Some($crate::token::Token::new(
217 $token_value,
218 $crate::span::Span::new(start_pos, lexer.position),
219 )))
220 } else {
221 Ok(None)
222 }
223 }
224 }
225 };
226 }
227
228 pub use match_string;
229 pub use match_word;
230 pub use rules_vec;
231 }
232
233 pub struct SkipWhitespaceRule;
235 impl<'a, T> LexerRule<'a, T> for SkipWhitespaceRule {
236 fn get_token(
237 &self,
238 lexer: &mut super::Lexer<'a, T>,
239 ) -> Result<Option<crate::token::Token<T>>, crate::error::Error> {
240 while let Some(c) = lexer.current_char {
241 if c.is_whitespace() {
242 lexer.advance();
243 } else {
244 break;
245 }
246 }
247 Ok(None)
248 }
249
250 fn generates_token(&self) -> bool {
251 false
252 }
253 }
254
255 pub use macros::{match_string, match_word, rules_vec};
256
    #[cfg(test)]
    mod tests {
        use super::*;
        use crate::{lexer::Lexer, source::Source};

        /// The whitespace rule consumes the leading run and yields no token.
        #[test]
        fn test_skip_whitespace_rule() {
            // NOTE(review): the `position == 5` assertion implies five
            // leading whitespace characters in this literal — confirm the
            // literal was not whitespace-collapsed during extraction.
            let source = Source::from_str("test_input.txt", " let x = 10;");
            let rules = rules_vec![SkipWhitespaceRule];

            let mut lexer = Lexer::<String>::new(&source, rules);
            let token = lexer.get_token().unwrap();

            assert!(token.is_none());
            assert_eq!(lexer.position, 5);
            assert_eq!(lexer.current_char, Some('l'));
        }

        /// `rules_vec!` boxes each listed rule into a trait object.
        #[test]
        fn test_rules_vec_macro() {
            let rules: Vec<Box<dyn LexerRule<'_, String> + 'static>> =
                rules_vec![SkipWhitespaceRule];
            assert_eq!(rules.len(), 1);
            assert!(rules[0].generates_token() == false);
        }

        /// A `match_string!` rule matches its literal and leaves the cursor
        /// just past it; a second attempt at a non-matching position yields
        /// `None` and rewinds to where it started.
        #[test]
        fn test_match_string_macro() {
            match_string!("let", String, "let".to_string(), LetRule);
            let source = Source::from_str("test_input.txt", "let x = 10;");
            let rules = rules_vec![LetRule];
            let mut lexer = Lexer::<String>::new(&source, rules);
            let token = lexer.get_token().unwrap();

            assert!(token.is_some());
            let token = token.unwrap();
            assert_eq!(token.kind, "let");

            // Cursor now sits on the space after "let"; the rule fails there
            // and the lexer rewinds to position 3.
            let token = lexer.get_token().unwrap();
            assert!(token.is_none());
            assert_eq!(lexer.position, 3);
            assert_eq!(lexer.current_char, Some(' '));
        }

        /// `match_word!` requires a boundary after the literal: "let " is a
        /// match, "letx" is not.
        #[test]
        fn test_match_word_macro() {
            match_word!("let", String, "let".to_string(), LetRule);
            let source = Source::from_str("test_input.txt", "let x = 10;");
            let rules = rules_vec![LetRule];
            let mut lexer = Lexer::<String>::new(&source, rules);

            let token = lexer.get_token().unwrap();
            assert!(token.is_some());
            let token = token.unwrap();
            assert_eq!(token.kind, "let");

            // "letx" — the literal matches but the boundary check fails.
            let source = Source::from_str("test_input.txt", "letx = 10;");
            let rules = rules_vec![LetRule];
            let mut lexer = Lexer::<String>::new(&source, rules);
            let token = lexer.get_token().unwrap();
            assert!(token.is_none());
        }
    }
320
321 }
323
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{error::Error, source::Source, span::Span, token::Token};

    /// A fresh lexer starts at offset 0 on the first character.
    #[test]
    fn test_lexer_new() {
        let source = Source::from_str("test_input.txt", "let x = 10;");
        let rules = utils::rules_vec![utils::SkipWhitespaceRule];
        let lexer = Lexer::<u8>::new(&source, rules);

        assert_eq!(lexer.position, 0);
        assert_eq!(lexer.current_char, Some('l'));
    }

    /// `advance` moves one character forward.
    #[test]
    fn test_lexer_advance() {
        let source = Source::from_str("test_input.txt", "let x = 10;");
        let rules = utils::rules_vec![utils::SkipWhitespaceRule];
        let mut lexer = Lexer::<u8>::new(&source, rules);

        lexer.advance();
        assert_eq!(lexer.position, 1);
        assert_eq!(lexer.current_char, Some('e'));
    }

    /// `jump_to` repositions the cursor to an in-range offset.
    #[test]
    fn test_lexer_jump_to() {
        let source = Source::from_str("test_input.txt", "let x = 10;");
        let rules = utils::rules_vec![utils::SkipWhitespaceRule];
        let mut lexer = Lexer::<u8>::new(&source, rules);

        lexer.jump_to(4);
        assert_eq!(lexer.position, 4);
        assert_eq!(lexer.current_char, Some('x'));
    }

    /// `get_token` returns the first token any rule produces.
    #[test]
    fn test_lexer_get_token() {
        let source = Source::from_str("test_input.txt", "let x = 10;");

        // Stub rule: fires on 'l', advances once, and fabricates a 0..3 span
        // (the span is hand-written, not derived from the cursor).
        struct TestRule;
        impl<'a> LexerRule<'a, String> for TestRule {
            fn get_token(
                &self,
                lexer: &mut Lexer<'a, String>,
            ) -> Result<Option<Token<String>>, Error> {
                if lexer.current_char == Some('l') {
                    lexer.advance();
                    Ok(Some(Token::new("let".to_string(), Span::new(0, 3))))
                } else {
                    Ok(None)
                }
            }
        }

        let rules = utils::rules_vec![utils::SkipWhitespaceRule, TestRule];
        let mut lexer = Lexer::<String>::new(&source, rules);
        let token = lexer.get_token().unwrap();

        assert!(token.is_some());

        let token = token.unwrap();

        assert_eq!(token.kind, "let");
        assert_eq!(token.span.start, 0);
        assert_eq!(token.span.end, 3);
    }
}