1use std::fmt;
6
/// Result of lexing one item: `Ok((start, token, end))` carrying the token
/// between two locations, or `Err(error)` when lexing failed.
pub type Spanned<Token, Location, Error> = Result<(Location, Token, Location), Error>;
8
/// A single lexical token. String-like variants borrow their text directly
/// from the lexer's input, hence the `'input` lifetime.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'input> {
    // --- Literals and names ---
    /// Numeric literal, parsed into an `f64` (e.g. `123`, `45.67`, `.89`).
    Number(f64),
    /// Text between a pair of `"` quotes, as a slice of the input
    /// (escape sequences are kept exactly as written).
    DoubleQuotedString(&'input str),
    /// Text between a pair of `'` quotes, as a slice of the input
    /// (escape sequences are kept exactly as written).
    SingleQuotedString(&'input str),
    /// The keywords `true` / `false`.
    Boolean(bool),
    /// The keyword `null`.
    Null,
    /// Any other name: letters, digits and `_`, not starting with a digit.
    Identifier(&'input str),

    // --- Arithmetic operators ---
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `//`
    FloorDivide,
    /// `%`
    Modulus,
    /// `^`
    Exponent,

    // --- Comparison operators ---
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `>`
    Greater,
    /// `>=`
    GreaterEqual,
    /// `<`
    Less,
    /// `<=`
    LessEqual,
    /// The keyword `in`.
    In,

    // --- Logical operators ---
    /// `&&`
    And,
    /// `||`
    Or,

    // --- Punctuation ---
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `?`
    Question,
    /// `|`
    Pipe,

    /// Displayed as a single space. The lexer code in this file skips
    /// whitespace and never produces this variant itself.
    Whitespace,
}
58
59impl<'input> fmt::Display for Token<'input> {
60 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
61 match self {
62 Token::Number(n) => write!(f, "{}", n),
63 Token::DoubleQuotedString(s) => write!(f, "\"{}\"", s),
64 Token::SingleQuotedString(s) => write!(f, "'{}'", s),
65 Token::Boolean(b) => write!(f, "{}", b),
66 Token::Null => write!(f, "null"),
67 Token::Identifier(s) => write!(f, "{}", s),
68 Token::Plus => write!(f, "+"),
69 Token::Minus => write!(f, "-"),
70 Token::Multiply => write!(f, "*"),
71 Token::Divide => write!(f, "/"),
72 Token::FloorDivide => write!(f, "//"),
73 Token::Modulus => write!(f, "%"),
74 Token::Exponent => write!(f, "^"),
75 Token::Equal => write!(f, "=="),
76 Token::NotEqual => write!(f, "!="),
77 Token::Greater => write!(f, ">"),
78 Token::GreaterEqual => write!(f, ">="),
79 Token::Less => write!(f, "<"),
80 Token::LessEqual => write!(f, "<="),
81 Token::In => write!(f, "in"),
82 Token::And => write!(f, "&&"),
83 Token::Or => write!(f, "||"),
84 Token::LeftParen => write!(f, "("),
85 Token::RightParen => write!(f, ")"),
86 Token::LeftBracket => write!(f, "["),
87 Token::RightBracket => write!(f, "]"),
88 Token::LeftBrace => write!(f, "{{"),
89 Token::RightBrace => write!(f, "}}"),
90 Token::Comma => write!(f, ","),
91 Token::Dot => write!(f, "."),
92 Token::Colon => write!(f, ":"),
93 Token::Question => write!(f, "?"),
94 Token::Pipe => write!(f, "|"),
95 Token::Whitespace => write!(f, " "),
96 }
97 }
98}
99
/// Lexer state over a borrowed input string. Tokens are produced by
/// iterating over the lexer.
#[derive(Debug, Clone)]
pub struct Lexer<'input> {
    /// The complete text being lexed; token slices borrow from it.
    input: &'input str,
    /// Current scan offset into `input`; starts at 0.
    position: usize,
    /// Line number used in error reports; starts at 1.
    line: usize,
    /// Column number used in error reports; starts at 1.
    column: usize,
}
107
/// Error produced when the lexer cannot form a token.
#[derive(Debug, Clone, PartialEq)]
pub struct LexError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Line number recorded by the lexer when the error was raised.
    pub line: usize,
    /// Column number recorded by the lexer when the error was raised.
    pub column: usize,
}
114
115impl fmt::Display for LexError {
116 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
117 write!(
118 f,
119 "Lexical error at line {}, column {}: {}",
120 self.line, self.column, self.message
121 )
122 }
123}
124
// Marker impl: lets `LexError` be used wherever a `std::error::Error` is
// expected (e.g. boxed as `Box<dyn Error>`); all methods keep their defaults.
impl std::error::Error for LexError {}
126
127impl<'input> Lexer<'input> {
128 pub fn new(input: &'input str) -> Self {
129 Lexer {
130 input,
131 position: 0,
132 line: 1,
133 column: 1,
134 }
135 }
136}
137
138impl<'input> Iterator for Lexer<'input> {
139 type Item = Spanned<Token<'input>, usize, LexError>;
140
141 fn next(&mut self) -> Option<Self::Item> {
142 self.skip_whitespace();
144
145 if self.is_at_end() {
147 return None;
148 }
149
150 let start_pos = self.position;
151 match self.next_token_after_whitespace() {
152 Ok(token) => Some(Ok((start_pos, token, self.position))),
153 Err(error) => Some(Err(error)),
154 }
155 }
156}
157
158impl<'input> Lexer<'input> {
159 fn next_token_after_whitespace(&mut self) -> Result<Token<'input>, LexError> {
160 let ch = self.current_char();
162
163 match ch {
164 '+' => {
166 self.advance();
167 Ok(Token::Plus)
168 }
169 '-' => {
170 self.advance();
171 Ok(Token::Minus)
172 }
173 '*' => {
174 self.advance();
175 Ok(Token::Multiply)
176 }
177 '%' => {
178 self.advance();
179 Ok(Token::Modulus)
180 }
181 '^' => {
182 self.advance();
183 Ok(Token::Exponent)
184 }
185 '(' => {
186 self.advance();
187 Ok(Token::LeftParen)
188 }
189 ')' => {
190 self.advance();
191 Ok(Token::RightParen)
192 }
193 '[' => {
194 self.advance();
195 Ok(Token::LeftBracket)
196 }
197 ']' => {
198 self.advance();
199 Ok(Token::RightBracket)
200 }
201 '{' => {
202 self.advance();
203 Ok(Token::LeftBrace)
204 }
205 '}' => {
206 self.advance();
207 Ok(Token::RightBrace)
208 }
209 ',' => {
210 self.advance();
211 Ok(Token::Comma)
212 }
213 ':' => {
214 self.advance();
215 Ok(Token::Colon)
216 }
217 '?' => {
218 self.advance();
219 Ok(Token::Question)
220 }
221 '|' => {
222 self.advance();
223 if self.current_char() == '|' {
224 self.advance();
225 Ok(Token::Or)
226 } else {
227 Ok(Token::Pipe)
228 }
229 }
230
231 '/' => {
233 self.advance();
234 if self.current_char() == '/' {
235 self.advance();
236 Ok(Token::FloorDivide)
237 } else {
238 Ok(Token::Divide)
239 }
240 }
241
242 '=' => {
243 self.advance();
244 if self.current_char() == '=' {
245 self.advance();
246 Ok(Token::Equal)
247 } else {
248 Err(LexError {
249 message: "Unexpected character '='. Did you mean '=='?".to_string(),
250 line: self.line,
251 column: self.column,
252 })
253 }
254 }
255
256 '!' => {
257 self.advance();
258 if self.current_char() == '=' {
259 self.advance();
260 Ok(Token::NotEqual)
261 } else {
262 Err(LexError {
263 message: "Unexpected character '!'. Did you mean '!='?".to_string(),
264 line: self.line,
265 column: self.column,
266 })
267 }
268 }
269
270 '>' => {
271 self.advance();
272 if self.current_char() == '=' {
273 self.advance();
274 Ok(Token::GreaterEqual)
275 } else {
276 Ok(Token::Greater)
277 }
278 }
279
280 '<' => {
281 self.advance();
282 if self.current_char() == '=' {
283 self.advance();
284 Ok(Token::LessEqual)
285 } else {
286 Ok(Token::Less)
287 }
288 }
289
290 '&' => {
291 self.advance();
292 if self.current_char() == '&' {
293 self.advance();
294 Ok(Token::And)
295 } else {
296 Err(LexError {
297 message: "Unexpected character '&'. Did you mean '&&'?".to_string(),
298 line: self.line,
299 column: self.column,
300 })
301 }
302 }
303
304 '"' => self.scan_double_quoted_string(),
306 '\'' => self.scan_single_quoted_string(),
307
308 c if c.is_ascii_digit() => self.scan_number(),
310
311 '.' => {
313 if self.position + 1 < self.input.len() {
314 let next_char = self.input.chars().nth(self.position + 1).unwrap_or('\0');
315 if next_char.is_ascii_digit() {
316 self.scan_number()
317 } else {
318 self.advance();
319 Ok(Token::Dot)
320 }
321 } else {
322 self.advance();
323 Ok(Token::Dot)
324 }
325 }
326
327 c if c.is_alphabetic() || c == '_' => self.scan_identifier(),
329
330 _ => Err(LexError {
331 message: format!("Unexpected character '{}'", ch),
332 line: self.line,
333 column: self.column,
334 }),
335 }
336 }
337
338 fn scan_double_quoted_string(&mut self) -> Result<Token<'input>, LexError> {
339 self.advance(); let start_pos = self.position;
341
342 while !self.is_at_end() {
344 let ch = self.current_char();
345
346 if ch == '"' {
347 let end_pos = self.position;
349 self.advance(); let string_slice = &self.input[start_pos..end_pos];
351 return Ok(Token::DoubleQuotedString(string_slice));
352 } else if ch == '\\' {
353 self.advance(); if !self.is_at_end() && self.current_char() == '"' {
356 self.advance(); } else {
358 return Err(LexError {
359 message: "Invalid escape sequence in double-quoted string".to_string(),
360 line: self.line,
361 column: self.column,
362 });
363 }
364 } else {
365 self.advance();
366 }
367 }
368
369 Err(LexError {
370 message: "Unterminated string literal".to_string(),
371 line: self.line,
372 column: self.column,
373 })
374 }
375
376 fn scan_single_quoted_string(&mut self) -> Result<Token<'input>, LexError> {
377 self.advance(); let start_pos = self.position;
379
380 while !self.is_at_end() {
382 let ch = self.current_char();
383
384 if ch == '\'' {
385 let end_pos = self.position;
387 self.advance(); let string_slice = &self.input[start_pos..end_pos];
389 return Ok(Token::SingleQuotedString(string_slice));
390 } else if ch == '\\' {
391 self.advance(); if !self.is_at_end() && self.current_char() == '\'' {
394 self.advance(); } else {
396 return Err(LexError {
397 message: "Invalid escape sequence in single-quoted string".to_string(),
398 line: self.line,
399 column: self.column,
400 });
401 }
402 } else {
403 self.advance();
404 }
405 }
406
407 Err(LexError {
408 message: "Unterminated string literal".to_string(),
409 line: self.line,
410 column: self.column,
411 })
412 }
413
414 fn scan_number(&mut self) -> Result<Token<'input>, LexError> {
415 let start_pos = self.position;
416
417 if self.current_char() == '.' {
419 self.advance();
420 }
421
422 while !self.is_at_end() && self.current_char().is_ascii_digit() {
424 self.advance();
425 }
426
427 if !&self.input[start_pos..self.position].starts_with('.')
429 && !self.is_at_end()
430 && self.current_char() == '.'
431 {
432 if self.position + 1 < self.input.len() {
434 let next_char = self.input.chars().nth(self.position + 1).unwrap_or('\0');
435 if next_char.is_ascii_digit() {
436 self.advance(); while !self.is_at_end() && self.current_char().is_ascii_digit() {
440 self.advance();
441 }
442 }
443 }
444 }
445
446 let number_str = &self.input[start_pos..self.position];
447 match number_str.parse::<f64>() {
448 Ok(num) => Ok(Token::Number(num)),
449 Err(_) => Err(LexError {
450 message: format!("Invalid number format: {}", number_str),
451 line: self.line,
452 column: self.column,
453 }),
454 }
455 }
456
457 fn scan_identifier(&mut self) -> Result<Token<'input>, LexError> {
458 let start_pos = self.position;
459
460 while !self.is_at_end() {
461 let ch = self.current_char();
462 if ch.is_alphanumeric() || ch == '_' {
463 self.advance();
464 } else {
465 break;
466 }
467 }
468
469 let identifier = &self.input[start_pos..self.position];
470
471 let token = match identifier {
473 "true" => Token::Boolean(true),
474 "false" => Token::Boolean(false),
475 "null" => Token::Null,
476 "in" => Token::In,
477 _ => Token::Identifier(identifier),
478 };
479
480 Ok(token)
481 }
482
483 fn skip_whitespace(&mut self) {
484 while !self.is_at_end() && self.current_char().is_whitespace() {
485 if self.current_char() == '\n' {
486 self.line += 1;
487 self.column = 1;
488 } else {
489 self.column += 1;
490 }
491 self.advance();
492 }
493 }
494
495 fn current_char(&self) -> char {
496 self.input.chars().nth(self.position).unwrap_or('\0')
497 }
498
499 fn advance(&mut self) {
500 if !self.is_at_end() {
501 self.position += 1;
502 self.column += 1;
503 }
504 }
505
506 fn is_at_end(&self) -> bool {
507 self.position >= self.input.len()
508 }
509}
510
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` to completion and returns just the tokens, dropping the
    /// span locations. Panics (failing the test) if the lexer reports an error.
    fn lex_tokens(source: &str) -> Vec<Token> {
        let spanned: Result<Vec<_>, _> = Lexer::new(source).collect();
        spanned
            .unwrap()
            .into_iter()
            .map(|(_, token, _)| token)
            .collect()
    }

    #[test]
    fn test_basic_tokens() {
        let expected = vec![
            Token::Plus,
            Token::Minus,
            Token::Multiply,
            Token::Divide,
            Token::Modulus,
            Token::Exponent,
        ];
        assert_eq!(lex_tokens("+ - * / % ^"), expected);
    }

    #[test]
    fn test_numbers() {
        let expected = vec![
            Token::Number(123.0),
            Token::Number(45.67),
            Token::Number(0.89),
        ];
        assert_eq!(lex_tokens("123 45.67 .89"), expected);
    }

    #[test]
    fn test_strings() {
        let expected = vec![
            Token::DoubleQuotedString("hello"),
            Token::SingleQuotedString("world"),
        ];
        assert_eq!(lex_tokens(r#""hello" 'world'"#), expected);
    }

    #[test]
    fn test_identifiers_and_keywords() {
        let expected = vec![
            Token::Identifier("foo"),
            Token::Boolean(true),
            Token::Boolean(false),
            Token::Null,
            Token::In,
        ];
        assert_eq!(lex_tokens("foo true false null in"), expected);
    }

    #[test]
    fn test_complex_expression() {
        let expected = vec![
            Token::Identifier("foo"),
            Token::Dot,
            Token::Identifier("bar"),
            Token::LeftBracket,
            Token::Number(0.0),
            Token::RightBracket,
            Token::Equal,
            Token::SingleQuotedString("test"),
            Token::And,
            Token::LeftParen,
            Token::Identifier("x"),
            Token::Greater,
            Token::Number(1.0),
            Token::RightParen,
        ];
        assert_eq!(lex_tokens("foo.bar[0] == 'test' && (x > 1)"), expected);
    }
}