1use std::num::ParseFloatError;
2
3use logos::Logos;
4
5use crate::error::{Error, ErrorS, SyntaxError};
6
/// A token stream over Lox source text.
///
/// Wraps a [`logos`]-generated lexer and post-processes its output: runs of
/// adjacent unrecognized input are merged into a single error by the
/// [`Iterator`] impl, which requires buffering one token of lookahead.
#[derive(Debug)]
pub struct Lexer<'a> {
    // The underlying logos-generated lexer.
    inner: logos::Lexer<'a, Token>,
    // One-token lookahead: a valid token read while extending an error span,
    // held here so it can be yielded on the next call to `next`.
    pending: Option<(usize, Token, usize)>,
}
12
13impl<'a> Lexer<'a> {
14 pub fn new(source: &'a str) -> Self {
15 Self { inner: Token::lexer(source), pending: None }
16 }
17}
18
impl<'a> Iterator for Lexer<'a> {
    /// A spanned token `(start, token, end)` in byte offsets, or a spanned error.
    type Item = Result<(usize, Token, usize), ErrorS>;

    fn next(&mut self) -> Option<Self::Item> {
        // First drain the lookahead buffered by a previous error merge.
        if let Some(token) = self.pending.take() {
            return Some(Ok(token));
        }

        match self.inner.next()? {
            Token::Error => {
                let mut span = self.inner.span();

                // An error slice beginning with a quote means the string
                // regex never found a closing `"` — report it specifically.
                if self.inner.slice().starts_with('"') {
                    return Some(Err((Error::SyntaxError(SyntaxError::UnterminatedString), span)));
                }

                // Extend the error span over every immediately adjacent
                // token (valid or not), so e.g. `@foo` reports one error
                // for the whole run instead of an error followed by `foo`.
                // The first non-adjacent token is stashed in `pending`.
                while let Some(token) = self.inner.next() {
                    let span_new = self.inner.span();
                    if span.end == span_new.start {
                        span.end = span_new.end;
                    } else {
                        self.pending = Some((span_new.start, token, span_new.end));
                        break;
                    }
                }

                Some(Err((
                    Error::SyntaxError(SyntaxError::UnexpectedInput {
                        token: self.inner.source()[span.start..span.end].to_string(),
                    }),
                    span,
                )))
            }
            token => {
                let span = self.inner.span();
                Some(Ok((span.start, token, span.end)))
            }
        }
    }
}
61
/// All lexical tokens of the language, produced by the `logos`-derived lexer.
/// Whitespace and `//` line comments are skipped (see the `Error` variant's
/// skip patterns below).
#[derive(Clone, Debug, Logos, PartialEq)]
pub enum Token {
    // Single-character punctuation and arithmetic operators.
    #[token("(")]
    LtParen,
    #[token(")")]
    RtParen,
    #[token("{")]
    LtBrace,
    #[token("}")]
    RtBrace,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("-")]
    Minus,
    #[token("+")]
    Plus,
    #[token(";")]
    Semicolon,
    #[token("/")]
    Slash,
    #[token("*")]
    Asterisk,

    // One- and two-character comparison/assignment operators. Logos prefers
    // the longest match, so `!=` wins over `!` followed by `=`.
    #[token("!")]
    Bang,
    #[token("!=")]
    BangEqual,
    #[token("=")]
    Equal,
    #[token("==")]
    EqualEqual,
    #[token(">")]
    Greater,
    #[token(">=")]
    GreaterEqual,
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,

    // Literals; each regex has a callback that extracts the payload.
    #[regex("[a-zA-Z_][a-zA-Z0-9_]*", lex_identifier)]
    Identifier(String),
    #[regex(r#""[^"]*""#, lex_string)]
    String(String),
    #[regex(r#"[0-9]+(\.[0-9]+)?"#, lex_number)]
    Number(f64),

    // Keywords. `#[token]` matches take priority over the identifier regex,
    // so e.g. `and` lexes as `And`, not `Identifier("and")`.
    #[token("and")]
    And,
    #[token("class")]
    Class,
    #[token("else")]
    Else,
    #[token("false")]
    False,
    #[token("for")]
    For,
    #[token("fun")]
    Fun,
    #[token("if")]
    If,
    #[token("nil")]
    Nil,
    #[token("or")]
    Or,
    #[token("print")]
    Print,
    #[token("return")]
    Return,
    #[token("super")]
    Super,
    #[token("this")]
    This,
    #[token("true")]
    True,
    #[token("var")]
    Var,
    #[token("while")]
    While,

    // Catch-all for unrecognized input; line comments and whitespace are
    // skipped here rather than emitted as tokens.
    #[regex(r"//.*", logos::skip)]
    #[regex(r"[ \r\n\t\f]+", logos::skip)]
    #[error]
    Error,
}
153
154fn lex_number(lexer: &mut logos::Lexer<Token>) -> Result<f64, ParseFloatError> {
155 let slice = lexer.slice();
156 slice.parse::<f64>()
157}
158
159fn lex_string(lexer: &mut logos::Lexer<Token>) -> String {
160 let slice = lexer.slice();
161 slice[1..slice.len() - 1].to_string()
162}
163
164fn lex_identifier(lexer: &mut logos::Lexer<Token>) -> String {
165 let slice = lexer.slice();
166 slice.to_string()
167}
168
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn lex_invalid_token() {
        // `@` is not a valid token; the adjacent identifier `foo` is folded
        // into the same error span, while `bar` lexes normally.
        let actual: Vec<_> = Lexer::new("@foo bar").collect();
        let expected = vec![
            Err((
                Error::SyntaxError(SyntaxError::UnexpectedInput { token: "@foo".to_string() }),
                0..4,
            )),
            Ok((5, Token::Identifier("bar".to_string()), 8)),
        ];
        assert_eq!(expected, actual);
    }

    #[test]
    fn lex_unterminated_string() {
        // A quote with no closing quote produces a single spanned error.
        let actual: Vec<_> = Lexer::new("\"\nfoo").collect();
        let expected = vec![Err((Error::SyntaxError(SyntaxError::UnterminatedString), 0..5))];
        assert_eq!(expected, actual);
    }
}