1use log::error;
4
5use token::Token;
6
7use crate::source::Position;
8
9pub mod token;
10
/// A token together with the source position attached to it.
#[derive(Debug, Clone, PartialEq)]
pub struct SourceToken {
    /// What kind of token this is (including any literal payload).
    kind: Token,
    /// Position in the source text associated with this token.
    source: Position,
}
17
/// Turns source text into a stream of [`SourceToken`]s, one token per call.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// The complete text being tokenized.
    source: &'a str,
    /// Where in `source` the token currently being lexed begins.
    start: usize,
    /// Where in `source` the next unread character sits.
    pos: usize,
    /// Current line number; the first line is 1.
    line: usize,
    /// Column counter; starts at 1 and grows by one per consumed character.
    column: usize,
}
26
27impl<'a> Lexer<'a> {
28 pub fn new(source: &'a str) -> Self {
29 Lexer {
30 source,
31 pos: 0,
32 start: 0,
33 line: 1,
34 column: 1,
35 }
36 }
37
38 pub fn next_token(&mut self) -> SourceToken {
39 let mut maybe_token = self.advance_token();
40 while maybe_token.is_none() {
41 maybe_token = self.advance_token();
42 }
43 maybe_token.unwrap()
44 }
45
46 fn advance_token(&mut self) -> Option<SourceToken> {
47 self.skip_whitespace();
48 if self.at_end() {
49 return Some(Token::EndOfFile.with_position(self.src_pos()));
50 }
51 self.start = self.pos;
52 let c = self.advance().expect("character exhausted prematurely");
53 match c {
54 '+' => Some(Token::Plus.with_position(self.src_pos())),
55 '-' => Some(Token::Minus.with_position(self.src_pos())),
56 '*' => Some(Token::Star.with_position(self.src_pos())),
57 '/' => {
58 if let Some('/') = self.peek(0) {
59 self.advance();
60 while let Some(c) = self.peek(0) {
61 if c == '\n' {
62 break;
63 }
64 self.advance();
65 }
66 None
67 } else {
68 Some(Token::Slash.with_position(self.src_pos()))
69 }
70 }
71 '(' => Some(Token::LeftParen.with_position(self.src_pos())),
72 ')' => Some(Token::RightParen.with_position(self.src_pos())),
73 '{' => Some(Token::LeftCurly.with_position(self.src_pos())),
74 '}' => Some(Token::RightCurly.with_position(self.src_pos())),
75 '[' => Some(Token::LeftSquare.with_position(self.src_pos())),
76 ']' => Some(Token::RightSquare.with_position(self.src_pos())),
77 '=' => {
78 if self.advance_if('=') {
79 Some(Token::EqualEqual.with_position(self.src_pos()))
80 } else {
81 Some(Token::Equal.with_position(self.src_pos()))
82 }
83 }
84 '!' => {
85 if self.advance_if('=') {
86 Some(Token::BangEqual.with_position(self.src_pos()))
87 } else {
88 Some(Token::Bang.with_position(self.src_pos()))
89 }
90 }
91 '<' => {
92 if self.advance_if('=') {
93 Some(Token::LessEqual.with_position(self.src_pos()))
94 } else {
95 Some(Token::Less.with_position(self.src_pos()))
96 }
97 }
98 '>' => {
99 if self.advance_if('=') {
100 Some(Token::GreaterEqual.with_position(self.src_pos()))
101 } else {
102 Some(Token::Greater.with_position(self.src_pos()))
103 }
104 }
105 ';' => Some(Token::Semicolon.with_position(self.src_pos())),
106 ',' => Some(Token::Comma.with_position(self.src_pos())),
107 '0'..='9' => Some(self.number()),
108 'a'..='z' | 'A'..='Z' | '_' => Some(self.identifier()),
109 '"' => Some(self.string_literal()),
110 _ => {
111 error!("unknown token: {}", c);
112 Some(Token::Error.with_position(self.src_pos()))
113 }
114 }
115 }
116
117 fn number(&mut self) -> SourceToken {
118 while let Some(c) = self.peek(0) {
119 if !c.is_ascii_digit() {
120 break;
121 }
122 self.advance();
123 }
124
125 if let Some('.') = self.peek(0) {
126 self.advance();
127 while let Some(c) = self.peek(0) {
128 if !c.is_ascii_digit() {
129 break;
130 }
131 self.advance();
132 }
133 }
134 let number_literal = &self.source[self.start..self.pos];
135 let value: f64 = number_literal.parse().expect("must be a correct number");
136 Token::Number(value).with_position(self.src_pos())
137 }
138
139 fn string_literal(&mut self) -> SourceToken {
140 while let Some(c) = self.peek(0) {
141 if c == '"' {
142 break;
143 }
144 self.advance();
145 }
146 self.advance();
147 let string_literal = &self.source[(self.start + 1)..(self.pos - 1)];
148 Token::StringLiteral(string_literal.to_string()).with_position(self.src_pos())
149 }
150
151 fn identifier(&mut self) -> SourceToken {
152 while let Some(c) = self.peek(0) {
153 if !c.is_ascii_alphanumeric() && c != '_' {
154 break;
155 }
156 self.advance();
157 }
158 let identifier = &self.source[self.start..self.pos];
159 match identifier {
160 "print" => Token::Print.with_position(self.src_pos()),
161 "let" => Token::Let.with_position(self.src_pos()),
162 "true" => Token::True.with_position(self.src_pos()),
163 "false" => Token::False.with_position(self.src_pos()),
164 "if" => Token::If.with_position(self.src_pos()),
165 "else" => Token::Else.with_position(self.src_pos()),
166 "while" => Token::While.with_position(self.src_pos()),
167 "fun" => Token::Fun.with_position(self.src_pos()),
168 "return" => Token::Return.with_position(self.src_pos()),
169 "nil" => Token::Nil.with_position(self.src_pos()),
170 _ => Token::Identifier(identifier.to_string()).with_position(self.src_pos()),
171 }
172 }
173
174 fn advance(&mut self) -> Option<char> {
175 let c = self.source.chars().nth(self.pos);
176 self.pos += 1;
177 self.column += 1;
178 c
179 }
180
181 fn advance_if(&mut self, c: char) -> bool {
182 if self.peek(0) == Some(c) {
183 self.advance();
184 true
185 } else {
186 false
187 }
188 }
189
190 fn peek(&self, offset: usize) -> Option<char> {
191 self.source.chars().nth(self.pos + offset)
192 }
193
194 fn at_end(&self) -> bool {
195 self.pos >= self.source.len()
196 }
197
198 fn skip_whitespace(&mut self) {
199 while let Some(c) = self.peek(0) {
200 if !c.is_ascii_whitespace() {
201 break;
202 }
203 if c == '\n' {
204 self.line += 1;
205 self.column = 0;
206 }
207 self.advance();
208 }
209 }
210
211 fn src_pos(&self) -> Position {
212 Position::new(self.line, self.column - 1)
213 }
214}
215
216impl<'a> Iterator for Lexer<'a> {
217 type Item = SourceToken;
218
219 fn next(&mut self) -> Option<Self::Item> {
220 match self.next_token() {
221 SourceToken {
222 kind: Token::EndOfFile,
223 ..
224 } => None,
225 t => Some(t),
226 }
227 }
228}
229
230impl From<Token> for SourceToken {
231 fn from(token: Token) -> Self {
232 SourceToken {
233 kind: token,
234 source: Position::default(),
235 }
236 }
237}
238
239impl SourceToken {
240 pub fn new(token: Token, source: Position) -> Self {
241 SourceToken {
242 kind: token,
243 source,
244 }
245 }
246
247 pub fn kind(&self) -> &Token {
248 &self.kind
249 }
250
251 pub fn source(&self) -> &Position {
252 &self.source
253 }
254}
255
256impl PartialEq<Token> for SourceToken {
257 fn eq(&self, other: &Token) -> bool {
258 &self.kind == other
259 }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and checks that the tokens come out as `expected`,
    /// in order. Tokens after the expected ones are not inspected.
    fn assert_tokens(source: &str, expected: &[Token]) {
        let mut lexer = Lexer::new(source);
        for token in expected {
            assert_eq!(lexer.next_token(), *token);
        }
    }

    #[test]
    fn empty_source() {
        assert_tokens("", &[Token::EndOfFile]);
    }

    #[test]
    fn arithmetic_operators() {
        assert_tokens("+", &[Token::Plus, Token::EndOfFile]);
    }

    #[test]
    fn integer() {
        assert_tokens("42", &[Token::Number(42.0), Token::EndOfFile]);
    }

    #[test]
    fn float_point_literal() {
        assert_tokens("5.52", &[Token::Number(5.52), Token::EndOfFile]);
    }

    #[test]
    fn arithmetic_expressions() {
        assert_tokens(
            "42 + 8 / 2",
            &[
                Token::Number(42.0),
                Token::Plus,
                Token::Number(8.0),
                Token::Slash,
                Token::Number(2.0),
            ],
        );
    }

    #[test]
    fn inline_comment() {
        assert_tokens(
            "42 + 7 // this is a comment",
            &[
                Token::Number(42.0),
                Token::Plus,
                Token::Number(7.0),
                Token::EndOfFile,
            ],
        );
    }

    #[test]
    fn line_comment() {
        assert_tokens(
            "// comment
            42 + 7",
            &[
                Token::Number(42.0),
                Token::Plus,
                Token::Number(7.0),
                Token::EndOfFile,
            ],
        );
    }

    #[test]
    fn print_statement() {
        assert_tokens(
            "print 42",
            &[Token::Print, Token::Number(42.0), Token::EndOfFile],
        );
    }

    #[test]
    fn identifier() {
        assert_tokens(
            "foo",
            &[Token::Identifier("foo".to_string()), Token::EndOfFile],
        );
    }

    #[test]
    fn variable_declaration_and_assignment() {
        assert_tokens(
            "let foo = 42;",
            &[
                Token::Let,
                Token::Identifier("foo".to_string()),
                Token::Equal,
                Token::Number(42.0),
                Token::Semicolon,
                Token::EndOfFile,
            ],
        );
    }

    #[test]
    fn comparisons() {
        assert_tokens(
            "= == != > >= < <=",
            &[
                Token::Equal,
                Token::EqualEqual,
                Token::BangEqual,
                Token::Greater,
                Token::GreaterEqual,
                Token::Less,
                Token::LessEqual,
            ],
        );
    }
}