1use std::vec;
2
3use crate::error::{Error, Result};
4use crate::token::Token;
5use crate::value::Value;
6
/// Character-level scanner that turns a source string into tokens.
///
/// All positions are counted in characters (not bytes) from the beginning of
/// `source`.
#[derive(Debug)]
pub struct Scanner<'a> {
    /// The complete input being scanned.
    source: &'a str,
    /// Position at which the token currently being scanned begins.
    start: usize,
    /// Position of the next character that has not been consumed yet.
    current: usize,
    /// Total number of characters in `source`.
    end: usize,
}
14
15impl<'a> Scanner<'a> {
16 pub fn tokenize(source: &'a str) -> Result<Vec<Token>> {
30 let mut scanner = Scanner {
31 source,
32 start: 0,
33 current: 0,
34 end: source.chars().count(),
35 };
36
37 let mut tokens: Vec<Token> = vec![];
38
39 scanner.skip_whitespace();
40
41 while !scanner.is_at_end() {
42 tokens.push(scanner.next_token()?);
43 scanner.skip_whitespace();
44 }
45
46 if tokens.is_empty() {
47 Err(Error::Eof)
48 } else {
49 Ok(tokens)
50 }
51 }
52
53 fn next_token(&mut self) -> Result<Token> {
54 self.start = self.current;
55 let next = self.next_char().ok_or(Error::Eof)?;
56
57 if Scanner::is_identifier_start(next) {
58 return Ok(self.identifier());
59 }
60
61 if char::is_numeric(next) {
62 return self.number();
63 }
64
65 match next {
66 '\'' => self.string(),
67 '.' => self.number(), '(' => Ok(Token::LeftParen),
69 ')' => Ok(Token::RightParen),
70 '[' => Ok(Token::LeftBracket),
71 ']' => Ok(Token::RightBracket),
72 ',' => Ok(Token::Comma),
73 '+' => Ok(Token::Plus),
74 '-' => Ok(Token::Minus),
75 '*' => Ok(Token::Star),
76 '/' => Ok(Token::Slash),
77 '=' => Ok(Token::Equal),
78 '>' => Ok(self.greater()),
79 '<' => Ok(self.lesser()),
80 _ => Err(Error::InvalidCharacter(next)),
81 }
82 }
83
84 fn is_at_end(&self) -> bool {
85 self.current >= self.end
86 }
87
88 fn advance(&mut self) {
89 self.current += 1;
90 }
91
92 fn advance_numeric(&mut self) {
93 while let Some(c) = self.peek() {
94 if c.is_numeric() {
95 self.advance();
96 } else {
97 break;
98 }
99 }
100 }
101
102 fn next_char(&mut self) -> Option<char> {
103 self.advance();
104 self.source.chars().nth(self.current - 1)
105 }
106
107 fn peek(&self) -> Option<char> {
108 self.peek_ahead(0)
109 }
110
111 fn peek_ahead(&self, offset: usize) -> Option<char> {
112 self.source.chars().nth(self.current + offset)
113 }
114
115 fn skip_whitespace(&mut self) {
116 loop {
117 while let Some(' ' | '\r' | '\t' | '\n') = self.peek() {
118 self.advance();
119 }
120
121 if !self.skip_comments() {
122 break;
124 };
125 }
126 }
127
128 fn skip_comments(&mut self) -> bool {
129 match (self.peek_ahead(0), self.peek_ahead(1)) {
130 (Some('/'), Some('/')) => {
131 while self.next_char().is_some_and(|c| c != '\n') {
132 }
134 true }
136 (Some('{'), _) => {
137 self.advance(); let mut comment_depth: i32 = 1;
140 while comment_depth > 0 {
141 match self.next_char() {
142 Some('{') => comment_depth += 1,
143 Some('}') => comment_depth -= 1,
144 None => break, _ => (),
146 }
147 }
148 true }
150 _ => false, }
152 }
153
154 fn get_content(&self, trim_by: usize) -> String {
155 let from = self.start + trim_by;
156 let to = self.current - trim_by;
157
158 self.source.chars().take(to).skip(from).collect()
159 }
160
161 fn is_identifier_start(character: char) -> bool {
162 character.is_alphabetic() || character == '_'
163 }
164
165 fn is_identifier(character: char) -> bool {
166 character.is_alphanumeric() || character == '_'
167 }
168
169 fn identifier(&mut self) -> Token {
170 while self.peek().is_some_and(Scanner::is_identifier) {
171 self.advance();
172 }
173
174 let ident = self.get_content(0);
175
176 match ident.to_lowercase().as_str() {
177 "true" => Token::Literal(Value::Boolean(true)),
178 "false" => Token::Literal(Value::Boolean(false)),
179 "and" => Token::And,
180 "or" => Token::Or,
181 "xor" => Token::Xor,
182 "not" => Token::Not,
183 "div" => Token::Div,
184 "mod" => Token::Mod,
185 _ => Token::Identifier(ident),
186 }
187 }
188
189 fn extract_number(content: &str) -> Result<f64> {
190 content
191 .parse::<f64>()
192 .map_err(|o| Error::InvalidNumber(o.to_string()))
193 }
194
195 fn number(&mut self) -> Result<Token> {
196 self.advance_numeric(); if self.peek() == Some('.') {
199 self.advance(); if let Some(fractional) = self.peek() {
202 if fractional.is_numeric() {
203 self.advance_numeric(); }
205 }
206 }
207
208 let content = self.get_content(0);
209 let number = Scanner::extract_number(content.as_str())?;
210
211 Ok(Token::Literal(Value::Number(number)))
212 }
213
214 fn string(&mut self) -> Result<Token> {
215 let mut contains_single_quote = false;
216
217 loop {
218 while self.peek().is_some_and(|c| c != '\'') {
219 self.advance(); }
221
222 if self.is_at_end() {
223 return Err(Error::UnterminatedStringLiteral);
224 };
225
226 self.advance(); if self.peek() == Some('\'') {
229 contains_single_quote = true; self.advance();
231 } else {
232 break; }
234 }
235
236 let mut content = self.get_content(1);
237
238 if contains_single_quote {
239 content = content.replace("''", "'"); }
241
242 Ok(Token::Literal(Value::String(content)))
243 }
244
245 fn encounter_double(&mut self, token: Token) -> Token {
246 self.advance();
247 token
248 }
249
250 fn greater(&mut self) -> Token {
251 match self.peek() {
252 Some('=') => self.encounter_double(Token::GreaterEqual),
253 _ => Token::Greater,
254 }
255 }
256
257 fn lesser(&mut self) -> Token {
258 match self.peek() {
259 Some('=') => self.encounter_double(Token::LessEqual),
260 Some('>') => self.encounter_double(Token::NotEqual),
261 _ => Token::Less,
262 }
263 }
264}
265
#[cfg(test)]
mod tests {
    use std::f64::consts::PI;

    use super::{Scanner, Token};
    use crate::{
        error::{Error, Result},
        value::Value,
    };

    /// Builds a numeric literal token.
    fn num(value: f64) -> Token {
        Token::Literal(Value::Number(value))
    }

    /// Builds a string literal token.
    fn text(value: &str) -> Token {
        Token::Literal(Value::String(String::from(value)))
    }

    /// Builds a boolean literal token.
    fn boolean(value: bool) -> Token {
        Token::Literal(Value::Boolean(value))
    }

    #[test]
    fn simple_bool() -> Result<()> {
        let scanned = Scanner::tokenize("True")?;

        assert_eq!(scanned[0], boolean(true));
        Ok(())
    }

    #[test]
    fn simple_integer() -> Result<()> {
        let scanned = Scanner::tokenize("9001")?;

        assert_eq!(scanned[0], num(9001.0));
        Ok(())
    }

    #[test]
    fn simple_float() -> Result<()> {
        let scanned = Scanner::tokenize("3.141592653589793")?;

        assert_eq!(scanned[0], num(PI));
        Ok(())
    }

    #[test]
    fn simple_string() -> Result<()> {
        let scanned = Scanner::tokenize("'Hello World'")?;

        assert_eq!(scanned.first(), Some(&text("Hello World")));
        Ok(())
    }

    #[test]
    fn multiple_tokens() -> Result<()> {
        let scanned = Scanner::tokenize("1 + 1")?;

        assert_eq!(scanned, vec![num(1.0), Token::Plus, num(1.0)]);
        Ok(())
    }

    #[test]
    fn var_name_underscore() -> Result<()> {
        let scanned = Scanner::tokenize("(_SOME_VAR1 * ANOTHER-ONE)")?;
        let wanted = vec![
            Token::LeftParen,
            Token::Identifier(String::from("_SOME_VAR1")),
            Token::Star,
            Token::Identifier(String::from("ANOTHER")),
            Token::Minus,
            Token::Identifier(String::from("ONE")),
            Token::RightParen,
        ];

        assert_eq!(wanted, scanned);
        Ok(())
    }

    #[test]
    fn unterminated_less() -> Result<()> {
        let scanned = Scanner::tokenize("<")?;

        assert_eq!(vec![Token::Less], scanned);
        Ok(())
    }

    /// Checks that `input` scans to exactly one numeric literal equal to
    /// `expected`.
    fn test_number(input: &str, expected: f64) -> Result<()> {
        let scanned = Scanner::tokenize(input)?;

        assert_eq!(vec![num(expected)], scanned);
        Ok(())
    }

    #[test]
    fn number_parts() -> Result<()> {
        let cases = [
            ("10", 10.0),
            ("10.0", 10.0),
            ("20.4", 20.4),
            ("30.", 30.0),
            (".4", 0.4),
        ];

        for (input, expected) in cases {
            test_number(input, expected)?;
        }

        Ok(())
    }

    #[test]
    fn err_empty_input() {
        let wanted: Result<Vec<Token>> = Err(Error::Eof);

        assert_eq!(wanted, Scanner::tokenize(""));
    }

    #[test]
    fn err_unknown_token_1() {
        let wanted: Result<Vec<Token>> = Err(Error::InvalidCharacter('$'));

        assert_eq!(wanted, Scanner::tokenize("$"));
    }

    #[test]
    fn err_unknown_token_2() {
        let wanted: Result<Vec<Token>> = Err(Error::InvalidCharacter('$'));

        assert_eq!(wanted, Scanner::tokenize("$hello"));
    }

    #[test]
    fn err_unterminated_string() {
        let wanted: Result<Vec<Token>> = Err(Error::UnterminatedStringLiteral);

        assert_eq!(wanted, Scanner::tokenize("'hello' + 'world"));
    }

    #[test]
    fn has_slash_comment() {
        let wanted: Result<Vec<Token>> = Ok(vec![boolean(true)]);

        for input in ["true // some comment", "true //"] {
            assert_eq!(wanted, Scanner::tokenize(input));
        }
    }

    #[test]
    fn has_slash_comment_multiline() {
        let wanted: Result<Vec<Token>> = Ok(vec![boolean(true), Token::And, boolean(false)]);

        for input in [
            "true // some comment \n and false",
            "true //\n//\n and false",
        ] {
            assert_eq!(wanted, Scanner::tokenize(input));
        }
    }

    #[test]
    fn has_brace_comment() {
        let wanted: Result<Vec<Token>> = Ok(vec![num(1.0), Token::Plus, num(3.0)]);

        for input in [
            "1 + {2} 3",
            "1 + 3 {123}",
            "1 + {123 {+4}} 3",
            "1 + 3 { ",
            "{Test}1+3",
        ] {
            assert_eq!(wanted, Scanner::tokenize(input));
        }
    }

    #[test]
    fn quote_char_in_string() {
        let wanted: Result<Vec<Token>> = Ok(vec![text("It's Working!")]);
        assert_eq!(wanted, Scanner::tokenize("'It''s Working!'"));

        let wanted: Result<Vec<Token>> = Ok(vec![text("'")]);
        assert_eq!(wanted, Scanner::tokenize("''''"));

        let wanted: Result<Vec<Token>> = Err(Error::UnterminatedStringLiteral);
        assert_eq!(wanted, Scanner::tokenize("'''"));

        let wanted: Result<Vec<Token>> = Ok(vec![text(""), text("")]);
        assert_eq!(wanted, Scanner::tokenize("'' ''"));

        let wanted: Result<Vec<Token>> = Ok(vec![text("He's She's It's")]);
        assert_eq!(wanted, Scanner::tokenize("'He''s She''s It''s'"));
    }
}