1use crate::types::{
2 Delimiter,
3 ToonError,
4 ToonResult,
5};
6
7#[derive(Debug, Clone, PartialEq)]
9pub enum Token {
10 LeftBracket,
11 RightBracket,
12 LeftBrace,
13 RightBrace,
14 Colon,
15 Dash,
16 Newline,
17 String(String, bool),
18 Number(f64),
19 Integer(i64),
20 Bool(bool),
21 Null,
22 Delimiter(Delimiter),
23 Eof,
24}
25
26pub struct Scanner {
28 input: Vec<char>,
29 position: usize,
30 line: usize,
31 column: usize,
32 active_delimiter: Option<Delimiter>,
33 last_line_indent: usize,
34}
35
36impl Scanner {
37 pub fn new(input: &str) -> Self {
39 Self {
40 input: input.chars().collect(),
41 position: 0,
42 line: 1,
43 column: 1,
44 active_delimiter: None,
45 last_line_indent: 0,
46 }
47 }
48
49 pub fn set_active_delimiter(&mut self, delimiter: Option<Delimiter>) {
51 self.active_delimiter = delimiter;
52 }
53
54 pub fn current_position(&self) -> (usize, usize) {
56 (self.line, self.column)
57 }
58
59 pub fn get_line(&self) -> usize {
60 self.line
61 }
62
63 pub fn get_column(&self) -> usize {
64 self.column
65 }
66
67 pub fn peek(&self) -> Option<char> {
68 self.input.get(self.position).copied()
69 }
70
71 pub fn count_leading_spaces(&self) -> usize {
72 let mut idx = self.position;
73 let mut count = 0;
74 while let Some(&ch) = self.input.get(idx) {
75 if ch == ' ' {
76 count += 1;
77 idx += 1;
78 } else {
79 break;
80 }
81 }
82 count
83 }
84
85 pub fn count_spaces_after_newline(&self) -> usize {
86 let mut idx = self.position;
87 if self.input.get(idx) != Some(&'\n') {
88 return 0;
89 }
90 idx += 1;
91 let mut count = 0;
92 while let Some(&ch) = self.input.get(idx) {
93 if ch == ' ' {
94 count += 1;
95 idx += 1;
96 } else {
97 break;
98 }
99 }
100 count
101 }
102
103 pub fn peek_ahead(&self, offset: usize) -> Option<char> {
104 self.input.get(self.position + offset).copied()
105 }
106
107 pub fn advance(&mut self) -> Option<char> {
108 if let Some(ch) = self.input.get(self.position) {
109 self.position += 1;
110 if *ch == '\n' {
111 self.line += 1;
112 self.column = 1;
113 } else {
114 self.column += 1;
115 }
116 Some(*ch)
117 } else {
118 None
119 }
120 }
121
122 pub fn skip_whitespace(&mut self) {
123 while let Some(ch) = self.peek() {
124 if ch == ' ' {
125 self.advance();
126 } else {
127 break;
128 }
129 }
130 }
131
132 pub fn scan_token(&mut self) -> ToonResult<Token> {
134 if self.column == 1 {
135 let mut count = 0;
136 let mut idx = self.position;
137
138 while let Some(&ch) = self.input.get(idx) {
139 if ch == ' ' {
140 count += 1;
141 idx += 1;
142 } else {
143 if ch == '\t' {
144 let (line, col) = self.current_position();
145 return Err(ToonError::parse_error(
146 line,
147 col + count,
148 "Tabs are not allowed in indentation",
149 ));
150 }
151 break;
152 }
153 }
154 self.last_line_indent = count;
155 }
156
157 self.skip_whitespace();
158
159 match self.peek() {
160 None => Ok(Token::Eof),
161 Some('\n') => {
162 self.advance();
163 Ok(Token::Newline)
164 }
165 Some('[') => {
166 self.advance();
167 Ok(Token::LeftBracket)
168 }
169 Some(']') => {
170 self.advance();
171 Ok(Token::RightBracket)
172 }
173 Some('{') => {
174 self.advance();
175 Ok(Token::LeftBrace)
176 }
177 Some('}') => {
178 self.advance();
179 Ok(Token::RightBrace)
180 }
181 Some(':') => {
182 self.advance();
183 Ok(Token::Colon)
184 }
185 Some('-') => {
186 self.advance();
187 if let Some(ch) = self.peek() {
188 if ch.is_ascii_digit() {
189 let num_str = self.scan_number_string(true)?;
190 return self.parse_number(&num_str);
191 }
192 }
193 Ok(Token::Dash)
194 }
195 Some(',') => {
196 if matches!(self.active_delimiter, Some(Delimiter::Comma)) {
198 self.advance();
199 Ok(Token::Delimiter(Delimiter::Comma))
200 } else {
201 self.scan_unquoted_string()
202 }
203 }
204 Some('|') => {
205 if matches!(self.active_delimiter, Some(Delimiter::Pipe)) {
206 self.advance();
207 Ok(Token::Delimiter(Delimiter::Pipe))
208 } else {
209 self.scan_unquoted_string()
210 }
211 }
212 Some('\t') => {
213 if matches!(self.active_delimiter, Some(Delimiter::Tab)) {
214 self.advance();
215 Ok(Token::Delimiter(Delimiter::Tab))
216 } else {
217 self.scan_unquoted_string()
218 }
219 }
220 Some('"') => self.scan_quoted_string(),
221 Some(ch) if ch.is_ascii_digit() => {
222 let num_str = self.scan_number_string(false)?;
223 self.parse_number(&num_str)
224 }
225 Some(_) => self.scan_unquoted_string(),
226 }
227 }
228
229 fn scan_quoted_string(&mut self) -> ToonResult<Token> {
230 self.advance();
231
232 let mut value = String::new();
233 let mut escaped = false;
234
235 while let Some(ch) = self.advance() {
236 if escaped {
237 match ch {
238 'n' => value.push('\n'),
239 'r' => value.push('\r'),
240 't' => value.push('\t'),
241 '"' => value.push('"'),
242 '\\' => value.push('\\'),
243 _ => {
244 let (line, col) = self.current_position();
245 return Err(ToonError::parse_error(
246 line,
247 col - 1,
248 format!("Invalid escape sequence: \\{ch}"),
249 ));
250 }
251 }
252 escaped = false;
253 } else if ch == '\\' {
254 escaped = true;
255 } else if ch == '"' {
256 return Ok(Token::String(value, true));
257 } else {
258 value.push(ch);
259 }
260 }
261
262 Err(ToonError::UnexpectedEof)
263 }
264
265 fn scan_unquoted_string(&mut self) -> ToonResult<Token> {
266 let mut value = String::new();
267
268 while let Some(ch) = self.peek() {
269 if ch == '\n'
270 || ch == ' '
271 || ch == ':'
272 || ch == '['
273 || ch == ']'
274 || ch == '{'
275 || ch == '}'
276 {
277 break;
278 }
279
280 if let Some(active) = self.active_delimiter {
282 if (active == Delimiter::Comma && ch == ',')
283 || (active == Delimiter::Pipe && ch == '|')
284 || (active == Delimiter::Tab && ch == '\t')
285 {
286 break;
287 }
288 }
289 value.push(ch);
290 self.advance();
291 }
292
293 let value = if value.len() == 1 && (value == "," || value == "|" || value == "\t") {
295 value
296 } else {
297 value.trim_end().to_string()
298 };
299
300 match value.as_str() {
301 "null" => Ok(Token::Null),
302 "true" => Ok(Token::Bool(true)),
303 "false" => Ok(Token::Bool(false)),
304 _ => Ok(Token::String(value, false)),
305 }
306 }
307
308 pub fn get_last_line_indent(&self) -> usize {
309 self.last_line_indent
310 }
311
312 fn scan_number_string(&mut self, negative: bool) -> ToonResult<String> {
313 let mut num_str = if negative {
314 String::from("-")
315 } else {
316 String::new()
317 };
318
319 while let Some(ch) = self.peek() {
320 if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-'
321 {
322 num_str.push(ch);
323 self.advance();
324 } else {
325 break;
326 }
327 }
328
329 Ok(num_str)
330 }
331
332 fn parse_number(&self, s: &str) -> ToonResult<Token> {
333 if let Some(next_ch) = self.peek() {
335 if next_ch != ' '
336 && next_ch != '\n'
337 && next_ch != ':'
338 && next_ch != '['
339 && next_ch != ']'
340 && next_ch != '{'
341 && next_ch != '}'
342 && !matches!(
343 (self.active_delimiter, next_ch),
344 (Some(Delimiter::Comma), ',')
345 | (Some(Delimiter::Pipe), '|')
346 | (Some(Delimiter::Tab), '\t')
347 )
348 {
349 return Ok(Token::String(s.to_string(), false));
350 }
351 }
352
353 if s.starts_with('0') && s.len() > 1 {
355 let second_char = s.chars().nth(1).unwrap();
356 if second_char.is_ascii_digit() {
357 return Ok(Token::String(s.to_string(), false));
358 }
359 }
360
361 if s.contains('.') || s.contains('e') || s.contains('E') {
362 if let Ok(f) = s.parse::<f64>() {
363 Ok(Token::Number(f))
364 } else {
365 Ok(Token::String(s.to_string(), false))
366 }
367 } else if let Ok(i) = s.parse::<i64>() {
368 Ok(Token::Integer(i))
369 } else {
370 Ok(Token::String(s.to_string(), false))
371 }
372 }
373
374 pub fn read_rest_of_line_with_space_info(&mut self) -> (String, bool) {
378 let had_leading_space = matches!(self.peek(), Some(' '));
379 self.skip_whitespace();
380
381 let mut result = String::new();
382 while let Some(ch) = self.peek() {
383 if ch == '\n' {
384 break;
385 }
386 result.push(ch);
387 self.advance();
388 }
389
390 (result.trim_end().to_string(), had_leading_space)
391 }
392
393 pub fn read_rest_of_line(&mut self) -> String {
395 self.read_rest_of_line_with_space_info().0
396 }
397
398 pub fn parse_value_string(&self, s: &str) -> ToonResult<Token> {
400 let trimmed = s.trim();
401
402 if trimmed.is_empty() {
403 return Ok(Token::String(String::new(), false));
404 }
405
406 if trimmed.starts_with('"') {
407 let mut value = String::new();
408 let mut escaped = false;
409 let chars: Vec<char> = trimmed.chars().collect();
410 let mut i = 1;
411
412 while i < chars.len() {
413 let ch = chars[i];
414 if escaped {
415 match ch {
416 'n' => value.push('\n'),
417 'r' => value.push('\r'),
418 't' => value.push('\t'),
419 '"' => value.push('"'),
420 '\\' => value.push('\\'),
421 _ => {
422 return Err(ToonError::parse_error(
423 self.line,
424 self.column,
425 format!("Invalid escape sequence: \\{ch}"),
426 ));
427 }
428 }
429 escaped = false;
430 } else if ch == '\\' {
431 escaped = true;
432 } else if ch == '"' {
433 if i != chars.len() - 1 {
434 return Err(ToonError::parse_error(
435 self.line,
436 self.column,
437 "Unexpected characters after closing quote",
438 ));
439 }
440 return Ok(Token::String(value, true));
441 } else {
442 value.push(ch);
443 }
444 i += 1;
445 }
446
447 return Err(ToonError::parse_error(
448 self.line,
449 self.column,
450 "Unterminated string: missing closing quote",
451 ));
452 }
453
454 match trimmed {
455 "true" => return Ok(Token::Bool(true)),
456 "false" => return Ok(Token::Bool(false)),
457 "null" => return Ok(Token::Null),
458 _ => {}
459 }
460
461 if trimmed.starts_with('-') || trimmed.chars().next().unwrap().is_ascii_digit() {
462 if trimmed.starts_with('0') && trimmed.len() > 1 {
464 let second_char = trimmed.chars().nth(1).unwrap();
465 if second_char.is_ascii_digit() {
466 return Ok(Token::String(trimmed.to_string(), false));
467 }
468 }
469
470 if trimmed.contains('.') || trimmed.contains('e') || trimmed.contains('E') {
471 if let Ok(f) = trimmed.parse::<f64>() {
472 let normalized = if f == -0.0 { 0.0 } else { f };
473 return Ok(Token::Number(normalized));
474 }
475 } else if let Ok(i) = trimmed.parse::<i64>() {
476 return Ok(Token::Integer(i));
477 }
478 }
479
480 Ok(Token::String(trimmed.to_string(), false))
481 }
482
483 pub fn detect_delimiter(&mut self) -> Option<Delimiter> {
484 let saved_pos = self.position;
485
486 while let Some(ch) = self.peek() {
487 match ch {
488 ',' => {
489 self.position = saved_pos;
490 return Some(Delimiter::Comma);
491 }
492 '|' => {
493 self.position = saved_pos;
494 return Some(Delimiter::Pipe);
495 }
496 '\t' => {
497 self.position = saved_pos;
498 return Some(Delimiter::Tab);
499 }
500 '\n' | ':' | '[' | ']' | '{' | '}' => break,
501 _ => {
502 self.advance();
503 }
504 }
505 }
506
507 self.position = saved_pos;
508 None
509 }
510}
511
#[cfg(test)]
mod tests {
    use core::f64::consts::PI;

    use super::*;

    /// Scans `input` from the start and checks that the first tokens produced
    /// equal `expected`, in order.
    fn assert_tokens(input: &str, expected: &[Token]) {
        let mut scanner = Scanner::new(input);
        for want in expected {
            assert_eq!(&scanner.scan_token().unwrap(), want);
        }
    }

    /// Shorthand for an unquoted string token.
    fn bare(text: &str) -> Token {
        Token::String(text.to_string(), false)
    }

    /// Shorthand for a quoted string token.
    fn quoted(text: &str) -> Token {
        Token::String(text.to_string(), true)
    }

    #[test]
    fn test_scan_structural_tokens() {
        assert_tokens(
            "[]{}:-",
            &[
                Token::LeftBracket,
                Token::RightBracket,
                Token::LeftBrace,
                Token::RightBrace,
                Token::Colon,
                Token::Dash,
            ],
        );
    }

    #[test]
    fn test_scan_numbers() {
        assert_tokens(
            "42 3.141592653589793 -5",
            &[Token::Integer(42), Token::Number(PI), Token::Integer(-5)],
        );
    }

    #[test]
    fn test_scan_booleans() {
        assert_tokens("true false", &[Token::Bool(true), Token::Bool(false)]);
    }

    #[test]
    fn test_scan_null() {
        assert_tokens("null", &[Token::Null]);
    }

    #[test]
    fn test_scan_quoted_string() {
        assert_tokens(r#""hello world""#, &[quoted("hello world")]);
    }

    #[test]
    fn test_scan_escaped_string() {
        assert_tokens(r#""hello\nworld""#, &[quoted("hello\nworld")]);
    }

    #[test]
    fn test_scan_unquoted_string() {
        assert_tokens("hello", &[bare("hello")]);
    }

    #[test]
    fn test_detect_delimiter() {
        let cases = [
            ("a,b,c", Delimiter::Comma),
            ("a|b|c", Delimiter::Pipe),
            ("a\tb\tc", Delimiter::Tab),
        ];
        for &(input, expected) in &cases {
            let mut scanner = Scanner::new(input);
            assert_eq!(scanner.detect_delimiter(), Some(expected));
        }
    }

    #[test]
    fn test_read_rest_of_line_with_space_info() {
        // (input, expected content, expected leading-space flag)
        let cases = [
            (" world", "world", true),
            ("world", "world", false),
            ("(hello)", "(hello)", false),
            ("", "", false),
        ];
        for &(input, want_content, want_space) in &cases {
            let mut scanner = Scanner::new(input);
            let (content, had_space) = scanner.read_rest_of_line_with_space_info();
            assert_eq!(content, want_content);
            assert_eq!(had_space, want_space);
        }
    }

    #[test]
    fn test_parse_value_string() {
        let scanner = Scanner::new("");
        let cases = vec![
            ("hello", bare("hello")),
            ("(hello)", bare("(hello)")),
            ("Mostly Functions (3 of 3)", bare("Mostly Functions (3 of 3)")),
            ("0(f)", bare("0(f)")),
            ("42", Token::Integer(42)),
            ("true", Token::Bool(true)),
            ("false", Token::Bool(false)),
            ("null", Token::Null),
            (r#""hello world""#, quoted("hello world")),
        ];
        for (input, expected) in cases {
            assert_eq!(scanner.parse_value_string(input).unwrap(), expected);
        }
    }

    #[test]
    fn test_number_followed_by_parenthesis() {
        let mut scanner = Scanner::new("0(f)");
        let digits = scanner.scan_number_string(false).unwrap();
        let token = scanner.parse_number(&digits).unwrap();

        assert_eq!(token, bare("0"));
    }
}