1use crate::types::{
2 Delimiter,
3 ToonError,
4 ToonResult,
5};
6
/// A lexical token produced by [`Scanner`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The `[` character.
    LeftBracket,
    /// The `]` character.
    RightBracket,
    /// The `{` character.
    LeftBrace,
    /// The `}` character.
    RightBrace,
    /// The `:` character.
    Colon,
    /// A `-` that is not immediately followed by an ASCII digit.
    Dash,
    /// A line feed (`\n`).
    Newline,
    /// String content; the flag is `true` when the text came from a
    /// double-quoted literal and `false` when it was unquoted.
    String(String, bool),
    /// A numeric literal written with a `.`, `e` or `E`.
    Number(f64),
    /// A numeric literal without fraction or exponent that fits in an `i64`.
    Integer(i64),
    /// The unquoted literal `true` or `false`.
    Bool(bool),
    /// The unquoted literal `null`.
    Null,
    /// A delimiter character; the scanner only emits this when the character
    /// matches its currently active delimiter.
    Delimiter(Delimiter),
    /// End of input.
    Eof,
}
25
/// Character-level scanner that turns source text into [`Token`]s while
/// tracking the current line and column.
pub struct Scanner {
    /// The source text, stored as individual characters.
    input: Vec<char>,
    /// Index into `input` of the next character to be read.
    position: usize,
    /// Current line; starts at 1 and grows each time a `\n` is consumed.
    line: usize,
    /// Current column; starts at 1 and is reset to 1 after each `\n`.
    column: usize,
    /// Delimiter currently treated as a separator, if any.
    active_delimiter: Option<Delimiter>,
    /// Number of leading spaces measured the last time `scan_token` was
    /// called at column 1.
    last_line_indent: usize,
    /// Number of spaces consumed by the most recent `skip_whitespace` call.
    last_whitespace_count: usize,
    /// Text recorded for the most recent token by `scan_token`
    /// (not updated for `Newline` or `Eof`).
    last_token_text: String,
}
37
38impl Scanner {
39 pub fn new(input: &str) -> Self {
41 Self {
42 input: input.chars().collect(),
43 position: 0,
44 line: 1,
45 column: 1,
46 active_delimiter: None,
47 last_line_indent: 0,
48 last_whitespace_count: 0,
49 last_token_text: String::new(),
50 }
51 }
52
53 pub fn set_active_delimiter(&mut self, delimiter: Option<Delimiter>) {
55 self.active_delimiter = delimiter;
56 }
57
58 pub fn current_position(&self) -> (usize, usize) {
60 (self.line, self.column)
61 }
62
63 pub fn get_line(&self) -> usize {
64 self.line
65 }
66
67 pub fn get_column(&self) -> usize {
68 self.column
69 }
70
71 pub fn peek(&self) -> Option<char> {
72 self.input.get(self.position).copied()
73 }
74
75 pub fn count_leading_spaces(&self) -> usize {
76 let mut idx = self.position;
77 let mut count = 0;
78 while let Some(&ch) = self.input.get(idx) {
79 if ch == ' ' {
80 count += 1;
81 idx += 1;
82 } else {
83 break;
84 }
85 }
86 count
87 }
88
89 pub fn count_spaces_after_newline(&self) -> usize {
90 let mut idx = self.position;
91 if self.input.get(idx) != Some(&'\n') {
92 return 0;
93 }
94 idx += 1;
95 let mut count = 0;
96 while let Some(&ch) = self.input.get(idx) {
97 if ch == ' ' {
98 count += 1;
99 idx += 1;
100 } else {
101 break;
102 }
103 }
104 count
105 }
106
107 pub fn peek_ahead(&self, offset: usize) -> Option<char> {
108 self.input.get(self.position + offset).copied()
109 }
110
111 pub fn advance(&mut self) -> Option<char> {
112 if let Some(ch) = self.input.get(self.position) {
113 self.position += 1;
114 if *ch == '\n' {
115 self.line += 1;
116 self.column = 1;
117 } else {
118 self.column += 1;
119 }
120 Some(*ch)
121 } else {
122 None
123 }
124 }
125
126 pub fn skip_whitespace(&mut self) {
127 self.last_whitespace_count = 0;
128 while let Some(ch) = self.peek() {
129 if ch == ' ' {
130 self.last_whitespace_count += 1;
131 self.advance();
132 } else {
133 break;
134 }
135 }
136 }
137
138 pub fn last_whitespace_count(&self) -> usize {
139 self.last_whitespace_count
140 }
141
142 pub fn last_token_text(&self) -> &str {
143 &self.last_token_text
144 }
145
146 pub fn scan_token(&mut self) -> ToonResult<Token> {
148 if self.column == 1 {
149 let mut count = 0;
150 let mut idx = self.position;
151
152 while let Some(&ch) = self.input.get(idx) {
153 if ch == ' ' {
154 count += 1;
155 idx += 1;
156 } else {
157 if ch == '\t' {
158 let (line, col) = self.current_position();
159 return Err(ToonError::parse_error(
160 line,
161 col + count,
162 "Tabs are not allowed in indentation",
163 ));
164 }
165 break;
166 }
167 }
168 self.last_line_indent = count;
169 }
170
171 self.skip_whitespace();
172
173 match self.peek() {
174 None => Ok(Token::Eof),
175 Some('\n') => {
176 self.advance();
177 Ok(Token::Newline)
178 }
179 Some('[') => {
180 self.advance();
181 self.last_token_text = "[".to_string();
182 Ok(Token::LeftBracket)
183 }
184 Some(']') => {
185 self.advance();
186 self.last_token_text = "]".to_string();
187 Ok(Token::RightBracket)
188 }
189 Some('{') => {
190 self.advance();
191 self.last_token_text = "{".to_string();
192 Ok(Token::LeftBrace)
193 }
194 Some('}') => {
195 self.advance();
196 self.last_token_text = "}".to_string();
197 Ok(Token::RightBrace)
198 }
199 Some(':') => {
200 self.advance();
201 self.last_token_text = ":".to_string();
202 Ok(Token::Colon)
203 }
204 Some('-') => {
205 self.advance();
206 if let Some(ch) = self.peek() {
207 if ch.is_ascii_digit() {
208 let num_str = self.scan_number_string(true)?;
209 self.last_token_text = num_str.clone();
210 return self.parse_number(&num_str);
211 }
212 }
213 self.last_token_text = "-".to_string();
214 Ok(Token::Dash)
215 }
216 Some(',') => {
217 if matches!(self.active_delimiter, Some(Delimiter::Comma)) {
219 self.advance();
220 self.last_token_text = ",".to_string();
221 Ok(Token::Delimiter(Delimiter::Comma))
222 } else {
223 self.scan_unquoted_string()
224 }
225 }
226 Some('|') => {
227 if matches!(self.active_delimiter, Some(Delimiter::Pipe)) {
228 self.advance();
229 self.last_token_text = "|".to_string();
230 Ok(Token::Delimiter(Delimiter::Pipe))
231 } else {
232 self.scan_unquoted_string()
233 }
234 }
235 Some('\t') => {
236 if matches!(self.active_delimiter, Some(Delimiter::Tab)) {
237 self.advance();
238 self.last_token_text = "\t".to_string();
239 Ok(Token::Delimiter(Delimiter::Tab))
240 } else {
241 self.scan_unquoted_string()
242 }
243 }
244 Some('"') => self.scan_quoted_string(),
245 Some(ch) if ch.is_ascii_digit() => {
246 let num_str = self.scan_number_string(false)?;
247 self.last_token_text = num_str.clone();
248 self.parse_number(&num_str)
249 }
250 Some(_) => self.scan_unquoted_string(),
251 }
252 }
253
254 fn scan_quoted_string(&mut self) -> ToonResult<Token> {
255 self.advance();
256
257 let mut value = String::new();
258 let mut escaped = false;
259
260 while let Some(ch) = self.advance() {
261 if escaped {
262 match ch {
263 'n' => value.push('\n'),
264 'r' => value.push('\r'),
265 't' => value.push('\t'),
266 '"' => value.push('"'),
267 '\\' => value.push('\\'),
268 _ => {
269 let (line, col) = self.current_position();
270 return Err(ToonError::parse_error(
271 line,
272 col - 1,
273 format!("Invalid escape sequence: \\{ch}"),
274 ));
275 }
276 }
277 escaped = false;
278 } else if ch == '\\' {
279 escaped = true;
280 } else if ch == '"' {
281 self.last_token_text = format!("\"{}\"", crate::utils::escape_string(&value));
282 return Ok(Token::String(value, true));
283 } else {
284 value.push(ch);
285 }
286 }
287
288 Err(ToonError::UnexpectedEof)
289 }
290
291 fn scan_unquoted_string(&mut self) -> ToonResult<Token> {
292 let mut value = String::new();
293
294 while let Some(ch) = self.peek() {
295 if ch == '\n'
296 || ch == ' '
297 || ch == ':'
298 || ch == '['
299 || ch == ']'
300 || ch == '{'
301 || ch == '}'
302 {
303 break;
304 }
305
306 if let Some(active) = self.active_delimiter {
308 if (active == Delimiter::Comma && ch == ',')
309 || (active == Delimiter::Pipe && ch == '|')
310 || (active == Delimiter::Tab && ch == '\t')
311 {
312 break;
313 }
314 }
315 value.push(ch);
316 self.advance();
317 }
318
319 let value = if value.len() == 1 && (value == "," || value == "|" || value == "\t") {
321 value
322 } else {
323 value.trim_end().to_string()
324 };
325
326 self.last_token_text = value.clone();
327 match value.as_str() {
328 "null" => Ok(Token::Null),
329 "true" => Ok(Token::Bool(true)),
330 "false" => Ok(Token::Bool(false)),
331 _ => Ok(Token::String(value, false)),
332 }
333 }
334
335 pub fn get_last_line_indent(&self) -> usize {
336 self.last_line_indent
337 }
338
339 fn scan_number_string(&mut self, negative: bool) -> ToonResult<String> {
340 let mut num_str = if negative {
341 String::from("-")
342 } else {
343 String::new()
344 };
345
346 while let Some(ch) = self.peek() {
347 if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-'
348 {
349 num_str.push(ch);
350 self.advance();
351 } else {
352 break;
353 }
354 }
355
356 Ok(num_str)
357 }
358
359 fn parse_number(&self, s: &str) -> ToonResult<Token> {
360 if let Some(next_ch) = self.peek() {
362 if next_ch != ' '
363 && next_ch != '\n'
364 && next_ch != ':'
365 && next_ch != '['
366 && next_ch != ']'
367 && next_ch != '{'
368 && next_ch != '}'
369 && !matches!(
370 (self.active_delimiter, next_ch),
371 (Some(Delimiter::Comma), ',')
372 | (Some(Delimiter::Pipe), '|')
373 | (Some(Delimiter::Tab), '\t')
374 )
375 {
376 return Ok(Token::String(s.to_string(), false));
377 }
378 }
379
380 if s.starts_with('0') && s.len() > 1 {
382 let second_char = s.chars().nth(1).unwrap();
383 if second_char.is_ascii_digit() {
384 return Ok(Token::String(s.to_string(), false));
385 }
386 }
387
388 if s.starts_with("-0") && s.len() > 2 {
390 let third_char = s.chars().nth(2).unwrap();
391 if third_char.is_ascii_digit() {
392 return Ok(Token::String(s.to_string(), false));
393 }
394 }
395
396 if s.contains('.') || s.contains('e') || s.contains('E') {
397 if let Ok(f) = s.parse::<f64>() {
398 Ok(Token::Number(f))
399 } else {
400 Ok(Token::String(s.to_string(), false))
401 }
402 } else if let Ok(i) = s.parse::<i64>() {
403 Ok(Token::Integer(i))
404 } else {
405 Ok(Token::String(s.to_string(), false))
406 }
407 }
408
409 pub fn read_rest_of_line_with_space_info(&mut self) -> (String, usize) {
412 let mut space_count = 0;
413 while let Some(' ') = self.peek() {
414 space_count += 1;
415 self.advance();
416 }
417
418 let mut result = String::new();
419 while let Some(ch) = self.peek() {
420 if ch == '\n' {
421 break;
422 }
423 result.push(ch);
424 self.advance();
425 }
426
427 (result.trim_end().to_string(), space_count)
428 }
429
430 pub fn read_rest_of_line(&mut self) -> String {
432 self.read_rest_of_line_with_space_info().0
433 }
434
435 pub fn read_until_delimiter_with_space_info(&mut self) -> (String, usize) {
438 let mut space_count = 0;
439 while let Some(' ') = self.peek() {
440 space_count += 1;
441 self.advance();
442 }
443
444 let mut result = String::new();
445 while let Some(ch) = self.peek() {
446 if ch == '\n' {
447 break;
448 }
449 if let Some(active) = self.active_delimiter {
450 if (active == Delimiter::Comma && ch == ',')
451 || (active == Delimiter::Pipe && ch == '|')
452 || (active == Delimiter::Tab && ch == '\t')
453 {
454 break;
455 }
456 }
457 result.push(ch);
458 self.advance();
459 }
460
461 (result.trim_end().to_string(), space_count)
462 }
463
464 pub fn parse_value_string(&self, s: &str) -> ToonResult<Token> {
466 let trimmed = s.trim();
467
468 if trimmed.is_empty() {
469 return Ok(Token::String(String::new(), false));
470 }
471
472 if trimmed.starts_with('"') {
473 let mut value = String::new();
474 let mut escaped = false;
475 let chars: Vec<char> = trimmed.chars().collect();
476 let mut i = 1;
477
478 while i < chars.len() {
479 let ch = chars[i];
480 if escaped {
481 match ch {
482 'n' => value.push('\n'),
483 'r' => value.push('\r'),
484 't' => value.push('\t'),
485 '"' => value.push('"'),
486 '\\' => value.push('\\'),
487 _ => {
488 return Err(ToonError::parse_error(
489 self.line,
490 self.column,
491 format!("Invalid escape sequence: \\{ch}"),
492 ));
493 }
494 }
495 escaped = false;
496 } else if ch == '\\' {
497 escaped = true;
498 } else if ch == '"' {
499 if i != chars.len() - 1 {
500 return Err(ToonError::parse_error(
501 self.line,
502 self.column,
503 "Unexpected characters after closing quote",
504 ));
505 }
506 return Ok(Token::String(value, true));
507 } else {
508 value.push(ch);
509 }
510 i += 1;
511 }
512
513 return Err(ToonError::parse_error(
514 self.line,
515 self.column,
516 "Unterminated string: missing closing quote",
517 ));
518 }
519
520 match trimmed {
521 "true" => return Ok(Token::Bool(true)),
522 "false" => return Ok(Token::Bool(false)),
523 "null" => return Ok(Token::Null),
524 _ => {}
525 }
526
527 if trimmed.starts_with('-') || trimmed.chars().next().unwrap().is_ascii_digit() {
528 if trimmed.starts_with('0') && trimmed.len() > 1 {
530 let second_char = trimmed.chars().nth(1).unwrap();
531 if second_char.is_ascii_digit() {
532 return Ok(Token::String(trimmed.to_string(), false));
533 }
534 }
535
536 if trimmed.starts_with("-0") && trimmed.len() > 2 {
538 let third_char = trimmed.chars().nth(2).unwrap();
539 if third_char.is_ascii_digit() {
540 return Ok(Token::String(trimmed.to_string(), false));
541 }
542 }
543
544 if trimmed.contains('.') || trimmed.contains('e') || trimmed.contains('E') {
545 if let Ok(f) = trimmed.parse::<f64>() {
546 let normalized = if f == -0.0 { 0.0 } else { f };
547 return Ok(Token::Number(normalized));
548 }
549 } else if let Ok(i) = trimmed.parse::<i64>() {
550 return Ok(Token::Integer(i));
551 }
552 }
553
554 Ok(Token::String(trimmed.to_string(), false))
555 }
556
557 pub fn detect_delimiter(&mut self) -> Option<Delimiter> {
558 let saved_pos = self.position;
559
560 while let Some(ch) = self.peek() {
561 match ch {
562 ',' => {
563 self.position = saved_pos;
564 return Some(Delimiter::Comma);
565 }
566 '|' => {
567 self.position = saved_pos;
568 return Some(Delimiter::Pipe);
569 }
570 '\t' => {
571 self.position = saved_pos;
572 return Some(Delimiter::Tab);
573 }
574 '\n' | ':' | '[' | ']' | '{' | '}' => break,
575 _ => {
576 self.advance();
577 }
578 }
579 }
580
581 self.position = saved_pos;
582 None
583 }
584}
585
586#[cfg(test)]
587mod tests {
588 use core::f64;
589
590 use super::*;
591
592 #[test]
593 fn test_scan_structural_tokens() {
594 let mut scanner = Scanner::new("[]{}:-");
595 assert_eq!(scanner.scan_token().unwrap(), Token::LeftBracket);
596 assert_eq!(scanner.scan_token().unwrap(), Token::RightBracket);
597 assert_eq!(scanner.scan_token().unwrap(), Token::LeftBrace);
598 assert_eq!(scanner.scan_token().unwrap(), Token::RightBrace);
599 assert_eq!(scanner.scan_token().unwrap(), Token::Colon);
600 assert_eq!(scanner.scan_token().unwrap(), Token::Dash);
601 }
602
603 #[test]
604 fn test_scan_numbers() {
605 let mut scanner = Scanner::new("42 3.141592653589793 -5");
606 assert_eq!(scanner.scan_token().unwrap(), Token::Integer(42));
607 assert_eq!(
608 scanner.scan_token().unwrap(),
609 Token::Number(f64::consts::PI)
610 );
611 assert_eq!(scanner.scan_token().unwrap(), Token::Integer(-5));
612 }
613
614 #[test]
615 fn test_scan_booleans() {
616 let mut scanner = Scanner::new("true false");
617 assert_eq!(scanner.scan_token().unwrap(), Token::Bool(true));
618 assert_eq!(scanner.scan_token().unwrap(), Token::Bool(false));
619 }
620
621 #[test]
622 fn test_scan_null() {
623 let mut scanner = Scanner::new("null");
624 assert_eq!(scanner.scan_token().unwrap(), Token::Null);
625 }
626
627 #[test]
628 fn test_scan_quoted_string() {
629 let mut scanner = Scanner::new(r#""hello world""#);
630 assert_eq!(
631 scanner.scan_token().unwrap(),
632 Token::String("hello world".to_string(), true)
633 );
634 }
635
636 #[test]
637 fn test_scan_escaped_string() {
638 let mut scanner = Scanner::new(r#""hello\nworld""#);
639 assert_eq!(
640 scanner.scan_token().unwrap(),
641 Token::String("hello\nworld".to_string(), true)
642 );
643 }
644
645 #[test]
646 fn test_scan_unquoted_string() {
647 let mut scanner = Scanner::new("hello");
648 assert_eq!(
649 scanner.scan_token().unwrap(),
650 Token::String("hello".to_string(), false)
651 );
652 }
653
654 #[test]
655 fn test_detect_delimiter() {
656 let mut scanner = Scanner::new("a,b,c");
657 assert_eq!(scanner.detect_delimiter(), Some(Delimiter::Comma));
658
659 let mut scanner = Scanner::new("a|b|c");
660 assert_eq!(scanner.detect_delimiter(), Some(Delimiter::Pipe));
661
662 let mut scanner = Scanner::new("a\tb\tc");
663 assert_eq!(scanner.detect_delimiter(), Some(Delimiter::Tab));
664 }
665
666 #[test]
667 fn test_read_rest_of_line_with_space_info() {
668 let mut scanner = Scanner::new(" world");
669 let (content, space_count) = scanner.read_rest_of_line_with_space_info();
670 assert_eq!(content, "world");
671 assert_eq!(space_count, 1);
672
673 let mut scanner = Scanner::new("world");
674 let (content, space_count) = scanner.read_rest_of_line_with_space_info();
675 assert_eq!(content, "world");
676 assert_eq!(space_count, 0);
677
678 let mut scanner = Scanner::new("(hello)");
679 let (content, space_count) = scanner.read_rest_of_line_with_space_info();
680 assert_eq!(content, "(hello)");
681 assert_eq!(space_count, 0);
682
683 let mut scanner = Scanner::new("");
684 let (content, space_count) = scanner.read_rest_of_line_with_space_info();
685 assert_eq!(content, "");
686 assert_eq!(space_count, 0);
687
688 let mut scanner = Scanner::new(" world");
689 let (content, space_count) = scanner.read_rest_of_line_with_space_info();
690 assert_eq!(content, "world");
691 assert_eq!(space_count, 3);
692 }
693
694 #[test]
695 fn test_parse_value_string() {
696 let scanner = Scanner::new("");
697 assert_eq!(
698 scanner.parse_value_string("hello").unwrap(),
699 Token::String("hello".to_string(), false)
700 );
701
702 assert_eq!(
703 scanner.parse_value_string("(hello)").unwrap(),
704 Token::String("(hello)".to_string(), false)
705 );
706
707 assert_eq!(
708 scanner
709 .parse_value_string("Mostly Functions (3 of 3)")
710 .unwrap(),
711 Token::String("Mostly Functions (3 of 3)".to_string(), false)
712 );
713 assert_eq!(
714 scanner.parse_value_string("0(f)").unwrap(),
715 Token::String("0(f)".to_string(), false)
716 );
717
718 assert_eq!(
719 scanner.parse_value_string("42").unwrap(),
720 Token::Integer(42)
721 );
722
723 assert_eq!(
724 scanner.parse_value_string("true").unwrap(),
725 Token::Bool(true)
726 );
727 assert_eq!(
728 scanner.parse_value_string("false").unwrap(),
729 Token::Bool(false)
730 );
731 assert_eq!(scanner.parse_value_string("null").unwrap(), Token::Null);
732
733 assert_eq!(
734 scanner.parse_value_string(r#""hello world""#).unwrap(),
735 Token::String("hello world".to_string(), true)
736 );
737 }
738
739 #[test]
740 fn test_number_followed_by_parenthesis() {
741 let mut scanner = Scanner::new("0(f)");
742 let num_token = scanner.scan_number_string(false).unwrap();
743 let token = scanner.parse_number(&num_token).unwrap();
744
745 assert_eq!(token, Token::String("0".to_string(), false));
746 }
747}