1use crate::types::{
2 Delimiter,
3 ToonError,
4 ToonResult,
5};
6
/// A lexical token produced by [`Scanner::scan_token`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The `[` character.
    LeftBracket,
    /// The `]` character.
    RightBracket,
    /// The `{` character.
    LeftBrace,
    /// The `}` character.
    RightBrace,
    /// The `:` character.
    Colon,
    /// A `-` that is not immediately followed by an ASCII digit.
    Dash,
    /// A `\n` line break.
    Newline,
    /// String content plus a flag that is `true` when the text came from a
    /// double-quoted literal and `false` when it was scanned unquoted.
    String(String, bool),
    /// A numeric token containing `.`, `e` or `E` that parsed as an `f64`.
    Number(f64),
    /// A numeric token without `.`, `e` or `E` that parsed as an `i64`.
    Integer(i64),
    /// The unquoted word `true` or `false`.
    Bool(bool),
    /// The unquoted word `null`.
    Null,
    /// A `,`, `|` or tab character that matched the scanner's active delimiter.
    Delimiter(Delimiter),
    /// End of input.
    Eof,
}
25
/// Character-level tokenizer that tracks its line/column position as it reads.
pub struct Scanner {
    /// The whole input, split into `char`s so it can be indexed by character.
    input: Vec<char>,
    /// Index into `input` of the next character to read.
    position: usize,
    /// Current line; starts at 1 and is incremented each time a `\n` is consumed.
    line: usize,
    /// Current column; starts at 1 and is reset to 1 after a `\n` is consumed.
    column: usize,
    /// Delimiter that `scan_token` currently reports as `Token::Delimiter`;
    /// `None` means `,`, `|` and tab are scanned as ordinary string content.
    active_delimiter: Option<Delimiter>,
    /// Number of leading spaces counted the last time `scan_token` was called
    /// at column 1.
    last_line_indent: usize,
}
35
36impl Scanner {
37 pub fn new(input: &str) -> Self {
39 Self {
40 input: input.chars().collect(),
41 position: 0,
42 line: 1,
43 column: 1,
44 active_delimiter: None,
45 last_line_indent: 0,
46 }
47 }
48
49 pub fn set_active_delimiter(&mut self, delimiter: Option<Delimiter>) {
51 self.active_delimiter = delimiter;
52 }
53
54 pub fn current_position(&self) -> (usize, usize) {
56 (self.line, self.column)
57 }
58
59 pub fn get_line(&self) -> usize {
60 self.line
61 }
62
63 pub fn get_column(&self) -> usize {
64 self.column
65 }
66
67 pub fn peek(&self) -> Option<char> {
68 self.input.get(self.position).copied()
69 }
70
71 pub fn count_leading_spaces(&self) -> usize {
72 let mut idx = self.position;
73 let mut count = 0;
74 while let Some(&ch) = self.input.get(idx) {
75 if ch == ' ' {
76 count += 1;
77 idx += 1;
78 } else {
79 break;
80 }
81 }
82 count
83 }
84
85 pub fn count_spaces_after_newline(&self) -> usize {
86 let mut idx = self.position;
87 if self.input.get(idx) != Some(&'\n') {
88 return 0;
89 }
90 idx += 1;
91 let mut count = 0;
92 while let Some(&ch) = self.input.get(idx) {
93 if ch == ' ' {
94 count += 1;
95 idx += 1;
96 } else {
97 break;
98 }
99 }
100 count
101 }
102
103 pub fn peek_ahead(&self, offset: usize) -> Option<char> {
104 self.input.get(self.position + offset).copied()
105 }
106
107 pub fn advance(&mut self) -> Option<char> {
108 if let Some(ch) = self.input.get(self.position) {
109 self.position += 1;
110 if *ch == '\n' {
111 self.line += 1;
112 self.column = 1;
113 } else {
114 self.column += 1;
115 }
116 Some(*ch)
117 } else {
118 None
119 }
120 }
121
122 pub fn skip_whitespace(&mut self) {
123 while let Some(ch) = self.peek() {
124 if ch == ' ' {
125 self.advance();
126 } else {
127 break;
128 }
129 }
130 }
131
132 pub fn scan_token(&mut self) -> ToonResult<Token> {
134 if self.column == 1 {
136 let mut count = 0;
137 let mut idx = self.position;
138
139 while let Some(&ch) = self.input.get(idx) {
140 if ch == ' ' {
141 count += 1;
142 idx += 1;
143 } else {
144 if ch == '\t' {
145 let (line, col) = self.current_position();
146 return Err(ToonError::parse_error(
147 line,
148 col + count,
149 "Tabs are not allowed in indentation",
150 ));
151 }
152 break;
153 }
154 }
155 self.last_line_indent = count;
156 }
157
158 self.skip_whitespace();
159
160 match self.peek() {
161 None => Ok(Token::Eof),
162 Some('\n') => {
163 self.advance();
164 Ok(Token::Newline)
165 }
166 Some('[') => {
167 self.advance();
168 Ok(Token::LeftBracket)
169 }
170 Some(']') => {
171 self.advance();
172 Ok(Token::RightBracket)
173 }
174 Some('{') => {
175 self.advance();
176 Ok(Token::LeftBrace)
177 }
178 Some('}') => {
179 self.advance();
180 Ok(Token::RightBrace)
181 }
182 Some(':') => {
183 self.advance();
184 Ok(Token::Colon)
185 }
186 Some('-') => {
187 self.advance();
188 if let Some(ch) = self.peek() {
190 if ch.is_ascii_digit() {
191 let num_str = self.scan_number_string(true)?;
192 return self.parse_number(&num_str);
193 }
194 }
195 Ok(Token::Dash)
196 }
197 Some(',') => {
198 if matches!(self.active_delimiter, Some(Delimiter::Comma)) {
200 self.advance();
201 Ok(Token::Delimiter(Delimiter::Comma))
202 } else {
203 self.scan_unquoted_string()
204 }
205 }
206 Some('|') => {
207 if matches!(self.active_delimiter, Some(Delimiter::Pipe)) {
208 self.advance();
209 Ok(Token::Delimiter(Delimiter::Pipe))
210 } else {
211 self.scan_unquoted_string()
212 }
213 }
214 Some('\t') => {
215 if matches!(self.active_delimiter, Some(Delimiter::Tab)) {
216 self.advance();
217 Ok(Token::Delimiter(Delimiter::Tab))
218 } else {
219 self.scan_unquoted_string()
220 }
221 }
222 Some('"') => self.scan_quoted_string(),
223 Some(ch) if ch.is_ascii_digit() => {
224 let num_str = self.scan_number_string(false)?;
225 self.parse_number(&num_str)
226 }
227 Some(_) => self.scan_unquoted_string(),
228 }
229 }
230
231 fn scan_quoted_string(&mut self) -> ToonResult<Token> {
232 self.advance(); let mut value = String::new();
235 let mut escaped = false;
236
237 while let Some(ch) = self.advance() {
238 if escaped {
239 match ch {
241 'n' => value.push('\n'),
242 'r' => value.push('\r'),
243 't' => value.push('\t'),
244 '"' => value.push('"'),
245 '\\' => value.push('\\'),
246 _ => {
247 let (line, col) = self.current_position();
248 return Err(ToonError::parse_error(
249 line,
250 col - 1,
251 format!("Invalid escape sequence: \\{ch}"),
252 ));
253 }
254 }
255 escaped = false;
256 } else if ch == '\\' {
257 escaped = true;
258 } else if ch == '"' {
259 return Ok(Token::String(value, true));
260 } else {
261 value.push(ch);
262 }
263 }
264
265 Err(ToonError::UnexpectedEof)
267 }
268
269 fn scan_unquoted_string(&mut self) -> ToonResult<Token> {
270 let mut value = String::new();
271
272 while let Some(ch) = self.peek() {
274 if ch == '\n'
275 || ch == ' '
276 || ch == ':'
277 || ch == '['
278 || ch == ']'
279 || ch == '{'
280 || ch == '}'
281 {
282 break;
283 }
284
285 if let Some(active) = self.active_delimiter {
287 if (active == Delimiter::Comma && ch == ',')
288 || (active == Delimiter::Pipe && ch == '|')
289 || (active == Delimiter::Tab && ch == '\t')
290 {
291 break;
292 }
293 }
294 value.push(ch);
295 self.advance();
296 }
297
298 let value = if value.len() == 1 && (value == "," || value == "|" || value == "\t") {
301 value
302 } else {
303 value.trim_end().to_string()
304 };
305
306 match value.as_str() {
308 "null" => Ok(Token::Null),
309 "true" => Ok(Token::Bool(true)),
310 "false" => Ok(Token::Bool(false)),
311 _ => Ok(Token::String(value, false)),
312 }
313 }
314
315 pub fn get_last_line_indent(&self) -> usize {
316 self.last_line_indent
317 }
318
319 fn scan_number_string(&mut self, negative: bool) -> ToonResult<String> {
320 let mut num_str = if negative {
321 String::from("-")
322 } else {
323 String::new()
324 };
325
326 while let Some(ch) = self.peek() {
328 if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-'
329 {
330 num_str.push(ch);
331 self.advance();
332 } else {
333 break;
334 }
335 }
336
337 Ok(num_str)
338 }
339
340 fn parse_number(&self, s: &str) -> ToonResult<Token> {
341 if s.starts_with('0') && s.len() > 1 {
343 let second_char = s.chars().nth(1).unwrap();
344 if second_char.is_ascii_digit() {
345 return Ok(Token::String(s.to_string(), false));
346 }
347 }
348
349 if s.contains('.') || s.contains('e') || s.contains('E') {
351 if let Ok(f) = s.parse::<f64>() {
352 Ok(Token::Number(f))
353 } else {
354 Ok(Token::String(s.to_string(), false))
356 }
357 } else if let Ok(i) = s.parse::<i64>() {
358 Ok(Token::Integer(i))
359 } else {
360 Ok(Token::String(s.to_string(), false))
362 }
363 }
364
365 pub fn detect_delimiter(&mut self) -> Option<Delimiter> {
368 let saved_pos = self.position;
369
370 while let Some(ch) = self.peek() {
371 match ch {
372 ',' => {
373 self.position = saved_pos;
374 return Some(Delimiter::Comma);
375 }
376 '|' => {
377 self.position = saved_pos;
378 return Some(Delimiter::Pipe);
379 }
380 '\t' => {
381 self.position = saved_pos;
382 return Some(Delimiter::Tab);
383 }
384 '\n' | ':' | '[' | ']' | '{' | '}' => break,
386 _ => {
387 self.advance();
388 }
389 }
390 }
391
392 self.position = saved_pos;
393 None
394 }
395}
396
#[cfg(test)]
mod tests {
    use core::f64;

    use super::*;

    /// Scans `input` from the start and checks that its leading tokens equal
    /// `expected`, in order.
    fn assert_tokens(input: &str, expected: Vec<Token>) {
        let mut scanner = Scanner::new(input);
        for want in expected {
            assert_eq!(scanner.scan_token().unwrap(), want);
        }
    }

    /// Runs delimiter detection on a fresh scanner over `input`.
    fn detected(input: &str) -> Option<Delimiter> {
        Scanner::new(input).detect_delimiter()
    }

    #[test]
    fn test_scan_structural_tokens() {
        assert_tokens(
            "[]{}:-",
            vec![
                Token::LeftBracket,
                Token::RightBracket,
                Token::LeftBrace,
                Token::RightBrace,
                Token::Colon,
                Token::Dash,
            ],
        );
    }

    #[test]
    fn test_scan_numbers() {
        assert_tokens(
            "42 3.141592653589793 -5",
            vec![
                Token::Integer(42),
                Token::Number(f64::consts::PI),
                Token::Integer(-5),
            ],
        );
    }

    #[test]
    fn test_scan_booleans() {
        assert_tokens("true false", vec![Token::Bool(true), Token::Bool(false)]);
    }

    #[test]
    fn test_scan_null() {
        assert_tokens("null", vec![Token::Null]);
    }

    #[test]
    fn test_scan_quoted_string() {
        assert_tokens(
            r#""hello world""#,
            vec![Token::String("hello world".to_string(), true)],
        );
    }

    #[test]
    fn test_scan_escaped_string() {
        assert_tokens(
            r#""hello\nworld""#,
            vec![Token::String("hello\nworld".to_string(), true)],
        );
    }

    #[test]
    fn test_scan_unquoted_string() {
        assert_tokens("hello", vec![Token::String("hello".to_string(), false)]);
    }

    #[test]
    fn test_detect_delimiter() {
        assert_eq!(detected("a,b,c"), Some(Delimiter::Comma));
        assert_eq!(detected("a|b|c"), Some(Delimiter::Pipe));
        assert_eq!(detected("a\tb\tc"), Some(Delimiter::Tab));
    }
}