1use crate::types::{
2 Delimiter,
3 ToonError,
4 ToonResult,
5};
6
7#[derive(Debug, Clone, PartialEq)]
9pub enum Token {
10 LeftBracket,
11 RightBracket,
12 LeftBrace,
13 RightBrace,
14 Colon,
15 Dash,
16 Newline,
17 String(String, bool),
18 Number(f64),
19 Integer(i64),
20 Bool(bool),
21 Null,
22 Delimiter(Delimiter),
23 Eof,
24}
25
26pub struct Scanner {
28 input: Vec<char>,
29 position: usize,
30 line: usize,
31 column: usize,
32 active_delimiter: Option<Delimiter>,
33 last_line_indent: usize,
34}
35
36impl Scanner {
37 pub fn new(input: &str) -> Self {
39 Self {
40 input: input.chars().collect(),
41 position: 0,
42 line: 1,
43 column: 1,
44 active_delimiter: None,
45 last_line_indent: 0,
46 }
47 }
48
49 pub fn set_active_delimiter(&mut self, delimiter: Option<Delimiter>) {
51 self.active_delimiter = delimiter;
52 }
53
54 pub fn current_position(&self) -> (usize, usize) {
56 (self.line, self.column)
57 }
58
59 pub fn get_line(&self) -> usize {
60 self.line
61 }
62
63 pub fn get_column(&self) -> usize {
64 self.column
65 }
66
67 pub fn peek(&self) -> Option<char> {
68 self.input.get(self.position).copied()
69 }
70
71 pub fn count_leading_spaces(&self) -> usize {
72 let mut idx = self.position;
73 let mut count = 0;
74 while let Some(&ch) = self.input.get(idx) {
75 if ch == ' ' {
76 count += 1;
77 idx += 1;
78 } else {
79 break;
80 }
81 }
82 count
83 }
84
85 pub fn count_spaces_after_newline(&self) -> usize {
86 let mut idx = self.position;
87 if self.input.get(idx) != Some(&'\n') {
88 return 0;
89 }
90 idx += 1;
91 let mut count = 0;
92 while let Some(&ch) = self.input.get(idx) {
93 if ch == ' ' {
94 count += 1;
95 idx += 1;
96 } else {
97 break;
98 }
99 }
100 count
101 }
102
103 pub fn peek_ahead(&self, offset: usize) -> Option<char> {
104 self.input.get(self.position + offset).copied()
105 }
106
107 pub fn advance(&mut self) -> Option<char> {
108 if let Some(ch) = self.input.get(self.position) {
109 self.position += 1;
110 if *ch == '\n' {
111 self.line += 1;
112 self.column = 1;
113 } else {
114 self.column += 1;
115 }
116 Some(*ch)
117 } else {
118 None
119 }
120 }
121
122 pub fn skip_whitespace(&mut self) {
123 while let Some(ch) = self.peek() {
124 if ch == ' ' {
125 self.advance();
126 } else {
127 break;
128 }
129 }
130 }
131
132 pub fn scan_token(&mut self) -> ToonResult<Token> {
134 if self.column == 1 {
135 let mut count = 0;
136 let mut idx = self.position;
137 while let Some(&ch) = self.input.get(idx) {
138 if ch == ' ' {
139 count += 1;
140 idx += 1;
141 } else {
142 if ch == '\t' {
143 let (line, col) = self.current_position();
144 return Err(ToonError::parse_error(
145 line,
146 col + count,
147 "Tabs are not allowed in indentation",
148 ));
149 }
150 break;
151 }
152 }
153 self.last_line_indent = count;
154 }
155
156 self.skip_whitespace();
157
158 match self.peek() {
159 None => Ok(Token::Eof),
160 Some('\n') => {
161 self.advance();
162 Ok(Token::Newline)
163 }
164 Some('[') => {
165 self.advance();
166 Ok(Token::LeftBracket)
167 }
168 Some(']') => {
169 self.advance();
170 Ok(Token::RightBracket)
171 }
172 Some('{') => {
173 self.advance();
174 Ok(Token::LeftBrace)
175 }
176 Some('}') => {
177 self.advance();
178 Ok(Token::RightBrace)
179 }
180 Some(':') => {
181 self.advance();
182 Ok(Token::Colon)
183 }
184 Some('-') => {
185 self.advance();
186 if let Some(ch) = self.peek() {
188 if ch.is_ascii_digit() {
189 let num_str = self.scan_number_string(true)?;
190 return self.parse_number(&num_str);
191 }
192 }
193 Ok(Token::Dash)
194 }
195 Some(',') => {
196 if matches!(self.active_delimiter, Some(Delimiter::Comma)) {
197 self.advance();
198 Ok(Token::Delimiter(Delimiter::Comma))
199 } else {
200 self.scan_unquoted_string()
201 }
202 }
203 Some('|') => {
204 if matches!(self.active_delimiter, Some(Delimiter::Pipe)) {
205 self.advance();
206 Ok(Token::Delimiter(Delimiter::Pipe))
207 } else {
208 self.scan_unquoted_string()
209 }
210 }
211 Some('\t') => {
212 if matches!(self.active_delimiter, Some(Delimiter::Tab)) {
213 self.advance();
214 Ok(Token::Delimiter(Delimiter::Tab))
215 } else {
216 self.scan_unquoted_string()
217 }
218 }
219 Some('"') => self.scan_quoted_string(),
220 Some(ch) if ch.is_ascii_digit() => {
221 let num_str = self.scan_number_string(false)?;
222 self.parse_number(&num_str)
223 }
224 Some(_) => self.scan_unquoted_string(),
225 }
226 }
227
228 fn scan_quoted_string(&mut self) -> ToonResult<Token> {
229 self.advance();
230
231 let mut value = String::new();
232 let mut escaped = false;
233
234 while let Some(ch) = self.advance() {
235 if escaped {
236 match ch {
237 'n' => value.push('\n'),
238 'r' => value.push('\r'),
239 't' => value.push('\t'),
240 '"' => value.push('"'),
241 '\\' => value.push('\\'),
242 _ => {
243 let (line, col) = self.current_position();
244 return Err(ToonError::parse_error(
245 line,
246 col - 1,
247 format!("Invalid escape sequence: \\{ch}"),
248 ));
249 }
250 }
251 escaped = false;
252 } else if ch == '\\' {
253 escaped = true;
254 } else if ch == '"' {
255 return Ok(Token::String(value, true));
256 } else {
257 value.push(ch);
258 }
259 }
260
261 Err(ToonError::UnexpectedEof)
262 }
263
264 fn scan_unquoted_string(&mut self) -> ToonResult<Token> {
265 let mut value = String::new();
266
267 while let Some(ch) = self.peek() {
268 if ch == '\n'
269 || ch == ' '
270 || ch == ':'
271 || ch == '['
272 || ch == ']'
273 || ch == '{'
274 || ch == '}'
275 {
276 break;
277 }
278
279 if let Some(active) = self.active_delimiter {
280 if (active == Delimiter::Comma && ch == ',')
281 || (active == Delimiter::Pipe && ch == '|')
282 || (active == Delimiter::Tab && ch == '\t')
283 {
284 break;
285 }
286 }
287 value.push(ch);
288 self.advance();
289 }
290
291 let value = if value.len() == 1 && (value == "," || value == "|" || value == "\t") {
292 value
293 } else {
294 value.trim_end().to_string()
295 };
296
297 match value.as_str() {
298 "null" => Ok(Token::Null),
299 "true" => Ok(Token::Bool(true)),
300 "false" => Ok(Token::Bool(false)),
301 _ => Ok(Token::String(value, false)),
302 }
303 }
304
305 pub fn get_last_line_indent(&self) -> usize {
306 self.last_line_indent
307 }
308
309 fn scan_number_string(&mut self, negative: bool) -> ToonResult<String> {
310 let mut num_str = if negative {
311 String::from("-")
312 } else {
313 String::new()
314 };
315
316 while let Some(ch) = self.peek() {
317 if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-'
318 {
319 num_str.push(ch);
320 self.advance();
321 } else {
322 break;
323 }
324 }
325
326 Ok(num_str)
327 }
328
329 fn parse_number(&self, s: &str) -> ToonResult<Token> {
330 if s.contains('.') || s.contains('e') || s.contains('E') {
331 if let Ok(f) = s.parse::<f64>() {
332 Ok(Token::Number(f))
333 } else {
334 Ok(Token::String(s.to_string(), false))
335 }
336 } else if let Ok(i) = s.parse::<i64>() {
337 Ok(Token::Integer(i))
338 } else {
339 Ok(Token::String(s.to_string(), false))
340 }
341 }
342
343 pub fn detect_delimiter(&mut self) -> Option<Delimiter> {
345 let saved_pos = self.position;
346
347 while let Some(ch) = self.peek() {
348 match ch {
349 ',' => {
350 self.position = saved_pos;
351 return Some(Delimiter::Comma);
352 }
353 '|' => {
354 self.position = saved_pos;
355 return Some(Delimiter::Pipe);
356 }
357 '\t' => {
358 self.position = saved_pos;
359 return Some(Delimiter::Tab);
360 }
361 '\n' | ':' | '[' | ']' | '{' | '}' => break,
362 _ => {
363 self.advance();
364 }
365 }
366 }
367
368 self.position = saved_pos;
369 None
370 }
371}
372
#[cfg(test)]
mod tests {
    use core::f64;

    use super::*;

    /// Scans the next token, panicking if the scanner reports an error.
    fn next(scanner: &mut Scanner) -> Token {
        scanner.scan_token().unwrap()
    }

    /// Asserts that `input` scans to exactly the tokens in `expected`, in order.
    fn assert_tokens(input: &str, expected: &[Token]) {
        let mut scanner = Scanner::new(input);
        for token in expected {
            assert_eq!(&next(&mut scanner), token);
        }
    }

    #[test]
    fn test_scan_structural_tokens() {
        assert_tokens(
            "[]{}:-",
            &[
                Token::LeftBracket,
                Token::RightBracket,
                Token::LeftBrace,
                Token::RightBrace,
                Token::Colon,
                Token::Dash,
            ],
        );
    }

    #[test]
    fn test_scan_numbers() {
        assert_tokens(
            "42 3.141592653589793 -5",
            &[
                Token::Integer(42),
                Token::Number(f64::consts::PI),
                Token::Integer(-5),
            ],
        );
    }

    #[test]
    fn test_scan_booleans() {
        assert_tokens("true false", &[Token::Bool(true), Token::Bool(false)]);
    }

    #[test]
    fn test_scan_null() {
        assert_tokens("null", &[Token::Null]);
    }

    #[test]
    fn test_scan_quoted_string() {
        assert_tokens(
            r#""hello world""#,
            &[Token::String("hello world".to_string(), true)],
        );
    }

    #[test]
    fn test_scan_escaped_string() {
        assert_tokens(
            r#""hello\nworld""#,
            &[Token::String("hello\nworld".to_string(), true)],
        );
    }

    #[test]
    fn test_scan_unquoted_string() {
        assert_tokens("hello", &[Token::String("hello".to_string(), false)]);
    }

    #[test]
    fn test_detect_delimiter() {
        let cases = [
            ("a,b,c", Delimiter::Comma),
            ("a|b|c", Delimiter::Pipe),
            ("a\tb\tc", Delimiter::Tab),
        ];
        for &(input, expected) in &cases {
            let mut scanner = Scanner::new(input);
            assert_eq!(scanner.detect_delimiter(), Some(expected));
        }
    }
}