1use serde_json::{
2 Map,
3 Number,
4 Value,
5};
6
7use crate::{
8 constants::{
9 KEYWORDS,
10 MAX_DEPTH,
11 QUOTED_KEY_MARKER,
12 },
13 decode::{
14 scanner::{
15 Scanner,
16 Token,
17 },
18 validation,
19 },
20 types::{
21 DecodeOptions,
22 Delimiter,
23 ErrorContext,
24 ToonError,
25 ToonResult,
26 },
27 utils::validation::validate_depth,
28};
29
/// Recursive-descent parser that turns scanner tokens into a
/// `serde_json::Value`.
///
/// The parser keeps a single token of lookahead (`current_token`); parsing
/// methods inspect it and then replace it with the next token from the
/// scanner.
// NOTE(review): `allow(unused)` presumably exists because `delimiter` is
// stored by `new` but never read by the methods in this file — confirm.
#[allow(unused)]
pub struct Parser<'a> {
    /// Token source for the input text.
    scanner: Scanner,
    /// Most recently scanned token that has not been consumed yet.
    current_token: Token,
    /// Decoding options (strict mode, indent width, configured delimiter).
    options: DecodeOptions,
    /// Delimiter copied from `options` when the parser was constructed.
    delimiter: Option<Delimiter>,
    /// The complete input text, kept so errors can quote surrounding lines.
    input: &'a str,
}
39
40impl<'a> Parser<'a> {
41 pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult<Self> {
43 let mut scanner = Scanner::new(input);
44 let chosen_delim = options.delimiter;
45 scanner.set_active_delimiter(chosen_delim);
46 let current_token = scanner.scan_token()?;
47
48 Ok(Self {
49 scanner,
50 current_token,
51 delimiter: chosen_delim,
52 options,
53 input,
54 })
55 }
56
57 pub fn parse(&mut self) -> ToonResult<Value> {
59 if self.options.strict {
60 self.validate_indentation(self.scanner.get_last_line_indent())?;
61 }
62 let value = self.parse_value()?;
63
64 if self.options.strict {
66 self.skip_newlines()?;
67 if !matches!(self.current_token, Token::Eof) {
68 return Err(self
69 .parse_error_with_context(
70 "Multiple values at root level are not allowed in strict mode",
71 )
72 .with_suggestion("Wrap multiple values in an object or array"));
73 }
74 }
75
76 Ok(value)
77 }
78
79 fn advance(&mut self) -> ToonResult<()> {
80 self.current_token = self.scanner.scan_token()?;
81 Ok(())
82 }
83
84 fn skip_newlines(&mut self) -> ToonResult<()> {
85 while matches!(self.current_token, Token::Newline) {
86 self.advance()?;
87 }
88 Ok(())
89 }
90
91 fn parse_value(&mut self) -> ToonResult<Value> {
92 self.parse_value_with_depth(0)
93 }
94
95 fn parse_value_with_depth(&mut self, depth: usize) -> ToonResult<Value> {
96 validate_depth(depth, MAX_DEPTH)?;
97
98 let had_newline = matches!(self.current_token, Token::Newline);
99 self.skip_newlines()?;
100
101 match &self.current_token {
102 Token::Null => {
103 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
105 if next_char_is_colon {
106 let key = KEYWORDS[0].to_string();
107 self.advance()?;
108 self.parse_object_with_initial_key(key, depth)
109 } else {
110 self.advance()?;
111 Ok(Value::Null)
112 }
113 }
114 Token::Bool(b) => {
115 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
116 if next_char_is_colon {
117 let key = if *b {
118 KEYWORDS[1].to_string()
119 } else {
120 KEYWORDS[2].to_string()
121 };
122 self.advance()?;
123 self.parse_object_with_initial_key(key, depth)
124 } else {
125 let val = *b;
126 self.advance()?;
127 Ok(Value::Bool(val))
128 }
129 }
130 Token::Integer(i) => {
131 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
132 if next_char_is_colon {
133 let key = i.to_string();
134 self.advance()?;
135 self.parse_object_with_initial_key(key, depth)
136 } else {
137 let val = *i;
138 self.advance()?;
139 Ok(serde_json::Number::from(val).into())
140 }
141 }
142 Token::Number(n) => {
143 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
144 if next_char_is_colon {
145 let key = n.to_string();
146 self.advance()?;
147 self.parse_object_with_initial_key(key, depth)
148 } else {
149 let val = *n;
150 self.advance()?;
151 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
153 Ok(serde_json::Number::from(val as i64).into())
154 } else {
155 Ok(serde_json::Number::from_f64(val)
156 .ok_or_else(|| {
157 ToonError::InvalidInput(format!("Invalid number: {val}"))
158 })?
159 .into())
160 }
161 }
162 }
163 Token::String(s, _) => {
164 let first = s.clone();
165 self.advance()?;
166
167 match &self.current_token {
168 Token::Colon | Token::LeftBracket => {
169 self.parse_object_with_initial_key(first, depth)
170 }
171 _ => {
172 if self.options.strict && depth > 0 && had_newline {
175 return Err(self
176 .parse_error_with_context(format!(
177 "Expected ':' after '{first}' in object context"
178 ))
179 .with_suggestion(
180 "Add ':' after the key, or place the value on the same line \
181 as the parent key",
182 ));
183 }
184
185 let mut accumulated = first;
187 while let Token::String(next, _) = &self.current_token {
188 if !accumulated.is_empty() {
189 accumulated.push(' ');
190 }
191 accumulated.push_str(next);
192 self.advance()?;
193 }
194 Ok(Value::String(accumulated))
195 }
196 }
197 }
198 Token::LeftBracket => self.parse_root_array(depth),
199 Token::Eof => Ok(Value::Object(Map::new())),
200 _ => self.parse_object(depth),
201 }
202 }
203
204 fn parse_object(&mut self, depth: usize) -> ToonResult<Value> {
205 validate_depth(depth, MAX_DEPTH)?;
206
207 let mut obj = Map::new();
208 let mut base_indent: Option<usize> = None;
210
211 loop {
212 while matches!(self.current_token, Token::Newline) {
213 self.advance()?;
214 }
215
216 if matches!(self.current_token, Token::Eof) {
217 break;
218 }
219
220 let current_indent = self.scanner.get_last_line_indent();
221
222 if self.options.strict {
223 self.validate_indentation(current_indent)?;
224 }
225
226 if let Some(expected) = base_indent {
228 if current_indent != expected {
229 break;
230 }
231 } else {
232 base_indent = Some(current_indent);
233 }
234
235 let key = match &self.current_token {
236 Token::String(s, was_quoted) => {
237 if *was_quoted && s.contains('.') {
240 format!("{QUOTED_KEY_MARKER}{s}")
241 } else {
242 s.clone()
243 }
244 }
245 _ => {
246 return Err(self
247 .parse_error_with_context(format!(
248 "Expected key, found {:?}",
249 self.current_token
250 ))
251 .with_suggestion("Object keys must be strings"));
252 }
253 };
254 self.advance()?;
255
256 let value = if matches!(self.current_token, Token::LeftBracket) {
257 self.parse_array(depth)?
258 } else {
259 if !matches!(self.current_token, Token::Colon) {
260 return Err(self
261 .parse_error_with_context(format!(
262 "Expected ':' or '[', found {:?}",
263 self.current_token
264 ))
265 .with_suggestion("Use ':' for object values or '[' for arrays"));
266 }
267 self.advance()?;
268 self.parse_field_value(depth)?
269 };
270
271 obj.insert(key, value);
272 }
273
274 Ok(Value::Object(obj))
275 }
276
277 fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult<Value> {
278 validate_depth(depth, MAX_DEPTH)?;
279
280 let mut obj = Map::new();
281 let mut base_indent: Option<usize> = None;
282
283 if self.options.strict {
285 let current_indent = self.scanner.get_last_line_indent();
286 self.validate_indentation(current_indent)?;
287 }
288
289 if matches!(self.current_token, Token::LeftBracket) {
290 let value = self.parse_array(depth)?;
291 obj.insert(key, value);
292 } else {
293 if !matches!(self.current_token, Token::Colon) {
294 return Err(self.parse_error_with_context(format!(
295 "Expected ':', found {:?}",
296 self.current_token
297 )));
298 }
299 self.advance()?;
300
301 let value = self.parse_field_value(depth)?;
302 obj.insert(key, value);
303 }
304
305 loop {
306 while matches!(self.current_token, Token::Newline) {
308 self.advance()?;
309
310 if !self.options.strict {
311 while matches!(self.current_token, Token::Newline) {
312 self.advance()?;
313 }
314 }
315
316 if matches!(self.current_token, Token::Newline) {
317 continue;
318 }
319
320 let next_indent = self.scanner.get_last_line_indent();
321
322 let should_continue = if let Some(expected) = base_indent {
324 next_indent == expected
325 } else {
326 let current_depth_indent = self.options.indent.get_spaces() * depth;
328 next_indent == current_depth_indent
329 };
330
331 if !should_continue {
332 break;
333 }
334 }
335
336 if matches!(self.current_token, Token::Eof) {
337 break;
338 }
339
340 if !matches!(self.current_token, Token::String(_, _)) {
341 break;
342 }
343
344 if matches!(self.current_token, Token::Eof) {
345 break;
346 }
347
348 let current_indent = self.scanner.get_last_line_indent();
349
350 if let Some(expected) = base_indent {
351 if current_indent != expected {
352 break;
353 }
354 } else {
355 let expected_depth_indent = self.options.indent.get_spaces() * depth;
357 if current_indent != expected_depth_indent {
358 break;
359 }
360 }
361
362 if self.options.strict {
363 self.validate_indentation(current_indent)?;
364 }
365
366 if base_indent.is_none() {
367 base_indent = Some(current_indent);
368 }
369
370 let key = match &self.current_token {
371 Token::String(s, was_quoted) => {
372 if *was_quoted && s.contains('.') {
375 format!("{QUOTED_KEY_MARKER}{s}")
376 } else {
377 s.clone()
378 }
379 }
380 _ => break,
381 };
382 self.advance()?;
383
384 let value = if matches!(self.current_token, Token::LeftBracket) {
385 self.parse_array(depth)?
386 } else {
387 if !matches!(self.current_token, Token::Colon) {
388 break;
389 }
390 self.advance()?;
391 self.parse_field_value(depth)?
392 };
393
394 obj.insert(key, value);
395 }
396
397 Ok(Value::Object(obj))
398 }
399
400 fn parse_field_value(&mut self, depth: usize) -> ToonResult<Value> {
401 validate_depth(depth, MAX_DEPTH)?;
402
403 if matches!(self.current_token, Token::Newline | Token::Eof) {
404 let has_children = if matches!(self.current_token, Token::Newline) {
406 let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
407 let next_indent = self.scanner.count_leading_spaces();
408 next_indent >= current_depth_indent
409 } else {
410 false
411 };
412
413 if has_children {
414 self.parse_value_with_depth(depth + 1)
415 } else {
416 Ok(Value::Object(Map::new()))
418 }
419 } else {
420 self.parse_value_with_depth(depth + 1)
421 }
422 }
423
424 fn parse_root_array(&mut self, depth: usize) -> ToonResult<Value> {
425 validate_depth(depth, MAX_DEPTH)?;
426
427 if !matches!(self.current_token, Token::LeftBracket) {
428 return Err(self.parse_error_with_context("Expected '[' at the start of root array"));
429 }
430
431 self.parse_array(depth)
432 }
433
434 fn parse_array_header(
435 &mut self,
436 ) -> ToonResult<(usize, Option<Delimiter>, Option<Vec<String>>)> {
437 if !matches!(self.current_token, Token::LeftBracket) {
438 return Err(self.parse_error_with_context("Expected '['"));
439 }
440 self.advance()?;
441
442 let length = if let Token::Integer(n) = &self.current_token {
445 *n as usize
446 } else if let Token::String(s, _) = &self.current_token {
447 if s.starts_with('#') {
449 return Err(self
450 .parse_error_with_context(
451 "Length marker '#' is no longer supported in TOON spec v2.0. Use [N] \
452 format instead of [#N]",
453 )
454 .with_suggestion("Remove the '#' prefix from the array length"));
455 }
456
457 s.parse::<usize>().map_err(|_| {
459 self.parse_error_with_context(format!("Expected array length, found: {s}"))
460 })?
461 } else {
462 return Err(self.parse_error_with_context(format!(
463 "Expected array length, found {:?}",
464 self.current_token
465 )));
466 };
467
468 self.advance()?;
469
470 let detected_delim = match &self.current_token {
472 Token::Delimiter(d) => {
473 let delim = *d;
474 self.advance()?;
475 Some(delim)
476 }
477 Token::String(s, _) if s == "," => {
478 self.advance()?;
479 Some(Delimiter::Comma)
480 }
481 Token::String(s, _) if s == "|" => {
482 self.advance()?;
483 Some(Delimiter::Pipe)
484 }
485 Token::String(s, _) if s == "\t" => {
486 self.advance()?;
487 Some(Delimiter::Tab)
488 }
489 _ => None,
490 };
491
492 let active_delim = detected_delim.or(Some(Delimiter::Comma));
494
495 self.scanner.set_active_delimiter(active_delim);
496
497 if !matches!(self.current_token, Token::RightBracket) {
498 return Err(self.parse_error_with_context(format!(
499 "Expected ']', found {:?}",
500 self.current_token
501 )));
502 }
503 self.advance()?;
504
505 let fields = if matches!(self.current_token, Token::LeftBrace) {
506 self.advance()?;
507 let mut fields = Vec::new();
508
509 loop {
510 match &self.current_token {
511 Token::String(s, _) => {
512 fields.push(s.clone());
513 self.advance()?;
514
515 if matches!(self.current_token, Token::RightBrace) {
516 break;
517 }
518
519 let is_delim = match &self.current_token {
520 Token::Delimiter(_) => true,
521 Token::String(s, _) if s == "," || s == "|" || s == "\t" => true,
522 _ => false,
523 };
524 if is_delim {
525 self.advance()?;
526 } else {
527 return Err(self.parse_error_with_context(format!(
528 "Expected delimiter or '}}', found {:?}",
529 self.current_token
530 )));
531 }
532 }
533 Token::RightBrace => break,
534 _ => {
535 return Err(self.parse_error_with_context(format!(
536 "Expected field name, found {:?}",
537 self.current_token
538 )))
539 }
540 }
541 }
542
543 self.advance()?;
544 Some(fields)
545 } else {
546 None
547 };
548
549 if !matches!(self.current_token, Token::Colon) {
550 return Err(self.parse_error_with_context("Expected ':' after array header"));
551 }
552 self.advance()?;
553
554 Ok((length, detected_delim, fields))
555 }
556
557 fn parse_array(&mut self, depth: usize) -> ToonResult<Value> {
558 validate_depth(depth, MAX_DEPTH)?;
559
560 let (length, _detected_delim, fields) = self.parse_array_header()?;
561
562 if let Some(fields) = fields {
563 validation::validate_field_list(&fields)?;
564 self.parse_tabular_array(length, fields, depth)
565 } else {
566 self.parse_regular_array(length, depth)
567 }
568 }
569
    /// Parses the rows of a tabular array whose header has already been
    /// consumed.
    ///
    /// Reads up to `length` rows, each a delimiter-separated list of values
    /// that are stored under the names in `fields`, and returns them as an
    /// array of objects.
    ///
    /// A newline is required right after the header, whatever `length` is.
    /// In strict mode every row must be indented by exactly
    /// `indent * (depth + 1)` spaces, carry exactly `fields.len()` values,
    /// and no blank lines may separate rows. In lenient mode short rows are
    /// padded with `null` and trailing extra tokens on a row are skipped.
    ///
    /// # Errors
    /// Returns a parse error for the violations above, for a missing
    /// delimiter between values, and when a further row-like line follows
    /// the last declared row. The final row count is also passed to
    /// `validation::validate_array_length`.
    fn parse_tabular_array(
        &mut self,
        length: usize,
        fields: Vec<String>,
        depth: usize,
    ) -> ToonResult<Value> {
        let mut rows = Vec::new();

        if !matches!(self.current_token, Token::Newline) {
            return Err(self
                .parse_error_with_context("Expected newline after tabular array header")
                .with_suggestion("Tabular arrays must have rows on separate lines"));
        }
        self.skip_newlines()?;

        for row_index in 0..length {
            // Input ended before all declared rows were read.
            if matches!(self.current_token, Token::Eof) {
                if self.options.strict {
                    return Err(self.parse_error_with_context(format!(
                        "Expected {} rows, but got {} before EOF",
                        length,
                        rows.len()
                    )));
                }
                break;
            }

            let current_indent = self.scanner.get_last_line_indent();
            let expected_indent = self.options.indent.get_spaces() * (depth + 1);

            if self.options.strict {
                self.validate_indentation(current_indent)?;

                if current_indent != expected_indent {
                    return Err(self.parse_error_with_context(format!(
                        "Invalid indentation for tabular row: expected {expected_indent} spaces, \
                         found {current_indent}"
                    )));
                }
            }

            let mut row = Map::new();

            for (field_index, field) in fields.iter().enumerate() {
                // Every value after the first must be preceded by a
                // delimiter, which may arrive as a dedicated token or as a
                // one-character string token.
                if field_index > 0 {
                    if matches!(self.current_token, Token::Delimiter(_))
                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
                    {
                        self.advance()?;
                    } else {
                        return Err(self
                            .parse_error_with_context(format!(
                                "Expected delimiter, found {:?}",
                                self.current_token
                            ))
                            .with_suggestion(format!(
                                "Tabular row {} field {} needs a delimiter",
                                row_index + 1,
                                field_index + 1
                            )));
                    }
                }

                // A delimiter, newline or end of input in value position
                // means the cell is empty; it becomes an empty string.
                let value = if matches!(self.current_token, Token::Delimiter(_))
                    || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
                    || matches!(self.current_token, Token::Newline | Token::Eof)
                {
                    Value::String(String::new())
                } else {
                    self.parse_tabular_field_value()?
                };

                row.insert(field.clone(), value);

                if field_index < fields.len() - 1 {
                    // The row ended before all fields were read.
                    if matches!(self.current_token, Token::Newline | Token::Eof) {
                        if self.options.strict {
                            return Err(self
                                .parse_error_with_context(format!(
                                    "Tabular row {}: expected {} values, but found only {}",
                                    row_index + 1,
                                    fields.len(),
                                    field_index + 1
                                ))
                                .with_suggestion(format!(
                                    "Row {} should have exactly {} values",
                                    row_index + 1,
                                    fields.len()
                                )));
                        } else {
                            // Lenient mode: remaining fields become null.
                            for field in fields.iter().skip(field_index + 1) {
                                row.insert(field.clone(), Value::Null);
                            }
                            break;
                        }
                    }
                } else if !matches!(self.current_token, Token::Newline | Token::Eof)
                    && (matches!(self.current_token, Token::Delimiter(_))
                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t"))
                {
                    // A delimiter after the last field means the row has
                    // more values than the header declared. This is an
                    // error in both strict and lenient mode.
                    return Err(self
                        .parse_error_with_context(format!(
                            "Tabular row {}: expected {} values, but found extra values",
                            row_index + 1,
                            fields.len()
                        ))
                        .with_suggestion(format!(
                            "Row {} should have exactly {} values",
                            row_index + 1,
                            fields.len()
                        )));
                }
            }

            // NOTE(review): this pads by position using `row.len()`; if the
            // header repeats a field name the count and the position differ —
            // confirm `validate_field_list` rules that out.
            if !self.options.strict && row.len() < fields.len() {
                for field in fields.iter().skip(row.len()) {
                    row.insert(field.clone(), Value::Null);
                }
            }

            rows.push(Value::Object(row));

            if matches!(self.current_token, Token::Eof) {
                break;
            }

            if !matches!(self.current_token, Token::Newline) {
                if !self.options.strict {
                    // Lenient mode: drop whatever is left on this line.
                    while !matches!(self.current_token, Token::Newline | Token::Eof) {
                        self.advance()?;
                    }
                    if matches!(self.current_token, Token::Eof) {
                        break;
                    }
                } else {
                    return Err(self.parse_error_with_context(format!(
                        "Expected newline after tabular row {}",
                        row_index + 1
                    )));
                }
            }

            if row_index + 1 < length {
                // More rows expected: step over the line break. A second
                // newline right away is a blank line, which strict mode
                // rejects.
                self.advance()?;
                if self.options.strict && matches!(self.current_token, Token::Newline) {
                    return Err(self.parse_error_with_context(
                        "Blank lines are not allowed inside tabular arrays in strict mode",
                    ));
                }

                self.skip_newlines()?;
            } else if matches!(self.current_token, Token::Newline) {
                // Last declared row: look at the next non-blank line to
                // detect surplus rows.
                self.advance()?;
                self.skip_newlines()?;

                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
                let actual_indent = self.scanner.get_last_line_indent();

                if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) {
                    // A line at row indentation is accepted only when it
                    // looks like `key:` (a string token with ':' as the next
                    // character); anything else is treated as an extra row.
                    let is_key_value = matches!(self.current_token, Token::String(_, _))
                        && matches!(self.scanner.peek(), Some(':'));

                    if !is_key_value {
                        return Err(self.parse_error_with_context(format!(
                            "Array length mismatch: expected {length} rows, but more rows found",
                        )));
                    }
                }
            }
        }

        // NOTE(review): presumably fails when fewer rows than `length` were
        // read (possible in lenient mode) — confirm in `validation`.
        validation::validate_array_length(length, rows.len())?;

        Ok(Value::Array(rows))
    }
754
    /// Parses the items of a non-tabular array whose header has already been
    /// consumed.
    ///
    /// Two layouts are handled, chosen by the lookahead token:
    ///
    /// * `Newline` — list form: each of the `length` items is on its own
    ///   line and starts with `-`. An item can be empty (empty object), a
    ///   nested array, an object whose first field follows the dash, or a
    ///   primitive.
    /// * anything else — inline form: `length` delimiter-separated values on
    ///   the header line; an empty slot becomes an empty string.
    ///
    /// # Errors
    /// Returns a parse error for a missing `-` or delimiter, for invalid
    /// list-item indentation or blank lines between items (strict mode), and
    /// when more items follow than `length` declares. The final item count is
    /// also passed to `validation::validate_array_length`.
    fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult<Value> {
        let mut items = Vec::new();

        match &self.current_token {
            Token::Newline => {
                self.skip_newlines()?;

                let expected_indent = self.options.indent.get_spaces() * (depth + 1);

                for i in 0..length {
                    let current_indent = self.scanner.get_last_line_indent();
                    if self.options.strict {
                        self.validate_indentation(current_indent)?;

                        if current_indent != expected_indent {
                            return Err(self.parse_error_with_context(format!(
                                "Invalid indentation for list item: expected {expected_indent} \
                                 spaces, found {current_indent}"
                            )));
                        }
                    }
                    if !matches!(self.current_token, Token::Dash) {
                        return Err(self
                            .parse_error_with_context(format!(
                                "Expected '-' for list item, found {:?}",
                                self.current_token
                            ))
                            .with_suggestion(format!(
                                "List arrays need '-' prefix for each item (item {} of {})",
                                i + 1,
                                length
                            )));
                    }
                    self.advance()?;

                    let value = if matches!(self.current_token, Token::Newline | Token::Eof) {
                        // A bare dash is an empty object.
                        Value::Object(Map::new())
                    } else if matches!(self.current_token, Token::LeftBracket) {
                        self.parse_array(depth + 1)?
                    } else if let Token::String(s, _) = &self.current_token {
                        let key = s.clone();
                        self.advance()?;

                        if matches!(self.current_token, Token::Colon | Token::LeftBracket) {
                            // `- key: ...` or `- key[...]`: the item is an
                            // object and `key` is its first field.
                            let first_value = if matches!(self.current_token, Token::LeftBracket) {
                                self.parse_array(depth + 1)?
                            } else {
                                self.advance()?;
                                if matches!(self.current_token, Token::LeftBracket) {
                                    self.parse_array(depth + 2)?
                                } else {
                                    self.parse_field_value(depth + 2)?
                                }
                            };

                            let mut obj = Map::new();
                            obj.insert(key, first_value);

                            // Further fields of this item sit two levels
                            // below the array header.
                            let field_indent = self.options.indent.get_spaces() * (depth + 2);

                            let should_parse_more_fields =
                                if matches!(self.current_token, Token::Newline) {
                                    // presumably the indentation of the
                                    // upcoming line — TODO confirm in Scanner
                                    let next_indent = self.scanner.count_leading_spaces();

                                    if next_indent < field_indent {
                                        false
                                    } else {
                                        self.advance()?;

                                        if !self.options.strict {
                                            self.skip_newlines()?;
                                        }
                                        true
                                    }
                                } else if matches!(self.current_token, Token::String(_, _)) {
                                    // The nested parse already moved onto
                                    // the next line's first token.
                                    let current_indent = self.scanner.get_last_line_indent();
                                    current_indent == field_indent
                                } else {
                                    false
                                };

                            if should_parse_more_fields {
                                while !matches!(self.current_token, Token::Eof) {
                                    let current_indent = self.scanner.get_last_line_indent();

                                    if current_indent < field_indent {
                                        break;
                                    }

                                    // Deeper-than-expected lines end the
                                    // item in strict mode only.
                                    if current_indent != field_indent && self.options.strict {
                                        break;
                                    }

                                    // A dash starts the next list item.
                                    if matches!(self.current_token, Token::Dash) {
                                        break;
                                    }

                                    let field_key = match &self.current_token {
                                        Token::String(s, _) => s.clone(),
                                        _ => break,
                                    };
                                    self.advance()?;

                                    // NOTE(review): the `break` in the final
                                    // branch leaves the loop after the key
                                    // token was consumed, so that key is
                                    // dropped — confirm intended.
                                    let field_value =
                                        if matches!(self.current_token, Token::LeftBracket) {
                                            self.parse_array(depth + 2)?
                                        } else if matches!(self.current_token, Token::Colon) {
                                            self.advance()?;
                                            if matches!(self.current_token, Token::LeftBracket) {
                                                self.parse_array(depth + 2)?
                                            } else {
                                                self.parse_field_value(depth + 2)?
                                            }
                                        } else {
                                            break;
                                        };

                                    obj.insert(field_key, field_value);

                                    if matches!(self.current_token, Token::Newline) {
                                        let next_indent = self.scanner.count_leading_spaces();
                                        if next_indent < field_indent {
                                            break;
                                        }
                                        self.advance()?;
                                        if !self.options.strict {
                                            self.skip_newlines()?;
                                        }
                                    } else {
                                        break;
                                    }
                                }
                            }

                            Value::Object(obj)
                        } else if matches!(self.current_token, Token::LeftBracket) {
                            // NOTE(review): unreachable — the preceding
                            // condition already matches `LeftBracket`.
                            let array_value = self.parse_array(depth + 1)?;
                            let mut obj = Map::new();
                            obj.insert(key, array_value);
                            Value::Object(obj)
                        } else {
                            // Plain string item: consecutive string tokens
                            // are joined by single spaces.
                            let mut accumulated = key;
                            while let Token::String(next, _) = &self.current_token {
                                if !accumulated.is_empty() {
                                    accumulated.push(' ');
                                }
                                accumulated.push_str(next);
                                self.advance()?;
                            }
                            Value::String(accumulated)
                        }
                    } else {
                        self.parse_primitive()?
                    };

                    items.push(value);

                    if items.len() < length {
                        if matches!(self.current_token, Token::Newline) {
                            self.advance()?;

                            // A second newline right away is a blank line.
                            if self.options.strict && matches!(self.current_token, Token::Newline) {
                                return Err(self.parse_error_with_context(
                                    "Blank lines are not allowed inside list arrays in strict mode",
                                ));
                            }

                            self.skip_newlines()?;
                        } else if !matches!(self.current_token, Token::Dash) {
                            return Err(self.parse_error_with_context(format!(
                                "Expected newline or next list item after list item {}",
                                i + 1
                            )));
                        }
                    } else if matches!(self.current_token, Token::Newline) {
                        // Last declared item: a dash at list indentation on
                        // the next non-blank line means there are surplus
                        // items.
                        self.advance()?;
                        self.skip_newlines()?;

                        let list_indent = self.options.indent.get_spaces() * (depth + 1);
                        let actual_indent = self.scanner.get_last_line_indent();
                        if actual_indent == list_indent && matches!(self.current_token, Token::Dash)
                        {
                            return Err(self.parse_error_with_context(format!(
                                "Array length mismatch: expected {length} items, but more items \
                                 found",
                            )));
                        }
                    }
                }
            }
            _ => {
                // Inline form: values separated by the active delimiter.
                for i in 0..length {
                    if i > 0 {
                        if matches!(self.current_token, Token::Delimiter(_))
                            || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
                        {
                            self.advance()?;
                        } else {
                            return Err(self
                                .parse_error_with_context(format!(
                                    "Expected delimiter, found {:?}",
                                    self.current_token
                                ))
                                .with_suggestion(format!(
                                    "Expected delimiter between items (item {} of {})",
                                    i + 1,
                                    length
                                )));
                        }
                    }

                    // An empty slot becomes an empty string.
                    // NOTE(review): `i < length` is always true inside this
                    // loop.
                    let value = if matches!(self.current_token, Token::Delimiter(_))
                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
                        || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length)
                    {
                        Value::String(String::new())
                    } else if matches!(self.current_token, Token::LeftBracket) {
                        self.parse_array(depth + 1)?
                    } else {
                        self.parse_primitive()?
                    };

                    items.push(value);
                }
            }
        }

        validation::validate_array_length(length, items.len())?;

        // A delimiter token left after the declared number of items means
        // the input holds more values than announced (checked in strict mode
        // only, and only for `Token::Delimiter`).
        if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) {
            return Err(self.parse_error_with_context(format!(
                "Array length mismatch: expected {length} items, but more items found",
            )));
        }

        Ok(Value::Array(items))
    }
1002
1003 fn parse_tabular_field_value(&mut self) -> ToonResult<Value> {
1004 match &self.current_token {
1005 Token::Null => {
1006 self.advance()?;
1007 Ok(Value::Null)
1008 }
1009 Token::Bool(b) => {
1010 let val = *b;
1011 self.advance()?;
1012 Ok(Value::Bool(val))
1013 }
1014 Token::Integer(i) => {
1015 let val = *i;
1016 self.advance()?;
1017 Ok(Number::from(val).into())
1018 }
1019 Token::Number(n) => {
1020 let val = *n;
1021 self.advance()?;
1022 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1024 Ok(Number::from(val as i64).into())
1025 } else {
1026 Ok(Number::from_f64(val)
1027 .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1028 .into())
1029 }
1030 }
1031 Token::String(s, _) => {
1032 let mut accumulated = s.clone();
1034 self.advance()?;
1035
1036 while let Token::String(next, _) = &self.current_token {
1037 if !accumulated.is_empty() {
1038 accumulated.push(' ');
1039 }
1040 accumulated.push_str(next);
1041 self.advance()?;
1042 }
1043
1044 Ok(Value::String(accumulated))
1045 }
1046 _ => Err(self.parse_error_with_context(format!(
1047 "Expected primitive value, found {:?}",
1048 self.current_token
1049 ))),
1050 }
1051 }
1052
1053 fn parse_primitive(&mut self) -> ToonResult<Value> {
1054 match &self.current_token {
1055 Token::Null => {
1056 self.advance()?;
1057 Ok(Value::Null)
1058 }
1059 Token::Bool(b) => {
1060 let val = *b;
1061 self.advance()?;
1062 Ok(Value::Bool(val))
1063 }
1064 Token::Integer(i) => {
1065 let val = *i;
1066 self.advance()?;
1067 Ok(Number::from(val).into())
1068 }
1069 Token::Number(n) => {
1070 let val = *n;
1071 self.advance()?;
1072
1073 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1074 Ok(Number::from(val as i64).into())
1075 } else {
1076 Ok(Number::from_f64(val)
1077 .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1078 .into())
1079 }
1080 }
1081 Token::String(s, _) => {
1082 let val = s.clone();
1083 self.advance()?;
1084 Ok(Value::String(val))
1085 }
1086 _ => Err(self.parse_error_with_context(format!(
1087 "Expected primitive value, found {:?}",
1088 self.current_token
1089 ))),
1090 }
1091 }
1092
1093 fn parse_error_with_context(&self, message: impl Into<String>) -> ToonError {
1094 let (line, column) = self.scanner.current_position();
1095 let message = message.into();
1096
1097 let context = self.get_error_context(line, column);
1098
1099 ToonError::ParseError {
1100 line,
1101 column,
1102 message,
1103 context: Some(Box::new(context)),
1104 }
1105 }
1106
1107 fn get_error_context(&self, line: usize, column: usize) -> ErrorContext {
1108 let lines: Vec<&str> = self.input.lines().collect();
1109
1110 let source_line = if line > 0 && line <= lines.len() {
1111 lines[line - 1].to_string()
1112 } else {
1113 String::new()
1114 };
1115
1116 let preceding_lines: Vec<String> = if line > 1 {
1117 lines[line.saturating_sub(3)..line - 1]
1118 .iter()
1119 .map(|s| s.to_string())
1120 .collect()
1121 } else {
1122 Vec::new()
1123 };
1124
1125 let following_lines: Vec<String> = if line < lines.len() {
1126 lines[line..line.saturating_add(2).min(lines.len())]
1127 .iter()
1128 .map(|s| s.to_string())
1129 .collect()
1130 } else {
1131 Vec::new()
1132 };
1133
1134 let indicator = if column > 0 {
1135 Some(format!("{:width$}^", "", width = column - 1))
1136 } else {
1137 None
1138 };
1139
1140 ErrorContext {
1141 source_line,
1142 preceding_lines,
1143 following_lines,
1144 suggestion: None,
1145 indicator,
1146 }
1147 }
1148
1149 fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> {
1150 if !self.options.strict {
1151 return Ok(());
1152 }
1153
1154 let indent_size = self.options.indent.get_spaces();
1155 if indent_size > 0 && indent_amount > 0 && !indent_amount.is_multiple_of(indent_size) {
1157 Err(self.parse_error_with_context(format!(
1158 "Invalid indentation: found {indent_amount} spaces, but must be a multiple of \
1159 {indent_size}"
1160 )))
1161 } else {
1162 Ok(())
1163 }
1164 }
1165}
1166
/// Unit tests that run the parser end to end with default decode options.
#[cfg(test)]
mod tests {
    use std::f64;

    use serde_json::json;

    use super::*;

    /// Decodes `input` with `DecodeOptions::default()`.
    fn parse(input: &str) -> ToonResult<Value> {
        let mut parser = Parser::new(input, DecodeOptions::default())?;
        parser.parse()
    }

    /// Root-level scalars decode to the matching JSON primitive.
    #[test]
    fn test_parse_primitives() {
        assert_eq!(parse("null").unwrap(), json!(null));
        assert_eq!(parse("true").unwrap(), json!(true));
        assert_eq!(parse("false").unwrap(), json!(false));
        assert_eq!(parse("42").unwrap(), json!(42));
        assert_eq!(parse("3.141592653589793").unwrap(), json!(f64::consts::PI));
        assert_eq!(parse("hello").unwrap(), json!("hello"));
    }

    /// Two `key: value` lines form one object.
    #[test]
    fn test_parse_simple_object() {
        let result = parse("name: Alice\nage: 30").unwrap();
        assert_eq!(result["name"], json!("Alice"));
        assert_eq!(result["age"], json!(30));
    }

    /// An inline array with a declared length and comma-separated values.
    #[test]
    fn test_parse_primitive_array() {
        let result = parse("tags[3]: a,b,c").unwrap();
        assert_eq!(result["tags"], json!(["a", "b", "c"]));
    }

    /// A zero-length header with nothing after the colon is an empty array.
    #[test]
    fn test_parse_empty_array() {
        let result = parse("items[0]:").unwrap();
        assert_eq!(result["items"], json!([]));
    }

    /// A header with a field list decodes each row into an object.
    #[test]
    fn test_parse_tabular_array() {
        let result = parse("users[2]{id,name}:\n 1,Alice\n 2,Bob").unwrap();
        assert_eq!(
            result["users"],
            json!([
                {"id": 1, "name": "Alice"},
                {"id": 2, "name": "Bob"}
            ])
        );
    }

    /// An empty slot between two delimiters decodes to an empty string.
    #[test]
    fn test_empty_tokens() {
        let result = parse("items[3]: a,,c").unwrap();
        assert_eq!(result["items"], json!(["a", "", "c"]));
    }

    /// A key with nothing after the colon holds an empty object.
    #[test]
    fn test_empty_nested_object() {
        let result = parse("user:").unwrap();
        assert_eq!(result, json!({"user": {}}));
    }

    /// List items introduced by `-` can be multi-field objects.
    #[test]
    fn test_list_item_object() {
        let result =
            parse("items[2]:\n - id: 1\n name: First\n - id: 2\n name: Second").unwrap();
        assert_eq!(
            result["items"],
            json!([
                {"id": 1, "name": "First"},
                {"id": 2, "name": "Second"}
            ])
        );
    }

    /// A list item can be an object whose first field is an inline array.
    #[test]
    fn test_nested_array_in_list_item() {
        let result = parse("items[1]:\n - tags[3]: a,b,c").unwrap();
        assert_eq!(result["items"], json!([{"tags": ["a", "b", "c"]}]));
    }

    /// Both keys listed under `x` must end up inside `x`.
    #[test]
    fn test_two_level_siblings() {
        let input = "x:\n y: 1\n z: 2";
        let opts = DecodeOptions::default();
        let mut parser = Parser::new(input, opts).unwrap();
        let result = parser.parse().unwrap();

        let x = result.as_object().unwrap().get("x").unwrap();
        let x_obj = x.as_object().unwrap();

        assert_eq!(x_obj.len(), 2, "x should have 2 keys");
        assert_eq!(x_obj.get("y").unwrap(), &serde_json::json!(1));
        assert_eq!(x_obj.get("z").unwrap(), &serde_json::json!(2));
    }

    /// `d` must be attached to `a` as a sibling of `b`, not to `b`.
    #[test]
    fn test_nested_object_with_sibling() {
        let input = "a:\n b:\n c: 1\n d: 2";
        let opts = DecodeOptions::default();
        let mut parser = Parser::new(input, opts).unwrap();
        let result = parser.parse().unwrap();

        let a = result.as_object().unwrap().get("a").unwrap();
        let a_obj = a.as_object().unwrap();

        assert_eq!(a_obj.len(), 2, "a should have 2 keys (b and d)");
        assert!(a_obj.contains_key("b"), "a should have key 'b'");
        assert!(a_obj.contains_key("d"), "a should have key 'd'");

        let b = a_obj.get("b").unwrap().as_object().unwrap();
        assert_eq!(b.len(), 1, "b should have only 1 key (c)");
        assert!(b.contains_key("c"), "b should have key 'c'");
        assert!(!b.contains_key("d"), "b should NOT have key 'd'");
    }
}
1287}