1use serde_json::{
2 Map,
3 Number,
4 Value,
5};
6
7use crate::{
8 constants::{
9 KEYWORDS,
10 MAX_DEPTH,
11 QUOTED_KEY_MARKER,
12 },
13 decode::{
14 scanner::{
15 Scanner,
16 Token,
17 },
18 validation,
19 },
20 types::{
21 DecodeOptions,
22 Delimiter,
23 ErrorContext,
24 ToonError,
25 ToonResult,
26 },
27 utils::validation::validate_depth,
28};
29
/// Where an array header was encountered, which decides how deep its rows or
/// items are expected to be indented.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArrayParseContext {
    /// Array that is the value of an ordinary field (or the root value).
    Normal,

    /// Array that is the first field of a list item, i.e. it shares a line with
    /// the `-` marker; its contents are parsed one level deeper than `Normal`.
    ListItemFirstField,
}
44
45#[allow(unused)]
47pub struct Parser<'a> {
48 scanner: Scanner,
49 current_token: Token,
50 options: DecodeOptions,
51 delimiter: Option<Delimiter>,
52 input: &'a str,
53}
54
55impl<'a> Parser<'a> {
56 pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult<Self> {
58 let mut scanner = Scanner::new(input);
59 let chosen_delim = options.delimiter;
60 scanner.set_active_delimiter(chosen_delim);
61 let current_token = scanner.scan_token()?;
62
63 Ok(Self {
64 scanner,
65 current_token,
66 delimiter: chosen_delim,
67 options,
68 input,
69 })
70 }
71
72 pub fn parse(&mut self) -> ToonResult<Value> {
74 if self.options.strict {
75 self.validate_indentation(self.scanner.get_last_line_indent())?;
76 }
77 let value = self.parse_value()?;
78
79 if self.options.strict {
81 self.skip_newlines()?;
82 if !matches!(self.current_token, Token::Eof) {
83 return Err(self
84 .parse_error_with_context(
85 "Multiple values at root level are not allowed in strict mode",
86 )
87 .with_suggestion("Wrap multiple values in an object or array"));
88 }
89 }
90
91 Ok(value)
92 }
93
94 fn advance(&mut self) -> ToonResult<()> {
95 self.current_token = self.scanner.scan_token()?;
96 Ok(())
97 }
98
99 fn skip_newlines(&mut self) -> ToonResult<()> {
100 while matches!(self.current_token, Token::Newline) {
101 self.advance()?;
102 }
103 Ok(())
104 }
105
106 fn parse_value(&mut self) -> ToonResult<Value> {
107 self.parse_value_with_depth(0)
108 }
109
110 fn parse_value_with_depth(&mut self, depth: usize) -> ToonResult<Value> {
111 validate_depth(depth, MAX_DEPTH)?;
112
113 let had_newline = matches!(self.current_token, Token::Newline);
114 self.skip_newlines()?;
115
116 match &self.current_token {
117 Token::Null => {
118 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
120 if next_char_is_colon {
121 let key = KEYWORDS[0].to_string();
122 self.advance()?;
123 self.parse_object_with_initial_key(key, depth)
124 } else {
125 self.advance()?;
126 Ok(Value::Null)
127 }
128 }
129 Token::Bool(b) => {
130 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
131 if next_char_is_colon {
132 let key = if *b {
133 KEYWORDS[1].to_string()
134 } else {
135 KEYWORDS[2].to_string()
136 };
137 self.advance()?;
138 self.parse_object_with_initial_key(key, depth)
139 } else {
140 let val = *b;
141 self.advance()?;
142 Ok(Value::Bool(val))
143 }
144 }
145 Token::Integer(i) => {
146 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
147 if next_char_is_colon {
148 let key = i.to_string();
149 self.advance()?;
150 self.parse_object_with_initial_key(key, depth)
151 } else {
152 let val = *i;
153 self.advance()?;
154 Ok(serde_json::Number::from(val).into())
155 }
156 }
157 Token::Number(n) => {
158 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
159 if next_char_is_colon {
160 let key = n.to_string();
161 self.advance()?;
162 self.parse_object_with_initial_key(key, depth)
163 } else {
164 let val = *n;
165 self.advance()?;
166 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
168 Ok(serde_json::Number::from(val as i64).into())
169 } else {
170 Ok(serde_json::Number::from_f64(val)
171 .ok_or_else(|| {
172 ToonError::InvalidInput(format!("Invalid number: {val}"))
173 })?
174 .into())
175 }
176 }
177 }
178 Token::String(s, _) => {
179 let first = s.clone();
180 self.advance()?;
181
182 match &self.current_token {
183 Token::Colon | Token::LeftBracket => {
184 self.parse_object_with_initial_key(first, depth)
185 }
186 _ => {
187 if self.options.strict && depth > 0 && had_newline {
190 return Err(self
191 .parse_error_with_context(format!(
192 "Expected ':' after '{first}' in object context"
193 ))
194 .with_suggestion(
195 "Add ':' after the key, or place the value on the same line \
196 as the parent key",
197 ));
198 }
199
200 let mut accumulated = first;
202 while let Token::String(next, _) = &self.current_token {
203 if !accumulated.is_empty() {
204 accumulated.push(' ');
205 }
206 accumulated.push_str(next);
207 self.advance()?;
208 }
209 Ok(Value::String(accumulated))
210 }
211 }
212 }
213 Token::LeftBracket => self.parse_root_array(depth),
214 Token::Eof => Ok(Value::Object(Map::new())),
215 _ => self.parse_object(depth),
216 }
217 }
218
219 fn parse_object(&mut self, depth: usize) -> ToonResult<Value> {
220 validate_depth(depth, MAX_DEPTH)?;
221
222 let mut obj = Map::new();
223 let mut base_indent: Option<usize> = None;
225
226 loop {
227 while matches!(self.current_token, Token::Newline) {
228 self.advance()?;
229 }
230
231 if matches!(self.current_token, Token::Eof) {
232 break;
233 }
234
235 let current_indent = self.scanner.get_last_line_indent();
236
237 if self.options.strict {
238 self.validate_indentation(current_indent)?;
239 }
240
241 if let Some(expected) = base_indent {
243 if current_indent != expected {
244 break;
245 }
246 } else {
247 base_indent = Some(current_indent);
248 }
249
250 let key = match &self.current_token {
251 Token::String(s, was_quoted) => {
252 if *was_quoted && s.contains('.') {
255 format!("{QUOTED_KEY_MARKER}{s}")
256 } else {
257 s.clone()
258 }
259 }
260 _ => {
261 return Err(self
262 .parse_error_with_context(format!(
263 "Expected key, found {:?}",
264 self.current_token
265 ))
266 .with_suggestion("Object keys must be strings"));
267 }
268 };
269 self.advance()?;
270
271 let value = if matches!(self.current_token, Token::LeftBracket) {
272 self.parse_array(depth)?
273 } else {
274 if !matches!(self.current_token, Token::Colon) {
275 return Err(self
276 .parse_error_with_context(format!(
277 "Expected ':' or '[', found {:?}",
278 self.current_token
279 ))
280 .with_suggestion("Use ':' for object values or '[' for arrays"));
281 }
282 self.advance()?;
283 self.parse_field_value(depth)?
284 };
285
286 obj.insert(key, value);
287 }
288
289 Ok(Value::Object(obj))
290 }
291
292 fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult<Value> {
293 validate_depth(depth, MAX_DEPTH)?;
294
295 let mut obj = Map::new();
296 let mut base_indent: Option<usize> = None;
297
298 if self.options.strict {
300 let current_indent = self.scanner.get_last_line_indent();
301 self.validate_indentation(current_indent)?;
302 }
303
304 if matches!(self.current_token, Token::LeftBracket) {
305 let value = self.parse_array(depth)?;
306 obj.insert(key, value);
307 } else {
308 if !matches!(self.current_token, Token::Colon) {
309 return Err(self.parse_error_with_context(format!(
310 "Expected ':', found {:?}",
311 self.current_token
312 )));
313 }
314 self.advance()?;
315
316 let value = self.parse_field_value(depth)?;
317 obj.insert(key, value);
318 }
319
320 loop {
321 while matches!(self.current_token, Token::Newline) {
323 self.advance()?;
324
325 if !self.options.strict {
326 while matches!(self.current_token, Token::Newline) {
327 self.advance()?;
328 }
329 }
330
331 if matches!(self.current_token, Token::Newline) {
332 continue;
333 }
334
335 let next_indent = self.scanner.get_last_line_indent();
336
337 let should_continue = if let Some(expected) = base_indent {
339 next_indent == expected
340 } else {
341 let current_depth_indent = self.options.indent.get_spaces() * depth;
343 next_indent == current_depth_indent
344 };
345
346 if !should_continue {
347 break;
348 }
349 }
350
351 if matches!(self.current_token, Token::Eof) {
352 break;
353 }
354
355 if !matches!(self.current_token, Token::String(_, _)) {
356 break;
357 }
358
359 if matches!(self.current_token, Token::Eof) {
360 break;
361 }
362
363 let current_indent = self.scanner.get_last_line_indent();
364
365 if let Some(expected) = base_indent {
366 if current_indent != expected {
367 break;
368 }
369 } else {
370 let expected_depth_indent = self.options.indent.get_spaces() * depth;
372 if current_indent != expected_depth_indent {
373 break;
374 }
375 }
376
377 if self.options.strict {
378 self.validate_indentation(current_indent)?;
379 }
380
381 if base_indent.is_none() {
382 base_indent = Some(current_indent);
383 }
384
385 let key = match &self.current_token {
386 Token::String(s, was_quoted) => {
387 if *was_quoted && s.contains('.') {
390 format!("{QUOTED_KEY_MARKER}{s}")
391 } else {
392 s.clone()
393 }
394 }
395 _ => break,
396 };
397 self.advance()?;
398
399 let value = if matches!(self.current_token, Token::LeftBracket) {
400 self.parse_array(depth)?
401 } else {
402 if !matches!(self.current_token, Token::Colon) {
403 break;
404 }
405 self.advance()?;
406 self.parse_field_value(depth)?
407 };
408
409 obj.insert(key, value);
410 }
411
412 Ok(Value::Object(obj))
413 }
414
415 fn parse_field_value(&mut self, depth: usize) -> ToonResult<Value> {
416 validate_depth(depth, MAX_DEPTH)?;
417
418 if matches!(self.current_token, Token::Newline | Token::Eof) {
419 let has_children = if matches!(self.current_token, Token::Newline) {
420 let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
421 let next_indent = self.scanner.count_leading_spaces();
422 next_indent >= current_depth_indent
423 } else {
424 false
425 };
426
427 if has_children {
428 self.parse_value_with_depth(depth + 1)
429 } else {
430 Ok(Value::Object(Map::new()))
431 }
432 } else if matches!(self.current_token, Token::LeftBracket) {
433 self.parse_value_with_depth(depth + 1)
434 } else {
435 let (rest, had_space) = self.scanner.read_rest_of_line_with_space_info();
437
438 let result = if rest.is_empty() {
439 match &self.current_token {
441 Token::String(s, _) => Ok(Value::String(s.clone())),
442 Token::Integer(i) => Ok(serde_json::Number::from(*i).into()),
443 Token::Number(n) => {
444 let val = *n;
445 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
446 Ok(serde_json::Number::from(val as i64).into())
447 } else {
448 Ok(serde_json::Number::from_f64(val)
449 .ok_or_else(|| {
450 ToonError::InvalidInput(format!("Invalid number: {val}"))
451 })?
452 .into())
453 }
454 }
455 Token::Bool(b) => Ok(Value::Bool(*b)),
456 Token::Null => Ok(Value::Null),
457 _ => Err(self.parse_error_with_context("Unexpected token after colon")),
458 }
459 } else {
460 let mut value_str = String::new();
462
463 match &self.current_token {
464 Token::String(s, true) => {
465 value_str.push('"');
467 value_str.push_str(&crate::utils::escape_string(s));
468 value_str.push('"');
469 }
470 Token::String(s, false) => value_str.push_str(s),
471 Token::Integer(i) => value_str.push_str(&i.to_string()),
472 Token::Number(n) => value_str.push_str(&n.to_string()),
473 Token::Bool(b) => value_str.push_str(if *b { "true" } else { "false" }),
474 Token::Null => value_str.push_str("null"),
475 _ => {
476 return Err(self.parse_error_with_context("Unexpected token after colon"));
477 }
478 }
479
480 if had_space {
482 value_str.push(' ');
483 }
484 value_str.push_str(&rest);
485
486 let token = self.scanner.parse_value_string(&value_str)?;
487 match token {
488 Token::String(s, _) => Ok(Value::String(s)),
489 Token::Integer(i) => Ok(serde_json::Number::from(i).into()),
490 Token::Number(n) => {
491 if n.is_finite() && n.fract() == 0.0 && n.abs() <= i64::MAX as f64 {
492 Ok(serde_json::Number::from(n as i64).into())
493 } else {
494 Ok(serde_json::Number::from_f64(n)
495 .ok_or_else(|| {
496 ToonError::InvalidInput(format!("Invalid number: {n}"))
497 })?
498 .into())
499 }
500 }
501 Token::Bool(b) => Ok(Value::Bool(b)),
502 Token::Null => Ok(Value::Null),
503 _ => Err(ToonError::InvalidInput("Unexpected token type".to_string())),
504 }
505 }?;
506
507 self.current_token = self.scanner.scan_token()?;
508 Ok(result)
509 }
510 }
511
512 fn parse_root_array(&mut self, depth: usize) -> ToonResult<Value> {
513 validate_depth(depth, MAX_DEPTH)?;
514
515 if !matches!(self.current_token, Token::LeftBracket) {
516 return Err(self.parse_error_with_context("Expected '[' at the start of root array"));
517 }
518
519 self.parse_array(depth)
520 }
521
522 fn parse_array_header(
523 &mut self,
524 ) -> ToonResult<(usize, Option<Delimiter>, Option<Vec<String>>)> {
525 if !matches!(self.current_token, Token::LeftBracket) {
526 return Err(self.parse_error_with_context("Expected '['"));
527 }
528 self.advance()?;
529
530 let length = if let Token::Integer(n) = &self.current_token {
533 *n as usize
534 } else if let Token::String(s, _) = &self.current_token {
535 if s.starts_with('#') {
537 return Err(self
538 .parse_error_with_context(
539 "Length marker '#' is not supported. Use [N] format instead of [#N]",
540 )
541 .with_suggestion("Remove the '#' prefix from the array length"));
542 }
543
544 s.parse::<usize>().map_err(|_| {
546 self.parse_error_with_context(format!("Expected array length, found: {s}"))
547 })?
548 } else {
549 return Err(self.parse_error_with_context(format!(
550 "Expected array length, found {:?}",
551 self.current_token
552 )));
553 };
554
555 self.advance()?;
556
557 let detected_delim = match &self.current_token {
559 Token::Delimiter(d) => {
560 let delim = *d;
561 self.advance()?;
562 Some(delim)
563 }
564 Token::String(s, _) if s == "," => {
565 self.advance()?;
566 Some(Delimiter::Comma)
567 }
568 Token::String(s, _) if s == "|" => {
569 self.advance()?;
570 Some(Delimiter::Pipe)
571 }
572 Token::String(s, _) if s == "\t" => {
573 self.advance()?;
574 Some(Delimiter::Tab)
575 }
576 _ => None,
577 };
578
579 let active_delim = detected_delim.or(Some(Delimiter::Comma));
581
582 self.scanner.set_active_delimiter(active_delim);
583
584 if !matches!(self.current_token, Token::RightBracket) {
585 return Err(self.parse_error_with_context(format!(
586 "Expected ']', found {:?}",
587 self.current_token
588 )));
589 }
590 self.advance()?;
591
592 let fields = if matches!(self.current_token, Token::LeftBrace) {
593 self.advance()?;
594 let mut fields = Vec::new();
595
596 loop {
597 match &self.current_token {
598 Token::String(s, _) => {
599 fields.push(s.clone());
600 self.advance()?;
601
602 if matches!(self.current_token, Token::RightBrace) {
603 break;
604 }
605
606 if matches!(self.current_token, Token::Delimiter(_)) {
607 self.advance()?;
608 } else {
609 return Err(self.parse_error_with_context(format!(
610 "Expected delimiter or '}}', found {:?}",
611 self.current_token
612 )));
613 }
614 }
615 Token::RightBrace => break,
616 _ => {
617 return Err(self.parse_error_with_context(format!(
618 "Expected field name, found {:?}",
619 self.current_token
620 )))
621 }
622 }
623 }
624
625 self.advance()?;
626 Some(fields)
627 } else {
628 None
629 };
630
631 if !matches!(self.current_token, Token::Colon) {
632 return Err(self.parse_error_with_context("Expected ':' after array header"));
633 }
634 self.advance()?;
635
636 Ok((length, detected_delim, fields))
637 }
638
639 fn parse_array(&mut self, depth: usize) -> ToonResult<Value> {
640 self.parse_array_with_context(depth, ArrayParseContext::Normal)
641 }
642
643 fn parse_array_with_context(
644 &mut self,
645 depth: usize,
646 context: ArrayParseContext,
647 ) -> ToonResult<Value> {
648 validate_depth(depth, MAX_DEPTH)?;
649
650 let (length, _detected_delim, fields) = self.parse_array_header()?;
651
652 if let Some(fields) = fields {
653 validation::validate_field_list(&fields)?;
654 self.parse_tabular_array(length, &fields, depth, context)
655 } else {
656 let adjusted_depth = match context {
659 ArrayParseContext::Normal => depth,
660 ArrayParseContext::ListItemFirstField => depth + 1,
661 };
662 self.parse_regular_array(length, adjusted_depth)
663 }
664 }
665
666 fn parse_tabular_array(
667 &mut self,
668 length: usize,
669 fields: &[String],
670 depth: usize,
671 context: ArrayParseContext,
672 ) -> ToonResult<Value> {
673 let mut rows = Vec::new();
674
675 if !matches!(self.current_token, Token::Newline) {
676 return Err(self
677 .parse_error_with_context("Expected newline after tabular array header")
678 .with_suggestion("Tabular arrays must have rows on separate lines"));
679 }
680 self.skip_newlines()?;
681
682 for row_index in 0..length {
683 if matches!(self.current_token, Token::Eof) {
684 if self.options.strict {
685 return Err(self.parse_error_with_context(format!(
686 "Expected {} rows, but got {} before EOF",
687 length,
688 rows.len()
689 )));
690 }
691 break;
692 }
693
694 let current_indent = self.scanner.get_last_line_indent();
695
696 let row_depth_offset = match context {
699 ArrayParseContext::Normal => 1,
700 ArrayParseContext::ListItemFirstField => 2,
701 };
702 let expected_indent = self.options.indent.get_spaces() * (depth + row_depth_offset);
703
704 if self.options.strict {
705 self.validate_indentation(current_indent)?;
706
707 if current_indent != expected_indent {
708 return Err(self.parse_error_with_context(format!(
709 "Invalid indentation for tabular row: expected {expected_indent} spaces, \
710 found {current_indent}"
711 )));
712 }
713 }
714
715 let mut row = Map::new();
716
717 for (field_index, field) in fields.iter().enumerate() {
718 if field_index > 0 {
720 if matches!(self.current_token, Token::Delimiter(_)) {
721 self.advance()?;
722 } else {
723 return Err(self
724 .parse_error_with_context(format!(
725 "Expected delimiter, found {:?}",
726 self.current_token
727 ))
728 .with_suggestion(format!(
729 "Tabular row {} field {} needs a delimiter",
730 row_index + 1,
731 field_index + 1
732 )));
733 }
734 }
735
736 let value = if matches!(self.current_token, Token::Delimiter(_))
738 || matches!(self.current_token, Token::Newline | Token::Eof)
739 {
740 Value::String(String::new())
741 } else {
742 self.parse_tabular_field_value()?
743 };
744
745 row.insert(field.clone(), value);
746
747 if field_index < fields.len() - 1 {
749 if matches!(self.current_token, Token::Newline | Token::Eof) {
751 if self.options.strict {
752 return Err(self
753 .parse_error_with_context(format!(
754 "Tabular row {}: expected {} values, but found only {}",
755 row_index + 1,
756 fields.len(),
757 field_index + 1
758 ))
759 .with_suggestion(format!(
760 "Row {} should have exactly {} values",
761 row_index + 1,
762 fields.len()
763 )));
764 } else {
765 for field in fields.iter().skip(field_index + 1) {
767 row.insert(field.clone(), Value::Null);
768 }
769 break;
770 }
771 }
772 } else if !matches!(self.current_token, Token::Newline | Token::Eof)
773 && matches!(self.current_token, Token::Delimiter(_))
774 {
775 return Err(self
777 .parse_error_with_context(format!(
778 "Tabular row {}: expected {} values, but found extra values",
779 row_index + 1,
780 fields.len()
781 ))
782 .with_suggestion(format!(
783 "Row {} should have exactly {} values",
784 row_index + 1,
785 fields.len()
786 )));
787 }
788 }
789
790 if !self.options.strict && row.len() < fields.len() {
791 for field in fields.iter().skip(row.len()) {
792 row.insert(field.clone(), Value::Null);
793 }
794 }
795
796 rows.push(Value::Object(row));
797
798 if matches!(self.current_token, Token::Eof) {
799 break;
800 }
801
802 if !matches!(self.current_token, Token::Newline) {
803 if !self.options.strict {
804 while !matches!(self.current_token, Token::Newline | Token::Eof) {
805 self.advance()?;
806 }
807 if matches!(self.current_token, Token::Eof) {
808 break;
809 }
810 } else {
811 return Err(self.parse_error_with_context(format!(
812 "Expected newline after tabular row {}",
813 row_index + 1
814 )));
815 }
816 }
817
818 if row_index + 1 < length {
819 self.advance()?;
820 if self.options.strict && matches!(self.current_token, Token::Newline) {
821 return Err(self.parse_error_with_context(
822 "Blank lines are not allowed inside tabular arrays in strict mode",
823 ));
824 }
825
826 self.skip_newlines()?;
827 } else if matches!(self.current_token, Token::Newline) {
828 self.advance()?;
830 self.skip_newlines()?;
831
832 let expected_indent = self.options.indent.get_spaces() * (depth + 1);
833 let actual_indent = self.scanner.get_last_line_indent();
834
835 if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) {
838 let is_key_value = matches!(self.current_token, Token::String(_, _))
839 && matches!(self.scanner.peek(), Some(':'));
840
841 if !is_key_value {
842 return Err(self.parse_error_with_context(format!(
843 "Array length mismatch: expected {length} rows, but more rows found",
844 )));
845 }
846 }
847 }
848 }
849
850 validation::validate_array_length(length, rows.len())?;
851
852 Ok(Value::Array(rows))
853 }
854
855 fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult<Value> {
856 let mut items = Vec::new();
857
858 match &self.current_token {
859 Token::Newline => {
860 self.skip_newlines()?;
861
862 let expected_indent = self.options.indent.get_spaces() * (depth + 1);
863
864 for i in 0..length {
865 let current_indent = self.scanner.get_last_line_indent();
866 if self.options.strict {
867 self.validate_indentation(current_indent)?;
868
869 if current_indent != expected_indent {
870 return Err(self.parse_error_with_context(format!(
871 "Invalid indentation for list item: expected {expected_indent} \
872 spaces, found {current_indent}"
873 )));
874 }
875 }
876 if !matches!(self.current_token, Token::Dash) {
877 return Err(self
878 .parse_error_with_context(format!(
879 "Expected '-' for list item, found {:?}",
880 self.current_token
881 ))
882 .with_suggestion(format!(
883 "List arrays need '-' prefix for each item (item {} of {})",
884 i + 1,
885 length
886 )));
887 }
888 self.advance()?;
889
890 let value = if matches!(self.current_token, Token::Newline | Token::Eof) {
891 Value::Object(Map::new())
892 } else if matches!(self.current_token, Token::LeftBracket) {
893 self.parse_array(depth + 1)?
894 } else if let Token::String(s, _) = &self.current_token {
895 let key = s.clone();
896 self.advance()?;
897
898 if matches!(self.current_token, Token::Colon | Token::LeftBracket) {
899 let first_value = if matches!(self.current_token, Token::LeftBracket) {
903 self.parse_array_with_context(
906 depth + 1,
907 ArrayParseContext::ListItemFirstField,
908 )?
909 } else {
910 self.advance()?;
911 if matches!(self.current_token, Token::LeftBracket) {
913 self.parse_array(depth + 2)?
916 } else {
917 self.parse_field_value(depth + 2)?
918 }
919 };
920
921 let mut obj = Map::new();
922 obj.insert(key, first_value);
923
924 let field_indent = self.options.indent.get_spaces() * (depth + 2);
925
926 let should_parse_more_fields =
928 if matches!(self.current_token, Token::Newline) {
929 let next_indent = self.scanner.count_leading_spaces();
930
931 if next_indent < field_indent {
932 false
933 } else {
934 self.advance()?;
935
936 if !self.options.strict {
937 self.skip_newlines()?;
938 }
939 true
940 }
941 } else if matches!(self.current_token, Token::String(_, _)) {
942 let current_indent = self.scanner.get_last_line_indent();
944 current_indent == field_indent
945 } else {
946 false
947 };
948
949 if should_parse_more_fields {
951 while !matches!(self.current_token, Token::Eof) {
952 let current_indent = self.scanner.get_last_line_indent();
953
954 if current_indent < field_indent {
955 break;
956 }
957
958 if current_indent != field_indent && self.options.strict {
959 break;
960 }
961
962 if matches!(self.current_token, Token::Dash) {
964 break;
965 }
966
967 let field_key = match &self.current_token {
968 Token::String(s, _) => s.clone(),
969 _ => break,
970 };
971 self.advance()?;
972
973 let field_value =
974 if matches!(self.current_token, Token::LeftBracket) {
975 self.parse_array(depth + 2)?
976 } else if matches!(self.current_token, Token::Colon) {
977 self.advance()?;
978 if matches!(self.current_token, Token::LeftBracket) {
979 self.parse_array(depth + 2)?
980 } else {
981 self.parse_field_value(depth + 2)?
982 }
983 } else {
984 break;
985 };
986
987 obj.insert(field_key, field_value);
988
989 if matches!(self.current_token, Token::Newline) {
990 let next_indent = self.scanner.count_leading_spaces();
991 if next_indent < field_indent {
992 break;
993 }
994 self.advance()?;
995 if !self.options.strict {
996 self.skip_newlines()?;
997 }
998 } else {
999 break;
1000 }
1001 }
1002 }
1003
1004 Value::Object(obj)
1005 } else if matches!(self.current_token, Token::LeftBracket) {
1006 let array_value = self.parse_array(depth + 1)?;
1008 let mut obj = Map::new();
1009 obj.insert(key, array_value);
1010 Value::Object(obj)
1011 } else {
1012 Value::String(key)
1014 }
1015 } else {
1016 self.parse_primitive()?
1017 };
1018
1019 items.push(value);
1020
1021 if items.len() < length {
1022 if matches!(self.current_token, Token::Newline) {
1023 self.advance()?;
1024
1025 if self.options.strict && matches!(self.current_token, Token::Newline) {
1026 return Err(self.parse_error_with_context(
1027 "Blank lines are not allowed inside list arrays in strict mode",
1028 ));
1029 }
1030
1031 self.skip_newlines()?;
1032 } else if !matches!(self.current_token, Token::Dash) {
1033 return Err(self.parse_error_with_context(format!(
1034 "Expected newline or next list item after list item {}",
1035 i + 1
1036 )));
1037 }
1038 } else if matches!(self.current_token, Token::Newline) {
1039 self.advance()?;
1041 self.skip_newlines()?;
1042
1043 let list_indent = self.options.indent.get_spaces() * (depth + 1);
1044 let actual_indent = self.scanner.get_last_line_indent();
1045 if actual_indent == list_indent && matches!(self.current_token, Token::Dash)
1047 {
1048 return Err(self.parse_error_with_context(format!(
1049 "Array length mismatch: expected {length} items, but more items \
1050 found",
1051 )));
1052 }
1053 }
1054 }
1055 }
1056 _ => {
1057 for i in 0..length {
1058 if i > 0 {
1059 if matches!(self.current_token, Token::Delimiter(_)) {
1060 self.advance()?;
1061 } else {
1062 return Err(self
1063 .parse_error_with_context(format!(
1064 "Expected delimiter, found {:?}",
1065 self.current_token
1066 ))
1067 .with_suggestion(format!(
1068 "Expected delimiter between items (item {} of {})",
1069 i + 1,
1070 length
1071 )));
1072 }
1073 }
1074
1075 let value = if matches!(self.current_token, Token::Delimiter(_))
1076 || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length)
1077 {
1078 Value::String(String::new())
1079 } else if matches!(self.current_token, Token::LeftBracket) {
1080 self.parse_array(depth + 1)?
1081 } else {
1082 self.parse_primitive()?
1083 };
1084
1085 items.push(value);
1086 }
1087 }
1088 }
1089
1090 validation::validate_array_length(length, items.len())?;
1091
1092 if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) {
1093 return Err(self.parse_error_with_context(format!(
1094 "Array length mismatch: expected {length} items, but more items found",
1095 )));
1096 }
1097
1098 Ok(Value::Array(items))
1099 }
1100
1101 fn parse_tabular_field_value(&mut self) -> ToonResult<Value> {
1102 match &self.current_token {
1103 Token::Null => {
1104 self.advance()?;
1105 Ok(Value::Null)
1106 }
1107 Token::Bool(b) => {
1108 let val = *b;
1109 self.advance()?;
1110 Ok(Value::Bool(val))
1111 }
1112 Token::Integer(i) => {
1113 let val = *i;
1114 self.advance()?;
1115 Ok(Number::from(val).into())
1116 }
1117 Token::Number(n) => {
1118 let val = *n;
1119 self.advance()?;
1120 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1122 Ok(Number::from(val as i64).into())
1123 } else {
1124 Ok(Number::from_f64(val)
1125 .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1126 .into())
1127 }
1128 }
1129 Token::String(s, _) => {
1130 let mut accumulated = s.clone();
1132 self.advance()?;
1133
1134 while let Token::String(next, _) = &self.current_token {
1135 if !accumulated.is_empty() {
1136 accumulated.push(' ');
1137 }
1138 accumulated.push_str(next);
1139 self.advance()?;
1140 }
1141
1142 Ok(Value::String(accumulated))
1143 }
1144 _ => Err(self.parse_error_with_context(format!(
1145 "Expected primitive value, found {:?}",
1146 self.current_token
1147 ))),
1148 }
1149 }
1150
1151 fn parse_primitive(&mut self) -> ToonResult<Value> {
1152 match &self.current_token {
1153 Token::Null => {
1154 self.advance()?;
1155 Ok(Value::Null)
1156 }
1157 Token::Bool(b) => {
1158 let val = *b;
1159 self.advance()?;
1160 Ok(Value::Bool(val))
1161 }
1162 Token::Integer(i) => {
1163 let val = *i;
1164 self.advance()?;
1165 Ok(Number::from(val).into())
1166 }
1167 Token::Number(n) => {
1168 let val = *n;
1169 self.advance()?;
1170
1171 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1172 Ok(Number::from(val as i64).into())
1173 } else {
1174 Ok(Number::from_f64(val)
1175 .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1176 .into())
1177 }
1178 }
1179 Token::String(s, _) => {
1180 let val = s.clone();
1181 self.advance()?;
1182 Ok(Value::String(val))
1183 }
1184 _ => Err(self.parse_error_with_context(format!(
1185 "Expected primitive value, found {:?}",
1186 self.current_token
1187 ))),
1188 }
1189 }
1190
1191 fn parse_error_with_context(&self, message: impl Into<String>) -> ToonError {
1192 let (line, column) = self.scanner.current_position();
1193 let message = message.into();
1194
1195 let context = self.get_error_context(line, column);
1196
1197 ToonError::ParseError {
1198 line,
1199 column,
1200 message,
1201 context: Some(Box::new(context)),
1202 }
1203 }
1204
1205 fn get_error_context(&self, line: usize, column: usize) -> ErrorContext {
1206 let lines: Vec<&str> = self.input.lines().collect();
1207
1208 let source_line = if line > 0 && line <= lines.len() {
1209 lines[line - 1].to_string()
1210 } else {
1211 String::new()
1212 };
1213
1214 let preceding_lines: Vec<String> = if line > 1 {
1215 lines[line.saturating_sub(3)..line - 1]
1216 .iter()
1217 .map(|s| s.to_string())
1218 .collect()
1219 } else {
1220 Vec::new()
1221 };
1222
1223 let following_lines: Vec<String> = if line < lines.len() {
1224 lines[line..line.saturating_add(2).min(lines.len())]
1225 .iter()
1226 .map(|s| s.to_string())
1227 .collect()
1228 } else {
1229 Vec::new()
1230 };
1231
1232 let indicator = if column > 0 {
1233 Some(format!("{:width$}^", "", width = column - 1))
1234 } else {
1235 None
1236 };
1237
1238 ErrorContext {
1239 source_line,
1240 preceding_lines,
1241 following_lines,
1242 suggestion: None,
1243 indicator,
1244 }
1245 }
1246
1247 fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> {
1248 if !self.options.strict {
1249 return Ok(());
1250 }
1251
1252 let indent_size = self.options.indent.get_spaces();
1253 if indent_size > 0 && indent_amount > 0 && !indent_amount.is_multiple_of(indent_size) {
1255 Err(self.parse_error_with_context(format!(
1256 "Invalid indentation: found {indent_amount} spaces, but must be a multiple of \
1257 {indent_size}"
1258 )))
1259 } else {
1260 Ok(())
1261 }
1262 }
1263}
1264
1265#[cfg(test)]
1266mod tests {
1267 use std::f64;
1268
1269 use serde_json::json;
1270
1271 use super::*;
1272
1273 fn parse(input: &str) -> ToonResult<Value> {
1274 let mut parser = Parser::new(input, DecodeOptions::default())?;
1275 parser.parse()
1276 }
1277
/// Each scalar literal at the root decodes to the matching JSON scalar.
#[test]
fn test_parse_primitives() {
    let cases = [
        ("null", json!(null)),
        ("true", json!(true)),
        ("false", json!(false)),
        ("42", json!(42)),
        ("3.141592653589793", json!(f64::consts::PI)),
        ("hello", json!("hello")),
    ];

    for (input, expected) in cases {
        assert_eq!(parse(input).unwrap(), expected);
    }
}
1287
/// Two `key: value` lines decode to an object with both fields.
#[test]
fn test_parse_simple_object() {
    let parsed = parse("name: Alice\nage: 30").unwrap();

    for (key, expected) in [("name", json!("Alice")), ("age", json!(30))] {
        assert_eq!(parsed[key], expected);
    }
}
1294
/// An inline comma-separated array decodes to a JSON array of strings.
#[test]
fn test_parse_primitive_array() {
    let parsed = parse("tags[3]: a,b,c").unwrap();
    let expected = json!(["a", "b", "c"]);
    assert_eq!(parsed["tags"], expected);
}
1300
1301 #[test]
1302 fn test_parse_empty_array() {
1303 let result = parse("items[0]:").unwrap();
1304 assert_eq!(result["items"], json!([]));
1305 }
1306
1307 #[test]
1308 fn test_parse_tabular_array() {
1309 let result = parse("users[2]{id,name}:\n 1,Alice\n 2,Bob").unwrap();
1310 assert_eq!(
1311 result["users"],
1312 json!([
1313 {"id": 1, "name": "Alice"},
1314 {"id": 2, "name": "Bob"}
1315 ])
1316 );
1317 }
1318
1319 #[test]
1320 fn test_empty_tokens() {
1321 let result = parse("items[3]: a,,c").unwrap();
1322 assert_eq!(result["items"], json!(["a", "", "c"]));
1323 }
1324
1325 #[test]
1326 fn test_empty_nested_object() {
1327 let result = parse("user:").unwrap();
1328 assert_eq!(result, json!({"user": {}}));
1329 }
1330
1331 #[test]
1332 fn test_list_item_object() {
1333 let result =
1334 parse("items[2]:\n - id: 1\n name: First\n - id: 2\n name: Second").unwrap();
1335 assert_eq!(
1336 result["items"],
1337 json!([
1338 {"id": 1, "name": "First"},
1339 {"id": 2, "name": "Second"}
1340 ])
1341 );
1342 }
1343
1344 #[test]
1345 fn test_nested_array_in_list_item() {
1346 let result = parse("items[1]:\n - tags[3]: a,b,c").unwrap();
1347 assert_eq!(result["items"], json!([{"tags": ["a", "b", "c"]}]));
1348 }
1349
1350 #[test]
1351 fn test_two_level_siblings() {
1352 let input = "x:\n y: 1\n z: 2";
1353 let opts = DecodeOptions::default();
1354 let mut parser = Parser::new(input, opts).unwrap();
1355 let result = parser.parse().unwrap();
1356
1357 let x = result.as_object().unwrap().get("x").unwrap();
1358 let x_obj = x.as_object().unwrap();
1359
1360 assert_eq!(x_obj.len(), 2, "x should have 2 keys");
1361 assert_eq!(x_obj.get("y").unwrap(), &serde_json::json!(1));
1362 assert_eq!(x_obj.get("z").unwrap(), &serde_json::json!(2));
1363 }
1364
1365 #[test]
1366 fn test_nested_object_with_sibling() {
1367 let input = "a:\n b:\n c: 1\n d: 2";
1368 let opts = DecodeOptions::default();
1369 let mut parser = Parser::new(input, opts).unwrap();
1370 let result = parser.parse().unwrap();
1371
1372 let a = result.as_object().unwrap().get("a").unwrap();
1373 let a_obj = a.as_object().unwrap();
1374
1375 assert_eq!(a_obj.len(), 2, "a should have 2 keys (b and d)");
1376 assert!(a_obj.contains_key("b"), "a should have key 'b'");
1377 assert!(a_obj.contains_key("d"), "a should have key 'd'");
1378
1379 let b = a_obj.get("b").unwrap().as_object().unwrap();
1380 assert_eq!(b.len(), 1, "b should have only 1 key (c)");
1381 assert!(b.contains_key("c"), "b should have key 'c'");
1382 assert!(!b.contains_key("d"), "b should NOT have key 'd'");
1383 }
1384
1385 #[test]
1386 fn test_field_value_with_parentheses() {
1387 let result = parse("msg: Mostly Functions (3 of 3)").unwrap();
1388 assert_eq!(result, json!({"msg": "Mostly Functions (3 of 3)"}));
1389
1390 let result = parse("val: (hello)").unwrap();
1391 assert_eq!(result, json!({"val": "(hello)"}));
1392
1393 let result = parse("test: a (b) c (d)").unwrap();
1394 assert_eq!(result, json!({"test": "a (b) c (d)"}));
1395 }
1396
1397 #[test]
1398 fn test_field_value_number_with_parentheses() {
1399 let result = parse("code: 0(f)").unwrap();
1400 assert_eq!(result, json!({"code": "0(f)"}));
1401
1402 let result = parse("val: 5(test)").unwrap();
1403 assert_eq!(result, json!({"val": "5(test)"}));
1404
1405 let result = parse("msg: test 123)").unwrap();
1406 assert_eq!(result, json!({"msg": "test 123)"}));
1407 }
1408
1409 #[test]
1410 fn test_field_value_single_token_optimization() {
1411 let result = parse("name: hello").unwrap();
1412 assert_eq!(result, json!({"name": "hello"}));
1413
1414 let result = parse("age: 42").unwrap();
1415 assert_eq!(result, json!({"age": 42}));
1416
1417 let result = parse("active: true").unwrap();
1418 assert_eq!(result, json!({"active": true}));
1419
1420 let result = parse("value: null").unwrap();
1421 assert_eq!(result, json!({"value": null}));
1422 }
1423
1424 #[test]
1425 fn test_field_value_multi_token() {
1426 let result = parse("msg: hello world").unwrap();
1427 assert_eq!(result, json!({"msg": "hello world"}));
1428
1429 let result = parse("msg: test 123 end").unwrap();
1430 assert_eq!(result, json!({"msg": "test 123 end"}));
1431 }
1432
1433 #[test]
1434 fn test_field_value_spacing_preserved() {
1435 let result = parse("val: hello world").unwrap();
1436 assert_eq!(result, json!({"val": "hello world"}));
1437
1438 let result = parse("val: 0(f)").unwrap();
1439 assert_eq!(result, json!({"val": "0(f)"}));
1440 }
1441
1442 #[test]
1443 fn test_round_trip_parentheses() {
1444 use crate::{
1445 decode::decode_default,
1446 encode::encode_default,
1447 };
1448
1449 let original = json!({
1450 "message": "Mostly Functions (3 of 3)",
1451 "code": "0(f)",
1452 "simple": "(hello)",
1453 "mixed": "test 123)"
1454 });
1455
1456 let encoded = encode_default(&original).unwrap();
1457 let decoded: Value = decode_default(&encoded).unwrap();
1458
1459 assert_eq!(original, decoded);
1460 }
1461
1462 #[test]
1463 fn test_multiple_fields_with_edge_cases() {
1464 let input = r#"message: Mostly Functions (3 of 3)
1465sone: (hello)
1466hello: 0(f)"#;
1467
1468 let result = parse(input).unwrap();
1469 assert_eq!(
1470 result,
1471 json!({
1472 "message": "Mostly Functions (3 of 3)",
1473 "sone": "(hello)",
1474 "hello": "0(f)"
1475 })
1476 );
1477 }
1478
1479 #[test]
1480 fn test_decode_list_item_tabular_array_v3() {
1481 let input = r#"items[1]:
1484 - users[2]{id,name}:
1485 1,Ada
1486 2,Bob
1487 status: active"#;
1488
1489 let result = parse(input).unwrap();
1490
1491 assert_eq!(
1492 result,
1493 json!({
1494 "items": [
1495 {
1496 "users": [
1497 {"id": 1, "name": "Ada"},
1498 {"id": 2, "name": "Bob"}
1499 ],
1500 "status": "active"
1501 }
1502 ]
1503 })
1504 );
1505 }
1506
1507 #[test]
1508 fn test_decode_list_item_tabular_array_multiple_items() {
1509 let input = r#"data[2]:
1511 - records[1]{id,val}:
1512 1,x
1513 count: 1
1514 - records[1]{id,val}:
1515 2,y
1516 count: 1"#;
1517
1518 let result = parse(input).unwrap();
1519
1520 assert_eq!(
1521 result,
1522 json!({
1523 "data": [
1524 {
1525 "records": [{"id": 1, "val": "x"}],
1526 "count": 1
1527 },
1528 {
1529 "records": [{"id": 2, "val": "y"}],
1530 "count": 1
1531 }
1532 ]
1533 })
1534 );
1535 }
1536
1537 #[test]
1538 fn test_decode_list_item_tabular_array_with_multiple_fields() {
1539 let input = r#"entries[1]:
1541 - people[2]{name,age}:
1542 Alice,30
1543 Bob,25
1544 total: 2
1545 category: staff"#;
1546
1547 let result = parse(input).unwrap();
1548
1549 assert_eq!(
1550 result,
1551 json!({
1552 "entries": [
1553 {
1554 "people": [
1555 {"name": "Alice", "age": 30},
1556 {"name": "Bob", "age": 25}
1557 ],
1558 "total": 2,
1559 "category": "staff"
1560 }
1561 ]
1562 })
1563 );
1564 }
1565
1566 #[test]
1567 fn test_decode_list_item_non_tabular_array_unchanged() {
1568 let input = r#"items[1]:
1570 - tags[3]: a,b,c
1571 name: test"#;
1572
1573 let result = parse(input).unwrap();
1574
1575 assert_eq!(
1576 result,
1577 json!({
1578 "items": [
1579 {
1580 "tags": ["a", "b", "c"],
1581 "name": "test"
1582 }
1583 ]
1584 })
1585 );
1586 }
1587
1588 #[test]
1589 fn test_decode_strict_rejects_v2_tabular_indent() {
1590 use crate::decode::decode_strict;
1591
1592 let input_v2 = r#"items[1]:
1595 - users[2]{id,name}:
1596 1,Ada
1597 2,Bob"#;
1598
1599 let result = decode_strict::<Value>(input_v2);
1600
1601 assert!(
1603 result.is_err(),
1604 "Old format with incorrect indentation should be rejected in strict mode"
1605 );
1606 let err_msg = result.unwrap_err().to_string();
1607 assert!(
1608 err_msg.contains("indentation") || err_msg.contains("Invalid indentation"),
1609 "Error should mention indentation. Got: {}",
1610 err_msg
1611 );
1612 }
1613
1614 #[test]
1615 fn test_decode_tabular_array_not_in_list_item_unchanged() {
1616 let input = r#"users[2]{id,name}:
1618 1,Ada
1619 2,Bob"#;
1620
1621 let result = parse(input).unwrap();
1622
1623 assert_eq!(
1624 result,
1625 json!({
1626 "users": [
1627 {"id": 1, "name": "Ada"},
1628 {"id": 2, "name": "Bob"}
1629 ]
1630 })
1631 );
1632 }
1633
1634 #[test]
1635 fn test_decode_nested_tabular_not_first_field() {
1636 let input = r#"items[1]:
1638 - name: test
1639 data[2]{id,val}:
1640 1,x
1641 2,y"#;
1642
1643 let result = parse(input).unwrap();
1644
1645 assert_eq!(
1646 result,
1647 json!({
1648 "items": [
1649 {
1650 "name": "test",
1651 "data": [
1652 {"id": 1, "val": "x"},
1653 {"id": 2, "val": "y"}
1654 ]
1655 }
1656 ]
1657 })
1658 );
1659 }
1660}