1use serde_json::{
2 Map,
3 Number,
4 Value,
5};
6
7use crate::{
8 constants::{
9 KEYWORDS,
10 MAX_DEPTH,
11 },
12 decode::{
13 scanner::{
14 Scanner,
15 Token,
16 },
17 validation,
18 },
19 types::{
20 DecodeOptions,
21 Delimiter,
22 ErrorContext,
23 ToonError,
24 ToonResult,
25 },
26 utils::validation::validate_depth,
27};
28
29#[allow(unused)]
31pub struct Parser<'a> {
32 scanner: Scanner,
33 current_token: Token,
34 options: DecodeOptions,
35 delimiter: Option<Delimiter>,
36 input: &'a str,
37}
38
39impl<'a> Parser<'a> {
40 pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult<Self> {
42 let mut scanner = Scanner::new(input);
43 let chosen_delim = options.delimiter;
44 scanner.set_active_delimiter(chosen_delim);
45 let current_token = scanner.scan_token()?;
46
47 Ok(Self {
48 scanner,
49 current_token,
50 delimiter: chosen_delim,
51 options,
52 input,
53 })
54 }
55
56 pub fn parse(&mut self) -> ToonResult<Value> {
58 if self.options.strict {
59 self.validate_indentation(self.scanner.get_last_line_indent())?;
60 }
61 let value = self.parse_value()?;
62
63 if self.options.strict {
65 self.skip_newlines()?;
66 if !matches!(self.current_token, Token::Eof) {
67 return Err(self
68 .parse_error_with_context(
69 "Multiple values at root level are not allowed in strict mode",
70 )
71 .with_suggestion("Wrap multiple values in an object or array"));
72 }
73 }
74
75 Ok(value)
76 }
77
78 fn advance(&mut self) -> ToonResult<()> {
79 self.current_token = self.scanner.scan_token()?;
80 Ok(())
81 }
82
83 fn skip_newlines(&mut self) -> ToonResult<()> {
84 while matches!(self.current_token, Token::Newline) {
85 self.advance()?;
86 }
87 Ok(())
88 }
89
90 fn parse_value(&mut self) -> ToonResult<Value> {
91 self.parse_value_with_depth(0)
92 }
93
94 fn parse_value_with_depth(&mut self, depth: usize) -> ToonResult<Value> {
95 validate_depth(depth, MAX_DEPTH)?;
96
97 let had_newline = matches!(self.current_token, Token::Newline);
98 self.skip_newlines()?;
99
100 match &self.current_token {
101 Token::Null => {
102 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
104 if next_char_is_colon {
105 let key = KEYWORDS[0].to_string();
106 self.advance()?;
107 self.parse_object_with_initial_key(key, depth)
108 } else {
109 self.advance()?;
110 Ok(Value::Null)
111 }
112 }
113 Token::Bool(b) => {
114 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
115 if next_char_is_colon {
116 let key = if *b {
117 KEYWORDS[1].to_string()
118 } else {
119 KEYWORDS[2].to_string()
120 };
121 self.advance()?;
122 self.parse_object_with_initial_key(key, depth)
123 } else {
124 let val = *b;
125 self.advance()?;
126 Ok(Value::Bool(val))
127 }
128 }
129 Token::Integer(i) => {
130 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
131 if next_char_is_colon {
132 let key = i.to_string();
133 self.advance()?;
134 self.parse_object_with_initial_key(key, depth)
135 } else {
136 let val = *i;
137 self.advance()?;
138 Ok(serde_json::Number::from(val).into())
139 }
140 }
141 Token::Number(n) => {
142 let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
143 if next_char_is_colon {
144 let key = n.to_string();
145 self.advance()?;
146 self.parse_object_with_initial_key(key, depth)
147 } else {
148 let val = *n;
149 self.advance()?;
150 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
152 Ok(serde_json::Number::from(val as i64).into())
153 } else {
154 Ok(serde_json::Number::from_f64(val)
155 .ok_or_else(|| {
156 ToonError::InvalidInput(format!("Invalid number: {val}"))
157 })?
158 .into())
159 }
160 }
161 }
162 Token::String(s, _) => {
163 let first = s.clone();
164 self.advance()?;
165
166 match &self.current_token {
167 Token::Colon | Token::LeftBracket => {
168 self.parse_object_with_initial_key(first, depth)
169 }
170 _ => {
171 if self.options.strict && depth > 0 && had_newline {
174 return Err(self
175 .parse_error_with_context(format!(
176 "Expected ':' after '{first}' in object context"
177 ))
178 .with_suggestion(
179 "Add ':' after the key, or place the value on the same line \
180 as the parent key",
181 ));
182 }
183
184 let mut accumulated = first;
186 while let Token::String(next, _) = &self.current_token {
187 if !accumulated.is_empty() {
188 accumulated.push(' ');
189 }
190 accumulated.push_str(next);
191 self.advance()?;
192 }
193 Ok(Value::String(accumulated))
194 }
195 }
196 }
197 Token::LeftBracket => self.parse_root_array(depth),
198 Token::Eof => Ok(Value::Object(Map::new())),
199 _ => self.parse_object(depth),
200 }
201 }
202
203 fn parse_object(&mut self, depth: usize) -> ToonResult<Value> {
204 validate_depth(depth, MAX_DEPTH)?;
205
206 let mut obj = Map::new();
207 let mut base_indent: Option<usize> = None;
209
210 loop {
211 while matches!(self.current_token, Token::Newline) {
212 self.advance()?;
213 }
214
215 if matches!(self.current_token, Token::Eof) {
216 break;
217 }
218
219 let current_indent = self.scanner.get_last_line_indent();
220
221 if self.options.strict {
222 self.validate_indentation(current_indent)?;
223 }
224
225 if let Some(expected) = base_indent {
227 if current_indent != expected {
228 break;
229 }
230 } else {
231 base_indent = Some(current_indent);
232 }
233
234 let key = match &self.current_token {
235 Token::String(s, _) => s.clone(),
236 _ => {
237 return Err(self
238 .parse_error_with_context(format!(
239 "Expected key, found {:?}",
240 self.current_token
241 ))
242 .with_suggestion("Object keys must be strings"));
243 }
244 };
245 self.advance()?;
246
247 let value = if matches!(self.current_token, Token::LeftBracket) {
248 self.parse_array(depth)?
249 } else {
250 if !matches!(self.current_token, Token::Colon) {
251 return Err(self
252 .parse_error_with_context(format!(
253 "Expected ':' or '[', found {:?}",
254 self.current_token
255 ))
256 .with_suggestion("Use ':' for object values or '[' for arrays"));
257 }
258 self.advance()?;
259 self.parse_field_value(depth)?
260 };
261
262 obj.insert(key, value);
263 }
264
265 Ok(Value::Object(obj))
266 }
267
268 fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult<Value> {
269 validate_depth(depth, MAX_DEPTH)?;
270
271 let mut obj = Map::new();
272 let mut base_indent: Option<usize> = None;
273
274 if self.options.strict {
276 let current_indent = self.scanner.get_last_line_indent();
277 self.validate_indentation(current_indent)?;
278 }
279
280 if matches!(self.current_token, Token::LeftBracket) {
281 let value = self.parse_array(depth)?;
282 obj.insert(key, value);
283 } else {
284 if !matches!(self.current_token, Token::Colon) {
285 return Err(self.parse_error_with_context(format!(
286 "Expected ':', found {:?}",
287 self.current_token
288 )));
289 }
290 self.advance()?;
291
292 let value = self.parse_field_value(depth)?;
293 obj.insert(key, value);
294 }
295
296 loop {
297 while matches!(self.current_token, Token::Newline) {
299 self.advance()?;
300
301 if !self.options.strict {
302 while matches!(self.current_token, Token::Newline) {
303 self.advance()?;
304 }
305 }
306
307 if matches!(self.current_token, Token::Newline) {
308 continue;
309 }
310
311 let next_indent = self.scanner.get_last_line_indent();
312
313 let should_continue = if let Some(expected) = base_indent {
315 next_indent == expected
316 } else {
317 let current_depth_indent = self.options.indent.get_spaces() * depth;
319 next_indent == current_depth_indent
320 };
321
322 if !should_continue {
323 break;
324 }
325 }
326
327 if matches!(self.current_token, Token::Eof) {
328 break;
329 }
330
331 if !matches!(self.current_token, Token::String(_, _)) {
332 break;
333 }
334
335 if matches!(self.current_token, Token::Eof) {
336 break;
337 }
338
339 let current_indent = self.scanner.get_last_line_indent();
340
341 if let Some(expected) = base_indent {
342 if current_indent != expected {
343 break;
344 }
345 }
346
347 if self.options.strict {
348 self.validate_indentation(current_indent)?;
349 }
350
351 if base_indent.is_none() {
352 base_indent = Some(current_indent);
353 }
354
355 let key = match &self.current_token {
356 Token::String(s, _) => s.clone(),
357 _ => break,
358 };
359 self.advance()?;
360
361 let value = if matches!(self.current_token, Token::LeftBracket) {
362 self.parse_array(depth)?
363 } else {
364 if !matches!(self.current_token, Token::Colon) {
365 break;
366 }
367 self.advance()?;
368 self.parse_field_value(depth)?
369 };
370
371 obj.insert(key, value);
372 }
373
374 Ok(Value::Object(obj))
375 }
376
377 fn parse_field_value(&mut self, depth: usize) -> ToonResult<Value> {
378 validate_depth(depth, MAX_DEPTH)?;
379
380 if matches!(self.current_token, Token::Newline | Token::Eof) {
381 let has_children = if matches!(self.current_token, Token::Newline) {
383 let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
384 let next_indent = self.scanner.count_leading_spaces();
385 next_indent >= current_depth_indent
386 } else {
387 false
388 };
389
390 if has_children {
391 self.parse_value_with_depth(depth + 1)
392 } else {
393 Ok(Value::Object(Map::new()))
395 }
396 } else {
397 self.parse_value_with_depth(depth + 1)
398 }
399 }
400
401 fn parse_root_array(&mut self, depth: usize) -> ToonResult<Value> {
402 validate_depth(depth, MAX_DEPTH)?;
403
404 if !matches!(self.current_token, Token::LeftBracket) {
405 return Err(self.parse_error_with_context("Expected '[' at the start of root array"));
406 }
407
408 self.parse_array(depth)
409 }
410
411 fn parse_array_header(
412 &mut self,
413 ) -> ToonResult<(usize, Option<Delimiter>, Option<Vec<String>>)> {
414 if !matches!(self.current_token, Token::LeftBracket) {
415 return Err(self.parse_error_with_context("Expected '['"));
416 }
417 self.advance()?;
418
419 let (length, embedded_delim) = if let Token::String(s, _) = &self.current_token {
422 if let Some(stripped) = s.strip_prefix('#') {
423 let len_str = stripped
425 .chars()
426 .take_while(|c| c.is_ascii_digit())
427 .collect::<String>();
428
429 if len_str.is_empty() {
430 return Err(self.parse_error_with_context(format!(
431 "Expected array length after #, found: {stripped}",
432 )));
433 }
434
435 let length = len_str.parse::<usize>().map_err(|_| {
436 self.parse_error_with_context(format!(
437 "Expected array length after #, found: {len_str}",
438 ))
439 })?;
440
441 let remainder = &stripped[len_str.len()..];
442 let embedded_delim = if remainder == "|" {
443 Some(Delimiter::Pipe)
444 } else if remainder == "\t" {
445 Some(Delimiter::Tab)
446 } else if remainder == "," {
447 Some(Delimiter::Comma)
448 } else if remainder.is_empty() {
449 None
450 } else {
451 return Err(self.parse_error_with_context(format!(
452 "Unexpected characters after length: {remainder}",
453 )));
454 };
455
456 self.advance()?;
457 (length, embedded_delim)
458 } else if s == "#" {
459 self.advance()?;
461 match &self.current_token {
462 Token::Integer(n) => {
463 let val = *n as usize;
464 self.advance()?;
465 (val, None)
466 }
467 _ => {
468 return Err(self.parse_error_with_context(format!(
469 "Expected array length after #, found {:?}",
470 self.current_token
471 )))
472 }
473 }
474 } else {
475 let val = s.parse::<usize>().map_err(|_| {
477 self.parse_error_with_context(format!("Expected array length, found: {s}",))
478 })?;
479 (val, None)
480 }
481 } else if let Token::Integer(n) = &self.current_token {
482 let val = *n as usize;
483 self.advance()?;
484 (val, None)
485 } else {
486 return Err(self.parse_error_with_context(format!(
487 "Expected array length, found {:?}",
488 self.current_token
489 )));
490 };
491
492 let detected_delim = if let Some(delim) = embedded_delim {
494 Some(delim)
495 } else {
496 match &self.current_token {
497 Token::Delimiter(d) => {
498 let delim = *d;
499 self.advance()?;
500 Some(delim)
501 }
502 Token::String(s, _) if s == "," => {
503 self.advance()?;
504 Some(Delimiter::Comma)
505 }
506 Token::String(s, _) if s == "|" => {
507 self.advance()?;
508 Some(Delimiter::Pipe)
509 }
510 Token::String(s, _) if s == "\t" => {
511 self.advance()?;
512 Some(Delimiter::Tab)
513 }
514 _ => None,
515 }
516 };
517
518 let active_delim = detected_delim.or(Some(Delimiter::Comma));
520
521 self.scanner.set_active_delimiter(active_delim);
522
523 if !matches!(self.current_token, Token::RightBracket) {
524 return Err(self.parse_error_with_context(format!(
525 "Expected ']', found {:?}",
526 self.current_token
527 )));
528 }
529 self.advance()?;
530
531 let fields = if matches!(self.current_token, Token::LeftBrace) {
532 self.advance()?;
533 let mut fields = Vec::new();
534
535 loop {
536 match &self.current_token {
537 Token::String(s, _) => {
538 fields.push(s.clone());
539 self.advance()?;
540
541 if matches!(self.current_token, Token::RightBrace) {
542 break;
543 }
544
545 let is_delim = match &self.current_token {
546 Token::Delimiter(_) => true,
547 Token::String(s, _) if s == "," || s == "|" || s == "\t" => true,
548 _ => false,
549 };
550 if is_delim {
551 self.advance()?;
552 } else {
553 return Err(self.parse_error_with_context(format!(
554 "Expected delimiter or '}}', found {:?}",
555 self.current_token
556 )));
557 }
558 }
559 Token::RightBrace => break,
560 _ => {
561 return Err(self.parse_error_with_context(format!(
562 "Expected field name, found {:?}",
563 self.current_token
564 )))
565 }
566 }
567 }
568
569 self.advance()?;
570 Some(fields)
571 } else {
572 None
573 };
574
575 if !matches!(self.current_token, Token::Colon) {
576 return Err(self.parse_error_with_context("Expected ':' after array header"));
577 }
578 self.advance()?;
579
580 Ok((length, detected_delim, fields))
581 }
582
583 fn parse_array(&mut self, depth: usize) -> ToonResult<Value> {
584 validate_depth(depth, MAX_DEPTH)?;
585
586 let (length, _detected_delim, fields) = self.parse_array_header()?;
587
588 if let Some(fields) = fields {
589 validation::validate_field_list(&fields)?;
590 self.parse_tabular_array(length, fields, depth)
591 } else {
592 self.parse_regular_array(length, depth)
593 }
594 }
595
596 fn parse_tabular_array(
597 &mut self,
598 length: usize,
599 fields: Vec<String>,
600 depth: usize,
601 ) -> ToonResult<Value> {
602 let mut rows = Vec::new();
603
604 if !matches!(self.current_token, Token::Newline) {
605 return Err(self
606 .parse_error_with_context("Expected newline after tabular array header")
607 .with_suggestion("Tabular arrays must have rows on separate lines"));
608 }
609 self.skip_newlines()?;
610
611 for row_index in 0..length {
612 if matches!(self.current_token, Token::Eof) {
613 if self.options.strict {
614 return Err(self.parse_error_with_context(format!(
615 "Expected {} rows, but got {} before EOF",
616 length,
617 rows.len()
618 )));
619 }
620 break;
621 }
622
623 let current_indent = self.scanner.get_last_line_indent();
624 let expected_indent = self.options.indent.get_spaces() * (depth + 1);
625
626 if self.options.strict {
627 self.validate_indentation(current_indent)?;
628
629 if current_indent != expected_indent {
630 return Err(self.parse_error_with_context(format!(
631 "Invalid indentation for tabular row: expected {expected_indent} spaces, \
632 found {current_indent}"
633 )));
634 }
635 }
636
637 let mut row = Map::new();
638
639 for (field_index, field) in fields.iter().enumerate() {
640 if field_index > 0 {
642 if matches!(self.current_token, Token::Delimiter(_))
643 || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
644 {
645 self.advance()?;
646 } else {
647 return Err(self
648 .parse_error_with_context(format!(
649 "Expected delimiter, found {:?}",
650 self.current_token
651 ))
652 .with_suggestion(format!(
653 "Tabular row {} field {} needs a delimiter",
654 row_index + 1,
655 field_index + 1
656 )));
657 }
658 }
659
660 let value = if matches!(self.current_token, Token::Delimiter(_))
662 || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
663 || matches!(self.current_token, Token::Newline | Token::Eof)
664 {
665 Value::String(String::new())
666 } else {
667 self.parse_tabular_field_value()?
668 };
669
670 row.insert(field.clone(), value);
671
672 if field_index < fields.len() - 1 {
674 if matches!(self.current_token, Token::Newline | Token::Eof) {
676 if self.options.strict {
677 return Err(self
678 .parse_error_with_context(format!(
679 "Tabular row {}: expected {} values, but found only {}",
680 row_index + 1,
681 fields.len(),
682 field_index + 1
683 ))
684 .with_suggestion(format!(
685 "Row {} should have exactly {} values",
686 row_index + 1,
687 fields.len()
688 )));
689 } else {
690 for field in fields.iter().skip(field_index + 1) {
692 row.insert(field.clone(), Value::Null);
693 }
694 break;
695 }
696 }
697 } else if !matches!(self.current_token, Token::Newline | Token::Eof)
698 && (matches!(self.current_token, Token::Delimiter(_))
699 || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t"))
700 {
701 return Err(self
703 .parse_error_with_context(format!(
704 "Tabular row {}: expected {} values, but found extra values",
705 row_index + 1,
706 fields.len()
707 ))
708 .with_suggestion(format!(
709 "Row {} should have exactly {} values",
710 row_index + 1,
711 fields.len()
712 )));
713 }
714 }
715
716 if !self.options.strict && row.len() < fields.len() {
717 for field in fields.iter().skip(row.len()) {
718 row.insert(field.clone(), Value::Null);
719 }
720 }
721
722 rows.push(Value::Object(row));
723
724 if matches!(self.current_token, Token::Eof) {
725 break;
726 }
727
728 if !matches!(self.current_token, Token::Newline) {
729 if !self.options.strict {
730 while !matches!(self.current_token, Token::Newline | Token::Eof) {
731 self.advance()?;
732 }
733 if matches!(self.current_token, Token::Eof) {
734 break;
735 }
736 } else {
737 return Err(self.parse_error_with_context(format!(
738 "Expected newline after tabular row {}",
739 row_index + 1
740 )));
741 }
742 }
743
744 if row_index + 1 < length {
745 self.advance()?;
746 if self.options.strict && matches!(self.current_token, Token::Newline) {
747 return Err(self.parse_error_with_context(
748 "Blank lines are not allowed inside tabular arrays in strict mode",
749 ));
750 }
751
752 self.skip_newlines()?;
753 } else if matches!(self.current_token, Token::Newline) {
754 self.advance()?;
756 self.skip_newlines()?;
757
758 let expected_indent = self.options.indent.get_spaces() * (depth + 1);
759 let actual_indent = self.scanner.get_last_line_indent();
760
761 if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) {
764 let is_key_value = matches!(self.current_token, Token::String(_, _))
765 && matches!(self.scanner.peek(), Some(':'));
766
767 if !is_key_value {
768 return Err(self.parse_error_with_context(format!(
769 "Array length mismatch: expected {length} rows, but more rows found",
770 )));
771 }
772 }
773 }
774 }
775
776 validation::validate_array_length(length, rows.len())?;
777
778 Ok(Value::Array(rows))
779 }
780
781 fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult<Value> {
782 let mut items = Vec::new();
783
784 match &self.current_token {
785 Token::Newline => {
786 self.skip_newlines()?;
787
788 let expected_indent = self.options.indent.get_spaces() * (depth + 1);
789
790 for i in 0..length {
791 let current_indent = self.scanner.get_last_line_indent();
792 if self.options.strict {
793 self.validate_indentation(current_indent)?;
794
795 if current_indent != expected_indent {
796 return Err(self.parse_error_with_context(format!(
797 "Invalid indentation for list item: expected {expected_indent} \
798 spaces, found {current_indent}"
799 )));
800 }
801 }
802 if !matches!(self.current_token, Token::Dash) {
803 return Err(self
804 .parse_error_with_context(format!(
805 "Expected '-' for list item, found {:?}",
806 self.current_token
807 ))
808 .with_suggestion(format!(
809 "List arrays need '-' prefix for each item (item {} of {})",
810 i + 1,
811 length
812 )));
813 }
814 self.advance()?;
815
816 let value = if matches!(self.current_token, Token::Newline | Token::Eof) {
817 Value::Object(Map::new())
818 } else if matches!(self.current_token, Token::LeftBracket) {
819 self.parse_array(depth + 1)?
820 } else if let Token::String(s, _) = &self.current_token {
821 let key = s.clone();
822 self.advance()?;
823
824 if matches!(self.current_token, Token::Colon | Token::LeftBracket) {
825 let first_value = if matches!(self.current_token, Token::LeftBracket) {
827 self.parse_array(depth + 1)?
828 } else {
829 self.advance()?;
830 if matches!(self.current_token, Token::LeftBracket) {
832 self.parse_array(depth + 2)?
833 } else {
834 self.parse_field_value(depth + 1)?
835 }
836 };
837
838 let mut obj = Map::new();
839 obj.insert(key, first_value);
840
841 let field_indent = self.options.indent.get_spaces() * (depth + 2);
842
843 let should_parse_more_fields =
845 if matches!(self.current_token, Token::Newline) {
846 let next_indent = self.scanner.count_leading_spaces();
847
848 if next_indent < field_indent {
849 false
850 } else {
851 self.advance()?;
852
853 if !self.options.strict {
854 self.skip_newlines()?;
855 }
856 true
857 }
858 } else {
859 let current_indent = self.scanner.get_last_line_indent();
860 current_indent == field_indent
861 };
862
863 if should_parse_more_fields {
865 while !matches!(self.current_token, Token::Eof) {
866 let current_indent = self.scanner.get_last_line_indent();
867
868 if current_indent < field_indent {
869 break;
870 }
871
872 if current_indent != field_indent && self.options.strict {
873 break;
874 }
875
876 if matches!(self.current_token, Token::Dash) {
878 break;
879 }
880
881 let field_key = match &self.current_token {
882 Token::String(s, _) => s.clone(),
883 _ => break,
884 };
885 self.advance()?;
886
887 let field_value =
888 if matches!(self.current_token, Token::LeftBracket) {
889 self.parse_array(depth + 2)?
890 } else if matches!(self.current_token, Token::Colon) {
891 self.advance()?;
892 if matches!(self.current_token, Token::LeftBracket) {
893 self.parse_array(depth + 2)?
894 } else {
895 self.parse_field_value(depth + 1)?
896 }
897 } else {
898 break;
899 };
900
901 obj.insert(field_key, field_value);
902
903 if matches!(self.current_token, Token::Newline) {
904 let next_indent = self.scanner.count_leading_spaces();
905 if next_indent < field_indent {
906 break;
907 }
908 self.advance()?;
909 if !self.options.strict {
910 self.skip_newlines()?;
911 }
912 } else {
913 break;
914 }
915 }
916 }
917
918 Value::Object(obj)
919 } else if matches!(self.current_token, Token::LeftBracket) {
920 let array_value = self.parse_array(depth + 1)?;
922 let mut obj = Map::new();
923 obj.insert(key, array_value);
924 Value::Object(obj)
925 } else {
926 let mut accumulated = key;
928 while let Token::String(next, _) = &self.current_token {
929 if !accumulated.is_empty() {
930 accumulated.push(' ');
931 }
932 accumulated.push_str(next);
933 self.advance()?;
934 }
935 Value::String(accumulated)
936 }
937 } else {
938 self.parse_primitive()?
939 };
940
941 items.push(value);
942
943 if items.len() < length {
944 if matches!(self.current_token, Token::Newline) {
945 self.advance()?;
946
947 if self.options.strict && matches!(self.current_token, Token::Newline) {
948 return Err(self.parse_error_with_context(
949 "Blank lines are not allowed inside list arrays in strict mode",
950 ));
951 }
952
953 self.skip_newlines()?;
954 } else if !matches!(self.current_token, Token::Dash) {
955 return Err(self.parse_error_with_context(format!(
956 "Expected newline or next list item after list item {}",
957 i + 1
958 )));
959 }
960 } else if matches!(self.current_token, Token::Newline) {
961 self.advance()?;
963 self.skip_newlines()?;
964
965 let list_indent = self.options.indent.get_spaces() * (depth + 1);
966 let actual_indent = self.scanner.get_last_line_indent();
967 if actual_indent == list_indent && matches!(self.current_token, Token::Dash)
969 {
970 return Err(self.parse_error_with_context(format!(
971 "Array length mismatch: expected {length} items, but more items \
972 found",
973 )));
974 }
975 }
976 }
977 }
978 _ => {
979 for i in 0..length {
980 if i > 0 {
981 if matches!(self.current_token, Token::Delimiter(_))
982 || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
983 {
984 self.advance()?;
985 } else {
986 return Err(self
987 .parse_error_with_context(format!(
988 "Expected delimiter, found {:?}",
989 self.current_token
990 ))
991 .with_suggestion(format!(
992 "Expected delimiter between items (item {} of {})",
993 i + 1,
994 length
995 )));
996 }
997 }
998
999 let value = if matches!(self.current_token, Token::Delimiter(_))
1000 || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
1001 || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length)
1002 {
1003 Value::String(String::new())
1004 } else if matches!(self.current_token, Token::LeftBracket) {
1005 self.parse_array(depth + 1)?
1006 } else {
1007 self.parse_primitive()?
1008 };
1009
1010 items.push(value);
1011 }
1012 }
1013 }
1014
1015 validation::validate_array_length(length, items.len())?;
1016
1017 if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) {
1018 return Err(self.parse_error_with_context(format!(
1019 "Array length mismatch: expected {length} items, but more items found",
1020 )));
1021 }
1022
1023 Ok(Value::Array(items))
1024 }
1025
1026 fn parse_tabular_field_value(&mut self) -> ToonResult<Value> {
1027 match &self.current_token {
1028 Token::Null => {
1029 self.advance()?;
1030 Ok(Value::Null)
1031 }
1032 Token::Bool(b) => {
1033 let val = *b;
1034 self.advance()?;
1035 Ok(Value::Bool(val))
1036 }
1037 Token::Integer(i) => {
1038 let val = *i;
1039 self.advance()?;
1040 Ok(Number::from(val).into())
1041 }
1042 Token::Number(n) => {
1043 let val = *n;
1044 self.advance()?;
1045 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1047 Ok(Number::from(val as i64).into())
1048 } else {
1049 Ok(Number::from_f64(val)
1050 .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1051 .into())
1052 }
1053 }
1054 Token::String(s, _) => {
1055 let mut accumulated = s.clone();
1057 self.advance()?;
1058
1059 while let Token::String(next, _) = &self.current_token {
1060 if !accumulated.is_empty() {
1061 accumulated.push(' ');
1062 }
1063 accumulated.push_str(next);
1064 self.advance()?;
1065 }
1066
1067 Ok(Value::String(accumulated))
1068 }
1069 _ => Err(self.parse_error_with_context(format!(
1070 "Expected primitive value, found {:?}",
1071 self.current_token
1072 ))),
1073 }
1074 }
1075
1076 fn parse_primitive(&mut self) -> ToonResult<Value> {
1077 match &self.current_token {
1078 Token::Null => {
1079 self.advance()?;
1080 Ok(Value::Null)
1081 }
1082 Token::Bool(b) => {
1083 let val = *b;
1084 self.advance()?;
1085 Ok(Value::Bool(val))
1086 }
1087 Token::Integer(i) => {
1088 let val = *i;
1089 self.advance()?;
1090 Ok(Number::from(val).into())
1091 }
1092 Token::Number(n) => {
1093 let val = *n;
1094 self.advance()?;
1095
1096 if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1097 Ok(Number::from(val as i64).into())
1098 } else {
1099 Ok(Number::from_f64(val)
1100 .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1101 .into())
1102 }
1103 }
1104 Token::String(s, _) => {
1105 let val = s.clone();
1106 self.advance()?;
1107 Ok(Value::String(val))
1108 }
1109 _ => Err(self.parse_error_with_context(format!(
1110 "Expected primitive value, found {:?}",
1111 self.current_token
1112 ))),
1113 }
1114 }
1115
1116 fn parse_error_with_context(&self, message: impl Into<String>) -> ToonError {
1117 let (line, column) = self.scanner.current_position();
1118 let message = message.into();
1119
1120 let context = self.get_error_context(line, column);
1121
1122 ToonError::ParseError {
1123 line,
1124 column,
1125 message,
1126 context: Some(Box::new(context)),
1127 }
1128 }
1129
1130 fn get_error_context(&self, line: usize, column: usize) -> ErrorContext {
1131 let lines: Vec<&str> = self.input.lines().collect();
1132
1133 let source_line = if line > 0 && line <= lines.len() {
1134 lines[line - 1].to_string()
1135 } else {
1136 String::new()
1137 };
1138
1139 let preceding_lines: Vec<String> = if line > 1 {
1140 lines[line.saturating_sub(3)..line - 1]
1141 .iter()
1142 .map(|s| s.to_string())
1143 .collect()
1144 } else {
1145 Vec::new()
1146 };
1147
1148 let following_lines: Vec<String> = if line < lines.len() {
1149 lines[line..line.saturating_add(2).min(lines.len())]
1150 .iter()
1151 .map(|s| s.to_string())
1152 .collect()
1153 } else {
1154 Vec::new()
1155 };
1156
1157 let indicator = if column > 0 {
1158 Some(format!("{:width$}^", "", width = column - 1))
1159 } else {
1160 None
1161 };
1162
1163 ErrorContext {
1164 source_line,
1165 preceding_lines,
1166 following_lines,
1167 suggestion: None,
1168 indicator,
1169 }
1170 }
1171
1172 fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> {
1173 if !self.options.strict {
1174 return Ok(());
1175 }
1176
1177 let indent_size = self.options.indent.get_spaces();
1178 if indent_size > 0 && indent_amount > 0 && !indent_amount.is_multiple_of(indent_size) {
1180 Err(self.parse_error_with_context(format!(
1181 "Invalid indentation: found {indent_amount} spaces, but must be a multiple of \
1182 {indent_size}"
1183 )))
1184 } else {
1185 Ok(())
1186 }
1187 }
1188}
1189
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Runs the parser over `input` with default options.
    fn parse(input: &str) -> ToonResult<Value> {
        Parser::new(input, DecodeOptions::default())?.parse()
    }

    /// Parses `input` and panics if decoding fails.
    fn decode(input: &str) -> Value {
        parse(input).unwrap()
    }

    #[test]
    fn test_parse_primitives() {
        let cases = [
            ("null", json!(null)),
            ("true", json!(true)),
            ("false", json!(false)),
            ("42", json!(42)),
            ("3.141592653589793", json!(std::f64::consts::PI)),
            ("hello", json!("hello")),
        ];

        for (input, expected) in cases {
            assert_eq!(decode(input), expected);
        }
    }

    #[test]
    fn test_parse_simple_object() {
        let decoded = decode("name: Alice\nage: 30");
        assert_eq!(decoded["name"], json!("Alice"));
        assert_eq!(decoded["age"], json!(30));
    }

    #[test]
    fn test_parse_primitive_array() {
        let decoded = decode("tags[3]: a,b,c");
        assert_eq!(decoded["tags"], json!(["a", "b", "c"]));
    }

    #[test]
    fn test_parse_empty_array() {
        let decoded = decode("items[0]:");
        assert_eq!(decoded["items"], json!([]));
    }

    #[test]
    fn test_parse_tabular_array() {
        let decoded = decode("users[2]{id,name}:\n 1,Alice\n 2,Bob");
        let expected = json!([
            {"id": 1, "name": "Alice"},
            {"id": 2, "name": "Bob"}
        ]);
        assert_eq!(decoded["users"], expected);
    }

    #[test]
    fn test_empty_tokens() {
        let decoded = decode("items[3]: a,,c");
        assert_eq!(decoded["items"], json!(["a", "", "c"]));
    }

    #[test]
    fn test_empty_nested_object() {
        assert_eq!(decode("user:"), json!({"user": {}}));
    }

    #[test]
    fn test_list_item_object() {
        let decoded = decode("items[2]:\n - id: 1\n name: First\n - id: 2\n name: Second");
        let expected = json!([
            {"id": 1, "name": "First"},
            {"id": 2, "name": "Second"}
        ]);
        assert_eq!(decoded["items"], expected);
    }

    #[test]
    fn test_nested_array_in_list_item() {
        let decoded = decode("items[1]:\n - tags[3]: a,b,c");
        assert_eq!(decoded["items"], json!([{"tags": ["a", "b", "c"]}]));
    }
}