toon_format/decode/
parser.rs

1use serde_json::{
2    Map,
3    Number,
4    Value,
5};
6
7use crate::{
8    constants::{
9        KEYWORDS,
10        MAX_DEPTH,
11        QUOTED_KEY_MARKER,
12    },
13    decode::{
14        scanner::{
15            Scanner,
16            Token,
17        },
18        validation,
19    },
20    types::{
21        DecodeOptions,
22        Delimiter,
23        ErrorContext,
24        ToonError,
25        ToonResult,
26    },
27    utils::validation::validate_depth,
28};
29
/// Parser that builds JSON values from a sequence of tokens.
///
/// The parser pulls tokens one at a time from an owned [`Scanner`] and always
/// keeps the most recently scanned token in `current_token`.
// NOTE(review): `#[allow(unused)]` presumably silences warnings for fields that
// are stored but not read by the parsing code (e.g. `delimiter`, `input`) —
// confirm and narrow the allow to those fields if so.
#[allow(unused)]
pub struct Parser<'a> {
    /// Token source; also queried for indentation and single-character lookahead.
    scanner: Scanner,
    /// The token most recently returned by the scanner (not yet consumed).
    current_token: Token,
    /// Decode options supplied by the caller (strict mode, indent width, ...).
    options: DecodeOptions,
    /// Copy of `options.delimiter` taken when the parser was constructed.
    delimiter: Option<Delimiter>,
    /// The input text handed to [`Parser::new`].
    input: &'a str,
}
39
40impl<'a> Parser<'a> {
41    /// Create a new parser with the given input and options.
42    pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult<Self> {
43        let mut scanner = Scanner::new(input);
44        let chosen_delim = options.delimiter;
45        scanner.set_active_delimiter(chosen_delim);
46        let current_token = scanner.scan_token()?;
47
48        Ok(Self {
49            scanner,
50            current_token,
51            delimiter: chosen_delim,
52            options,
53            input,
54        })
55    }
56
57    /// Parse the input into a JSON value.
58    pub fn parse(&mut self) -> ToonResult<Value> {
59        if self.options.strict {
60            self.validate_indentation(self.scanner.get_last_line_indent())?;
61        }
62        let value = self.parse_value()?;
63
64        // In strict mode, check for trailing content at root level
65        if self.options.strict {
66            self.skip_newlines()?;
67            if !matches!(self.current_token, Token::Eof) {
68                return Err(self
69                    .parse_error_with_context(
70                        "Multiple values at root level are not allowed in strict mode",
71                    )
72                    .with_suggestion("Wrap multiple values in an object or array"));
73            }
74        }
75
76        Ok(value)
77    }
78
79    fn advance(&mut self) -> ToonResult<()> {
80        self.current_token = self.scanner.scan_token()?;
81        Ok(())
82    }
83
84    fn skip_newlines(&mut self) -> ToonResult<()> {
85        while matches!(self.current_token, Token::Newline) {
86            self.advance()?;
87        }
88        Ok(())
89    }
90
91    fn parse_value(&mut self) -> ToonResult<Value> {
92        self.parse_value_with_depth(0)
93    }
94
95    fn parse_value_with_depth(&mut self, depth: usize) -> ToonResult<Value> {
96        validate_depth(depth, MAX_DEPTH)?;
97
98        let had_newline = matches!(self.current_token, Token::Newline);
99        self.skip_newlines()?;
100
101        match &self.current_token {
102            Token::Null => {
103                // Peek ahead to see if this is a key (followed by ':') or a value
104                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
105                if next_char_is_colon {
106                    let key = KEYWORDS[0].to_string();
107                    self.advance()?;
108                    self.parse_object_with_initial_key(key, depth)
109                } else {
110                    self.advance()?;
111                    Ok(Value::Null)
112                }
113            }
114            Token::Bool(b) => {
115                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
116                if next_char_is_colon {
117                    let key = if *b {
118                        KEYWORDS[1].to_string()
119                    } else {
120                        KEYWORDS[2].to_string()
121                    };
122                    self.advance()?;
123                    self.parse_object_with_initial_key(key, depth)
124                } else {
125                    let val = *b;
126                    self.advance()?;
127                    Ok(Value::Bool(val))
128                }
129            }
130            Token::Integer(i) => {
131                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
132                if next_char_is_colon {
133                    let key = i.to_string();
134                    self.advance()?;
135                    self.parse_object_with_initial_key(key, depth)
136                } else {
137                    let val = *i;
138                    self.advance()?;
139                    Ok(serde_json::Number::from(val).into())
140                }
141            }
142            Token::Number(n) => {
143                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
144                if next_char_is_colon {
145                    let key = n.to_string();
146                    self.advance()?;
147                    self.parse_object_with_initial_key(key, depth)
148                } else {
149                    let val = *n;
150                    self.advance()?;
151                    // Normalize floats that are actually integers
152                    if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
153                        Ok(serde_json::Number::from(val as i64).into())
154                    } else {
155                        Ok(serde_json::Number::from_f64(val)
156                            .ok_or_else(|| {
157                                ToonError::InvalidInput(format!("Invalid number: {val}"))
158                            })?
159                            .into())
160                    }
161                }
162            }
163            Token::String(s, _) => {
164                let first = s.clone();
165                self.advance()?;
166
167                match &self.current_token {
168                    Token::Colon | Token::LeftBracket => {
169                        self.parse_object_with_initial_key(first, depth)
170                    }
171                    _ => {
172                        // Strings on new indented lines could be missing colons (keys) or values
173                        // Only error in strict mode when we know it's a new line
174                        if self.options.strict && depth > 0 && had_newline {
175                            return Err(self
176                                .parse_error_with_context(format!(
177                                    "Expected ':' after '{first}' in object context"
178                                ))
179                                .with_suggestion(
180                                    "Add ':' after the key, or place the value on the same line \
181                                     as the parent key",
182                                ));
183                        }
184
185                        // Multiple consecutive string tokens get joined with spaces
186                        let mut accumulated = first;
187                        while let Token::String(next, _) = &self.current_token {
188                            if !accumulated.is_empty() {
189                                accumulated.push(' ');
190                            }
191                            accumulated.push_str(next);
192                            self.advance()?;
193                        }
194                        Ok(Value::String(accumulated))
195                    }
196                }
197            }
198            Token::LeftBracket => self.parse_root_array(depth),
199            Token::Eof => Ok(Value::Object(Map::new())),
200            _ => self.parse_object(depth),
201        }
202    }
203
204    fn parse_object(&mut self, depth: usize) -> ToonResult<Value> {
205        validate_depth(depth, MAX_DEPTH)?;
206
207        let mut obj = Map::new();
208        // Track the indentation of the first key to ensure all keys align
209        let mut base_indent: Option<usize> = None;
210
211        loop {
212            while matches!(self.current_token, Token::Newline) {
213                self.advance()?;
214            }
215
216            if matches!(self.current_token, Token::Eof) {
217                break;
218            }
219
220            let current_indent = self.scanner.get_last_line_indent();
221
222            if self.options.strict {
223                self.validate_indentation(current_indent)?;
224            }
225
226            // Once we've seen the first key, all subsequent keys must match its indent
227            if let Some(expected) = base_indent {
228                if current_indent != expected {
229                    break;
230                }
231            } else {
232                base_indent = Some(current_indent);
233            }
234
235            let key = match &self.current_token {
236                Token::String(s, was_quoted) => {
237                    // Mark quoted keys containing dots with a special prefix
238                    // so path expansion can skip them
239                    if *was_quoted && s.contains('.') {
240                        format!("{QUOTED_KEY_MARKER}{s}")
241                    } else {
242                        s.clone()
243                    }
244                }
245                _ => {
246                    return Err(self
247                        .parse_error_with_context(format!(
248                            "Expected key, found {:?}",
249                            self.current_token
250                        ))
251                        .with_suggestion("Object keys must be strings"));
252                }
253            };
254            self.advance()?;
255
256            let value = if matches!(self.current_token, Token::LeftBracket) {
257                self.parse_array(depth)?
258            } else {
259                if !matches!(self.current_token, Token::Colon) {
260                    return Err(self
261                        .parse_error_with_context(format!(
262                            "Expected ':' or '[', found {:?}",
263                            self.current_token
264                        ))
265                        .with_suggestion("Use ':' for object values or '[' for arrays"));
266                }
267                self.advance()?;
268                self.parse_field_value(depth)?
269            };
270
271            obj.insert(key, value);
272        }
273
274        Ok(Value::Object(obj))
275    }
276
277    fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult<Value> {
278        validate_depth(depth, MAX_DEPTH)?;
279
280        let mut obj = Map::new();
281        let mut base_indent: Option<usize> = None;
282
283        // Validate indentation for the initial key if in strict mode
284        if self.options.strict {
285            let current_indent = self.scanner.get_last_line_indent();
286            self.validate_indentation(current_indent)?;
287        }
288
289        if matches!(self.current_token, Token::LeftBracket) {
290            let value = self.parse_array(depth)?;
291            obj.insert(key, value);
292        } else {
293            if !matches!(self.current_token, Token::Colon) {
294                return Err(self.parse_error_with_context(format!(
295                    "Expected ':', found {:?}",
296                    self.current_token
297                )));
298            }
299            self.advance()?;
300
301            let value = self.parse_field_value(depth)?;
302            obj.insert(key, value);
303        }
304
305        loop {
306            // Skip newlines and check if the next line belongs to this object
307            while matches!(self.current_token, Token::Newline) {
308                self.advance()?;
309
310                if !self.options.strict {
311                    while matches!(self.current_token, Token::Newline) {
312                        self.advance()?;
313                    }
314                }
315
316                if matches!(self.current_token, Token::Newline) {
317                    continue;
318                }
319
320                let next_indent = self.scanner.get_last_line_indent();
321
322                // Check if the next line is at the right indentation level
323                let should_continue = if let Some(expected) = base_indent {
324                    next_indent == expected
325                } else {
326                    // First field: use depth-based expected indent
327                    let current_depth_indent = self.options.indent.get_spaces() * depth;
328                    next_indent == current_depth_indent
329                };
330
331                if !should_continue {
332                    break;
333                }
334            }
335
336            if matches!(self.current_token, Token::Eof) {
337                break;
338            }
339
340            if !matches!(self.current_token, Token::String(_, _)) {
341                break;
342            }
343
344            if matches!(self.current_token, Token::Eof) {
345                break;
346            }
347
348            let current_indent = self.scanner.get_last_line_indent();
349
350            if let Some(expected) = base_indent {
351                if current_indent != expected {
352                    break;
353                }
354            } else {
355                // verify first additional field matches expected depth
356                let expected_depth_indent = self.options.indent.get_spaces() * depth;
357                if current_indent != expected_depth_indent {
358                    break;
359                }
360            }
361
362            if self.options.strict {
363                self.validate_indentation(current_indent)?;
364            }
365
366            if base_indent.is_none() {
367                base_indent = Some(current_indent);
368            }
369
370            let key = match &self.current_token {
371                Token::String(s, was_quoted) => {
372                    // Mark quoted keys containing dots with a special prefix
373                    // so path expansion can skip them
374                    if *was_quoted && s.contains('.') {
375                        format!("{QUOTED_KEY_MARKER}{s}")
376                    } else {
377                        s.clone()
378                    }
379                }
380                _ => break,
381            };
382            self.advance()?;
383
384            let value = if matches!(self.current_token, Token::LeftBracket) {
385                self.parse_array(depth)?
386            } else {
387                if !matches!(self.current_token, Token::Colon) {
388                    break;
389                }
390                self.advance()?;
391                self.parse_field_value(depth)?
392            };
393
394            obj.insert(key, value);
395        }
396
397        Ok(Value::Object(obj))
398    }
399
400    fn parse_field_value(&mut self, depth: usize) -> ToonResult<Value> {
401        validate_depth(depth, MAX_DEPTH)?;
402
403        if matches!(self.current_token, Token::Newline | Token::Eof) {
404            // After a colon on a new line, check if there are indented children
405            let has_children = if matches!(self.current_token, Token::Newline) {
406                let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
407                let next_indent = self.scanner.count_leading_spaces();
408                next_indent >= current_depth_indent
409            } else {
410                false
411            };
412
413            if has_children {
414                self.parse_value_with_depth(depth + 1)
415            } else {
416                // Empty object when colon is followed by newline with no children
417                Ok(Value::Object(Map::new()))
418            }
419        } else {
420            self.parse_value_with_depth(depth + 1)
421        }
422    }
423
424    fn parse_root_array(&mut self, depth: usize) -> ToonResult<Value> {
425        validate_depth(depth, MAX_DEPTH)?;
426
427        if !matches!(self.current_token, Token::LeftBracket) {
428            return Err(self.parse_error_with_context("Expected '[' at the start of root array"));
429        }
430
431        self.parse_array(depth)
432    }
433
434    fn parse_array_header(
435        &mut self,
436    ) -> ToonResult<(usize, Option<Delimiter>, Option<Vec<String>>)> {
437        if !matches!(self.current_token, Token::LeftBracket) {
438            return Err(self.parse_error_with_context("Expected '['"));
439        }
440        self.advance()?;
441
442        // Parse array length (plain integer only per TOON spec v2.0)
443        // Supports formats: [N], [N|], [N\t] (no # marker)
444        let length = if let Token::Integer(n) = &self.current_token {
445            *n as usize
446        } else if let Token::String(s, _) = &self.current_token {
447            // Check if string starts with # - this is now invalid per spec v2.0
448            if s.starts_with('#') {
449                return Err(self
450                    .parse_error_with_context(
451                        "Length marker '#' is no longer supported in TOON spec v2.0. Use [N] \
452                         format instead of [#N]",
453                    )
454                    .with_suggestion("Remove the '#' prefix from the array length"));
455            }
456
457            // Plain string that's a number: "3"
458            s.parse::<usize>().map_err(|_| {
459                self.parse_error_with_context(format!("Expected array length, found: {s}"))
460            })?
461        } else {
462            return Err(self.parse_error_with_context(format!(
463                "Expected array length, found {:?}",
464                self.current_token
465            )));
466        };
467
468        self.advance()?;
469
470        // Check for optional delimiter after length
471        let detected_delim = match &self.current_token {
472            Token::Delimiter(d) => {
473                let delim = *d;
474                self.advance()?;
475                Some(delim)
476            }
477            Token::String(s, _) if s == "," => {
478                self.advance()?;
479                Some(Delimiter::Comma)
480            }
481            Token::String(s, _) if s == "|" => {
482                self.advance()?;
483                Some(Delimiter::Pipe)
484            }
485            Token::String(s, _) if s == "\t" => {
486                self.advance()?;
487                Some(Delimiter::Tab)
488            }
489            _ => None,
490        };
491
492        // Default to comma if no delimiter specified
493        let active_delim = detected_delim.or(Some(Delimiter::Comma));
494
495        self.scanner.set_active_delimiter(active_delim);
496
497        if !matches!(self.current_token, Token::RightBracket) {
498            return Err(self.parse_error_with_context(format!(
499                "Expected ']', found {:?}",
500                self.current_token
501            )));
502        }
503        self.advance()?;
504
505        let fields = if matches!(self.current_token, Token::LeftBrace) {
506            self.advance()?;
507            let mut fields = Vec::new();
508
509            loop {
510                match &self.current_token {
511                    Token::String(s, _) => {
512                        fields.push(s.clone());
513                        self.advance()?;
514
515                        if matches!(self.current_token, Token::RightBrace) {
516                            break;
517                        }
518
519                        let is_delim = match &self.current_token {
520                            Token::Delimiter(_) => true,
521                            Token::String(s, _) if s == "," || s == "|" || s == "\t" => true,
522                            _ => false,
523                        };
524                        if is_delim {
525                            self.advance()?;
526                        } else {
527                            return Err(self.parse_error_with_context(format!(
528                                "Expected delimiter or '}}', found {:?}",
529                                self.current_token
530                            )));
531                        }
532                    }
533                    Token::RightBrace => break,
534                    _ => {
535                        return Err(self.parse_error_with_context(format!(
536                            "Expected field name, found {:?}",
537                            self.current_token
538                        )))
539                    }
540                }
541            }
542
543            self.advance()?;
544            Some(fields)
545        } else {
546            None
547        };
548
549        if !matches!(self.current_token, Token::Colon) {
550            return Err(self.parse_error_with_context("Expected ':' after array header"));
551        }
552        self.advance()?;
553
554        Ok((length, detected_delim, fields))
555    }
556
557    fn parse_array(&mut self, depth: usize) -> ToonResult<Value> {
558        validate_depth(depth, MAX_DEPTH)?;
559
560        let (length, _detected_delim, fields) = self.parse_array_header()?;
561
562        if let Some(fields) = fields {
563            validation::validate_field_list(&fields)?;
564            self.parse_tabular_array(length, fields, depth)
565        } else {
566            self.parse_regular_array(length, depth)
567        }
568    }
569
    /// Parse the rows of a tabular array whose header has already been consumed.
    ///
    /// Each row is read from its own line and turned into an object that maps
    /// the names in `fields` to the row's cells. At most `length` rows are read.
    ///
    /// * An empty cell (a delimiter, newline or end of input where a value is
    ///   expected) becomes an empty string.
    /// * Strict mode: rows must be indented by exactly
    ///   `indent width * (depth + 1)` spaces, must contain a value for every
    ///   field, must end at a newline, and may not be separated by blank lines;
    ///   running out of input before `length` rows is an error.
    /// * Non-strict mode: missing trailing cells are filled with `null` and
    ///   unexpected trailing tokens on a row are skipped up to the next newline.
    /// * In both modes a delimiter after the last field is an error, as is an
    ///   additional row-looking line (same indentation, not a `key:` pair) after
    ///   the last expected row.
    ///
    /// The final row count is checked with `validation::validate_array_length`.
    ///
    /// # Errors
    ///
    /// Returns a parse error for any of the violations listed above, or
    /// propagates scanner/validation errors.
    fn parse_tabular_array(
        &mut self,
        length: usize,
        fields: Vec<String>,
        depth: usize,
    ) -> ToonResult<Value> {
        let mut rows = Vec::new();

        // Rows never share a line with the header.
        if !matches!(self.current_token, Token::Newline) {
            return Err(self
                .parse_error_with_context("Expected newline after tabular array header")
                .with_suggestion("Tabular arrays must have rows on separate lines"));
        }
        self.skip_newlines()?;

        for row_index in 0..length {
            // Input ended before all declared rows were read.
            if matches!(self.current_token, Token::Eof) {
                if self.options.strict {
                    return Err(self.parse_error_with_context(format!(
                        "Expected {} rows, but got {} before EOF",
                        length,
                        rows.len()
                    )));
                }
                break;
            }

            let current_indent = self.scanner.get_last_line_indent();
            let expected_indent = self.options.indent.get_spaces() * (depth + 1);

            if self.options.strict {
                self.validate_indentation(current_indent)?;

                if current_indent != expected_indent {
                    return Err(self.parse_error_with_context(format!(
                        "Invalid indentation for tabular row: expected {expected_indent} spaces, \
                         found {current_indent}"
                    )));
                }
            }

            let mut row = Map::new();

            for (field_index, field) in fields.iter().enumerate() {
                // Skip delimiter before each field except the first
                if field_index > 0 {
                    if matches!(self.current_token, Token::Delimiter(_))
                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
                    {
                        self.advance()?;
                    } else {
                        return Err(self
                            .parse_error_with_context(format!(
                                "Expected delimiter, found {:?}",
                                self.current_token
                            ))
                            .with_suggestion(format!(
                                "Tabular row {} field {} needs a delimiter",
                                row_index + 1,
                                field_index + 1
                            )));
                    }
                }

                // Empty values show up as delimiters or newlines
                let value = if matches!(self.current_token, Token::Delimiter(_))
                    || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
                    || matches!(self.current_token, Token::Newline | Token::Eof)
                {
                    Value::String(String::new())
                } else {
                    self.parse_tabular_field_value()?
                };

                row.insert(field.clone(), value);

                // Validate row completeness
                if field_index < fields.len() - 1 {
                    // Not the last field - shouldn't hit newline yet
                    if matches!(self.current_token, Token::Newline | Token::Eof) {
                        if self.options.strict {
                            return Err(self
                                .parse_error_with_context(format!(
                                    "Tabular row {}: expected {} values, but found only {}",
                                    row_index + 1,
                                    fields.len(),
                                    field_index + 1
                                ))
                                .with_suggestion(format!(
                                    "Row {} should have exactly {} values",
                                    row_index + 1,
                                    fields.len()
                                )));
                        } else {
                            // Fill remaining fields with null in non-strict mode
                            for field in fields.iter().skip(field_index + 1) {
                                row.insert(field.clone(), Value::Null);
                            }
                            break;
                        }
                    }
                } else if !matches!(self.current_token, Token::Newline | Token::Eof)
                    && (matches!(self.current_token, Token::Delimiter(_))
                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t"))
                {
                    // Last field but there's another delimiter - too many values
                    return Err(self
                        .parse_error_with_context(format!(
                            "Tabular row {}: expected {} values, but found extra values",
                            row_index + 1,
                            fields.len()
                        ))
                        .with_suggestion(format!(
                            "Row {} should have exactly {} values",
                            row_index + 1,
                            fields.len()
                        )));
                }
            }

            // Non-strict safety net: pad the row with nulls if it still has
            // fewer entries than there are fields.
            if !self.options.strict && row.len() < fields.len() {
                for field in fields.iter().skip(row.len()) {
                    row.insert(field.clone(), Value::Null);
                }
            }

            rows.push(Value::Object(row));

            if matches!(self.current_token, Token::Eof) {
                break;
            }

            // Something other than a newline follows the row's last cell.
            if !matches!(self.current_token, Token::Newline) {
                if !self.options.strict {
                    // Lenient mode: discard the rest of the line.
                    while !matches!(self.current_token, Token::Newline | Token::Eof) {
                        self.advance()?;
                    }
                    if matches!(self.current_token, Token::Eof) {
                        break;
                    }
                } else {
                    return Err(self.parse_error_with_context(format!(
                        "Expected newline after tabular row {}",
                        row_index + 1
                    )));
                }
            }

            if row_index + 1 < length {
                // More rows expected: consume the row-terminating newline. A
                // second newline right after it means a blank line.
                self.advance()?;
                if self.options.strict && matches!(self.current_token, Token::Newline) {
                    return Err(self.parse_error_with_context(
                        "Blank lines are not allowed inside tabular arrays in strict mode",
                    ));
                }

                self.skip_newlines()?;
            } else if matches!(self.current_token, Token::Newline) {
                // After the last row, check if there are extra rows
                self.advance()?;
                self.skip_newlines()?;

                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
                let actual_indent = self.scanner.get_last_line_indent();

                // If something at the same indent level, it might be a new row (error)
                // unless it's a key-value pair (which belongs to parent)
                if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) {
                    // A string token whose next scanner character is ':' is
                    // treated as a key.
                    let is_key_value = matches!(self.current_token, Token::String(_, _))
                        && matches!(self.scanner.peek(), Some(':'));

                    if !is_key_value {
                        return Err(self.parse_error_with_context(format!(
                            "Array length mismatch: expected {length} rows, but more rows found",
                        )));
                    }
                }
            }
        }

        validation::validate_array_length(length, rows.len())?;

        Ok(Value::Array(rows))
    }
754
755    fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult<Value> {
756        let mut items = Vec::new();
757
758        match &self.current_token {
759            Token::Newline => {
760                self.skip_newlines()?;
761
762                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
763
764                for i in 0..length {
765                    let current_indent = self.scanner.get_last_line_indent();
766                    if self.options.strict {
767                        self.validate_indentation(current_indent)?;
768
769                        if current_indent != expected_indent {
770                            return Err(self.parse_error_with_context(format!(
771                                "Invalid indentation for list item: expected {expected_indent} \
772                                 spaces, found {current_indent}"
773                            )));
774                        }
775                    }
776                    if !matches!(self.current_token, Token::Dash) {
777                        return Err(self
778                            .parse_error_with_context(format!(
779                                "Expected '-' for list item, found {:?}",
780                                self.current_token
781                            ))
782                            .with_suggestion(format!(
783                                "List arrays need '-' prefix for each item (item {} of {})",
784                                i + 1,
785                                length
786                            )));
787                    }
788                    self.advance()?;
789
790                    let value = if matches!(self.current_token, Token::Newline | Token::Eof) {
791                        Value::Object(Map::new())
792                    } else if matches!(self.current_token, Token::LeftBracket) {
793                        self.parse_array(depth + 1)?
794                    } else if let Token::String(s, _) = &self.current_token {
795                        let key = s.clone();
796                        self.advance()?;
797
798                        if matches!(self.current_token, Token::Colon | Token::LeftBracket) {
799                            // This is an object: key followed by colon or array bracket
800                            let first_value = if matches!(self.current_token, Token::LeftBracket) {
801                                self.parse_array(depth + 1)?
802                            } else {
803                                self.advance()?;
804                                // Handle nested arrays: "key: [2]: ..."
805                                if matches!(self.current_token, Token::LeftBracket) {
806                                    self.parse_array(depth + 2)?
807                                } else {
808                                    self.parse_field_value(depth + 2)?
809                                }
810                            };
811
812                            let mut obj = Map::new();
813                            obj.insert(key, first_value);
814
815                            let field_indent = self.options.indent.get_spaces() * (depth + 2);
816
817                            // Check if there are more fields at the same indentation level
818                            let should_parse_more_fields =
819                                if matches!(self.current_token, Token::Newline) {
820                                    let next_indent = self.scanner.count_leading_spaces();
821
822                                    if next_indent < field_indent {
823                                        false
824                                    } else {
825                                        self.advance()?;
826
827                                        if !self.options.strict {
828                                            self.skip_newlines()?;
829                                        }
830                                        true
831                                    }
832                                } else if matches!(self.current_token, Token::String(_, _)) {
833                                    // When already positioned at a field key, check its indent
834                                    let current_indent = self.scanner.get_last_line_indent();
835                                    current_indent == field_indent
836                                } else {
837                                    false
838                                };
839
840                            // Parse additional fields if they're at the right indentation
841                            if should_parse_more_fields {
842                                while !matches!(self.current_token, Token::Eof) {
843                                    let current_indent = self.scanner.get_last_line_indent();
844
845                                    if current_indent < field_indent {
846                                        break;
847                                    }
848
849                                    if current_indent != field_indent && self.options.strict {
850                                        break;
851                                    }
852
853                                    // Stop if we hit the next list item
854                                    if matches!(self.current_token, Token::Dash) {
855                                        break;
856                                    }
857
858                                    let field_key = match &self.current_token {
859                                        Token::String(s, _) => s.clone(),
860                                        _ => break,
861                                    };
862                                    self.advance()?;
863
864                                    let field_value =
865                                        if matches!(self.current_token, Token::LeftBracket) {
866                                            self.parse_array(depth + 2)?
867                                        } else if matches!(self.current_token, Token::Colon) {
868                                            self.advance()?;
869                                            if matches!(self.current_token, Token::LeftBracket) {
870                                                self.parse_array(depth + 2)?
871                                            } else {
872                                                self.parse_field_value(depth + 2)?
873                                            }
874                                        } else {
875                                            break;
876                                        };
877
878                                    obj.insert(field_key, field_value);
879
880                                    if matches!(self.current_token, Token::Newline) {
881                                        let next_indent = self.scanner.count_leading_spaces();
882                                        if next_indent < field_indent {
883                                            break;
884                                        }
885                                        self.advance()?;
886                                        if !self.options.strict {
887                                            self.skip_newlines()?;
888                                        }
889                                    } else {
890                                        break;
891                                    }
892                                }
893                            }
894
895                            Value::Object(obj)
896                        } else if matches!(self.current_token, Token::LeftBracket) {
897                            // Array as object value: "key[2]: ..."
898                            let array_value = self.parse_array(depth + 1)?;
899                            let mut obj = Map::new();
900                            obj.insert(key, array_value);
901                            Value::Object(obj)
902                        } else {
903                            // Plain string value - join consecutive string tokens
904                            let mut accumulated = key;
905                            while let Token::String(next, _) = &self.current_token {
906                                if !accumulated.is_empty() {
907                                    accumulated.push(' ');
908                                }
909                                accumulated.push_str(next);
910                                self.advance()?;
911                            }
912                            Value::String(accumulated)
913                        }
914                    } else {
915                        self.parse_primitive()?
916                    };
917
918                    items.push(value);
919
920                    if items.len() < length {
921                        if matches!(self.current_token, Token::Newline) {
922                            self.advance()?;
923
924                            if self.options.strict && matches!(self.current_token, Token::Newline) {
925                                return Err(self.parse_error_with_context(
926                                    "Blank lines are not allowed inside list arrays in strict mode",
927                                ));
928                            }
929
930                            self.skip_newlines()?;
931                        } else if !matches!(self.current_token, Token::Dash) {
932                            return Err(self.parse_error_with_context(format!(
933                                "Expected newline or next list item after list item {}",
934                                i + 1
935                            )));
936                        }
937                    } else if matches!(self.current_token, Token::Newline) {
938                        // After the last item, check for extra items
939                        self.advance()?;
940                        self.skip_newlines()?;
941
942                        let list_indent = self.options.indent.get_spaces() * (depth + 1);
943                        let actual_indent = self.scanner.get_last_line_indent();
944                        // If we see another dash at the same indent, there are too many items
945                        if actual_indent == list_indent && matches!(self.current_token, Token::Dash)
946                        {
947                            return Err(self.parse_error_with_context(format!(
948                                "Array length mismatch: expected {length} items, but more items \
949                                 found",
950                            )));
951                        }
952                    }
953                }
954            }
955            _ => {
956                for i in 0..length {
957                    if i > 0 {
958                        if matches!(self.current_token, Token::Delimiter(_))
959                            || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
960                        {
961                            self.advance()?;
962                        } else {
963                            return Err(self
964                                .parse_error_with_context(format!(
965                                    "Expected delimiter, found {:?}",
966                                    self.current_token
967                                ))
968                                .with_suggestion(format!(
969                                    "Expected delimiter between items (item {} of {})",
970                                    i + 1,
971                                    length
972                                )));
973                        }
974                    }
975
976                    let value = if matches!(self.current_token, Token::Delimiter(_))
977                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
978                        || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length)
979                    {
980                        Value::String(String::new())
981                    } else if matches!(self.current_token, Token::LeftBracket) {
982                        self.parse_array(depth + 1)?
983                    } else {
984                        self.parse_primitive()?
985                    };
986
987                    items.push(value);
988                }
989            }
990        }
991
992        validation::validate_array_length(length, items.len())?;
993
994        if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) {
995            return Err(self.parse_error_with_context(format!(
996                "Array length mismatch: expected {length} items, but more items found",
997            )));
998        }
999
1000        Ok(Value::Array(items))
1001    }
1002
1003    fn parse_tabular_field_value(&mut self) -> ToonResult<Value> {
1004        match &self.current_token {
1005            Token::Null => {
1006                self.advance()?;
1007                Ok(Value::Null)
1008            }
1009            Token::Bool(b) => {
1010                let val = *b;
1011                self.advance()?;
1012                Ok(Value::Bool(val))
1013            }
1014            Token::Integer(i) => {
1015                let val = *i;
1016                self.advance()?;
1017                Ok(Number::from(val).into())
1018            }
1019            Token::Number(n) => {
1020                let val = *n;
1021                self.advance()?;
1022                // If the float is actually an integer, represent it as such
1023                if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1024                    Ok(Number::from(val as i64).into())
1025                } else {
1026                    Ok(Number::from_f64(val)
1027                        .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1028                        .into())
1029                }
1030            }
1031            Token::String(s, _) => {
1032                // Tabular fields can have multiple string tokens joined with spaces
1033                let mut accumulated = s.clone();
1034                self.advance()?;
1035
1036                while let Token::String(next, _) = &self.current_token {
1037                    if !accumulated.is_empty() {
1038                        accumulated.push(' ');
1039                    }
1040                    accumulated.push_str(next);
1041                    self.advance()?;
1042                }
1043
1044                Ok(Value::String(accumulated))
1045            }
1046            _ => Err(self.parse_error_with_context(format!(
1047                "Expected primitive value, found {:?}",
1048                self.current_token
1049            ))),
1050        }
1051    }
1052
1053    fn parse_primitive(&mut self) -> ToonResult<Value> {
1054        match &self.current_token {
1055            Token::Null => {
1056                self.advance()?;
1057                Ok(Value::Null)
1058            }
1059            Token::Bool(b) => {
1060                let val = *b;
1061                self.advance()?;
1062                Ok(Value::Bool(val))
1063            }
1064            Token::Integer(i) => {
1065                let val = *i;
1066                self.advance()?;
1067                Ok(Number::from(val).into())
1068            }
1069            Token::Number(n) => {
1070                let val = *n;
1071                self.advance()?;
1072
1073                if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1074                    Ok(Number::from(val as i64).into())
1075                } else {
1076                    Ok(Number::from_f64(val)
1077                        .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1078                        .into())
1079                }
1080            }
1081            Token::String(s, _) => {
1082                let val = s.clone();
1083                self.advance()?;
1084                Ok(Value::String(val))
1085            }
1086            _ => Err(self.parse_error_with_context(format!(
1087                "Expected primitive value, found {:?}",
1088                self.current_token
1089            ))),
1090        }
1091    }
1092
1093    fn parse_error_with_context(&self, message: impl Into<String>) -> ToonError {
1094        let (line, column) = self.scanner.current_position();
1095        let message = message.into();
1096
1097        let context = self.get_error_context(line, column);
1098
1099        ToonError::ParseError {
1100            line,
1101            column,
1102            message,
1103            context: Some(Box::new(context)),
1104        }
1105    }
1106
1107    fn get_error_context(&self, line: usize, column: usize) -> ErrorContext {
1108        let lines: Vec<&str> = self.input.lines().collect();
1109
1110        let source_line = if line > 0 && line <= lines.len() {
1111            lines[line - 1].to_string()
1112        } else {
1113            String::new()
1114        };
1115
1116        let preceding_lines: Vec<String> = if line > 1 {
1117            lines[line.saturating_sub(3)..line - 1]
1118                .iter()
1119                .map(|s| s.to_string())
1120                .collect()
1121        } else {
1122            Vec::new()
1123        };
1124
1125        let following_lines: Vec<String> = if line < lines.len() {
1126            lines[line..line.saturating_add(2).min(lines.len())]
1127                .iter()
1128                .map(|s| s.to_string())
1129                .collect()
1130        } else {
1131            Vec::new()
1132        };
1133
1134        let indicator = if column > 0 {
1135            Some(format!("{:width$}^", "", width = column - 1))
1136        } else {
1137            None
1138        };
1139
1140        ErrorContext {
1141            source_line,
1142            preceding_lines,
1143            following_lines,
1144            suggestion: None,
1145            indicator,
1146        }
1147    }
1148
1149    fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> {
1150        if !self.options.strict {
1151            return Ok(());
1152        }
1153
1154        let indent_size = self.options.indent.get_spaces();
1155        // In strict mode, indentation must be a multiple of the configured indent size
1156        if indent_size > 0 && indent_amount > 0 && !indent_amount.is_multiple_of(indent_size) {
1157            Err(self.parse_error_with_context(format!(
1158                "Invalid indentation: found {indent_amount} spaces, but must be a multiple of \
1159                 {indent_size}"
1160            )))
1161        } else {
1162            Ok(())
1163        }
1164    }
1165}
1166
#[cfg(test)]
mod tests {
    use std::f64;

    use serde_json::json;

    use super::*;

    /// Decode `input` with the default options.
    fn parse(input: &str) -> ToonResult<Value> {
        Parser::new(input, DecodeOptions::default())?.parse()
    }

    #[test]
    fn test_parse_primitives() {
        let cases = [
            ("null", json!(null)),
            ("true", json!(true)),
            ("false", json!(false)),
            ("42", json!(42)),
            ("3.141592653589793", json!(f64::consts::PI)),
            ("hello", json!("hello")),
        ];

        for (input, expected) in cases {
            assert_eq!(parse(input).unwrap(), expected);
        }
    }

    #[test]
    fn test_parse_simple_object() {
        let decoded = parse("name: Alice\nage: 30").unwrap();

        assert_eq!(decoded["name"], json!("Alice"));
        assert_eq!(decoded["age"], json!(30));
    }

    #[test]
    fn test_parse_primitive_array() {
        let decoded = parse("tags[3]: a,b,c").unwrap();

        assert_eq!(decoded["tags"], json!(["a", "b", "c"]));
    }

    #[test]
    fn test_parse_empty_array() {
        let decoded = parse("items[0]:").unwrap();

        assert_eq!(decoded["items"], json!([]));
    }

    #[test]
    fn test_parse_tabular_array() {
        let decoded = parse("users[2]{id,name}:\n  1,Alice\n  2,Bob").unwrap();
        let expected = json!([
            {"id": 1, "name": "Alice"},
            {"id": 2, "name": "Bob"}
        ]);

        assert_eq!(decoded["users"], expected);
    }

    #[test]
    fn test_empty_tokens() {
        let decoded = parse("items[3]: a,,c").unwrap();

        assert_eq!(decoded["items"], json!(["a", "", "c"]));
    }

    #[test]
    fn test_empty_nested_object() {
        let decoded = parse("user:").unwrap();

        assert_eq!(decoded, json!({"user": {}}));
    }

    #[test]
    fn test_list_item_object() {
        let input = "items[2]:\n  - id: 1\n    name: First\n  - id: 2\n    name: Second";
        let decoded = parse(input).unwrap();
        let expected = json!([
            {"id": 1, "name": "First"},
            {"id": 2, "name": "Second"}
        ]);

        assert_eq!(decoded["items"], expected);
    }

    #[test]
    fn test_nested_array_in_list_item() {
        let decoded = parse("items[1]:\n  - tags[3]: a,b,c").unwrap();

        assert_eq!(decoded["items"], json!([{"tags": ["a", "b", "c"]}]));
    }

    #[test]
    fn test_two_level_siblings() {
        let decoded = parse("x:\n  y: 1\n  z: 2").unwrap();

        let x_obj = decoded
            .as_object()
            .unwrap()
            .get("x")
            .unwrap()
            .as_object()
            .unwrap();

        assert_eq!(x_obj.len(), 2, "x should have 2 keys");
        assert_eq!(x_obj.get("y").unwrap(), &json!(1));
        assert_eq!(x_obj.get("z").unwrap(), &json!(2));
    }

    #[test]
    fn test_nested_object_with_sibling() {
        // Expected shape: {"a":{"b":{"c":1},"d":2}}
        let decoded = parse("a:\n  b:\n    c: 1\n  d: 2").unwrap();

        let a_obj = decoded
            .as_object()
            .unwrap()
            .get("a")
            .unwrap()
            .as_object()
            .unwrap();

        assert_eq!(a_obj.len(), 2, "a should have 2 keys (b and d)");
        assert!(a_obj.contains_key("b"), "a should have key 'b'");
        assert!(a_obj.contains_key("d"), "a should have key 'd'");

        let b_obj = a_obj.get("b").unwrap().as_object().unwrap();

        assert_eq!(b_obj.len(), 1, "b should have only 1 key (c)");
        assert!(b_obj.contains_key("c"), "b should have key 'c'");
        assert!(!b_obj.contains_key("d"), "b should NOT have key 'd'");
    }
}
1287}