toon_format/decode/
parser.rs

1use serde_json::{
2    Map,
3    Number,
4    Value,
5};
6
7use crate::{
8    constants::{
9        KEYWORDS,
10        MAX_DEPTH,
11    },
12    decode::{
13        scanner::{
14            Scanner,
15            Token,
16        },
17        validation,
18    },
19    types::{
20        DecodeOptions,
21        Delimiter,
22        ErrorContext,
23        ToonError,
24        ToonResult,
25    },
26    utils::validation::validate_depth,
27};
28
/// Parser that builds JSON values from a sequence of tokens.
///
/// Tokens are pulled one at a time from an owned `Scanner`; the parser keeps
/// a single token of lookahead in `current_token`.
// NOTE(review): `delimiter` and `input` are stored by `Parser::new`, but no
// method in this part of the file reads them, which is presumably why unused
// warnings are silenced here — confirm, and consider narrowing the attribute.
#[allow(unused)]
pub struct Parser<'a> {
    /// Token source; also queried for line indentation and character lookahead.
    scanner: Scanner,
    /// The token currently under inspection (one-token lookahead).
    current_token: Token,
    /// Decode options; `strict`, `indent` and `delimiter` are read by the parser.
    options: DecodeOptions,
    /// Copy of `options.delimiter` taken when the parser was constructed.
    delimiter: Option<Delimiter>,
    /// The text handed to `Parser::new`.
    input: &'a str,
}
38
39impl<'a> Parser<'a> {
40    /// Create a new parser with the given input and options.
41    pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult<Self> {
42        let mut scanner = Scanner::new(input);
43        let chosen_delim = options.delimiter;
44        scanner.set_active_delimiter(chosen_delim);
45        let current_token = scanner.scan_token()?;
46
47        Ok(Self {
48            scanner,
49            current_token,
50            delimiter: chosen_delim,
51            options,
52            input,
53        })
54    }
55
56    /// Parse the input into a JSON value.
57    pub fn parse(&mut self) -> ToonResult<Value> {
58        if self.options.strict {
59            self.validate_indentation(self.scanner.get_last_line_indent())?;
60        }
61        let value = self.parse_value()?;
62
63        // In strict mode, check for trailing content at root level
64        if self.options.strict {
65            self.skip_newlines()?;
66            if !matches!(self.current_token, Token::Eof) {
67                return Err(self
68                    .parse_error_with_context(
69                        "Multiple values at root level are not allowed in strict mode",
70                    )
71                    .with_suggestion("Wrap multiple values in an object or array"));
72            }
73        }
74
75        Ok(value)
76    }
77
78    fn advance(&mut self) -> ToonResult<()> {
79        self.current_token = self.scanner.scan_token()?;
80        Ok(())
81    }
82
83    fn skip_newlines(&mut self) -> ToonResult<()> {
84        while matches!(self.current_token, Token::Newline) {
85            self.advance()?;
86        }
87        Ok(())
88    }
89
90    fn parse_value(&mut self) -> ToonResult<Value> {
91        self.parse_value_with_depth(0)
92    }
93
94    fn parse_value_with_depth(&mut self, depth: usize) -> ToonResult<Value> {
95        validate_depth(depth, MAX_DEPTH)?;
96
97        let had_newline = matches!(self.current_token, Token::Newline);
98        self.skip_newlines()?;
99
100        match &self.current_token {
101            Token::Null => {
102                // Peek ahead to see if this is a key (followed by ':') or a value
103                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
104                if next_char_is_colon {
105                    let key = KEYWORDS[0].to_string();
106                    self.advance()?;
107                    self.parse_object_with_initial_key(key, depth)
108                } else {
109                    self.advance()?;
110                    Ok(Value::Null)
111                }
112            }
113            Token::Bool(b) => {
114                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
115                if next_char_is_colon {
116                    let key = if *b {
117                        KEYWORDS[1].to_string()
118                    } else {
119                        KEYWORDS[2].to_string()
120                    };
121                    self.advance()?;
122                    self.parse_object_with_initial_key(key, depth)
123                } else {
124                    let val = *b;
125                    self.advance()?;
126                    Ok(Value::Bool(val))
127                }
128            }
129            Token::Integer(i) => {
130                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
131                if next_char_is_colon {
132                    let key = i.to_string();
133                    self.advance()?;
134                    self.parse_object_with_initial_key(key, depth)
135                } else {
136                    let val = *i;
137                    self.advance()?;
138                    Ok(serde_json::Number::from(val).into())
139                }
140            }
141            Token::Number(n) => {
142                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
143                if next_char_is_colon {
144                    let key = n.to_string();
145                    self.advance()?;
146                    self.parse_object_with_initial_key(key, depth)
147                } else {
148                    let val = *n;
149                    self.advance()?;
150                    // Normalize floats that are actually integers
151                    if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
152                        Ok(serde_json::Number::from(val as i64).into())
153                    } else {
154                        Ok(serde_json::Number::from_f64(val)
155                            .ok_or_else(|| {
156                                ToonError::InvalidInput(format!("Invalid number: {val}"))
157                            })?
158                            .into())
159                    }
160                }
161            }
162            Token::String(s, _) => {
163                let first = s.clone();
164                self.advance()?;
165
166                match &self.current_token {
167                    Token::Colon | Token::LeftBracket => {
168                        self.parse_object_with_initial_key(first, depth)
169                    }
170                    _ => {
171                        // Strings on new indented lines could be missing colons (keys) or values
172                        // Only error in strict mode when we know it's a new line
173                        if self.options.strict && depth > 0 && had_newline {
174                            return Err(self
175                                .parse_error_with_context(format!(
176                                    "Expected ':' after '{first}' in object context"
177                                ))
178                                .with_suggestion(
179                                    "Add ':' after the key, or place the value on the same line \
180                                     as the parent key",
181                                ));
182                        }
183
184                        // Multiple consecutive string tokens get joined with spaces
185                        let mut accumulated = first;
186                        while let Token::String(next, _) = &self.current_token {
187                            if !accumulated.is_empty() {
188                                accumulated.push(' ');
189                            }
190                            accumulated.push_str(next);
191                            self.advance()?;
192                        }
193                        Ok(Value::String(accumulated))
194                    }
195                }
196            }
197            Token::LeftBracket => self.parse_root_array(depth),
198            Token::Eof => Ok(Value::Object(Map::new())),
199            _ => self.parse_object(depth),
200        }
201    }
202
203    fn parse_object(&mut self, depth: usize) -> ToonResult<Value> {
204        validate_depth(depth, MAX_DEPTH)?;
205
206        let mut obj = Map::new();
207        // Track the indentation of the first key to ensure all keys align
208        let mut base_indent: Option<usize> = None;
209
210        loop {
211            while matches!(self.current_token, Token::Newline) {
212                self.advance()?;
213            }
214
215            if matches!(self.current_token, Token::Eof) {
216                break;
217            }
218
219            let current_indent = self.scanner.get_last_line_indent();
220
221            if self.options.strict {
222                self.validate_indentation(current_indent)?;
223            }
224
225            // Once we've seen the first key, all subsequent keys must match its indent
226            if let Some(expected) = base_indent {
227                if current_indent != expected {
228                    break;
229                }
230            } else {
231                base_indent = Some(current_indent);
232            }
233
234            let key = match &self.current_token {
235                Token::String(s, _) => s.clone(),
236                _ => {
237                    return Err(self
238                        .parse_error_with_context(format!(
239                            "Expected key, found {:?}",
240                            self.current_token
241                        ))
242                        .with_suggestion("Object keys must be strings"));
243                }
244            };
245            self.advance()?;
246
247            let value = if matches!(self.current_token, Token::LeftBracket) {
248                self.parse_array(depth)?
249            } else {
250                if !matches!(self.current_token, Token::Colon) {
251                    return Err(self
252                        .parse_error_with_context(format!(
253                            "Expected ':' or '[', found {:?}",
254                            self.current_token
255                        ))
256                        .with_suggestion("Use ':' for object values or '[' for arrays"));
257                }
258                self.advance()?;
259                self.parse_field_value(depth)?
260            };
261
262            obj.insert(key, value);
263        }
264
265        Ok(Value::Object(obj))
266    }
267
268    fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult<Value> {
269        validate_depth(depth, MAX_DEPTH)?;
270
271        let mut obj = Map::new();
272        let mut base_indent: Option<usize> = None;
273
274        // Validate indentation for the initial key if in strict mode
275        if self.options.strict {
276            let current_indent = self.scanner.get_last_line_indent();
277            self.validate_indentation(current_indent)?;
278        }
279
280        if matches!(self.current_token, Token::LeftBracket) {
281            let value = self.parse_array(depth)?;
282            obj.insert(key, value);
283        } else {
284            if !matches!(self.current_token, Token::Colon) {
285                return Err(self.parse_error_with_context(format!(
286                    "Expected ':', found {:?}",
287                    self.current_token
288                )));
289            }
290            self.advance()?;
291
292            let value = self.parse_field_value(depth)?;
293            obj.insert(key, value);
294        }
295
296        loop {
297            // Skip newlines and check if the next line belongs to this object
298            while matches!(self.current_token, Token::Newline) {
299                self.advance()?;
300
301                if !self.options.strict {
302                    while matches!(self.current_token, Token::Newline) {
303                        self.advance()?;
304                    }
305                }
306
307                if matches!(self.current_token, Token::Newline) {
308                    continue;
309                }
310
311                let next_indent = self.scanner.get_last_line_indent();
312
313                // Check if the next line is at the right indentation level
314                let should_continue = if let Some(expected) = base_indent {
315                    next_indent == expected
316                } else {
317                    // First field: use depth-based expected indent
318                    let current_depth_indent = self.options.indent.get_spaces() * depth;
319                    next_indent == current_depth_indent
320                };
321
322                if !should_continue {
323                    break;
324                }
325            }
326
327            if matches!(self.current_token, Token::Eof) {
328                break;
329            }
330
331            if !matches!(self.current_token, Token::String(_, _)) {
332                break;
333            }
334
335            if matches!(self.current_token, Token::Eof) {
336                break;
337            }
338
339            let current_indent = self.scanner.get_last_line_indent();
340
341            if let Some(expected) = base_indent {
342                if current_indent != expected {
343                    break;
344                }
345            }
346
347            if self.options.strict {
348                self.validate_indentation(current_indent)?;
349            }
350
351            if base_indent.is_none() {
352                base_indent = Some(current_indent);
353            }
354
355            let key = match &self.current_token {
356                Token::String(s, _) => s.clone(),
357                _ => break,
358            };
359            self.advance()?;
360
361            let value = if matches!(self.current_token, Token::LeftBracket) {
362                self.parse_array(depth)?
363            } else {
364                if !matches!(self.current_token, Token::Colon) {
365                    break;
366                }
367                self.advance()?;
368                self.parse_field_value(depth)?
369            };
370
371            obj.insert(key, value);
372        }
373
374        Ok(Value::Object(obj))
375    }
376
377    fn parse_field_value(&mut self, depth: usize) -> ToonResult<Value> {
378        validate_depth(depth, MAX_DEPTH)?;
379
380        if matches!(self.current_token, Token::Newline | Token::Eof) {
381            // After a colon on a new line, check if there are indented children
382            let has_children = if matches!(self.current_token, Token::Newline) {
383                let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
384                let next_indent = self.scanner.count_leading_spaces();
385                next_indent >= current_depth_indent
386            } else {
387                false
388            };
389
390            if has_children {
391                self.parse_value_with_depth(depth + 1)
392            } else {
393                // Empty object when colon is followed by newline with no children
394                Ok(Value::Object(Map::new()))
395            }
396        } else {
397            self.parse_value_with_depth(depth + 1)
398        }
399    }
400
401    fn parse_root_array(&mut self, depth: usize) -> ToonResult<Value> {
402        validate_depth(depth, MAX_DEPTH)?;
403
404        if !matches!(self.current_token, Token::LeftBracket) {
405            return Err(self.parse_error_with_context("Expected '[' at the start of root array"));
406        }
407
408        self.parse_array(depth)
409    }
410
411    fn parse_array_header(
412        &mut self,
413    ) -> ToonResult<(usize, Option<Delimiter>, Option<Vec<String>>)> {
414        if !matches!(self.current_token, Token::LeftBracket) {
415            return Err(self.parse_error_with_context("Expected '['"));
416        }
417        self.advance()?;
418
419        // Parse array length and optional embedded delimiter
420        // Supports formats like: [3], [#3], [3|], ["#3,"], etc.
421        let (length, embedded_delim) = if let Token::String(s, _) = &self.current_token {
422            if let Some(stripped) = s.strip_prefix('#') {
423                // Format: "#3|" or "#3," - length with embedded delimiter
424                let len_str = stripped
425                    .chars()
426                    .take_while(|c| c.is_ascii_digit())
427                    .collect::<String>();
428
429                if len_str.is_empty() {
430                    return Err(self.parse_error_with_context(format!(
431                        "Expected array length after #, found: {stripped}",
432                    )));
433                }
434
435                let length = len_str.parse::<usize>().map_err(|_| {
436                    self.parse_error_with_context(format!(
437                        "Expected array length after #, found: {len_str}",
438                    ))
439                })?;
440
441                let remainder = &stripped[len_str.len()..];
442                let embedded_delim = if remainder == "|" {
443                    Some(Delimiter::Pipe)
444                } else if remainder == "\t" {
445                    Some(Delimiter::Tab)
446                } else if remainder == "," {
447                    Some(Delimiter::Comma)
448                } else if remainder.is_empty() {
449                    None
450                } else {
451                    return Err(self.parse_error_with_context(format!(
452                        "Unexpected characters after length: {remainder}",
453                    )));
454                };
455
456                self.advance()?;
457                (length, embedded_delim)
458            } else if s == "#" {
459                // Format: "# 3" - separate # token followed by integer
460                self.advance()?;
461                match &self.current_token {
462                    Token::Integer(n) => {
463                        let val = *n as usize;
464                        self.advance()?;
465                        (val, None)
466                    }
467                    _ => {
468                        return Err(self.parse_error_with_context(format!(
469                            "Expected array length after #, found {:?}",
470                            self.current_token
471                        )))
472                    }
473                }
474            } else {
475                // Plain string that's a number: "3"
476                let val = s.parse::<usize>().map_err(|_| {
477                    self.parse_error_with_context(format!("Expected array length, found: {s}",))
478                })?;
479                (val, None)
480            }
481        } else if let Token::Integer(n) = &self.current_token {
482            let val = *n as usize;
483            self.advance()?;
484            (val, None)
485        } else {
486            return Err(self.parse_error_with_context(format!(
487                "Expected array length, found {:?}",
488                self.current_token
489            )));
490        };
491
492        // Delimiter can be embedded in the length string or appear as a separate token
493        let detected_delim = if let Some(delim) = embedded_delim {
494            Some(delim)
495        } else {
496            match &self.current_token {
497                Token::Delimiter(d) => {
498                    let delim = *d;
499                    self.advance()?;
500                    Some(delim)
501                }
502                Token::String(s, _) if s == "," => {
503                    self.advance()?;
504                    Some(Delimiter::Comma)
505                }
506                Token::String(s, _) if s == "|" => {
507                    self.advance()?;
508                    Some(Delimiter::Pipe)
509                }
510                Token::String(s, _) if s == "\t" => {
511                    self.advance()?;
512                    Some(Delimiter::Tab)
513                }
514                _ => None,
515            }
516        };
517
518        // Default to comma if no delimiter specified
519        let active_delim = detected_delim.or(Some(Delimiter::Comma));
520
521        self.scanner.set_active_delimiter(active_delim);
522
523        if !matches!(self.current_token, Token::RightBracket) {
524            return Err(self.parse_error_with_context(format!(
525                "Expected ']', found {:?}",
526                self.current_token
527            )));
528        }
529        self.advance()?;
530
531        let fields = if matches!(self.current_token, Token::LeftBrace) {
532            self.advance()?;
533            let mut fields = Vec::new();
534
535            loop {
536                match &self.current_token {
537                    Token::String(s, _) => {
538                        fields.push(s.clone());
539                        self.advance()?;
540
541                        if matches!(self.current_token, Token::RightBrace) {
542                            break;
543                        }
544
545                        let is_delim = match &self.current_token {
546                            Token::Delimiter(_) => true,
547                            Token::String(s, _) if s == "," || s == "|" || s == "\t" => true,
548                            _ => false,
549                        };
550                        if is_delim {
551                            self.advance()?;
552                        } else {
553                            return Err(self.parse_error_with_context(format!(
554                                "Expected delimiter or '}}', found {:?}",
555                                self.current_token
556                            )));
557                        }
558                    }
559                    Token::RightBrace => break,
560                    _ => {
561                        return Err(self.parse_error_with_context(format!(
562                            "Expected field name, found {:?}",
563                            self.current_token
564                        )))
565                    }
566                }
567            }
568
569            self.advance()?;
570            Some(fields)
571        } else {
572            None
573        };
574
575        if !matches!(self.current_token, Token::Colon) {
576            return Err(self.parse_error_with_context("Expected ':' after array header"));
577        }
578        self.advance()?;
579
580        Ok((length, detected_delim, fields))
581    }
582
583    fn parse_array(&mut self, depth: usize) -> ToonResult<Value> {
584        validate_depth(depth, MAX_DEPTH)?;
585
586        let (length, _detected_delim, fields) = self.parse_array_header()?;
587
588        if let Some(fields) = fields {
589            validation::validate_field_list(&fields)?;
590            self.parse_tabular_array(length, fields, depth)
591        } else {
592            self.parse_regular_array(length, depth)
593        }
594    }
595
596    fn parse_tabular_array(
597        &mut self,
598        length: usize,
599        fields: Vec<String>,
600        depth: usize,
601    ) -> ToonResult<Value> {
602        let mut rows = Vec::new();
603
604        if !matches!(self.current_token, Token::Newline) {
605            return Err(self
606                .parse_error_with_context("Expected newline after tabular array header")
607                .with_suggestion("Tabular arrays must have rows on separate lines"));
608        }
609        self.skip_newlines()?;
610
611        for row_index in 0..length {
612            if matches!(self.current_token, Token::Eof) {
613                if self.options.strict {
614                    return Err(self.parse_error_with_context(format!(
615                        "Expected {} rows, but got {} before EOF",
616                        length,
617                        rows.len()
618                    )));
619                }
620                break;
621            }
622
623            let current_indent = self.scanner.get_last_line_indent();
624            let expected_indent = self.options.indent.get_spaces() * (depth + 1);
625
626            if self.options.strict {
627                self.validate_indentation(current_indent)?;
628
629                if current_indent != expected_indent {
630                    return Err(self.parse_error_with_context(format!(
631                        "Invalid indentation for tabular row: expected {expected_indent} spaces, \
632                         found {current_indent}"
633                    )));
634                }
635            }
636
637            let mut row = Map::new();
638
639            for (field_index, field) in fields.iter().enumerate() {
640                // Skip delimiter before each field except the first
641                if field_index > 0 {
642                    if matches!(self.current_token, Token::Delimiter(_))
643                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
644                    {
645                        self.advance()?;
646                    } else {
647                        return Err(self
648                            .parse_error_with_context(format!(
649                                "Expected delimiter, found {:?}",
650                                self.current_token
651                            ))
652                            .with_suggestion(format!(
653                                "Tabular row {} field {} needs a delimiter",
654                                row_index + 1,
655                                field_index + 1
656                            )));
657                    }
658                }
659
660                // Empty values show up as delimiters or newlines
661                let value = if matches!(self.current_token, Token::Delimiter(_))
662                    || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
663                    || matches!(self.current_token, Token::Newline | Token::Eof)
664                {
665                    Value::String(String::new())
666                } else {
667                    self.parse_tabular_field_value()?
668                };
669
670                row.insert(field.clone(), value);
671
672                // Validate row completeness
673                if field_index < fields.len() - 1 {
674                    // Not the last field - shouldn't hit newline yet
675                    if matches!(self.current_token, Token::Newline | Token::Eof) {
676                        if self.options.strict {
677                            return Err(self
678                                .parse_error_with_context(format!(
679                                    "Tabular row {}: expected {} values, but found only {}",
680                                    row_index + 1,
681                                    fields.len(),
682                                    field_index + 1
683                                ))
684                                .with_suggestion(format!(
685                                    "Row {} should have exactly {} values",
686                                    row_index + 1,
687                                    fields.len()
688                                )));
689                        } else {
690                            // Fill remaining fields with null in non-strict mode
691                            for field in fields.iter().skip(field_index + 1) {
692                                row.insert(field.clone(), Value::Null);
693                            }
694                            break;
695                        }
696                    }
697                } else if !matches!(self.current_token, Token::Newline | Token::Eof)
698                    && (matches!(self.current_token, Token::Delimiter(_))
699                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t"))
700                {
701                    // Last field but there's another delimiter - too many values
702                    return Err(self
703                        .parse_error_with_context(format!(
704                            "Tabular row {}: expected {} values, but found extra values",
705                            row_index + 1,
706                            fields.len()
707                        ))
708                        .with_suggestion(format!(
709                            "Row {} should have exactly {} values",
710                            row_index + 1,
711                            fields.len()
712                        )));
713                }
714            }
715
716            if !self.options.strict && row.len() < fields.len() {
717                for field in fields.iter().skip(row.len()) {
718                    row.insert(field.clone(), Value::Null);
719                }
720            }
721
722            rows.push(Value::Object(row));
723
724            if matches!(self.current_token, Token::Eof) {
725                break;
726            }
727
728            if !matches!(self.current_token, Token::Newline) {
729                if !self.options.strict {
730                    while !matches!(self.current_token, Token::Newline | Token::Eof) {
731                        self.advance()?;
732                    }
733                    if matches!(self.current_token, Token::Eof) {
734                        break;
735                    }
736                } else {
737                    return Err(self.parse_error_with_context(format!(
738                        "Expected newline after tabular row {}",
739                        row_index + 1
740                    )));
741                }
742            }
743
744            if row_index + 1 < length {
745                self.advance()?;
746                if self.options.strict && matches!(self.current_token, Token::Newline) {
747                    return Err(self.parse_error_with_context(
748                        "Blank lines are not allowed inside tabular arrays in strict mode",
749                    ));
750                }
751
752                self.skip_newlines()?;
753            } else if matches!(self.current_token, Token::Newline) {
754                // After the last row, check if there are extra rows
755                self.advance()?;
756                self.skip_newlines()?;
757
758                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
759                let actual_indent = self.scanner.get_last_line_indent();
760
761                // If something at the same indent level, it might be a new row (error)
762                // unless it's a key-value pair (which belongs to parent)
763                if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) {
764                    let is_key_value = matches!(self.current_token, Token::String(_, _))
765                        && matches!(self.scanner.peek(), Some(':'));
766
767                    if !is_key_value {
768                        return Err(self.parse_error_with_context(format!(
769                            "Array length mismatch: expected {length} rows, but more rows found",
770                        )));
771                    }
772                }
773            }
774        }
775
776        validation::validate_array_length(length, rows.len())?;
777
778        Ok(Value::Array(rows))
779    }
780
781    fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult<Value> {
782        let mut items = Vec::new();
783
784        match &self.current_token {
785            Token::Newline => {
786                self.skip_newlines()?;
787
788                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
789
790                for i in 0..length {
791                    let current_indent = self.scanner.get_last_line_indent();
792                    if self.options.strict {
793                        self.validate_indentation(current_indent)?;
794
795                        if current_indent != expected_indent {
796                            return Err(self.parse_error_with_context(format!(
797                                "Invalid indentation for list item: expected {expected_indent} \
798                                 spaces, found {current_indent}"
799                            )));
800                        }
801                    }
802                    if !matches!(self.current_token, Token::Dash) {
803                        return Err(self
804                            .parse_error_with_context(format!(
805                                "Expected '-' for list item, found {:?}",
806                                self.current_token
807                            ))
808                            .with_suggestion(format!(
809                                "List arrays need '-' prefix for each item (item {} of {})",
810                                i + 1,
811                                length
812                            )));
813                    }
814                    self.advance()?;
815
816                    let value = if matches!(self.current_token, Token::Newline | Token::Eof) {
817                        Value::Object(Map::new())
818                    } else if matches!(self.current_token, Token::LeftBracket) {
819                        self.parse_array(depth + 1)?
820                    } else if let Token::String(s, _) = &self.current_token {
821                        let key = s.clone();
822                        self.advance()?;
823
824                        if matches!(self.current_token, Token::Colon | Token::LeftBracket) {
825                            // This is an object: key followed by colon or array bracket
826                            let first_value = if matches!(self.current_token, Token::LeftBracket) {
827                                self.parse_array(depth + 1)?
828                            } else {
829                                self.advance()?;
830                                // Handle nested arrays: "key: [2]: ..."
831                                if matches!(self.current_token, Token::LeftBracket) {
832                                    self.parse_array(depth + 2)?
833                                } else {
834                                    self.parse_field_value(depth + 1)?
835                                }
836                            };
837
838                            let mut obj = Map::new();
839                            obj.insert(key, first_value);
840
841                            let field_indent = self.options.indent.get_spaces() * (depth + 2);
842
843                            // Check if there are more fields at the same indentation level
844                            let should_parse_more_fields =
845                                if matches!(self.current_token, Token::Newline) {
846                                    let next_indent = self.scanner.count_leading_spaces();
847
848                                    if next_indent < field_indent {
849                                        false
850                                    } else {
851                                        self.advance()?;
852
853                                        if !self.options.strict {
854                                            self.skip_newlines()?;
855                                        }
856                                        true
857                                    }
858                                } else {
859                                    let current_indent = self.scanner.get_last_line_indent();
860                                    current_indent == field_indent
861                                };
862
863                            // Parse additional fields if they're at the right indentation
864                            if should_parse_more_fields {
865                                while !matches!(self.current_token, Token::Eof) {
866                                    let current_indent = self.scanner.get_last_line_indent();
867
868                                    if current_indent < field_indent {
869                                        break;
870                                    }
871
872                                    if current_indent != field_indent && self.options.strict {
873                                        break;
874                                    }
875
876                                    // Stop if we hit the next list item
877                                    if matches!(self.current_token, Token::Dash) {
878                                        break;
879                                    }
880
881                                    let field_key = match &self.current_token {
882                                        Token::String(s, _) => s.clone(),
883                                        _ => break,
884                                    };
885                                    self.advance()?;
886
887                                    let field_value =
888                                        if matches!(self.current_token, Token::LeftBracket) {
889                                            self.parse_array(depth + 2)?
890                                        } else if matches!(self.current_token, Token::Colon) {
891                                            self.advance()?;
892                                            if matches!(self.current_token, Token::LeftBracket) {
893                                                self.parse_array(depth + 2)?
894                                            } else {
895                                                self.parse_field_value(depth + 1)?
896                                            }
897                                        } else {
898                                            break;
899                                        };
900
901                                    obj.insert(field_key, field_value);
902
903                                    if matches!(self.current_token, Token::Newline) {
904                                        let next_indent = self.scanner.count_leading_spaces();
905                                        if next_indent < field_indent {
906                                            break;
907                                        }
908                                        self.advance()?;
909                                        if !self.options.strict {
910                                            self.skip_newlines()?;
911                                        }
912                                    } else {
913                                        break;
914                                    }
915                                }
916                            }
917
918                            Value::Object(obj)
919                        } else if matches!(self.current_token, Token::LeftBracket) {
920                            // Array as object value: "key[2]: ..."
921                            let array_value = self.parse_array(depth + 1)?;
922                            let mut obj = Map::new();
923                            obj.insert(key, array_value);
924                            Value::Object(obj)
925                        } else {
926                            // Plain string value - join consecutive string tokens
927                            let mut accumulated = key;
928                            while let Token::String(next, _) = &self.current_token {
929                                if !accumulated.is_empty() {
930                                    accumulated.push(' ');
931                                }
932                                accumulated.push_str(next);
933                                self.advance()?;
934                            }
935                            Value::String(accumulated)
936                        }
937                    } else {
938                        self.parse_primitive()?
939                    };
940
941                    items.push(value);
942
943                    if items.len() < length {
944                        if matches!(self.current_token, Token::Newline) {
945                            self.advance()?;
946
947                            if self.options.strict && matches!(self.current_token, Token::Newline) {
948                                return Err(self.parse_error_with_context(
949                                    "Blank lines are not allowed inside list arrays in strict mode",
950                                ));
951                            }
952
953                            self.skip_newlines()?;
954                        } else if !matches!(self.current_token, Token::Dash) {
955                            return Err(self.parse_error_with_context(format!(
956                                "Expected newline or next list item after list item {}",
957                                i + 1
958                            )));
959                        }
960                    } else if matches!(self.current_token, Token::Newline) {
961                        // After the last item, check for extra items
962                        self.advance()?;
963                        self.skip_newlines()?;
964
965                        let list_indent = self.options.indent.get_spaces() * (depth + 1);
966                        let actual_indent = self.scanner.get_last_line_indent();
967                        // If we see another dash at the same indent, there are too many items
968                        if actual_indent == list_indent && matches!(self.current_token, Token::Dash)
969                        {
970                            return Err(self.parse_error_with_context(format!(
971                                "Array length mismatch: expected {length} items, but more items \
972                                 found",
973                            )));
974                        }
975                    }
976                }
977            }
978            _ => {
979                for i in 0..length {
980                    if i > 0 {
981                        if matches!(self.current_token, Token::Delimiter(_))
982                            || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
983                        {
984                            self.advance()?;
985                        } else {
986                            return Err(self
987                                .parse_error_with_context(format!(
988                                    "Expected delimiter, found {:?}",
989                                    self.current_token
990                                ))
991                                .with_suggestion(format!(
992                                    "Expected delimiter between items (item {} of {})",
993                                    i + 1,
994                                    length
995                                )));
996                        }
997                    }
998
999                    let value = if matches!(self.current_token, Token::Delimiter(_))
1000                        || matches!(&self.current_token, Token::String(s, _) if s == "," || s == "|" || s == "\t")
1001                        || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length)
1002                    {
1003                        Value::String(String::new())
1004                    } else if matches!(self.current_token, Token::LeftBracket) {
1005                        self.parse_array(depth + 1)?
1006                    } else {
1007                        self.parse_primitive()?
1008                    };
1009
1010                    items.push(value);
1011                }
1012            }
1013        }
1014
1015        validation::validate_array_length(length, items.len())?;
1016
1017        if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) {
1018            return Err(self.parse_error_with_context(format!(
1019                "Array length mismatch: expected {length} items, but more items found",
1020            )));
1021        }
1022
1023        Ok(Value::Array(items))
1024    }
1025
1026    fn parse_tabular_field_value(&mut self) -> ToonResult<Value> {
1027        match &self.current_token {
1028            Token::Null => {
1029                self.advance()?;
1030                Ok(Value::Null)
1031            }
1032            Token::Bool(b) => {
1033                let val = *b;
1034                self.advance()?;
1035                Ok(Value::Bool(val))
1036            }
1037            Token::Integer(i) => {
1038                let val = *i;
1039                self.advance()?;
1040                Ok(Number::from(val).into())
1041            }
1042            Token::Number(n) => {
1043                let val = *n;
1044                self.advance()?;
1045                // If the float is actually an integer, represent it as such
1046                if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1047                    Ok(Number::from(val as i64).into())
1048                } else {
1049                    Ok(Number::from_f64(val)
1050                        .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1051                        .into())
1052                }
1053            }
1054            Token::String(s, _) => {
1055                // Tabular fields can have multiple string tokens joined with spaces
1056                let mut accumulated = s.clone();
1057                self.advance()?;
1058
1059                while let Token::String(next, _) = &self.current_token {
1060                    if !accumulated.is_empty() {
1061                        accumulated.push(' ');
1062                    }
1063                    accumulated.push_str(next);
1064                    self.advance()?;
1065                }
1066
1067                Ok(Value::String(accumulated))
1068            }
1069            _ => Err(self.parse_error_with_context(format!(
1070                "Expected primitive value, found {:?}",
1071                self.current_token
1072            ))),
1073        }
1074    }
1075
1076    fn parse_primitive(&mut self) -> ToonResult<Value> {
1077        match &self.current_token {
1078            Token::Null => {
1079                self.advance()?;
1080                Ok(Value::Null)
1081            }
1082            Token::Bool(b) => {
1083                let val = *b;
1084                self.advance()?;
1085                Ok(Value::Bool(val))
1086            }
1087            Token::Integer(i) => {
1088                let val = *i;
1089                self.advance()?;
1090                Ok(Number::from(val).into())
1091            }
1092            Token::Number(n) => {
1093                let val = *n;
1094                self.advance()?;
1095
1096                if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1097                    Ok(Number::from(val as i64).into())
1098                } else {
1099                    Ok(Number::from_f64(val)
1100                        .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1101                        .into())
1102                }
1103            }
1104            Token::String(s, _) => {
1105                let val = s.clone();
1106                self.advance()?;
1107                Ok(Value::String(val))
1108            }
1109            _ => Err(self.parse_error_with_context(format!(
1110                "Expected primitive value, found {:?}",
1111                self.current_token
1112            ))),
1113        }
1114    }
1115
1116    fn parse_error_with_context(&self, message: impl Into<String>) -> ToonError {
1117        let (line, column) = self.scanner.current_position();
1118        let message = message.into();
1119
1120        let context = self.get_error_context(line, column);
1121
1122        ToonError::ParseError {
1123            line,
1124            column,
1125            message,
1126            context: Some(Box::new(context)),
1127        }
1128    }
1129
1130    fn get_error_context(&self, line: usize, column: usize) -> ErrorContext {
1131        let lines: Vec<&str> = self.input.lines().collect();
1132
1133        let source_line = if line > 0 && line <= lines.len() {
1134            lines[line - 1].to_string()
1135        } else {
1136            String::new()
1137        };
1138
1139        let preceding_lines: Vec<String> = if line > 1 {
1140            lines[line.saturating_sub(3)..line - 1]
1141                .iter()
1142                .map(|s| s.to_string())
1143                .collect()
1144        } else {
1145            Vec::new()
1146        };
1147
1148        let following_lines: Vec<String> = if line < lines.len() {
1149            lines[line..line.saturating_add(2).min(lines.len())]
1150                .iter()
1151                .map(|s| s.to_string())
1152                .collect()
1153        } else {
1154            Vec::new()
1155        };
1156
1157        let indicator = if column > 0 {
1158            Some(format!("{:width$}^", "", width = column - 1))
1159        } else {
1160            None
1161        };
1162
1163        ErrorContext {
1164            source_line,
1165            preceding_lines,
1166            following_lines,
1167            suggestion: None,
1168            indicator,
1169        }
1170    }
1171
1172    fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> {
1173        if !self.options.strict {
1174            return Ok(());
1175        }
1176
1177        let indent_size = self.options.indent.get_spaces();
1178        // In strict mode, indentation must be a multiple of the configured indent size
1179        if indent_size > 0 && indent_amount > 0 && !indent_amount.is_multiple_of(indent_size) {
1180            Err(self.parse_error_with_context(format!(
1181                "Invalid indentation: found {indent_amount} spaces, but must be a multiple of \
1182                 {indent_size}"
1183            )))
1184        } else {
1185            Ok(())
1186        }
1187    }
1188}
1189
#[cfg(test)]
mod tests {
    use std::f64::consts::PI;

    use serde_json::json;

    use super::*;

    /// Decode `input` with the default `DecodeOptions`.
    fn parse(input: &str) -> ToonResult<Value> {
        let mut decoder = Parser::new(input, DecodeOptions::default())?;
        decoder.parse()
    }

    /// Each bare primitive decodes to the matching JSON scalar.
    #[test]
    fn test_parse_primitives() {
        let cases = [
            ("null", json!(null)),
            ("true", json!(true)),
            ("false", json!(false)),
            ("42", json!(42)),
            ("3.141592653589793", json!(PI)),
            ("hello", json!("hello")),
        ];

        for (input, expected) in cases {
            assert_eq!(parse(input).unwrap(), expected);
        }
    }

    /// `key: value` lines decode to object fields.
    #[test]
    fn test_parse_simple_object() {
        let decoded = parse("name: Alice\nage: 30").unwrap();

        assert_eq!(decoded["name"], json!("Alice"));
        assert_eq!(decoded["age"], json!(30));
    }

    /// An inline array of primitives.
    #[test]
    fn test_parse_primitive_array() {
        let decoded = parse("tags[3]: a,b,c").unwrap();

        assert_eq!(decoded["tags"], json!(["a", "b", "c"]));
    }

    /// A zero-length array header decodes to an empty array.
    #[test]
    fn test_parse_empty_array() {
        let decoded = parse("items[0]:").unwrap();

        assert_eq!(decoded["items"], json!([]));
    }

    /// A tabular array yields one object per row, keyed by the header fields.
    #[test]
    fn test_parse_tabular_array() {
        let decoded = parse("users[2]{id,name}:\n  1,Alice\n  2,Bob").unwrap();
        let expected = json!([
            {"id": 1, "name": "Alice"},
            {"id": 2, "name": "Bob"}
        ]);

        assert_eq!(decoded["users"], expected);
    }

    /// A missing value between two delimiters decodes to an empty string.
    #[test]
    fn test_empty_tokens() {
        let decoded = parse("items[3]: a,,c").unwrap();

        assert_eq!(decoded["items"], json!(["a", "", "c"]));
    }

    /// A key with nothing after the colon decodes to an empty object.
    #[test]
    fn test_empty_nested_object() {
        let decoded = parse("user:").unwrap();

        assert_eq!(decoded, json!({"user": {}}));
    }

    /// List items may be objects whose fields continue on the following lines.
    #[test]
    fn test_list_item_object() {
        let input = "items[2]:\n  - id: 1\n    name: First\n  - id: 2\n    name: Second";
        let decoded = parse(input).unwrap();
        let expected = json!([
            {"id": 1, "name": "First"},
            {"id": 2, "name": "Second"}
        ]);

        assert_eq!(decoded["items"], expected);
    }

    /// A list item may be an object whose first field is an inline array.
    #[test]
    fn test_nested_array_in_list_item() {
        let decoded = parse("items[1]:\n  - tags[3]: a,b,c").unwrap();

        assert_eq!(decoded["items"], json!([{"tags": ["a", "b", "c"]}]));
    }
}