// Source: toon_format/decode/parser.rs

1use serde_json::{
2    Map,
3    Number,
4    Value,
5};
6
7use crate::{
8    constants::{
9        KEYWORDS,
10        MAX_DEPTH,
11        QUOTED_KEY_MARKER,
12    },
13    decode::{
14        scanner::{
15            Scanner,
16            Token,
17        },
18        validation,
19    },
20    types::{
21        DecodeOptions,
22        Delimiter,
23        ErrorContext,
24        ToonError,
25        ToonResult,
26    },
27    utils::validation::validate_depth,
28};
29
/// Tells the array parser where an array sits, which decides how deep its
/// content lines are expected to be indented.
///
/// An array that is the first field of a list-item object has its content
/// (tabular rows, or items of a non-uniform array) at depth +2 relative to
/// the hyphen line; every other array has its content at depth +1.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ArrayParseContext {
    /// Ordinary position: content is expected at depth +1.
    Normal,

    /// The array is the first field of a list-item object: content is
    /// expected at depth +2 relative to the hyphen line.
    ListItemFirstField,
}
44
45/// Parser that builds JSON values from a sequence of tokens.
46#[allow(unused)]
47pub struct Parser<'a> {
48    scanner: Scanner,
49    current_token: Token,
50    options: DecodeOptions,
51    delimiter: Option<Delimiter>,
52    input: &'a str,
53}
54
55impl<'a> Parser<'a> {
56    /// Create a new parser with the given input and options.
57    pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult<Self> {
58        let mut scanner = Scanner::new(input);
59        let chosen_delim = options.delimiter;
60        scanner.set_active_delimiter(chosen_delim);
61        let current_token = scanner.scan_token()?;
62
63        Ok(Self {
64            scanner,
65            current_token,
66            delimiter: chosen_delim,
67            options,
68            input,
69        })
70    }
71
72    /// Parse the input into a JSON value.
73    pub fn parse(&mut self) -> ToonResult<Value> {
74        if self.options.strict {
75            self.validate_indentation(self.scanner.get_last_line_indent())?;
76        }
77        let value = self.parse_value()?;
78
79        // In strict mode, check for trailing content at root level
80        if self.options.strict {
81            self.skip_newlines()?;
82            if !matches!(self.current_token, Token::Eof) {
83                return Err(self
84                    .parse_error_with_context(
85                        "Multiple values at root level are not allowed in strict mode",
86                    )
87                    .with_suggestion("Wrap multiple values in an object or array"));
88            }
89        }
90
91        Ok(value)
92    }
93
94    fn advance(&mut self) -> ToonResult<()> {
95        self.current_token = self.scanner.scan_token()?;
96        Ok(())
97    }
98
99    fn skip_newlines(&mut self) -> ToonResult<()> {
100        while matches!(self.current_token, Token::Newline) {
101            self.advance()?;
102        }
103        Ok(())
104    }
105
106    fn parse_value(&mut self) -> ToonResult<Value> {
107        self.parse_value_with_depth(0)
108    }
109
110    fn parse_value_with_depth(&mut self, depth: usize) -> ToonResult<Value> {
111        validate_depth(depth, MAX_DEPTH)?;
112
113        let had_newline = matches!(self.current_token, Token::Newline);
114        self.skip_newlines()?;
115
116        match &self.current_token {
117            Token::Null => {
118                // Peek ahead to see if this is a key (followed by ':') or a value
119                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
120                if next_char_is_colon {
121                    let key = KEYWORDS[0].to_string();
122                    self.advance()?;
123                    self.parse_object_with_initial_key(key, depth)
124                } else {
125                    self.advance()?;
126                    Ok(Value::Null)
127                }
128            }
129            Token::Bool(b) => {
130                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
131                if next_char_is_colon {
132                    let key = if *b {
133                        KEYWORDS[1].to_string()
134                    } else {
135                        KEYWORDS[2].to_string()
136                    };
137                    self.advance()?;
138                    self.parse_object_with_initial_key(key, depth)
139                } else {
140                    let val = *b;
141                    self.advance()?;
142                    Ok(Value::Bool(val))
143                }
144            }
145            Token::Integer(i) => {
146                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
147                if next_char_is_colon {
148                    let key = i.to_string();
149                    self.advance()?;
150                    self.parse_object_with_initial_key(key, depth)
151                } else {
152                    let first_text = self.scanner.last_token_text().to_string();
153                    let val = *i;
154                    self.advance()?;
155                    // Check if followed by more value tokens on the same line
156                    match &self.current_token {
157                        Token::String(..)
158                        | Token::Integer(..)
159                        | Token::Number(..)
160                        | Token::Bool(..)
161                        | Token::Null => {
162                            let mut accumulated = first_text;
163                            while let Token::String(..)
164                            | Token::Integer(..)
165                            | Token::Number(..)
166                            | Token::Bool(..)
167                            | Token::Null = &self.current_token
168                            {
169                                let ws = self.scanner.last_whitespace_count().max(1);
170                                for _ in 0..ws {
171                                    accumulated.push(' ');
172                                }
173                                accumulated.push_str(self.scanner.last_token_text());
174                                self.advance()?;
175                            }
176                            Ok(Value::String(accumulated))
177                        }
178                        _ => Ok(serde_json::Number::from(val).into()),
179                    }
180                }
181            }
182            Token::Number(n) => {
183                let next_char_is_colon = matches!(self.scanner.peek(), Some(':'));
184                if next_char_is_colon {
185                    let key = n.to_string();
186                    self.advance()?;
187                    self.parse_object_with_initial_key(key, depth)
188                } else {
189                    let first_text = self.scanner.last_token_text().to_string();
190                    let val = *n;
191                    self.advance()?;
192                    // Check if followed by more value tokens on the same line
193                    match &self.current_token {
194                        Token::String(..)
195                        | Token::Integer(..)
196                        | Token::Number(..)
197                        | Token::Bool(..)
198                        | Token::Null => {
199                            let mut accumulated = first_text;
200                            while let Token::String(..)
201                            | Token::Integer(..)
202                            | Token::Number(..)
203                            | Token::Bool(..)
204                            | Token::Null = &self.current_token
205                            {
206                                let ws = self.scanner.last_whitespace_count().max(1);
207                                for _ in 0..ws {
208                                    accumulated.push(' ');
209                                }
210                                accumulated.push_str(self.scanner.last_token_text());
211                                self.advance()?;
212                            }
213                            Ok(Value::String(accumulated))
214                        }
215                        _ => {
216                            // Normalize floats that are actually integers
217                            if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64
218                            {
219                                Ok(serde_json::Number::from(val as i64).into())
220                            } else {
221                                Ok(serde_json::Number::from_f64(val)
222                                    .ok_or_else(|| {
223                                        ToonError::InvalidInput(format!("Invalid number: {val}"))
224                                    })?
225                                    .into())
226                            }
227                        }
228                    }
229                }
230            }
231            Token::String(s, _) => {
232                let first = s.clone();
233                self.advance()?;
234
235                match &self.current_token {
236                    Token::Colon | Token::LeftBracket => {
237                        self.parse_object_with_initial_key(first, depth)
238                    }
239                    _ => {
240                        // Strings on new indented lines could be missing colons (keys) or values
241                        // Only error in strict mode when we know it's a new line
242                        if self.options.strict && depth > 0 && had_newline {
243                            return Err(self
244                                .parse_error_with_context(format!(
245                                    "Expected ':' after '{first}' in object context"
246                                ))
247                                .with_suggestion(
248                                    "Add ':' after the key, or place the value on the same line \
249                                     as the parent key",
250                                ));
251                        }
252
253                        if matches!(self.current_token, Token::Newline | Token::Eof) {
254                            return Ok(Value::String(first));
255                        }
256                        // Root-level string value - join consecutive tokens with exact spacing
257                        let mut accumulated = first;
258                        while let Token::String(..)
259                        | Token::Integer(..)
260                        | Token::Number(..)
261                        | Token::Bool(..)
262                        | Token::Null = &self.current_token
263                        {
264                            let ws = self.scanner.last_whitespace_count().max(1);
265                            for _ in 0..ws {
266                                accumulated.push(' ');
267                            }
268                            accumulated.push_str(self.scanner.last_token_text());
269                            self.advance()?;
270                        }
271                        Ok(Value::String(accumulated))
272                    }
273                }
274            }
275            Token::LeftBracket => self.parse_root_array(depth),
276            Token::Eof => Ok(Value::Object(Map::new())),
277            _ => self.parse_object(depth),
278        }
279    }
280
281    fn parse_object(&mut self, depth: usize) -> ToonResult<Value> {
282        validate_depth(depth, MAX_DEPTH)?;
283
284        let mut obj = Map::new();
285        // Track the indentation of the first key to ensure all keys align
286        let mut base_indent: Option<usize> = None;
287
288        loop {
289            while matches!(self.current_token, Token::Newline) {
290                self.advance()?;
291            }
292
293            if matches!(self.current_token, Token::Eof) {
294                break;
295            }
296
297            let current_indent = self.scanner.get_last_line_indent();
298
299            if self.options.strict {
300                self.validate_indentation(current_indent)?;
301            }
302
303            // Once we've seen the first key, all subsequent keys must match its indent
304            if let Some(expected) = base_indent {
305                if current_indent != expected {
306                    break;
307                }
308            } else {
309                base_indent = Some(current_indent);
310            }
311
312            let key = match &self.current_token {
313                Token::String(s, was_quoted) => {
314                    // Mark quoted keys containing dots with a special prefix
315                    // so path expansion can skip them
316                    if *was_quoted && s.contains('.') {
317                        format!("{QUOTED_KEY_MARKER}{s}")
318                    } else {
319                        s.clone()
320                    }
321                }
322                _ => {
323                    return Err(self
324                        .parse_error_with_context(format!(
325                            "Expected key, found {:?}",
326                            self.current_token
327                        ))
328                        .with_suggestion("Object keys must be strings"));
329                }
330            };
331            self.advance()?;
332
333            let value = if matches!(self.current_token, Token::LeftBracket) {
334                self.parse_array(depth)?
335            } else {
336                if !matches!(self.current_token, Token::Colon) {
337                    return Err(self
338                        .parse_error_with_context(format!(
339                            "Expected ':' or '[', found {:?}",
340                            self.current_token
341                        ))
342                        .with_suggestion("Use ':' for object values or '[' for arrays"));
343                }
344                self.advance()?;
345                self.parse_field_value(depth)?
346            };
347
348            obj.insert(key, value);
349        }
350
351        Ok(Value::Object(obj))
352    }
353
354    fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult<Value> {
355        validate_depth(depth, MAX_DEPTH)?;
356
357        let mut obj = Map::new();
358        let mut base_indent: Option<usize> = None;
359
360        // Validate indentation for the initial key if in strict mode
361        if self.options.strict {
362            let current_indent = self.scanner.get_last_line_indent();
363            self.validate_indentation(current_indent)?;
364        }
365
366        if matches!(self.current_token, Token::LeftBracket) {
367            let value = self.parse_array(depth)?;
368            obj.insert(key, value);
369        } else {
370            if !matches!(self.current_token, Token::Colon) {
371                return Err(self.parse_error_with_context(format!(
372                    "Expected ':', found {:?}",
373                    self.current_token
374                )));
375            }
376            self.advance()?;
377
378            let value = self.parse_field_value(depth)?;
379            obj.insert(key, value);
380        }
381
382        loop {
383            // Skip newlines and check if the next line belongs to this object
384            while matches!(self.current_token, Token::Newline) {
385                self.advance()?;
386
387                if !self.options.strict {
388                    while matches!(self.current_token, Token::Newline) {
389                        self.advance()?;
390                    }
391                }
392
393                if matches!(self.current_token, Token::Newline) {
394                    continue;
395                }
396
397                let next_indent = self.scanner.get_last_line_indent();
398
399                // Check if the next line is at the right indentation level
400                let should_continue = if let Some(expected) = base_indent {
401                    next_indent == expected
402                } else {
403                    // First field: use depth-based expected indent
404                    let current_depth_indent = self.options.indent.get_spaces() * depth;
405                    next_indent == current_depth_indent
406                };
407
408                if !should_continue {
409                    break;
410                }
411            }
412
413            if matches!(self.current_token, Token::Eof) {
414                break;
415            }
416
417            if !matches!(self.current_token, Token::String(_, _)) {
418                break;
419            }
420
421            if matches!(self.current_token, Token::Eof) {
422                break;
423            }
424
425            let current_indent = self.scanner.get_last_line_indent();
426
427            if let Some(expected) = base_indent {
428                if current_indent != expected {
429                    break;
430                }
431            } else {
432                // verify first additional field matches expected depth
433                let expected_depth_indent = self.options.indent.get_spaces() * depth;
434                if current_indent != expected_depth_indent {
435                    break;
436                }
437            }
438
439            if self.options.strict {
440                self.validate_indentation(current_indent)?;
441            }
442
443            if base_indent.is_none() {
444                base_indent = Some(current_indent);
445            }
446
447            let key = match &self.current_token {
448                Token::String(s, was_quoted) => {
449                    // Mark quoted keys containing dots with a special prefix
450                    // so path expansion can skip them
451                    if *was_quoted && s.contains('.') {
452                        format!("{QUOTED_KEY_MARKER}{s}")
453                    } else {
454                        s.clone()
455                    }
456                }
457                _ => break,
458            };
459            self.advance()?;
460
461            let value = if matches!(self.current_token, Token::LeftBracket) {
462                self.parse_array(depth)?
463            } else {
464                if !matches!(self.current_token, Token::Colon) {
465                    break;
466                }
467                self.advance()?;
468                self.parse_field_value(depth)?
469            };
470
471            obj.insert(key, value);
472        }
473
474        Ok(Value::Object(obj))
475    }
476
477    fn parse_field_value(&mut self, depth: usize) -> ToonResult<Value> {
478        validate_depth(depth, MAX_DEPTH)?;
479
480        if matches!(self.current_token, Token::Newline | Token::Eof) {
481            let has_children = if matches!(self.current_token, Token::Newline) {
482                let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
483                let next_indent = self.scanner.count_leading_spaces();
484                next_indent >= current_depth_indent
485            } else {
486                false
487            };
488
489            if has_children {
490                self.parse_value_with_depth(depth + 1)
491            } else {
492                Ok(Value::Object(Map::new()))
493            }
494        } else if matches!(self.current_token, Token::LeftBracket) {
495            self.parse_value_with_depth(depth + 1)
496        } else {
497            // Check if there's more content after the current token
498            let token_text = self.scanner.last_token_text().to_string();
499            let (rest, space_count) = self.scanner.read_rest_of_line_with_space_info();
500
501            let result = if rest.is_empty() && space_count == 0 {
502                // Single token - convert directly to avoid redundant parsing
503                match &self.current_token {
504                    Token::String(s, _) => Ok(Value::String(s.clone())),
505                    Token::Integer(i) => Ok(serde_json::Number::from(*i).into()),
506                    Token::Number(n) => {
507                        let val = *n;
508                        if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
509                            Ok(serde_json::Number::from(val as i64).into())
510                        } else {
511                            Ok(serde_json::Number::from_f64(val)
512                                .ok_or_else(|| {
513                                    ToonError::InvalidInput(format!("Invalid number: {val}"))
514                                })?
515                                .into())
516                        }
517                    }
518                    Token::Bool(b) => Ok(Value::Bool(*b)),
519                    Token::Null => Ok(Value::Null),
520                    _ => Err(self.parse_error_with_context("Unexpected token after colon")),
521                }
522            } else {
523                // Multi-token value - reconstruct using original token text and re-parse
524                let mut value_str = match &self.current_token {
525                    Token::String(_, true) => {
526                        // Quoted strings: use last_token_text which includes quotes
527                        token_text.clone()
528                    }
529                    Token::String(_, false)
530                    | Token::Integer(_)
531                    | Token::Number(_)
532                    | Token::Bool(_)
533                    | Token::Null => token_text.clone(),
534                    _ => {
535                        return Err(self.parse_error_with_context("Unexpected token after colon"));
536                    }
537                };
538
539                // Preserve exact spacing from the original input
540                for _ in 0..space_count {
541                    value_str.push(' ');
542                }
543                value_str.push_str(&rest);
544
545                let token = self.scanner.parse_value_string(&value_str)?;
546                match token {
547                    Token::String(s, _) => Ok(Value::String(s)),
548                    Token::Integer(i) => Ok(serde_json::Number::from(i).into()),
549                    Token::Number(n) => {
550                        if n.is_finite() && n.fract() == 0.0 && n.abs() <= i64::MAX as f64 {
551                            Ok(serde_json::Number::from(n as i64).into())
552                        } else {
553                            Ok(serde_json::Number::from_f64(n)
554                                .ok_or_else(|| {
555                                    ToonError::InvalidInput(format!("Invalid number: {n}"))
556                                })?
557                                .into())
558                        }
559                    }
560                    Token::Bool(b) => Ok(Value::Bool(b)),
561                    Token::Null => Ok(Value::Null),
562                    _ => Err(ToonError::InvalidInput("Unexpected token type".to_string())),
563                }
564            }?;
565
566            self.current_token = self.scanner.scan_token()?;
567            Ok(result)
568        }
569    }
570
571    fn parse_root_array(&mut self, depth: usize) -> ToonResult<Value> {
572        validate_depth(depth, MAX_DEPTH)?;
573
574        if !matches!(self.current_token, Token::LeftBracket) {
575            return Err(self.parse_error_with_context("Expected '[' at the start of root array"));
576        }
577
578        self.parse_array(depth)
579    }
580
581    fn parse_array_header(
582        &mut self,
583    ) -> ToonResult<(usize, Option<Delimiter>, Option<Vec<String>>)> {
584        if !matches!(self.current_token, Token::LeftBracket) {
585            return Err(self.parse_error_with_context("Expected '['"));
586        }
587        self.advance()?;
588
589        // Parse array length (plain integer only)
590        // Supports formats: [N], [N|], [N\t] (no # marker)
591        let length = if let Token::Integer(n) = &self.current_token {
592            *n as usize
593        } else if let Token::String(s, _) = &self.current_token {
594            // Check if string starts with # - this marker is not supported
595            if s.starts_with('#') {
596                return Err(self
597                    .parse_error_with_context(
598                        "Length marker '#' is not supported. Use [N] format instead of [#N]",
599                    )
600                    .with_suggestion("Remove the '#' prefix from the array length"));
601            }
602
603            // Plain string that's a number: "3"
604            s.parse::<usize>().map_err(|_| {
605                self.parse_error_with_context(format!("Expected array length, found: {s}"))
606            })?
607        } else {
608            return Err(self.parse_error_with_context(format!(
609                "Expected array length, found {:?}",
610                self.current_token
611            )));
612        };
613
614        self.advance()?;
615
616        // Check for optional delimiter after length
617        let detected_delim = match &self.current_token {
618            Token::Delimiter(d) => {
619                let delim = *d;
620                self.advance()?;
621                Some(delim)
622            }
623            Token::String(s, _) if s == "," => {
624                self.advance()?;
625                Some(Delimiter::Comma)
626            }
627            Token::String(s, _) if s == "|" => {
628                self.advance()?;
629                Some(Delimiter::Pipe)
630            }
631            Token::String(s, _) if s == "\t" => {
632                self.advance()?;
633                Some(Delimiter::Tab)
634            }
635            _ => None,
636        };
637
638        // Default to comma if no delimiter specified
639        let active_delim = detected_delim.or(Some(Delimiter::Comma));
640
641        self.scanner.set_active_delimiter(active_delim);
642
643        if !matches!(self.current_token, Token::RightBracket) {
644            return Err(self.parse_error_with_context(format!(
645                "Expected ']', found {:?}",
646                self.current_token
647            )));
648        }
649        self.advance()?;
650
651        let fields = if matches!(self.current_token, Token::LeftBrace) {
652            self.advance()?;
653            let mut fields = Vec::new();
654
655            loop {
656                match &self.current_token {
657                    Token::String(s, _) => {
658                        fields.push(s.clone());
659                        self.advance()?;
660
661                        if matches!(self.current_token, Token::RightBrace) {
662                            break;
663                        }
664
665                        if matches!(self.current_token, Token::Delimiter(_)) {
666                            self.advance()?;
667                        } else {
668                            return Err(self.parse_error_with_context(format!(
669                                "Expected delimiter or '}}', found {:?}",
670                                self.current_token
671                            )));
672                        }
673                    }
674                    Token::RightBrace => break,
675                    _ => {
676                        return Err(self.parse_error_with_context(format!(
677                            "Expected field name, found {:?}",
678                            self.current_token
679                        )))
680                    }
681                }
682            }
683
684            self.advance()?;
685            Some(fields)
686        } else {
687            None
688        };
689
690        if !matches!(self.current_token, Token::Colon) {
691            return Err(self.parse_error_with_context("Expected ':' after array header"));
692        }
693        self.advance()?;
694
695        Ok((length, detected_delim, fields))
696    }
697
698    fn parse_array(&mut self, depth: usize) -> ToonResult<Value> {
699        self.parse_array_with_context(depth, ArrayParseContext::Normal)
700    }
701
702    fn parse_array_with_context(
703        &mut self,
704        depth: usize,
705        context: ArrayParseContext,
706    ) -> ToonResult<Value> {
707        validate_depth(depth, MAX_DEPTH)?;
708
709        let (length, _detected_delim, fields) = self.parse_array_header()?;
710
711        if let Some(fields) = fields {
712            validation::validate_field_list(&fields)?;
713            self.parse_tabular_array(length, &fields, depth, context)
714        } else {
715            // Non-tabular arrays as first field of list items require depth adjustment
716            // (items at depth +2 relative to hyphen, not the usual +1)
717            let adjusted_depth = match context {
718                ArrayParseContext::Normal => depth,
719                ArrayParseContext::ListItemFirstField => depth + 1,
720            };
721            self.parse_regular_array(length, adjusted_depth)
722        }
723    }
724
725    fn parse_tabular_array(
726        &mut self,
727        length: usize,
728        fields: &[String],
729        depth: usize,
730        context: ArrayParseContext,
731    ) -> ToonResult<Value> {
732        let mut rows = Vec::new();
733
734        if !matches!(self.current_token, Token::Newline) {
735            return Err(self
736                .parse_error_with_context("Expected newline after tabular array header")
737                .with_suggestion("Tabular arrays must have rows on separate lines"));
738        }
739        self.skip_newlines()?;
740
741        for row_index in 0..length {
742            if matches!(self.current_token, Token::Eof) {
743                if self.options.strict {
744                    return Err(self.parse_error_with_context(format!(
745                        "Expected {} rows, but got {} before EOF",
746                        length,
747                        rows.len()
748                    )));
749                }
750                break;
751            }
752
753            let current_indent = self.scanner.get_last_line_indent();
754
755            // Tabular arrays as first field of list-item objects require rows at depth +2
756            // (relative to hyphen), while normal tabular arrays use depth +1
757            let row_depth_offset = match context {
758                ArrayParseContext::Normal => 1,
759                ArrayParseContext::ListItemFirstField => 2,
760            };
761            let expected_indent = self.options.indent.get_spaces() * (depth + row_depth_offset);
762
763            if self.options.strict {
764                self.validate_indentation(current_indent)?;
765
766                if current_indent != expected_indent {
767                    return Err(self.parse_error_with_context(format!(
768                        "Invalid indentation for tabular row: expected {expected_indent} spaces, \
769                         found {current_indent}"
770                    )));
771                }
772            }
773
774            let mut row = Map::new();
775
776            for (field_index, field) in fields.iter().enumerate() {
777                // Skip delimiter before each field except the first
778                if field_index > 0 {
779                    if matches!(self.current_token, Token::Delimiter(_)) {
780                        self.advance()?;
781                    } else {
782                        return Err(self
783                            .parse_error_with_context(format!(
784                                "Expected delimiter, found {:?}",
785                                self.current_token
786                            ))
787                            .with_suggestion(format!(
788                                "Tabular row {} field {} needs a delimiter",
789                                row_index + 1,
790                                field_index + 1
791                            )));
792                    }
793                }
794
795                // Empty values show up as delimiters or newlines
796                let value = if matches!(self.current_token, Token::Delimiter(_))
797                    || matches!(self.current_token, Token::Newline | Token::Eof)
798                {
799                    Value::String(String::new())
800                } else {
801                    self.parse_tabular_field_value()?
802                };
803
804                row.insert(field.clone(), value);
805
806                // Validate row completeness
807                if field_index < fields.len() - 1 {
808                    // Not the last field - shouldn't hit newline yet
809                    if matches!(self.current_token, Token::Newline | Token::Eof) {
810                        if self.options.strict {
811                            return Err(self
812                                .parse_error_with_context(format!(
813                                    "Tabular row {}: expected {} values, but found only {}",
814                                    row_index + 1,
815                                    fields.len(),
816                                    field_index + 1
817                                ))
818                                .with_suggestion(format!(
819                                    "Row {} should have exactly {} values",
820                                    row_index + 1,
821                                    fields.len()
822                                )));
823                        } else {
824                            // Fill remaining fields with null in non-strict mode
825                            for field in fields.iter().skip(field_index + 1) {
826                                row.insert(field.clone(), Value::Null);
827                            }
828                            break;
829                        }
830                    }
831                } else if !matches!(self.current_token, Token::Newline | Token::Eof)
832                    && matches!(self.current_token, Token::Delimiter(_))
833                {
834                    // Last field but there's another delimiter - too many values
835                    return Err(self
836                        .parse_error_with_context(format!(
837                            "Tabular row {}: expected {} values, but found extra values",
838                            row_index + 1,
839                            fields.len()
840                        ))
841                        .with_suggestion(format!(
842                            "Row {} should have exactly {} values",
843                            row_index + 1,
844                            fields.len()
845                        )));
846                }
847            }
848
849            if !self.options.strict && row.len() < fields.len() {
850                for field in fields.iter().skip(row.len()) {
851                    row.insert(field.clone(), Value::Null);
852                }
853            }
854
855            rows.push(Value::Object(row));
856
857            if matches!(self.current_token, Token::Eof) {
858                break;
859            }
860
861            if !matches!(self.current_token, Token::Newline) {
862                if !self.options.strict {
863                    while !matches!(self.current_token, Token::Newline | Token::Eof) {
864                        self.advance()?;
865                    }
866                    if matches!(self.current_token, Token::Eof) {
867                        break;
868                    }
869                } else {
870                    return Err(self.parse_error_with_context(format!(
871                        "Expected newline after tabular row {}",
872                        row_index + 1
873                    )));
874                }
875            }
876
877            if row_index + 1 < length {
878                self.advance()?;
879                if self.options.strict && matches!(self.current_token, Token::Newline) {
880                    return Err(self.parse_error_with_context(
881                        "Blank lines are not allowed inside tabular arrays in strict mode",
882                    ));
883                }
884
885                self.skip_newlines()?;
886            } else if matches!(self.current_token, Token::Newline) {
887                // After the last row, check if there are extra rows
888                self.advance()?;
889                self.skip_newlines()?;
890
891                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
892                let actual_indent = self.scanner.get_last_line_indent();
893
894                // If something at the same indent level, it might be a new row (error)
895                // unless it's a key-value pair (which belongs to parent)
896                if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) {
897                    let is_key_value = matches!(self.current_token, Token::String(_, _))
898                        && matches!(self.scanner.peek(), Some(':'));
899
900                    if !is_key_value {
901                        return Err(self.parse_error_with_context(format!(
902                            "Array length mismatch: expected {length} rows, but more rows found",
903                        )));
904                    }
905                }
906            }
907        }
908
909        validation::validate_array_length(length, rows.len())?;
910
911        Ok(Value::Array(rows))
912    }
913
914    fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult<Value> {
915        let mut items = Vec::new();
916
917        // Empty arrays: return immediately without consuming the trailing newline,
918        // so the caller's field-parsing loop can correctly check indentation.
919        if length == 0 {
920            return Ok(Value::Array(items));
921        }
922
923        match &self.current_token {
924            Token::Newline => {
925                self.skip_newlines()?;
926
927                let expected_indent = self.options.indent.get_spaces() * (depth + 1);
928
929                for i in 0..length {
930                    let current_indent = self.scanner.get_last_line_indent();
931                    if self.options.strict {
932                        self.validate_indentation(current_indent)?;
933
934                        if current_indent != expected_indent {
935                            return Err(self.parse_error_with_context(format!(
936                                "Invalid indentation for list item: expected {expected_indent} \
937                                 spaces, found {current_indent}"
938                            )));
939                        }
940                    }
941                    if !matches!(self.current_token, Token::Dash) {
942                        return Err(self
943                            .parse_error_with_context(format!(
944                                "Expected '-' for list item, found {:?}",
945                                self.current_token
946                            ))
947                            .with_suggestion(format!(
948                                "List arrays need '-' prefix for each item (item {} of {})",
949                                i + 1,
950                                length
951                            )));
952                    }
953                    self.advance()?;
954
955                    let value = if matches!(self.current_token, Token::Newline | Token::Eof) {
956                        Value::Object(Map::new())
957                    } else if matches!(self.current_token, Token::LeftBracket) {
958                        self.parse_array(depth + 1)?
959                    } else if let Token::String(s, _) = &self.current_token {
960                        let key = s.clone();
961                        self.advance()?;
962
963                        if matches!(self.current_token, Token::Colon | Token::LeftBracket) {
964                            // This is an object: key followed by colon or array bracket
965                            // First field of list-item object may be an array requiring special
966                            // indentation
967                            let first_value = if matches!(self.current_token, Token::LeftBracket) {
968                                // Array directly after key (e.g., "- key[N]:")
969                                // Use ListItemFirstField context to apply correct indentation
970                                self.parse_array_with_context(
971                                    depth + 1,
972                                    ArrayParseContext::ListItemFirstField,
973                                )?
974                            } else {
975                                self.advance()?;
976                                // Handle nested arrays: "key: [2]: ..."
977                                if matches!(self.current_token, Token::LeftBracket) {
978                                    // Array after colon - not directly on hyphen line, use normal
979                                    // context
980                                    self.parse_array(depth + 2)?
981                                } else {
982                                    self.parse_field_value(depth + 2)?
983                                }
984                            };
985
986                            let mut obj = Map::new();
987                            obj.insert(key, first_value);
988
989                            let field_indent = self.options.indent.get_spaces() * (depth + 2);
990
991                            // Check if there are more fields at the same indentation level
992                            let should_parse_more_fields =
993                                if matches!(self.current_token, Token::Newline) {
994                                    let next_indent = self.scanner.count_leading_spaces();
995
996                                    if next_indent < field_indent {
997                                        false
998                                    } else {
999                                        self.advance()?;
1000
1001                                        if !self.options.strict {
1002                                            self.skip_newlines()?;
1003                                        }
1004                                        true
1005                                    }
1006                                } else if matches!(self.current_token, Token::String(_, _)) {
1007                                    // When already positioned at a field key, check its indent
1008                                    let current_indent = self.scanner.get_last_line_indent();
1009                                    current_indent == field_indent
1010                                } else {
1011                                    false
1012                                };
1013
1014                            // Parse additional fields if they're at the right indentation
1015                            if should_parse_more_fields {
1016                                while !matches!(self.current_token, Token::Eof) {
1017                                    let current_indent = self.scanner.get_last_line_indent();
1018
1019                                    if current_indent < field_indent {
1020                                        break;
1021                                    }
1022
1023                                    if current_indent != field_indent && self.options.strict {
1024                                        break;
1025                                    }
1026
1027                                    // Stop if we hit the next list item
1028                                    if matches!(self.current_token, Token::Dash) {
1029                                        break;
1030                                    }
1031
1032                                    let field_key = match &self.current_token {
1033                                        Token::String(s, _) => s.clone(),
1034                                        _ => break,
1035                                    };
1036                                    self.advance()?;
1037
1038                                    let field_value =
1039                                        if matches!(self.current_token, Token::LeftBracket) {
1040                                            self.parse_array(depth + 2)?
1041                                        } else if matches!(self.current_token, Token::Colon) {
1042                                            self.advance()?;
1043                                            if matches!(self.current_token, Token::LeftBracket) {
1044                                                self.parse_array(depth + 2)?
1045                                            } else {
1046                                                self.parse_field_value(depth + 2)?
1047                                            }
1048                                        } else {
1049                                            break;
1050                                        };
1051
1052                                    obj.insert(field_key, field_value);
1053
1054                                    if matches!(self.current_token, Token::Newline) {
1055                                        let next_indent = self.scanner.count_leading_spaces();
1056                                        if next_indent < field_indent {
1057                                            break;
1058                                        }
1059                                        self.advance()?;
1060                                        if !self.options.strict {
1061                                            self.skip_newlines()?;
1062                                        }
1063                                    } else if matches!(self.current_token, Token::String(_, _)) {
1064                                        // Tabular array parser already consumed the newline
1065                                        // and advanced to the next token — check indent
1066                                        let current_indent = self.scanner.get_last_line_indent();
1067                                        if current_indent != field_indent {
1068                                            break;
1069                                        }
1070                                    } else {
1071                                        break;
1072                                    }
1073                                }
1074                            }
1075
1076                            Value::Object(obj)
1077                        } else if matches!(self.current_token, Token::LeftBracket) {
1078                            // Array as object value: "key[2]: ..."
1079                            let array_value = self.parse_array(depth + 1)?;
1080                            let mut obj = Map::new();
1081                            obj.insert(key, array_value);
1082                            Value::Object(obj)
1083                        } else {
1084                            // Plain string value
1085                            Value::String(key)
1086                        }
1087                    } else {
1088                        self.parse_primitive()?
1089                    };
1090
1091                    items.push(value);
1092
1093                    if items.len() < length {
1094                        if matches!(self.current_token, Token::Newline) {
1095                            self.advance()?;
1096
1097                            if self.options.strict && matches!(self.current_token, Token::Newline) {
1098                                return Err(self.parse_error_with_context(
1099                                    "Blank lines are not allowed inside list arrays in strict mode",
1100                                ));
1101                            }
1102
1103                            self.skip_newlines()?;
1104                        } else if !matches!(self.current_token, Token::Dash) {
1105                            return Err(self.parse_error_with_context(format!(
1106                                "Expected newline or next list item after list item {}",
1107                                i + 1
1108                            )));
1109                        }
1110                    } else if matches!(self.current_token, Token::Newline) {
1111                        // After the last item, check for extra items
1112                        self.advance()?;
1113                        self.skip_newlines()?;
1114
1115                        let list_indent = self.options.indent.get_spaces() * (depth + 1);
1116                        let actual_indent = self.scanner.get_last_line_indent();
1117                        // If we see another dash at the same indent, there are too many items
1118                        if actual_indent == list_indent && matches!(self.current_token, Token::Dash)
1119                        {
1120                            return Err(self.parse_error_with_context(format!(
1121                                "Array length mismatch: expected {length} items, but more items \
1122                                 found",
1123                            )));
1124                        }
1125                    }
1126                }
1127            }
1128            _ => {
1129                for i in 0..length {
1130                    if i > 0 {
1131                        if matches!(self.current_token, Token::Delimiter(_)) {
1132                            self.advance()?;
1133                        } else {
1134                            return Err(self
1135                                .parse_error_with_context(format!(
1136                                    "Expected delimiter, found {:?}",
1137                                    self.current_token
1138                                ))
1139                                .with_suggestion(format!(
1140                                    "Expected delimiter between items (item {} of {})",
1141                                    i + 1,
1142                                    length
1143                                )));
1144                        }
1145                    }
1146
1147                    let value = if matches!(self.current_token, Token::Delimiter(_))
1148                        || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length)
1149                    {
1150                        Value::String(String::new())
1151                    } else if matches!(self.current_token, Token::LeftBracket) {
1152                        self.parse_array(depth + 1)?
1153                    } else {
1154                        self.parse_tabular_field_value()?
1155                    };
1156
1157                    items.push(value);
1158                }
1159            }
1160        }
1161
1162        validation::validate_array_length(length, items.len())?;
1163
1164        if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) {
1165            return Err(self.parse_error_with_context(format!(
1166                "Array length mismatch: expected {length} items, but more items found",
1167            )));
1168        }
1169
1170        Ok(Value::Array(items))
1171    }
1172
1173    fn parse_tabular_field_value(&mut self) -> ToonResult<Value> {
1174        // Get the original text of the current token
1175        let token_text = self.scanner.last_token_text().to_string();
1176
1177        // Read remaining text until delimiter/newline/EOF
1178        let (rest, space_count) = self.scanner.read_until_delimiter_with_space_info();
1179
1180        if rest.is_empty() && space_count == 0 {
1181            // Single token — handle as primitive directly
1182            let result = match &self.current_token {
1183                Token::Null => Ok(Value::Null),
1184                Token::Bool(b) => Ok(Value::Bool(*b)),
1185                Token::Integer(i) => Ok(Number::from(*i).into()),
1186                Token::Number(n) => {
1187                    let val = *n;
1188                    if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1189                        Ok(Number::from(val as i64).into())
1190                    } else {
1191                        Ok(Number::from_f64(val)
1192                            .ok_or_else(|| {
1193                                ToonError::InvalidInput(format!("Invalid number: {val}"))
1194                            })?
1195                            .into())
1196                    }
1197                }
1198                Token::String(s, _) => Ok(Value::String(s.clone())),
1199                _ => Err(self.parse_error_with_context(format!(
1200                    "Expected primitive value, found {:?}",
1201                    self.current_token
1202                ))),
1203            };
1204            self.advance()?;
1205            result
1206        } else {
1207            // Multiple tokens — combine original text + spaces + rest, then type-infer
1208            let mut value_str = token_text;
1209            for _ in 0..space_count {
1210                value_str.push(' ');
1211            }
1212            value_str.push_str(&rest);
1213
1214            let token = self.scanner.parse_value_string(&value_str)?;
1215            // Rescan so current_token is positioned at the next delimiter/newline
1216            self.current_token = self.scanner.scan_token()?;
1217            match token {
1218                Token::String(s, _) => Ok(Value::String(s)),
1219                Token::Integer(i) => Ok(Number::from(i).into()),
1220                Token::Number(n) => {
1221                    if n.is_finite() && n.fract() == 0.0 && n.abs() <= i64::MAX as f64 {
1222                        Ok(Number::from(n as i64).into())
1223                    } else {
1224                        Ok(Number::from_f64(n)
1225                            .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {n}")))?
1226                            .into())
1227                    }
1228                }
1229                Token::Bool(b) => Ok(Value::Bool(b)),
1230                Token::Null => Ok(Value::Null),
1231                _ => Err(ToonError::InvalidInput("Unexpected token type".to_string())),
1232            }
1233        }
1234    }
1235
1236    fn parse_primitive(&mut self) -> ToonResult<Value> {
1237        match &self.current_token {
1238            Token::Null => {
1239                self.advance()?;
1240                Ok(Value::Null)
1241            }
1242            Token::Bool(b) => {
1243                let val = *b;
1244                self.advance()?;
1245                Ok(Value::Bool(val))
1246            }
1247            Token::Integer(i) => {
1248                let val = *i;
1249                self.advance()?;
1250                Ok(Number::from(val).into())
1251            }
1252            Token::Number(n) => {
1253                let val = *n;
1254                self.advance()?;
1255
1256                if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
1257                    Ok(Number::from(val as i64).into())
1258                } else {
1259                    Ok(Number::from_f64(val)
1260                        .ok_or_else(|| ToonError::InvalidInput(format!("Invalid number: {val}")))?
1261                        .into())
1262                }
1263            }
1264            Token::String(s, _) => {
1265                let val = s.clone();
1266                self.advance()?;
1267                Ok(Value::String(val))
1268            }
1269            _ => Err(self.parse_error_with_context(format!(
1270                "Expected primitive value, found {:?}",
1271                self.current_token
1272            ))),
1273        }
1274    }
1275
1276    fn parse_error_with_context(&self, message: impl Into<String>) -> ToonError {
1277        let (line, column) = self.scanner.current_position();
1278        let message = message.into();
1279
1280        let context = self.get_error_context(line, column);
1281
1282        ToonError::ParseError {
1283            line,
1284            column,
1285            message,
1286            context: Some(Box::new(context)),
1287        }
1288    }
1289
1290    fn get_error_context(&self, line: usize, column: usize) -> ErrorContext {
1291        let lines: Vec<&str> = self.input.lines().collect();
1292
1293        let source_line = if line > 0 && line <= lines.len() {
1294            lines[line - 1].to_string()
1295        } else {
1296            String::new()
1297        };
1298
1299        let preceding_lines: Vec<String> = if line > 1 {
1300            lines[line.saturating_sub(3)..line - 1]
1301                .iter()
1302                .map(|s| s.to_string())
1303                .collect()
1304        } else {
1305            Vec::new()
1306        };
1307
1308        let following_lines: Vec<String> = if line < lines.len() {
1309            lines[line..line.saturating_add(2).min(lines.len())]
1310                .iter()
1311                .map(|s| s.to_string())
1312                .collect()
1313        } else {
1314            Vec::new()
1315        };
1316
1317        let indicator = if column > 0 {
1318            Some(format!("{:width$}^", "", width = column - 1))
1319        } else {
1320            None
1321        };
1322
1323        ErrorContext {
1324            source_line,
1325            preceding_lines,
1326            following_lines,
1327            suggestion: None,
1328            indicator,
1329        }
1330    }
1331
1332    fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> {
1333        if !self.options.strict {
1334            return Ok(());
1335        }
1336
1337        let indent_size = self.options.indent.get_spaces();
1338        // In strict mode, indentation must be a multiple of the configured indent size
1339        if indent_size > 0 && indent_amount > 0 && !indent_amount.is_multiple_of(indent_size) {
1340            Err(self.parse_error_with_context(format!(
1341                "Invalid indentation: found {indent_amount} spaces, but must be a multiple of \
1342                 {indent_size}"
1343            )))
1344        } else {
1345            Ok(())
1346        }
1347    }
1348}
1349
1350#[cfg(test)]
1351mod tests {
1352    use std::f64;
1353
1354    use serde_json::json;
1355
1356    use super::*;
1357
1358    fn parse(input: &str) -> ToonResult<Value> {
1359        let mut parser = Parser::new(input, DecodeOptions::default())?;
1360        parser.parse()
1361    }
1362
1363    #[test]
1364    fn test_parse_primitives() {
1365        assert_eq!(parse("null").unwrap(), json!(null));
1366        assert_eq!(parse("true").unwrap(), json!(true));
1367        assert_eq!(parse("false").unwrap(), json!(false));
1368        assert_eq!(parse("42").unwrap(), json!(42));
1369        assert_eq!(parse("3.141592653589793").unwrap(), json!(f64::consts::PI));
1370        assert_eq!(parse("hello").unwrap(), json!("hello"));
1371    }
1372
1373    #[test]
1374    fn test_parse_simple_object() {
1375        let result = parse("name: Alice\nage: 30").unwrap();
1376        assert_eq!(result["name"], json!("Alice"));
1377        assert_eq!(result["age"], json!(30));
1378    }
1379
1380    #[test]
1381    fn test_parse_primitive_array() {
1382        let result = parse("tags[3]: a,b,c").unwrap();
1383        assert_eq!(result["tags"], json!(["a", "b", "c"]));
1384    }
1385
1386    #[test]
1387    fn test_parse_empty_array() {
1388        let result = parse("items[0]:").unwrap();
1389        assert_eq!(result["items"], json!([]));
1390    }
1391
1392    #[test]
1393    fn test_parse_tabular_array() {
1394        let result = parse("users[2]{id,name}:\n  1,Alice\n  2,Bob").unwrap();
1395        assert_eq!(
1396            result["users"],
1397            json!([
1398                {"id": 1, "name": "Alice"},
1399                {"id": 2, "name": "Bob"}
1400            ])
1401        );
1402    }
1403
1404    #[test]
1405    fn test_empty_tokens() {
1406        let result = parse("items[3]: a,,c").unwrap();
1407        assert_eq!(result["items"], json!(["a", "", "c"]));
1408    }
1409
1410    #[test]
1411    fn test_empty_nested_object() {
1412        let result = parse("user:").unwrap();
1413        assert_eq!(result, json!({"user": {}}));
1414    }
1415
1416    #[test]
1417    fn test_list_item_object() {
1418        let result =
1419            parse("items[2]:\n  - id: 1\n    name: First\n  - id: 2\n    name: Second").unwrap();
1420        assert_eq!(
1421            result["items"],
1422            json!([
1423                {"id": 1, "name": "First"},
1424                {"id": 2, "name": "Second"}
1425            ])
1426        );
1427    }
1428
1429    #[test]
1430    fn test_nested_array_in_list_item() {
1431        let result = parse("items[1]:\n  - tags[3]: a,b,c").unwrap();
1432        assert_eq!(result["items"], json!([{"tags": ["a", "b", "c"]}]));
1433    }
1434
1435    #[test]
1436    fn test_two_level_siblings() {
1437        let input = "x:\n  y: 1\n  z: 2";
1438        let opts = DecodeOptions::default();
1439        let mut parser = Parser::new(input, opts).unwrap();
1440        let result = parser.parse().unwrap();
1441
1442        let x = result.as_object().unwrap().get("x").unwrap();
1443        let x_obj = x.as_object().unwrap();
1444
1445        assert_eq!(x_obj.len(), 2, "x should have 2 keys");
1446        assert_eq!(x_obj.get("y").unwrap(), &serde_json::json!(1));
1447        assert_eq!(x_obj.get("z").unwrap(), &serde_json::json!(2));
1448    }
1449
1450    #[test]
1451    fn test_nested_object_with_sibling() {
1452        let input = "a:\n  b:\n    c: 1\n  d: 2";
1453        let opts = DecodeOptions::default();
1454        let mut parser = Parser::new(input, opts).unwrap();
1455        let result = parser.parse().unwrap();
1456
1457        let a = result.as_object().unwrap().get("a").unwrap();
1458        let a_obj = a.as_object().unwrap();
1459
1460        assert_eq!(a_obj.len(), 2, "a should have 2 keys (b and d)");
1461        assert!(a_obj.contains_key("b"), "a should have key 'b'");
1462        assert!(a_obj.contains_key("d"), "a should have key 'd'");
1463
1464        let b = a_obj.get("b").unwrap().as_object().unwrap();
1465        assert_eq!(b.len(), 1, "b should have only 1 key (c)");
1466        assert!(b.contains_key("c"), "b should have key 'c'");
1467        assert!(!b.contains_key("d"), "b should NOT have key 'd'");
1468    }
1469
1470    #[test]
1471    fn test_field_value_with_parentheses() {
1472        let result = parse("msg: Mostly Functions (3 of 3)").unwrap();
1473        assert_eq!(result, json!({"msg": "Mostly Functions (3 of 3)"}));
1474
1475        let result = parse("val: (hello)").unwrap();
1476        assert_eq!(result, json!({"val": "(hello)"}));
1477
1478        let result = parse("test: a (b) c (d)").unwrap();
1479        assert_eq!(result, json!({"test": "a (b) c (d)"}));
1480    }
1481
1482    #[test]
1483    fn test_field_value_number_with_parentheses() {
1484        let result = parse("code: 0(f)").unwrap();
1485        assert_eq!(result, json!({"code": "0(f)"}));
1486
1487        let result = parse("val: 5(test)").unwrap();
1488        assert_eq!(result, json!({"val": "5(test)"}));
1489
1490        let result = parse("msg: test 123)").unwrap();
1491        assert_eq!(result, json!({"msg": "test 123)"}));
1492    }
1493
1494    #[test]
1495    fn test_field_value_single_token_optimization() {
1496        let result = parse("name: hello").unwrap();
1497        assert_eq!(result, json!({"name": "hello"}));
1498
1499        let result = parse("age: 42").unwrap();
1500        assert_eq!(result, json!({"age": 42}));
1501
1502        let result = parse("active: true").unwrap();
1503        assert_eq!(result, json!({"active": true}));
1504
1505        let result = parse("value: null").unwrap();
1506        assert_eq!(result, json!({"value": null}));
1507    }
1508
1509    #[test]
1510    fn test_field_value_multi_token() {
1511        let result = parse("msg: hello world").unwrap();
1512        assert_eq!(result, json!({"msg": "hello world"}));
1513
1514        let result = parse("msg: test 123 end").unwrap();
1515        assert_eq!(result, json!({"msg": "test 123 end"}));
1516    }
1517
1518    #[test]
1519    fn test_field_value_spacing_preserved() {
1520        let result = parse("val: hello world").unwrap();
1521        assert_eq!(result, json!({"val": "hello world"}));
1522
1523        let result = parse("val: 0(f)").unwrap();
1524        assert_eq!(result, json!({"val": "0(f)"}));
1525    }
1526
1527    #[test]
1528    fn test_round_trip_parentheses() {
1529        use crate::{
1530            decode::decode_default,
1531            encode::encode_default,
1532        };
1533
1534        let original = json!({
1535            "message": "Mostly Functions (3 of 3)",
1536            "code": "0(f)",
1537            "simple": "(hello)",
1538            "mixed": "test 123)"
1539        });
1540
1541        let encoded = encode_default(&original).unwrap();
1542        let decoded: Value = decode_default(&encoded).unwrap();
1543
1544        assert_eq!(original, decoded);
1545    }
1546
1547    #[test]
1548    fn test_multiple_fields_with_edge_cases() {
1549        let input = r#"message: Mostly Functions (3 of 3)
1550sone: (hello)
1551hello: 0(f)"#;
1552
1553        let result = parse(input).unwrap();
1554        assert_eq!(
1555            result,
1556            json!({
1557                "message": "Mostly Functions (3 of 3)",
1558                "sone": "(hello)",
1559                "hello": "0(f)"
1560            })
1561        );
1562    }
1563
1564    #[test]
1565    fn test_decode_list_item_tabular_array_v3() {
1566        // Tabular arrays as first field of list items
1567        // Rows must be at depth +2 relative to hyphen (6 spaces from root)
1568        let input = r#"items[1]:
1569  - users[2]{id,name}:
1570      1,Ada
1571      2,Bob
1572    status: active"#;
1573
1574        let result = parse(input).unwrap();
1575
1576        assert_eq!(
1577            result,
1578            json!({
1579                "items": [
1580                    {
1581                        "users": [
1582                            {"id": 1, "name": "Ada"},
1583                            {"id": 2, "name": "Bob"}
1584                        ],
1585                        "status": "active"
1586                    }
1587                ]
1588            })
1589        );
1590    }
1591
1592    #[test]
1593    fn test_decode_list_item_tabular_array_multiple_items() {
1594        // Multiple list items each with tabular array as first field
1595        let input = r#"data[2]:
1596  - records[1]{id,val}:
1597      1,x
1598    count: 1
1599  - records[1]{id,val}:
1600      2,y
1601    count: 1"#;
1602
1603        let result = parse(input).unwrap();
1604
1605        assert_eq!(
1606            result,
1607            json!({
1608                "data": [
1609                    {
1610                        "records": [{"id": 1, "val": "x"}],
1611                        "count": 1
1612                    },
1613                    {
1614                        "records": [{"id": 2, "val": "y"}],
1615                        "count": 1
1616                    }
1617                ]
1618            })
1619        );
1620    }
1621
1622    #[test]
1623    fn test_decode_list_item_tabular_array_with_multiple_fields() {
1624        // List item with tabular array first and multiple sibling fields
1625        let input = r#"entries[1]:
1626  - people[2]{name,age}:
1627      Alice,30
1628      Bob,25
1629    total: 2
1630    category: staff"#;
1631
1632        let result = parse(input).unwrap();
1633
1634        assert_eq!(
1635            result,
1636            json!({
1637                "entries": [
1638                    {
1639                        "people": [
1640                            {"name": "Alice", "age": 30},
1641                            {"name": "Bob", "age": 25}
1642                        ],
1643                        "total": 2,
1644                        "category": "staff"
1645                    }
1646                ]
1647            })
1648        );
1649    }
1650
1651    #[test]
1652    fn test_decode_list_item_non_tabular_array_unchanged() {
1653        // Non-tabular arrays as first field should work normally
1654        let input = r#"items[1]:
1655  - tags[3]: a,b,c
1656    name: test"#;
1657
1658        let result = parse(input).unwrap();
1659
1660        assert_eq!(
1661            result,
1662            json!({
1663                "items": [
1664                    {
1665                        "tags": ["a", "b", "c"],
1666                        "name": "test"
1667                    }
1668                ]
1669            })
1670        );
1671    }
1672
1673    #[test]
1674    fn test_decode_strict_rejects_v2_tabular_indent() {
1675        use crate::decode::decode_strict;
1676
1677        // Old format: rows at depth +1 (4 spaces from root)
1678        // Strict mode should reject this incorrect indentation
1679        let input_v2 = r#"items[1]:
1680  - users[2]{id,name}:
1681    1,Ada
1682    2,Bob"#;
1683
1684        let result = decode_strict::<Value>(input_v2);
1685
1686        // Should error due to incorrect indentation
1687        assert!(
1688            result.is_err(),
1689            "Old format with incorrect indentation should be rejected in strict mode"
1690        );
1691        let err_msg = result.unwrap_err().to_string();
1692        assert!(
1693            err_msg.contains("indentation") || err_msg.contains("Invalid indentation"),
1694            "Error should mention indentation. Got: {}",
1695            err_msg
1696        );
1697    }
1698
1699    #[test]
1700    fn test_decode_tabular_array_not_in_list_item_unchanged() {
1701        // Regular tabular arrays (not in list items) should still use depth +1
1702        let input = r#"users[2]{id,name}:
1703  1,Ada
1704  2,Bob"#;
1705
1706        let result = parse(input).unwrap();
1707
1708        assert_eq!(
1709            result,
1710            json!({
1711                "users": [
1712                    {"id": 1, "name": "Ada"},
1713                    {"id": 2, "name": "Bob"}
1714                ]
1715            })
1716        );
1717    }
1718
1719    #[test]
1720    fn test_decode_nested_tabular_not_first_field() {
1721        // Tabular array as a subsequent field (not first) should use normal depth
1722        let input = r#"items[1]:
1723  - name: test
1724    data[2]{id,val}:
1725      1,x
1726      2,y"#;
1727
1728        let result = parse(input).unwrap();
1729
1730        assert_eq!(
1731            result,
1732            json!({
1733                "items": [
1734                    {
1735                        "name": "test",
1736                        "data": [
1737                            {"id": 1, "val": "x"},
1738                            {"id": 2, "val": "y"}
1739                        ]
1740                    }
1741                ]
1742            })
1743        );
1744    }
1745
1746    #[test]
1747    fn test_array_element_number_followed_by_string() {
1748        // Issue #56: Array elements starting with a number should be parsed as string
1749        // when followed by non-numeric text
1750        let result = parse("version1[1]: 1.0 something").unwrap();
1751        assert_eq!(result["version1"], json!(["1.0 something"]));
1752
1753        let result = parse("data[1]: 42 units").unwrap();
1754        assert_eq!(result["data"], json!(["42 units"]));
1755
1756        // Pure numbers should still be parsed as numbers
1757        let result = parse("nums[1]: 42").unwrap();
1758        assert_eq!(result["nums"], json!([42]));
1759
1760        let result = parse("nums[1]: 2.75").unwrap();
1761        assert_eq!(result["nums"], json!([2.75]));
1762    }
1763
1764    #[test]
1765    fn test_issue_59_multiple_spaces_preserved() {
1766        // Issue #59: Multiple spaces between words should be preserved
1767        // Field value context
1768        let result = parse("key: a   b").unwrap();
1769        assert_eq!(result["key"], json!("a   b"));
1770
1771        // Tabular cell context
1772        let result = parse("data[2]: a   b, c   d").unwrap();
1773        assert_eq!(result["data"], json!(["a   b", "c   d"]));
1774
1775        // Root-level value
1776        let result = parse("a   b").unwrap();
1777        assert_eq!(result, json!("a   b"));
1778    }
1779
1780    #[test]
1781    fn test_issue_60_mixed_type_tokens_as_string() {
1782        // Issue #60: "1 null" and "a 1" should parse as strings in tabular rows
1783        // Tabular cell context
1784        let result = parse("data[2]: 1 null, a 1").unwrap();
1785        assert_eq!(result["data"], json!(["1 null", "a 1"]));
1786
1787        // Root-level value
1788        let result = parse("1 null").unwrap();
1789        assert_eq!(result, json!("1 null"));
1790
1791        let result = parse("a 1").unwrap();
1792        assert_eq!(result, json!("a 1"));
1793
1794        // Field value context
1795        let result = parse("key: 1 null").unwrap();
1796        assert_eq!(result["key"], json!("1 null"));
1797
1798        let result = parse("key: a 1").unwrap();
1799        assert_eq!(result["key"], json!("a 1"));
1800    }
1801
1802    #[test]
1803    fn test_issue_61_number_format_preserved() {
1804        // Issue #61: "1.0 b" should preserve "1.0", not become "1 b"
1805        // Tabular cell context
1806        let result = parse("data[2]: 1.0 b, 1e1 b").unwrap();
1807        assert_eq!(result["data"], json!(["1.0 b", "1e1 b"]));
1808
1809        // Field value context
1810        let result = parse("key: 1.0 b").unwrap();
1811        assert_eq!(result["key"], json!("1.0 b"));
1812
1813        let result = parse("key: 1e1 b").unwrap();
1814        assert_eq!(result["key"], json!("1e1 b"));
1815
1816        // Root-level value
1817        let result = parse("1.0 b").unwrap();
1818        assert_eq!(result, json!("1.0 b"));
1819
1820        let result = parse("1e1 b").unwrap();
1821        assert_eq!(result, json!("1e1 b"));
1822    }
1823}