styx_parse/
parser.rs

1//! Event-based parser for Styx.
2
3use std::borrow::Cow;
4use std::collections::HashMap;
5use std::iter::Peekable;
6
7use crate::Span;
8use crate::callback::ParseCallback;
9use crate::event::{Event, ParseErrorKind, ScalarKind, Separator};
10use crate::lexer::Lexer;
11use crate::token::{Token, TokenKind};
12#[allow(unused_imports)]
13use crate::trace;
14
15/// Event-based parser for Styx documents.
16pub struct Parser<'src> {
17    lexer: Peekable<LexerIter<'src>>,
18}
19
20/// Wrapper to make Lexer into an Iterator.
21struct LexerIter<'src> {
22    lexer: Lexer<'src>,
23    done: bool,
24}
25
26impl<'src> Iterator for LexerIter<'src> {
27    type Item = Token<'src>;
28
29    fn next(&mut self) -> Option<Self::Item> {
30        if self.done {
31            return None;
32        }
33        let token = self.lexer.next_token();
34        if token.kind == TokenKind::Eof {
35            self.done = true;
36        }
37        Some(token)
38    }
39}
40
41impl<'src> Parser<'src> {
42    /// Create a new parser for the given source.
43    pub fn new(source: &'src str) -> Self {
44        let lexer = Lexer::new(source);
45        Self {
46            lexer: LexerIter { lexer, done: false }.peekable(),
47        }
48    }
49
50    /// Parse and emit events to callback.
51    // parser[impl document.root]
52    pub fn parse<C: ParseCallback<'src>>(mut self, callback: &mut C) {
53        if !callback.event(Event::DocumentStart) {
54            return;
55        }
56
57        // Skip leading whitespace/newlines and emit any leading comments
58        self.skip_whitespace_and_newlines();
59
60        // Emit leading comments before checking for explicit root
61        while let Some(token) = self.peek() {
62            match token.kind {
63                TokenKind::LineComment => {
64                    let token = self.advance().unwrap();
65                    if !callback.event(Event::Comment {
66                        span: token.span,
67                        text: token.text,
68                    }) {
69                        return;
70                    }
71                    self.skip_whitespace_and_newlines();
72                }
73                TokenKind::DocComment => {
74                    let token = self.advance().unwrap();
75                    if !callback.event(Event::DocComment {
76                        span: token.span,
77                        text: token.text,
78                    }) {
79                        return;
80                    }
81                    self.skip_whitespace_and_newlines();
82                }
83                _ => break,
84            }
85        }
86
87        // parser[impl document.root]
88        // If the document starts with `{`, parse as a single explicit block object
89        if matches!(self.peek(), Some(t) if t.kind == TokenKind::LBrace) {
90            let obj = self.parse_object_atom();
91            self.emit_atom_as_value(&obj, callback);
92        } else {
93            // Parse top-level entries (implicit object at document root)
94            self.parse_entries(callback, None);
95        }
96
97        callback.event(Event::DocumentEnd);
98    }
99
100    /// Convenience: parse and collect all events.
101    pub fn parse_to_vec(self) -> Vec<Event<'src>> {
102        let mut events = Vec::new();
103        self.parse(&mut events);
104        events
105    }
106
107    /// Peek at the next token.
108    fn peek(&mut self) -> Option<&Token<'src>> {
109        // Skip whitespace when peeking
110        while let Some(token) = self.lexer.peek() {
111            if token.kind == TokenKind::Whitespace {
112                self.lexer.next();
113            } else {
114                break;
115            }
116        }
117        self.lexer.peek()
118    }
119
120    /// Peek at the next token without skipping whitespace.
121    fn peek_raw(&mut self) -> Option<&Token<'src>> {
122        self.lexer.peek()
123    }
124
125    /// Consume the next token.
126    fn advance(&mut self) -> Option<Token<'src>> {
127        self.lexer.next()
128    }
129
130    /// Skip whitespace tokens.
131    fn skip_whitespace(&mut self) {
132        while let Some(token) = self.lexer.peek() {
133            if token.kind == TokenKind::Whitespace {
134                self.lexer.next();
135            } else {
136                break;
137            }
138        }
139    }
140
141    /// Skip whitespace and newlines.
142    fn skip_whitespace_and_newlines(&mut self) {
143        while let Some(token) = self.lexer.peek() {
144            if token.kind == TokenKind::Whitespace || token.kind == TokenKind::Newline {
145                self.lexer.next();
146            } else {
147                break;
148            }
149        }
150    }
151
152    /// Parse entries in an object or at document level.
153    // parser[impl entry.key-equality] parser[impl entry.path.sibling] parser[impl entry.path.reopen]
154    fn parse_entries<C: ParseCallback<'src>>(
155        &mut self,
156        callback: &mut C,
157        closing: Option<TokenKind>,
158    ) {
159        trace!("Parsing entries, closing token: {:?}", closing);
160        let mut path_state = PathState::default();
161        // Track last doc comment span for dangling detection
162        // parser[impl comment.doc]
163        let mut pending_doc_comment: Option<Span> = None;
164
165        self.skip_whitespace_and_newlines();
166
167        while let Some(token) = self.peek() {
168            // Check for closing token or EOF
169            if token.kind == TokenKind::Eof {
170                break;
171            }
172            if let Some(close) = closing
173                && token.kind == close
174            {
175                break;
176            }
177
178            // Handle doc comments
179            if token.kind == TokenKind::DocComment {
180                let token = self.advance().unwrap();
181                pending_doc_comment = Some(token.span);
182                if !callback.event(Event::DocComment {
183                    span: token.span,
184                    text: token.text,
185                }) {
186                    return;
187                }
188                self.skip_whitespace_and_newlines();
189                continue;
190            }
191
192            // Handle line comments
193            if token.kind == TokenKind::LineComment {
194                let token = self.advance().unwrap();
195                if !callback.event(Event::Comment {
196                    span: token.span,
197                    text: token.text,
198                }) {
199                    return;
200                }
201                self.skip_whitespace_and_newlines();
202                continue;
203            }
204
205            // We're about to parse an entry, so any pending doc comment is attached
206            pending_doc_comment = None;
207
208            // Parse entry with path state tracking
209            if !self.parse_entry_with_path_check(callback, &mut path_state) {
210                return;
211            }
212
213            // Skip entry separator (newlines or comma handled in parse_entry)
214            self.skip_whitespace_and_newlines();
215        }
216
217        // parser[impl comment.doc]
218        // If we exited with a pending doc comment, it's dangling (not followed by entry)
219        if let Some(span) = pending_doc_comment {
220            callback.event(Event::Error {
221                span,
222                kind: ParseErrorKind::DanglingDocComment,
223            });
224        }
225    }
226
227    /// Parse a single entry with path state tracking.
228    // parser[impl entry.key-equality] parser[impl entry.structure] parser[impl entry.path]
229    // parser[impl entry.path.sibling] parser[impl entry.path.reopen]
230    fn parse_entry_with_path_check<C: ParseCallback<'src>>(
231        &mut self,
232        callback: &mut C,
233        path_state: &mut PathState,
234    ) -> bool {
235        if !callback.event(Event::EntryStart) {
236            return false;
237        }
238
239        // Collect atoms for this entry
240        let atoms = self.collect_entry_atoms();
241
242        if atoms.is_empty() {
243            // Empty entry - just end it
244            return callback.event(Event::EntryEnd);
245        }
246
247        // First atom is the key - check for duplicates and invalid key types
248        let key_atom = &atoms[0];
249
250        // parser[impl entry.keys]
251        // Heredoc scalars, objects, and sequences are not allowed as keys
252        match &key_atom.content {
253            AtomContent::Heredoc(_) => {
254                if !callback.event(Event::Error {
255                    span: key_atom.span,
256                    kind: ParseErrorKind::InvalidKey,
257                }) {
258                    return false;
259                }
260            }
261            AtomContent::Object { .. } | AtomContent::Sequence { .. } => {
262                if !callback.event(Event::Error {
263                    span: key_atom.span,
264                    kind: ParseErrorKind::InvalidKey,
265                }) {
266                    return false;
267                }
268            }
269            _ => {}
270        }
271
272        // parser[impl entry.path]
273        // Check if this is a dotted path (bare scalar containing '.')
274        if let AtomContent::Scalar(text) = &key_atom.content
275            && key_atom.kind == ScalarKind::Bare
276            && text.contains('.')
277        {
278            return self.emit_dotted_path_entry(text, key_atom.span, &atoms, callback, path_state);
279        }
280
281        // Non-dotted key: treat as single-segment path
282        let key_text = match &key_atom.content {
283            AtomContent::Scalar(text) => {
284                let processed = self.process_scalar(text, key_atom.kind);
285                processed.into_owned()
286            }
287            AtomContent::Unit => "@".to_string(),
288            AtomContent::Tag { name, .. } => format!("@{}", name),
289            _ => key_atom.span.start.to_string(), // Fallback for invalid keys
290        };
291
292        // Determine value kind
293        let value_kind = if atoms.len() >= 2 {
294            match &atoms[1].content {
295                AtomContent::Object { .. } | AtomContent::Attributes { .. } => {
296                    PathValueKind::Object
297                }
298                _ => PathValueKind::Terminal,
299            }
300        } else {
301            // Implicit unit value
302            PathValueKind::Terminal
303        };
304
305        // Check path state
306        let path = vec![key_text];
307        if let Err(err) = path_state.check_and_update(&path, key_atom.span, value_kind)
308            && !self.emit_path_error(err, key_atom.span, callback)
309        {
310            return false;
311        }
312
313        if !self.emit_atom_as_key(key_atom, callback) {
314            return false;
315        }
316
317        if atoms.len() == 1 {
318            // Just a key, implicit unit value
319            if !callback.event(Event::Unit {
320                span: key_atom.span,
321            }) {
322                return false;
323            }
324        } else if atoms.len() == 2 {
325            // Key and value
326            if !self.emit_atom_as_value(&atoms[1], callback) {
327                return false;
328            }
329        } else {
330            // parser[impl entry.toomany]
331            // 3+ atoms is an error - emit the second atom as value, then error on the third
332            if !self.emit_atom_as_value(&atoms[1], callback) {
333                return false;
334            }
335
336            // Emit error for the third atom (and beyond)
337            // Common case: `key @tag {}` where user meant `@tag{}`
338            let third_atom = &atoms[2];
339            if !callback.event(Event::Error {
340                span: third_atom.span,
341                kind: ParseErrorKind::TooManyAtoms,
342            }) {
343                return false;
344            }
345        }
346
347        callback.event(Event::EntryEnd)
348    }
349
350    /// Emit an error for a path validation failure.
351    fn emit_path_error<C: ParseCallback<'src>>(
352        &self,
353        err: PathError,
354        span: Span,
355        callback: &mut C,
356    ) -> bool {
357        let kind = match err {
358            PathError::Duplicate { original } => ParseErrorKind::DuplicateKey { original },
359            PathError::Reopened { closed_path } => ParseErrorKind::ReopenedPath { closed_path },
360            PathError::NestIntoTerminal { terminal_path } => {
361                ParseErrorKind::NestIntoTerminal { terminal_path }
362            }
363        };
364        callback.event(Event::Error { span, kind })
365    }
366
367    /// Emit a dotted path entry.
368    /// `a.b.c value` expands to `a { b { c value } }`
369    // parser[impl entry.path] parser[impl entry.path.sibling] parser[impl entry.path.reopen]
370    fn emit_dotted_path_entry<C: ParseCallback<'src>>(
371        &self,
372        path_text: &'src str,
373        path_span: Span,
374        atoms: &[Atom<'src>],
375        callback: &mut C,
376        path_state: &mut PathState,
377    ) -> bool {
378        // Split the path on '.'
379        let segments: Vec<&str> = path_text.split('.').collect();
380
381        if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
382            // Invalid path (empty segment like "a..b" or ".a" or "a.")
383            if !callback.event(Event::Error {
384                span: path_span,
385                kind: ParseErrorKind::InvalidKey,
386            }) {
387                return false;
388            }
389            return callback.event(Event::EntryEnd);
390        }
391
392        // Build full path as Vec<String>
393        let path: Vec<String> = segments.iter().map(|s| s.to_string()).collect();
394
395        // Determine value kind based on the value atom
396        let value_kind = if atoms.len() >= 2 {
397            match &atoms[1].content {
398                AtomContent::Object { .. } | AtomContent::Attributes { .. } => {
399                    PathValueKind::Object
400                }
401                _ => PathValueKind::Terminal,
402            }
403        } else {
404            // Implicit unit value
405            PathValueKind::Terminal
406        };
407
408        // Check path state for duplicates, reopening, and nesting errors
409        if let Err(err) = path_state.check_and_update(&path, path_span, value_kind)
410            && !self.emit_path_error(err, path_span, callback)
411        {
412            return false;
413        }
414
415        // Calculate spans for each segment
416        // This is approximate - we use the path span and divide it up
417        let mut current_offset = path_span.start;
418
419        // Emit nested structure: for each segment except the last, emit Key + ObjectStart
420        let depth = segments.len();
421        for (i, segment) in segments.iter().enumerate() {
422            let segment_len = segment.len() as u32;
423            let segment_span = Span::new(current_offset, current_offset + segment_len);
424
425            if i > 0 {
426                // Start a new entry for nested segments
427                if !callback.event(Event::EntryStart) {
428                    return false;
429                }
430            }
431
432            // Emit this segment as a key
433            if !callback.event(Event::Key {
434                span: segment_span,
435                tag: None,
436                payload: Some(Cow::Borrowed(segment)),
437                kind: ScalarKind::Bare,
438            }) {
439                return false;
440            }
441
442            if i < depth - 1 {
443                // Not the last segment - emit ObjectStart (value is nested object)
444                if !callback.event(Event::ObjectStart {
445                    span: segment_span,
446                    separator: Separator::Newline,
447                }) {
448                    return false;
449                }
450            }
451
452            // Move past this segment and the dot
453            current_offset += segment_len + 1; // +1 for the dot
454        }
455
456        // Emit the actual value
457        if atoms.len() == 1 {
458            // Just the path, implicit unit value
459            if !callback.event(Event::Unit { span: path_span }) {
460                return false;
461            }
462        } else if atoms.len() == 2 {
463            // Path and value
464            if !self.emit_atom_as_value(&atoms[1], callback) {
465                return false;
466            }
467        } else {
468            // parser[impl entry.toomany]
469            // 3+ atoms is an error
470            if !self.emit_atom_as_value(&atoms[1], callback) {
471                return false;
472            }
473            let third_atom = &atoms[2];
474            if !callback.event(Event::Error {
475                span: third_atom.span,
476                kind: ParseErrorKind::TooManyAtoms,
477            }) {
478                return false;
479            }
480        }
481
482        // Close all the nested structures (in reverse order)
483        for i in (0..depth).rev() {
484            if i < depth - 1 {
485                // Close the nested object
486                if !callback.event(Event::ObjectEnd {
487                    span: path_span, // Use path span for all closes
488                }) {
489                    return false;
490                }
491            }
492            // Close the entry
493            if !callback.event(Event::EntryEnd) {
494                return false;
495            }
496        }
497
498        true
499    }
500
501    /// Collect atoms until entry boundary (newline, comma, closing brace/paren, or EOF).
502    fn collect_entry_atoms(&mut self) -> Vec<Atom<'src>> {
503        let mut atoms = Vec::new();
504
505        loop {
506            self.skip_whitespace();
507
508            let Some(token) = self.peek() else {
509                break;
510            };
511
512            match token.kind {
513                // Entry boundaries
514                TokenKind::Newline | TokenKind::Comma | TokenKind::Eof => break,
515                TokenKind::RBrace | TokenKind::RParen => break,
516
517                // Comments end the entry
518                TokenKind::LineComment | TokenKind::DocComment => break,
519
520                // Nested structures
521                TokenKind::LBrace => {
522                    atoms.push(self.parse_object_atom());
523                }
524                TokenKind::LParen => {
525                    atoms.push(self.parse_sequence_atom());
526                }
527
528                // Tags
529                TokenKind::At => {
530                    atoms.push(self.parse_tag_or_unit_atom());
531                }
532
533                // Bare scalars - check for attribute syntax (key=value)
534                // parser[impl attr.syntax] parser[impl entry.keypath.attributes]
535                TokenKind::BareScalar => {
536                    if self.is_attribute_start() {
537                        atoms.push(self.parse_attributes());
538                    } else {
539                        atoms.push(self.parse_scalar_atom());
540                    }
541                }
542
543                // Other scalars (quoted, raw, heredoc) - cannot be attribute keys
544                TokenKind::QuotedScalar | TokenKind::RawScalar | TokenKind::HeredocStart => {
545                    atoms.push(self.parse_scalar_atom());
546                }
547
548                // Skip whitespace (handled above)
549                TokenKind::Whitespace => {
550                    self.advance();
551                }
552
553                // Error tokens - emit parse error
554                TokenKind::Error => {
555                    let token = self.advance().unwrap();
556                    // Record the error but continue parsing
557                    // The error will be emitted later when processing atoms
558                    atoms.push(Atom {
559                        span: token.span,
560                        kind: ScalarKind::Bare,
561                        content: AtomContent::Error,
562                        adjacent_block_span: None,
563                    });
564                }
565
566                // Unexpected tokens
567                _ => {
568                    // Skip and continue
569                    self.advance();
570                }
571            }
572        }
573
574        atoms
575    }
576
577    /// Check if current position starts an attribute (bare_scalar immediately followed by =).
578    // parser[impl attr.syntax]
579    fn is_attribute_start(&mut self) -> bool {
580        // We always try to parse as attribute; parse_attributes handles the fallback
581        // if = doesn't immediately follow the bare scalar.
582        true
583    }
584
585    /// Parse one or more attributes (key=value pairs).
586    /// If the first token is not followed by =, returns a regular scalar atom.
587    // parser[impl attr.syntax] parser[impl attr.values] parser[impl attr.atom]
588    fn parse_attributes(&mut self) -> Atom<'src> {
589        // First, consume the bare scalar (potential key)
590        let first_token = self.advance().unwrap();
591        let start_span = first_token.span;
592        let first_key = first_token.text;
593
594        // Check if > immediately follows (no whitespace)
595        // Extract the info we need before borrowing self again
596        let eq_info = self.peek_raw().map(|t| (t.kind, t.span.start, t.span.end));
597
598        let Some((eq_kind, eq_start, eq_end)) = eq_info else {
599            // No more tokens - return as regular scalar
600            return Atom {
601                span: start_span,
602                kind: ScalarKind::Bare,
603                content: AtomContent::Scalar(first_key),
604                adjacent_block_span: None,
605            };
606        };
607
608        if eq_kind != TokenKind::Gt || eq_start != start_span.end {
609            // No > or whitespace gap - return as regular scalar
610            // parser[impl entry.whitespace]
611            // Check if immediately followed by { or ( without whitespace
612            let adjacent_block_span = if matches!(eq_kind, TokenKind::LBrace | TokenKind::LParen)
613                && eq_start == start_span.end
614            {
615                Some(Span::new(eq_start, eq_end))
616            } else {
617                None
618            };
619            return Atom {
620                span: start_span,
621                kind: ScalarKind::Bare,
622                content: AtomContent::Scalar(first_key),
623                adjacent_block_span,
624            };
625        }
626
627        // Consume the > and record its span
628        let gt_token = self.advance().unwrap();
629        let gt_span = gt_token.span;
630
631        // Track trailing > errors
632        let mut trailing_gt_spans = Vec::new();
633
634        // Value must immediately follow > (no whitespace)
635        let val_info = self.peek_raw().map(|t| (t.span.start, t.kind));
636
637        let Some((val_start, val_kind)) = val_info else {
638            // Error: missing value after > (EOF)
639            trailing_gt_spans.push(gt_span);
640            return Atom {
641                span: Span::new(start_span.start, gt_span.end),
642                kind: ScalarKind::Bare,
643                content: AtomContent::Attributes {
644                    entries: vec![],
645                    trailing_gt_spans,
646                },
647                adjacent_block_span: None,
648            };
649        };
650
651        if val_start != eq_end {
652            // Error: whitespace after >
653            trailing_gt_spans.push(gt_span);
654            return Atom {
655                span: Span::new(start_span.start, gt_span.end),
656                kind: ScalarKind::Bare,
657                content: AtomContent::Attributes {
658                    entries: vec![],
659                    trailing_gt_spans,
660                },
661                adjacent_block_span: None,
662            };
663        }
664
665        // Check if what follows is a valid attribute value
666        if !matches!(
667            val_kind,
668            TokenKind::BareScalar
669                | TokenKind::QuotedScalar
670                | TokenKind::RawScalar
671                | TokenKind::LParen
672                | TokenKind::LBrace
673                | TokenKind::At
674                | TokenKind::HeredocStart
675        ) {
676            // Error: invalid token after > (e.g., newline, comma, etc.)
677            trailing_gt_spans.push(gt_span);
678            return Atom {
679                span: Span::new(start_span.start, gt_span.end),
680                kind: ScalarKind::Bare,
681                content: AtomContent::Attributes {
682                    entries: vec![],
683                    trailing_gt_spans,
684                },
685                adjacent_block_span: None,
686            };
687        }
688
689        // Parse the first value
690        let first_value = self.parse_attribute_value();
691        let Some(first_value) = first_value else {
692            // Invalid value type - this shouldn't happen given the check above
693            trailing_gt_spans.push(gt_span);
694            return Atom {
695                span: Span::new(start_span.start, gt_span.end),
696                kind: ScalarKind::Bare,
697                content: AtomContent::Attributes {
698                    entries: vec![],
699                    trailing_gt_spans,
700                },
701                adjacent_block_span: None,
702            };
703        };
704
705        let mut attrs = vec![AttributeEntry {
706            key: first_key,
707            key_span: start_span,
708            value: first_value,
709        }];
710
711        // Continue parsing more attributes (key=value pairs separated by whitespace)
712        loop {
713            self.skip_whitespace();
714
715            // Extract token info before consuming
716            let token_info = self.peek().map(|t| (t.kind, t.span, t.text));
717            let Some((token_kind, key_span, key_text)) = token_info else {
718                break;
719            };
720
721            // Must be a bare scalar
722            if token_kind != TokenKind::BareScalar {
723                break;
724            }
725
726            // Consume the scalar
727            self.advance();
728
729            // Check for > immediately after key
730            let eq_info = self.peek_raw().map(|t| (t.kind, t.span, t.span.end));
731            let Some((eq_kind, loop_gt_span, loop_eq_end)) = eq_info else {
732                // No more tokens - we consumed a bare scalar that's not an attribute
733                // This is lost, but we stop here
734                break;
735            };
736
737            if eq_kind != TokenKind::Gt || loop_gt_span.start != key_span.end {
738                // Not an attribute - the consumed scalar is lost
739                break;
740            }
741
742            // Consume >
743            self.advance();
744
745            // Check for value
746            let val_info = self.peek_raw().map(|t| (t.span.start, t.kind));
747            let Some((val_start, val_kind)) = val_info else {
748                // Error: trailing > at end of input
749                trailing_gt_spans.push(loop_gt_span);
750                break;
751            };
752
753            if val_start != loop_eq_end {
754                // Error: whitespace after >
755                trailing_gt_spans.push(loop_gt_span);
756                break;
757            }
758
759            // Check if valid attribute value follows
760            if !matches!(
761                val_kind,
762                TokenKind::BareScalar
763                    | TokenKind::QuotedScalar
764                    | TokenKind::RawScalar
765                    | TokenKind::LParen
766                    | TokenKind::LBrace
767                    | TokenKind::At
768                    | TokenKind::HeredocStart
769            ) {
770                // Error: invalid token after >
771                trailing_gt_spans.push(loop_gt_span);
772                break;
773            }
774
775            let Some(value) = self.parse_attribute_value() else {
776                // Shouldn't happen given the check above
777                trailing_gt_spans.push(loop_gt_span);
778                break;
779            };
780
781            attrs.push(AttributeEntry {
782                key: key_text,
783                key_span,
784                value,
785            });
786        }
787
788        let end_span = attrs
789            .last()
790            .map(|a| a.value.span.end)
791            .or_else(|| trailing_gt_spans.last().map(|s| s.end))
792            .unwrap_or(start_span.end);
793
794        Atom {
795            span: Span {
796                start: start_span.start,
797                end: end_span,
798            },
799            kind: ScalarKind::Bare,
800            content: AtomContent::Attributes {
801                entries: attrs,
802                trailing_gt_spans,
803            },
804            adjacent_block_span: None,
805        }
806    }
807
808    /// Parse an attribute value (bare/quoted/raw scalar, sequence, or object).
809    // parser[impl attr.values]
810    fn parse_attribute_value(&mut self) -> Option<Atom<'src>> {
811        let token = self.peek()?;
812
813        match token.kind {
814            TokenKind::BareScalar | TokenKind::QuotedScalar | TokenKind::RawScalar => {
815                Some(self.parse_scalar_atom())
816            }
817            TokenKind::LParen => Some(self.parse_sequence_atom()),
818            TokenKind::LBrace => Some(self.parse_object_atom()),
819            TokenKind::At => Some(self.parse_tag_or_unit_atom()),
820            // Heredocs are not typically used as attribute values, but support them
821            TokenKind::HeredocStart => Some(self.parse_scalar_atom()),
822            _ => None,
823        }
824    }
825
826    /// Parse a scalar atom.
827    fn parse_scalar_atom(&mut self) -> Atom<'src> {
828        let token = self.advance().unwrap();
829        trace!("Parsing scalar: {:?}", token.kind);
830        match token.kind {
831            TokenKind::BareScalar => Atom {
832                span: token.span,
833                kind: ScalarKind::Bare,
834                content: AtomContent::Scalar(token.text),
835                adjacent_block_span: None,
836            },
837            TokenKind::QuotedScalar => Atom {
838                span: token.span,
839                kind: ScalarKind::Quoted,
840                content: AtomContent::Scalar(token.text),
841                adjacent_block_span: None,
842            },
843            TokenKind::RawScalar => Atom {
844                span: token.span,
845                kind: ScalarKind::Raw,
846                content: AtomContent::Scalar(token.text),
847                adjacent_block_span: None,
848            },
849            TokenKind::HeredocStart => {
850                // Collect heredoc content
851                // parser[impl scalar.heredoc.syntax]
852                let start_span = token.span;
853                let mut content = String::new();
854                let mut end_span = start_span;
855                let mut is_error = false;
856                let mut end_token_text = "";
857
858                loop {
859                    let Some(token) = self.advance() else {
860                        break;
861                    };
862                    match token.kind {
863                        TokenKind::HeredocContent => {
864                            content.push_str(token.text);
865                        }
866                        TokenKind::HeredocEnd => {
867                            end_span = token.span;
868                            end_token_text = token.text;
869                            break;
870                        }
871                        TokenKind::Error => {
872                            // Unterminated heredoc
873                            end_span = token.span;
874                            is_error = true;
875                            break;
876                        }
877                        _ => break,
878                    }
879                }
880
881                // If the closing delimiter was indented, strip that indentation from content lines
882                // Per parser[scalar.heredoc.syntax]: The closing delimiter line MAY be indented;
883                // that indentation is stripped from content lines.
884                let indent_len = end_token_text
885                    .chars()
886                    .take_while(|c| *c == ' ' || *c == '\t')
887                    .count();
888                if indent_len > 0 && !content.is_empty() {
889                    content = Self::dedent_heredoc_content(&content, indent_len);
890                }
891
892                if is_error {
893                    Atom {
894                        span: Span {
895                            start: start_span.start,
896                            end: end_span.end,
897                        },
898                        kind: ScalarKind::Heredoc,
899                        content: AtomContent::Error,
900                        adjacent_block_span: None,
901                    }
902                } else {
903                    Atom {
904                        span: Span {
905                            start: start_span.start,
906                            end: end_span.end,
907                        },
908                        kind: ScalarKind::Heredoc,
909                        content: AtomContent::Heredoc(content),
910                        adjacent_block_span: None,
911                    }
912                }
913            }
914            _ => unreachable!(),
915        }
916    }
917
918    /// Parse an object atom (for nested objects).
919    // parser[impl object.syntax]
920    fn parse_object_atom(&mut self) -> Atom<'src> {
921        trace!("Parsing object");
922        let open = self.advance().unwrap(); // consume '{'
923        let start_span = open.span;
924
925        let mut entries: Vec<ObjectEntry<'src>> = Vec::new();
926        let mut separator_mode: Option<Separator> = None;
927        let mut end_span = start_span;
928        // parser[impl entry.key-equality]
929        // Maps key value to its first occurrence span
930        let mut seen_keys: HashMap<KeyValue, Span> = HashMap::new();
931        // Pairs of (original_span, duplicate_span) for duplicate keys
932        let mut duplicate_key_spans: Vec<(Span, Span)> = Vec::new();
933        // parser[impl object.separators]
934        let mut mixed_separator_spans: Vec<Span> = Vec::new();
935        // parser[impl comment.doc]
936        let mut pending_doc_comments: Vec<(Span, &'src str)> = Vec::new();
937        let mut dangling_doc_comment_spans: Vec<Span> = Vec::new();
938        // Track whether the object was properly closed
939        let mut unclosed = false;
940
941        loop {
942            // Only skip horizontal whitespace initially
943            self.skip_whitespace();
944
945            let Some(token) = self.peek() else {
946                // Unclosed object - EOF
947                unclosed = true;
948                // Check for dangling doc comments
949                for (span, _) in &pending_doc_comments {
950                    dangling_doc_comment_spans.push(*span);
951                }
952                break;
953            };
954
955            // Capture span before matching (needed for error reporting)
956            let token_span = token.span;
957
958            match token.kind {
959                TokenKind::RBrace => {
960                    // Check for dangling doc comments before closing
961                    for (span, _) in &pending_doc_comments {
962                        dangling_doc_comment_spans.push(*span);
963                    }
964                    let close = self.advance().unwrap();
965                    end_span = close.span;
966                    break;
967                }
968
969                TokenKind::Newline => {
970                    // parser[impl object.separators]
971                    if separator_mode == Some(Separator::Comma) {
972                        // Error: mixed separators - record span and continue parsing
973                        mixed_separator_spans.push(token_span);
974                    }
975                    separator_mode = Some(Separator::Newline);
976                    self.advance();
977                    // Consume consecutive newlines
978                    while matches!(self.peek(), Some(t) if t.kind == TokenKind::Newline) {
979                        self.advance();
980                    }
981                }
982
983                TokenKind::Comma => {
984                    // parser[impl object.separators]
985                    if separator_mode == Some(Separator::Newline) {
986                        // Error: mixed separators - record span and continue parsing
987                        mixed_separator_spans.push(token_span);
988                    }
989                    separator_mode = Some(Separator::Comma);
990                    self.advance();
991                }
992
993                TokenKind::LineComment => {
994                    // Skip line comments
995                    self.advance();
996                }
997
998                TokenKind::DocComment => {
999                    // Accumulate doc comments for the next entry (supports multi-line)
1000                    let doc_token = self.advance().unwrap();
1001                    pending_doc_comments.push((doc_token.span, doc_token.text));
1002                }
1003
1004                TokenKind::Eof => {
1005                    // Unclosed object
1006                    unclosed = true;
1007                    for (span, _) in &pending_doc_comments {
1008                        dangling_doc_comment_spans.push(*span);
1009                    }
1010                    break;
1011                }
1012
1013                _ => {
1014                    // Capture and clear pending doc comments for this entry
1015                    let doc_comments = std::mem::take(&mut pending_doc_comments);
1016
1017                    // Parse entry atoms
1018                    let entry_atoms = self.collect_entry_atoms();
1019                    if !entry_atoms.is_empty() {
1020                        let key = entry_atoms[0].clone();
1021
1022                        // parser[impl entry.key-equality]
1023                        // Check for duplicate key
1024                        let key_value = KeyValue::from_atom(&key, self);
1025                        if let Some(&original_span) = seen_keys.get(&key_value) {
1026                            duplicate_key_spans.push((original_span, key.span));
1027                        } else {
1028                            seen_keys.insert(key_value, key.span);
1029                        }
1030
1031                        let (value, too_many_atoms_span) = if entry_atoms.len() == 1 {
1032                            // Just a key, implicit unit value
1033                            (
1034                                Atom {
1035                                    span: key.span,
1036                                    kind: ScalarKind::Bare,
1037                                    content: AtomContent::Unit,
1038                                    adjacent_block_span: None,
1039                                },
1040                                None,
1041                            )
1042                        } else if entry_atoms.len() == 2 {
1043                            // Key and value
1044                            (entry_atoms[1].clone(), None)
1045                        } else {
1046                            // parser[impl entry.toomany]
1047                            // 3+ atoms is an error - use second as value, record third for error
1048                            (entry_atoms[1].clone(), Some(entry_atoms[2].span))
1049                        };
1050                        entries.push(ObjectEntry {
1051                            key,
1052                            value,
1053                            doc_comments,
1054                            too_many_atoms_span,
1055                        });
1056                    }
1057                }
1058            }
1059        }
1060
1061        Atom {
1062            span: Span {
1063                start: start_span.start,
1064                end: end_span.end,
1065            },
1066            kind: ScalarKind::Bare,
1067            content: AtomContent::Object {
1068                entries,
1069                // No separators seen = inline format (like comma-separated)
1070                separator: separator_mode.unwrap_or(Separator::Comma),
1071                duplicate_key_spans,
1072                mixed_separator_spans,
1073                dangling_doc_comment_spans,
1074                unclosed,
1075            },
1076            adjacent_block_span: None,
1077        }
1078    }
1079
1080    /// Parse a sequence atom.
1081    // parser[impl sequence.syntax] parser[impl sequence.elements]
1082    fn parse_sequence_atom(&mut self) -> Atom<'src> {
1083        trace!("Parsing sequence");
1084        let open = self.advance().unwrap(); // consume '('
1085        let start_span = open.span;
1086
1087        let mut elements: Vec<Atom<'src>> = Vec::new();
1088        let mut end_span = start_span;
1089        let mut unclosed = false;
1090        let mut comma_spans: Vec<Span> = Vec::new();
1091
1092        loop {
1093            // Sequences allow whitespace and newlines between elements
1094            self.skip_whitespace_and_newlines();
1095
1096            let Some(token) = self.peek() else {
1097                // Unclosed sequence - EOF
1098                unclosed = true;
1099                break;
1100            };
1101
1102            match token.kind {
1103                TokenKind::RParen => {
1104                    let close = self.advance().unwrap();
1105                    end_span = close.span;
1106                    break;
1107                }
1108
1109                TokenKind::Comma => {
1110                    // Commas are NOT allowed in sequences per spec
1111                    let comma = self.advance().unwrap();
1112                    comma_spans.push(comma.span);
1113                }
1114
1115                TokenKind::LineComment | TokenKind::DocComment => {
1116                    // Skip comments inside sequences
1117                    self.advance();
1118                }
1119
1120                TokenKind::Eof => {
1121                    // Unclosed sequence
1122                    unclosed = true;
1123                    break;
1124                }
1125
1126                _ => {
1127                    // Parse a single element
1128                    if let Some(elem) = self.parse_single_atom() {
1129                        elements.push(elem);
1130                    }
1131                }
1132            }
1133        }
1134
1135        Atom {
1136            span: Span {
1137                start: start_span.start,
1138                end: end_span.end,
1139            },
1140            kind: ScalarKind::Bare,
1141            content: AtomContent::Sequence {
1142                elements,
1143                unclosed,
1144                comma_spans,
1145            },
1146            adjacent_block_span: None,
1147        }
1148    }
1149
1150    /// Parse a single atom (for sequence elements).
1151    fn parse_single_atom(&mut self) -> Option<Atom<'src>> {
1152        let token = self.peek()?;
1153
1154        match token.kind {
1155            TokenKind::BareScalar
1156            | TokenKind::QuotedScalar
1157            | TokenKind::RawScalar
1158            | TokenKind::HeredocStart => Some(self.parse_scalar_atom()),
1159            TokenKind::LBrace => Some(self.parse_object_atom()),
1160            TokenKind::LParen => Some(self.parse_sequence_atom()),
1161            TokenKind::At => Some(self.parse_tag_or_unit_atom()),
1162            _ => None,
1163        }
1164    }
1165
1166    /// Parse a tag or unit atom.
1167    // parser[impl tag.payload] parser[impl value.unit]
1168    fn parse_tag_or_unit_atom(&mut self) -> Atom<'src> {
1169        trace!("Parsing tag or unit");
1170        let at = self.advance().unwrap(); // consume '@'
1171        let start_span = at.span;
1172
1173        // Check if followed by a tag name (must be immediately adjacent, no whitespace)
1174        if let Some(token) = self.peek_raw()
1175            && token.kind == TokenKind::BareScalar
1176            && token.span.start == start_span.end
1177        {
1178            // Tag name immediately follows @
1179            // But the bare scalar may contain @ which is not valid in tag names.
1180            // We need to split at the first @ if present.
1181            let name_token = self.advance().unwrap();
1182            let full_text = name_token.text;
1183
1184            // Find where the tag name ends (at first @ or end of token)
1185            let tag_name_len = full_text.find('@').unwrap_or(full_text.len());
1186            let name = &full_text[..tag_name_len];
1187            let name_span = Span {
1188                start: name_token.span.start,
1189                end: name_token.span.start + tag_name_len as u32,
1190            };
1191            let name_end = name_span.end;
1192
1193            // If there's leftover after the tag name (starting with @), we need to handle it
1194            // For now, if the tag name is empty (token started with @), this is @@ which is
1195            // unit followed by unit - but that should have been lexed differently.
1196            // If tag name is non-empty and there's @ after, that @ is the unit payload.
1197            let has_trailing_at = tag_name_len < full_text.len();
1198
1199            // parser[impl tag.syntax]
1200            // Validate tag name: must match @[A-Za-z_][A-Za-z0-9_.-]*
1201            let invalid_tag_name = name.is_empty() || !Self::is_valid_tag_name(name);
1202
1203            // Check for payload
1204            let payload = if has_trailing_at {
1205                // The @ after the tag name is the payload (unit)
1206                // Any text after that @ is also part of this token but we ignore it
1207                // since it would be invalid anyway (e.g., @foo@bar is @foo with unit @, then bar is separate)
1208                let at_pos = name_token.span.start + tag_name_len as u32;
1209                Some(Atom {
1210                    span: Span {
1211                        start: at_pos,
1212                        end: at_pos + 1,
1213                    },
1214                    kind: ScalarKind::Bare,
1215                    content: AtomContent::Unit,
1216                    adjacent_block_span: None,
1217                })
1218            } else {
1219                // Check for payload (must immediately follow tag name, no whitespace)
1220                self.parse_tag_payload(name_end)
1221            };
1222            let end_span = payload.as_ref().map(|p| p.span.end).unwrap_or(name_end);
1223
1224            return Atom {
1225                span: Span {
1226                    start: start_span.start,
1227                    end: end_span,
1228                },
1229                kind: ScalarKind::Bare,
1230                content: AtomContent::Tag {
1231                    name,
1232                    payload: payload.map(Box::new),
1233                    invalid_name_span: if invalid_tag_name {
1234                        Some(name_span)
1235                    } else {
1236                        None
1237                    },
1238                },
1239                adjacent_block_span: None,
1240            };
1241        }
1242
1243        // Just @ (unit)
1244        Atom {
1245            span: start_span,
1246            kind: ScalarKind::Bare,
1247            content: AtomContent::Unit,
1248            adjacent_block_span: None,
1249        }
1250    }
1251
1252    /// Check if a tag name is valid per parser[tag.syntax].
1253    /// Must match pattern: [A-Za-z_][A-Za-z0-9_-]*
1254    /// Note: dots are NOT allowed in tag names (they are path separators in keys).
1255    // parser[impl tag.syntax]
1256    fn is_valid_tag_name(name: &str) -> bool {
1257        let mut chars = name.chars();
1258
1259        // First char: letter or underscore
1260        match chars.next() {
1261            Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
1262            _ => return false,
1263        }
1264
1265        // Rest: alphanumeric, underscore, or hyphen (no dots!)
1266        chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
1267    }
1268
1269    /// Parse a tag payload if present (must immediately follow tag name).
1270    // parser[impl tag.payload]
1271    fn parse_tag_payload(&mut self, after_name: u32) -> Option<Atom<'src>> {
1272        let Some(token) = self.peek_raw() else {
1273            return None; // implicit unit
1274        };
1275
1276        // Payload must immediately follow tag name (no whitespace)
1277        if token.span.start != after_name {
1278            return None; // implicit unit
1279        }
1280
1281        match token.kind {
1282            // @tag{...} - tagged object
1283            TokenKind::LBrace => Some(self.parse_object_atom()),
1284            // @tag(...) - tagged sequence
1285            TokenKind::LParen => Some(self.parse_sequence_atom()),
1286            // @tag"..." or @tagr#"..."# or @tag<<HEREDOC - tagged scalar
1287            TokenKind::QuotedScalar | TokenKind::RawScalar | TokenKind::HeredocStart => {
1288                Some(self.parse_scalar_atom())
1289            }
1290            // @tag@ - explicit tagged unit
1291            TokenKind::At => {
1292                let at = self.advance().unwrap();
1293                Some(Atom {
1294                    span: at.span,
1295                    kind: ScalarKind::Bare,
1296                    content: AtomContent::Unit,
1297                    adjacent_block_span: None,
1298                })
1299            }
1300            // Anything else - implicit unit (no payload)
1301            _ => None,
1302        }
1303    }
1304
1305    /// Emit an atom as a value event.
1306    fn emit_atom_as_value<C: ParseCallback<'src>>(
1307        &self,
1308        atom: &Atom<'src>,
1309        callback: &mut C,
1310    ) -> bool {
1311        match &atom.content {
1312            AtomContent::Scalar(text) => {
1313                // parser[impl scalar.quoted.escapes]
1314                // Validate escape sequences for quoted scalars
1315                if atom.kind == ScalarKind::Quoted {
1316                    for (offset, seq) in Self::validate_quoted_escapes(text) {
1317                        let error_start = atom.span.start + offset as u32;
1318                        let error_span = Span::new(error_start, error_start + seq.len() as u32);
1319                        if !callback.event(Event::Error {
1320                            span: error_span,
1321                            kind: ParseErrorKind::InvalidEscape(seq),
1322                        }) {
1323                            return false;
1324                        }
1325                    }
1326                }
1327                callback.event(Event::Scalar {
1328                    span: atom.span,
1329                    value: self.process_scalar(text, atom.kind),
1330                    kind: atom.kind,
1331                })
1332            }
1333            AtomContent::Heredoc(content) => callback.event(Event::Scalar {
1334                span: atom.span,
1335                value: Cow::Owned(content.clone()),
1336                kind: ScalarKind::Heredoc,
1337            }),
1338            AtomContent::Unit => callback.event(Event::Unit { span: atom.span }),
1339            // parser[impl tag.payload]
1340            AtomContent::Tag {
1341                name,
1342                payload,
1343                invalid_name_span,
1344            } => {
1345                // parser[impl tag.syntax]
1346                // Emit error for invalid tag name
1347                if let Some(span) = invalid_name_span
1348                    && !callback.event(Event::Error {
1349                        span: *span,
1350                        kind: ParseErrorKind::InvalidTagName,
1351                    })
1352                {
1353                    return false;
1354                }
1355
1356                if !callback.event(Event::TagStart {
1357                    span: atom.span,
1358                    name,
1359                }) {
1360                    return false;
1361                }
1362                // Emit payload if present
1363                if let Some(payload) = payload
1364                    && !self.emit_atom_as_value(payload, callback)
1365                {
1366                    return false;
1367                }
1368                // If no payload, it's an implicit unit (TagEnd implies it)
1369                callback.event(Event::TagEnd)
1370            }
1371            // parser[impl object.syntax]
1372            AtomContent::Object {
1373                entries,
1374                separator,
1375                duplicate_key_spans,
1376                mixed_separator_spans,
1377                dangling_doc_comment_spans,
1378                unclosed,
1379            } => {
1380                if !callback.event(Event::ObjectStart {
1381                    span: atom.span,
1382                    separator: *separator,
1383                }) {
1384                    return false;
1385                }
1386
1387                // Emit error for unclosed object
1388                if *unclosed
1389                    && !callback.event(Event::Error {
1390                        span: atom.span,
1391                        kind: ParseErrorKind::UnclosedObject,
1392                    })
1393                {
1394                    return false;
1395                }
1396
1397                // parser[impl entry.key-equality]
1398                // Emit errors for duplicate keys
1399                for (original_span, dup_span) in duplicate_key_spans {
1400                    if !callback.event(Event::Error {
1401                        span: *dup_span,
1402                        kind: ParseErrorKind::DuplicateKey {
1403                            original: *original_span,
1404                        },
1405                    }) {
1406                        return false;
1407                    }
1408                }
1409
1410                // parser[impl object.separators]
1411                // Emit errors for mixed separators
1412                for mix_span in mixed_separator_spans {
1413                    if !callback.event(Event::Error {
1414                        span: *mix_span,
1415                        kind: ParseErrorKind::MixedSeparators,
1416                    }) {
1417                        return false;
1418                    }
1419                }
1420
1421                // parser[impl comment.doc]
1422                // Emit errors for dangling doc comments
1423                for doc_span in dangling_doc_comment_spans {
1424                    if !callback.event(Event::Error {
1425                        span: *doc_span,
1426                        kind: ParseErrorKind::DanglingDocComment,
1427                    }) {
1428                        return false;
1429                    }
1430                }
1431
1432                for entry in entries {
1433                    // Emit doc comments before entry (supports multi-line)
1434                    for (span, text) in &entry.doc_comments {
1435                        if !callback.event(Event::DocComment { span: *span, text }) {
1436                            return false;
1437                        }
1438                    }
1439                    if !callback.event(Event::EntryStart) {
1440                        return false;
1441                    }
1442                    if !self.emit_atom_as_key(&entry.key, callback) {
1443                        return false;
1444                    }
1445                    if !self.emit_atom_as_value(&entry.value, callback) {
1446                        return false;
1447                    }
1448                    // parser[impl entry.toomany]
1449                    // Emit error for too many atoms
1450                    if let Some(span) = entry.too_many_atoms_span
1451                        && !callback.event(Event::Error {
1452                            span,
1453                            kind: ParseErrorKind::TooManyAtoms,
1454                        })
1455                    {
1456                        return false;
1457                    }
1458                    if !callback.event(Event::EntryEnd) {
1459                        return false;
1460                    }
1461                }
1462
1463                callback.event(Event::ObjectEnd { span: atom.span })
1464            }
1465            // parser[impl sequence.syntax] parser[impl sequence.elements]
1466            AtomContent::Sequence {
1467                elements,
1468                unclosed,
1469                comma_spans,
1470            } => {
1471                if !callback.event(Event::SequenceStart { span: atom.span }) {
1472                    return false;
1473                }
1474
1475                // Emit error for unclosed sequence
1476                if *unclosed
1477                    && !callback.event(Event::Error {
1478                        span: atom.span,
1479                        kind: ParseErrorKind::UnclosedSequence,
1480                    })
1481                {
1482                    return false;
1483                }
1484
1485                // Emit errors for commas in sequence
1486                for comma_span in comma_spans {
1487                    if !callback.event(Event::Error {
1488                        span: *comma_span,
1489                        kind: ParseErrorKind::CommaInSequence,
1490                    }) {
1491                        return false;
1492                    }
1493                }
1494
1495                for elem in elements {
1496                    if !self.emit_atom_as_value(elem, callback) {
1497                        return false;
1498                    }
1499                }
1500
1501                callback.event(Event::SequenceEnd { span: atom.span })
1502            }
1503            // parser[impl attr.atom]
1504            AtomContent::Attributes {
1505                entries,
1506                trailing_gt_spans,
1507            } => {
1508                // Emit errors for trailing > without value
1509                for gt_span in trailing_gt_spans {
1510                    if !callback.event(Event::Error {
1511                        span: *gt_span,
1512                        kind: ParseErrorKind::ExpectedValue,
1513                    }) {
1514                        return false;
1515                    }
1516                }
1517
1518                // Emit as comma-separated object
1519                if !callback.event(Event::ObjectStart {
1520                    span: atom.span,
1521                    separator: Separator::Comma,
1522                }) {
1523                    return false;
1524                }
1525
1526                for attr in entries {
1527                    if !callback.event(Event::EntryStart) {
1528                        return false;
1529                    }
1530                    // Attribute keys are always bare scalars
1531                    if !callback.event(Event::Key {
1532                        span: attr.key_span,
1533                        tag: None,
1534                        payload: Some(Cow::Borrowed(attr.key)),
1535                        kind: ScalarKind::Bare,
1536                    }) {
1537                        return false;
1538                    }
1539                    if !self.emit_atom_as_value(&attr.value, callback) {
1540                        return false;
1541                    }
1542                    if !callback.event(Event::EntryEnd) {
1543                        return false;
1544                    }
1545                }
1546
1547                callback.event(Event::ObjectEnd { span: atom.span })
1548            }
1549            AtomContent::Error => {
1550                // Error atom - emit as unexpected token error
1551                callback.event(Event::Error {
1552                    span: atom.span,
1553                    kind: ParseErrorKind::UnexpectedToken,
1554                })
1555            }
1556        }
1557    }
1558
1559    /// Emit an atom as a key event.
1560    ///
1561    /// Keys can be scalars or unit, optionally tagged.
1562    /// Objects, sequences, and heredocs are not allowed as keys.
1563    // parser[impl entry.keys]
1564    fn emit_atom_as_key<C: ParseCallback<'src>>(
1565        &self,
1566        atom: &Atom<'src>,
1567        callback: &mut C,
1568    ) -> bool {
1569        // parser[impl entry.whitespace]
1570        // Check for missing whitespace before { or ( after bare scalar
1571        if let Some(span) = atom.adjacent_block_span
1572            && !callback.event(Event::Error {
1573                span,
1574                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
1575            })
1576        {
1577            return false;
1578        }
1579
1580        match &atom.content {
1581            AtomContent::Scalar(text) => {
1582                // parser[impl scalar.quoted.escapes]
1583                // Validate escape sequences for quoted scalars
1584                if atom.kind == ScalarKind::Quoted {
1585                    for (offset, seq) in Self::validate_quoted_escapes(text) {
1586                        let error_start = atom.span.start + offset as u32;
1587                        let error_span = Span::new(error_start, error_start + seq.len() as u32);
1588                        if !callback.event(Event::Error {
1589                            span: error_span,
1590                            kind: ParseErrorKind::InvalidEscape(seq),
1591                        }) {
1592                            return false;
1593                        }
1594                    }
1595                }
1596                callback.event(Event::Key {
1597                    span: atom.span,
1598                    tag: None,
1599                    payload: Some(self.process_scalar(text, atom.kind)),
1600                    kind: atom.kind,
1601                })
1602            }
1603            AtomContent::Heredoc(_) => {
1604                // Heredocs are not allowed as keys
1605                callback.event(Event::Error {
1606                    span: atom.span,
1607                    kind: ParseErrorKind::InvalidKey,
1608                })
1609            }
1610            AtomContent::Unit => callback.event(Event::Key {
1611                span: atom.span,
1612                tag: None,
1613                payload: None,
1614                kind: ScalarKind::Bare,
1615            }),
1616            AtomContent::Tag {
1617                name,
1618                payload,
1619                invalid_name_span,
1620            } => {
1621                // Emit error for invalid tag name
1622                if let Some(span) = invalid_name_span
1623                    && !callback.event(Event::Error {
1624                        span: *span,
1625                        kind: ParseErrorKind::InvalidTagName,
1626                    })
1627                {
1628                    return false;
1629                }
1630
1631                match payload {
1632                    None => {
1633                        // Tagged unit key: @tag
1634                        callback.event(Event::Key {
1635                            span: atom.span,
1636                            tag: Some(name),
1637                            payload: None,
1638                            kind: ScalarKind::Bare,
1639                        })
1640                    }
1641                    Some(inner) => match &inner.content {
1642                        AtomContent::Scalar(text) => {
1643                            // parser[impl scalar.quoted.escapes]
1644                            // Validate escape sequences for quoted scalars
1645                            if inner.kind == ScalarKind::Quoted {
1646                                for (offset, seq) in Self::validate_quoted_escapes(text) {
1647                                    let error_start = inner.span.start + offset as u32;
1648                                    let error_span =
1649                                        Span::new(error_start, error_start + seq.len() as u32);
1650                                    if !callback.event(Event::Error {
1651                                        span: error_span,
1652                                        kind: ParseErrorKind::InvalidEscape(seq),
1653                                    }) {
1654                                        return false;
1655                                    }
1656                                }
1657                            }
1658                            // Tagged scalar key: @tag"value"
1659                            callback.event(Event::Key {
1660                                span: atom.span,
1661                                tag: Some(name),
1662                                payload: Some(self.process_scalar(text, inner.kind)),
1663                                kind: inner.kind,
1664                            })
1665                        }
1666                        AtomContent::Unit => {
1667                            // Tagged unit key: @tag@
1668                            callback.event(Event::Key {
1669                                span: atom.span,
1670                                tag: Some(name),
1671                                payload: None,
1672                                kind: ScalarKind::Bare,
1673                            })
1674                        }
1675                        AtomContent::Heredoc(_)
1676                        | AtomContent::Object { .. }
1677                        | AtomContent::Sequence { .. }
1678                        | AtomContent::Tag { .. }
1679                        | AtomContent::Attributes { .. }
1680                        | AtomContent::Error => {
1681                            // Invalid key payload
1682                            callback.event(Event::Error {
1683                                span: inner.span,
1684                                kind: ParseErrorKind::InvalidKey,
1685                            })
1686                        }
1687                    },
1688                }
1689            }
1690            AtomContent::Object { .. }
1691            | AtomContent::Sequence { .. }
1692            | AtomContent::Attributes { .. }
1693            | AtomContent::Error => {
1694                // Objects, sequences, error tokens not allowed as keys
1695                callback.event(Event::Error {
1696                    span: atom.span,
1697                    kind: ParseErrorKind::InvalidKey,
1698                })
1699            }
1700        }
1701    }
1702
1703    /// Process a scalar, handling escapes for quoted strings and stripping delimiters for raw strings.
1704    fn process_scalar(&self, text: &'src str, kind: ScalarKind) -> Cow<'src, str> {
1705        match kind {
1706            ScalarKind::Bare | ScalarKind::Heredoc => Cow::Borrowed(text),
1707            ScalarKind::Raw => Cow::Borrowed(Self::strip_raw_delimiters(text)),
1708            ScalarKind::Quoted => self.unescape_quoted(text),
1709        }
1710    }
1711
1712    /// Validate escape sequences in a quoted string and return invalid escapes.
1713    /// Returns a list of (byte_offset_within_string, invalid_sequence) pairs.
1714    /// parser[impl scalar.quoted.escapes]
1715    fn validate_quoted_escapes(text: &str) -> Vec<(usize, String)> {
1716        let mut errors = Vec::new();
1717
1718        // Remove surrounding quotes for validation
1719        let inner = if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1720            &text[1..text.len() - 1]
1721        } else {
1722            text
1723        };
1724
1725        let mut chars = inner.char_indices().peekable();
1726
1727        while let Some((i, c)) = chars.next() {
1728            if c == '\\' {
1729                let escape_start = i;
1730                match chars.next() {
1731                    Some((_, 'n' | 'r' | 't' | '\\' | '"')) => {
1732                        // Valid escape
1733                    }
1734                    Some((_, 'u')) => {
1735                        // Unicode escape - validate format
1736                        match chars.peek() {
1737                            Some((_, '{')) => {
1738                                // \u{X...} form - consume until }
1739                                chars.next(); // consume '{'
1740                                let mut valid = true;
1741                                let mut found_close = false;
1742                                for (_, c) in chars.by_ref() {
1743                                    if c == '}' {
1744                                        found_close = true;
1745                                        break;
1746                                    }
1747                                    if !c.is_ascii_hexdigit() {
1748                                        valid = false;
1749                                    }
1750                                }
1751                                if !found_close || !valid {
1752                                    // Extract the sequence for error reporting
1753                                    let end = chars.peek().map(|(i, _)| *i).unwrap_or(inner.len());
1754                                    let seq = &inner[escape_start..end.min(escape_start + 12)];
1755                                    errors.push((escape_start + 1, format!("\\{}", &seq[1..])));
1756                                }
1757                            }
1758                            Some((_, c)) if c.is_ascii_hexdigit() => {
1759                                // \uXXXX form - need exactly 4 hex digits
1760                                let mut count = 1;
1761                                while count < 4 {
1762                                    match chars.peek() {
1763                                        Some((_, c)) if c.is_ascii_hexdigit() => {
1764                                            chars.next();
1765                                            count += 1;
1766                                        }
1767                                        _ => break,
1768                                    }
1769                                }
1770                                if count != 4 {
1771                                    let end = chars.peek().map(|(i, _)| *i).unwrap_or(inner.len());
1772                                    let seq = &inner[escape_start..end];
1773                                    errors.push((escape_start + 1, seq.to_string()));
1774                                }
1775                            }
1776                            _ => {
1777                                // Invalid \u with no hex digits
1778                                errors.push((escape_start + 1, "\\u".to_string()));
1779                            }
1780                        }
1781                    }
1782                    Some((_, c)) => {
1783                        // Invalid escape sequence
1784                        errors.push((escape_start + 1, format!("\\{}", c)));
1785                    }
1786                    None => {
1787                        // Trailing backslash
1788                        errors.push((escape_start + 1, "\\".to_string()));
1789                    }
1790                }
1791            }
1792        }
1793
1794        errors
1795    }
1796
1797    /// Dedent heredoc content by stripping `indent_len` characters from the start of each line.
1798    /// Per parser[scalar.heredoc.syntax]: when the closing delimiter is indented,
1799    /// that indentation is stripped from content lines.
1800    fn dedent_heredoc_content(content: &str, indent_len: usize) -> String {
1801        content
1802            .lines()
1803            .map(|line| {
1804                // Strip up to indent_len whitespace characters from the start of each line
1805                let mut chars = line.chars();
1806                let mut stripped = 0;
1807                while stripped < indent_len {
1808                    match chars.clone().next() {
1809                        Some(' ') | Some('\t') => {
1810                            chars.next();
1811                            stripped += 1;
1812                        }
1813                        _ => break,
1814                    }
1815                }
1816                chars.as_str()
1817            })
1818            .collect::<Vec<_>>()
1819            .join("\n")
1820            + if content.ends_with('\n') { "\n" } else { "" }
1821    }
1822
1823    /// Strip the r#*"..."#* delimiters from a raw string, returning just the content.
1824    fn strip_raw_delimiters(text: &str) -> &str {
1825        // Raw string format: r#*"content"#*
1826        // Skip the 'r'
1827        let after_r = text.strip_prefix('r').unwrap_or(text);
1828
1829        // Count and skip opening #s
1830        let hash_count = after_r.chars().take_while(|&c| c == '#').count();
1831        let after_hashes = &after_r[hash_count..];
1832
1833        // Skip opening "
1834        let after_quote = after_hashes.strip_prefix('"').unwrap_or(after_hashes);
1835
1836        // Remove closing "# sequence
1837        let closing_len = 1 + hash_count; // " + #s
1838        if after_quote.len() >= closing_len {
1839            &after_quote[..after_quote.len() - closing_len]
1840        } else {
1841            after_quote
1842        }
1843    }
1844
1845    /// Unescape a quoted string.
1846    fn unescape_quoted(&self, text: &'src str) -> Cow<'src, str> {
1847        // Remove surrounding quotes
1848        let inner = if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1849            &text[1..text.len() - 1]
1850        } else {
1851            text
1852        };
1853
1854        // Check if any escapes present
1855        if !inner.contains('\\') {
1856            return Cow::Borrowed(inner);
1857        }
1858
1859        // Process escapes
1860        let mut result = String::with_capacity(inner.len());
1861        let mut chars = inner.chars().peekable();
1862
1863        while let Some(c) = chars.next() {
1864            if c == '\\' {
1865                match chars.next() {
1866                    Some('n') => result.push('\n'),
1867                    Some('r') => result.push('\r'),
1868                    Some('t') => result.push('\t'),
1869                    Some('\\') => result.push('\\'),
1870                    Some('"') => result.push('"'),
1871                    // parser[impl scalar.quoted.escapes]
1872                    Some('u') => {
1873                        // Unicode escape: \u{X...} or \uXXXX
1874                        match chars.peek() {
1875                            Some('{') => {
1876                                // \u{X...} form - variable length
1877                                chars.next(); // consume '{'
1878                                let mut hex = String::new();
1879                                while let Some(&c) = chars.peek() {
1880                                    if c == '}' {
1881                                        chars.next();
1882                                        break;
1883                                    }
1884                                    hex.push(chars.next().unwrap());
1885                                }
1886                                if let Ok(code) = u32::from_str_radix(&hex, 16)
1887                                    && let Some(ch) = char::from_u32(code)
1888                                {
1889                                    result.push(ch);
1890                                }
1891                            }
1892                            Some(c) if c.is_ascii_hexdigit() => {
1893                                // \uXXXX form - exactly 4 hex digits
1894                                let mut hex = String::with_capacity(4);
1895                                for _ in 0..4 {
1896                                    if let Some(&c) = chars.peek() {
1897                                        if c.is_ascii_hexdigit() {
1898                                            hex.push(chars.next().unwrap());
1899                                        } else {
1900                                            break;
1901                                        }
1902                                    } else {
1903                                        break;
1904                                    }
1905                                }
1906                                if hex.len() == 4 {
1907                                    if let Ok(code) = u32::from_str_radix(&hex, 16)
1908                                        && let Some(ch) = char::from_u32(code)
1909                                    {
1910                                        result.push(ch);
1911                                    }
1912                                } else {
1913                                    // Invalid escape - not enough digits, keep as-is
1914                                    result.push_str("\\u");
1915                                    result.push_str(&hex);
1916                                }
1917                            }
1918                            _ => {
1919                                // Invalid \u - keep as-is
1920                                result.push_str("\\u");
1921                            }
1922                        }
1923                    }
1924                    Some(c) => {
1925                        // Unknown escape, keep as-is
1926                        result.push('\\');
1927                        result.push(c);
1928                    }
1929                    None => {
1930                        result.push('\\');
1931                    }
1932                }
1933            } else {
1934                result.push(c);
1935            }
1936        }
1937
1938        Cow::Owned(result)
1939    }
1940}
1941
/// An atom collected during entry parsing.
///
/// Bundles the parsed content with the span and scalar kind that are copied
/// into the events emitted for it.
#[derive(Debug, Clone)]
struct Atom<'src> {
    /// Source span of the atom; used as the span of emitted key and error events.
    span: Span,
    /// Scalar kind; read for scalar content to decide how the text is processed
    /// (escape handling for quoted, delimiter stripping for raw).
    kind: ScalarKind,
    /// What was parsed for this atom.
    content: AtomContent<'src>,
    /// Span of adjacent `{` or `(` without whitespace (for error reporting).
    /// parser[impl entry.whitespace]
    adjacent_block_span: Option<Span>,
}
1952
/// Content of an atom.
///
/// Besides the parsed data, the composite variants carry spans of problems
/// noticed while parsing (duplicate keys, mixed separators, stray commas, ...)
/// for error reporting.
// parser[impl object.syntax] parser[impl sequence.syntax]
#[derive(Debug, Clone)]
enum AtomContent<'src> {
    /// A scalar value (bare, quoted, or raw).
    /// Holds the text as written in the source, borrowed from it.
    Scalar(&'src str),
    /// Heredoc content (owned because it may be processed).
    Heredoc(String),
    /// Unit value `@`.
    Unit,
    /// A tag with optional payload.
    // parser[impl tag.payload]
    Tag {
        /// Tag name, borrowed from the source.
        name: &'src str,
        /// Payload atom, if any; boxed because atoms nest recursively.
        payload: Option<Box<Atom<'src>>>,
        /// Span of invalid tag name (for error reporting).
        // parser[impl tag.syntax]
        invalid_name_span: Option<Span>,
    },
    /// An object with parsed entries.
    // parser[impl object.syntax]
    Object {
        /// Key/value entries of the object.
        entries: Vec<ObjectEntry<'src>>,
        /// Separator recorded for this object (see `Separator`).
        separator: Separator,
        /// Pairs of (original_span, duplicate_span) for duplicate keys.
        duplicate_key_spans: Vec<(Span, Span)>,
        /// Spans of mixed separators (for error reporting).
        // parser[impl object.separators]
        mixed_separator_spans: Vec<Span>,
        /// Spans of dangling doc comments (for error reporting).
        // parser[impl comment.doc]
        dangling_doc_comment_spans: Vec<Span>,
        /// Whether the object was not properly closed (missing `}`).
        unclosed: bool,
    },
    /// A sequence with parsed elements.
    // parser[impl sequence.syntax] parser[impl sequence.elements]
    Sequence {
        /// Elements of the sequence.
        elements: Vec<Atom<'src>>,
        /// Whether the sequence was not properly closed (missing `)`).
        unclosed: bool,
        /// Spans of commas found in the sequence (for error reporting).
        comma_spans: Vec<Span>,
    },
    /// Attributes (key=value pairs that become an object).
    // parser[impl attr.syntax] parser[impl attr.atom]
    Attributes {
        /// The key=value pairs.
        entries: Vec<AttributeEntry<'src>>,
        /// Spans of trailing `>` without values (for error reporting).
        trailing_gt_spans: Vec<Span>,
    },
    /// A lexer error token.
    Error,
}
2007
/// An attribute entry (key=value).
#[derive(Debug, Clone)]
struct AttributeEntry<'src> {
    /// Attribute key text, borrowed from the source.
    key: &'src str,
    /// Source span of the key.
    key_span: Span,
    /// The atom on the value side of the attribute.
    value: Atom<'src>,
}
2015
/// An entry in an object (key-value pair).
#[derive(Debug, Clone)]
struct ObjectEntry<'src> {
    /// The key atom of the entry.
    key: Atom<'src>,
    /// The value atom of the entry.
    value: Atom<'src>,
    /// Doc comments preceding this entry (supports multi-line doc comments).
    /// Each element pairs a comment's span with its text.
    doc_comments: Vec<(Span, &'src str)>,
    /// Span of unexpected third atom (for TooManyAtoms error).
    // parser[impl entry.toomany]
    too_many_atoms_span: Option<Span>,
}
2027
/// A parsed key for equality comparison (duplicate key detection).
///
/// Owns its data and derives `Eq` and `Hash`, so two keys compare equal when
/// their processed values match.
// parser[impl entry.key-equality]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum KeyValue {
    /// Scalar key (after escape processing).
    /// `from_atom` also uses this for heredoc content and for fixed placeholder
    /// strings standing in for objects, sequences, attributes and error tokens.
    Scalar(String),
    /// Unit key (@).
    Unit,
    /// Tagged key.
    Tagged {
        /// Tag name.
        name: String,
        /// Comparison form of the tag's payload, if it has one.
        payload: Option<Box<KeyValue>>,
    },
}
2042
2043impl KeyValue {
2044    /// Create a KeyValue from an Atom for duplicate key comparison.
2045    // parser[impl entry.key-equality]
2046    fn from_atom<'a>(atom: &Atom<'a>, parser: &Parser<'a>) -> Self {
2047        match &atom.content {
2048            AtomContent::Scalar(text) => {
2049                // Process escapes for quoted strings
2050                let processed = parser.process_scalar(text, atom.kind);
2051                KeyValue::Scalar(processed.into_owned())
2052            }
2053            AtomContent::Heredoc(content) => KeyValue::Scalar(content.clone()),
2054            AtomContent::Unit => KeyValue::Unit,
2055            AtomContent::Tag { name, payload, .. } => KeyValue::Tagged {
2056                name: (*name).to_string(),
2057                payload: payload
2058                    .as_ref()
2059                    .map(|p| Box::new(KeyValue::from_atom(p, parser))),
2060            },
2061            // Objects/Sequences as keys are unusual, treat as their text repr
2062            AtomContent::Object { .. } => KeyValue::Scalar("{}".into()),
2063            AtomContent::Sequence { .. } => KeyValue::Scalar("()".into()),
2064            AtomContent::Attributes { .. } => KeyValue::Scalar("{}".into()),
2065            AtomContent::Error => KeyValue::Scalar("<error>".into()),
2066        }
2067    }
2068}
2069
/// Whether a path leads to an object (can have children) or a terminal value.
///
/// `PathState::check_and_update` stores one of these per assigned path and
/// rejects any new path that has a `Terminal` path as a proper prefix.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathValueKind {
    /// Path leads to an object (explicit `{}` or implicit from dotted path).
    Object,
    /// Path leads to a terminal value (scalar, sequence, tag, unit).
    Terminal,
}
2078
/// Tracks dotted path state for sibling detection and reopen errors.
///
/// All three fields are maintained by `check_and_update`; a default-constructed
/// state has no open, closed or assigned paths.
// parser[impl entry.path.sibling] parser[impl entry.path.reopen]
#[derive(Default)]
struct PathState {
    /// The current open path segments.
    /// Replaced by each accepted path.
    current_path: Vec<String>,
    /// Paths that have been closed (sibling appeared at same level).
    /// A later path that has one of these as a proper prefix is rejected.
    closed_paths: std::collections::HashSet<Vec<String>>,
    /// Full paths that have been assigned, with their value kind and span.
    /// Proper prefixes of an accepted path are recorded here as objects.
    assigned_paths: HashMap<Vec<String>, (Span, PathValueKind)>,
}
2090
/// Error returned when path validation fails.
///
/// Produced by `PathState::check_and_update`, which returns before changing
/// any state when it reports one of these.
#[derive(Debug)]
enum PathError {
    /// Exact duplicate path.
    /// `original` is the span stored for the earlier assignment of that path.
    Duplicate { original: Span },
    /// Trying to reopen a closed path.
    /// `closed_path` is the closed proper prefix of the offending path.
    Reopened { closed_path: Vec<String> },
    /// Trying to nest into a terminal value.
    /// `terminal_path` is the proper prefix that holds a terminal value.
    NestIntoTerminal { terminal_path: Vec<String> },
}
2101
2102impl PathState {
2103    /// Check a path and update state. Returns error if path is invalid.
2104    fn check_and_update(
2105        &mut self,
2106        path: &[String],
2107        span: Span,
2108        value_kind: PathValueKind,
2109    ) -> Result<(), PathError> {
2110        // 1. Check for duplicate (exact same path)
2111        if let Some(&(original, _)) = self.assigned_paths.get(path) {
2112            return Err(PathError::Duplicate { original });
2113        }
2114
2115        // 2. Check if any proper prefix is closed or has a terminal value
2116        for i in 1..path.len() {
2117            let prefix = &path[..i];
2118            if self.closed_paths.contains(prefix) {
2119                return Err(PathError::Reopened {
2120                    closed_path: prefix.to_vec(),
2121                });
2122            }
2123            if let Some(&(_, PathValueKind::Terminal)) = self.assigned_paths.get(prefix) {
2124                return Err(PathError::NestIntoTerminal {
2125                    terminal_path: prefix.to_vec(),
2126                });
2127            }
2128        }
2129
2130        // 3. Find common prefix length with current path
2131        let common_len = self
2132            .current_path
2133            .iter()
2134            .zip(path.iter())
2135            .take_while(|(a, b)| a == b)
2136            .count();
2137
2138        // 4. Close paths beyond the common prefix
2139        // Everything in current_path[common_len..] gets closed
2140        for i in common_len..self.current_path.len() {
2141            let closed: Vec<String> = self.current_path[..=i].to_vec();
2142            self.closed_paths.insert(closed);
2143        }
2144
2145        // 5. Record intermediate path segments as objects (if not already assigned)
2146        for i in 1..path.len() {
2147            let prefix = path[..i].to_vec();
2148            self.assigned_paths
2149                .entry(prefix)
2150                .or_insert((span, PathValueKind::Object));
2151        }
2152
2153        // 6. Update assigned paths and current path
2154        self.assigned_paths
2155            .insert(path.to_vec(), (span, value_kind));
2156        self.current_path = path.to_vec();
2157
2158        Ok(())
2159    }
2160}
2161
2162#[cfg(test)]
2163mod tests {
2164    use super::*;
2165    use facet_testhelpers::test;
2166
2167    fn parse(source: &str) -> Vec<Event<'_>> {
2168        tracing::debug!(source, "parsing");
2169        let events = Parser::new(source).parse_to_vec();
2170        tracing::debug!(?events, "parsed");
2171        events
2172    }
2173
2174    /// Parse and log events for debugging
2175    #[allow(dead_code)]
2176    fn parse_debug(source: &str) -> Vec<Event<'_>> {
2177        tracing::info!(source, "parsing (debug mode)");
2178        let events = Parser::new(source).parse_to_vec();
2179        tracing::info!(?events, "parsed events");
2180        events
2181    }
2182
2183    #[test]
2184    fn test_empty_document() {
2185        let events = parse("");
2186        assert_eq!(events, vec![Event::DocumentStart, Event::DocumentEnd]);
2187    }
2188
2189    #[test]
2190    fn test_simple_entry() {
2191        let events = parse("foo bar");
2192        assert!(events.contains(&Event::DocumentStart));
2193        assert!(events.contains(&Event::DocumentEnd));
2194        assert!(
2195            events
2196                .iter()
2197                .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "foo"))
2198        );
2199        assert!(
2200            events
2201                .iter()
2202                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "bar"))
2203        );
2204    }
2205
2206    #[test]
2207    fn test_key_only() {
2208        let events = parse("foo");
2209        assert!(
2210            events
2211                .iter()
2212                .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "foo"))
2213        );
2214        assert!(events.iter().any(|e| matches!(e, Event::Unit { .. })));
2215    }
2216
2217    #[test]
2218    fn test_multiple_entries() {
2219        let events = parse("foo bar\nbaz qux");
2220        let keys: Vec<_> = events
2221            .iter()
2222            .filter_map(|e| match e {
2223                Event::Key {
2224                    payload: Some(value),
2225                    ..
2226                } => Some(value.as_ref()),
2227                _ => None,
2228            })
2229            .collect();
2230        assert_eq!(keys, vec!["foo", "baz"]);
2231    }
2232
2233    #[test]
2234    fn test_quoted_string() {
2235        let events = parse(r#"name "hello world""#);
2236        assert!(events
2237            .iter()
2238            .any(|e| matches!(e, Event::Scalar { value, kind: ScalarKind::Quoted, .. } if value == "hello world")));
2239    }
2240
2241    #[test]
2242    fn test_quoted_escape() {
2243        let events = parse(r#"msg "hello\nworld""#);
2244        assert!(
2245            events
2246                .iter()
2247                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "hello\nworld"))
2248        );
2249    }
2250
2251    #[test]
2252    fn test_too_many_atoms() {
2253        // parser[verify entry.toomany]
2254        // 3+ atoms should produce an error
2255        let events = parse("a b c");
2256        // Should produce: key=a, value=b, error on c
2257        assert!(
2258            events
2259                .iter()
2260                .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "a"))
2261        );
2262        assert!(
2263            events
2264                .iter()
2265                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "b"))
2266        );
2267        assert!(events.iter().any(|e| matches!(
2268            e,
2269            Event::Error {
2270                kind: ParseErrorKind::TooManyAtoms,
2271                ..
2272            }
2273        )));
2274    }
2275
2276    #[test]
2277    fn test_unit_value() {
2278        let events = parse("flag @");
2279        assert!(events.iter().any(|e| matches!(e, Event::Unit { .. })));
2280    }
2281
2282    #[test]
2283    fn test_unit_key() {
2284        // @ followed by whitespace then value should emit Key with payload: None (unit key)
2285        let events = parse("@ server.schema.styx");
2286        trace!(?events, "parsed events for unit key test");
2287        // Should have: DocumentStart, EntryStart, Key (unit), Scalar (value), EntryEnd, DocumentEnd
2288        assert!(
2289            events.iter().any(|e| matches!(
2290                e,
2291                Event::Key {
2292                    payload: None,
2293                    tag: None,
2294                    ..
2295                }
2296            )),
2297            "should have Key event with payload: None (unit key), got: {:?}",
2298            events
2299        );
2300    }
2301
2302    #[test]
2303    fn test_tag() {
2304        let events = parse("type @user");
2305        assert!(
2306            events
2307                .iter()
2308                .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "user"))
2309        );
2310    }
2311
2312    #[test]
2313    fn test_comments() {
2314        let events = parse("// comment\nfoo bar");
2315        assert!(events.iter().any(|e| matches!(e, Event::Comment { .. })));
2316        assert!(
2317            events
2318                .iter()
2319                .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "foo"))
2320        );
2321    }
2322
2323    #[test]
2324    fn test_doc_comments() {
2325        let events = parse("/// doc\nfoo bar");
2326        assert!(events.iter().any(|e| matches!(e, Event::DocComment { .. })));
2327    }
2328
2329    // parser[verify comment.doc]
2330    #[test]
2331    fn test_doc_comment_followed_by_entry_ok() {
2332        let events = parse("/// documentation\nkey value");
2333        // Doc comment followed by entry is valid
2334        assert!(events.iter().any(|e| matches!(e, Event::DocComment { .. })));
2335        assert!(!events.iter().any(|e| matches!(
2336            e,
2337            Event::Error {
2338                kind: ParseErrorKind::DanglingDocComment,
2339                ..
2340            }
2341        )));
2342    }
2343
2344    // parser[verify comment.doc]
2345    #[test]
2346    fn test_doc_comment_at_eof_error() {
2347        let events = parse("foo bar\n/// dangling");
2348        assert!(events.iter().any(|e| matches!(
2349            e,
2350            Event::Error {
2351                kind: ParseErrorKind::DanglingDocComment,
2352                ..
2353            }
2354        )));
2355    }
2356
2357    // parser[verify comment.doc]
2358    #[test]
2359    fn test_doc_comment_before_closing_brace_error() {
2360        let events = parse("{foo bar\n/// dangling\n}");
2361        assert!(events.iter().any(|e| matches!(
2362            e,
2363            Event::Error {
2364                kind: ParseErrorKind::DanglingDocComment,
2365                ..
2366            }
2367        )));
2368    }
2369
2370    // parser[verify comment.doc]
2371    #[test]
2372    fn test_multiple_doc_comments_before_entry_ok() {
2373        let events = parse("/// line 1\n/// line 2\nkey value");
2374        // Multiple consecutive doc comments before entry is fine
2375        let doc_count = events
2376            .iter()
2377            .filter(|e| matches!(e, Event::DocComment { .. }))
2378            .count();
2379        assert_eq!(doc_count, 2);
2380        assert!(!events.iter().any(|e| matches!(
2381            e,
2382            Event::Error {
2383                kind: ParseErrorKind::DanglingDocComment,
2384                ..
2385            }
2386        )));
2387    }
2388
2389    // parser[verify object.syntax]
2390    #[test]
2391    fn test_nested_object() {
2392        let events = parse("outer {inner {x 1}}");
2393        // Should have nested ObjectStart/ObjectEnd events
2394        let obj_starts = events
2395            .iter()
2396            .filter(|e| matches!(e, Event::ObjectStart { .. }))
2397            .count();
2398        assert_eq!(
2399            obj_starts, 2,
2400            "Expected 2 ObjectStart events for nested objects"
2401        );
2402    }
2403
2404    // parser[verify object.syntax]
2405    #[test]
2406    fn test_object_with_entries() {
2407        let events = parse("config {host localhost, port 8080}");
2408        // Check we have keys for host and port
2409        let keys: Vec<_> = events
2410            .iter()
2411            .filter_map(|e| match e {
2412                Event::Key {
2413                    payload: Some(value),
2414                    ..
2415                } => Some(value.as_ref()),
2416                _ => None,
2417            })
2418            .collect();
2419        assert!(keys.contains(&"config"), "Missing key 'config'");
2420        assert!(keys.contains(&"host"), "Missing key 'host'");
2421        assert!(keys.contains(&"port"), "Missing key 'port'");
2422    }
2423
2424    // parser[verify sequence.syntax] parser[verify sequence.elements]
2425    #[test]
2426    fn test_sequence_elements() {
2427        let events = parse("items (a b c)");
2428        let scalars: Vec<_> = events
2429            .iter()
2430            .filter_map(|e| match e {
2431                Event::Scalar { value, .. } => Some(value.as_ref()),
2432                _ => None,
2433            })
2434            .collect();
2435        assert!(scalars.contains(&"a"), "Missing element 'a'");
2436        assert!(scalars.contains(&"b"), "Missing element 'b'");
2437        assert!(scalars.contains(&"c"), "Missing element 'c'");
2438    }
2439
2440    // parser[verify sequence.syntax]
2441    #[test]
2442    fn test_nested_sequences() {
2443        let events = parse("matrix ((1 2) (3 4))");
2444        let seq_starts = events
2445            .iter()
2446            .filter(|e| matches!(e, Event::SequenceStart { .. }))
2447            .count();
2448        assert_eq!(
2449            seq_starts, 3,
2450            "Expected 3 SequenceStart events (outer + 2 inner)"
2451        );
2452    }
2453
2454    // parser[verify tag.payload]
2455    #[test]
2456    fn test_tagged_object() {
2457        let events = parse("result @err{message oops}");
2458        assert!(
2459            events
2460                .iter()
2461                .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "err")),
2462            "Missing TagStart for @err"
2463        );
2464        assert!(
2465            events
2466                .iter()
2467                .any(|e| matches!(e, Event::ObjectStart { .. })),
2468            "Missing ObjectStart for tagged object"
2469        );
2470    }
2471
2472    // parser[verify tag.payload]
2473    #[test]
2474    fn test_tagged_sequence() {
2475        let events = parse("color @rgb(255 128 0)");
2476        assert!(
2477            events
2478                .iter()
2479                .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "rgb")),
2480            "Missing TagStart for @rgb"
2481        );
2482        assert!(
2483            events
2484                .iter()
2485                .any(|e| matches!(e, Event::SequenceStart { .. })),
2486            "Missing SequenceStart for tagged sequence"
2487        );
2488    }
2489
2490    // parser[verify tag.payload]
2491    #[test]
2492    fn test_tagged_scalar() {
2493        let events = parse(r#"name @nickname"Bob""#);
2494        assert!(
2495            events
2496                .iter()
2497                .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "nickname")),
2498            "Missing TagStart for @nickname"
2499        );
2500        assert!(
2501            events
2502                .iter()
2503                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "Bob")),
2504            "Missing Scalar for tagged string"
2505        );
2506    }
2507
2508    // parser[verify tag.payload]
2509    #[test]
2510    fn test_tagged_explicit_unit() {
2511        let events = parse("nothing @empty@");
2512        assert!(
2513            events
2514                .iter()
2515                .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "empty")),
2516            "Missing TagStart for @empty"
2517        );
2518        // The explicit @ after tag creates a Unit payload
2519        let unit_count = events
2520            .iter()
2521            .filter(|e| matches!(e, Event::Unit { .. }))
2522            .count();
2523        assert!(
2524            unit_count >= 1,
2525            "Expected at least one Unit event for @empty@"
2526        );
2527    }
2528
2529    // parser[verify tag.payload]
2530    #[test]
2531    fn test_tag_whitespace_gap() {
2532        // Whitespace between tag and potential payload = no payload (implicit unit)
2533        // Use a simpler case: key with tag value that has whitespace before object
2534        let events = parse("x @tag\ny {a b}");
2535        // @tag should be its own value (implicit unit), y {a b} is a separate entry
2536        let tag_events: Vec<_> = events
2537            .iter()
2538            .filter(|e| matches!(e, Event::TagStart { .. } | Event::TagEnd))
2539            .collect();
2540        // There should be TagStart and TagEnd
2541        assert_eq!(tag_events.len(), 2, "Expected TagStart and TagEnd");
2542        // And the tag should NOT have the object as payload (object should be in a different entry)
2543        let keys: Vec<_> = events
2544            .iter()
2545            .filter_map(|e| match e {
2546                Event::Key {
2547                    payload: Some(value),
2548                    ..
2549                } => Some(value.as_ref()),
2550                _ => None,
2551            })
2552            .collect();
2553        assert!(keys.contains(&"x"), "Missing key 'x'");
2554        assert!(keys.contains(&"y"), "Missing key 'y'");
2555    }
2556
2557    // parser[verify object.syntax]
2558    #[test]
2559    fn test_object_in_sequence() {
2560        let events = parse("servers ({host a} {host b})");
2561        // Sequence containing objects
2562        let obj_starts = events
2563            .iter()
2564            .filter(|e| matches!(e, Event::ObjectStart { .. }))
2565            .count();
2566        assert_eq!(
2567            obj_starts, 2,
2568            "Expected 2 ObjectStart events for objects in sequence"
2569        );
2570    }
2571
2572    // parser[verify attr.syntax]
2573    #[test]
2574    fn test_simple_attribute() {
2575        let events = parse("server host>localhost");
2576        // key=server, value is object with {host: localhost}
2577        let keys: Vec<_> = events
2578            .iter()
2579            .filter_map(|e| match e {
2580                Event::Key {
2581                    payload: Some(value),
2582                    ..
2583                } => Some(value.as_ref()),
2584                _ => None,
2585            })
2586            .collect();
2587        assert!(keys.contains(&"server"), "Missing key 'server'");
2588        assert!(keys.contains(&"host"), "Missing key 'host' from attribute");
2589    }
2590
2591    // parser[verify attr.values]
2592    #[test]
2593    fn test_attribute_values() {
2594        let events = parse("config name>app tags>(a b) opts>{x 1}");
2595        let keys: Vec<_> = events
2596            .iter()
2597            .filter_map(|e| match e {
2598                Event::Key {
2599                    payload: Some(value),
2600                    ..
2601                } => Some(value.as_ref()),
2602                _ => None,
2603            })
2604            .collect();
2605        assert!(keys.contains(&"config"), "Missing key 'config'");
2606        assert!(keys.contains(&"name"), "Missing key 'name'");
2607        assert!(keys.contains(&"tags"), "Missing key 'tags'");
2608        assert!(keys.contains(&"opts"), "Missing key 'opts'");
2609        // Check sequence is present
2610        assert!(
2611            events
2612                .iter()
2613                .any(|e| matches!(e, Event::SequenceStart { .. })),
2614            "Missing SequenceStart for tags>(a b)"
2615        );
2616    }
2617
2618    // parser[verify attr.atom]
2619    #[test]
2620    fn test_multiple_attributes() {
2621        // When attributes are at root level without a preceding key,
2622        // the first attribute key becomes the entry key, and the rest form the value
2623        let events = parse("server host>localhost port>8080");
2624        // key=server, value is object with {host: localhost, port: 8080}
2625        let keys: Vec<_> = events
2626            .iter()
2627            .filter_map(|e| match e {
2628                Event::Key {
2629                    payload: Some(value),
2630                    ..
2631                } => Some(value.as_ref()),
2632                _ => None,
2633            })
2634            .collect();
2635        assert!(keys.contains(&"server"), "Missing key 'server'");
2636        assert!(keys.contains(&"host"), "Missing key 'host'");
2637        assert!(keys.contains(&"port"), "Missing key 'port'");
2638    }
2639
2640    // parser[verify entry.keypath.attributes]
2641    #[test]
2642    fn test_too_many_atoms_with_attributes() {
2643        // parser[verify entry.toomany]
2644        // Old key-path syntax is now an error
2645        let events = parse("spec selector matchLabels app>web tier>frontend");
2646        // Should produce error for too many atoms
2647        assert!(
2648            events.iter().any(|e| matches!(
2649                e,
2650                Event::Error {
2651                    kind: ParseErrorKind::TooManyAtoms,
2652                    ..
2653                }
2654            )),
2655            "Should have TooManyAtoms error"
2656        );
2657    }
2658
2659    // parser[verify attr.syntax]
2660    #[test]
2661    fn test_attribute_no_spaces() {
2662        // Spaces around > means it's NOT attribute syntax
2663        let events = parse("x > y");
2664        // This should be: key=x, then ">" and "y" as values (nested)
2665        // Since > is its own token when preceded by whitespace
2666        let keys: Vec<_> = events
2667            .iter()
2668            .filter_map(|e| match e {
2669                Event::Key {
2670                    payload: Some(value),
2671                    ..
2672                } => Some(value.as_ref()),
2673                _ => None,
2674            })
2675            .collect();
2676        // "x" should be the first key, and ">" should NOT be treated as attribute syntax
2677        assert!(keys.contains(&"x"), "Missing key 'x'");
2678        // There should not be ">" as a key (it would be a value)
2679    }
2680
2681    // parser[verify document.root]
2682    #[test]
2683    fn test_explicit_root_after_comment() {
2684        // Regular comment before explicit root object
2685        let events = parse("// comment\n{a 1}");
2686        // Should have ObjectStart (explicit root), not be treated as implicit root
2687        assert!(
2688            events
2689                .iter()
2690                .any(|e| matches!(e, Event::ObjectStart { .. })),
2691            "Should have ObjectStart for explicit root after comment"
2692        );
2693        assert!(
2694            events
2695                .iter()
2696                .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "a")),
2697            "Should have key 'a'"
2698        );
2699    }
2700
2701    // parser[verify document.root]
2702    #[test]
2703    fn test_explicit_root_after_doc_comment() {
2704        // Doc comment before explicit root object
2705        let events = parse("/// doc comment\n{a 1}");
2706        // Should have ObjectStart (explicit root) AND the doc comment
2707        assert!(
2708            events.iter().any(|e| matches!(e, Event::DocComment { .. })),
2709            "Should preserve doc comment"
2710        );
2711        assert!(
2712            events
2713                .iter()
2714                .any(|e| matches!(e, Event::ObjectStart { .. })),
2715            "Should have ObjectStart for explicit root after doc comment"
2716        );
2717    }
2718
2719    // parser[verify entry.key-equality]
2720    #[test]
2721    fn test_duplicate_bare_key() {
2722        let events = parse("{a 1, a 2}");
2723        assert!(
2724            events.iter().any(|e| matches!(
2725                e,
2726                Event::Error {
2727                    kind: ParseErrorKind::DuplicateKey { .. },
2728                    ..
2729                }
2730            )),
2731            "Expected DuplicateKey error"
2732        );
2733    }
2734
2735    // parser[verify entry.key-equality]
2736    #[test]
2737    fn test_duplicate_quoted_key() {
2738        let events = parse(r#"{"key" 1, "key" 2}"#);
2739        assert!(
2740            events.iter().any(|e| matches!(
2741                e,
2742                Event::Error {
2743                    kind: ParseErrorKind::DuplicateKey { .. },
2744                    ..
2745                }
2746            )),
2747            "Expected DuplicateKey error for quoted keys"
2748        );
2749    }
2750
2751    // parser[verify entry.key-equality]
2752    #[test]
2753    fn test_duplicate_key_escape_normalized() {
2754        // "ab" and "a\u{62}" should be considered duplicates after escape processing
2755        let events = parse(r#"{"ab" 1, "a\u{62}" 2}"#);
2756        assert!(
2757            events.iter().any(|e| matches!(
2758                e,
2759                Event::Error {
2760                    kind: ParseErrorKind::DuplicateKey { .. },
2761                    ..
2762                }
2763            )),
2764            "Expected DuplicateKey error for escape-normalized keys"
2765        );
2766    }
2767
2768    // parser[verify entry.key-equality]
2769    #[test]
2770    fn test_duplicate_unit_key() {
2771        let events = parse("{@ 1, @ 2}");
2772        assert!(
2773            events.iter().any(|e| matches!(
2774                e,
2775                Event::Error {
2776                    kind: ParseErrorKind::DuplicateKey { .. },
2777                    ..
2778                }
2779            )),
2780            "Expected DuplicateKey error for unit keys"
2781        );
2782    }
2783
2784    // parser[verify entry.key-equality]
2785    #[test]
2786    fn test_duplicate_tagged_key() {
2787        let events = parse("{@foo 1, @foo 2}");
2788        assert!(
2789            events.iter().any(|e| matches!(
2790                e,
2791                Event::Error {
2792                    kind: ParseErrorKind::DuplicateKey { .. },
2793                    ..
2794                }
2795            )),
2796            "Expected DuplicateKey error for tagged keys"
2797        );
2798    }
2799
2800    // parser[verify entry.key-equality]
2801    #[test]
2802    fn test_different_keys_ok() {
2803        let events = parse("{a 1, b 2, c 3}");
2804        assert!(
2805            !events.iter().any(|e| matches!(e, Event::Error { .. })),
2806            "Should not have any errors for different keys"
2807        );
2808    }
2809
2810    // parser[verify entry.key-equality]
2811    #[test]
2812    fn test_duplicate_key_at_root() {
2813        // Test duplicate keys at the document root level (implicit root object)
2814        let events = parse("a 1\na 2");
2815        assert!(
2816            events.iter().any(|e| matches!(
2817                e,
2818                Event::Error {
2819                    kind: ParseErrorKind::DuplicateKey { .. },
2820                    ..
2821                }
2822            )),
2823            "Expected DuplicateKey error at document root level"
2824        );
2825    }
2826
2827    // parser[verify object.separators]
2828    #[test]
2829    fn test_mixed_separators_comma_then_newline() {
2830        // Start with comma, then use newline - should error
2831        let events = parse("{a 1, b 2\nc 3}");
2832        assert!(
2833            events.iter().any(|e| matches!(
2834                e,
2835                Event::Error {
2836                    kind: ParseErrorKind::MixedSeparators,
2837                    ..
2838                }
2839            )),
2840            "Expected MixedSeparators error when comma mode followed by newline"
2841        );
2842    }
2843
2844    // parser[verify object.separators]
2845    #[test]
2846    fn test_mixed_separators_newline_then_comma() {
2847        // Start with newline, then use comma - should error
2848        let events = parse("{a 1\nb 2, c 3}");
2849        assert!(
2850            events.iter().any(|e| matches!(
2851                e,
2852                Event::Error {
2853                    kind: ParseErrorKind::MixedSeparators,
2854                    ..
2855                }
2856            )),
2857            "Expected MixedSeparators error when newline mode followed by comma"
2858        );
2859    }
2860
2861    // parser[verify object.separators]
2862    #[test]
2863    fn test_consistent_comma_separators() {
2864        // All commas - should be fine
2865        let events = parse("{a 1, b 2, c 3}");
2866        assert!(
2867            !events.iter().any(|e| matches!(
2868                e,
2869                Event::Error {
2870                    kind: ParseErrorKind::MixedSeparators,
2871                    ..
2872                }
2873            )),
2874            "Should not have MixedSeparators error for consistent comma separators"
2875        );
2876    }
2877
2878    // parser[verify object.separators]
2879    #[test]
2880    fn test_consistent_newline_separators() {
2881        // All newlines - should be fine
2882        let events = parse("{a 1\nb 2\nc 3}");
2883        assert!(
2884            !events.iter().any(|e| matches!(
2885                e,
2886                Event::Error {
2887                    kind: ParseErrorKind::MixedSeparators,
2888                    ..
2889                }
2890            )),
2891            "Should not have MixedSeparators error for consistent newline separators"
2892        );
2893    }
2894
2895    // parser[verify tag.syntax]
2896    #[test]
2897    fn test_valid_tag_names() {
2898        // Valid tag names should not produce errors
2899        assert!(
2900            !parse("@foo")
2901                .iter()
2902                .any(|e| matches!(e, Event::Error { .. })),
2903            "@foo should be valid"
2904        );
2905        assert!(
2906            !parse("@_private")
2907                .iter()
2908                .any(|e| matches!(e, Event::Error { .. })),
2909            "@_private should be valid"
2910        );
2911        // @Some.Type is now invalid since dots are not allowed in tag names
2912        assert!(
2913            parse("@Some.Type")
2914                .iter()
2915                .any(|e| matches!(e, Event::Error { .. })),
2916            "@Some.Type should be invalid (dots not allowed)"
2917        );
2918        assert!(
2919            !parse("@my-tag")
2920                .iter()
2921                .any(|e| matches!(e, Event::Error { .. })),
2922            "@my-tag should be valid"
2923        );
2924        assert!(
2925            !parse("@Type123")
2926                .iter()
2927                .any(|e| matches!(e, Event::Error { .. })),
2928            "@Type123 should be valid"
2929        );
2930    }
2931
2932    // parser[verify tag.syntax]
2933    #[test]
2934    fn test_invalid_tag_name_starts_with_digit() {
2935        let events = parse("x @123");
2936        assert!(
2937            events.iter().any(|e| matches!(
2938                e,
2939                Event::Error {
2940                    kind: ParseErrorKind::InvalidTagName,
2941                    ..
2942                }
2943            )),
2944            "Tag starting with digit should be invalid"
2945        );
2946    }
2947
2948    // parser[verify tag.syntax]
2949    #[test]
2950    fn test_invalid_tag_name_starts_with_hyphen() {
2951        let events = parse("x @-foo");
2952        assert!(
2953            events.iter().any(|e| matches!(
2954                e,
2955                Event::Error {
2956                    kind: ParseErrorKind::InvalidTagName,
2957                    ..
2958                }
2959            )),
2960            "Tag starting with hyphen should be invalid"
2961        );
2962    }
2963
2964    // parser[verify tag.syntax]
2965    #[test]
2966    fn test_invalid_tag_name_starts_with_dot() {
2967        let events = parse("x @.foo");
2968        assert!(
2969            events.iter().any(|e| matches!(
2970                e,
2971                Event::Error {
2972                    kind: ParseErrorKind::InvalidTagName,
2973                    ..
2974                }
2975            )),
2976            "Tag starting with dot should be invalid"
2977        );
2978    }
2979
2980    // parser[verify scalar.quoted.escapes]
2981    #[test]
2982    fn test_unicode_escape_braces() {
2983        let events = parse(r#"x "\u{1F600}""#);
2984        assert!(
2985            events
2986                .iter()
2987                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "😀")),
2988            "\\u{{1F600}} should produce 😀"
2989        );
2990    }
2991
2992    // parser[verify scalar.quoted.escapes]
2993    #[test]
2994    fn test_unicode_escape_4digit() {
2995        let events = parse(r#"x "\u0041""#);
2996        assert!(
2997            events
2998                .iter()
2999                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "A")),
3000            "\\u0041 should produce A"
3001        );
3002    }
3003
3004    // parser[verify scalar.quoted.escapes]
3005    #[test]
3006    fn test_unicode_escape_4digit_accented() {
3007        let events = parse(r#"x "\u00E9""#);
3008        assert!(
3009            events
3010                .iter()
3011                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "é")),
3012            "\\u00E9 should produce é"
3013        );
3014    }
3015
3016    // parser[verify scalar.quoted.escapes]
3017    #[test]
3018    fn test_unicode_escape_mixed() {
3019        // Mix of \uXXXX and \u{X} forms
3020        let events = parse(r#"x "\u0048\u{65}\u006C\u{6C}\u006F""#);
3021        assert!(
3022            events
3023                .iter()
3024                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "Hello")),
3025            "Mixed unicode escapes should produce Hello"
3026        );
3027    }
3028
3029    // parser[verify entry.keys]
3030    #[test]
3031    fn test_heredoc_key_rejected() {
3032        let events = parse("<<EOF\nkey\nEOF value");
3033        assert!(
3034            events.iter().any(|e| matches!(
3035                e,
3036                Event::Error {
3037                    kind: ParseErrorKind::InvalidKey,
3038                    ..
3039                }
3040            )),
3041            "Heredoc as key should be rejected"
3042        );
3043    }
3044
3045    // parser[verify scalar.quoted.escapes]
3046    #[test]
3047    fn test_invalid_escape_null() {
3048        // \0 is no longer a valid escape - must use \u{0} instead
3049        let events = parse(r#"x "\0""#);
3050        assert!(
3051            events.iter().any(|e| matches!(
3052                e,
3053                Event::Error {
3054                    kind: ParseErrorKind::InvalidEscape(seq),
3055                    ..
3056                } if seq == "\\0"
3057            )),
3058            "\\0 should be rejected as invalid escape"
3059        );
3060    }
3061
3062    // parser[verify scalar.quoted.escapes]
3063    #[test]
3064    fn test_invalid_escape_unknown() {
3065        // \q, \?, \a etc. are not valid escapes
3066        let events = parse(r#"x "\q""#);
3067        assert!(
3068            events.iter().any(|e| matches!(
3069                e,
3070                Event::Error {
3071                    kind: ParseErrorKind::InvalidEscape(seq),
3072                    ..
3073                } if seq == "\\q"
3074            )),
3075            "\\q should be rejected as invalid escape"
3076        );
3077    }
3078
3079    // parser[verify scalar.quoted.escapes]
3080    #[test]
3081    fn test_invalid_escape_multiple() {
3082        // Multiple invalid escapes should all be reported
3083        let events = parse(r#"x "\0\q\?""#);
3084        let invalid_escapes: Vec<_> = events
3085            .iter()
3086            .filter_map(|e| match e {
3087                Event::Error {
3088                    kind: ParseErrorKind::InvalidEscape(seq),
3089                    ..
3090                } => Some(seq.as_str()),
3091                _ => None,
3092            })
3093            .collect();
3094        assert_eq!(
3095            invalid_escapes.len(),
3096            3,
3097            "Should report 3 invalid escapes, got: {:?}",
3098            invalid_escapes
3099        );
3100    }
3101
3102    // parser[verify scalar.quoted.escapes]
3103    #[test]
3104    fn test_valid_escapes_still_work() {
3105        // Make sure valid escapes still work
3106        let events = parse(r#"x "a\nb\tc\\d\"e""#);
3107        assert!(
3108            events
3109                .iter()
3110                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "a\nb\tc\\d\"e")),
3111            "Valid escapes should still work"
3112        );
3113        // No errors should be reported
3114        assert!(
3115            !events.iter().any(|e| matches!(
3116                e,
3117                Event::Error {
3118                    kind: ParseErrorKind::InvalidEscape(_),
3119                    ..
3120                }
3121            )),
3122            "Valid escapes should not produce errors"
3123        );
3124    }
3125
3126    // parser[verify scalar.quoted.escapes]
3127    #[test]
3128    fn test_invalid_escape_in_key() {
3129        // Invalid escapes in keys should also be reported
3130        let events = parse(r#""\0" value"#);
3131        assert!(
3132            events.iter().any(|e| matches!(
3133                e,
3134                Event::Error {
3135                    kind: ParseErrorKind::InvalidEscape(seq),
3136                    ..
3137                } if seq == "\\0"
3138            )),
3139            "\\0 in key should be rejected as invalid escape"
3140        );
3141    }
3142
3143    // parser[verify entry.structure]
3144    #[test]
3145    fn test_simple_key_value_with_attributes() {
3146        // Simple key-value where value is an attributes object
3147        let events = parse("server host>localhost port>8080");
3148        // Should have keys: server, host, port
3149        let keys: Vec<_> = events
3150            .iter()
3151            .filter_map(|e| match e {
3152                Event::Key {
3153                    payload: Some(value),
3154                    ..
3155                } => Some(value.as_ref()),
3156                _ => None,
3157            })
3158            .collect();
3159        assert!(keys.contains(&"server"), "Missing key 'server'");
3160        assert!(keys.contains(&"host"), "Missing key 'host'");
3161        assert!(keys.contains(&"port"), "Missing key 'port'");
3162        // No errors should be reported
3163        assert!(
3164            !events.iter().any(|e| matches!(
3165                e,
3166                Event::Error {
3167                    kind: ParseErrorKind::TooManyAtoms,
3168                    ..
3169                }
3170            )),
3171            "Simple key-value with attributes should not produce TooManyAtoms"
3172        );
3173    }
3174
3175    // parser[verify entry.path]
3176    #[test]
3177    fn test_dotted_path_simple() {
3178        // a.b value should expand to a { b value }
3179        let events = parse("a.b value");
3180        let keys: Vec<_> = events
3181            .iter()
3182            .filter_map(|e| match e {
3183                Event::Key {
3184                    payload: Some(value),
3185                    ..
3186                } => Some(value.as_ref()),
3187                _ => None,
3188            })
3189            .collect();
3190        assert_eq!(keys, vec!["a", "b"], "Should have keys 'a' and 'b'");
3191        // Should have ObjectStart for the nested object
3192        assert!(
3193            events
3194                .iter()
3195                .any(|e| matches!(e, Event::ObjectStart { .. })),
3196            "Should have ObjectStart for nested structure"
3197        );
3198        // Should have the value
3199        assert!(
3200            events
3201                .iter()
3202                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "value")),
3203            "Should have scalar value 'value'"
3204        );
3205        // No errors
3206        assert!(
3207            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3208            "Simple dotted path should not have errors"
3209        );
3210    }
3211
3212    // parser[verify entry.path]
3213    #[test]
3214    fn test_dotted_path_three_segments() {
3215        // a.b.c deep should expand to a { b { c deep } }
3216        let events = parse("a.b.c deep");
3217        let keys: Vec<_> = events
3218            .iter()
3219            .filter_map(|e| match e {
3220                Event::Key {
3221                    payload: Some(value),
3222                    ..
3223                } => Some(value.as_ref()),
3224                _ => None,
3225            })
3226            .collect();
3227        assert_eq!(keys, vec!["a", "b", "c"], "Should have keys 'a', 'b', 'c'");
3228        // Should have two ObjectStart events for nested objects
3229        let obj_starts: Vec<_> = events
3230            .iter()
3231            .filter(|e| matches!(e, Event::ObjectStart { .. }))
3232            .collect();
3233        assert_eq!(
3234            obj_starts.len(),
3235            2,
3236            "Should have 2 ObjectStart for nested structure"
3237        );
3238        // No errors
3239        assert!(
3240            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3241            "Three-segment dotted path should not have errors"
3242        );
3243    }
3244
3245    // parser[verify entry.path]
3246    #[test]
3247    fn test_dotted_path_with_implicit_unit() {
3248        // a.b without value should have implicit unit
3249        let events = parse("a.b");
3250        let keys: Vec<_> = events
3251            .iter()
3252            .filter_map(|e| match e {
3253                Event::Key {
3254                    payload: Some(value),
3255                    ..
3256                } => Some(value.as_ref()),
3257                _ => None,
3258            })
3259            .collect();
3260        assert_eq!(keys, vec!["a", "b"], "Should have keys 'a' and 'b'");
3261        // Should have Unit for implicit value
3262        assert!(
3263            events.iter().any(|e| matches!(e, Event::Unit { .. })),
3264            "Should have implicit unit value"
3265        );
3266    }
3267
3268    // parser[verify entry.path]
3269    #[test]
3270    fn test_dotted_path_empty_segment() {
3271        // a..b value - empty segment is invalid
3272        let events = parse("a..b value");
3273        assert!(
3274            events.iter().any(|e| matches!(e, Event::Error { .. })),
3275            "Empty segment in dotted path should produce error"
3276        );
3277    }
3278
3279    // parser[verify entry.path]
3280    #[test]
3281    fn test_dotted_path_trailing_dot() {
3282        // a.b. value - trailing dot is invalid
3283        let events = parse("a.b. value");
3284        assert!(
3285            events.iter().any(|e| matches!(e, Event::Error { .. })),
3286            "Trailing dot in dotted path should produce error"
3287        );
3288    }
3289
3290    // parser[verify entry.path]
3291    #[test]
3292    fn test_dotted_path_leading_dot() {
3293        // .a.b value - leading dot is invalid
3294        let events = parse(".a.b value");
3295        assert!(
3296            events.iter().any(|e| matches!(e, Event::Error { .. })),
3297            "Leading dot in dotted path should produce error"
3298        );
3299    }
3300
3301    // parser[verify entry.path]
3302    #[test]
3303    fn test_dotted_path_with_object_value() {
3304        // a.b { c d } should expand to a { b { c d } }
3305        let events = parse("a.b { c d }");
3306        let keys: Vec<_> = events
3307            .iter()
3308            .filter_map(|e| match e {
3309                Event::Key {
3310                    payload: Some(value),
3311                    ..
3312                } => Some(value.as_ref()),
3313                _ => None,
3314            })
3315            .collect();
3316        assert!(keys.contains(&"a"), "Should have 'a'");
3317        assert!(keys.contains(&"b"), "Should have 'b'");
3318        assert!(keys.contains(&"c"), "Should have 'c'");
3319        // No errors
3320        assert!(
3321            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3322            "Dotted path with object value should not have errors"
3323        );
3324    }
3325
3326    // parser[verify entry.path]
3327    #[test]
3328    fn test_dotted_path_with_attributes_value() {
3329        // selector.matchLabels app>web - dotted path with attributes as value
3330        let events = parse("selector.matchLabels app>web");
3331        let keys: Vec<_> = events
3332            .iter()
3333            .filter_map(|e| match e {
3334                Event::Key {
3335                    payload: Some(value),
3336                    ..
3337                } => Some(value.as_ref()),
3338                _ => None,
3339            })
3340            .collect();
3341        assert!(keys.contains(&"selector"), "Should have 'selector'");
3342        assert!(keys.contains(&"matchLabels"), "Should have 'matchLabels'");
3343        assert!(keys.contains(&"app"), "Should have 'app' from attribute");
3344        // No errors
3345        assert!(
3346            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3347            "Dotted path with attributes value should not have errors"
3348        );
3349    }
3350
3351    // parser[verify entry.path]
3352    #[test]
3353    fn test_dot_in_value_is_literal() {
3354        // key example.com - dot in value position is literal, not path separator
3355        let events = parse("key example.com");
3356        let keys: Vec<_> = events
3357            .iter()
3358            .filter_map(|e| match e {
3359                Event::Key {
3360                    payload: Some(value),
3361                    ..
3362                } => Some(value.as_ref()),
3363                _ => None,
3364            })
3365            .collect();
3366        assert_eq!(keys, vec!["key"], "Should have only one key 'key'");
3367        // Value should be the full domain
3368        assert!(
3369            events
3370                .iter()
3371                .any(|e| matches!(e, Event::Scalar { value, .. } if value == "example.com")),
3372            "Value should be 'example.com' as a single scalar"
3373        );
3374        // No errors
3375        assert!(
3376            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3377            "Dot in value should not cause errors"
3378        );
3379    }
3380
3381    // parser[verify entry.path.sibling]
3382    #[test]
3383    fn test_sibling_dotted_paths() {
3384        // Sibling paths under common prefix should be allowed
3385        let events = parse("foo.bar.x value1\nfoo.bar.y value2\nfoo.baz value3");
3386        // Should have no errors
3387        assert!(
3388            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3389            "Sibling dotted paths should not cause errors"
3390        );
3391        // Should have all keys
3392        let keys: Vec<_> = events
3393            .iter()
3394            .filter_map(|e| match e {
3395                Event::Key {
3396                    payload: Some(value),
3397                    ..
3398                } => Some(value.as_ref()),
3399                _ => None,
3400            })
3401            .collect();
3402        assert!(keys.contains(&"foo"), "Should have 'foo'");
3403        assert!(keys.contains(&"bar"), "Should have 'bar'");
3404        assert!(keys.contains(&"baz"), "Should have 'baz'");
3405        assert!(keys.contains(&"x"), "Should have 'x'");
3406        assert!(keys.contains(&"y"), "Should have 'y'");
3407    }
3408
3409    // parser[verify entry.path.reopen]
3410    #[test]
3411    fn test_reopen_closed_path_error() {
3412        // Can't reopen a path after moving to a sibling
3413        let events = parse("foo.bar {}\nfoo.baz {}\nfoo.bar.x value");
3414        // Should have a reopen error
3415        let errors: Vec<_> = events
3416            .iter()
3417            .filter(|e| matches!(e, Event::Error { .. }))
3418            .collect();
3419        assert_eq!(
3420            errors.len(),
3421            1,
3422            "Should have exactly one error for reopening closed path"
3423        );
3424        assert!(
3425            events.iter().any(|e| matches!(
3426                e,
3427                Event::Error {
3428                    kind: ParseErrorKind::ReopenedPath { .. },
3429                    ..
3430                }
3431            )),
3432            "Error should be ReopenedPath"
3433        );
3434    }
3435
3436    // parser[verify entry.path.reopen]
3437    #[test]
3438    fn test_reopen_nested_closed_path_error() {
3439        // Can't reopen a nested path after moving to a higher-level sibling
3440        let events = parse("a.b.c {}\na.b.d {}\na.x {}\na.b.e {}");
3441        // Should have a reopen error for a.b
3442        let errors: Vec<_> = events
3443            .iter()
3444            .filter(|e| {
3445                matches!(
3446                    e,
3447                    Event::Error {
3448                        kind: ParseErrorKind::ReopenedPath { .. },
3449                        ..
3450                    }
3451                )
3452            })
3453            .collect();
3454        assert_eq!(errors.len(), 1, "Should have exactly one reopen error");
3455    }
3456
3457    // parser[verify entry.path.reopen]
3458    #[test]
3459    fn test_nest_into_scalar_error() {
3460        // Can't nest into a path that has a scalar value
3461        let events = parse("a.b value\na.b.c deep");
3462        // Should have a nest-into-terminal error
3463        assert!(
3464            events.iter().any(|e| matches!(
3465                e,
3466                Event::Error {
3467                    kind: ParseErrorKind::NestIntoTerminal { .. },
3468                    ..
3469                }
3470            )),
3471            "Should have NestIntoTerminal error"
3472        );
3473    }
3474
3475    // parser[verify entry.path.sibling]
3476    #[test]
3477    fn test_different_top_level_paths_ok() {
3478        // Different top-level paths don't conflict
3479        let events = parse("server.host localhost\ndatabase.port 5432");
3480        assert!(
3481            !events.iter().any(|e| matches!(e, Event::Error { .. })),
3482            "Different top-level paths should not conflict"
3483        );
3484    }
3485
3486    // parser[verify entry.whitespace]
3487    #[test]
3488    fn test_bare_key_requires_whitespace_before_brace() {
3489        // `key{}` without whitespace should be an error
3490        let events = parse("config{}");
3491        assert!(
3492            events.iter().any(|e| matches!(
3493                e,
3494                Event::Error {
3495                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3496                    ..
3497                }
3498            )),
3499            "config{{}} without whitespace should error"
3500        );
3501    }
3502
3503    // parser[verify entry.whitespace]
3504    #[test]
3505    fn test_bare_key_requires_whitespace_before_paren() {
3506        // `key()` without whitespace should be an error
3507        let events = parse("items(1 2 3)");
3508        assert!(
3509            events.iter().any(|e| matches!(
3510                e,
3511                Event::Error {
3512                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3513                    ..
3514                }
3515            )),
3516            "items() without whitespace should error"
3517        );
3518    }
3519
3520    // parser[verify entry.whitespace]
3521    #[test]
3522    fn test_bare_key_with_whitespace_before_brace_ok() {
3523        // `key {}` with whitespace should be fine
3524        let events = parse("config {}");
3525        assert!(
3526            !events.iter().any(|e| matches!(
3527                e,
3528                Event::Error {
3529                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3530                    ..
3531                }
3532            )),
3533            "config {{}} with whitespace should not error"
3534        );
3535    }
3536
3537    // parser[verify entry.whitespace]
3538    #[test]
3539    fn test_bare_key_with_whitespace_before_paren_ok() {
3540        // `key ()` with whitespace should be fine
3541        let events = parse("items (1 2 3)");
3542        assert!(
3543            !events.iter().any(|e| matches!(
3544                e,
3545                Event::Error {
3546                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3547                    ..
3548                }
3549            )),
3550            "items () with whitespace should not error"
3551        );
3552    }
3553
3554    // parser[verify entry.whitespace]
3555    #[test]
3556    fn test_tag_with_brace_no_whitespace_ok() {
3557        // `@tag{}` (tag with object payload) should NOT require whitespace
3558        let events = parse("config @object{}");
3559        assert!(
3560            !events.iter().any(|e| matches!(
3561                e,
3562                Event::Error {
3563                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3564                    ..
3565                }
3566            )),
3567            "@tag{{}} should not require whitespace"
3568        );
3569    }
3570
3571    // parser[verify entry.whitespace]
3572    #[test]
3573    fn test_quoted_key_no_whitespace_ok() {
3574        // `"key"{}` - quoted keys don't have this restriction
3575        let events = parse(r#""config"{}"#);
3576        assert!(
3577            !events.iter().any(|e| matches!(
3578                e,
3579                Event::Error {
3580                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3581                    ..
3582                }
3583            )),
3584            "quoted key before {{}} should not require whitespace"
3585        );
3586    }
3587
3588    // parser[verify entry.whitespace]
3589    #[test]
3590    fn test_minified_styx_with_whitespace() {
3591        // Minified Styx should work with required whitespace
3592        let events = parse("{server {host localhost,port 8080}}");
3593        assert!(
3594            !events.iter().any(|e| matches!(
3595                e,
3596                Event::Error {
3597                    kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
3598                    ..
3599                }
3600            )),
3601            "minified styx with whitespace should work"
3602        );
3603    }
3604}