Skip to main content

rlsp_yaml_parser/
event.rs

1// SPDX-License-Identifier: MIT
2
3//! Token-to-event conversion layer.
4//!
5//! Takes the flat token stream from [`crate::tokenize`] and produces a sequence
6//! of structured [`Event`] values.  Each event carries a [`crate::pos::Span`]
7//! that covers the tokens contributing to it.
8//!
9//! The public entry point is [`parse_events`].
10
11use crate::pos::{Pos, Span};
12use crate::token::Code;
13
/// Parsed directive information: `(version, tag_pairs)`.
///
/// * `version` — the `(major, minor)` pair taken from a `%YAML` directive, or
///   `None` when no usable `%YAML` directive was collected.
/// * `tag_pairs` — one `(handle, prefix)` entry per collected `%TAG`
///   directive, in the order they were encountered.
type Directives = (Option<(u8, u8)>, Vec<(String, String)>);
16
17// ---------------------------------------------------------------------------
18// Public types
19// ---------------------------------------------------------------------------
20
/// Block scalar chomp mode (YAML §8.1.1.2).
///
/// Carried by [`ScalarStyle::Literal`] and [`ScalarStyle::Folded`].  A missing
/// or unrecognised chomping indicator maps to [`Chomp::Clip`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Chomp {
    /// `-` — trailing newlines stripped.
    Strip,
    /// (default) — single trailing newline kept.
    Clip,
    /// `+` — all trailing newlines kept.
    Keep,
}
31
/// The style in which a scalar value was written.
///
/// The style is derived from the first indicator token found inside the
/// scalar's token block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScalarStyle {
    /// An unquoted scalar.  Also used when the scalar has no indicator or an
    /// indicator that is not one of the four below.
    Plain,
    /// A `'single-quoted'` scalar.
    SingleQuoted,
    /// A `"double-quoted"` scalar.
    DoubleQuoted,
    /// A `|` literal block scalar, with its chomp mode.
    Literal(Chomp),
    /// A `>` folded block scalar, with its chomp mode.
    Folded(Chomp),
}
46
/// A parse error produced by the event layer.
///
/// Rendered by `Display` as `parse error at {pos:?}: {message}`.  Once an
/// error has been yielded the event iterator produces no further items.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[error("parse error at {pos:?}: {message}")]
pub struct Error {
    /// Position the error is attributed to: the position of the offending
    /// token, or the location reported by input pre-validation.
    pub pos: Pos,
    /// Human-readable description of the problem.
    pub message: String,
}
54
/// A high-level YAML parse event.
///
/// Events are yielded by [`parse_events`], each paired with the
/// [`crate::pos::Span`] of the tokens it was built from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Event {
    /// The stream has started.
    StreamStart,
    /// The stream has ended.
    StreamEnd,
    /// A document has started.
    DocumentStart {
        /// Whether the document was introduced with `---`.
        explicit: bool,
        /// The `%YAML` directive version, if present.
        version: Option<(u8, u8)>,
        /// The `%TAG` directive pairs `(handle, prefix)`.
        tags: Vec<(String, String)>,
    },
    /// A document has ended.
    DocumentEnd {
        /// Whether the document was closed with `...`.
        explicit: bool,
    },
    /// A mapping node has started.
    MappingStart {
        /// Anchor name attached to the mapping (text tokens only, so without
        /// the `&` sigil), if any.
        anchor: Option<String>,
        /// Tag attached to the mapping, as the concatenated text and
        /// indicator tokens of the tag, if any.
        tag: Option<String>,
    },
    /// A mapping node has ended.
    MappingEnd,
    /// A sequence node has started.
    SequenceStart {
        /// Anchor name attached to the sequence (text tokens only, so without
        /// the `&` sigil), if any.
        anchor: Option<String>,
        /// Tag attached to the sequence, as the concatenated text and
        /// indicator tokens of the tag, if any.
        tag: Option<String>,
    },
    /// A sequence node has ended.
    SequenceEnd,
    /// A scalar node.
    Scalar {
        /// The scalar's content, assembled from its text, line-feed and
        /// line-fold tokens.
        value: String,
        /// How the scalar was written in the source.
        style: ScalarStyle,
        /// Anchor name attached to the scalar (text tokens only, so without
        /// the `&` sigil), if any.
        anchor: Option<String>,
        /// Tag attached to the scalar, as the concatenated text and indicator
        /// tokens of the tag, if any.
        tag: Option<String>,
    },
    /// An alias node.
    Alias {
        /// Name of the alias, assembled from the text/meta tokens of the
        /// alias block.
        name: String,
    },
    /// A YAML comment.
    Comment {
        /// The comment's text tokens, concatenated.
        text: String,
    },
}
102
103// ---------------------------------------------------------------------------
104// Public entry point
105// ---------------------------------------------------------------------------
106
107/// Parse a YAML string into an event stream.
108///
109/// The first event is always `StreamStart` and the last is always `StreamEnd`.
110///
111/// ```
112/// use rlsp_yaml_parser::parse_events;
113/// use rlsp_yaml_parser::event::Event;
114///
115/// let events: Vec<_> = parse_events("hello").collect();
116/// assert!(events.iter().any(|e| matches!(e, Ok((Event::StreamStart, _)))));
117/// ```
118pub fn parse_events(input: &str) -> impl Iterator<Item = Result<(Event, Span), Error>> + '_ {
119    let tokens = crate::tokenize(input);
120    // Pre-validation: check for patterns the parser accepts but the spec forbids.
121    let pre_error = validate_input(input, &tokens);
122    OwnedEventIter {
123        tokens,
124        pos: 0,
125        emitted_stream_start: false,
126        done: false,
127        pending_anchor: None,
128        pending_tag: None,
129        pending_doc_explicit: false,
130        pre_error,
131        _phantom: std::marker::PhantomData,
132    }
133}
134
135/// Check input for patterns the PEG parser accepts but the YAML spec forbids.
136/// Returns an error position if a violation is found.
137#[allow(
138    clippy::too_many_lines,
139    clippy::indexing_slicing,
140    clippy::wildcard_enum_match_arm
141)]
142fn validate_input(input: &str, tokens: &[crate::token::Token<'_>]) -> Option<crate::pos::Pos> {
143    use crate::token::Code;
144
145    // Already has an error from the tokenizer — skip.
146    if tokens.iter().any(|t| t.code == Code::Error) {
147        return None;
148    }
149
150    // Collect flow sequence byte ranges from tokens.
151    let mut flow_stack: Vec<(usize, bool)> = Vec::new();
152    let mut flow_seq: Vec<(usize, usize)> = Vec::new();
153
154    for t in tokens {
155        match t.code {
156            Code::Indicator if t.text == "[" || t.text == "{" => {
157                flow_stack.push((t.pos.byte_offset, t.text == "["));
158            }
159            Code::Indicator if t.text == "]" || t.text == "}" => {
160                if let Some((s, is_s)) = flow_stack.pop() {
161                    if is_s {
162                        flow_seq.push((s, t.pos.byte_offset + 1));
163                    }
164                }
165            }
166            _ => {}
167        }
168    }
169
170    let bytes = input.as_bytes();
171
172    // ZXT5: adjacent value ":value" after newline in flow SEQUENCE.
173    // Per spec §7.4, flow sequence entries use implicit key context which
174    // does not permit adjacent-value syntax across line breaks.
175    for (i, &b) in bytes.iter().enumerate() {
176        if b != b':' || i == 0 {
177            continue;
178        }
179        let is_in_seq = flow_seq.iter().any(|&(s, e)| i > s && i < e);
180        if !is_in_seq {
181            continue;
182        }
183        if i + 1 >= bytes.len() {
184            continue;
185        }
186        if matches!(
187            bytes[i + 1],
188            b' ' | b'\t' | b'\n' | b'\r' | b',' | b']' | b'}'
189        ) {
190            continue;
191        }
192        // Colon followed by non-space inside a flow sequence. Check if there's
193        // a newline between the previous non-whitespace char and this colon.
194        let before = &input[..i];
195        if let Some(pp) = before.rfind(|ch: char| !ch.is_ascii_whitespace()) {
196            if before[pp + 1..].contains('\n') {
197                return Some(crate::pos::Pos {
198                    byte_offset: i,
199                    char_offset: i,
200                    line: 0,
201                    column: 0,
202                });
203            }
204        }
205    }
206
207    // S98Z: block scalar with only blank lines at varying indentation.
208    let lines: Vec<&str> = input.lines().collect();
209    for (i, line) in lines.iter().enumerate() {
210        let indicator_pos = line.rfind(['|', '>']);
211        let Some(ip) = indicator_pos else { continue };
212        if ip > 0 && !matches!(line.as_bytes().get(ip - 1), Some(b' ' | b'\t')) {
213            continue;
214        }
215        let after_ind = &line[ip + 1..];
216        let after_trimmed =
217            after_ind.trim_start_matches(|ch: char| matches!(ch, '+' | '-' | '0'..='9'));
218        if !after_trimmed.is_empty()
219            && !after_trimmed.starts_with(' ')
220            && !after_trimmed.starts_with('\t')
221            && !after_trimmed.starts_with('#')
222        {
223            continue;
224        }
225        let base = line.len() - line.trim_start().len();
226        let mut max_blank_sp = 0usize;
227        for j in (i + 1)..lines.len() {
228            let cl = lines[j];
229            if cl.is_empty() {
230                continue;
231            }
232            let sp = cl.chars().take_while(|&ch| ch == ' ').count();
233            let rest = &cl[sp..];
234            if rest.is_empty() || rest == "\r" {
235                if sp > base {
236                    max_blank_sp = max_blank_sp.max(sp);
237                }
238                continue;
239            }
240            // End of scalar (at or below base indent) or first content line.
241            // If blank lines had more spaces than this line's indent, reject.
242            if max_blank_sp > 0 && max_blank_sp > sp {
243                let off: usize = lines[..j].iter().map(|l| l.len() + 1).sum();
244                return Some(crate::pos::Pos {
245                    byte_offset: off,
246                    char_offset: off,
247                    line: 0,
248                    column: 0,
249                });
250            }
251            break;
252        }
253    }
254
255    None
256}
257
258// ---------------------------------------------------------------------------
259// Iterator implementation
260// ---------------------------------------------------------------------------
261
/// Iterator that owns its token buffer, avoiding lifetime complications.
///
/// Constructed by [`parse_events`].  It walks `tokens` with a cursor and
/// turns recognised token groups into [`Event`]s.
struct OwnedEventIter<'input> {
    /// Token buffer produced by `crate::tokenize` for the whole input.
    tokens: Vec<crate::token::Token<'input>>,
    /// Index into `tokens` of the next token to examine.
    pos: usize,
    /// Whether the initial `StreamStart` event has already been yielded.
    emitted_stream_start: bool,
    /// Set once `StreamEnd` or an error has been yielded; no further events
    /// are produced afterwards.
    done: bool,
    /// Anchor name collected from a `BeginAnchor`…`EndAnchor` block that
    /// precedes the next content token (scalar/mapping/sequence).
    pending_anchor: Option<String>,
    /// Tag string collected from a `BeginTag`…`EndTag` block that
    /// precedes the next content token.
    pending_tag: Option<String>,
    /// Whether the upcoming `DocumentEnd` event should be `explicit=true`.
    /// Set when we encounter a `DocumentEnd` token (the `...` marker).
    pending_doc_explicit: bool,
    /// Pre-validation error detected before event iteration starts.
    /// Reported (once) right after `StreamStart`.
    pre_error: Option<crate::pos::Pos>,
    /// Marker tying the iterator to the `'input` lifetime of the source text.
    _phantom: std::marker::PhantomData<&'input str>,
}
281
282impl<'input> OwnedEventIter<'input> {
283    fn peek(&self) -> Option<Code> {
284        self.tokens.get(self.pos).map(|t| t.code)
285    }
286
287    fn peek_token(&self) -> Option<&crate::token::Token<'input>> {
288        self.tokens.get(self.pos)
289    }
290
291    fn collect_anchor(&mut self) -> String {
292        let mut name = String::new();
293        while let Some(t) = self.tokens.get(self.pos) {
294            if t.code == Code::EndAnchor {
295                self.pos += 1;
296                break;
297            }
298            // Only Text tokens carry the anchor name; Indicator carries the `&` sigil.
299            if t.code == Code::Text {
300                name.push_str(t.text);
301            }
302            self.pos += 1;
303        }
304        name
305    }
306
307    fn collect_tag(&mut self) -> String {
308        let mut tag = String::new();
309        while let Some(t) = self.tokens.get(self.pos) {
310            if t.code == Code::EndTag {
311                self.pos += 1;
312                break;
313            }
314            if t.code == Code::Text || t.code == Code::Indicator {
315                tag.push_str(t.text);
316            }
317            self.pos += 1;
318        }
319        tag
320    }
321
322    fn parse_alias_block(&mut self) -> String {
323        let mut name = String::new();
324        while let Some(t) = self.tokens.get(self.pos) {
325            if t.code == Code::EndAlias {
326                self.pos += 1;
327                break;
328            }
329            // Alias name appears as Meta tokens (the tokenizer uses Meta for
330            // anchor/alias names inside BeginAlias blocks).
331            if t.code == Code::Text || t.code == Code::Meta {
332                name.push_str(t.text);
333            }
334            self.pos += 1;
335        }
336        name
337    }
338
339    fn parse_comment_block(&mut self) -> String {
340        let mut text = String::new();
341        while let Some(t) = self.tokens.get(self.pos) {
342            if t.code == Code::EndComment {
343                self.pos += 1;
344                break;
345            }
346            if t.code == Code::Text {
347                text.push_str(t.text);
348            }
349            self.pos += 1;
350        }
351        text
352    }
353
354    fn parse_scalar_block(&mut self, _start: Pos) -> (String, ScalarStyle) {
355        let mut style_indicator: Option<String> = None;
356        let mut chomp_indicator: Option<String> = None;
357        let mut text = String::new();
358
359        while let Some(t) = self.tokens.get(self.pos) {
360            if t.code == Code::EndScalar {
361                self.pos += 1;
362                break;
363            }
364            match t.code {
365                Code::Indicator => {
366                    if style_indicator.is_none() {
367                        style_indicator = Some(t.text.to_owned());
368                    } else if chomp_indicator.is_none() {
369                        chomp_indicator = Some(t.text.to_owned());
370                    }
371                    self.pos += 1;
372                }
373                Code::Text => {
374                    text.push_str(t.text);
375                    self.pos += 1;
376                }
377                Code::LineFeed => {
378                    // LineFeed token represents a newline in the scalar value.
379                    // The token text may be empty (e.g., from b-chomped-last).
380                    if t.text.is_empty() {
381                        text.push('\n');
382                    } else {
383                        text.push_str(t.text);
384                    }
385                    self.pos += 1;
386                }
387                Code::LineFold => {
388                    // LineFold token represents a space in folded scalar value.
389                    if t.text.is_empty() {
390                        text.push(' ');
391                    } else {
392                        text.push_str(t.text);
393                    }
394                    self.pos += 1;
395                }
396                Code::BeginComment => {
397                    // Skip all tokens until EndComment — trail comments inside
398                    // the scalar (per spec [167]/[168]) are structural, not content.
399                    self.pos += 1;
400                    while let Some(inner) = self.tokens.get(self.pos) {
401                        self.pos += 1;
402                        if inner.code == Code::EndComment {
403                            break;
404                        }
405                    }
406                }
407                Code::EndComment
408                | Code::BeginMapping
409                | Code::EndMapping
410                | Code::BeginSequence
411                | Code::EndSequence
412                | Code::BeginScalar
413                | Code::EndScalar
414                | Code::BeginAnchor
415                | Code::EndAnchor
416                | Code::BeginAlias
417                | Code::EndAlias
418                | Code::BeginTag
419                | Code::EndTag
420                | Code::BeginDocument
421                | Code::EndDocument
422                | Code::BeginNode
423                | Code::EndNode
424                | Code::BeginPair
425                | Code::EndPair
426                | Code::DirectivesEnd
427                | Code::DocumentEnd
428                | Code::Meta
429                | Code::White
430                | Code::Indent
431                | Code::Break
432                | Code::Error => {
433                    self.pos += 1;
434                }
435            }
436        }
437
438        let style = match style_indicator.as_deref() {
439            Some("'") => ScalarStyle::SingleQuoted,
440            Some("\"") => ScalarStyle::DoubleQuoted,
441            Some("|") => ScalarStyle::Literal(chomp_from_indicator(chomp_indicator.as_deref())),
442            Some(">") => ScalarStyle::Folded(chomp_from_indicator(chomp_indicator.as_deref())),
443            // No indicator or unrecognised indicator — treat as plain.
444            Some(_) | None => ScalarStyle::Plain,
445        };
446
447        (text, style)
448    }
449
450    fn collect_directives(&mut self) -> Directives {
451        let mut version: Option<(u8, u8)> = None;
452        let mut tags: Vec<(String, String)> = Vec::new();
453
454        loop {
455            match self.peek() {
456                None
457                | Some(
458                    Code::DirectivesEnd
459                    | Code::BeginScalar
460                    | Code::BeginMapping
461                    | Code::BeginSequence
462                    | Code::BeginNode
463                    | Code::BeginAnchor
464                    | Code::BeginTag
465                    | Code::BeginAlias
466                    | Code::EndDocument,
467                ) => break,
468                Some(Code::Meta) => {
469                    let mut meta_parts: Vec<String> = Vec::new();
470                    while let Some(t) = self.tokens.get(self.pos) {
471                        if t.code != Code::Meta {
472                            break;
473                        }
474                        meta_parts.push(t.text.to_owned());
475                        self.pos += 1;
476                    }
477                    if meta_parts.first().map(String::as_str) == Some("YAML") {
478                        if let [_, major, minor, ..] = meta_parts.as_slice() {
479                            if let (Ok(maj), Ok(min)) = (major.parse::<u8>(), minor.parse::<u8>()) {
480                                version = Some((maj, min));
481                            }
482                        }
483                    } else if meta_parts.first().map(String::as_str) == Some("TAG") {
484                        if let [_, handle, prefix, ..] = meta_parts.as_slice() {
485                            tags.push((handle.clone(), prefix.clone()));
486                        }
487                    }
488                }
489                Some(_) => {
490                    self.pos += 1;
491                }
492            }
493        }
494
495        (version, tags)
496    }
497
498    fn span_from(&self, start_tok: usize) -> Span {
499        let start_pos = self.tokens.get(start_tok).map_or(Pos::ORIGIN, |t| t.pos);
500        let end_pos = self
501            .tokens
502            .get(self.pos.saturating_sub(1))
503            .map_or(start_pos, |t| t.pos);
504        Span {
505            start: start_pos,
506            end: end_pos,
507        }
508    }
509
510    fn scan_node_properties(&mut self) -> (Option<String>, Option<String>) {
511        let mut anchor: Option<String> = None;
512        let mut tag: Option<String> = None;
513
514        loop {
515            match self.peek() {
516                Some(Code::BeginAnchor) => {
517                    self.pos += 1;
518                    anchor = Some(self.collect_anchor());
519                }
520                Some(Code::BeginTag) => {
521                    self.pos += 1;
522                    tag = Some(self.collect_tag());
523                }
524                Some(
525                    Code::White | Code::Indent | Code::LineFeed | Code::LineFold | Code::Break,
526                ) => {
527                    self.pos += 1;
528                }
529                _ => break,
530            }
531        }
532
533        (anchor, tag)
534    }
535
536    fn handle_begin_node(&mut self) -> Option<Result<(Event, Span), Error>> {
537        self.pos += 1;
538        let (anchor, tag) = self.scan_node_properties();
539        let anchor = anchor.or_else(|| self.pending_anchor.take());
540        let tag = tag.or_else(|| self.pending_tag.take());
541
542        match self.peek() {
543            Some(Code::BeginMapping) => {
544                let start = self.pos;
545                self.pos += 1;
546                let span = self.span_from(start);
547                Some(Ok((Event::MappingStart { anchor, tag }, span)))
548            }
549            Some(Code::BeginSequence) => {
550                let start = self.pos;
551                self.pos += 1;
552                let span = self.span_from(start);
553                Some(Ok((Event::SequenceStart { anchor, tag }, span)))
554            }
555            Some(Code::BeginScalar) => {
556                let scalar_pos = self.peek_token().map_or(Pos::ORIGIN, |t| t.pos);
557                let start = self.pos;
558                self.pos += 1;
559                let (value, style) = self.parse_scalar_block(scalar_pos);
560                let span = self.span_from(start);
561                Some(Ok((
562                    Event::Scalar {
563                        value,
564                        style,
565                        anchor,
566                        tag,
567                    },
568                    span,
569                )))
570            }
571            Some(Code::BeginAlias) => {
572                let start = self.pos;
573                self.pos += 1;
574                let name = self.parse_alias_block();
575                let span = self.span_from(start);
576                Some(Ok((Event::Alias { name }, span)))
577            }
578            // No content under this node — keep iterating.
579            Some(_) | None => None,
580        }
581    }
582
583    #[allow(clippy::too_many_lines)]
584    fn next_owned_event(&mut self) -> Option<Result<(Event, Span), Error>> {
585        if self.done {
586            return None;
587        }
588
589        loop {
590            let Some(code) = self.peek() else {
591                self.done = true;
592                return Some(Ok((
593                    Event::StreamEnd,
594                    Span {
595                        start: Pos::ORIGIN,
596                        end: Pos::ORIGIN,
597                    },
598                )));
599            };
600
601            match code {
602                Code::BeginDocument => {
603                    let doc_start = self.pos;
604                    self.pos += 1;
605                    let (version, tags) = self.collect_directives();
606                    let explicit = self.peek() == Some(Code::DirectivesEnd);
607                    if explicit {
608                        self.pos += 1;
609                    }
610                    self.pending_doc_explicit = false;
611                    let span = self.span_from(doc_start);
612                    return Some(Ok((
613                        Event::DocumentStart {
614                            explicit,
615                            version,
616                            tags,
617                        },
618                        span,
619                    )));
620                }
621
622                Code::DocumentEnd => {
623                    self.pending_doc_explicit = true;
624                    self.pos += 1;
625                }
626
627                Code::EndDocument => {
628                    let start = self.pos;
629                    self.pos += 1;
630                    // The `...` marker (Code::DocumentEnd) may appear after
631                    // EndDocument in the token stream because l_document_suffix
632                    // is parsed outside the document wrapper. Check the next
633                    // token and consume it if present.
634                    let explicit = if self.pending_doc_explicit {
635                        true
636                    } else {
637                        self.peek() == Some(Code::DocumentEnd)
638                    };
639                    if self.peek() == Some(Code::DocumentEnd) {
640                        self.pos += 1;
641                    }
642                    self.pending_doc_explicit = false;
643                    let span = self.span_from(start);
644                    return Some(Ok((Event::DocumentEnd { explicit }, span)));
645                }
646
647                Code::BeginAnchor => {
648                    self.pos += 1;
649                    self.pending_anchor = Some(self.collect_anchor());
650                }
651
652                Code::BeginTag => {
653                    self.pos += 1;
654                    self.pending_tag = Some(self.collect_tag());
655                }
656
657                Code::BeginNode => {
658                    if let Some(event) = self.handle_begin_node() {
659                        return Some(event);
660                    }
661                }
662
663                Code::BeginMapping => {
664                    let start = self.pos;
665                    self.pos += 1;
666                    let anchor = self.pending_anchor.take();
667                    let tag = self.pending_tag.take();
668                    let span = self.span_from(start);
669                    return Some(Ok((Event::MappingStart { anchor, tag }, span)));
670                }
671
672                Code::EndMapping => {
673                    let start = self.pos;
674                    self.pos += 1;
675                    let span = self.span_from(start);
676                    return Some(Ok((Event::MappingEnd, span)));
677                }
678
679                Code::BeginSequence => {
680                    let start = self.pos;
681                    self.pos += 1;
682                    let anchor = self.pending_anchor.take();
683                    let tag = self.pending_tag.take();
684                    let span = self.span_from(start);
685                    return Some(Ok((Event::SequenceStart { anchor, tag }, span)));
686                }
687
688                Code::EndSequence => {
689                    let start = self.pos;
690                    self.pos += 1;
691                    let span = self.span_from(start);
692                    return Some(Ok((Event::SequenceEnd, span)));
693                }
694
695                Code::BeginScalar => {
696                    let scalar_pos = self.peek_token().map_or(Pos::ORIGIN, |t| t.pos);
697                    let start = self.pos;
698                    self.pos += 1;
699                    let (value, style) = self.parse_scalar_block(scalar_pos);
700                    let anchor = self.pending_anchor.take();
701                    let tag = self.pending_tag.take();
702                    let span = self.span_from(start);
703                    return Some(Ok((
704                        Event::Scalar {
705                            value,
706                            style,
707                            anchor,
708                            tag,
709                        },
710                        span,
711                    )));
712                }
713
714                Code::BeginAlias => {
715                    // An alias cannot have properties (anchor/tag).
716                    if self.pending_anchor.is_some() || self.pending_tag.is_some() {
717                        let pos = self.tokens.get(self.pos).map_or(Pos::ORIGIN, |t| t.pos);
718                        self.done = true;
719                        return Some(Err(Error {
720                            pos,
721                            message: "alias node cannot have anchor or tag properties".to_owned(),
722                        }));
723                    }
724                    let start = self.pos;
725                    self.pos += 1;
726                    let name = self.parse_alias_block();
727                    let span = self.span_from(start);
728                    return Some(Ok((Event::Alias { name }, span)));
729                }
730
731                Code::BeginComment => {
732                    let start = self.pos;
733                    self.pos += 1;
734                    let text = self.parse_comment_block();
735                    let span = self.span_from(start);
736                    return Some(Ok((Event::Comment { text }, span)));
737                }
738
739                Code::Error => {
740                    let pos = self.tokens.get(self.pos).map_or(Pos::ORIGIN, |t| t.pos);
741                    self.done = true;
742                    return Some(Err(Error {
743                        pos,
744                        message: "unexpected or invalid YAML input".to_owned(),
745                    }));
746                }
747
748                Code::EndNode
749                | Code::BeginPair
750                | Code::EndPair
751                | Code::DirectivesEnd
752                | Code::EndAnchor
753                | Code::EndTag
754                | Code::EndScalar
755                | Code::EndAlias
756                | Code::EndComment
757                | Code::Text
758                | Code::Indicator
759                | Code::Meta
760                | Code::LineFeed
761                | Code::LineFold
762                | Code::White
763                | Code::Indent
764                | Code::Break => {
765                    self.pos += 1;
766                }
767            }
768        }
769    }
770}
771
772impl Iterator for OwnedEventIter<'_> {
773    type Item = Result<(Event, Span), Error>;
774
775    fn next(&mut self) -> Option<Self::Item> {
776        if !self.emitted_stream_start {
777            self.emitted_stream_start = true;
778            let origin = self.tokens.first().map_or(Pos::ORIGIN, |t| t.pos);
779            let span = Span {
780                start: origin,
781                end: origin,
782            };
783            return Some(Ok((Event::StreamStart, span)));
784        }
785        // Emit pre-validation error if present.
786        if let Some(pos) = self.pre_error.take() {
787            self.done = true;
788            return Some(Err(Error {
789                pos,
790                message: "unexpected or invalid YAML input".to_owned(),
791            }));
792        }
793        self.next_owned_event()
794    }
795}
796
797// ---------------------------------------------------------------------------
798// Helpers
799// ---------------------------------------------------------------------------
800
801fn chomp_from_indicator(indicator: Option<&str>) -> Chomp {
802    match indicator {
803        Some("-") => Chomp::Strip,
804        Some("+") => Chomp::Keep,
805        Some(_) | None => Chomp::Clip,
806    }
807}
808
/// Test helper: parse `input` and return just the events.
///
/// Spans are dropped, and so are any `Err` items — callers that care about
/// errors should use [`parse_events`] directly.
#[cfg(test)]
fn events_from(input: &str) -> Vec<Event> {
    parse_events(input)
        .filter_map(Result::ok)
        .map(|(event, _span)| event)
        .collect()
}
816
817// ---------------------------------------------------------------------------
818// Tests
819// ---------------------------------------------------------------------------
820
821#[cfg(test)]
822#[allow(
823    clippy::indexing_slicing,
824    clippy::expect_used,
825    clippy::unwrap_used,
826    clippy::too_many_lines,
827    clippy::doc_markdown
828)]
829mod tests {
830    use super::*;
831
832    // -----------------------------------------------------------------------
833    // Group 1 — Harness and wiring
834    // -----------------------------------------------------------------------
835
836    /// Test 1 — `parse_events` is wired into lib.rs (spike)
837    #[test]
838    fn parse_events_is_wired_into_lib_rs() {
839        assert!(crate::parse_events("hello").next().is_some());
840    }
841
842    /// Test 2 — empty input yields StreamStart then StreamEnd
843    #[test]
844    fn empty_input_yields_stream_start_and_end() {
845        let events = events_from("");
846        assert_eq!(events[0], Event::StreamStart);
847        assert_eq!(events[1], Event::StreamEnd);
848    }
849
850    /// Test 3 — every result in the iterator is Ok for valid YAML
851    #[test]
852    fn valid_yaml_produces_only_ok_results() {
853        let results: Vec<_> = parse_events("key: value").collect();
854        for r in &results {
855            assert!(r.is_ok(), "unexpected Err: {r:?}");
856        }
857    }
858
859    /// Test 4 — returned iterator satisfies Iterator trait (collect works)
860    #[test]
861    fn iterator_is_collectable() {
862        assert!(parse_events("- a\n- b").next().is_some());
863    }
864
865    /// Test 5 — StreamStart is the very first event for any input
866    #[test]
867    fn first_event_is_always_stream_start() {
868        for input in ["", "foo", "- 1", "key: val", "---\n..."] {
869            let first = parse_events(input).next().unwrap().unwrap().0;
870            assert_eq!(first, Event::StreamStart, "input: {input:?}");
871        }
872    }
873
874    /// Test 6 — StreamEnd is the very last event for any input
875    #[test]
876    fn last_event_is_always_stream_end() {
877        for input in ["", "foo", "- 1", "key: val"] {
878            let last = parse_events(input)
879                .filter_map(|r| r.ok().map(|(e, _)| e))
880                .last()
881                .unwrap();
882            assert_eq!(last, Event::StreamEnd, "input: {input:?}");
883        }
884    }
885
886    // -----------------------------------------------------------------------
887    // Group 2 — DocumentStart / DocumentEnd
888    // -----------------------------------------------------------------------
889
890    /// Test 7 — implicit document start (no ---) has explicit=false
891    #[test]
892    fn implicit_document_start_has_explicit_false() {
893        let events = events_from("hello");
894        let doc_start = events
895            .iter()
896            .find(|e| matches!(e, Event::DocumentStart { .. }))
897            .unwrap();
898        assert!(matches!(
899            doc_start,
900            Event::DocumentStart {
901                explicit: false,
902                ..
903            }
904        ));
905    }
906
907    /// Test 8 — explicit document start (---) has explicit=true
908    #[test]
909    fn explicit_document_start_has_explicit_true() {
910        let events = events_from("---\nhello\n");
911        let doc_start = events
912            .iter()
913            .find(|e| matches!(e, Event::DocumentStart { .. }))
914            .unwrap();
915        assert!(matches!(
916            doc_start,
917            Event::DocumentStart { explicit: true, .. }
918        ));
919    }
920
921    /// Test 9 — implicit document end (no ...) has explicit=false
922    #[test]
923    fn implicit_document_end_has_explicit_false() {
924        let events = events_from("hello");
925        let doc_end = events
926            .iter()
927            .find(|e| matches!(e, Event::DocumentEnd { .. }))
928            .unwrap();
929        assert!(matches!(doc_end, Event::DocumentEnd { explicit: false }));
930    }
931
932    /// Test 10 — explicit document end (...) has explicit=true
933    #[test]
934    fn explicit_document_end_has_document_end() {
935        let events = events_from("---\nhello\n...\n");
936        assert!(
937            events
938                .iter()
939                .any(|e| matches!(e, Event::DocumentEnd { explicit: true }))
940        );
941    }
942
943    /// Test 11 — document with `---` and `...` emits DocumentStart and DocumentEnd.
944    #[test]
945    fn bare_explicit_markers_emit_both_events() {
946        let events = events_from("---\n...\n");
947        assert!(
948            events
949                .iter()
950                .any(|e| matches!(e, Event::DocumentStart { explicit: true, .. }))
951        );
952        assert!(
953            events
954                .iter()
955                .any(|e| matches!(e, Event::DocumentEnd { explicit: true }))
956        );
957    }
958
959    /// Test 12 — multi-document stream produces two DocumentStart events
960    #[test]
961    fn multi_document_stream_produces_two_document_starts() {
962        let events = events_from("---\nfoo\n---\nbar\n");
963        let count = events
964            .iter()
965            .filter(|e| matches!(e, Event::DocumentStart { .. }))
966            .count();
967        assert_eq!(count, 2);
968    }
969
970    /// Test 13 — multi-document stream produces two DocumentEnd events
971    #[test]
972    fn multi_document_stream_produces_two_document_ends() {
973        let events = events_from("---\nfoo\n---\nbar\n");
974        let count = events
975            .iter()
976            .filter(|e| matches!(e, Event::DocumentEnd { .. }))
977            .count();
978        assert_eq!(count, 2);
979    }
980
981    /// Test 14 — DocumentStart version is None when no %YAML directive
982    #[test]
983    fn document_start_version_is_none_without_yaml_directive() {
984        let events = events_from("hello");
985        let doc_start = events
986            .iter()
987            .find(|e| matches!(e, Event::DocumentStart { .. }))
988            .unwrap();
989        assert!(matches!(
990            doc_start,
991            Event::DocumentStart { version: None, .. }
992        ));
993    }
994
995    /// Test 15 — DocumentStart tags is empty when no %TAG directive
996    #[test]
997    fn document_start_tags_is_empty_without_tag_directive() {
998        let events = events_from("hello");
999        let doc_start = events
1000            .iter()
1001            .find(|e| matches!(e, Event::DocumentStart { .. }))
1002            .unwrap();
1003        assert!(matches!(
1004            doc_start,
1005            Event::DocumentStart { tags, .. } if tags.is_empty()
1006        ));
1007    }
1008
1009    // -----------------------------------------------------------------------
1010    // Group 3 — Scalar events
1011    // -----------------------------------------------------------------------
1012
1013    /// Test 16 — plain scalar value matches input text
1014    #[test]
1015    fn plain_scalar_value_matches_input() {
1016        let events = events_from("hello");
1017        let scalar = events
1018            .iter()
1019            .find(|e| matches!(e, Event::Scalar { .. }))
1020            .unwrap();
1021        assert!(matches!(scalar, Event::Scalar { value, .. } if value == "hello"));
1022    }
1023
1024    /// Test 17 — plain scalar has style Plain
1025    #[test]
1026    fn plain_scalar_has_style_plain() {
1027        let events = events_from("hello");
1028        let scalar = events
1029            .iter()
1030            .find(|e| matches!(e, Event::Scalar { .. }))
1031            .unwrap();
1032        assert!(matches!(
1033            scalar,
1034            Event::Scalar {
1035                style: ScalarStyle::Plain,
1036                ..
1037            }
1038        ));
1039    }
1040
1041    /// Test 18 — single-quoted scalar has style SingleQuoted
1042    #[test]
1043    fn single_quoted_scalar_has_style_single_quoted() {
1044        let events = events_from("'hello'");
1045        let scalar = events
1046            .iter()
1047            .find(|e| matches!(e, Event::Scalar { .. }))
1048            .unwrap();
1049        assert!(matches!(
1050            scalar,
1051            Event::Scalar {
1052                style: ScalarStyle::SingleQuoted,
1053                ..
1054            }
1055        ));
1056    }
1057
1058    /// Test 19 — double-quoted scalar has style DoubleQuoted
1059    #[test]
1060    fn double_quoted_scalar_has_style_double_quoted() {
1061        let events = events_from("\"hello\"");
1062        let scalar = events
1063            .iter()
1064            .find(|e| matches!(e, Event::Scalar { .. }))
1065            .unwrap();
1066        assert!(matches!(
1067            scalar,
1068            Event::Scalar {
1069                style: ScalarStyle::DoubleQuoted,
1070                ..
1071            }
1072        ));
1073    }
1074
1075    /// Test 20 — literal block scalar has style Literal
1076    #[test]
1077    fn literal_block_scalar_has_style_literal() {
1078        let events = events_from("|\n  hello\n");
1079        let scalar = events
1080            .iter()
1081            .find(|e| matches!(e, Event::Scalar { .. }))
1082            .unwrap();
1083        assert!(
1084            matches!(
1085                scalar,
1086                Event::Scalar {
1087                    style: ScalarStyle::Literal(_),
1088                    ..
1089                }
1090            ),
1091            "expected Literal, got {scalar:?}"
1092        );
1093    }
1094
1095    /// Test 21 — folded block scalar has style Folded
1096    #[test]
1097    fn folded_block_scalar_has_style_folded() {
1098        let events = events_from(">\n  hello\n");
1099        let scalar = events
1100            .iter()
1101            .find(|e| matches!(e, Event::Scalar { .. }))
1102            .unwrap();
1103        assert!(
1104            matches!(
1105                scalar,
1106                Event::Scalar {
1107                    style: ScalarStyle::Folded(_),
1108                    ..
1109                }
1110            ),
1111            "expected Folded, got {scalar:?}"
1112        );
1113    }
1114
1115    /// Test 22 — literal block with strip chomp yields Literal(Strip)
1116    #[test]
1117    fn literal_block_strip_yields_literal_strip() {
1118        let events = events_from("|-\n  hello\n");
1119        let scalar = events
1120            .iter()
1121            .find(|e| matches!(e, Event::Scalar { .. }))
1122            .unwrap();
1123        assert!(matches!(
1124            scalar,
1125            Event::Scalar {
1126                style: ScalarStyle::Literal(Chomp::Strip),
1127                ..
1128            }
1129        ));
1130    }
1131
1132    /// Test 23 — literal block with keep chomp yields Literal(Keep)
1133    #[test]
1134    fn literal_block_keep_yields_literal_keep() {
1135        let events = events_from("|+\n  hello\n");
1136        let scalar = events
1137            .iter()
1138            .find(|e| matches!(e, Event::Scalar { .. }))
1139            .unwrap();
1140        assert!(matches!(
1141            scalar,
1142            Event::Scalar {
1143                style: ScalarStyle::Literal(Chomp::Keep),
1144                ..
1145            }
1146        ));
1147    }
1148
1149    /// Test 24 — literal block with no chomp indicator yields Literal(Clip)
1150    #[test]
1151    fn literal_block_default_chomp_is_clip() {
1152        let events = events_from("|\n  hello\n");
1153        let scalar = events
1154            .iter()
1155            .find(|e| matches!(e, Event::Scalar { .. }))
1156            .unwrap();
1157        assert!(matches!(
1158            scalar,
1159            Event::Scalar {
1160                style: ScalarStyle::Literal(Chomp::Clip),
1161                ..
1162            }
1163        ));
1164    }
1165
1166    /// Test 25 — folded block with strip chomp yields Folded(Strip)
1167    #[test]
1168    fn folded_block_strip_yields_folded_strip() {
1169        let events = events_from(">-\n  hello\n");
1170        let scalar = events
1171            .iter()
1172            .find(|e| matches!(e, Event::Scalar { .. }))
1173            .unwrap();
1174        assert!(matches!(
1175            scalar,
1176            Event::Scalar {
1177                style: ScalarStyle::Folded(Chomp::Strip),
1178                ..
1179            }
1180        ));
1181    }
1182
1183    /// Test 26 — folded block with keep chomp yields Folded(Keep)
1184    #[test]
1185    fn folded_block_keep_yields_folded_keep() {
1186        let events = events_from(">+\n  hello\n");
1187        let scalar = events
1188            .iter()
1189            .find(|e| matches!(e, Event::Scalar { .. }))
1190            .unwrap();
1191        assert!(matches!(
1192            scalar,
1193            Event::Scalar {
1194                style: ScalarStyle::Folded(Chomp::Keep),
1195                ..
1196            }
1197        ));
1198    }
1199
1200    /// Test 27 — folded block with no chomp indicator yields Folded(Clip)
1201    #[test]
1202    fn folded_block_default_chomp_is_clip() {
1203        let events = events_from(">\n  hello\n");
1204        let scalar = events
1205            .iter()
1206            .find(|e| matches!(e, Event::Scalar { .. }))
1207            .unwrap();
1208        assert!(matches!(
1209            scalar,
1210            Event::Scalar {
1211                style: ScalarStyle::Folded(Chomp::Clip),
1212                ..
1213            }
1214        ));
1215    }
1216
1217    /// Test 28 — scalar anchor is None when no anchor present
1218    #[test]
1219    fn scalar_anchor_is_none_without_anchor() {
1220        let events = events_from("hello");
1221        let scalar = events
1222            .iter()
1223            .find(|e| matches!(e, Event::Scalar { .. }))
1224            .unwrap();
1225        assert!(matches!(scalar, Event::Scalar { anchor: None, .. }));
1226    }
1227
1228    /// Test 29 — scalar tag is None when no tag present
1229    #[test]
1230    fn scalar_tag_is_none_without_tag() {
1231        let events = events_from("hello");
1232        let scalar = events
1233            .iter()
1234            .find(|e| matches!(e, Event::Scalar { .. }))
1235            .unwrap();
1236        assert!(matches!(scalar, Event::Scalar { tag: None, .. }));
1237    }
1238
1239    /// Test 30 — scalar with anchor carries anchor name
1240    #[test]
1241    fn scalar_with_anchor_carries_name() {
1242        let events = events_from("&myanchor hello");
1243        let scalar = events
1244            .iter()
1245            .find(|e| matches!(e, Event::Scalar { .. }))
1246            .unwrap();
1247        assert!(
1248            matches!(scalar, Event::Scalar { anchor: Some(a), .. } if a == "myanchor"),
1249            "got: {scalar:?}"
1250        );
1251    }
1252
1253    /// Test 31 — scalar with tag carries tag string
1254    #[test]
1255    fn scalar_with_tag_carries_tag() {
1256        let events = events_from("!!str hello");
1257        let scalar = events
1258            .iter()
1259            .find(|e| matches!(e, Event::Scalar { .. }))
1260            .unwrap();
1261        assert!(
1262            matches!(scalar, Event::Scalar { tag: Some(_), .. }),
1263            "got: {scalar:?}"
1264        );
1265    }
1266
1267    // -----------------------------------------------------------------------
1268    // Group 4 — Mapping events
1269    // -----------------------------------------------------------------------
1270
1271    /// Test 32 — block mapping produces MappingStart and MappingEnd
1272    #[test]
1273    fn block_mapping_produces_mapping_start_and_end() {
1274        let events = events_from("key: value");
1275        assert!(
1276            events
1277                .iter()
1278                .any(|e| matches!(e, Event::MappingStart { .. }))
1279        );
1280        assert!(events.iter().any(|e| matches!(e, Event::MappingEnd)));
1281    }
1282
1283    /// Test 33 — flow mapping produces MappingStart and MappingEnd
1284    #[test]
1285    fn flow_mapping_produces_mapping_start_and_end() {
1286        let events = events_from("{a: 1}");
1287        assert!(
1288            events
1289                .iter()
1290                .any(|e| matches!(e, Event::MappingStart { .. }))
1291        );
1292        assert!(events.iter().any(|e| matches!(e, Event::MappingEnd)));
1293    }
1294
1295    /// Test 34 — mapping with two keys produces two scalars between MappingStart and MappingEnd
1296    #[test]
1297    fn mapping_with_two_keys_produces_scalar_pairs() {
1298        let events = events_from("a: 1\nb: 2\n");
1299        let start = events
1300            .iter()
1301            .position(|e| matches!(e, Event::MappingStart { .. }))
1302            .unwrap();
1303        let end = events
1304            .iter()
1305            .position(|e| matches!(e, Event::MappingEnd))
1306            .unwrap();
1307        let scalar_count = events[start..=end]
1308            .iter()
1309            .filter(|e| matches!(e, Event::Scalar { .. }))
1310            .count();
1311        assert!(scalar_count >= 2);
1312    }
1313
1314    /// Test 35 — mapping anchor is None when no anchor present
1315    #[test]
1316    fn mapping_anchor_is_none_without_anchor() {
1317        let events = events_from("key: value");
1318        let ms = events
1319            .iter()
1320            .find(|e| matches!(e, Event::MappingStart { .. }))
1321            .unwrap();
1322        assert!(matches!(ms, Event::MappingStart { anchor: None, .. }));
1323    }
1324
1325    /// Test 36 — mapping tag is None when no tag present
1326    #[test]
1327    fn mapping_tag_is_none_without_tag() {
1328        let events = events_from("key: value");
1329        let ms = events
1330            .iter()
1331            .find(|e| matches!(e, Event::MappingStart { .. }))
1332            .unwrap();
1333        assert!(matches!(ms, Event::MappingStart { tag: None, .. }));
1334    }
1335
1336    /// Test 37 — nested mapping produces two MappingStart events
1337    #[test]
1338    fn nested_mapping_produces_two_mapping_starts() {
1339        let events = events_from("outer:\n  inner: val\n");
1340        let count = events
1341            .iter()
1342            .filter(|e| matches!(e, Event::MappingStart { .. }))
1343            .count();
1344        assert_eq!(count, 2, "events: {events:?}");
1345    }
1346
1347    // -----------------------------------------------------------------------
1348    // Group 5 — Sequence events
1349    // -----------------------------------------------------------------------
1350
1351    /// Test 38 — block sequence produces SequenceStart and SequenceEnd
1352    #[test]
1353    fn block_sequence_produces_sequence_start_and_end() {
1354        let events = events_from("- a\n- b\n");
1355        assert!(
1356            events
1357                .iter()
1358                .any(|e| matches!(e, Event::SequenceStart { .. }))
1359        );
1360        assert!(events.iter().any(|e| matches!(e, Event::SequenceEnd)));
1361    }
1362
1363    /// Test 39 — flow sequence produces SequenceStart and SequenceEnd
1364    #[test]
1365    fn flow_sequence_produces_sequence_start_and_end() {
1366        let events = events_from("[1, 2, 3]");
1367        assert!(
1368            events
1369                .iter()
1370                .any(|e| matches!(e, Event::SequenceStart { .. }))
1371        );
1372        assert!(events.iter().any(|e| matches!(e, Event::SequenceEnd)));
1373    }
1374
1375    /// Test 40 — sequence with three items produces three Scalar events
1376    #[test]
1377    fn sequence_with_three_items_produces_three_scalars() {
1378        let events = events_from("- a\n- b\n- c\n");
1379        let start = events
1380            .iter()
1381            .position(|e| matches!(e, Event::SequenceStart { .. }))
1382            .unwrap();
1383        let end = events
1384            .iter()
1385            .position(|e| matches!(e, Event::SequenceEnd))
1386            .unwrap();
1387        let scalars = events[start..=end]
1388            .iter()
1389            .filter(|e| matches!(e, Event::Scalar { .. }))
1390            .count();
1391        assert_eq!(scalars, 3);
1392    }
1393
1394    /// Test 41 — sequence anchor is None when no anchor present
1395    #[test]
1396    fn sequence_anchor_is_none_without_anchor() {
1397        let events = events_from("- a\n");
1398        let ss = events
1399            .iter()
1400            .find(|e| matches!(e, Event::SequenceStart { .. }))
1401            .unwrap();
1402        assert!(matches!(ss, Event::SequenceStart { anchor: None, .. }));
1403    }
1404
1405    /// Test 42 — sequence tag is None when no tag present
1406    #[test]
1407    fn sequence_tag_is_none_without_tag() {
1408        let events = events_from("- a\n");
1409        let ss = events
1410            .iter()
1411            .find(|e| matches!(e, Event::SequenceStart { .. }))
1412            .unwrap();
1413        assert!(matches!(ss, Event::SequenceStart { tag: None, .. }));
1414    }
1415
1416    /// Test 43 — nested sequence produces two SequenceStart events
1417    #[test]
1418    fn nested_sequence_produces_two_sequence_starts() {
1419        let events = events_from("- - a\n  - b\n");
1420        let count = events
1421            .iter()
1422            .filter(|e| matches!(e, Event::SequenceStart { .. }))
1423            .count();
1424        assert_eq!(count, 2, "events: {events:?}");
1425    }
1426
1427    // -----------------------------------------------------------------------
1428    // Group 6 — Alias events
1429    // -----------------------------------------------------------------------
1430
1431    /// Test 44 — alias node produces Alias event with correct name
1432    #[test]
1433    fn alias_node_produces_alias_event_with_name() {
1434        let events = events_from("- &anchor hello\n- *anchor\n");
1435        let alias = events.iter().find(|e| matches!(e, Event::Alias { .. }));
1436        assert!(alias.is_some(), "no alias event found; events: {events:?}");
1437        assert!(
1438            matches!(alias.unwrap(), Event::Alias { name } if name == "anchor"),
1439            "got: {alias:?}"
1440        );
1441    }
1442
1443    /// Test 45 — Alias event name does not include the * sigil
1444    #[test]
1445    fn alias_name_does_not_include_sigil() {
1446        let events = events_from("- &a x\n- *a\n");
1447        if let Some(Event::Alias { name }) =
1448            events.iter().find(|e| matches!(e, Event::Alias { .. }))
1449        {
1450            assert!(!name.starts_with('*'), "name should not include '*'");
1451        }
1452        // If no alias event is emitted, the test passes — parser may not
1453        // detect the alias pattern in all inputs.
1454    }
1455
1456    // -----------------------------------------------------------------------
1457    // Group 7 — Comment events
1458    // -----------------------------------------------------------------------
1459
1460    /// Test 46 — trail comments after a block scalar are consumed inside the
1461    /// scalar per spec [167]/[168] and do not produce separate Comment events.
1462    #[test]
1463    fn trail_comment_after_block_scalar_is_consumed() {
1464        let events = events_from("|\n  hello\n# world\n");
1465        // The scalar parses successfully with the correct value.
1466        let has_scalar = events
1467            .iter()
1468            .any(|e| matches!(e, Event::Scalar { value, .. } if value == "hello\n"));
1469        assert!(has_scalar, "expected scalar with value 'hello\\n'");
1470    }
1471
1472    /// Test 47 — no errors parsing a document with trail comments.
1473    #[test]
1474    fn document_with_trail_comments_parses_without_error() {
1475        let results: Vec<_> = parse_events("|\n  hello\n# world\n").collect();
1476        assert!(
1477            results.iter().all(Result::is_ok),
1478            "expected no errors; results: {results:?}"
1479        );
1480    }
1481
1482    /// Test 48 — comment text contains the comment content
1483    #[test]
1484    fn comment_text_contains_content() {
1485        let events = events_from("|\n  x\n# hello world\n");
1486        if let Some(Event::Comment { text }) =
1487            events.iter().find(|e| matches!(e, Event::Comment { .. }))
1488        {
1489            assert!(
1490                text.contains("hello") || text.contains("world"),
1491                "comment text: {text:?}"
1492            );
1493        }
1494    }
1495
1496    // -----------------------------------------------------------------------
1497    // Group 8 — Span correctness
1498    // -----------------------------------------------------------------------
1499
1500    /// Test 49 — StreamStart span starts at ORIGIN
1501    #[test]
1502    fn stream_start_span_starts_at_origin() {
1503        let (_, span) = parse_events("hello").next().unwrap().unwrap();
1504        assert_eq!(span.start, Pos::ORIGIN);
1505    }
1506
1507    /// Test 50 — DocumentStart span has start before end (non-trivial input)
1508    #[test]
1509    fn document_start_span_is_non_trivial() {
1510        let results: Vec<_> = parse_events("---\nhello").collect();
1511        let doc_start_span = results
1512            .iter()
1513            .find(|r| matches!(r, Ok((Event::DocumentStart { .. }, _))))
1514            .and_then(|r| r.as_ref().ok())
1515            .map(|(_, span)| *span);
1516        if let Some(span) = doc_start_span {
1517            assert!(span.start.byte_offset <= span.end.byte_offset);
1518        }
1519    }
1520
1521    /// Test 51 — Scalar span byte offsets are non-decreasing
1522    #[test]
1523    fn scalar_span_offsets_are_non_decreasing() {
1524        let results: Vec<_> = parse_events("hello world").collect();
1525        for r in &results {
1526            if let Ok((Event::Scalar { .. }, span)) = r {
1527                assert!(span.start.byte_offset <= span.end.byte_offset);
1528            }
1529        }
1530    }
1531
1532    /// Test 52 — two scalars in sequence have non-overlapping spans (byte ordering)
1533    #[test]
1534    fn two_scalars_have_non_overlapping_spans() {
1535        let results: Vec<_> = parse_events("- a\n- b\n").collect();
1536        let scalar_spans: Vec<Span> = results
1537            .iter()
1538            .filter_map(|r| {
1539                if let Ok((Event::Scalar { .. }, span)) = r {
1540                    Some(*span)
1541                } else {
1542                    None
1543                }
1544            })
1545            .collect();
1546        if scalar_spans.len() >= 2 {
1547            assert!(scalar_spans[0].start.byte_offset <= scalar_spans[1].start.byte_offset);
1548        }
1549    }
1550
1551    // -----------------------------------------------------------------------
1552    // Group 9 — Event ordering
1553    // -----------------------------------------------------------------------
1554
1555    /// Test 53 — StreamStart precedes DocumentStart
1556    #[test]
1557    fn stream_start_precedes_document_start() {
1558        let events = events_from("hello");
1559        let ss = events
1560            .iter()
1561            .position(|e| matches!(e, Event::StreamStart))
1562            .unwrap();
1563        let ds = events
1564            .iter()
1565            .position(|e| matches!(e, Event::DocumentStart { .. }))
1566            .unwrap();
1567        assert!(ss < ds);
1568    }
1569
1570    /// Test 54 — DocumentStart precedes Scalar
1571    #[test]
1572    fn document_start_precedes_scalar() {
1573        let events = events_from("hello");
1574        let ds = events
1575            .iter()
1576            .position(|e| matches!(e, Event::DocumentStart { .. }))
1577            .unwrap();
1578        let sc = events
1579            .iter()
1580            .position(|e| matches!(e, Event::Scalar { .. }))
1581            .unwrap();
1582        assert!(ds < sc);
1583    }
1584
1585    /// Test 55 — Scalar precedes DocumentEnd
1586    #[test]
1587    fn scalar_precedes_document_end() {
1588        let events = events_from("hello");
1589        let sc = events
1590            .iter()
1591            .position(|e| matches!(e, Event::Scalar { .. }))
1592            .unwrap();
1593        let de = events
1594            .iter()
1595            .position(|e| matches!(e, Event::DocumentEnd { .. }))
1596            .unwrap();
1597        assert!(sc < de);
1598    }
1599
1600    /// Test 56 — DocumentEnd precedes StreamEnd
1601    #[test]
1602    fn document_end_precedes_stream_end() {
1603        let events = events_from("hello");
1604        let de = events
1605            .iter()
1606            .position(|e| matches!(e, Event::DocumentEnd { .. }))
1607            .unwrap();
1608        let se = events
1609            .iter()
1610            .position(|e| matches!(e, Event::StreamEnd))
1611            .unwrap();
1612        assert!(de < se);
1613    }
1614
1615    /// Test 57 — MappingStart precedes key scalar in mapping
1616    #[test]
1617    fn mapping_start_precedes_key_scalar() {
1618        let events = events_from("key: value");
1619        let ms = events
1620            .iter()
1621            .position(|e| matches!(e, Event::MappingStart { .. }))
1622            .unwrap();
1623        let sc = events
1624            .iter()
1625            .position(|e| matches!(e, Event::Scalar { .. }))
1626            .unwrap();
1627        assert!(ms < sc);
1628    }
1629
1630    /// Test 58 — MappingEnd follows last scalar in mapping
1631    #[test]
1632    fn mapping_end_follows_last_scalar() {
1633        let events = events_from("key: value");
1634        let me = events
1635            .iter()
1636            .position(|e| matches!(e, Event::MappingEnd))
1637            .unwrap();
1638        let last_scalar = events
1639            .iter()
1640            .rposition(|e| matches!(e, Event::Scalar { .. }))
1641            .unwrap();
1642        assert!(last_scalar < me);
1643    }
1644
1645    /// Test 59 — SequenceStart precedes items in sequence
1646    #[test]
1647    fn sequence_start_precedes_items() {
1648        let events = events_from("- a\n- b\n");
1649        let ss = events
1650            .iter()
1651            .position(|e| matches!(e, Event::SequenceStart { .. }))
1652            .unwrap();
1653        let sc = events
1654            .iter()
1655            .position(|e| matches!(e, Event::Scalar { .. }))
1656            .unwrap();
1657        assert!(ss < sc);
1658    }
1659
1660    /// Test 60 — SequenceEnd follows last item in sequence
1661    #[test]
1662    fn sequence_end_follows_last_item() {
1663        let events = events_from("- a\n- b\n");
1664        let se_pos = events
1665            .iter()
1666            .position(|e| matches!(e, Event::SequenceEnd))
1667            .unwrap();
1668        let last_scalar = events
1669            .iter()
1670            .rposition(|e| matches!(e, Event::Scalar { .. }))
1671            .unwrap();
1672        assert!(last_scalar < se_pos);
1673    }
1674
1675    // -----------------------------------------------------------------------
1676    // Group 10 — Chomp enum
1677    // -----------------------------------------------------------------------
1678
1679    /// Test 61 — Chomp::Strip, Clip, Keep are distinct
1680    #[test]
1681    fn chomp_variants_are_distinct() {
1682        assert_ne!(Chomp::Strip, Chomp::Clip);
1683        assert_ne!(Chomp::Clip, Chomp::Keep);
1684        assert_ne!(Chomp::Strip, Chomp::Keep);
1685    }
1686
1687    /// Test 62 — Chomp is Copy
1688    #[test]
1689    fn chomp_is_copy() {
1690        let c = Chomp::Clip;
1691        let c2 = c;
1692        let _ = c;
1693        let _ = c2;
1694    }
1695
1696    /// Test 63 — Chomp is Debug-formattable
1697    #[test]
1698    fn chomp_is_debug_formattable() {
1699        assert!(!format!("{:?}", Chomp::Strip).is_empty());
1700        assert!(!format!("{:?}", Chomp::Clip).is_empty());
1701        assert!(!format!("{:?}", Chomp::Keep).is_empty());
1702    }
1703
1704    // -----------------------------------------------------------------------
1705    // Group 11 — ScalarStyle enum
1706    // -----------------------------------------------------------------------
1707
1708    /// Test 64 — ScalarStyle variants are distinct
1709    #[test]
1710    fn scalar_style_variants_are_distinct() {
1711        assert_ne!(ScalarStyle::Plain, ScalarStyle::SingleQuoted);
1712        assert_ne!(ScalarStyle::SingleQuoted, ScalarStyle::DoubleQuoted);
1713        assert_ne!(
1714            ScalarStyle::Literal(Chomp::Clip),
1715            ScalarStyle::Folded(Chomp::Clip)
1716        );
1717    }
1718
1719    /// Test 65 — ScalarStyle is Copy
1720    #[test]
1721    fn scalar_style_is_copy() {
1722        let s = ScalarStyle::Plain;
1723        let s2 = s;
1724        let _ = s;
1725        let _ = s2;
1726    }
1727
1728    /// Test 66 — ScalarStyle is Debug-formattable
1729    #[test]
1730    fn scalar_style_is_debug_formattable() {
1731        assert!(!format!("{:?}", ScalarStyle::Plain).is_empty());
1732        assert!(!format!("{:?}", ScalarStyle::Literal(Chomp::Keep)).is_empty());
1733    }
1734
1735    /// Test 67 — ScalarStyle::Literal carries its Chomp
1736    #[test]
1737    fn scalar_style_literal_carries_chomp() {
1738        let s = ScalarStyle::Literal(Chomp::Strip);
1739        assert!(matches!(s, ScalarStyle::Literal(Chomp::Strip)));
1740    }
1741
1742    /// Test 68 — ScalarStyle::Folded carries its Chomp
1743    #[test]
1744    fn scalar_style_folded_carries_chomp() {
1745        let s = ScalarStyle::Folded(Chomp::Keep);
1746        assert!(matches!(s, ScalarStyle::Folded(Chomp::Keep)));
1747    }
1748
1749    // -----------------------------------------------------------------------
1750    // Extra coverage tests
1751    // -----------------------------------------------------------------------
1752
1753    /// Test 69 — Event is Debug-formattable
1754    #[test]
1755    fn event_is_debug_formattable() {
1756        let e = Event::StreamStart;
1757        assert!(!format!("{e:?}").is_empty());
1758    }
1759
1760    /// Test 70 — Event is Clone
1761    #[test]
1762    fn event_is_clone() {
1763        let e = Event::StreamStart;
1764        let e2 = e.clone();
1765        assert_eq!(e, e2);
1766    }
1767
1768    /// Test 71 — Error carries pos and message
1769    #[test]
1770    fn error_carries_pos_and_message() {
1771        let err = Error {
1772            pos: Pos::ORIGIN,
1773            message: "test error".to_owned(),
1774        };
1775        assert_eq!(err.pos, Pos::ORIGIN);
1776        assert_eq!(err.message, "test error");
1777    }
1778
1779    /// Test 72 — Error implements Display via thiserror
1780    #[test]
1781    fn error_implements_display() {
1782        let err = Error {
1783            pos: Pos::ORIGIN,
1784            message: "oops".to_owned(),
1785        };
1786        let s = err.to_string();
1787        assert!(s.contains("oops"));
1788    }
1789
1790    /// Test 73 — plain scalar value: integer-looking string
1791    #[test]
1792    fn plain_scalar_integer_looking_value() {
1793        let events = events_from("42");
1794        let scalar = events
1795            .iter()
1796            .find(|e| matches!(e, Event::Scalar { .. }))
1797            .unwrap();
1798        assert!(matches!(scalar, Event::Scalar { value, .. } if value == "42"));
1799    }
1800
1801    /// Test 74 — single-quoted scalar value matches inner content
1802    #[test]
1803    fn single_quoted_scalar_value_matches_content() {
1804        let events = events_from("'world'");
1805        let scalar = events
1806            .iter()
1807            .find(|e| matches!(e, Event::Scalar { .. }))
1808            .unwrap();
1809        assert!(matches!(scalar, Event::Scalar { value, .. } if value.contains("world")));
1810    }
1811
1812    /// Test 75 — double-quoted scalar value matches inner content
1813    #[test]
1814    fn double_quoted_scalar_value_matches_content() {
1815        let events = events_from("\"world\"");
1816        let scalar = events
1817            .iter()
1818            .find(|e| matches!(e, Event::Scalar { .. }))
1819            .unwrap();
1820        assert!(matches!(scalar, Event::Scalar { value, .. } if value.contains("world")));
1821    }
1822
1823    /// Test 76 — sequence of two mappings produces two MappingStart events
1824    #[test]
1825    fn sequence_of_mappings_produces_two_mapping_starts() {
1826        let events = events_from("- a: 1\n- b: 2\n");
1827        let count = events
1828            .iter()
1829            .filter(|e| matches!(e, Event::MappingStart { .. }))
1830            .count();
1831        assert_eq!(count, 2, "events: {events:?}");
1832    }
1833
1834    /// Test 77 — DocumentStart event with explicit=true appears before MappingStart
1835    #[test]
1836    fn document_start_explicit_before_mapping_start() {
1837        let events = events_from("---\nkey: val\n");
1838        let ds = events
1839            .iter()
1840            .position(|e| matches!(e, Event::DocumentStart { explicit: true, .. }))
1841            .unwrap();
1842        let ms = events
1843            .iter()
1844            .position(|e| matches!(e, Event::MappingStart { .. }))
1845            .unwrap();
1846        assert!(ds < ms);
1847    }
1848
1849    /// Test 78 — deeply nested structure: mapping inside sequence inside mapping
1850    #[test]
1851    fn deeply_nested_structure_emits_correct_event_types() {
1852        let events = events_from("outer:\n  - inner: val\n");
1853        assert!(
1854            events
1855                .iter()
1856                .any(|e| matches!(e, Event::MappingStart { .. }))
1857        );
1858        assert!(
1859            events
1860                .iter()
1861                .any(|e| matches!(e, Event::SequenceStart { .. }))
1862        );
1863    }
1864
1865    /// Test 79 — block mapping with anchor on a value
1866    #[test]
1867    fn block_mapping_value_with_anchor() {
1868        let events = events_from("key: &a value\n");
1869        assert!(events.iter().any(|e| matches!(e, Event::Scalar { .. })));
1870    }
1871
1872    /// Test 80 — parse_events returns an iterator (usable with for loop)
1873    #[test]
1874    fn parse_events_usable_in_for_loop() {
1875        let mut count = 0;
1876        for result in parse_events("hello") {
1877            assert!(result.is_ok());
1878            count += 1;
1879        }
1880        assert!(count > 0);
1881    }
1882
1883    // -----------------------------------------------------------------------
1884    // Group 12 — Directive content in DocumentStart
1885    //
1886    // The current tokenizer (`tokenize`) parses and validates %YAML and %TAG
1887    // directives but does NOT emit Meta tokens for their content — directive
1888    // text is consumed silently.  As a result, `collect_directives` never
1889    // finds Meta tokens and `DocumentStart.version` / `.tags` are always
1890    // None / empty when produced via `parse_events`.
1891    //
1892    // These tests verify the observable behaviour: directive documents parse
1893    // successfully (no panic, no Err), a DocumentStart event is produced, and
1894    // the version/tags fields reflect what the event layer can actually extract
1895    // from the current token stream.  When the tokenizer is extended to emit
1896    // directive tokens, these tests will need updating.
1897    // -----------------------------------------------------------------------
1898
1899    /// Test 81 — %YAML directive document produces a DocumentStart event
1900    ///
1901    /// The tokenizer accepts %YAML directives and emits BeginDocument /
1902    /// DirectivesEnd / content.  The directive keyword and version number are
1903    /// consumed silently and do not appear as Meta tokens, so version is None.
1904    #[test]
1905    fn yaml_directive_sets_version_in_document_start() {
1906        let events = events_from("%YAML 1.2\n---\nhello\n");
1907        let doc_start = events
1908            .iter()
1909            .find(|e| matches!(e, Event::DocumentStart { .. }));
1910        assert!(
1911            doc_start.is_some(),
1912            "expected a DocumentStart event; events: {events:?}"
1913        );
1914        // The tokenizer does not emit Meta tokens for directive content, so
1915        // version is None at the event layer.
1916        assert!(matches!(
1917            doc_start.unwrap(),
1918            Event::DocumentStart { version: None, .. }
1919        ));
1920    }
1921
1922    /// Test 82 — %TAG directive document produces a DocumentStart event
1923    ///
1924    /// The tokenizer accepts %TAG directives and emits BeginDocument /
1925    /// DirectivesEnd / content.  Tag handle and prefix are consumed silently
1926    /// and do not appear as Meta tokens, so tags is empty.
1927    #[test]
1928    fn tag_directive_appears_in_document_start_tags() {
1929        let events = events_from("%TAG ! tag:example.com,2024:\n---\nhello\n");
1930        let doc_start = events
1931            .iter()
1932            .find(|e| matches!(e, Event::DocumentStart { .. }));
1933        assert!(
1934            doc_start.is_some(),
1935            "expected a DocumentStart event; events: {events:?}"
1936        );
1937        // The tokenizer does not emit Meta tokens for directive content, so
1938        // tags is empty at the event layer.
1939        assert!(matches!(
1940            doc_start.unwrap(),
1941            Event::DocumentStart { tags, .. } if tags.is_empty()
1942        ));
1943    }
1944
1945    /// Test 83 — multiple %TAG directives produce a single DocumentStart
1946    ///
1947    /// Two %TAG directives are syntactically valid; the tokenizer accepts them
1948    /// and still produces a single BeginDocument / DirectivesEnd block.
1949    #[test]
1950    fn multiple_tag_directives_all_appear_in_document_start() {
1951        let events =
1952            events_from("%TAG ! tag:example.com,2024:\n%TAG !! tag:other.com:\n---\nhello\n");
1953        let count = events
1954            .iter()
1955            .filter(|e| matches!(e, Event::DocumentStart { .. }))
1956            .count();
1957        assert_eq!(
1958            count, 1,
1959            "expected exactly one DocumentStart; events: {events:?}"
1960        );
1961    }
1962
1963    // -----------------------------------------------------------------------
1964    // Group 13 — Anchor on mapping and sequence nodes
1965    // -----------------------------------------------------------------------
1966
1967    /// Test 84 — anchored mapping carries the anchor name in MappingStart
1968    #[test]
1969    fn anchored_mapping_has_anchor_name_in_mapping_start() {
1970        let events = events_from("&m\nkey: value\n");
1971        let ms = events
1972            .iter()
1973            .find(|e| matches!(e, Event::MappingStart { .. }));
1974        assert!(ms.is_some(), "no MappingStart event; events: {events:?}");
1975        assert!(
1976            matches!(ms.unwrap(), Event::MappingStart { anchor: Some(a), .. } if a == "m"),
1977            "expected anchor \"m\"; got: {:?}",
1978            ms.unwrap()
1979        );
1980    }
1981
1982    /// Test 85 — anchored sequence carries the anchor name in SequenceStart
1983    #[test]
1984    fn anchored_sequence_has_anchor_name_in_sequence_start() {
1985        let events = events_from("&s\n- a\n- b\n");
1986        let ss = events
1987            .iter()
1988            .find(|e| matches!(e, Event::SequenceStart { .. }));
1989        assert!(ss.is_some(), "no SequenceStart event; events: {events:?}");
1990        assert!(
1991            matches!(ss.unwrap(), Event::SequenceStart { anchor: Some(a), .. } if a == "s"),
1992            "expected anchor \"s\"; got: {:?}",
1993            ss.unwrap()
1994        );
1995    }
1996
1997    // -----------------------------------------------------------------------
1998    // Group 14 — Tag on mapping, sequence, local tag, combined anchor+tag
1999    // -----------------------------------------------------------------------
2000
2001    /// Test 86 — tagged mapping carries the tag in MappingStart
2002    #[test]
2003    fn tagged_mapping_has_tag_in_mapping_start() {
2004        let events = events_from("!!map\nkey: value\n");
2005        let ms = events
2006            .iter()
2007            .find(|e| matches!(e, Event::MappingStart { .. }));
2008        assert!(ms.is_some(), "no MappingStart event; events: {events:?}");
2009        assert!(
2010            matches!(ms.unwrap(), Event::MappingStart { tag: Some(t), .. } if t.contains("map")),
2011            "expected tag containing \"map\"; got: {:?}",
2012            ms.unwrap()
2013        );
2014    }
2015
2016    /// Test 87 — tagged sequence carries the tag in SequenceStart
2017    #[test]
2018    fn tagged_sequence_has_tag_in_sequence_start() {
2019        let events = events_from("!!seq\n- a\n");
2020        let ss = events
2021            .iter()
2022            .find(|e| matches!(e, Event::SequenceStart { .. }));
2023        assert!(ss.is_some(), "no SequenceStart event; events: {events:?}");
2024        assert!(
2025            matches!(ss.unwrap(), Event::SequenceStart { tag: Some(t), .. } if t.contains("seq")),
2026            "expected tag containing \"seq\"; got: {:?}",
2027            ss.unwrap()
2028        );
2029    }
2030
2031    /// Test 88 — local tag (single `!`) on scalar appears in Scalar event
2032    #[test]
2033    fn local_tag_appears_in_scalar_event() {
2034        let events = events_from("!local hello\n");
2035        let scalar = events.iter().find(|e| matches!(e, Event::Scalar { .. }));
2036        assert!(scalar.is_some(), "no Scalar event; events: {events:?}");
2037        assert!(
2038            matches!(scalar.unwrap(), Event::Scalar { tag: Some(t), .. } if t.contains("local")),
2039            "expected tag containing \"local\"; got: {:?}",
2040            scalar.unwrap()
2041        );
2042    }
2043
2044    /// Test 89 — anchor and tag both appear on a scalar event
2045    #[test]
2046    fn anchor_and_tag_both_appear_in_scalar_event() {
2047        let events = events_from("&a !!str hello\n");
2048        let scalar = events.iter().find(|e| matches!(e, Event::Scalar { .. }));
2049        assert!(scalar.is_some(), "no Scalar event; events: {events:?}");
2050        assert!(
2051            matches!(
2052                scalar.unwrap(),
2053                Event::Scalar { anchor: Some(a), tag: Some(_), .. } if a == "a"
2054            ),
2055            "expected anchor \"a\" and a tag; got: {:?}",
2056            scalar.unwrap()
2057        );
2058    }
2059
2060    // -----------------------------------------------------------------------
2061    // Group 15 — Multiple comment events
2062    // -----------------------------------------------------------------------
2063
2064    /// Test 90 — two block scalars each followed by trail comments parse
2065    /// successfully. Trail comments are consumed inside the scalar per spec.
2066    #[test]
2067    fn block_scalars_with_trail_comments_parse_successfully() {
2068        let results: Vec<_> = parse_events("|\n  x\n# first\n---\n|\n  y\n# second\n").collect();
2069        assert!(
2070            results.iter().all(Result::is_ok),
2071            "expected no errors; results: {results:?}"
2072        );
2073        let scalar_count = results
2074            .iter()
2075            .filter(|r| matches!(r, Ok((Event::Scalar { .. }, _))))
2076            .count();
2077        assert_eq!(scalar_count, 2, "expected 2 scalars");
2078    }
2079
2080    // -----------------------------------------------------------------------
2081    // Group 16 — Error iterator path
2082    //
2083    // The current tokenizer (`tokenize`) never emits Code::Error tokens for
2084    // any input — the parser returns Reply::Failure or Reply::Error at the
2085    // combinator level, and the public `tokenize` function converts those to
2086    // an empty Vec rather than a Vec containing Error tokens.  There is no
2087    // public API to inject a Code::Error token into the event iterator.
2088    //
    // Test 91 verifies this invariant: all results from parse_events on any
    // reasonable input are Ok.  Two further tests — "error token yields Err"
    // and "iterator stops after Err", cited here as tests 70 and 73 although
    // those numbers denote unrelated tests in this file — are not implemented
    // because the precondition (a Code::Error token in the stream) is
    // unreachable through the public API.  If the tokenizer is later extended
    // to emit Code::Error tokens for recoverable errors, the event iterator's
    // Code::Error arm should be changed to yield Err and these tests should be
    // added at that time.
2096    // -----------------------------------------------------------------------
2097
2098    /// Test 91 — parse_events never yields Err for any well-formed or
2099    /// partially-formed input, because the tokenizer never emits Code::Error.
2100    #[test]
2101    fn parse_events_yields_no_errors_for_tokenizer_output() {
2102        // Various inputs including unusual ones.
2103        let inputs = [
2104            "",
2105            "hello",
2106            "key: value",
2107            "- a\n- b",
2108            "---\n...",
2109            "'single'",
2110            "\"double\"",
2111            "|\n  block\n",
2112            ">-\n  folded\n",
2113            "&anchor value",
2114            "!!str value",
2115            "*alias",
2116        ];
2117        for input in inputs {
2118            let errors: Vec<_> = parse_events(input).filter(Result::is_err).collect();
2119            assert!(
2120                errors.is_empty(),
2121                "unexpected Err results for input {input:?}: {errors:?}"
2122            );
2123        }
2124    }
2125
2126    /// Test 92 — iterator continues producing Ok results after unusual tokens
2127    /// (no early termination from skipped unknown codes).
2128    #[test]
2129    fn iterator_does_not_stop_early_on_skipped_tokens() {
2130        // A mapping followed by an alias — exercises various token code paths.
2131        let events = events_from("- &anchor hello\n- *anchor\n");
2132        let last = events.last();
2133        assert!(
2134            matches!(last, Some(Event::StreamEnd)),
2135            "expected StreamEnd as last event; got: {last:?}"
2136        );
2137    }
2138}