Skip to main content

granit_parser/
parser.rs

//! Home to the YAML Parser.
//!
//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{Placement, QueuedToken, QueuedTokenType, ScalarStyle, ScanError, Scanner, Span},
10    BufferedInput,
11};
12
13use alloc::{
14    borrow::Cow,
15    collections::{BTreeMap, BTreeSet, VecDeque},
16    string::{String, ToString},
17    vec::Vec,
18};
19use core::{
20    convert::Infallible,
21    fmt::{self, Display},
22};
23
/// States of the parser's internal state machine.
///
/// The state the parser is currently in is stored in `Parser::state`. States to come back to
/// once a nested construct has been handled are kept on the `Parser::states` stack.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// Initial state of a parser created through `Parser::new`.
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockNodeOrIndentlessSequence,
    FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    IndentlessSequenceEntryNode,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingKeyNode,
    BlockMappingValue,
    BlockMappingValueNode,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingValueNode,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingKeyNode,
    FlowMappingValue,
    FlowMappingValueNode,
    FlowMappingEmptyValue,
    // NOTE(review): declared apart from the other `BlockSequence*` states; the derives used here
    // do not depend on declaration order, so this looks purely historical — confirm.
    BlockSequenceEntryNode,
    End,
}
58
/// An event generated by the YAML parser.
///
/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
/// [`EventReceiver`] trait.
///
/// Node events ([`Event::Scalar`], [`Event::SequenceStart`], [`Event::MappingStart`]) carry an
/// anchor ID in which `0` means "no anchor"; [`Event::anchor_id`] exposes it as an `Option`.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Reserved for internal use.
    Nothing,
    /// Event generated at the very beginning of parsing.
    StreamStart,
    /// Last event that will be generated by the parser. Signals EOF.
    StreamEnd,
    /// The start of a YAML document.
    ///
    /// When the boolean is `true`, it is an explicit document start
    /// directive (`---`).
    ///
    /// When the boolean is `false`, it is an implicit document start
    /// (without `---`).
    DocumentStart(bool),
    /// The end of a YAML document.
    ///
    /// This event is emitted for both explicit document end markers (`...`) and implicit document
    /// ends.
    DocumentEnd,
    /// A YAML alias.
    ///
    /// An alias references an anchor rather than defining one, so [`Event::anchor_id`] returns
    /// `None` for it; use [`Event::alias_id`] to read the target.
    Alias(
        /// The anchor ID the alias refers to.
        usize,
    ),
    /// A YAML source comment.
    ///
    /// Comments are presentation metadata, not YAML data nodes. The payload is the raw text
    /// exactly after `#`, excluding only the line break. The placement is a best-effort hint for
    /// correlating the comment with nearby YAML presentation. The companion parser [`Span`] covers
    /// the whole source comment, including `#` and excluding the line break.
    Comment(
        /// Raw comment payload exactly after `#`, excluding only the line break.
        Cow<'input, str>,
        /// Best-effort placement relative to nearby YAML content.
        Placement,
    ),
    /// A YAML scalar value.
    Scalar(
        /// The scalar value after YAML escape processing.
        Cow<'input, str>,
        /// The source notation used for the scalar.
        ScalarStyle,
        /// The anchor ID defined on this scalar, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this scalar, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The start of a YAML sequence (array).
    ///
    /// Closed by the matching [`Event::SequenceEnd`].
    SequenceStart(
        /// The notation style used for the sequence.
        StructureStyle,
        /// The anchor ID defined on this sequence, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this sequence, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML sequence (array).
    SequenceEnd,
    /// The start of a YAML mapping (object, hash).
    ///
    /// Closed by the matching [`Event::MappingEnd`].
    MappingStart(
        /// The notation style used for the mapping (Flow or Block).
        StructureStyle,
        /// The anchor ID defined on this mapping, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this mapping, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML mapping (object, hash).
    MappingEnd,
}
135
/// The notation style used for a YAML sequence or mapping.
///
/// [`StructureStyle::Block`] means block notation:
///
/// ```yaml
/// items:
///   - milk
///   - bread
/// mapping:
///   name: Ada
///   active: true
/// ```
///
/// [`StructureStyle::Flow`] means flow notation:
///
/// ```yaml
/// items: [milk, bread]
/// mapping: {name: Ada, active: true}
/// ```
///
/// The derived ordering follows declaration order, so `Block` compares less than `Flow`.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum StructureStyle {
    /// Block notation, such as `- item` sequences and `key: value` mappings.
    Block,
    /// Flow notation, such as `[item]` sequences and `{key: value}` mappings.
    Flow,
}
162
/// A YAML tag.
///
/// A tag is stored as two pieces: the resolved [`handle`](Self::handle) (or prefix) and the
/// [`suffix`](Self::suffix) that follows it. Its [`Display`] form is the two concatenated.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Resolved tag handle or prefix.
    ///
    /// Examples include `tag:yaml.org,2002:` for core-schema tags and `!` for local tags.
    pub handle: String,
    /// Tag suffix following the resolved handle or prefix.
    pub suffix: String,
}

impl Tag {
    /// Resolved handle shared by every tag of the YAML Core Schema.
    const CORE_SCHEMA_HANDLE: &'static str = "tag:yaml.org,2002:";

    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification specifies [a list of
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. This function
    /// checks whether _the handle_ (but not the suffix) is the handle for the YAML Core Schema.
    ///
    /// # Return
    /// Returns `true` if the handle is `tag:yaml.org,2002:`, `false` otherwise.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle == Self::CORE_SCHEMA_HANDLE
    }

    /// Return true for a YAML core-schema tag with the given suffix.
    ///
    /// For example, this matches core-schema tags such as `!!str`, `!!int`, `!!float`, `!!bool`,
    /// `!!null`, `!!map`, or `!!seq` after tag resolution. The suffix comparison is exact.
    #[must_use]
    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
        self.is_yaml_core_schema() && self.suffix == suffix
    }

    /// Return true for a tag outside the YAML core-schema namespace.
    ///
    /// This checks only the tag handle. It returns `false` for any tag whose handle is
    /// `tag:yaml.org,2002:`, regardless of suffix.
    #[must_use]
    pub fn is_custom(&self) -> bool {
        !self.is_yaml_core_schema()
    }

    /// Return the tag as `(handle, suffix)`.
    #[must_use]
    pub fn parts(&self) -> (&str, &str) {
        (&self.handle, &self.suffix)
    }
}

impl Display for Tag {
    /// Write the handle immediately followed by the suffix.
    ///
    /// A local tag with handle `!` and suffix `foo` is therefore rendered as `!foo`, and a
    /// core-schema string tag as `tag:yaml.org,2002:str`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // The handle already contains any leading `!`, so no special case is needed for local
        // tags: concatenation yields the same text for every handle.
        write!(f, "{}{}", self.handle, self.suffix)
    }
}
222
223impl<'input> Event<'input> {
224    /// Return the anchor ID defined by this event, if any.
225    ///
226    /// Returns `Some(id)` when this event defines an anchor on a scalar, sequence, or mapping
227    /// node. Returns `None` for all other events, including `Alias` (which references an anchor
228    /// rather than defining one; use [`Self::alias_id`] to obtain the target anchor ID).
229    #[must_use]
230    pub fn anchor_id(&self) -> Option<usize> {
231        match self {
232            Self::Scalar(_, _, anchor_id, _)
233            | Self::SequenceStart(_, anchor_id, _)
234            | Self::MappingStart(_, anchor_id, _)
235                if *anchor_id != 0 =>
236            {
237                Some(*anchor_id)
238            }
239            _ => None,
240        }
241    }
242
243    /// Return the target anchor ID referenced by this alias event, if this event is an alias.
244    #[must_use]
245    pub fn alias_id(&self) -> Option<usize> {
246        match self {
247            Self::Alias(anchor_id) => Some(*anchor_id),
248            _ => None,
249        }
250    }
251
252    /// Return the resolved tag carried by this node event, if any.
253    #[must_use]
254    pub fn tag(&self) -> Option<&Tag> {
255        match self {
256            Self::Scalar(_, _, _, tag)
257            | Self::SequenceStart(_, _, tag)
258            | Self::MappingStart(_, _, tag) => tag.as_deref(),
259            _ => None,
260        }
261    }
262
263    /// Return the scalar value and style, if this event is a scalar.
264    #[must_use]
265    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
266        match self {
267            Self::Scalar(value, style, _, _) => Some((value.as_ref(), *style)),
268            _ => None,
269        }
270    }
271
272    /// Return whether this event represents a YAML node (value).
273    ///
274    /// Returns `true` for scalars, collection starts, and aliases — all events that produce a
275    /// value in the document tree. Returns `false` for structural events such as `StreamStart`,
276    /// `DocumentStart`, collection ends, etc.
277    #[must_use]
278    pub fn is_node(&self) -> bool {
279        matches!(
280            self,
281            Self::Alias(_) | Self::Scalar(..) | Self::SequenceStart(..) | Self::MappingStart(..)
282        )
283    }
284
285    /// Create an empty scalar.
286    fn empty_scalar() -> Self {
287        // a null scalar
288        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
289    }
290
291    /// Create an empty scalar with the given anchor.
292    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
293        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
294    }
295}
296
// Preserve span ordering for normal-sized comment groups. Longer runs in syntactically ambiguous
// positions are rejected before they can grow the parser queue without bound.
//
// `Parser::next_comment_events` enforces the limit: once this many comments have been collected
// and yet another comment token follows, it fails with a `ScanError` instead of buffering more.
const MAX_BUFFERED_COMMENT_EVENTS: usize = 32;
300
/// A YAML parser.
///
/// The parser pulls tokens from a [`Scanner`] and turns them into [`Event`]s paired with their
/// [`Span`]. Create one with [`Parser::new`], [`Parser::new_from_str`] or
/// [`Parser::new_from_iter`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// The underlying scanner from which we pull tokens.
    scanner: Scanner<'input, T>,
    /// The stack of _previous_ states we were in.
    ///
    /// States are pushed in the context of subobjects to this stack. The top-most element is the
    /// state in which to come back to when exiting the current state.
    states: Vec<State>,
    /// The state in which we currently are.
    state: State,
    /// The next token from the scanner.
    ///
    /// Filled lazily by `peek_token`, which scans a token only when this is `None`.
    token: Option<QueuedToken<'input>>,
    /// The next YAML event to emit.
    ///
    /// `next_event_impl` hands out this event first, before looking at `queued_events`.
    current: Option<(Event<'input>, Span)>,
    /// YAML events buffered by parser states that need to emit an earlier synthetic node first.
    queued_events: VecDeque<(Event<'input>, Span)>,

    /// Pending indentation hint to be attached to the next emitted event span.
    ///
    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
    /// itself).
    pending_key_indent: Option<usize>,
    /// Pending anchor ID to attach to a node after an intervening comment.
    ///
    /// Initialized to `0` by [`Parser::new`]; presumably `0` means "nothing pending", mirroring
    /// the "no anchor" convention of [`Event`] — confirm against the code that consumes it.
    pending_node_anchor_id: usize,
    /// Pending tag to attach to a node after an intervening comment.
    pending_node_tag: Option<Cow<'input, Tag>>,
    /// Pending empty scalar span captured before an intervening comment.
    pending_empty_scalar_span: Option<Span>,
    /// Anchors that have been encountered in the YAML document.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Next ID available for an anchor.
    ///
    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
    /// return for the next anchor and the count of anchor IDs emitted.
    ///
    /// Starts at `1` in [`Parser::new`], since valid anchor IDs start from 1.
    anchor_id_count: usize,
    /// The tag directives (`%TAG`) the parser has encountered.
    ///
    /// Key is the handle, and value is the prefix.
    tags: BTreeMap<String, String>,
    /// Whether we have emitted [`Event::StreamEnd`].
    ///
    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
    /// [`Self::token`], this is set to `false`.
    stream_end_emitted: bool,
    /// Make tags global across all documents.
    ///
    /// Off by default; configured through [`Parser::keep_tags`].
    keep_tags: bool,
}
351
/// Trait to be implemented in order to use the low-level parsing API.
///
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`SpannedEventReceiver`] automatically.
/// Non-spanned receivers receive [`Event::Comment(text, placement)`](Event::Comment) like any
/// other event, but without source location. Spanned receivers receive the same comment event plus
/// the comment [`Span`] in [`SpannedEventReceiver::on_event`]. For comments, that span covers the
/// whole source comment, including `#` and excluding the line break. When parsing from an input
/// with byte offsets, such as [`Parser::new_from_str`], [`Span::slice`] returns that source
/// comment text.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
///
/// In a mapping, key-values are sent as consecutive data events. Comments can appear in the raw
/// event stream between a key and its value; they are presentation metadata, not YAML data nodes.
/// Consumers building YAML data trees should ignore [`Event::Comment`]. Any key/value alternation
/// shortcut applies only after filtering out comments and other presentation metadata. After that
/// filtering, the first event after an [`Event::MappingStart`] will be the key, and the following
/// event will be its value. If the mapping contains no sub-mapping or sub-sequence, then even events
/// (starting from 0) will always be keys and odd ones will always be values. The mapping ends when
/// an [`Event::MappingEnd`] event is received.
///
/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
///
/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
/// be part of the value and not another key-value pair or element in the sequence.
///
/// For instance, the following YAML:
/// ```yaml
/// a: b
/// c:
///   d: e
/// f:
///   - g
///   - h
/// ```
/// will emit (indented and commented for visibility):
/// ```text
/// StreamStart, DocumentStart, MappingStart,
///   Scalar("a", ..), Scalar("b", ..),
///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
/// MappingEnd, DocumentEnd, StreamEnd
/// ```
///
/// # Example
/// ```
/// # use granit_parser::{Event, EventReceiver, Parser};
/// #
/// /// Sink of events. Collects them into an array.
/// struct EventSink<'input> {
///     events: Vec<Event<'input>>,
/// }
///
/// /// Implement `on_event`, pushing into `self.events`.
/// impl<'input> EventReceiver<'input> for EventSink<'input> {
///     fn on_event(&mut self, ev: Event<'input>) {
///         self.events.push(ev);
///     }
/// }
///
/// /// Load events from a YAML string.
/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
///     let mut sink = EventSink { events: Vec::new() };
///     let mut parser = Parser::new_from_str(yaml);
///     // Load events using our sink as the receiver.
///     parser.load(&mut sink, true).unwrap();
///     sink.events
/// }
/// ```
pub trait EventReceiver<'input> {
    /// Handler called for each YAML event that is emitted by the parser.
    ///
    /// The event is passed by value and without its source location. This handler cannot report
    /// failure; see [`TryEventReceiver`] for the fallible counterpart.
    fn on_event(&mut self, ev: Event<'input>);
}
435
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
/// For [`Event::Comment`], the span is the source range of the whole comment.
///
/// Every [`EventReceiver`] implements this trait through a blanket implementation that drops
/// the span.
pub trait SpannedEventReceiver<'input> {
    /// Handler called for each event that occurs.
    ///
    /// `ev` is the event and `span` its location in the source.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
444
445impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
446    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
447        self.on_event(ev);
448    }
449}
450
/// Trait to be implemented for fallible event handling without source spans.
///
/// This is the fallible counterpart to [`EventReceiver`]. Use it with [`Parser::try_load`] when
/// event handling may need to stop parsing by returning an application error.
///
/// Every implementor also implements [`TrySpannedEventReceiver`] through a blanket
/// implementation that drops the span and keeps the same error type.
pub trait TryEventReceiver<'input> {
    /// Error returned by this receiver.
    type Error;

    /// Handler called for each YAML event that is emitted by the parser.
    ///
    /// Returning an error stops [`Parser::try_load`] immediately.
    ///
    /// # Errors
    /// Returns `Self::Error` when the receiver wants to stop parsing.
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
}
467
/// Trait to be implemented for fallible event handling with source spans.
///
/// This is the fallible counterpart to [`SpannedEventReceiver`]. Use it with
/// [`Parser::try_load`] when event handling may need to stop parsing by returning an application
/// error.
///
/// A receiver error surfaces from `try_load` as [`TryLoadError::Receiver`].
pub trait TrySpannedEventReceiver<'input> {
    /// Error returned by this receiver.
    type Error;

    /// Handler called for each event that occurs.
    ///
    /// `ev` is the event and `span` its location in the source.
    ///
    /// Returning an error stops [`Parser::try_load`] immediately.
    ///
    /// # Errors
    /// Returns `Self::Error` when the receiver wants to stop parsing.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
}
485
486impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
487    type Error = R::Error;
488
489    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
490        TryEventReceiver::on_event(self, ev)
491    }
492}
493
/// Error returned by [`Parser::try_load`] and [`ParserTrait::try_load`].
///
/// It separates failures of the parser itself from failures reported by the receiver. A
/// [`ScanError`] converts into the [`TryLoadError::Scan`] variant through `From`, which is what
/// lets `?` be used on parser results inside `try_load`.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TryLoadError<E> {
    /// Scanning or parsing failed.
    Scan(
        /// The scanner or parser error.
        ScanError,
    ),
    /// The receiver returned an application error.
    Receiver(
        /// The error returned by the receiver.
        E,
    ),
}
508
509impl<E> TryLoadError<E> {
510    #[cold]
511    fn scan(error: ScanError) -> Self {
512        Self::Scan(error)
513    }
514
515    #[cold]
516    fn receiver(error: E) -> Self {
517        Self::Receiver(error)
518    }
519}
520
521impl<E> From<ScanError> for TryLoadError<E> {
522    #[cold]
523    fn from(error: ScanError) -> Self {
524        Self::scan(error)
525    }
526}
527
528impl<E: Display> Display for TryLoadError<E> {
529    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
530        match self {
531            Self::Scan(error) => write!(f, "parser error: {error}"),
532            Self::Receiver(error) => write!(f, "receiver error: {error}"),
533        }
534    }
535}
536
537impl<E> core::error::Error for TryLoadError<E>
538where
539    E: core::error::Error + 'static,
540{
541    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
542        match self {
543            Self::Scan(error) => Some(error),
544            Self::Receiver(error) => Some(error),
545        }
546    }
547}
548
549fn try_emit<'input, R>(
550    recv: &mut R,
551    ev: Event<'input>,
552    span: Span,
553) -> Result<(), TryLoadError<R::Error>>
554where
555    R: TrySpannedEventReceiver<'input>,
556{
557    recv.on_event(ev, span).map_err(TryLoadError::receiver)
558}
559
560struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
561
562impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
563    for InfallibleSpannedReceiver<'_, R>
564{
565    type Error = Infallible;
566
567    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
568        self.0.on_event(ev, span);
569        Ok(())
570    }
571}
572
573fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
574    match result {
575        Ok(()) => Ok(()),
576        Err(TryLoadError::Scan(error)) => error.into_result(),
577        Err(TryLoadError::Receiver(error)) => match error {},
578    }
579}
580
/// A convenience alias for a parser event result.
///
/// On success it holds the event together with its [`Span`]; on failure, the [`ScanError`].
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
583
584/// Trait extracted from `Parser` to support mocking and alternative implementations.
585pub trait ParserTrait<'input> {
586    /// Try to load the next event and return it without consuming it from `self`.
587    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
588
589    /// Try to load the next event and return it, consuming it from `self`.
590    fn next_event(&mut self) -> Option<ParseResult<'input>>;
591
592    /// Load the YAML from the stream in `self`, pushing events into `recv`.
593    ///
594    /// Use this method when event handling is infallible. If receiver code can return an
595    /// application error and should stop parsing, use [`ParserTrait::try_load`] instead. If the
596    /// caller should directly control when the next event is read, use [`ParserTrait::next_event`]
597    /// or [`Parser`]'s [`core::iter::Iterator`] implementation.
598    ///
599    /// # Errors
600    /// Returns `ScanError` when scanning or parsing the stream fails.
601    fn load<R: SpannedEventReceiver<'input>>(
602        &mut self,
603        recv: &mut R,
604        multi: bool,
605    ) -> Result<(), ScanError>;
606
607    /// Load the YAML from the stream in `self`, stopping if `recv` returns an error.
608    ///
609    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
610    /// inside the stream.
611    ///
612    /// If the receiver returns an error, the parser is left positioned immediately after the event
613    /// that caused the receiver error. Callers should treat the parser as partially consumed.
614    ///
615    /// # Errors
616    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
617    /// [`TryLoadError::Receiver`] when `recv` returns an error.
618    fn try_load<R: TrySpannedEventReceiver<'input>>(
619        &mut self,
620        recv: &mut R,
621        multi: bool,
622    ) -> Result<(), TryLoadError<R::Error>> {
623        while let Some(res) = self.next_event() {
624            let (ev, span) = res?;
625            let is_doc_end = matches!(ev, Event::DocumentEnd);
626            let is_stream_end = matches!(ev, Event::StreamEnd);
627
628            try_emit(recv, ev, span)?;
629
630            if is_stream_end {
631                break;
632            }
633            if !multi && is_doc_end {
634                break;
635            }
636        }
637
638        Ok(())
639    }
640}
641
642impl<'input> Parser<'input, StrInput<'input>> {
643    /// Create a parser over a borrowed string slice.
644    #[must_use]
645    pub fn new_from_str(value: &'input str) -> Self {
646        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
647        Parser::new(StrInput::new(value))
648    }
649}
650
651impl<T> Parser<'static, BufferedInput<T>>
652where
653    T: Iterator<Item = char>,
654{
655    /// Create a parser over an iterator of characters.
656    #[must_use]
657    pub fn new_from_iter(iter: T) -> Self {
658        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
659        Parser::new(BufferedInput::new(iter))
660    }
661}
662
663impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
664    /// Return the next anchor ID that will be assigned by this parser.
665    pub fn get_anchor_offset(&self) -> usize {
666        self.anchor_id_count
667    }
668
    /// Set the next anchor ID that will be assigned by this parser.
    ///
    /// The value is stored as given, without validation. Valid anchor IDs start from 1 and node
    /// events use `0` to mean "no anchor", so passing `0` is presumably a mistake — callers
    /// should confirm before relying on it.
    pub fn set_anchor_offset(&mut self, offset: usize) {
        self.anchor_id_count = offset;
    }
673
674    /// Create a parser over a custom input source.
675    pub fn new(src: T) -> Self {
676        Parser {
677            scanner: Scanner::new(src),
678            states: Vec::new(),
679            state: State::StreamStart,
680            token: None,
681            current: None,
682            queued_events: VecDeque::new(),
683
684            pending_key_indent: None,
685            pending_node_anchor_id: 0,
686            pending_node_tag: None,
687            pending_empty_scalar_span: None,
688
689            anchors: BTreeMap::new(),
690            // valid anchor_id starts from 1
691            anchor_id_count: 1,
692            tags: BTreeMap::new(),
693            stream_end_emitted: false,
694            keep_tags: false,
695        }
696    }
697
698    /// Configure whether tag directives remain active across document boundaries.
699    ///
700    /// This behavior is non-standard as per the YAML specification but can be encountered in the
701    /// wild. Passing `true` enables this non-standard extension and allows the parser to accept
702    /// input from [test
703    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
704    /// of the yaml-test-suite:
705    ///
706    /// ```yaml
707    /// %TAG !prefix! tag:example.com,2011:
708    /// --- !prefix!A
709    /// a: b
710    /// --- !prefix!B
711    /// c: d
712    /// --- !prefix!C
713    /// e: f
714    /// ```
715    ///
716    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
717    /// only apply to the document immediately following them. This would error on `!prefix!B`.
718    ///
719    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
720    #[must_use]
721    pub fn keep_tags(mut self, value: bool) -> Self {
722        self.keep_tags = value;
723        self
724    }
725
726    /// Try to load the next event and return it without consuming it from `self`.
727    ///
728    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
729    /// [`Iterator::next`] or [`Parser::load`].
730    ///
731    /// # Errors
732    /// Returns `ScanError` when loading the next event fails.
733    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
734        ParserTrait::peek(self)
735    }
736
737    /// Try to load the next event and return it, consuming it from `self`.
738    ///
739    /// # Errors
740    /// Returns `ScanError` when loading the next event fails.
741    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
742        ParserTrait::next_event(self)
743    }
744
745    /// Implementation function for [`Self::next_event`] without the `Option`.
746    ///
747    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
748    /// option. This burdens the parser code. This function is used internally when an option is
749    /// undesirable.
750    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
751    where
752        'input: 'a,
753    {
754        match self.current.take() {
755            None => {
756                if let Some(event) = self.queued_events.pop_front() {
757                    Ok(self.apply_pending_key_indent(event))
758                } else if let Some(comment) = self.maybe_next_comment_event()? {
759                    Ok(comment)
760                } else {
761                    self.parse()
762                }
763            }
764            Some(v) => Ok(v),
765        }
766    }
767
768    fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
769        if ev.is_node() {
770            if let Some(indent) = self.pending_key_indent.take() {
771                return (ev, span.with_indent(Some(indent)));
772            }
773        }
774
775        (ev, span)
776    }
777
778    /// Peek at the next token from the scanner.
779    fn peek_token(&mut self) -> Result<&QueuedToken<'_>, ScanError> {
780        match self.token {
781            None => {
782                self.token = Some(self.scan_next_token()?);
783                Ok(self.token.as_ref().unwrap())
784            }
785            Some(ref tok) => Ok(tok),
786        }
787    }
788
789    /// Extract and return the next token from the scanner.
790    ///
791    /// This function does _not_ make use of `self.token`.
792    fn scan_next_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
793        match self.scanner.next_queued_token()? {
794            None => match self.scanner.get_error() {
795                None => Err(self.unexpected_eof()),
796                Some(e) => e.into_result(),
797            },
798            Some(tok) => Ok(tok),
799        }
800    }
801
802    #[inline]
803    fn maybe_next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
804    where
805        'input: 'a,
806    {
807        if self.scanner.comments_possible() {
808            self.next_comment_event()
809        } else {
810            Ok(None)
811        }
812    }
813
814    fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
815    where
816        'input: 'a,
817    {
818        let is_comment = {
819            let token = self.peek_token()?;
820            matches!(token.1, QueuedTokenType::Comment(_))
821        };
822
823        if !is_comment {
824            return Ok(None);
825        }
826
827        let QueuedToken(span, token) = self.fetch_token();
828        match token {
829            QueuedTokenType::Comment(mut comment) => {
830                comment.placement = self.refined_comment_placement(span, comment.placement);
831                Ok(Some((
832                    Event::Comment(comment.text, comment.placement),
833                    span,
834                )))
835            }
836            _ => unreachable!("comment token disappeared after peek"),
837        }
838    }
839
840    #[inline]
841    fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
842        if !self.scanner.comments_possible() {
843            return Ok(Vec::new());
844        }
845
846        let mut events = Vec::new();
847        loop {
848            match self.peek_token() {
849                Ok(token) if matches!(token.1, QueuedTokenType::Comment(_)) => {}
850                Err(error) if events.is_empty() => return Err(error),
851                Ok(_) | Err(_) => return Ok(events),
852            }
853
854            if events.len() == MAX_BUFFERED_COMMENT_EVENTS {
855                return Err(ScanError::new_str(
856                    self.peek_token()?.0.start,
857                    "too many consecutive comments before resolving collection entry",
858                ));
859            }
860
861            let comment = self
862                .next_comment_event()?
863                .expect("comment token disappeared after peek");
864            events.push(comment);
865        }
866    }
867
868    fn queue_tail_and_return_first(
869        &mut self,
870        events: Vec<(Event<'input>, Span)>,
871    ) -> (Event<'input>, Span) {
872        let mut events = events.into_iter();
873        let first = events
874            .next()
875            .expect("event queue must contain at least one event");
876        self.queued_events.extend(events);
877        first
878    }
879
880    fn queue_event_by_span(
881        &mut self,
882        comments: Vec<(Event<'input>, Span)>,
883        event: (Event<'input>, Span),
884    ) -> (Event<'input>, Span) {
885        let insert_at = comments
886            .iter()
887            .position(|(_, comment_span)| {
888                comment_span.start.index() >= event.1.start.index()
889                    && comment_span.end.index() >= event.1.end.index()
890            })
891            .unwrap_or(comments.len());
892        let mut ordered = Vec::with_capacity(comments.len() + 1);
893        let mut comments = comments.into_iter();
894
895        for _ in 0..insert_at {
896            ordered.push(
897                comments
898                    .next()
899                    .expect("comment disappeared while ordering queued events"),
900            );
901        }
902        ordered.push(event);
903        ordered.extend(comments);
904
905        self.queue_tail_and_return_first(ordered)
906    }
907
908    fn queue_two_events_by_span(
909        &mut self,
910        comments: Vec<(Event<'input>, Span)>,
911        first: (Event<'input>, Span),
912        second: (Event<'input>, Span),
913    ) -> (Event<'input>, Span) {
914        let insert_at = comments
915            .iter()
916            .position(|(_, comment_span)| {
917                comment_span.start.index() >= first.1.start.index()
918                    && comment_span.end.index() >= first.1.end.index()
919            })
920            .unwrap_or(comments.len());
921        let mut ordered = Vec::with_capacity(comments.len() + 2);
922        let mut comments = comments.into_iter();
923
924        for _ in 0..insert_at {
925            ordered.push(
926                comments
927                    .next()
928                    .expect("comment disappeared while ordering queued events"),
929            );
930        }
931        ordered.push(first);
932        ordered.push(second);
933        ordered.extend(comments);
934
935        self.queue_tail_and_return_first(ordered)
936    }
937
938    fn refined_comment_placement(&mut self, span: Span, placement: Placement) -> Placement {
939        if placement == Placement::Right {
940            return Placement::Right;
941        }
942
943        let Ok(next) = self.peek_token() else {
944            return placement;
945        };
946        if matches!(next.1, QueuedTokenType::StreamEnd) {
947            return Placement::Last;
948        }
949
950        if next.0.start.line() == span.end.line() + 1 {
951            Placement::Above
952        } else {
953            Placement::Free
954        }
955    }
956
957    #[cold]
958    fn unexpected_eof(&self) -> ScanError {
959        let info = match self.state {
960            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
961                "unexpected EOF while parsing a flow sequence"
962            }
963            State::FlowMappingFirstKey
964            | State::FlowMappingKey
965            | State::FlowMappingValue
966            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
967            State::FlowSequenceEntryMappingKey
968            | State::FlowSequenceEntryMappingValue
969            | State::FlowSequenceEntryMappingEnd
970            | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
971            State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
972                "unexpected EOF while parsing a block sequence"
973            }
974            State::BlockMappingFirstKey
975            | State::BlockMappingKey
976            | State::BlockMappingValue
977            | State::BlockNodeOrIndentlessSequence => {
978                "unexpected EOF while parsing a block mapping"
979            }
980            _ => "unexpected eof",
981        };
982        ScanError::new_str(self.scanner.mark(), info)
983    }
984
985    fn fetch_token<'a>(&mut self) -> QueuedToken<'a>
986    where
987        'input: 'a,
988    {
989        self.token
990            .take()
991            .expect("fetch_token needs to be preceded by peek_token")
992    }
993
    /// Skip the next token from the scanner.
    ///
    /// This drops whatever token is currently stored in `self.token` (if any) by resetting the
    /// slot to `None`.
    fn skip(&mut self) {
        self.token = None;
    }
    /// Pop the top-most state off the state stack and make it the current state.
    ///
    /// # Panics
    /// Panics if the state stack is empty.
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap();
    }
    /// Push a new state atop the state stack.
    ///
    /// The current state (`self.state`) is left untouched; `state` only becomes current once it
    /// is popped again via `pop_state`.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
1006
1007    fn defer_parse_node<'a>(
1008        &mut self,
1009        node_state: State,
1010        return_state: State,
1011        block: bool,
1012        indentless_sequence: bool,
1013    ) -> ParseResult<'a>
1014    where
1015        'input: 'a,
1016    {
1017        self.push_state(return_state);
1018        self.state = node_state;
1019        if let Some(comment) = self.maybe_next_comment_event()? {
1020            Ok(comment)
1021        } else {
1022            self.parse_node(block, indentless_sequence)
1023        }
1024    }
1025
1026    fn parse<'a>(&mut self) -> ParseResult<'a>
1027    where
1028        'input: 'a,
1029    {
1030        if self.state == State::End {
1031            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
1032        }
1033        let event = self.state_machine()?;
1034        Ok(self.apply_pending_key_indent(event))
1035    }
1036
    /// Load the YAML from the stream in `self`, pushing events into `recv`.
    ///
    /// The contents of the stream are parsed and the corresponding events are sent into the
    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// Use this method when event handling is infallible. If receiver code can return an
    /// application error and should stop parsing, use [`Parser::try_load`] instead. If the caller
    /// should directly control when the next event is read, use [`Parser`]'s
    /// [`core::iter::Iterator`] implementation.
    ///
    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
    /// former is enough to call this function.
    ///
    /// # Example
    /// ```
    /// # use granit_parser::{Event, EventReceiver, Parser};
    /// # fn main() -> Result<(), granit_parser::ScanError> {
    /// struct EventSink<'input> {
    ///     events: Vec<Event<'input>>,
    /// }
    ///
    /// impl<'input> EventReceiver<'input> for EventSink<'input> {
    ///     fn on_event(&mut self, ev: Event<'input>) {
    ///         self.events.push(ev);
    ///     }
    /// }
    ///
    /// let mut parser = Parser::new_from_str("a: 1\n");
    /// let mut sink = EventSink { events: Vec::new() };
    ///
    /// parser.load(&mut sink, false)?;
    ///
    /// assert!(sink
    ///     .events
    ///     .iter()
    ///     .any(|ev| matches!(ev, Event::Scalar(value, ..) if value == "a")));
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// # Errors
    /// Returns a [`ScanError`] when loading fails.
    pub fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        // Inherent convenience wrapper: the actual work is done by the `ParserTrait` method of
        // the same name.
        ParserTrait::load(self, recv, multi)
    }
1089
    /// Load the YAML from the stream in `self`, pushing events into a fallible receiver `recv`.
    ///
    /// This is the fallible counterpart to [`Parser::load`]. If `recv` returns an error, parsing
    /// stops immediately and that error is returned as [`TryLoadError::Receiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// If the receiver returns an error, the parser is left positioned immediately after the event
    /// that caused the receiver error. Callers should treat the parser as partially consumed.
    ///
    /// # Example
    /// ```
    /// # use granit_parser::{Event, Parser, TryEventReceiver, TryLoadError};
    /// #[derive(Debug, PartialEq, Eq)]
    /// enum ValidationError {
    ///     ForbiddenScalar,
    /// }
    ///
    /// struct Validator;
    ///
    /// impl<'input> TryEventReceiver<'input> for Validator {
    ///     type Error = ValidationError;
    ///
    ///     fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
    ///         if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
    ///             Err(ValidationError::ForbiddenScalar)
    ///         } else {
    ///             Ok(())
    ///         }
    ///     }
    /// }
    ///
    /// let mut parser = Parser::new_from_str("value: bad\n");
    /// let mut validator = Validator;
    ///
    /// let err = parser.try_load(&mut validator, false).unwrap_err();
    ///
    /// assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenScalar));
    /// ```
    ///
    /// # Errors
    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
    /// [`TryLoadError::Receiver`] when `recv` returns an error.
    pub fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        // Inherent convenience wrapper: the actual work is done by the `ParserTrait` method of
        // the same name.
        ParserTrait::try_load(self, recv, multi)
    }
1141
1142    #[cfg(test)]
1143    fn try_load_document<R: TrySpannedEventReceiver<'input>>(
1144        &mut self,
1145        first_ev: Event<'input>,
1146        span: Span,
1147        recv: &mut R,
1148    ) -> Result<(), TryLoadError<R::Error>> {
1149        if !matches!(first_ev, Event::DocumentStart(_)) {
1150            return Err(TryLoadError::scan(ScanError::new_str(
1151                span.start,
1152                "did not find expected <document-start>",
1153            )));
1154        }
1155        try_emit(recv, first_ev, span)?;
1156
1157        let (ev, span) = self.next_event_impl()?;
1158        self.try_load_node(ev, span, recv)?;
1159
1160        // DOCUMENT-END is expected.
1161        let (ev, mark) = self.next_event_impl()?;
1162        assert_eq!(ev, Event::DocumentEnd);
1163        try_emit(recv, ev, mark)?;
1164
1165        Ok(())
1166    }
1167
1168    #[cfg(test)]
1169    fn try_load_node<R: TrySpannedEventReceiver<'input>>(
1170        &mut self,
1171        first_ev: Event<'input>,
1172        span: Span,
1173        recv: &mut R,
1174    ) -> Result<(), TryLoadError<R::Error>> {
1175        match first_ev {
1176            Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
1177            Event::SequenceStart(..) => {
1178                try_emit(recv, first_ev, span)?;
1179                self.try_load_sequence(recv)
1180            }
1181            Event::MappingStart(..) => {
1182                try_emit(recv, first_ev, span)?;
1183                self.try_load_mapping(recv)
1184            }
1185            _ => {
1186                #[cfg(feature = "debug_prints")]
1187                std::println!("UNREACHABLE EVENT: {first_ev:?}");
1188                unreachable!();
1189            }
1190        }
1191    }
1192
1193    #[cfg(test)]
1194    fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
1195        &mut self,
1196        recv: &mut R,
1197    ) -> Result<(), TryLoadError<R::Error>> {
1198        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
1199        while key_ev != Event::MappingEnd {
1200            // key
1201            self.try_load_node(key_ev, key_mark, recv)?;
1202
1203            // value
1204            let (ev, mark) = self.next_event_impl()?;
1205            self.try_load_node(ev, mark, recv)?;
1206
1207            // next event
1208            let (ev, mark) = self.next_event_impl()?;
1209            key_ev = ev;
1210            key_mark = mark;
1211        }
1212        try_emit(recv, key_ev, key_mark)?;
1213        Ok(())
1214    }
1215
1216    #[cfg(test)]
1217    fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
1218        &mut self,
1219        recv: &mut R,
1220    ) -> Result<(), TryLoadError<R::Error>> {
1221        let (mut ev, mut mark) = self.next_event_impl()?;
1222        while ev != Event::SequenceEnd {
1223            self.try_load_node(ev, mark, recv)?;
1224
1225            // next event
1226            let (next_ev, next_mark) = self.next_event_impl()?;
1227            ev = next_ev;
1228            mark = next_mark;
1229        }
1230        try_emit(recv, ev, mark)?;
1231        Ok(())
1232    }
1233
    /// Run one step of the parser: dispatch on the current state to the handler that produces
    /// the next event.
    ///
    /// Each handler is responsible for updating `self.state` (and the state stack) itself.
    ///
    /// # Panics
    /// Panics if called while in `State::End`; `parse` handles that state before dispatching
    /// here.
    fn state_machine<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);

        match self.state {
            State::StreamStart => self.stream_start(),

            // Document boundaries.
            State::ImplicitDocumentStart => self.document_start(true),
            State::DocumentStart => self.document_start(false),
            State::DocumentContent => self.document_content(),
            State::DocumentEnd => self.document_end(),

            // Generic nodes; the flags are `parse_node`'s `block` and `indentless_sequence`.
            State::BlockNode => self.parse_node(true, false),
            State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
            State::FlowNode => self.parse_node(false, false),
            // Block mappings.
            State::BlockMappingFirstKey => self.block_mapping_key(true),
            State::BlockMappingKey => self.block_mapping_key(false),
            State::BlockMappingKeyNode => self.block_mapping_key_node(),
            State::BlockMappingValue => self.block_mapping_value(),
            State::BlockMappingValueNode => self.block_mapping_value_node(),

            // Block sequences.
            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
            State::BlockSequenceEntry => self.block_sequence_entry(false),
            State::BlockSequenceEntryNode => self.block_sequence_entry_node(),

            // Flow sequences.
            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
            State::FlowSequenceEntry => self.flow_sequence_entry(false),

            // Flow mappings.
            State::FlowMappingFirstKey => self.flow_mapping_key(true),
            State::FlowMappingKey => self.flow_mapping_key(false),
            State::FlowMappingKeyNode => self.flow_mapping_key_node(),
            State::FlowMappingValue => self.flow_mapping_value(false),
            State::FlowMappingValueNode => self.flow_mapping_value_node(),

            // Indentless sequences.
            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
            State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),

            // Single-pair mappings written inline as flow sequence entries.
            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
            State::FlowSequenceEntryMappingValueNode => {
                self.flow_sequence_entry_mapping_value_node()
            }
            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
            State::FlowMappingEmptyValue => self.flow_mapping_value(true),

            /* impossible: `parse` returns early for `State::End` and never dispatches here */
            State::End => unreachable!(),
        }
    }
1285
1286    fn stream_start<'a>(&mut self) -> ParseResult<'a>
1287    where
1288        'input: 'a,
1289    {
1290        match *self.peek_token()? {
1291            QueuedToken(span, QueuedTokenType::StreamStart(_)) => {
1292                self.state = State::ImplicitDocumentStart;
1293                self.skip();
1294                Ok((Event::StreamStart, span))
1295            }
1296            QueuedToken(span, _) => Err(ScanError::new_str(
1297                span.start,
1298                "did not find expected <stream-start>",
1299            )),
1300        }
1301    }
1302
1303    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
1304    where
1305        'input: 'a,
1306    {
1307        while let QueuedTokenType::DocumentEnd = self.peek_token()?.1 {
1308            self.skip();
1309        }
1310
1311        // Anchors are scoped to a single document.
1312        self.anchors.clear();
1313
1314        match *self.peek_token()? {
1315            QueuedToken(span, QueuedTokenType::StreamEnd) => {
1316                self.state = State::End;
1317                self.skip();
1318                Ok((Event::StreamEnd, span))
1319            }
1320            QueuedToken(
1321                _,
1322                QueuedTokenType::VersionDirective(..)
1323                | QueuedTokenType::TagDirective(..)
1324                | QueuedTokenType::ReservedDirective(..)
1325                | QueuedTokenType::DocumentStart,
1326            ) => {
1327                // explicit document
1328                self.explicit_document_start()
1329            }
1330            QueuedToken(span, _) if implicit => {
1331                self.parser_process_directives()?;
1332                self.push_state(State::DocumentEnd);
1333                self.state = State::BlockNode;
1334                Ok((Event::DocumentStart(false), span))
1335            }
1336            _ => {
1337                // explicit document
1338                self.explicit_document_start()
1339            }
1340        }
1341    }
1342
1343    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
1344        let mut version_directive_received = false;
1345        let mut tags = if self.keep_tags {
1346            self.tags.clone()
1347        } else {
1348            BTreeMap::new()
1349        };
1350        let mut document_tag_handles = BTreeSet::new();
1351
1352        loop {
1353            match self.peek_token()? {
1354                QueuedToken(span, QueuedTokenType::VersionDirective(_, _)) => {
1355                    // YAML version compatibility is non-fatal here. The scanner validates the
1356                    // directive shape, and the parser rejects duplicates below, but it does not
1357                    // expose a warning channel for unsupported versions.
1358                    if version_directive_received {
1359                        return Err(ScanError::new_str(
1360                            span.start,
1361                            "duplicate version directive",
1362                        ));
1363                    }
1364                    version_directive_received = true;
1365                }
1366                QueuedToken(mark, QueuedTokenType::TagDirective(handle, prefix)) => {
1367                    if !document_tag_handles.insert(handle.to_string()) {
1368                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
1369                    }
1370                    tags.insert(handle.to_string(), prefix.to_string());
1371                }
1372                QueuedToken(_, QueuedTokenType::ReservedDirective(_, _)) => {
1373                    // Reserved directives are ignored
1374                }
1375                _ => break,
1376            }
1377            self.skip();
1378        }
1379
1380        self.tags = tags;
1381        Ok(())
1382    }
1383
1384    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1385    where
1386        'input: 'a,
1387    {
1388        self.parser_process_directives()?;
1389        if let Some(comment) = self.maybe_next_comment_event()? {
1390            return Ok(comment);
1391        }
1392        match *self.peek_token()? {
1393            QueuedToken(mark, QueuedTokenType::DocumentStart) => {
1394                self.push_state(State::DocumentEnd);
1395                self.state = State::DocumentContent;
1396                self.skip();
1397                Ok((Event::DocumentStart(true), mark))
1398            }
1399            QueuedToken(span, _) => Err(ScanError::new_str(
1400                span.start,
1401                "did not find expected <document start>",
1402            )),
1403        }
1404    }
1405
1406    fn document_content<'a>(&mut self) -> ParseResult<'a>
1407    where
1408        'input: 'a,
1409    {
1410        if let QueuedToken(
1411            mark,
1412            QueuedTokenType::VersionDirective(..)
1413            | QueuedTokenType::TagDirective(..)
1414            | QueuedTokenType::ReservedDirective(..)
1415            | QueuedTokenType::DocumentStart
1416            | QueuedTokenType::DocumentEnd
1417            | QueuedTokenType::StreamEnd,
1418        ) = *self.peek_token()?
1419        {
1420            self.pop_state();
1421            // empty scalar
1422            Ok((Event::empty_scalar(), mark))
1423        } else {
1424            self.state = State::BlockNode;
1425            self.parse_node(true, false)
1426        }
1427    }
1428
1429    fn document_end<'a>(&mut self) -> ParseResult<'a>
1430    where
1431        'input: 'a,
1432    {
1433        let mut explicit_end = false;
1434        let span: Span = match *self.peek_token()? {
1435            QueuedToken(span, QueuedTokenType::DocumentEnd) => {
1436                explicit_end = true;
1437                self.skip();
1438                span
1439            }
1440            QueuedToken(span, _) => span,
1441        };
1442
1443        if self.keep_tags {
1444            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
1445            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
1446            // explicit tags are interpreted later on.
1447            self.tags.remove("!!");
1448            self.tags.remove("");
1449        } else {
1450            self.tags.clear();
1451        }
1452        if explicit_end {
1453            self.state = State::ImplicitDocumentStart;
1454        } else {
1455            if let QueuedToken(
1456                span,
1457                QueuedTokenType::VersionDirective(..)
1458                | QueuedTokenType::TagDirective(..)
1459                | QueuedTokenType::ReservedDirective(..),
1460            ) = *self.peek_token()?
1461            {
1462                return Err(ScanError::new_str(
1463                    span.start,
1464                    "missing explicit document end marker before directive",
1465                ));
1466            }
1467            self.state = State::DocumentStart;
1468        }
1469
1470        Ok((Event::DocumentEnd, span))
1471    }
1472
1473    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1474        // YAML permits anchor names to be reused. Aliases resolve to the most recent definition.
1475        let new_id = self.anchor_id_count;
1476        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1477            ScanError::new_str(
1478                mark.start,
1479                "while parsing anchor, anchor count exceeded supported limit",
1480            )
1481        })?;
1482        self.anchors.insert(name, new_id);
1483        Ok(new_id)
1484    }
1485
1486    fn save_pending_node_properties(&mut self, anchor_id: usize, tag: Option<Cow<'input, Tag>>) {
1487        self.pending_node_anchor_id = anchor_id;
1488        self.pending_node_tag = tag;
1489    }
1490
    /// Parse a single node and return the event that starts (or fully represents) it.
    ///
    /// The function works in two phases:
    ///
    /// 1. Node properties: an alias is resolved and returned immediately; an anchor and/or tag
    ///    (in either order) is consumed and remembered. If a comment follows the properties, the
    ///    comment is returned first and the properties are stashed via
    ///    `save_pending_node_properties`, to be picked up again at the top of the next call.
    /// 2. Node content: a scalar, the start of a flow or block collection, or an empty scalar
    ///    when only properties were present.
    ///
    /// A pending comment at entry is returned before anything else is looked at.
    ///
    /// * `block` - whether block sequence / block mapping start tokens are accepted here.
    /// * `indentless_sequence` - whether a bare block-entry token may open a sequence here.
    ///
    /// # Errors
    /// Fails on an alias to an unknown anchor, on anchor/tag registration or resolution errors,
    /// and when the next token cannot start a node and no properties were seen.
    // One long function by design: the two phases share `anchor_id` and `tag`.
    #[allow(clippy::too_many_lines)]
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if let Some(comment) = self.maybe_next_comment_event()? {
            return Ok(comment);
        }

        // Resume with any properties stashed by a previous call that was interrupted by a
        // comment; both slots are reset (anchor id back to 0, tag back to `None`).
        let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
        let mut tag = self.pending_node_tag.take();
        // Phase 1: alias, or anchor/tag properties.
        match *self.peek_token()? {
            QueuedToken(_, QueuedTokenType::Alias(_)) => {
                // An alias is a complete node on its own.
                self.pop_state();
                if let QueuedToken(span, QueuedTokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                unreachable!()
            }
            QueuedToken(_, QueuedTokenType::Anchor(_)) => {
                // Anchor first, optionally followed by a tag.
                if let QueuedToken(span, QueuedTokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span)?;
                    if let QueuedTokenType::Tag(..) = self.peek_token()?.1 {
                        if let QueuedTokenType::Tag(handle, suffix) = self.fetch_token().1 {
                            // NOTE(review): `span` here is the anchor token's span, not the tag
                            // token's own span (which is discarded) - confirm this is intended.
                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                    if let Some(comment) = self.maybe_next_comment_event()? {
                        // Emit the comment now; keep the properties for the next call.
                        self.save_pending_node_properties(anchor_id, tag);
                        return Ok(comment);
                    }
                } else {
                    unreachable!()
                }
            }
            QueuedToken(mark, QueuedTokenType::Tag(..)) => {
                // Tag first, optionally followed by an anchor.
                if let QueuedTokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let QueuedTokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let QueuedToken(mark, QueuedTokenType::Anchor(name)) = self.fetch_token()
                        {
                            anchor_id = self.register_anchor(name, &mark)?;
                        } else {
                            unreachable!()
                        }
                    }
                    if let Some(comment) = self.maybe_next_comment_event()? {
                        // Emit the comment now; keep the properties for the next call.
                        self.save_pending_node_properties(anchor_id, tag);
                        return Ok(comment);
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Phase 2: node content.
        match *self.peek_token()? {
            QueuedToken(mark, QueuedTokenType::BlockEntry) if indentless_sequence => {
                // A block entry with no preceding block-sequence-start token opens an
                // indentless sequence. Comments after the `-` are buffered so they can be
                // ordered relative to the events emitted here.
                self.skip();
                let comments = self.next_comment_events()?;
                let start = (
                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                );
                if comments.is_empty() {
                    self.pending_empty_scalar_span = Some(mark);
                    self.state = State::IndentlessSequenceEntryNode;
                    Ok(start)
                } else if let Ok(QueuedToken(
                    _,
                    QueuedTokenType::BlockEntry
                    | QueuedTokenType::Key
                    | QueuedTokenType::Value
                    | QueuedTokenType::BlockEnd,
                )) = self.peek_token()
                {
                    // The first entry has no content: emit the sequence start together with an
                    // empty scalar for it, interleaved with the buffered comments.
                    self.state = State::IndentlessSequenceEntry;
                    Ok(self.queue_two_events_by_span(
                        comments,
                        start,
                        (Event::empty_scalar(), mark),
                    ))
                } else {
                    self.pending_empty_scalar_span = Some(mark);
                    self.state = State::IndentlessSequenceEntryNode;
                    Ok(self.queue_event_by_span(comments, start))
                }
            }
            QueuedToken(_, QueuedTokenType::Scalar(..)) => {
                // A scalar is a complete node: return to the enclosing state.
                self.pop_state();
                if let QueuedToken(mark, QueuedTokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            QueuedToken(mark, QueuedTokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                self.skip();
                Ok((
                    Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
                    mark,
                ))
            }
            QueuedToken(mark, QueuedTokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                self.skip();
                Ok((
                    Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
                    mark,
                ))
            }
            QueuedToken(mark, QueuedTokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                self.skip();
                Ok((
                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                ))
            }
            QueuedToken(mark, QueuedTokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                self.skip();
                Ok((
                    Event::MappingStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                ))
            }
            // ex 7.2, an empty scalar can follow a secondary tag
            // More generally: a node that carried only a tag and/or an anchor (a non-zero
            // anchor id) is an empty scalar. The current token is not consumed.
            QueuedToken(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            QueuedToken(span, _) => {
                // No properties and no token that can start a node: report an error whose
                // message depends on the current state.
                let info = match self.state {
                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                        "unexpected EOF while parsing a flow sequence"
                    }
                    State::FlowMappingFirstKey
                    | State::FlowMappingKey
                    | State::FlowMappingValue
                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                    State::FlowSequenceEntryMappingKey
                    | State::FlowSequenceEntryMappingValue
                    | State::FlowSequenceEntryMappingEnd
                    | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
                    State::BlockSequenceFirstEntry
                    | State::BlockSequenceEntry
                    | State::BlockNode => "unexpected EOF while parsing a block sequence",
                    State::BlockMappingFirstKey
                    | State::BlockMappingKey
                    | State::BlockMappingValue
                    | State::BlockNodeOrIndentlessSequence => {
                        "unexpected EOF while parsing a block mapping"
                    }
                    _ => "while parsing a node, did not find expected node content",
                };
                Err(ScanError::new_str(span.start, info))
            }
        }
    }
1662
1663    fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
1664    where
1665        'input: 'a,
1666    {
1667        match *self.peek_token()? {
1668            QueuedToken(_, QueuedTokenType::Key) => {
1669                // Indentation is only meaningful for block mapping keys.
1670                if let QueuedToken(key_span, QueuedTokenType::Key) = *self.peek_token()? {
1671                    self.pending_key_indent = Some(key_span.start.col());
1672                }
1673                self.skip();
1674                if let Some(comment) = self.maybe_next_comment_event()? {
1675                    self.state = State::BlockMappingKeyNode;
1676                    Ok(comment)
1677                } else {
1678                    self.block_mapping_key_node()
1679                }
1680            }
1681            // A missing block-mapping key before `:` is represented as an empty scalar.
1682            QueuedToken(mark, QueuedTokenType::Value) => {
1683                self.state = State::BlockMappingValue;
1684                Ok((Event::empty_scalar(), mark))
1685            }
1686            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
1687                self.pop_state();
1688                self.skip();
1689                Ok((Event::MappingEnd, mark))
1690            }
1691            QueuedToken(span, _) => Err(ScanError::new_str(
1692                span.start,
1693                "while parsing a block mapping, did not find expected key",
1694            )),
1695        }
1696    }
1697
1698    fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1699    where
1700        'input: 'a,
1701    {
1702        if let QueuedToken(
1703            mark,
1704            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1705        ) = *self.peek_token()?
1706        {
1707            self.state = State::BlockMappingValue;
1708            Ok((Event::empty_scalar(), mark))
1709        } else {
1710            self.defer_parse_node(
1711                State::BlockNodeOrIndentlessSequence,
1712                State::BlockMappingValue,
1713                true,
1714                true,
1715            )
1716        }
1717    }
1718
1719    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1720    where
1721        'input: 'a,
1722    {
1723        match *self.peek_token()? {
1724            QueuedToken(mark, QueuedTokenType::Value) => {
1725                self.skip();
1726                let comments = self.next_comment_events()?;
1727                if comments.is_empty() {
1728                    self.block_mapping_value_node_with_empty_span(mark)
1729                } else if let Ok(QueuedToken(
1730                    _,
1731                    QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1732                )) = self.peek_token()
1733                {
1734                    self.state = State::BlockMappingKey;
1735                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1736                } else {
1737                    self.pending_empty_scalar_span = Some(mark);
1738                    self.state = State::BlockMappingValueNode;
1739                    Ok(self.queue_tail_and_return_first(comments))
1740                }
1741            }
1742            QueuedToken(mark, _) => {
1743                self.state = State::BlockMappingKey;
1744                // empty scalar
1745                Ok((Event::empty_scalar(), mark))
1746            }
1747        }
1748    }
1749
1750    fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1751    where
1752        'input: 'a,
1753    {
1754        let mark = match self.pending_empty_scalar_span.take() {
1755            Some(mark) => mark,
1756            None => self.peek_token()?.0,
1757        };
1758        self.block_mapping_value_node_with_empty_span(mark)
1759    }
1760
1761    fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1762    where
1763        'input: 'a,
1764    {
1765        if let QueuedToken(
1766            _,
1767            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1768        ) = *self.peek_token()?
1769        {
1770            self.state = State::BlockMappingKey;
1771            Ok((Event::empty_scalar(), mark))
1772        } else {
1773            self.defer_parse_node(
1774                State::BlockNodeOrIndentlessSequence,
1775                State::BlockMappingKey,
1776                true,
1777                true,
1778            )
1779        }
1780    }
1781
1782    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1783    where
1784        'input: 'a,
1785    {
1786        let span: Span =
1787            if let QueuedToken(mark, QueuedTokenType::FlowMappingEnd) = *self.peek_token()? {
1788                mark
1789            } else {
1790                if !first {
1791                    match *self.peek_token()? {
1792                        QueuedToken(_, QueuedTokenType::FlowEntry) => {
1793                            self.skip();
1794                            if let Some(comment) = self.maybe_next_comment_event()? {
1795                                self.state = State::FlowMappingFirstKey;
1796                                return Ok(comment);
1797                            }
1798                        }
1799                        QueuedToken(span, _) => {
1800                            return Err(ScanError::new_str(
1801                                span.start,
1802                                "while parsing a flow mapping, did not find expected ',' or '}'",
1803                            ))
1804                        }
1805                    }
1806                }
1807
1808                match *self.peek_token()? {
1809                    QueuedToken(_, QueuedTokenType::Key) => {
1810                        self.skip();
1811                        if let Some(comment) = self.maybe_next_comment_event()? {
1812                            self.state = State::FlowMappingKeyNode;
1813                            return Ok(comment);
1814                        }
1815                        return self.flow_mapping_key_node();
1816                    }
1817                    QueuedToken(marker, QueuedTokenType::Value) => {
1818                        self.state = State::FlowMappingValue;
1819                        return Ok((Event::empty_scalar(), marker));
1820                    }
1821                    QueuedToken(_, QueuedTokenType::FlowMappingEnd) => (),
1822                    _ => {
1823                        return self.defer_parse_node(
1824                            State::FlowNode,
1825                            State::FlowMappingEmptyValue,
1826                            false,
1827                            false,
1828                        );
1829                    }
1830                }
1831
1832                self.peek_token()?.0
1833            };
1834
1835        self.pop_state();
1836        self.skip();
1837        Ok((Event::MappingEnd, span))
1838    }
1839
1840    fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1841    where
1842        'input: 'a,
1843    {
1844        if let QueuedToken(
1845            mark,
1846            QueuedTokenType::Value | QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
1847        ) = *self.peek_token()?
1848        {
1849            self.state = State::FlowMappingValue;
1850            Ok((Event::empty_scalar(), mark))
1851        } else {
1852            self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false)
1853        }
1854    }
1855
1856    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1857    where
1858        'input: 'a,
1859    {
1860        let span: Span = {
1861            if empty {
1862                let QueuedToken(mark, _) = *self.peek_token()?;
1863                self.state = State::FlowMappingKey;
1864                return Ok((Event::empty_scalar(), mark));
1865            }
1866            match *self.peek_token()? {
1867                QueuedToken(span, QueuedTokenType::Value) => {
1868                    self.skip();
1869                    let comments = self.next_comment_events()?;
1870                    if comments.is_empty() {
1871                        return self.flow_mapping_value_node_with_empty_span(span);
1872                    }
1873                    if let Ok(QueuedToken(
1874                        _,
1875                        QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
1876                    )) = self.peek_token()
1877                    {
1878                        self.state = State::FlowMappingKey;
1879                        return Ok(
1880                            self.queue_event_by_span(comments, (Event::empty_scalar(), span))
1881                        );
1882                    }
1883
1884                    self.pending_empty_scalar_span = Some(span);
1885                    self.state = State::FlowMappingValueNode;
1886                    return Ok(self.queue_tail_and_return_first(comments));
1887                }
1888                QueuedToken(marker, _) => marker,
1889            }
1890        };
1891
1892        self.state = State::FlowMappingKey;
1893        Ok((Event::empty_scalar(), span))
1894    }
1895
1896    fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1897    where
1898        'input: 'a,
1899    {
1900        let mark = match self.pending_empty_scalar_span.take() {
1901            Some(mark) => mark,
1902            None => Span::empty(self.peek_token()?.0.start),
1903        };
1904        self.flow_mapping_value_node_with_empty_span(mark)
1905    }
1906
1907    fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1908    where
1909        'input: 'a,
1910    {
1911        match self.peek_token()?.1 {
1912            QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd => {
1913                self.state = State::FlowMappingKey;
1914                Ok((Event::empty_scalar(), mark))
1915            }
1916            _ => self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false),
1917        }
1918    }
1919
1920    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1921    where
1922        'input: 'a,
1923    {
1924        match *self.peek_token()? {
1925            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
1926                self.pop_state();
1927                self.skip();
1928                return Ok((Event::SequenceEnd, mark));
1929            }
1930            QueuedToken(_, QueuedTokenType::FlowEntry) if !first => {
1931                self.skip();
1932                if let Some(comment) = self.maybe_next_comment_event()? {
1933                    self.state = State::FlowSequenceFirstEntry;
1934                    return Ok(comment);
1935                }
1936            }
1937            QueuedToken(span, _) if !first => {
1938                return Err(ScanError::new_str(
1939                    span.start,
1940                    "while parsing a flow sequence, expected ',' or ']'",
1941                ));
1942            }
1943            _ => { /* next */ }
1944        }
1945        match *self.peek_token()? {
1946            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
1947                self.pop_state();
1948                self.skip();
1949                Ok((Event::SequenceEnd, mark))
1950            }
1951            QueuedToken(mark, QueuedTokenType::Key) => {
1952                self.state = State::FlowSequenceEntryMappingKey;
1953                self.skip();
1954                Ok((Event::MappingStart(StructureStyle::Flow, 0, None), mark))
1955            }
1956            _ => self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false),
1957        }
1958    }
1959
1960    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1961    where
1962        'input: 'a,
1963    {
1964        match *self.peek_token()? {
1965            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
1966                self.skip();
1967                let comments = self.next_comment_events()?;
1968                if comments.is_empty() {
1969                    self.indentless_sequence_entry_node_with_empty_span(mark)
1970                } else if let Ok(QueuedToken(
1971                    _,
1972                    QueuedTokenType::BlockEntry
1973                    | QueuedTokenType::Key
1974                    | QueuedTokenType::Value
1975                    | QueuedTokenType::BlockEnd,
1976                )) = self.peek_token()
1977                {
1978                    self.state = State::IndentlessSequenceEntry;
1979                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1980                } else {
1981                    self.pending_empty_scalar_span = Some(mark);
1982                    self.state = State::IndentlessSequenceEntryNode;
1983                    Ok(self.queue_tail_and_return_first(comments))
1984                }
1985            }
1986            QueuedToken(mark, _) => {
1987                self.pop_state();
1988                Ok((Event::SequenceEnd, mark))
1989            }
1990        }
1991    }
1992
1993    fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
1994    where
1995        'input: 'a,
1996    {
1997        let mark = match self.pending_empty_scalar_span.take() {
1998            Some(mark) => mark,
1999            None => self.peek_token()?.0,
2000        };
2001        self.indentless_sequence_entry_node_with_empty_span(mark)
2002    }
2003
2004    fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2005    where
2006        'input: 'a,
2007    {
2008        if let QueuedToken(
2009            _,
2010            QueuedTokenType::BlockEntry
2011            | QueuedTokenType::Key
2012            | QueuedTokenType::Value
2013            | QueuedTokenType::BlockEnd,
2014        ) = *self.peek_token()?
2015        {
2016            self.state = State::IndentlessSequenceEntry;
2017            Ok((Event::empty_scalar(), mark))
2018        } else {
2019            self.defer_parse_node(
2020                State::BlockNode,
2021                State::IndentlessSequenceEntry,
2022                true,
2023                false,
2024            )
2025        }
2026    }
2027
2028    fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
2029    where
2030        'input: 'a,
2031    {
2032        match *self.peek_token()? {
2033            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
2034                self.pop_state();
2035                self.skip();
2036                Ok((Event::SequenceEnd, mark))
2037            }
2038            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2039                self.skip();
2040                let comments = self.next_comment_events()?;
2041                if comments.is_empty() {
2042                    self.block_sequence_entry_node_with_empty_span(mark)
2043                } else if let Ok(QueuedToken(
2044                    _,
2045                    QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd,
2046                )) = self.peek_token()
2047                {
2048                    self.state = State::BlockSequenceEntry;
2049                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2050                } else {
2051                    self.pending_empty_scalar_span = Some(mark);
2052                    self.state = State::BlockSequenceEntryNode;
2053                    Ok(self.queue_tail_and_return_first(comments))
2054                }
2055            }
2056            QueuedToken(span, _) => Err(ScanError::new_str(
2057                span.start,
2058                "while parsing a block collection, did not find expected '-' indicator",
2059            )),
2060        }
2061    }
2062
2063    fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2064    where
2065        'input: 'a,
2066    {
2067        let mark = match self.pending_empty_scalar_span.take() {
2068            Some(mark) => mark,
2069            None => self.peek_token()?.0,
2070        };
2071        self.block_sequence_entry_node_with_empty_span(mark)
2072    }
2073
2074    fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2075    where
2076        'input: 'a,
2077    {
2078        if let QueuedToken(_, QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd) =
2079            *self.peek_token()?
2080        {
2081            self.state = State::BlockSequenceEntry;
2082            Ok((Event::empty_scalar(), mark))
2083        } else {
2084            self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false)
2085        }
2086    }
2087
2088    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
2089    where
2090        'input: 'a,
2091    {
2092        if let QueuedToken(mark, QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd) =
2093            *self.peek_token()?
2094        {
2095            self.state = State::FlowSequenceEntryMappingValue;
2096            Ok((Event::empty_scalar(), mark))
2097        } else {
2098            self.defer_parse_node(
2099                State::FlowNode,
2100                State::FlowSequenceEntryMappingValue,
2101                false,
2102                false,
2103            )
2104        }
2105    }
2106
2107    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
2108    where
2109        'input: 'a,
2110    {
2111        match *self.peek_token()? {
2112            QueuedToken(_, QueuedTokenType::Value) => {
2113                self.skip();
2114                if let Some(comment) = self.maybe_next_comment_event()? {
2115                    self.state = State::FlowSequenceEntryMappingValueNode;
2116                    Ok(comment)
2117                } else {
2118                    self.flow_sequence_entry_mapping_value_node()
2119                }
2120            }
2121            QueuedToken(mark, _) => {
2122                self.state = State::FlowSequenceEntryMappingEnd;
2123                Ok((Event::empty_scalar(), mark))
2124            }
2125        }
2126    }
2127
2128    fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2129    where
2130        'input: 'a,
2131    {
2132        let QueuedToken(span, ref tok) = *self.peek_token()?;
2133        if matches!(
2134            tok,
2135            QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd
2136        ) {
2137            self.state = State::FlowSequenceEntryMappingEnd;
2138            Ok((Event::empty_scalar(), Span::empty(span.start)))
2139        } else {
2140            self.defer_parse_node(
2141                State::FlowNode,
2142                State::FlowSequenceEntryMappingEnd,
2143                false,
2144                false,
2145            )
2146        }
2147    }
2148
2149    #[allow(clippy::unnecessary_wraps)]
2150    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
2151    where
2152        'input: 'a,
2153    {
2154        self.state = State::FlowSequenceEntry;
2155        let QueuedToken(span, _) = *self.peek_token()?;
2156        Ok((Event::MappingEnd, Span::empty(span.start)))
2157    }
2158
2159    /// Resolve a tag from the handle and the suffix.
2160    fn resolve_tag(
2161        &self,
2162        span: Span,
2163        handle: &Cow<'input, str>,
2164        suffix: Cow<'input, str>,
2165    ) -> Result<Cow<'input, Tag>, ScanError> {
2166        let suffix = suffix.into_owned();
2167        let tag = if handle == "!!" {
2168            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
2169            // overridden.
2170            Tag {
2171                handle: self
2172                    .tags
2173                    .get("!!")
2174                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
2175                suffix,
2176            }
2177        } else if handle.is_empty() && suffix == "!" {
2178            // "!" introduces a local tag. Local tags may have their prefix overridden.
2179            match self.tags.get("") {
2180                Some(prefix) => Tag {
2181                    handle: prefix.clone(),
2182                    suffix,
2183                },
2184                None => Tag {
2185                    handle: String::new(),
2186                    suffix,
2187                },
2188            }
2189        } else {
2190            // Lookup handle in our tag directives.
2191            let prefix = self.tags.get(&**handle);
2192            if let Some(prefix) = prefix {
2193                Tag {
2194                    handle: prefix.clone(),
2195                    suffix,
2196                }
2197            } else {
2198                // Otherwise, it may be a local handle. With a local handle, the handle is set to
2199                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
2200                // If the handle is of the form "!foo!", this cannot be a local handle and we need
2201                // to error.
2202                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2203                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
2204                }
2205                Tag {
2206                    handle: handle.to_string(),
2207                    suffix,
2208                }
2209            }
2210        };
2211        Ok(Cow::Owned(tag))
2212    }
2213}
2214
2215impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
2216    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
2217        if let Some(ref x) = self.current {
2218            Some(Ok(x))
2219        } else {
2220            if self.stream_end_emitted {
2221                return None;
2222            }
2223            match self.next_event_impl() {
2224                Ok(token) => self.current = Some(token),
2225                Err(e) => return Some(e.into_result()),
2226            }
2227            self.current.as_ref().map(Ok)
2228        }
2229    }
2230
2231    fn next_event(&mut self) -> Option<ParseResult<'input>> {
2232        if self.stream_end_emitted {
2233            return None;
2234        }
2235
2236        let tok = self.next_event_impl();
2237        if matches!(tok, Ok((Event::StreamEnd, _))) {
2238            self.stream_end_emitted = true;
2239        }
2240        Some(tok)
2241    }
2242
2243    fn load<R: SpannedEventReceiver<'input>>(
2244        &mut self,
2245        recv: &mut R,
2246        multi: bool,
2247    ) -> Result<(), ScanError> {
2248        let mut recv = InfallibleSpannedReceiver(recv);
2249        into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
2250    }
2251
2252    fn try_load<R: TrySpannedEventReceiver<'input>>(
2253        &mut self,
2254        recv: &mut R,
2255        multi: bool,
2256    ) -> Result<(), TryLoadError<R::Error>> {
2257        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
2258        if !self.scanner.stream_started() || stream_start_buffered {
2259            let (ev, span) = self.next_event_impl()?;
2260            if ev != Event::StreamStart {
2261                return Err(TryLoadError::scan(ScanError::new_str(
2262                    span.start,
2263                    "did not find expected <stream-start>",
2264                )));
2265            }
2266            try_emit(recv, ev, span)?;
2267        }
2268
2269        if self.scanner.stream_ended() {
2270            // The scanner has already reached EOF before the document loop, so emit the terminal
2271            // event and stop.
2272            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
2273            return Ok(());
2274        }
2275
2276        loop {
2277            let (ev, span) = self.next_event_impl()?;
2278            let is_doc_end = matches!(ev, Event::DocumentEnd);
2279            let is_stream_end = matches!(ev, Event::StreamEnd);
2280
2281            try_emit(recv, ev, span)?;
2282
2283            if is_stream_end {
2284                return Ok(());
2285            }
2286            if !multi && is_doc_end {
2287                return Ok(());
2288            }
2289        }
2290    }
2291}
2292
2293impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
2294    type Item = Result<(Event<'input>, Span), ScanError>;
2295
2296    fn next(&mut self) -> Option<Self::Item> {
2297        self.next_event()
2298    }
2299}
2300
2301#[cfg(test)]
2302mod test {
2303    use alloc::{
2304        borrow::{Cow, ToOwned},
2305        string::{String, ToString},
2306        vec::Vec,
2307    };
2308    use core::{error::Error as _, fmt};
2309
2310    use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
2311
2312    use super::{
2313        Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
2314        TrySpannedEventReceiver,
2315    };
2316
2317    #[derive(Default)]
2318    struct CollectingSink<'input> {
2319        events: Vec<Event<'input>>,
2320    }
2321
2322    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
2323        fn on_event(&mut self, ev: Event<'input>) {
2324            self.events.push(ev);
2325        }
2326    }
2327
2328    fn first_error_info(input: &str) -> String {
2329        for event in Parser::new_from_str(input) {
2330            if let Err(err) = event {
2331                return err.info().to_owned();
2332            }
2333        }
2334        panic!("expected parser error")
2335    }
2336
#[test]
fn deferred_parse_node_can_emit_comment_before_flow_node() {
    let mut parser = Parser::new_from_str("# deferred\nvalue\n");

    // Drive the parser by hand up to the start of the document content.
    let (stream_start, _) = parser.stream_start().unwrap();
    assert_eq!(stream_start, Event::StreamStart);
    let (document_start, _) = parser.document_start(true).unwrap();
    assert_eq!(document_start, Event::DocumentStart(false));

    let deferred = parser.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false);
    let (event, _) = deferred.unwrap();

    // The comment is produced first and the parser is left in the deferred node state.
    assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
    assert_eq!(parser.state, State::FlowNode);
}
2353
#[test]
fn queued_node_event_gets_pending_key_indent() {
    let mut parser = Parser::new_from_str("");
    let queued_span = Span::empty(Marker::new(0, 1, 0));
    let queued_event = Event::SequenceStart(StructureStyle::Block, 0, None);

    // Pretend a key at column 3 was seen just before a node event was queued.
    parser.pending_key_indent = Some(3);
    parser.queued_events.push_back((queued_event, queued_span));

    let (event, emitted_span) = parser.next_event_impl().unwrap();

    assert!(matches!(
        event,
        Event::SequenceStart(StructureStyle::Block, 0, None)
    ));
    // The pending indent is moved onto the emitted span and cleared on the parser.
    assert_eq!(emitted_span.indent, Some(3));
    assert_eq!(parser.pending_key_indent, None);
}
2373
#[test]
fn state_machine_handles_deferred_flow_node_states() {
    // Builds a parser over "value\n" that has already produced the stream start and the
    // implicit document start.
    let parser_at_document_content = || {
        let mut parser = Parser::new_from_str("value\n");
        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
        assert_eq!(
            parser.document_start(true).unwrap().0,
            Event::DocumentStart(false)
        );
        parser
    };

    // `State::FlowNode` parses the scalar.
    let mut flow_node_parser = parser_at_document_content();
    flow_node_parser.state = State::FlowNode;
    flow_node_parser.push_state(State::End);
    let (flow_node_event, _) = flow_node_parser.state_machine().unwrap();
    assert!(matches!(flow_node_event, Event::Scalar(value, ..) if value == "value"));

    // `State::FlowSequenceEntryMappingValueNode` parses the scalar as well.
    let mut value_node_parser = parser_at_document_content();
    value_node_parser.state = State::FlowSequenceEntryMappingValueNode;
    let (value_node_event, _) = value_node_parser.state_machine().unwrap();
    assert!(matches!(value_node_event, Event::Scalar(value, ..) if value == "value"));
}
2401
#[test]
fn display_resolved_core_tag_without_extra_bang() {
    let core_str = Tag {
        handle: "tag:yaml.org,2002:".to_owned(),
        suffix: "str".to_owned(),
    };

    // Handle and suffix are rendered back to back, with no `!` in between.
    let rendered = core_str.to_string();
    assert_eq!(rendered, "tag:yaml.org,2002:str");
}
2411
#[test]
fn tag_helpers_distinguish_core_and_local_tags() {
    let core_int = Tag {
        handle: "tag:yaml.org,2002:".to_owned(),
        suffix: "int".to_owned(),
    };
    let local_thing = Tag {
        handle: "!".to_owned(),
        suffix: "thing".to_owned(),
    };

    // A tag under the YAML core schema prefix.
    assert!(core_int.is_yaml_core_schema());
    assert!(core_int.is_yaml_core_schema_tag("int"));
    assert!(!core_int.is_yaml_core_schema_tag("str"));
    assert!(!core_int.is_custom());
    assert_eq!(core_int.parts(), ("tag:yaml.org,2002:", "int"));

    // A local `!thing` tag.
    assert!(!local_thing.is_yaml_core_schema());
    assert!(!local_thing.is_yaml_core_schema_tag("thing"));
    assert!(local_thing.is_custom());
    assert_eq!(local_thing.parts(), ("!", "thing"));
    assert_eq!(local_thing.to_string(), "!thing");
}
2435
#[test]
fn event_inspection_helpers_report_node_metadata() {
    let tag = Tag {
        handle: "!".to_owned(),
        suffix: "thing".to_owned(),
    };
    let scalar = Event::Scalar(
        "value".into(),
        ScalarStyle::DoubleQuoted,
        7,
        Some(Cow::Borrowed(&tag)),
    );
    let sequence =
        Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
    let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));

    // Anchored, tagged nodes all report their anchor and tag, and are never aliases.
    for (node, expected_anchor) in [(&scalar, 7), (&sequence, 8), (&mapping, 9)] {
        assert_eq!(node.anchor_id(), Some(expected_anchor));
        assert_eq!(node.alias_id(), None);
        assert_eq!(node.tag(), Some(&tag));
        assert!(node.is_node());
    }

    // Only the scalar event exposes scalar content.
    assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
    assert_eq!(sequence.scalar(), None);
    assert_eq!(mapping.scalar(), None);

    // An alias is a node that carries an alias id and nothing else.
    let alias = Event::Alias(10);
    assert_eq!(alias.anchor_id(), None);
    assert_eq!(alias.alias_id(), Some(10));
    assert_eq!(alias.tag(), None);
    assert_eq!(alias.scalar(), None);
    assert!(alias.is_node());

    // Anchor id 0 is reported as "no anchor".
    let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
    assert_eq!(unanchored_scalar.anchor_id(), None);
    assert_eq!(unanchored_scalar.alias_id(), None);

    // Structural events are not nodes and carry no metadata.
    let stream_start = Event::StreamStart;
    assert_eq!(stream_start.anchor_id(), None);
    assert_eq!(stream_start.alias_id(), None);
    assert_eq!(stream_start.tag(), None);
    assert_eq!(stream_start.scalar(), None);
    assert!(!stream_start.is_node());
}
2488
2489    #[test]
2490    fn test_peek_eq_parse() {
2491        let s = "
2492a0 bb: val
2493a1: &x
2494    b1: 4
2495    b2: d
2496a2: 4
2497a3: [1, 2, 3]
2498a4:
2499    - [a1, a2]
2500    - 2
2501a5: *x
2502";
2503        let mut p = Parser::new_from_str(s);
2504        loop {
2505            let event_peek = p.peek().unwrap().unwrap().clone();
2506            let event = p.next_event().unwrap().unwrap();
2507            assert_eq!(event, event_peek);
2508            if event.0 == Event::StreamEnd {
2509                break;
2510            }
2511        }
2512    }
2513
2514    #[test]
2515    fn test_repeated_peek_returns_buffered_event() {
2516        let mut parser = Parser::new_from_str("key: value\n");
2517
2518        let first_peek = parser.peek().unwrap().unwrap().clone();
2519        let second_peek = parser.peek().unwrap().unwrap().clone();
2520        let next = parser.next_event().unwrap().unwrap();
2521
2522        assert_eq!(first_peek, second_peek);
2523        assert_eq!(first_peek, next);
2524    }
2525
2526    #[test]
2527    fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
2528        let mut parser = Parser::new_from_str("a: [1, 2");
2529
2530        loop {
2531            match parser.peek() {
2532                Some(Ok(_)) => {
2533                    parser.next_event().unwrap().unwrap();
2534                }
2535                Some(Err(error)) => {
2536                    assert_eq!(error.info(), "unclosed bracket '['");
2537                    break;
2538                }
2539                None => panic!("expected parse error"),
2540            }
2541        }
2542    }
2543
2544    #[test]
2545    fn test_peek_and_next_return_none_after_stream_end() {
2546        let mut parser = Parser::new_from_str("");
2547
2548        assert!(matches!(
2549            parser.next_event().unwrap().unwrap().0,
2550            Event::StreamStart
2551        ));
2552        assert!(matches!(
2553            parser.next_event().unwrap().unwrap().0,
2554            Event::StreamEnd
2555        ));
2556        assert!(parser.next_event().is_none());
2557        assert!(parser.peek().is_none());
2558    }
2559
2560    #[test]
2561    fn test_load_after_stream_already_ended_emits_stream_end() {
2562        let mut parser = Parser::new_from_str("");
2563        while parser.next_event().is_some() {}
2564
2565        let mut sink = CollectingSink::default();
2566        parser.load(&mut sink, true).unwrap();
2567
2568        assert_eq!(sink.events, vec![Event::StreamEnd]);
2569    }
2570
2571    #[test]
2572    fn test_load_visits_nested_collection_events() {
2573        let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
2574        let mut sink = CollectingSink::default();
2575
2576        parser.load(&mut sink, true).unwrap();
2577
2578        assert_eq!(
2579            sink.events,
2580            vec![
2581                Event::StreamStart,
2582                Event::DocumentStart(false),
2583                Event::MappingStart(StructureStyle::Block, 0, None),
2584                Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
2585                Event::SequenceStart(StructureStyle::Block, 0, None),
2586                Event::MappingStart(StructureStyle::Block, 0, None),
2587                Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
2588                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2589                Event::MappingEnd,
2590                Event::SequenceStart(StructureStyle::Flow, 0, None),
2591                Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
2592                Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
2593                Event::SequenceEnd,
2594                Event::SequenceEnd,
2595                Event::MappingEnd,
2596                Event::DocumentEnd,
2597                Event::StreamEnd,
2598            ]
2599        );
2600    }
2601
2602    #[derive(Clone, Debug, PartialEq, Eq)]
2603    enum ValidationError {
2604        ForbiddenValue,
2605    }
2606
2607    #[derive(Debug)]
2608    struct ReceiverFailure;
2609
2610    impl fmt::Display for ReceiverFailure {
2611        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2612            write!(f, "receiver failed")
2613        }
2614    }
2615
2616    impl core::error::Error for ReceiverFailure {}
2617
2618    struct FailingSink<'input> {
2619        events: Vec<Event<'input>>,
2620    }
2621
2622    impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
2623        type Error = ValidationError;
2624
2625        fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
2626            let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
2627            self.events.push(ev);
2628            if should_fail {
2629                Err(ValidationError::ForbiddenValue)
2630            } else {
2631                Ok(())
2632            }
2633        }
2634    }
2635
2636    #[test]
2637    fn test_try_load_stops_on_receiver_error() {
2638        let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
2639        let mut sink = FailingSink { events: Vec::new() };
2640
2641        let err = parser.try_load(&mut sink, true).unwrap_err();
2642
2643        assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
2644        assert!(sink
2645            .events
2646            .iter()
2647            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
2648        assert!(sink
2649            .events
2650            .iter()
2651            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
2652        assert!(!sink
2653            .events
2654            .iter()
2655            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
2656    }
2657
2658    struct SpannedFailingSink {
2659        failed_span: Option<Span>,
2660    }
2661
2662    impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
2663        type Error = Span;
2664
2665        fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
2666            if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
2667                self.failed_span = Some(span);
2668                Err(span)
2669            } else {
2670                Ok(())
2671            }
2672        }
2673    }
2674
2675    #[test]
2676    fn test_try_load_spanned_receiver_gets_span() {
2677        let mut parser = Parser::new_from_str("value: bad\n");
2678        let mut sink = SpannedFailingSink { failed_span: None };
2679
2680        let err = parser.try_load(&mut sink, false).unwrap_err();
2681
2682        let TryLoadError::Receiver(span) = err else {
2683            panic!("expected receiver error");
2684        };
2685
2686        assert_eq!(Some(span), sink.failed_span);
2687        assert!(!span.is_empty());
2688    }
2689
2690    struct NeverFails {
2691        count: usize,
2692    }
2693
2694    impl<'input> TryEventReceiver<'input> for NeverFails {
2695        type Error = ValidationError;
2696
2697        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
2698            self.count += 1;
2699            Ok(())
2700        }
2701    }
2702
2703    #[test]
2704    fn test_try_load_returns_scan_error() {
2705        let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
2706        let mut sink = NeverFails { count: 0 };
2707
2708        let err = parser.try_load(&mut sink, true).unwrap_err();
2709
2710        let TryLoadError::Scan(err) = err else {
2711            panic!("expected scan error");
2712        };
2713        assert_eq!(err.info(), "duplicate version directive");
2714    }
2715
2716    #[test]
2717    fn test_try_load_error_display_and_source_cover_both_variants() {
2718        let scan = ScanError::new_str(Marker::new(3, 1, 3), "bad yaml");
2719        let scan_err: TryLoadError<ReceiverFailure> = scan.into();
2720
2721        assert!(scan_err.to_string().starts_with("parser error: bad yaml"));
2722        assert!(scan_err.source().is_some());
2723
2724        let receiver_err = TryLoadError::Receiver(ReceiverFailure);
2725
2726        assert_eq!(receiver_err.to_string(), "receiver error: receiver failed");
2727        assert!(receiver_err.source().is_some());
2728    }
2729
2730    #[test]
2731    fn test_try_load_document_rejects_non_document_start_event() {
2732        let mut parser = Parser::new_from_str("");
2733        let span = Span::empty(Marker::new(0, 1, 0));
2734        let mut sink = NeverFails { count: 0 };
2735
2736        let err = parser
2737            .try_load_document(
2738                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2739                span,
2740                &mut sink,
2741            )
2742            .unwrap_err();
2743
2744        let TryLoadError::Scan(err) = err else {
2745            panic!("expected scan error");
2746        };
2747        assert_eq!(err.info(), "did not find expected <document-start>");
2748    }
2749
2750    #[test]
2751    fn test_try_load_requires_buffered_stream_start() {
2752        let mut parser = Parser::new_from_str("");
2753        let span = Span::empty(Marker::new(0, 1, 0));
2754        parser.current = Some((
2755            Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2756            span,
2757        ));
2758        let mut sink = NeverFails { count: 0 };
2759
2760        let err = parser.try_load(&mut sink, true).unwrap_err();
2761
2762        let TryLoadError::Scan(err) = err else {
2763            panic!("expected scan error");
2764        };
2765        assert_eq!(err.info(), "did not find expected <stream-start>");
2766    }
2767
2768    #[test]
2769    fn test_try_load_after_stream_already_ended_emits_stream_end() {
2770        let mut parser = Parser::new_from_str("");
2771        while parser.next_event().is_some() {}
2772
2773        let mut sink = FailingSink { events: Vec::new() };
2774        parser.try_load(&mut sink, true).unwrap();
2775
2776        assert_eq!(sink.events, vec![Event::StreamEnd]);
2777    }
2778
2779    #[test]
2780    fn test_load_single_document_stops_before_next_document() {
2781        let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
2782        let mut sink = CollectingSink::default();
2783
2784        parser.load(&mut sink, false).unwrap();
2785
2786        assert!(sink
2787            .events
2788            .iter()
2789            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
2790        assert!(!sink
2791            .events
2792            .iter()
2793            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
2794        assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
2795    }
2796
2797    #[test]
2798    fn test_duplicate_version_directive_errors() {
2799        assert_eq!(
2800            first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
2801            "duplicate version directive"
2802        );
2803    }
2804
2805    #[test]
2806    fn test_duplicate_tag_directive_errors() {
2807        assert_eq!(
2808            first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
2809            "the TAG directive must only be given at most once per handle in the same document"
2810        );
2811    }
2812
2813    #[test]
2814    fn test_directive_after_implicit_document_requires_explicit_end() {
2815        assert_eq!(
2816            first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
2817            "missing explicit document end marker before directive"
2818        );
2819    }
2820
2821    #[test]
2822    fn test_anchor_offset_overflow_reports_error() {
2823        let mut parser = Parser::new_from_str("&a value");
2824        parser.set_anchor_offset(usize::MAX);
2825
2826        let err = parser
2827            .find_map(Result::err)
2828            .expect("anchor registration should overflow");
2829
2830        assert_eq!(
2831            err.info(),
2832            "while parsing anchor, anchor count exceeded supported limit"
2833        );
2834    }
2835
2836    #[test]
2837    fn test_alias_resolves_to_registered_anchor_id() {
2838        let events = Parser::new_from_str("- &a value\n- *a\n")
2839            .map(|event| event.unwrap().0)
2840            .collect::<Vec<_>>();
2841
2842        assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
2843    }
2844
2845    #[test]
2846    fn test_anchor_then_tag_applies_both_to_scalar() {
2847        let events = Parser::new_from_str("&a !!str value")
2848            .map(|event| event.unwrap().0)
2849            .collect::<Vec<_>>();
2850
2851        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2852            .iter()
2853            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2854        else {
2855            panic!("expected tagged anchored scalar");
2856        };
2857
2858        assert_eq!(value, "value");
2859        assert_eq!(*anchor_id, 1);
2860        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2861        assert_eq!(tag.suffix, "str");
2862    }
2863
2864    #[test]
2865    fn test_tag_then_anchor_applies_both_to_scalar() {
2866        let events = Parser::new_from_str("!!str &a value")
2867            .map(|event| event.unwrap().0)
2868            .collect::<Vec<_>>();
2869
2870        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2871            .iter()
2872            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2873        else {
2874            panic!("expected tagged anchored scalar");
2875        };
2876
2877        assert_eq!(value, "value");
2878        assert_eq!(*anchor_id, 1);
2879        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2880        assert_eq!(tag.suffix, "str");
2881    }
2882
2883    #[test]
2884    fn test_multiple_tag_directives_are_kept_within_document() {
2885        let text = r"
2886%TAG !a! tag:a,2024:
2887%TAG !b! tag:b,2024:
2888---
2889first: !a!x foo
2890second: !b!y bar
2891";
2892
2893        let mut seen_a = false;
2894        let mut seen_b = false;
2895        for event in Parser::new_from_str(text) {
2896            let (event, _) = event.unwrap();
2897            if let Event::Scalar(_, _, _, Some(tag)) = event {
2898                if tag.handle == "tag:a,2024:" {
2899                    seen_a = true;
2900                } else if tag.handle == "tag:b,2024:" {
2901                    seen_b = true;
2902                }
2903            }
2904        }
2905
2906        assert!(seen_a);
2907        assert!(seen_b);
2908    }
2909
2910    #[test]
2911    fn test_tags_are_cleared_when_next_document_has_no_directives() {
2912        let text = r"
2913%TAG !t! tag:test,2024:
2914--- !t!1
2915foo
2916--- !t!2
2917bar
2918";
2919
2920        let mut parser = Parser::new_from_str(text);
2921        for event in parser.by_ref() {
2922            let (event, _) = event.unwrap();
2923            if let Event::DocumentEnd = event {
2924                break;
2925            }
2926        }
2927
2928        match parser.next().unwrap().unwrap().0 {
2929            Event::DocumentStart(true) => {}
2930            _ => panic!("expected explicit second document start"),
2931        }
2932
2933        let err = parser.next().unwrap().unwrap_err();
2934        assert!(format!("{err}").contains("the handle wasn't declared"));
2935    }
2936
2937    #[test]
2938    fn test_pull_parser_clears_anchors_between_documents() {
2939        let mut parser = Parser::new_from_str(
2940            "--- &a value
2941--- *a
2942",
2943        );
2944
2945        for event in parser.by_ref() {
2946            let (event, _) = event.unwrap();
2947            if matches!(event, Event::DocumentEnd) {
2948                break;
2949            }
2950        }
2951
2952        match parser.next().unwrap().unwrap().0 {
2953            Event::DocumentStart(true) => {}
2954            _ => panic!("expected explicit second document start"),
2955        }
2956
2957        let err = parser.next().unwrap().unwrap_err();
2958        assert!(format!("{err}").contains("unknown anchor"));
2959    }
2960
2961    #[test]
2962    fn test_keep_tags_across_multiple_documents() {
2963        let text = r#"
2964%YAML 1.1
2965%TAG !t! tag:test,2024:
2966--- !t!1 &1
2967foo: "bar"
2968--- !t!2 &2
2969baz: "qux"
2970"#;
2971        for x in Parser::new_from_str(text).keep_tags(true) {
2972            let x = x.unwrap();
2973            if let Event::MappingStart(_, _, tag) = x.0 {
2974                let tag = tag.unwrap();
2975                assert_eq!(tag.handle, "tag:test,2024:");
2976            }
2977        }
2978
2979        for x in Parser::new_from_str(text).keep_tags(false) {
2980            if x.is_err() {
2981                // Test successful
2982                return;
2983            }
2984        }
2985        panic!("Test failed, did not encounter error")
2986    }
2987
2988    #[test]
2989    fn test_flow_sequence_mapping_allows_empty_key() {
2990        let parser = Parser::new_from_str("[?: value]");
2991        for event in parser {
2992            event.expect("parser should accept flow sequence mappings with empty keys");
2993        }
2994    }
2995
2996    #[test]
2997    fn test_keep_tags_does_not_persist_default_tag_handles() {
2998        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
2999
3000        let mut int_tags = Vec::new();
3001        for event in Parser::new_from_str(text).keep_tags(true) {
3002            let event = event.unwrap().0;
3003            if let Event::Scalar(_, _, _, Some(tag)) = event {
3004                if tag.suffix == "int" {
3005                    int_tags.push(tag.handle.clone());
3006                }
3007            }
3008        }
3009
3010        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
3011    }
3012
3013    #[test]
3014    fn test_resolve_tag_uses_overridden_local_prefix() {
3015        let mut parser = Parser::new_from_str("");
3016        parser
3017            .tags
3018            .insert(String::new(), "tag:local.example,2024:".to_string());
3019
3020        let tag = parser
3021            .resolve_tag(
3022                Span::empty(Marker::new(0, 1, 0)),
3023                &Cow::Borrowed(""),
3024                Cow::Borrowed("!"),
3025            )
3026            .unwrap();
3027
3028        assert_eq!(tag.handle, "tag:local.example,2024:");
3029        assert_eq!(tag.suffix, "!");
3030    }
3031
3032    #[test]
3033    fn test_load_after_peek_stream_start() {
3034        #[derive(Default)]
3035        struct Sink<'input> {
3036            events: Vec<Event<'input>>,
3037        }
3038
3039        impl<'input> EventReceiver<'input> for Sink<'input> {
3040            fn on_event(&mut self, ev: Event<'input>) {
3041                self.events.push(ev);
3042            }
3043        }
3044
3045        let mut parser = Parser::new_from_str("key: value\n");
3046        let mut sink = Sink::default();
3047
3048        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
3049        parser.load(&mut sink, false).unwrap();
3050
3051        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
3052        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
3053    }
3054}