Skip to main content

granit_parser/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{
10        Marker, Placement, QueuedToken, QueuedTokenType, ScalarStyle, ScanError, Scanner, Span,
11    },
12    BufferedInput,
13};
14
15use alloc::{
16    borrow::Cow,
17    collections::{BTreeMap, BTreeSet, VecDeque},
18    string::{String, ToString},
19    vec::Vec,
20};
21use core::{
22    convert::Infallible,
23    fmt::{self, Display},
24};
25
/// Internal states of the parser state machine.
///
/// The parser stores its current state in `Parser::state` and keeps the states to return to in
/// the `Parser::states` stack.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// State a newly created parser starts in (see `Parser::new`).
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockNodeOrIndentlessSequence,
    FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    IndentlessSequenceEntryNode,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingKeyNode,
    BlockMappingValue,
    BlockMappingValueNode,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingValueNode,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingKeyNode,
    FlowMappingValue,
    FlowMappingValueNode,
    FlowMappingEmptyValue,
    BlockSequenceEntryNode,
    End,
}
60
/// An event generated by the YAML parser.
///
/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
/// [`EventReceiver`] trait.
///
/// Node events ([`Event::Scalar`], [`Event::SequenceStart`], [`Event::MappingStart`]) carry an
/// anchor ID and an optional resolved tag; [`Event::anchor_id`] and [`Event::tag`] read them
/// without matching on the variant.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Reserved for internal use.
    Nothing,
    /// Event generated at the very beginning of parsing.
    StreamStart,
    /// Last event that will be generated by the parser. Signals EOF.
    StreamEnd,
    /// The start of a YAML document.
    ///
    /// When the boolean is `true`, it is an explicit document start
    /// directive (`---`).
    ///
    /// When the boolean is `false`, it is an implicit document start
    /// (without `---`).
    DocumentStart(bool),
    /// The end of a YAML document.
    ///
    /// This event is emitted for both explicit document end markers (`...`) and implicit document
    /// ends.
    DocumentEnd,
    /// A YAML alias.
    ///
    /// [`Event::alias_id`] returns the referenced anchor ID.
    Alias(
        /// The anchor ID the alias refers to.
        usize,
    ),
    /// A YAML source comment.
    ///
    /// Comments are presentation metadata, not YAML data nodes. The payload is the raw text
    /// exactly after `#`, excluding only the line break. The placement is a best-effort hint for
    /// correlating the comment with nearby YAML presentation. The companion parser [`Span`] covers
    /// the whole source comment, including `#` and excluding the line break.
    Comment(
        /// Raw comment payload exactly after `#`, excluding only the line break.
        Cow<'input, str>,
        /// Best-effort placement relative to nearby YAML content.
        Placement,
    ),
    /// A YAML scalar value.
    Scalar(
        /// The scalar value after YAML escape processing.
        Cow<'input, str>,
        /// The source notation used for the scalar.
        ScalarStyle,
        /// The anchor ID defined on this scalar, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this scalar, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The start of a YAML sequence (array).
    SequenceStart(
        /// The notation style used for the sequence (Flow or Block).
        StructureStyle,
        /// The anchor ID defined on this sequence, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this sequence, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML sequence (array).
    SequenceEnd,
    /// The start of a YAML mapping (object, hash).
    MappingStart(
        /// The notation style used for the mapping (Flow or Block).
        StructureStyle,
        /// The anchor ID defined on this mapping, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this mapping, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML mapping (object, hash).
    MappingEnd,
}
137
/// The notation style used for a YAML sequence or mapping.
///
/// This value is carried by [`Event::SequenceStart`] and [`Event::MappingStart`].
///
/// [`StructureStyle::Block`] means block notation:
///
/// ```yaml
/// items:
///   - milk
///   - bread
/// mapping:
///   name: Ada
///   active: true
/// ```
///
/// [`StructureStyle::Flow`] means flow notation:
///
/// ```yaml
/// items: [milk, bread]
/// mapping: {name: Ada, active: true}
/// ```
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum StructureStyle {
    /// Block notation, such as `- item` sequences and `key: value` mappings.
    Block,
    /// Flow notation, such as `[item]` sequences and `{key: value}` mappings.
    Flow,
}
164
/// A YAML tag.
///
/// Stores the tag after `%TAG` directive resolution (`handle` and `suffix`) together with the
/// handle as it was written in the source (`original_handle`).
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Resolved tag handle or prefix.
    ///
    /// Examples include `tag:yaml.org,2002:` for core-schema tags and `!` for local tags.
    pub handle: String,
    /// Tag suffix following the resolved handle or prefix.
    ///
    /// For example, the source tag `!e!keep` has the suffix `keep`.
    pub suffix: String,
    /// Tag handle as written in the source before `%TAG` directive resolution.
    ///
    /// For example, with `%TAG !e! tag:example.com,2000:`, a source tag `!e!keep` is resolved
    /// as `handle = "tag:example.com,2000:"` and `suffix = "keep"`, while
    /// `original_handle = "!e!"`.
    pub original_handle: String,
}
181
182impl Tag {
183    /// Create a tag from resolved parts.
184    ///
185    /// This is mainly useful for tests and consumers constructing parser-compatible tags by hand.
186    /// When the original source handle matters, use [`Self::with_original_handle`].
187    #[must_use]
188    pub fn new(handle: impl Into<String>, suffix: impl Into<String>) -> Self {
189        let handle = handle.into();
190        Self {
191            original_handle: handle.clone(),
192            handle,
193            suffix: suffix.into(),
194        }
195    }
196
197    /// Create a tag from resolved parts and the handle as written in the source.
198    #[must_use]
199    pub fn with_original_handle(
200        handle: impl Into<String>,
201        suffix: impl Into<String>,
202        original_handle: impl Into<String>,
203    ) -> Self {
204        Self {
205            handle: handle.into(),
206            suffix: suffix.into(),
207            original_handle: original_handle.into(),
208        }
209    }
210
211    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
212    ///
213    /// The YAML specification specifies [a list of
214    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. This function
215    /// checks whether _the handle_ (but not the suffix) is the handle for the YAML Core Schema.
216    ///
217    /// # Return
218    /// Returns `true` if the handle is `tag:yaml.org,2002:`, `false` otherwise.
219    #[must_use]
220    pub fn is_yaml_core_schema(&self) -> bool {
221        self.handle == "tag:yaml.org,2002:"
222    }
223
224    /// Return true for a YAML core-schema tag with the given suffix.
225    ///
226    /// For example, this matches core-schema tags such as `!!str`, `!!int`, `!!float`, `!!bool`,
227    /// `!!null`, `!!map`, or `!!seq` after tag resolution.
228    #[must_use]
229    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
230        self.is_yaml_core_schema() && self.suffix == suffix
231    }
232
233    /// Return true for a tag outside the YAML core-schema namespace.
234    ///
235    /// This checks only the tag handle. It returns `false` for any tag whose handle is
236    /// `tag:yaml.org,2002:`, regardless of suffix.
237    #[must_use]
238    pub fn is_custom(&self) -> bool {
239        !self.is_yaml_core_schema()
240    }
241
242    /// Return the tag as `(handle, suffix)`.
243    #[must_use]
244    pub fn parts(&self) -> (&str, &str) {
245        (&self.handle, &self.suffix)
246    }
247
248    /// Return the tag as `(original_handle, suffix)` using the handle from the source token.
249    ///
250    /// This is useful when a consumer needs author spelling such as `!e!keep` instead of the
251    /// resolved URI tag `tag:example.com,2000:keep`.
252    #[must_use]
253    pub fn original_parts(&self) -> (&str, &str) {
254        (&self.original_handle, &self.suffix)
255    }
256
257    /// Return the tag spelling reconstructed from the source handle and suffix.
258    ///
259    /// For ordinary shorthand tags this returns the author-facing spelling, such as `!e!keep` or
260    /// `!!str`. For verbatim tags this returns a normalized verbatim spelling such as
261    /// `!<tag:example.com,2000:thing>`, not necessarily the byte-exact source token.
262    #[must_use]
263    pub fn original(&self) -> String {
264        if self.original_handle.is_empty() && self.suffix != "!" {
265            let mut tag = String::with_capacity(self.suffix.len() + 3);
266            tag.push_str("!<");
267            tag.push_str(&self.suffix);
268            tag.push('>');
269            return tag;
270        }
271
272        let mut tag = String::with_capacity(self.original_handle.len() + self.suffix.len());
273        tag.push_str(&self.original_handle);
274        tag.push_str(&self.suffix);
275        tag
276    }
277}
278
279impl Display for Tag {
280    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
281        if self.handle == "!" {
282            write!(f, "!{}", self.suffix)
283        } else {
284            write!(f, "{}{}", self.handle, self.suffix)
285        }
286    }
287}
288
289impl<'input> Event<'input> {
290    /// Return the anchor ID defined by this event, if any.
291    ///
292    /// Returns `Some(id)` when this event defines an anchor on a scalar, sequence, or mapping
293    /// node. Returns `None` for all other events, including `Alias` (which references an anchor
294    /// rather than defining one; use [`Self::alias_id`] to obtain the target anchor ID).
295    #[must_use]
296    pub fn anchor_id(&self) -> Option<usize> {
297        match self {
298            Self::Scalar(_, _, anchor_id, _)
299            | Self::SequenceStart(_, anchor_id, _)
300            | Self::MappingStart(_, anchor_id, _)
301                if *anchor_id != 0 =>
302            {
303                Some(*anchor_id)
304            }
305            _ => None,
306        }
307    }
308
309    /// Return the target anchor ID referenced by this alias event, if this event is an alias.
310    #[must_use]
311    pub fn alias_id(&self) -> Option<usize> {
312        match self {
313            Self::Alias(anchor_id) => Some(*anchor_id),
314            _ => None,
315        }
316    }
317
318    /// Return the resolved tag carried by this node event, if any.
319    #[must_use]
320    pub fn tag(&self) -> Option<&Tag> {
321        match self {
322            Self::Scalar(_, _, _, tag)
323            | Self::SequenceStart(_, _, tag)
324            | Self::MappingStart(_, _, tag) => tag.as_deref(),
325            _ => None,
326        }
327    }
328
329    /// Return the scalar value and style, if this event is a scalar.
330    #[must_use]
331    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
332        match self {
333            Self::Scalar(value, style, _, _) => Some((value.as_ref(), *style)),
334            _ => None,
335        }
336    }
337
338    /// Return whether this event represents a YAML node (value).
339    ///
340    /// Returns `true` for scalars, collection starts, and aliases — all events that produce a
341    /// value in the document tree. Returns `false` for structural events such as `StreamStart`,
342    /// `DocumentStart`, collection ends, etc.
343    #[must_use]
344    pub fn is_node(&self) -> bool {
345        matches!(
346            self,
347            Self::Alias(_) | Self::Scalar(..) | Self::SequenceStart(..) | Self::MappingStart(..)
348        )
349    }
350
351    /// Create an empty scalar.
352    fn empty_scalar() -> Self {
353        // a null scalar
354        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
355    }
356
357    /// Create an empty scalar with the given anchor.
358    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
359        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
360    }
361}
362
// Limit on buffered comment events.
//
// Preserve span ordering for normal-sized comment groups. Longer runs in syntactically ambiguous
// positions are rejected before they can grow the parser queue without bound.
const MAX_BUFFERED_COMMENT_EVENTS: usize = 32;
366
/// A YAML parser.
///
/// Pulls tokens from a [`Scanner`] and produces YAML [`Event`]s, each paired with its [`Span`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// The underlying scanner from which we pull tokens.
    scanner: Scanner<'input, T>,
    /// The stack of _previous_ states we were in.
    ///
    /// States are pushed in the context of subobjects to this stack. The top-most element is the
    /// state to come back to when exiting the current state.
    states: Vec<State>,
    /// The state in which we currently are.
    state: State,
    /// The next token from the scanner.
    token: Option<QueuedToken<'input>>,
    /// The next YAML event to emit.
    current: Option<(Event<'input>, Span)>,
    /// YAML events buffered by parser states that need to emit an earlier synthetic node first.
    queued_events: VecDeque<(Event<'input>, Span)>,

    /// Pending indentation hint to be attached to the next emitted event span.
    ///
    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
    /// itself).
    pending_key_indent: Option<usize>,
    /// Pending anchor ID to attach to a node after an intervening comment.
    ///
    /// A new parser starts with `0` here.
    pending_node_anchor_id: usize,
    /// Pending tag to attach to a node after an intervening comment.
    pending_node_tag: Option<Cow<'input, Tag>>,
    /// Pending explicit tag token start to attach to a node after an intervening comment.
    pending_node_tag_start: Option<Marker>,
    /// Pending empty scalar span captured before an intervening comment.
    pending_empty_scalar_span: Option<Span>,
    /// Anchors that have been encountered in the YAML document.
    ///
    /// Maps an anchor name to the ID assigned to it.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Next ID available for an anchor.
    ///
    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
    /// return for the next anchor and the count of anchor IDs emitted. A new parser starts at
    /// `1`.
    anchor_id_count: usize,
    /// The tag directives (`%TAG`) the parser has encountered.
    ///
    /// Key is the handle, and value is the prefix.
    tags: BTreeMap<String, String>,
    /// Whether we have emitted [`Event::StreamEnd`].
    ///
    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
    /// [`Self::token`], this is set to `false`.
    stream_end_emitted: bool,
    /// Make tags global across all documents.
    ///
    /// Disabled by default; configured through [`Parser::keep_tags`].
    keep_tags: bool,
}
419
/// Trait to be implemented in order to use the low-level parsing API.
///
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`SpannedEventReceiver`] automatically.
/// Non-spanned receivers receive [`Event::Comment(text, placement)`](Event::Comment) like any
/// other event, but without source location. Spanned receivers receive the same comment event plus
/// the comment [`Span`] in [`SpannedEventReceiver::on_event`]. For comments, that span covers the
/// whole source comment, including `#` and excluding the line break. When parsing from an input
/// with byte offsets, such as [`Parser::new_from_str`], [`Span::slice`] returns that source
/// comment text.
///
/// Receivers that need to stop parsing by returning an application error should implement
/// [`TryEventReceiver`] instead.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
///
/// In a mapping, key-values are sent as consecutive data events. Comments can appear in the raw
/// event stream between a key and its value; they are presentation metadata, not YAML data nodes.
/// Consumers building YAML data trees should ignore [`Event::Comment`]. Any key/value alternation
/// shortcut applies only after filtering out comments and other presentation metadata. After that
/// filtering, the first event after an [`Event::MappingStart`] will be the key, and the following
/// event will be its value. If the mapping contains no sub-mapping or sub-sequence, then even events
/// (starting from 0) will always be keys and odd ones will always be values. The mapping ends when
/// an [`Event::MappingEnd`] event is received.
///
/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
///
/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
/// be part of the value and not another key-value pair or element in the sequence.
///
/// For instance, the following YAML:
/// ```yaml
/// a: b
/// c:
///   d: e
/// f:
///   - g
///   - h
/// ```
/// will emit (indented and commented for visibility):
/// ```text
/// StreamStart, DocumentStart, MappingStart,
///   Scalar("a", ..), Scalar("b", ..)
///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
/// MappingEnd, DocumentEnd, StreamEnd
/// ```
///
/// # Example
/// ```
/// # use granit_parser::{Event, EventReceiver, Parser};
/// #
/// /// Sink of events. Collects them into an array.
/// struct EventSink<'input> {
///     events: Vec<Event<'input>>,
/// }
///
/// /// Implement `on_event`, pushing into `self.events`.
/// impl<'input> EventReceiver<'input> for EventSink<'input> {
///     fn on_event(&mut self, ev: Event<'input>) {
///         self.events.push(ev);
///     }
/// }
///
/// /// Load events from a YAML string.
/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
///     let mut sink = EventSink { events: Vec::new() };
///     let mut parser = Parser::new_from_str(yaml);
///     // Load events using our sink as the receiver.
///     parser.load(&mut sink, true).unwrap();
///     sink.events
/// }
/// ```
pub trait EventReceiver<'input> {
    /// Handler called for each YAML event that is emitted by the parser.
    ///
    /// The event is passed by value, so the receiver takes ownership of it.
    fn on_event(&mut self, ev: Event<'input>);
}
503
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
/// For [`Event::Comment`], the span is the source range of the whole comment.
///
/// Every [`EventReceiver`] implements this trait automatically; the span is dropped before the
/// event is forwarded. The fallible counterpart is [`TrySpannedEventReceiver`].
pub trait SpannedEventReceiver<'input> {
    /// Handler called for each event that occurs.
    ///
    /// `span` is the source location that accompanies `ev`.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
512
513impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
514    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
515        self.on_event(ev);
516    }
517}
518
/// Trait to be implemented for fallible event handling without source spans.
///
/// This is the fallible counterpart to [`EventReceiver`]. Use it with [`Parser::try_load`] when
/// event handling may need to stop parsing by returning an application error.
///
/// Every implementor also implements [`TrySpannedEventReceiver`] automatically, with the same
/// error type.
pub trait TryEventReceiver<'input> {
    /// Error returned by this receiver.
    type Error;

    /// Handler called for each YAML event that is emitted by the parser.
    ///
    /// Returning an error stops [`Parser::try_load`] immediately.
    ///
    /// # Errors
    /// Returns `Self::Error` when the receiver wants to stop parsing.
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
}
535
/// Trait to be implemented for fallible event handling with source spans.
///
/// This is the fallible counterpart to [`SpannedEventReceiver`]. Use it with
/// [`Parser::try_load`] when event handling may need to stop parsing by returning an application
/// error.
///
/// Implementors of [`TryEventReceiver`] get this trait for free; their handler is called without
/// the span.
pub trait TrySpannedEventReceiver<'input> {
    /// Error returned by this receiver.
    type Error;

    /// Handler called for each event that occurs.
    ///
    /// `span` is the source location that accompanies `ev`. Returning an error stops
    /// [`Parser::try_load`] immediately.
    ///
    /// # Errors
    /// Returns `Self::Error` when the receiver wants to stop parsing.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
}
553
554impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
555    type Error = R::Error;
556
557    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
558        TryEventReceiver::on_event(self, ev)
559    }
560}
561
/// Error returned by [`Parser::try_load`] and [`ParserTrait::try_load`].
///
/// `E` is the application error type of the receiver in use. A [`ScanError`] converts into
/// [`TryLoadError::Scan`] through `From`, so `?` can be applied to scanner and parser results.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TryLoadError<E> {
    /// Scanning or parsing failed.
    Scan(
        /// The scanner or parser error.
        ScanError,
    ),
    /// The receiver returned an application error.
    Receiver(
        /// The error returned by the receiver.
        E,
    ),
}
576
577impl<E> TryLoadError<E> {
578    #[cold]
579    fn scan(error: ScanError) -> Self {
580        Self::Scan(error)
581    }
582
583    #[cold]
584    fn receiver(error: E) -> Self {
585        Self::Receiver(error)
586    }
587}
588
589impl<E> From<ScanError> for TryLoadError<E> {
590    #[cold]
591    fn from(error: ScanError) -> Self {
592        Self::scan(error)
593    }
594}
595
596impl<E: Display> Display for TryLoadError<E> {
597    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
598        match self {
599            Self::Scan(error) => write!(f, "parser error: {error}"),
600            Self::Receiver(error) => write!(f, "receiver error: {error}"),
601        }
602    }
603}
604
605impl<E> core::error::Error for TryLoadError<E>
606where
607    E: core::error::Error + 'static,
608{
609    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
610        match self {
611            Self::Scan(error) => Some(error),
612            Self::Receiver(error) => Some(error),
613        }
614    }
615}
616
617fn try_emit<'input, R>(
618    recv: &mut R,
619    ev: Event<'input>,
620    span: Span,
621) -> Result<(), TryLoadError<R::Error>>
622where
623    R: TrySpannedEventReceiver<'input>,
624{
625    recv.on_event(ev, span).map_err(TryLoadError::receiver)
626}
627
/// Wrapper around a mutable reference to a [`SpannedEventReceiver`].
///
/// It implements [`TrySpannedEventReceiver`] with `Infallible` as its error type, so an
/// infallible receiver can be used where a fallible one is expected.
struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
629
630impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
631    for InfallibleSpannedReceiver<'_, R>
632{
633    type Error = Infallible;
634
635    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
636        self.0.on_event(ev, span);
637        Ok(())
638    }
639}
640
641fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
642    match result {
643        Ok(()) => Ok(()),
644        Err(TryLoadError::Scan(error)) => error.into_result(),
645        Err(TryLoadError::Receiver(error)) => match error {},
646    }
647}
648
/// A convenience alias for a parser event result.
///
/// On success it holds the event together with its [`Span`]; on failure, the [`ScanError`].
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
651
652/// Trait extracted from `Parser` to support mocking and alternative implementations.
653pub trait ParserTrait<'input> {
654    /// Try to load the next event and return it without consuming it from `self`.
655    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
656
657    /// Try to load the next event and return it, consuming it from `self`.
658    fn next_event(&mut self) -> Option<ParseResult<'input>>;
659
660    /// Load the YAML from the stream in `self`, pushing events into `recv`.
661    ///
662    /// Use this method when event handling is infallible. If receiver code can return an
663    /// application error and should stop parsing, use [`ParserTrait::try_load`] instead. If the
664    /// caller should directly control when the next event is read, use [`ParserTrait::next_event`]
665    /// or [`Parser`]'s [`core::iter::Iterator`] implementation.
666    ///
667    /// # Errors
668    /// Returns `ScanError` when scanning or parsing the stream fails.
669    fn load<R: SpannedEventReceiver<'input>>(
670        &mut self,
671        recv: &mut R,
672        multi: bool,
673    ) -> Result<(), ScanError>;
674
675    /// Load the YAML from the stream in `self`, stopping if `recv` returns an error.
676    ///
677    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
678    /// inside the stream.
679    ///
680    /// If the receiver returns an error, the parser is left positioned immediately after the event
681    /// that caused the receiver error. Callers should treat the parser as partially consumed.
682    ///
683    /// # Errors
684    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
685    /// [`TryLoadError::Receiver`] when `recv` returns an error.
686    fn try_load<R: TrySpannedEventReceiver<'input>>(
687        &mut self,
688        recv: &mut R,
689        multi: bool,
690    ) -> Result<(), TryLoadError<R::Error>> {
691        while let Some(res) = self.next_event() {
692            let (ev, span) = res?;
693            let is_doc_end = matches!(ev, Event::DocumentEnd);
694            let is_stream_end = matches!(ev, Event::StreamEnd);
695
696            try_emit(recv, ev, span)?;
697
698            if is_stream_end {
699                break;
700            }
701            if !multi && is_doc_end {
702                break;
703            }
704        }
705
706        Ok(())
707    }
708}
709
710impl<'input> Parser<'input, StrInput<'input>> {
711    /// Create a parser over a borrowed string slice.
712    #[must_use]
713    pub fn new_from_str(value: &'input str) -> Self {
714        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
715        Parser::new(StrInput::new(value))
716    }
717}
718
719impl<T> Parser<'static, BufferedInput<T>>
720where
721    T: Iterator<Item = char>,
722{
723    /// Create a parser over an iterator of characters.
724    #[must_use]
725    pub fn new_from_iter(iter: T) -> Self {
726        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
727        Parser::new(BufferedInput::new(iter))
728    }
729}
730
731impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
732    /// Return the next anchor ID that will be assigned by this parser.
733    pub fn get_anchor_offset(&self) -> usize {
734        self.anchor_id_count
735    }
736
    /// Set the next anchor ID that will be assigned by this parser.
    ///
    /// The value is stored as given, without validation.
    // NOTE(review): events use anchor ID `0` to mean "no anchor" and `Parser::new` starts
    // counting at `1`, so an `offset` of `0` would presumably produce an anchor that consumers
    // cannot tell apart from "no anchor" — confirm whether callers must pass a value >= 1.
    pub fn set_anchor_offset(&mut self, offset: usize) {
        self.anchor_id_count = offset;
    }
741
742    /// Create a parser over a custom input source.
743    pub fn new(src: T) -> Self {
744        Parser {
745            scanner: Scanner::new(src),
746            states: Vec::new(),
747            state: State::StreamStart,
748            token: None,
749            current: None,
750            queued_events: VecDeque::new(),
751
752            pending_key_indent: None,
753            pending_node_anchor_id: 0,
754            pending_node_tag: None,
755            pending_node_tag_start: None,
756            pending_empty_scalar_span: None,
757
758            anchors: BTreeMap::new(),
759            // valid anchor_id starts from 1
760            anchor_id_count: 1,
761            tags: BTreeMap::new(),
762            stream_end_emitted: false,
763            keep_tags: false,
764        }
765    }
766
767    /// Configure whether tag directives remain active across document boundaries.
768    ///
769    /// This behavior is non-standard as per the YAML specification but can be encountered in the
770    /// wild. Passing `true` enables this non-standard extension and allows the parser to accept
771    /// input from [test
772    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
773    /// of the yaml-test-suite:
774    ///
775    /// ```yaml
776    /// %TAG !prefix! tag:example.com,2011:
777    /// --- !prefix!A
778    /// a: b
779    /// --- !prefix!B
780    /// c: d
781    /// --- !prefix!C
782    /// e: f
783    /// ```
784    ///
785    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
786    /// only apply to the document immediately following them. This would error on `!prefix!B`.
787    ///
788    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
789    #[must_use]
790    pub fn keep_tags(mut self, value: bool) -> Self {
791        self.keep_tags = value;
792        self
793    }
794
795    /// Try to load the next event and return it without consuming it from `self`.
796    ///
797    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
798    /// [`Iterator::next`] or [`Parser::load`].
799    ///
800    /// # Errors
801    /// Returns `ScanError` when loading the next event fails.
802    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
803        ParserTrait::peek(self)
804    }
805
806    /// Try to load the next event and return it, consuming it from `self`.
807    ///
808    /// # Errors
809    /// Returns `ScanError` when loading the next event fails.
810    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
811        ParserTrait::next_event(self)
812    }
813
814    /// Implementation function for [`Self::next_event`] without the `Option`.
815    ///
816    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
817    /// option. This burdens the parser code. This function is used internally when an option is
818    /// undesirable.
819    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
820    where
821        'input: 'a,
822    {
823        match self.current.take() {
824            None => {
825                if let Some(event) = self.queued_events.pop_front() {
826                    Ok(self.apply_pending_key_indent(event))
827                } else if let Some(comment) = self.maybe_next_comment_event()? {
828                    Ok(comment)
829                } else {
830                    self.parse()
831                }
832            }
833            Some(v) => Ok(v),
834        }
835    }
836
837    fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
838        if ev.is_node() {
839            if let Some(indent) = self.pending_key_indent.take() {
840                return (ev, span.with_indent(Some(indent)));
841            }
842        }
843
844        (ev, span)
845    }
846
847    /// Peek at the next token from the scanner.
848    fn peek_token(&mut self) -> Result<&QueuedToken<'_>, ScanError> {
849        match self.token {
850            None => {
851                self.token = Some(self.scan_next_token()?);
852                Ok(self.token.as_ref().unwrap())
853            }
854            Some(ref tok) => Ok(tok),
855        }
856    }
857
858    /// Extract and return the next token from the scanner.
859    ///
860    /// This function does _not_ make use of `self.token`.
861    fn scan_next_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
862        match self.scanner.next_queued_token()? {
863            None => match self.scanner.get_error() {
864                None => Err(self.unexpected_eof()),
865                Some(e) => e.into_result(),
866            },
867            Some(tok) => Ok(tok),
868        }
869    }
870
871    #[inline]
872    fn maybe_next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
873    where
874        'input: 'a,
875    {
876        if self.scanner.comments_possible() {
877            self.next_comment_event()
878        } else {
879            Ok(None)
880        }
881    }
882
883    fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
884    where
885        'input: 'a,
886    {
887        let is_comment = {
888            let token = self.peek_token()?;
889            matches!(token.1, QueuedTokenType::Comment(_))
890        };
891
892        if !is_comment {
893            return Ok(None);
894        }
895
896        let QueuedToken(span, token) = self.fetch_token();
897        match token {
898            QueuedTokenType::Comment(mut comment) => {
899                comment.placement = self.refined_comment_placement(span, comment.placement);
900                Ok(Some((
901                    Event::Comment(comment.text, comment.placement),
902                    span,
903                )))
904            }
905            _ => unreachable!("comment token disappeared after peek"),
906        }
907    }
908
909    #[inline]
910    fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
911        if !self.scanner.comments_possible() {
912            return Ok(Vec::new());
913        }
914
915        let mut events = Vec::new();
916        loop {
917            match self.peek_token() {
918                Ok(token) if matches!(token.1, QueuedTokenType::Comment(_)) => {}
919                Err(error) if events.is_empty() => return Err(error),
920                Ok(_) | Err(_) => return Ok(events),
921            }
922
923            if events.len() == MAX_BUFFERED_COMMENT_EVENTS {
924                return Err(ScanError::new_str(
925                    self.peek_token()?.0.start,
926                    "too many consecutive comments before resolving collection entry",
927                ));
928            }
929
930            let comment = self
931                .next_comment_event()?
932                .expect("comment token disappeared after peek");
933            events.push(comment);
934        }
935    }
936
937    fn queue_tail_and_return_first(
938        &mut self,
939        events: Vec<(Event<'input>, Span)>,
940    ) -> (Event<'input>, Span) {
941        let mut events = events.into_iter();
942        let first = events
943            .next()
944            .expect("event queue must contain at least one event");
945        self.queued_events.extend(events);
946        first
947    }
948
949    fn queue_event_by_span(
950        &mut self,
951        comments: Vec<(Event<'input>, Span)>,
952        event: (Event<'input>, Span),
953    ) -> (Event<'input>, Span) {
954        let insert_at = comments
955            .iter()
956            .position(|(_, comment_span)| {
957                comment_span.start.index() >= event.1.start.index()
958                    && comment_span.end.index() >= event.1.end.index()
959            })
960            .unwrap_or(comments.len());
961        let mut ordered = Vec::with_capacity(comments.len() + 1);
962        let mut comments = comments.into_iter();
963
964        for _ in 0..insert_at {
965            ordered.push(
966                comments
967                    .next()
968                    .expect("comment disappeared while ordering queued events"),
969            );
970        }
971        ordered.push(event);
972        ordered.extend(comments);
973
974        self.queue_tail_and_return_first(ordered)
975    }
976
977    fn queue_two_events_by_span(
978        &mut self,
979        comments: Vec<(Event<'input>, Span)>,
980        first: (Event<'input>, Span),
981        second: (Event<'input>, Span),
982    ) -> (Event<'input>, Span) {
983        let insert_at = comments
984            .iter()
985            .position(|(_, comment_span)| {
986                comment_span.start.index() >= first.1.start.index()
987                    && comment_span.end.index() >= first.1.end.index()
988            })
989            .unwrap_or(comments.len());
990        let mut ordered = Vec::with_capacity(comments.len() + 2);
991        let mut comments = comments.into_iter();
992
993        for _ in 0..insert_at {
994            ordered.push(
995                comments
996                    .next()
997                    .expect("comment disappeared while ordering queued events"),
998            );
999        }
1000        ordered.push(first);
1001        ordered.push(second);
1002        ordered.extend(comments);
1003
1004        self.queue_tail_and_return_first(ordered)
1005    }
1006
1007    fn refined_comment_placement(&mut self, span: Span, placement: Placement) -> Placement {
1008        if placement == Placement::Right {
1009            return Placement::Right;
1010        }
1011
1012        let Ok(next) = self.peek_token() else {
1013            return placement;
1014        };
1015        if matches!(next.1, QueuedTokenType::StreamEnd) {
1016            return Placement::Last;
1017        }
1018
1019        if next.0.start.line() == span.end.line() + 1 {
1020            Placement::Above
1021        } else {
1022            Placement::Free
1023        }
1024    }
1025
1026    #[cold]
1027    fn unexpected_eof(&self) -> ScanError {
1028        let info = match self.state {
1029            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1030                "unexpected EOF while parsing a flow sequence"
1031            }
1032            State::FlowMappingFirstKey
1033            | State::FlowMappingKey
1034            | State::FlowMappingValue
1035            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1036            State::FlowSequenceEntryMappingKey
1037            | State::FlowSequenceEntryMappingValue
1038            | State::FlowSequenceEntryMappingEnd
1039            | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
1040            State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
1041                "unexpected EOF while parsing a block sequence"
1042            }
1043            State::BlockMappingFirstKey
1044            | State::BlockMappingKey
1045            | State::BlockMappingValue
1046            | State::BlockNodeOrIndentlessSequence => {
1047                "unexpected EOF while parsing a block mapping"
1048            }
1049            _ => "unexpected eof",
1050        };
1051        ScanError::new_str(self.scanner.mark(), info)
1052    }
1053
1054    fn fetch_token<'a>(&mut self) -> QueuedToken<'a>
1055    where
1056        'input: 'a,
1057    {
1058        self.token
1059            .take()
1060            .expect("fetch_token needs to be preceded by peek_token")
1061    }
1062
1063    /// Skip the next token from the scanner.
        ///
        /// Clears the token cached in `self.token`, so the next `peek_token` scans a fresh one.
1064    fn skip(&mut self) {
1065        self.token = None;
1066    }
1067    /// Pops the top-most state and makes it the current state.
        ///
        /// # Panics
        /// Panics if the state stack is empty.
1068    fn pop_state(&mut self) {
1069        self.state = self.states.pop().unwrap();
1070    }
1071    /// Push a new state atop the state stack.
        ///
        /// The current state (`self.state`) is left untouched; `state` only becomes current once
        /// it is popped again via `pop_state`.
1072    fn push_state(&mut self, state: State) {
1073        self.states.push(state);
1074    }
1075
1076    fn defer_parse_node<'a>(
1077        &mut self,
1078        node_state: State,
1079        return_state: State,
1080        block: bool,
1081        indentless_sequence: bool,
1082    ) -> ParseResult<'a>
1083    where
1084        'input: 'a,
1085    {
1086        self.push_state(return_state);
1087        self.state = node_state;
1088        if let Some(comment) = self.maybe_next_comment_event()? {
1089            Ok(comment)
1090        } else {
1091            self.parse_node(block, indentless_sequence)
1092        }
1093    }
1094
1095    fn parse<'a>(&mut self) -> ParseResult<'a>
1096    where
1097        'input: 'a,
1098    {
1099        if self.state == State::End {
1100            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
1101        }
1102        let event = self.state_machine()?;
1103        Ok(self.apply_pending_key_indent(event))
1104    }
1105
1106    /// Load the YAML from the stream in `self`, pushing events into `recv`.
1107    ///
1108    /// The contents of the stream are parsed and the corresponding events are sent into the
1109    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
1110    ///
1111    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
1112    /// inside the stream.
1113    ///
1114    /// Use this method when event handling is infallible. If receiver code can return an
1115    /// application error and should stop parsing, use [`Parser::try_load`] instead. If the caller
1116    /// should directly control when the next event is read, use [`Parser`]'s
1117    /// [`core::iter::Iterator`] implementation.
1118    ///
1119    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
1120    /// former is enough to call this function.
1121    ///
        /// This inherent method forwards to `ParserTrait::load`.
        ///
1122    /// # Example
1123    /// ```
1124    /// # use granit_parser::{Event, EventReceiver, Parser};
1125    /// # fn main() -> Result<(), granit_parser::ScanError> {
1126    /// struct EventSink<'input> {
1127    ///     events: Vec<Event<'input>>,
1128    /// }
1129    ///
1130    /// impl<'input> EventReceiver<'input> for EventSink<'input> {
1131    ///     fn on_event(&mut self, ev: Event<'input>) {
1132    ///         self.events.push(ev);
1133    ///     }
1134    /// }
1135    ///
1136    /// let mut parser = Parser::new_from_str("a: 1\n");
1137    /// let mut sink = EventSink { events: Vec::new() };
1138    ///
1139    /// parser.load(&mut sink, false)?;
1140    ///
1141    /// assert!(sink
1142    ///     .events
1143    ///     .iter()
1144    ///     .any(|ev| matches!(ev, Event::Scalar(value, ..) if value == "a")));
1145    /// # Ok(())
1146    /// # }
1147    /// ```
1148    ///
1149    /// # Errors
1150    /// Returns `ScanError` when scanning or parsing the stream fails.
1151    pub fn load<R: SpannedEventReceiver<'input>>(
1152        &mut self,
1153        recv: &mut R,
1154        multi: bool,
1155    ) -> Result<(), ScanError> {
1156        ParserTrait::load(self, recv, multi)
1157    }
1158
1159    /// Load the YAML from the stream in `self`, pushing events into `recv`.
1160    ///
1161    /// This is the fallible counterpart to [`Parser::load`]. If `recv` returns an error, parsing
1162    /// stops immediately and that error is returned as [`TryLoadError::Receiver`].
1163    ///
1164    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
1165    /// inside the stream.
1166    ///
1167    /// If the receiver returns an error, the parser is left positioned immediately after the event
1168    /// that caused the receiver error. Callers should treat the parser as partially consumed.
1169    ///
        /// This inherent method forwards to `ParserTrait::try_load`.
        ///
1170    /// # Example
1171    /// ```
1172    /// # use granit_parser::{Event, Parser, TryEventReceiver, TryLoadError};
1173    /// #[derive(Debug, PartialEq, Eq)]
1174    /// enum ValidationError {
1175    ///     ForbiddenScalar,
1176    /// }
1177    ///
1178    /// struct Validator;
1179    ///
1180    /// impl<'input> TryEventReceiver<'input> for Validator {
1181    ///     type Error = ValidationError;
1182    ///
1183    ///     fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
1184    ///         if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
1185    ///             Err(ValidationError::ForbiddenScalar)
1186    ///         } else {
1187    ///             Ok(())
1188    ///         }
1189    ///     }
1190    /// }
1191    ///
1192    /// let mut parser = Parser::new_from_str("value: bad\n");
1193    /// let mut validator = Validator;
1194    ///
1195    /// let err = parser.try_load(&mut validator, false).unwrap_err();
1196    ///
1197    /// assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenScalar));
1198    /// ```
1199    ///
1200    /// # Errors
1201    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
1202    /// [`TryLoadError::Receiver`] when `recv` returns an error.
1203    pub fn try_load<R: TrySpannedEventReceiver<'input>>(
1204        &mut self,
1205        recv: &mut R,
1206        multi: bool,
1207    ) -> Result<(), TryLoadError<R::Error>> {
1208        ParserTrait::try_load(self, recv, multi)
1209    }
1210
1211    #[cfg(test)]
1212    fn try_load_document<R: TrySpannedEventReceiver<'input>>(
1213        &mut self,
1214        first_ev: Event<'input>,
1215        span: Span,
1216        recv: &mut R,
1217    ) -> Result<(), TryLoadError<R::Error>> {
1218        if !matches!(first_ev, Event::DocumentStart(_)) {
1219            return Err(TryLoadError::scan(ScanError::new_str(
1220                span.start,
1221                "did not find expected <document-start>",
1222            )));
1223        }
1224        try_emit(recv, first_ev, span)?;
1225
1226        let (ev, span) = self.next_event_impl()?;
1227        self.try_load_node(ev, span, recv)?;
1228
1229        // DOCUMENT-END is expected.
1230        let (ev, mark) = self.next_event_impl()?;
1231        assert_eq!(ev, Event::DocumentEnd);
1232        try_emit(recv, ev, mark)?;
1233
1234        Ok(())
1235    }
1236
1237    #[cfg(test)]
1238    fn try_load_node<R: TrySpannedEventReceiver<'input>>(
1239        &mut self,
1240        first_ev: Event<'input>,
1241        span: Span,
1242        recv: &mut R,
1243    ) -> Result<(), TryLoadError<R::Error>> {
1244        match first_ev {
1245            Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
1246            Event::SequenceStart(..) => {
1247                try_emit(recv, first_ev, span)?;
1248                self.try_load_sequence(recv)
1249            }
1250            Event::MappingStart(..) => {
1251                try_emit(recv, first_ev, span)?;
1252                self.try_load_mapping(recv)
1253            }
1254            _ => {
1255                #[cfg(feature = "debug_prints")]
1256                std::println!("UNREACHABLE EVENT: {first_ev:?}");
1257                unreachable!();
1258            }
1259        }
1260    }
1261
1262    #[cfg(test)]
1263    fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
1264        &mut self,
1265        recv: &mut R,
1266    ) -> Result<(), TryLoadError<R::Error>> {
1267        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
1268        while key_ev != Event::MappingEnd {
1269            // key
1270            self.try_load_node(key_ev, key_mark, recv)?;
1271
1272            // value
1273            let (ev, mark) = self.next_event_impl()?;
1274            self.try_load_node(ev, mark, recv)?;
1275
1276            // next event
1277            let (ev, mark) = self.next_event_impl()?;
1278            key_ev = ev;
1279            key_mark = mark;
1280        }
1281        try_emit(recv, key_ev, key_mark)?;
1282        Ok(())
1283    }
1284
1285    #[cfg(test)]
1286    fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
1287        &mut self,
1288        recv: &mut R,
1289    ) -> Result<(), TryLoadError<R::Error>> {
1290        let (mut ev, mut mark) = self.next_event_impl()?;
1291        while ev != Event::SequenceEnd {
1292            self.try_load_node(ev, mark, recv)?;
1293
1294            // next event
1295            let (next_ev, next_mark) = self.next_event_impl()?;
1296            ev = next_ev;
1297            mark = next_mark;
1298        }
1299        try_emit(recv, ev, mark)?;
1300        Ok(())
1301    }
1302
1303    fn state_machine<'a>(&mut self) -> ParseResult<'a>
1304    where
1305        'input: 'a,
1306    {
1307        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
1308
1309        match self.state {
1310            State::StreamStart => self.stream_start(),
1311
1312            State::ImplicitDocumentStart => self.document_start(true),
1313            State::DocumentStart => self.document_start(false),
1314            State::DocumentContent => self.document_content(),
1315            State::DocumentEnd => self.document_end(),
1316
1317            State::BlockNode => self.parse_node(true, false),
1318            State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
1319            State::FlowNode => self.parse_node(false, false),
1320            State::BlockMappingFirstKey => self.block_mapping_key(true),
1321            State::BlockMappingKey => self.block_mapping_key(false),
1322            State::BlockMappingKeyNode => self.block_mapping_key_node(),
1323            State::BlockMappingValue => self.block_mapping_value(),
1324            State::BlockMappingValueNode => self.block_mapping_value_node(),
1325
1326            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
1327            State::BlockSequenceEntry => self.block_sequence_entry(false),
1328            State::BlockSequenceEntryNode => self.block_sequence_entry_node(),
1329
1330            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
1331            State::FlowSequenceEntry => self.flow_sequence_entry(false),
1332
1333            State::FlowMappingFirstKey => self.flow_mapping_key(true),
1334            State::FlowMappingKey => self.flow_mapping_key(false),
1335            State::FlowMappingKeyNode => self.flow_mapping_key_node(),
1336            State::FlowMappingValue => self.flow_mapping_value(false),
1337            State::FlowMappingValueNode => self.flow_mapping_value_node(),
1338
1339            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
1340            State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),
1341
1342            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
1343            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
1344            State::FlowSequenceEntryMappingValueNode => {
1345                self.flow_sequence_entry_mapping_value_node()
1346            }
1347            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
1348            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
1349
1350            /* impossible */
1351            State::End => unreachable!(),
1352        }
1353    }
1354
1355    fn stream_start<'a>(&mut self) -> ParseResult<'a>
1356    where
1357        'input: 'a,
1358    {
1359        match *self.peek_token()? {
1360            QueuedToken(span, QueuedTokenType::StreamStart(_)) => {
1361                self.state = State::ImplicitDocumentStart;
1362                self.skip();
1363                Ok((Event::StreamStart, span))
1364            }
1365            QueuedToken(span, _) => Err(ScanError::new_str(
1366                span.start,
1367                "did not find expected <stream-start>",
1368            )),
1369        }
1370    }
1371
1372    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
1373    where
1374        'input: 'a,
1375    {
1376        while let QueuedTokenType::DocumentEnd = self.peek_token()?.1 {
1377            self.skip();
1378        }
1379
1380        // Anchors are scoped to a single document.
1381        self.anchors.clear();
1382
1383        match *self.peek_token()? {
1384            QueuedToken(span, QueuedTokenType::StreamEnd) => {
1385                self.state = State::End;
1386                self.skip();
1387                Ok((Event::StreamEnd, span))
1388            }
1389            QueuedToken(
1390                _,
1391                QueuedTokenType::VersionDirective(..)
1392                | QueuedTokenType::TagDirective(..)
1393                | QueuedTokenType::ReservedDirective(..)
1394                | QueuedTokenType::DocumentStart,
1395            ) => {
1396                // explicit document
1397                self.explicit_document_start()
1398            }
1399            QueuedToken(span, _) if implicit => {
1400                self.parser_process_directives()?;
1401                self.push_state(State::DocumentEnd);
1402                self.state = State::BlockNode;
1403                Ok((Event::DocumentStart(false), span))
1404            }
1405            _ => {
1406                // explicit document
1407                self.explicit_document_start()
1408            }
1409        }
1410    }
1411
1412    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
1413        let mut version_directive_received = false;
1414        let mut tags = if self.keep_tags {
1415            self.tags.clone()
1416        } else {
1417            BTreeMap::new()
1418        };
1419        let mut document_tag_handles = BTreeSet::new();
1420
1421        loop {
1422            match self.peek_token()? {
1423                QueuedToken(span, QueuedTokenType::VersionDirective(_, _)) => {
1424                    // YAML version compatibility is non-fatal here. The scanner validates the
1425                    // directive shape, and the parser rejects duplicates below, but it does not
1426                    // expose a warning channel for unsupported versions.
1427                    if version_directive_received {
1428                        return Err(ScanError::new_str(
1429                            span.start,
1430                            "duplicate version directive",
1431                        ));
1432                    }
1433                    version_directive_received = true;
1434                }
1435                QueuedToken(mark, QueuedTokenType::TagDirective(handle, prefix)) => {
1436                    if !document_tag_handles.insert(handle.to_string()) {
1437                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
1438                    }
1439                    tags.insert(handle.to_string(), prefix.to_string());
1440                }
1441                QueuedToken(_, QueuedTokenType::ReservedDirective(_, _)) => {
1442                    // Reserved directives are ignored
1443                }
1444                _ => break,
1445            }
1446            self.skip();
1447        }
1448
1449        self.tags = tags;
1450        Ok(())
1451    }
1452
1453    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1454    where
1455        'input: 'a,
1456    {
1457        self.parser_process_directives()?;
1458        if let Some(comment) = self.maybe_next_comment_event()? {
1459            return Ok(comment);
1460        }
1461        match *self.peek_token()? {
1462            QueuedToken(mark, QueuedTokenType::DocumentStart) => {
1463                self.push_state(State::DocumentEnd);
1464                self.state = State::DocumentContent;
1465                self.skip();
1466                Ok((Event::DocumentStart(true), mark))
1467            }
1468            QueuedToken(span, _) => Err(ScanError::new_str(
1469                span.start,
1470                "did not find expected <document start>",
1471            )),
1472        }
1473    }
1474
1475    fn document_content<'a>(&mut self) -> ParseResult<'a>
1476    where
1477        'input: 'a,
1478    {
1479        if let QueuedToken(
1480            mark,
1481            QueuedTokenType::VersionDirective(..)
1482            | QueuedTokenType::TagDirective(..)
1483            | QueuedTokenType::ReservedDirective(..)
1484            | QueuedTokenType::DocumentStart
1485            | QueuedTokenType::DocumentEnd
1486            | QueuedTokenType::StreamEnd,
1487        ) = *self.peek_token()?
1488        {
1489            self.pop_state();
1490            // empty scalar
1491            Ok((Event::empty_scalar(), mark))
1492        } else {
1493            self.state = State::BlockNode;
1494            self.parse_node(true, false)
1495        }
1496    }
1497
1498    fn document_end<'a>(&mut self) -> ParseResult<'a>
1499    where
1500        'input: 'a,
1501    {
1502        let mut explicit_end = false;
1503        let span: Span = match *self.peek_token()? {
1504            QueuedToken(span, QueuedTokenType::DocumentEnd) => {
1505                explicit_end = true;
1506                self.skip();
1507                span
1508            }
1509            QueuedToken(span, _) => span,
1510        };
1511
1512        if self.keep_tags {
1513            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
1514            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
1515            // explicit tags are interpreted later on.
1516            self.tags.remove("!!");
1517            self.tags.remove("");
1518        } else {
1519            self.tags.clear();
1520        }
1521        if explicit_end {
1522            self.state = State::ImplicitDocumentStart;
1523        } else {
1524            if let QueuedToken(
1525                span,
1526                QueuedTokenType::VersionDirective(..)
1527                | QueuedTokenType::TagDirective(..)
1528                | QueuedTokenType::ReservedDirective(..),
1529            ) = *self.peek_token()?
1530            {
1531                return Err(ScanError::new_str(
1532                    span.start,
1533                    "missing explicit document end marker before directive",
1534                ));
1535            }
1536            self.state = State::DocumentStart;
1537        }
1538
1539        Ok((Event::DocumentEnd, span))
1540    }
1541
1542    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1543        // YAML permits anchor names to be reused. Aliases resolve to the most recent definition.
1544        let new_id = self.anchor_id_count;
1545        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1546            ScanError::new_str(
1547                mark.start,
1548                "while parsing anchor, anchor count exceeded supported limit",
1549            )
1550        })?;
1551        self.anchors.insert(name, new_id);
1552        Ok(new_id)
1553    }
1554
1555    fn save_pending_node_properties(
1556        &mut self,
1557        anchor_id: usize,
1558        tag: Option<Cow<'input, Tag>>,
1559        tag_start: Option<Marker>,
1560    ) {
1561        self.pending_node_anchor_id = anchor_id;
1562        self.pending_node_tag = tag;
1563        self.pending_node_tag_start = tag_start;
1564    }
1565
1566    fn attach_tag_start(event: Event<'_>, span: Span, start: Option<Marker>) -> (Event<'_>, Span) {
1567        (event, span.with_tag_start(start))
1568    }
1569
1570    #[allow(clippy::too_many_lines)]
1571    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
1572    where
1573        'input: 'a,
1574    {
1575        if let Some(comment) = self.maybe_next_comment_event()? {
1576            return Ok(comment);
1577        }
1578
1579        let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
1580        let mut tag = self.pending_node_tag.take();
1581        let mut tag_start = self.pending_node_tag_start.take();
1582        match *self.peek_token()? {
1583            QueuedToken(_, QueuedTokenType::Alias(_)) => {
1584                self.pop_state();
1585                if let QueuedToken(span, QueuedTokenType::Alias(name)) = self.fetch_token() {
1586                    match self.anchors.get(&*name) {
1587                        None => {
1588                            return Err(ScanError::new_str(
1589                                span.start,
1590                                "while parsing node, found unknown anchor",
1591                            ))
1592                        }
1593                        Some(id) => return Ok((Event::Alias(*id), span)),
1594                    }
1595                }
1596                unreachable!()
1597            }
1598            QueuedToken(_, QueuedTokenType::Anchor(_)) => {
1599                if let QueuedToken(span, QueuedTokenType::Anchor(name)) = self.fetch_token() {
1600                    anchor_id = self.register_anchor(name, &span)?;
1601                    if matches!(self.peek_token()?.1, QueuedTokenType::Tag(..)) {
1602                        if let QueuedToken(tag_span, QueuedTokenType::Tag(handle, suffix)) =
1603                            self.fetch_token()
1604                        {
1605                            tag_start = Some(tag_span.start);
1606                            tag = Some(self.resolve_tag(tag_span, &handle, suffix)?);
1607                        } else {
1608                            unreachable!()
1609                        }
1610                    }
1611                    if let Some(comment) = self.maybe_next_comment_event()? {
1612                        self.save_pending_node_properties(anchor_id, tag, tag_start);
1613                        return Ok(comment);
1614                    }
1615                } else {
1616                    unreachable!()
1617                }
1618            }
1619            QueuedToken(mark, QueuedTokenType::Tag(..)) => {
1620                if let QueuedTokenType::Tag(handle, suffix) = self.fetch_token().1 {
1621                    tag_start = Some(mark.start);
1622                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
1623                    if let QueuedTokenType::Anchor(_) = &self.peek_token()?.1 {
1624                        if let QueuedToken(mark, QueuedTokenType::Anchor(name)) = self.fetch_token()
1625                        {
1626                            anchor_id = self.register_anchor(name, &mark)?;
1627                        } else {
1628                            unreachable!()
1629                        }
1630                    }
1631                    if let Some(comment) = self.maybe_next_comment_event()? {
1632                        self.save_pending_node_properties(anchor_id, tag, tag_start);
1633                        return Ok(comment);
1634                    }
1635                } else {
1636                    unreachable!()
1637                }
1638            }
1639            _ => {}
1640        }
1641        match *self.peek_token()? {
1642            QueuedToken(mark, QueuedTokenType::BlockEntry) if indentless_sequence => {
1643                self.skip();
1644                let comments = self.next_comment_events()?;
1645                let start = (
1646                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
1647                    mark.with_tag_start(tag_start),
1648                );
1649                if comments.is_empty() {
1650                    self.pending_empty_scalar_span = Some(mark);
1651                    self.state = State::IndentlessSequenceEntryNode;
1652                    Ok(start)
1653                } else if let Ok(QueuedToken(
1654                    _,
1655                    QueuedTokenType::BlockEntry
1656                    | QueuedTokenType::Key
1657                    | QueuedTokenType::Value
1658                    | QueuedTokenType::BlockEnd,
1659                )) = self.peek_token()
1660                {
1661                    self.state = State::IndentlessSequenceEntry;
1662                    Ok(self.queue_two_events_by_span(
1663                        comments,
1664                        start,
1665                        (Event::empty_scalar(), mark),
1666                    ))
1667                } else {
1668                    self.pending_empty_scalar_span = Some(mark);
1669                    self.state = State::IndentlessSequenceEntryNode;
1670                    Ok(self.queue_event_by_span(comments, start))
1671                }
1672            }
1673            QueuedToken(_, QueuedTokenType::Scalar(..)) => {
1674                self.pop_state();
1675                if let QueuedToken(mark, QueuedTokenType::Scalar(style, v)) = self.fetch_token() {
1676                    Ok(Self::attach_tag_start(
1677                        Event::Scalar(v, style, anchor_id, tag),
1678                        mark,
1679                        tag_start,
1680                    ))
1681                } else {
1682                    unreachable!()
1683                }
1684            }
1685            QueuedToken(mark, QueuedTokenType::FlowSequenceStart) => {
1686                self.state = State::FlowSequenceFirstEntry;
1687                self.skip();
1688                Ok(Self::attach_tag_start(
1689                    Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
1690                    mark,
1691                    tag_start,
1692                ))
1693            }
1694            QueuedToken(mark, QueuedTokenType::FlowMappingStart) => {
1695                self.state = State::FlowMappingFirstKey;
1696                self.skip();
1697                Ok(Self::attach_tag_start(
1698                    Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
1699                    mark,
1700                    tag_start,
1701                ))
1702            }
1703            QueuedToken(mark, QueuedTokenType::BlockSequenceStart) if block => {
1704                self.state = State::BlockSequenceFirstEntry;
1705                self.skip();
1706                Ok(Self::attach_tag_start(
1707                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
1708                    mark,
1709                    tag_start,
1710                ))
1711            }
1712            QueuedToken(mark, QueuedTokenType::BlockMappingStart) if block => {
1713                self.state = State::BlockMappingFirstKey;
1714                self.skip();
1715                Ok(Self::attach_tag_start(
1716                    Event::MappingStart(StructureStyle::Block, anchor_id, tag),
1717                    mark,
1718                    tag_start,
1719                ))
1720            }
1721            // ex 7.2, an empty scalar can follow a secondary tag
1722            QueuedToken(mark, _) if tag.is_some() || anchor_id > 0 => {
1723                self.pop_state();
1724                Ok(Self::attach_tag_start(
1725                    Event::empty_scalar_with_anchor(anchor_id, tag),
1726                    mark,
1727                    tag_start,
1728                ))
1729            }
1730            QueuedToken(span, _) => {
1731                let info = match self.state {
1732                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1733                        "unexpected EOF while parsing a flow sequence"
1734                    }
1735                    State::FlowMappingFirstKey
1736                    | State::FlowMappingKey
1737                    | State::FlowMappingValue
1738                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1739                    State::FlowSequenceEntryMappingKey
1740                    | State::FlowSequenceEntryMappingValue
1741                    | State::FlowSequenceEntryMappingEnd
1742                    | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
1743                    State::BlockSequenceFirstEntry
1744                    | State::BlockSequenceEntry
1745                    | State::BlockNode => "unexpected EOF while parsing a block sequence",
1746                    State::BlockMappingFirstKey
1747                    | State::BlockMappingKey
1748                    | State::BlockMappingValue
1749                    | State::BlockNodeOrIndentlessSequence => {
1750                        "unexpected EOF while parsing a block mapping"
1751                    }
1752                    _ => "while parsing a node, did not find expected node content",
1753                };
1754                Err(ScanError::new_str(span.start, info))
1755            }
1756        }
1757    }
1758
1759    fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
1760    where
1761        'input: 'a,
1762    {
1763        match *self.peek_token()? {
1764            QueuedToken(_, QueuedTokenType::Key) => {
1765                // Indentation is only meaningful for block mapping keys.
1766                if let QueuedToken(key_span, QueuedTokenType::Key) = *self.peek_token()? {
1767                    self.pending_key_indent = Some(key_span.start.col());
1768                }
1769                self.skip();
1770                if let Some(comment) = self.maybe_next_comment_event()? {
1771                    self.state = State::BlockMappingKeyNode;
1772                    Ok(comment)
1773                } else {
1774                    self.block_mapping_key_node()
1775                }
1776            }
1777            // A missing block-mapping key before `:` is represented as an empty scalar.
1778            QueuedToken(mark, QueuedTokenType::Value) => {
1779                self.state = State::BlockMappingValue;
1780                Ok((Event::empty_scalar(), mark))
1781            }
1782            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
1783                self.pop_state();
1784                self.skip();
1785                Ok((Event::MappingEnd, mark))
1786            }
1787            QueuedToken(span, _) => Err(ScanError::new_str(
1788                span.start,
1789                "while parsing a block mapping, did not find expected key",
1790            )),
1791        }
1792    }
1793
1794    fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1795    where
1796        'input: 'a,
1797    {
1798        if let QueuedToken(
1799            mark,
1800            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1801        ) = *self.peek_token()?
1802        {
1803            self.state = State::BlockMappingValue;
1804            Ok((Event::empty_scalar(), mark))
1805        } else {
1806            self.defer_parse_node(
1807                State::BlockNodeOrIndentlessSequence,
1808                State::BlockMappingValue,
1809                true,
1810                true,
1811            )
1812        }
1813    }
1814
1815    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1816    where
1817        'input: 'a,
1818    {
1819        match *self.peek_token()? {
1820            QueuedToken(mark, QueuedTokenType::Value) => {
1821                self.skip();
1822                let comments = self.next_comment_events()?;
1823                if comments.is_empty() {
1824                    self.block_mapping_value_node_with_empty_span(mark)
1825                } else if let Ok(QueuedToken(
1826                    _,
1827                    QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1828                )) = self.peek_token()
1829                {
1830                    self.state = State::BlockMappingKey;
1831                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1832                } else {
1833                    self.pending_empty_scalar_span = Some(mark);
1834                    self.state = State::BlockMappingValueNode;
1835                    Ok(self.queue_tail_and_return_first(comments))
1836                }
1837            }
1838            QueuedToken(mark, _) => {
1839                self.state = State::BlockMappingKey;
1840                // empty scalar
1841                Ok((Event::empty_scalar(), mark))
1842            }
1843        }
1844    }
1845
1846    fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1847    where
1848        'input: 'a,
1849    {
1850        let mark = match self.pending_empty_scalar_span.take() {
1851            Some(mark) => mark,
1852            None => self.peek_token()?.0,
1853        };
1854        self.block_mapping_value_node_with_empty_span(mark)
1855    }
1856
1857    fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1858    where
1859        'input: 'a,
1860    {
1861        if let QueuedToken(
1862            _,
1863            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1864        ) = *self.peek_token()?
1865        {
1866            self.state = State::BlockMappingKey;
1867            Ok((Event::empty_scalar(), mark))
1868        } else {
1869            self.defer_parse_node(
1870                State::BlockNodeOrIndentlessSequence,
1871                State::BlockMappingKey,
1872                true,
1873                true,
1874            )
1875        }
1876    }
1877
1878    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1879    where
1880        'input: 'a,
1881    {
1882        let span: Span =
1883            if let QueuedToken(mark, QueuedTokenType::FlowMappingEnd) = *self.peek_token()? {
1884                mark
1885            } else {
1886                if !first {
1887                    match *self.peek_token()? {
1888                        QueuedToken(_, QueuedTokenType::FlowEntry) => {
1889                            self.skip();
1890                            if let Some(comment) = self.maybe_next_comment_event()? {
1891                                self.state = State::FlowMappingFirstKey;
1892                                return Ok(comment);
1893                            }
1894                        }
1895                        QueuedToken(span, _) => {
1896                            return Err(ScanError::new_str(
1897                                span.start,
1898                                "while parsing a flow mapping, did not find expected ',' or '}'",
1899                            ))
1900                        }
1901                    }
1902                }
1903
1904                match *self.peek_token()? {
1905                    QueuedToken(_, QueuedTokenType::Key) => {
1906                        self.skip();
1907                        if let Some(comment) = self.maybe_next_comment_event()? {
1908                            self.state = State::FlowMappingKeyNode;
1909                            return Ok(comment);
1910                        }
1911                        return self.flow_mapping_key_node();
1912                    }
1913                    QueuedToken(marker, QueuedTokenType::Value) => {
1914                        self.state = State::FlowMappingValue;
1915                        return Ok((Event::empty_scalar(), marker));
1916                    }
1917                    QueuedToken(_, QueuedTokenType::FlowMappingEnd) => (),
1918                    _ => {
1919                        return self.defer_parse_node(
1920                            State::FlowNode,
1921                            State::FlowMappingEmptyValue,
1922                            false,
1923                            false,
1924                        );
1925                    }
1926                }
1927
1928                self.peek_token()?.0
1929            };
1930
1931        self.pop_state();
1932        self.skip();
1933        Ok((Event::MappingEnd, span))
1934    }
1935
1936    fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1937    where
1938        'input: 'a,
1939    {
1940        if let QueuedToken(
1941            mark,
1942            QueuedTokenType::Value | QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
1943        ) = *self.peek_token()?
1944        {
1945            self.state = State::FlowMappingValue;
1946            Ok((Event::empty_scalar(), mark))
1947        } else {
1948            self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false)
1949        }
1950    }
1951
1952    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1953    where
1954        'input: 'a,
1955    {
1956        let span: Span = {
1957            if empty {
1958                let QueuedToken(mark, _) = *self.peek_token()?;
1959                self.state = State::FlowMappingKey;
1960                return Ok((Event::empty_scalar(), mark));
1961            }
1962            match *self.peek_token()? {
1963                QueuedToken(span, QueuedTokenType::Value) => {
1964                    self.skip();
1965                    let comments = self.next_comment_events()?;
1966                    if comments.is_empty() {
1967                        return self.flow_mapping_value_node_with_empty_span(span);
1968                    }
1969                    if let Ok(QueuedToken(
1970                        _,
1971                        QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
1972                    )) = self.peek_token()
1973                    {
1974                        self.state = State::FlowMappingKey;
1975                        return Ok(
1976                            self.queue_event_by_span(comments, (Event::empty_scalar(), span))
1977                        );
1978                    }
1979
1980                    self.pending_empty_scalar_span = Some(span);
1981                    self.state = State::FlowMappingValueNode;
1982                    return Ok(self.queue_tail_and_return_first(comments));
1983                }
1984                QueuedToken(marker, _) => marker,
1985            }
1986        };
1987
1988        self.state = State::FlowMappingKey;
1989        Ok((Event::empty_scalar(), span))
1990    }
1991
1992    fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1993    where
1994        'input: 'a,
1995    {
1996        let mark = match self.pending_empty_scalar_span.take() {
1997            Some(mark) => mark,
1998            None => Span::empty(self.peek_token()?.0.start),
1999        };
2000        self.flow_mapping_value_node_with_empty_span(mark)
2001    }
2002
2003    fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2004    where
2005        'input: 'a,
2006    {
2007        match self.peek_token()?.1 {
2008            QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd => {
2009                self.state = State::FlowMappingKey;
2010                Ok((Event::empty_scalar(), mark))
2011            }
2012            _ => self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false),
2013        }
2014    }
2015
2016    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
2017    where
2018        'input: 'a,
2019    {
2020        match *self.peek_token()? {
2021            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
2022                self.pop_state();
2023                self.skip();
2024                return Ok((Event::SequenceEnd, mark));
2025            }
2026            QueuedToken(_, QueuedTokenType::FlowEntry) if !first => {
2027                self.skip();
2028                if let Some(comment) = self.maybe_next_comment_event()? {
2029                    self.state = State::FlowSequenceFirstEntry;
2030                    return Ok(comment);
2031                }
2032            }
2033            QueuedToken(span, _) if !first => {
2034                return Err(ScanError::new_str(
2035                    span.start,
2036                    "while parsing a flow sequence, expected ',' or ']'",
2037                ));
2038            }
2039            _ => { /* next */ }
2040        }
2041        match *self.peek_token()? {
2042            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
2043                self.pop_state();
2044                self.skip();
2045                Ok((Event::SequenceEnd, mark))
2046            }
2047            QueuedToken(mark, QueuedTokenType::Key) => {
2048                self.state = State::FlowSequenceEntryMappingKey;
2049                self.skip();
2050                Ok((Event::MappingStart(StructureStyle::Flow, 0, None), mark))
2051            }
2052            _ => self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false),
2053        }
2054    }
2055
2056    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
2057    where
2058        'input: 'a,
2059    {
2060        match *self.peek_token()? {
2061            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2062                self.skip();
2063                let comments = self.next_comment_events()?;
2064                if comments.is_empty() {
2065                    self.indentless_sequence_entry_node_with_empty_span(mark)
2066                } else if let Ok(QueuedToken(
2067                    _,
2068                    QueuedTokenType::BlockEntry
2069                    | QueuedTokenType::Key
2070                    | QueuedTokenType::Value
2071                    | QueuedTokenType::BlockEnd,
2072                )) = self.peek_token()
2073                {
2074                    self.state = State::IndentlessSequenceEntry;
2075                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2076                } else {
2077                    self.pending_empty_scalar_span = Some(mark);
2078                    self.state = State::IndentlessSequenceEntryNode;
2079                    Ok(self.queue_tail_and_return_first(comments))
2080                }
2081            }
2082            QueuedToken(mark, _) => {
2083                self.pop_state();
2084                Ok((Event::SequenceEnd, mark))
2085            }
2086        }
2087    }
2088
2089    fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2090    where
2091        'input: 'a,
2092    {
2093        let mark = match self.pending_empty_scalar_span.take() {
2094            Some(mark) => mark,
2095            None => self.peek_token()?.0,
2096        };
2097        self.indentless_sequence_entry_node_with_empty_span(mark)
2098    }
2099
2100    fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2101    where
2102        'input: 'a,
2103    {
2104        if let QueuedToken(
2105            _,
2106            QueuedTokenType::BlockEntry
2107            | QueuedTokenType::Key
2108            | QueuedTokenType::Value
2109            | QueuedTokenType::BlockEnd,
2110        ) = *self.peek_token()?
2111        {
2112            self.state = State::IndentlessSequenceEntry;
2113            Ok((Event::empty_scalar(), mark))
2114        } else {
2115            self.defer_parse_node(
2116                State::BlockNode,
2117                State::IndentlessSequenceEntry,
2118                true,
2119                false,
2120            )
2121        }
2122    }
2123
2124    fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
2125    where
2126        'input: 'a,
2127    {
2128        match *self.peek_token()? {
2129            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
2130                self.pop_state();
2131                self.skip();
2132                Ok((Event::SequenceEnd, mark))
2133            }
2134            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2135                self.skip();
2136                let comments = self.next_comment_events()?;
2137                if comments.is_empty() {
2138                    self.block_sequence_entry_node_with_empty_span(mark)
2139                } else if let Ok(QueuedToken(
2140                    _,
2141                    QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd,
2142                )) = self.peek_token()
2143                {
2144                    self.state = State::BlockSequenceEntry;
2145                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2146                } else {
2147                    self.pending_empty_scalar_span = Some(mark);
2148                    self.state = State::BlockSequenceEntryNode;
2149                    Ok(self.queue_tail_and_return_first(comments))
2150                }
2151            }
2152            QueuedToken(span, _) => Err(ScanError::new_str(
2153                span.start,
2154                "while parsing a block collection, did not find expected '-' indicator",
2155            )),
2156        }
2157    }
2158
2159    fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2160    where
2161        'input: 'a,
2162    {
2163        let mark = match self.pending_empty_scalar_span.take() {
2164            Some(mark) => mark,
2165            None => self.peek_token()?.0,
2166        };
2167        self.block_sequence_entry_node_with_empty_span(mark)
2168    }
2169
2170    fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2171    where
2172        'input: 'a,
2173    {
2174        if let QueuedToken(_, QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd) =
2175            *self.peek_token()?
2176        {
2177            self.state = State::BlockSequenceEntry;
2178            Ok((Event::empty_scalar(), mark))
2179        } else {
2180            self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false)
2181        }
2182    }
2183
2184    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
2185    where
2186        'input: 'a,
2187    {
2188        if let QueuedToken(mark, QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd) =
2189            *self.peek_token()?
2190        {
2191            self.state = State::FlowSequenceEntryMappingValue;
2192            Ok((Event::empty_scalar(), mark))
2193        } else {
2194            self.defer_parse_node(
2195                State::FlowNode,
2196                State::FlowSequenceEntryMappingValue,
2197                false,
2198                false,
2199            )
2200        }
2201    }
2202
2203    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
2204    where
2205        'input: 'a,
2206    {
2207        match *self.peek_token()? {
2208            QueuedToken(_, QueuedTokenType::Value) => {
2209                self.skip();
2210                if let Some(comment) = self.maybe_next_comment_event()? {
2211                    self.state = State::FlowSequenceEntryMappingValueNode;
2212                    Ok(comment)
2213                } else {
2214                    self.flow_sequence_entry_mapping_value_node()
2215                }
2216            }
2217            QueuedToken(mark, _) => {
2218                self.state = State::FlowSequenceEntryMappingEnd;
2219                Ok((Event::empty_scalar(), mark))
2220            }
2221        }
2222    }
2223
2224    fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2225    where
2226        'input: 'a,
2227    {
2228        let QueuedToken(span, ref tok) = *self.peek_token()?;
2229        if matches!(
2230            tok,
2231            QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd
2232        ) {
2233            self.state = State::FlowSequenceEntryMappingEnd;
2234            Ok((Event::empty_scalar(), Span::empty(span.start)))
2235        } else {
2236            self.defer_parse_node(
2237                State::FlowNode,
2238                State::FlowSequenceEntryMappingEnd,
2239                false,
2240                false,
2241            )
2242        }
2243    }
2244
2245    #[allow(clippy::unnecessary_wraps)]
2246    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
2247    where
2248        'input: 'a,
2249    {
2250        self.state = State::FlowSequenceEntry;
2251        let QueuedToken(span, _) = *self.peek_token()?;
2252        Ok((Event::MappingEnd, Span::empty(span.start)))
2253    }
2254
2255    /// Resolve a tag from the handle and the suffix.
2256    fn resolve_tag(
2257        &self,
2258        span: Span,
2259        handle: &Cow<'input, str>,
2260        suffix: Cow<'input, str>,
2261    ) -> Result<Cow<'input, Tag>, ScanError> {
2262        let original_handle = handle.to_string();
2263        let suffix = suffix.into_owned();
2264        let tag = if handle == "!!" {
2265            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
2266            // overridden.
2267            Tag::with_original_handle(
2268                self.tags
2269                    .get("!!")
2270                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
2271                suffix,
2272                original_handle,
2273            )
2274        } else if handle.is_empty() && suffix == "!" {
2275            // "!" introduces a local tag. Local tags may have their prefix overridden.
2276            match self.tags.get("") {
2277                Some(prefix) => Tag::with_original_handle(prefix.clone(), suffix, original_handle),
2278                None => Tag::with_original_handle(String::new(), suffix, original_handle),
2279            }
2280        } else {
2281            // Lookup handle in our tag directives.
2282            let prefix = self.tags.get(&**handle);
2283            if let Some(prefix) = prefix {
2284                Tag::with_original_handle(prefix.clone(), suffix, original_handle)
2285            } else {
2286                // Otherwise, it may be a local handle. With a local handle, the handle is set to
2287                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
2288                // If the handle is of the form "!foo!", this cannot be a local handle and we need
2289                // to error.
2290                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2291                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
2292                }
2293                Tag::with_original_handle(handle.to_string(), suffix, original_handle)
2294            }
2295        };
2296        Ok(Cow::Owned(tag))
2297    }
2298}
2299
2300impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
2301    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
2302        if let Some(ref x) = self.current {
2303            Some(Ok(x))
2304        } else {
2305            if self.stream_end_emitted {
2306                return None;
2307            }
2308            match self.next_event_impl() {
2309                Ok(token) => self.current = Some(token),
2310                Err(e) => return Some(e.into_result()),
2311            }
2312            self.current.as_ref().map(Ok)
2313        }
2314    }
2315
2316    fn next_event(&mut self) -> Option<ParseResult<'input>> {
2317        if self.stream_end_emitted {
2318            return None;
2319        }
2320
2321        let tok = self.next_event_impl();
2322        if matches!(tok, Ok((Event::StreamEnd, _))) {
2323            self.stream_end_emitted = true;
2324        }
2325        Some(tok)
2326    }
2327
2328    fn load<R: SpannedEventReceiver<'input>>(
2329        &mut self,
2330        recv: &mut R,
2331        multi: bool,
2332    ) -> Result<(), ScanError> {
2333        let mut recv = InfallibleSpannedReceiver(recv);
2334        into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
2335    }
2336
2337    fn try_load<R: TrySpannedEventReceiver<'input>>(
2338        &mut self,
2339        recv: &mut R,
2340        multi: bool,
2341    ) -> Result<(), TryLoadError<R::Error>> {
2342        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
2343        if !self.scanner.stream_started() || stream_start_buffered {
2344            let (ev, span) = self.next_event_impl()?;
2345            if ev != Event::StreamStart {
2346                return Err(TryLoadError::scan(ScanError::new_str(
2347                    span.start,
2348                    "did not find expected <stream-start>",
2349                )));
2350            }
2351            try_emit(recv, ev, span)?;
2352        }
2353
2354        if self.scanner.stream_ended() {
2355            // The scanner has already reached EOF before the document loop, so emit the terminal
2356            // event and stop.
2357            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
2358            return Ok(());
2359        }
2360
2361        loop {
2362            let (ev, span) = self.next_event_impl()?;
2363            let is_doc_end = matches!(ev, Event::DocumentEnd);
2364            let is_stream_end = matches!(ev, Event::StreamEnd);
2365
2366            try_emit(recv, ev, span)?;
2367
2368            if is_stream_end {
2369                return Ok(());
2370            }
2371            if !multi && is_doc_end {
2372                return Ok(());
2373            }
2374        }
2375    }
2376}
2377
2378impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
2379    type Item = Result<(Event<'input>, Span), ScanError>;
2380
2381    fn next(&mut self) -> Option<Self::Item> {
2382        self.next_event()
2383    }
2384}
2385
2386#[cfg(test)]
2387mod test {
2388    use alloc::{
2389        borrow::{Cow, ToOwned},
2390        string::{String, ToString},
2391        vec::Vec,
2392    };
2393    use core::{error::Error as _, fmt};
2394
2395    use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
2396
2397    use super::{
2398        Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
2399        TrySpannedEventReceiver,
2400    };
2401
2402    #[derive(Default)]
2403    struct CollectingSink<'input> {
2404        events: Vec<Event<'input>>,
2405    }
2406
2407    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
2408        fn on_event(&mut self, ev: Event<'input>) {
2409            self.events.push(ev);
2410        }
2411    }
2412
2413    fn first_error_info(input: &str) -> String {
2414        for event in Parser::new_from_str(input) {
2415            if let Err(err) = event {
2416                return err.info().to_owned();
2417            }
2418        }
2419        panic!("expected parser error")
2420    }
2421
2422    #[test]
2423    fn deferred_parse_node_can_emit_comment_before_flow_node() {
2424        let mut parser = Parser::new_from_str("# deferred\nvalue\n");
2425        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2426        assert_eq!(
2427            parser.document_start(true).unwrap().0,
2428            Event::DocumentStart(false)
2429        );
2430
2431        let (event, _) = parser
2432            .defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
2433            .unwrap();
2434
2435        assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
2436        assert_eq!(parser.state, State::FlowNode);
2437    }
2438
2439    #[test]
2440    fn queued_node_event_gets_pending_key_indent() {
2441        let mut parser = Parser::new_from_str("");
2442        let span = Span::empty(Marker::new(0, 1, 0));
2443
2444        parser.pending_key_indent = Some(3);
2445        parser
2446            .queued_events
2447            .push_back((Event::SequenceStart(StructureStyle::Block, 0, None), span));
2448
2449        let (event, span) = parser.next_event_impl().unwrap();
2450
2451        assert!(matches!(
2452            event,
2453            Event::SequenceStart(StructureStyle::Block, 0, None)
2454        ));
2455        assert_eq!(span.indent, Some(3));
2456        assert_eq!(parser.pending_key_indent, None);
2457    }
2458
2459    #[test]
2460    fn state_machine_handles_deferred_flow_node_states() {
2461        let mut parser = Parser::new_from_str("value\n");
2462        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2463        assert_eq!(
2464            parser.document_start(true).unwrap().0,
2465            Event::DocumentStart(false)
2466        );
2467        parser.state = State::FlowNode;
2468        parser.push_state(State::End);
2469
2470        let (event, _) = parser.state_machine().unwrap();
2471
2472        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2473
2474        let mut parser = Parser::new_from_str("value\n");
2475        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2476        assert_eq!(
2477            parser.document_start(true).unwrap().0,
2478            Event::DocumentStart(false)
2479        );
2480        parser.state = State::FlowSequenceEntryMappingValueNode;
2481
2482        let (event, _) = parser.state_machine().unwrap();
2483
2484        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2485    }
2486
2487    #[test]
2488    fn display_resolved_core_tag_without_extra_bang() {
2489        let tag = Tag::with_original_handle("tag:yaml.org,2002:", "str", "!!");
2490
2491        assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
2492    }
2493
2494    #[test]
2495    fn tag_helpers_distinguish_core_and_local_tags() {
2496        let core = Tag::with_original_handle("tag:yaml.org,2002:", "int", "!!");
2497        let local = Tag::new("!", "thing");
2498        let non_specific = Tag::with_original_handle("", "!", "");
2499        let verbatim = Tag::with_original_handle("", "tag:example.com,2000:thing", "");
2500
2501        assert!(core.is_yaml_core_schema());
2502        assert!(core.is_yaml_core_schema_tag("int"));
2503        assert!(!core.is_yaml_core_schema_tag("str"));
2504        assert!(!core.is_custom());
2505        assert_eq!(core.parts(), ("tag:yaml.org,2002:", "int"));
2506        assert_eq!(core.original_parts(), ("!!", "int"));
2507        assert_eq!(core.original(), "!!int");
2508
2509        assert!(!local.is_yaml_core_schema());
2510        assert!(!local.is_yaml_core_schema_tag("thing"));
2511        assert!(local.is_custom());
2512        assert_eq!(local.parts(), ("!", "thing"));
2513        assert_eq!(local.original_parts(), ("!", "thing"));
2514        assert_eq!(local.original(), "!thing");
2515        assert_eq!(local.to_string(), "!thing");
2516
2517        assert_eq!(non_specific.parts(), ("", "!"));
2518        assert_eq!(non_specific.original_parts(), ("", "!"));
2519        assert_eq!(non_specific.original(), "!");
2520
2521        assert_eq!(verbatim.parts(), ("", "tag:example.com,2000:thing"));
2522        assert_eq!(
2523            verbatim.original_parts(),
2524            ("", "tag:example.com,2000:thing")
2525        );
2526        assert_eq!(verbatim.original(), "!<tag:example.com,2000:thing>");
2527    }
2528
2529    #[test]
2530    fn attach_tag_start_applies_marker_to_span() {
2531        let event = Event::Scalar("value".into(), ScalarStyle::Plain, 0, None);
2532        let span = Span::new(Marker::new(6, 1, 6), Marker::new(11, 1, 11));
2533        let tag_start = Marker::new(0, 1, 0);
2534
2535        let (attached_event, attached_span) =
2536            Parser::<crate::input::str::StrInput<'_>>::attach_tag_start(
2537                event.clone(),
2538                span,
2539                Some(tag_start),
2540            );
2541
2542        assert_eq!(attached_event, event);
2543        assert_eq!(attached_span.start, span.start);
2544        assert_eq!(attached_span.end, span.end);
2545        assert_eq!(attached_span.tag_start(), Some(tag_start));
2546    }
2547
2548    #[test]
2549    fn event_inspection_helpers_report_node_metadata() {
2550        let tag = Tag::new("!", "thing");
2551        let scalar = Event::Scalar(
2552            "value".into(),
2553            ScalarStyle::DoubleQuoted,
2554            7,
2555            Some(Cow::Borrowed(&tag)),
2556        );
2557        let sequence =
2558            Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
2559        let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));
2560
2561        assert_eq!(scalar.anchor_id(), Some(7));
2562        assert_eq!(scalar.alias_id(), None);
2563        assert_eq!(scalar.tag(), Some(&tag));
2564        assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
2565        assert!(scalar.is_node());
2566
2567        assert_eq!(sequence.anchor_id(), Some(8));
2568        assert_eq!(sequence.alias_id(), None);
2569        assert_eq!(sequence.tag(), Some(&tag));
2570        assert_eq!(sequence.scalar(), None);
2571        assert!(sequence.is_node());
2572
2573        assert_eq!(mapping.anchor_id(), Some(9));
2574        assert_eq!(mapping.alias_id(), None);
2575        assert_eq!(mapping.tag(), Some(&tag));
2576        assert_eq!(mapping.scalar(), None);
2577        assert!(mapping.is_node());
2578
2579        let alias = Event::Alias(10);
2580        assert_eq!(alias.anchor_id(), None);
2581        assert_eq!(alias.alias_id(), Some(10));
2582        assert_eq!(alias.tag(), None);
2583        assert_eq!(alias.scalar(), None);
2584        assert!(alias.is_node());
2585
2586        let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
2587        assert_eq!(unanchored_scalar.anchor_id(), None);
2588        assert_eq!(unanchored_scalar.alias_id(), None);
2589
2590        let stream_start = Event::StreamStart;
2591        assert_eq!(stream_start.anchor_id(), None);
2592        assert_eq!(stream_start.alias_id(), None);
2593        assert_eq!(stream_start.tag(), None);
2594        assert_eq!(stream_start.scalar(), None);
2595        assert!(!stream_start.is_node());
2596    }
2597
2598    #[test]
2599    fn test_peek_eq_parse() {
2600        let s = "
2601a0 bb: val
2602a1: &x
2603    b1: 4
2604    b2: d
2605a2: 4
2606a3: [1, 2, 3]
2607a4:
2608    - [a1, a2]
2609    - 2
2610a5: *x
2611";
2612        let mut p = Parser::new_from_str(s);
2613        loop {
2614            let event_peek = p.peek().unwrap().unwrap().clone();
2615            let event = p.next_event().unwrap().unwrap();
2616            assert_eq!(event, event_peek);
2617            if event.0 == Event::StreamEnd {
2618                break;
2619            }
2620        }
2621    }
2622
2623    #[test]
2624    fn test_repeated_peek_returns_buffered_event() {
2625        let mut parser = Parser::new_from_str("key: value\n");
2626
2627        let first_peek = parser.peek().unwrap().unwrap().clone();
2628        let second_peek = parser.peek().unwrap().unwrap().clone();
2629        let next = parser.next_event().unwrap().unwrap();
2630
2631        assert_eq!(first_peek, second_peek);
2632        assert_eq!(first_peek, next);
2633    }
2634
2635    #[test]
2636    fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
2637        let mut parser = Parser::new_from_str("a: [1, 2");
2638
2639        loop {
2640            match parser.peek() {
2641                Some(Ok(_)) => {
2642                    parser.next_event().unwrap().unwrap();
2643                }
2644                Some(Err(error)) => {
2645                    assert_eq!(error.info(), "unclosed bracket '['");
2646                    break;
2647                }
2648                None => panic!("expected parse error"),
2649            }
2650        }
2651    }
2652
2653    #[test]
2654    fn test_peek_and_next_return_none_after_stream_end() {
2655        let mut parser = Parser::new_from_str("");
2656
2657        assert!(matches!(
2658            parser.next_event().unwrap().unwrap().0,
2659            Event::StreamStart
2660        ));
2661        assert!(matches!(
2662            parser.next_event().unwrap().unwrap().0,
2663            Event::StreamEnd
2664        ));
2665        assert!(parser.next_event().is_none());
2666        assert!(parser.peek().is_none());
2667    }
2668
2669    #[test]
2670    fn test_load_after_stream_already_ended_emits_stream_end() {
2671        let mut parser = Parser::new_from_str("");
2672        while parser.next_event().is_some() {}
2673
2674        let mut sink = CollectingSink::default();
2675        parser.load(&mut sink, true).unwrap();
2676
2677        assert_eq!(sink.events, vec![Event::StreamEnd]);
2678    }
2679
2680    #[test]
2681    fn test_load_visits_nested_collection_events() {
2682        let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
2683        let mut sink = CollectingSink::default();
2684
2685        parser.load(&mut sink, true).unwrap();
2686
2687        assert_eq!(
2688            sink.events,
2689            vec![
2690                Event::StreamStart,
2691                Event::DocumentStart(false),
2692                Event::MappingStart(StructureStyle::Block, 0, None),
2693                Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
2694                Event::SequenceStart(StructureStyle::Block, 0, None),
2695                Event::MappingStart(StructureStyle::Block, 0, None),
2696                Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
2697                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2698                Event::MappingEnd,
2699                Event::SequenceStart(StructureStyle::Flow, 0, None),
2700                Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
2701                Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
2702                Event::SequenceEnd,
2703                Event::SequenceEnd,
2704                Event::MappingEnd,
2705                Event::DocumentEnd,
2706                Event::StreamEnd,
2707            ]
2708        );
2709    }
2710
2711    #[derive(Clone, Debug, PartialEq, Eq)]
2712    enum ValidationError {
2713        ForbiddenValue,
2714    }
2715
2716    #[derive(Debug)]
2717    struct ReceiverFailure;
2718
2719    impl fmt::Display for ReceiverFailure {
2720        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2721            write!(f, "receiver failed")
2722        }
2723    }
2724
2725    impl core::error::Error for ReceiverFailure {}
2726
2727    struct FailingSink<'input> {
2728        events: Vec<Event<'input>>,
2729    }
2730
2731    impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
2732        type Error = ValidationError;
2733
2734        fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
2735            let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
2736            self.events.push(ev);
2737            if should_fail {
2738                Err(ValidationError::ForbiddenValue)
2739            } else {
2740                Ok(())
2741            }
2742        }
2743    }
2744
2745    #[test]
2746    fn test_try_load_stops_on_receiver_error() {
2747        let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
2748        let mut sink = FailingSink { events: Vec::new() };
2749
2750        let err = parser.try_load(&mut sink, true).unwrap_err();
2751
2752        assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
2753        assert!(sink
2754            .events
2755            .iter()
2756            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
2757        assert!(sink
2758            .events
2759            .iter()
2760            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
2761        assert!(!sink
2762            .events
2763            .iter()
2764            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
2765    }
2766
2767    struct SpannedFailingSink {
2768        failed_span: Option<Span>,
2769    }
2770
2771    impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
2772        type Error = Span;
2773
2774        fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
2775            if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
2776                self.failed_span = Some(span);
2777                Err(span)
2778            } else {
2779                Ok(())
2780            }
2781        }
2782    }
2783
2784    #[test]
2785    fn test_try_load_spanned_receiver_gets_span() {
2786        let mut parser = Parser::new_from_str("value: bad\n");
2787        let mut sink = SpannedFailingSink { failed_span: None };
2788
2789        let err = parser.try_load(&mut sink, false).unwrap_err();
2790
2791        let TryLoadError::Receiver(span) = err else {
2792            panic!("expected receiver error");
2793        };
2794
2795        assert_eq!(Some(span), sink.failed_span);
2796        assert!(!span.is_empty());
2797    }
2798
2799    struct NeverFails {
2800        count: usize,
2801    }
2802
2803    impl<'input> TryEventReceiver<'input> for NeverFails {
2804        type Error = ValidationError;
2805
2806        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
2807            self.count += 1;
2808            Ok(())
2809        }
2810    }
2811
2812    #[test]
2813    fn test_try_load_returns_scan_error() {
2814        let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
2815        let mut sink = NeverFails { count: 0 };
2816
2817        let err = parser.try_load(&mut sink, true).unwrap_err();
2818
2819        let TryLoadError::Scan(err) = err else {
2820            panic!("expected scan error");
2821        };
2822        assert_eq!(err.info(), "duplicate version directive");
2823    }
2824
2825    #[test]
2826    fn test_try_load_error_display_and_source_cover_both_variants() {
2827        let scan = ScanError::new_str(Marker::new(3, 1, 3), "bad yaml");
2828        let scan_err: TryLoadError<ReceiverFailure> = scan.into();
2829
2830        assert!(scan_err.to_string().starts_with("parser error: bad yaml"));
2831        assert!(scan_err.source().is_some());
2832
2833        let receiver_err = TryLoadError::Receiver(ReceiverFailure);
2834
2835        assert_eq!(receiver_err.to_string(), "receiver error: receiver failed");
2836        assert!(receiver_err.source().is_some());
2837    }
2838
2839    #[test]
2840    fn test_try_load_document_rejects_non_document_start_event() {
2841        let mut parser = Parser::new_from_str("");
2842        let span = Span::empty(Marker::new(0, 1, 0));
2843        let mut sink = NeverFails { count: 0 };
2844
2845        let err = parser
2846            .try_load_document(
2847                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2848                span,
2849                &mut sink,
2850            )
2851            .unwrap_err();
2852
2853        let TryLoadError::Scan(err) = err else {
2854            panic!("expected scan error");
2855        };
2856        assert_eq!(err.info(), "did not find expected <document-start>");
2857    }
2858
2859    #[test]
2860    fn test_try_load_requires_buffered_stream_start() {
2861        let mut parser = Parser::new_from_str("");
2862        let span = Span::empty(Marker::new(0, 1, 0));
2863        parser.current = Some((
2864            Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2865            span,
2866        ));
2867        let mut sink = NeverFails { count: 0 };
2868
2869        let err = parser.try_load(&mut sink, true).unwrap_err();
2870
2871        let TryLoadError::Scan(err) = err else {
2872            panic!("expected scan error");
2873        };
2874        assert_eq!(err.info(), "did not find expected <stream-start>");
2875    }
2876
2877    #[test]
2878    fn test_try_load_after_stream_already_ended_emits_stream_end() {
2879        let mut parser = Parser::new_from_str("");
2880        while parser.next_event().is_some() {}
2881
2882        let mut sink = FailingSink { events: Vec::new() };
2883        parser.try_load(&mut sink, true).unwrap();
2884
2885        assert_eq!(sink.events, vec![Event::StreamEnd]);
2886    }
2887
2888    #[test]
2889    fn test_load_single_document_stops_before_next_document() {
2890        let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
2891        let mut sink = CollectingSink::default();
2892
2893        parser.load(&mut sink, false).unwrap();
2894
2895        assert!(sink
2896            .events
2897            .iter()
2898            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
2899        assert!(!sink
2900            .events
2901            .iter()
2902            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
2903        assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
2904    }
2905
2906    #[test]
2907    fn test_duplicate_version_directive_errors() {
2908        assert_eq!(
2909            first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
2910            "duplicate version directive"
2911        );
2912    }
2913
2914    #[test]
2915    fn test_duplicate_tag_directive_errors() {
2916        assert_eq!(
2917            first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
2918            "the TAG directive must only be given at most once per handle in the same document"
2919        );
2920    }
2921
2922    #[test]
2923    fn test_directive_after_implicit_document_requires_explicit_end() {
2924        assert_eq!(
2925            first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
2926            "missing explicit document end marker before directive"
2927        );
2928    }
2929
2930    #[test]
2931    fn test_anchor_offset_overflow_reports_error() {
2932        let mut parser = Parser::new_from_str("&a value");
2933        parser.set_anchor_offset(usize::MAX);
2934
2935        let err = parser
2936            .find_map(Result::err)
2937            .expect("anchor registration should overflow");
2938
2939        assert_eq!(
2940            err.info(),
2941            "while parsing anchor, anchor count exceeded supported limit"
2942        );
2943    }
2944
2945    #[test]
2946    fn test_alias_resolves_to_registered_anchor_id() {
2947        let events = Parser::new_from_str("- &a value\n- *a\n")
2948            .map(|event| event.unwrap().0)
2949            .collect::<Vec<_>>();
2950
2951        assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
2952    }
2953
2954    #[test]
2955    fn test_anchor_then_tag_applies_both_to_scalar() {
2956        let events = Parser::new_from_str("&a !!str value")
2957            .map(|event| event.unwrap().0)
2958            .collect::<Vec<_>>();
2959
2960        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2961            .iter()
2962            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2963        else {
2964            panic!("expected tagged anchored scalar");
2965        };
2966
2967        assert_eq!(value, "value");
2968        assert_eq!(*anchor_id, 1);
2969        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2970        assert_eq!(tag.suffix, "str");
2971        assert_eq!(tag.original_handle, "!!");
2972        assert_eq!(tag.original(), "!!str");
2973    }
2974
2975    #[test]
2976    fn test_tag_then_anchor_applies_both_to_scalar() {
2977        let events = Parser::new_from_str("!!str &a value")
2978            .map(|event| event.unwrap().0)
2979            .collect::<Vec<_>>();
2980
2981        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2982            .iter()
2983            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2984        else {
2985            panic!("expected tagged anchored scalar");
2986        };
2987
2988        assert_eq!(value, "value");
2989        assert_eq!(*anchor_id, 1);
2990        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2991        assert_eq!(tag.suffix, "str");
2992        assert_eq!(tag.original_handle, "!!");
2993        assert_eq!(tag.original(), "!!str");
2994    }
2995
2996    #[test]
2997    fn test_tag_directive_preserves_original_handle() {
2998        let events =
2999            Parser::new_from_str("%TAG !e! tag:example.com,2000:\n---\nconfig: !e!keep value\n")
3000                .map(|event| event.unwrap().0)
3001                .collect::<Vec<_>>();
3002
3003        let (value, tag) = events
3004            .iter()
3005            .find_map(|event| match event {
3006                Event::Scalar(value, _, _, Some(tag)) if value == "value" => Some((value, tag)),
3007                _ => None,
3008            })
3009            .expect("expected tagged scalar");
3010
3011        assert_eq!(value, "value");
3012        assert_eq!(tag.handle, "tag:example.com,2000:");
3013        assert_eq!(tag.suffix, "keep");
3014        assert_eq!(tag.original_handle, "!e!");
3015        assert_eq!(tag.parts(), ("tag:example.com,2000:", "keep"));
3016        assert_eq!(tag.original_parts(), ("!e!", "keep"));
3017        assert_eq!(tag.original(), "!e!keep");
3018    }
3019
3020    #[test]
3021    fn test_verbatim_tag_original_is_normalized_author_spelling() {
3022        let events = Parser::new_from_str("key: !<tag:example.com,2000:thing> value\n")
3023            .map(|event| event.unwrap().0)
3024            .collect::<Vec<_>>();
3025
3026        let Some(Event::Scalar(value, _, _, Some(tag))) = events
3027            .iter()
3028            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3029        else {
3030            panic!("expected tagged scalar");
3031        };
3032
3033        assert_eq!(value, "value");
3034        assert_eq!(tag.handle, "");
3035        assert_eq!(tag.suffix, "tag:example.com,2000:thing");
3036        assert_eq!(tag.original_handle, "");
3037        assert_eq!(tag.parts(), ("", "tag:example.com,2000:thing"));
3038        assert_eq!(tag.original_parts(), ("", "tag:example.com,2000:thing"));
3039        assert_eq!(tag.original(), "!<tag:example.com,2000:thing>");
3040    }
3041
3042    #[test]
3043    fn test_multiple_tag_directives_are_kept_within_document() {
3044        let text = r"
3045%TAG !a! tag:a,2024:
3046%TAG !b! tag:b,2024:
3047---
3048first: !a!x foo
3049second: !b!y bar
3050";
3051
3052        let mut seen_a = false;
3053        let mut seen_b = false;
3054        for event in Parser::new_from_str(text) {
3055            let (event, _) = event.unwrap();
3056            if let Event::Scalar(_, _, _, Some(tag)) = event {
3057                if tag.handle == "tag:a,2024:" {
3058                    seen_a = true;
3059                } else if tag.handle == "tag:b,2024:" {
3060                    seen_b = true;
3061                }
3062            }
3063        }
3064
3065        assert!(seen_a);
3066        assert!(seen_b);
3067    }
3068
3069    #[test]
3070    fn test_tags_are_cleared_when_next_document_has_no_directives() {
3071        let text = r"
3072%TAG !t! tag:test,2024:
3073--- !t!1
3074foo
3075--- !t!2
3076bar
3077";
3078
3079        let mut parser = Parser::new_from_str(text);
3080        for event in parser.by_ref() {
3081            let (event, _) = event.unwrap();
3082            if let Event::DocumentEnd = event {
3083                break;
3084            }
3085        }
3086
3087        match parser.next().unwrap().unwrap().0 {
3088            Event::DocumentStart(true) => {}
3089            _ => panic!("expected explicit second document start"),
3090        }
3091
3092        let err = parser.next().unwrap().unwrap_err();
3093        assert!(format!("{err}").contains("the handle wasn't declared"));
3094    }
3095
3096    #[test]
3097    fn test_pull_parser_clears_anchors_between_documents() {
3098        let mut parser = Parser::new_from_str(
3099            "--- &a value
3100--- *a
3101",
3102        );
3103
3104        for event in parser.by_ref() {
3105            let (event, _) = event.unwrap();
3106            if matches!(event, Event::DocumentEnd) {
3107                break;
3108            }
3109        }
3110
3111        match parser.next().unwrap().unwrap().0 {
3112            Event::DocumentStart(true) => {}
3113            _ => panic!("expected explicit second document start"),
3114        }
3115
3116        let err = parser.next().unwrap().unwrap_err();
3117        assert!(format!("{err}").contains("unknown anchor"));
3118    }
3119
3120    #[test]
3121    fn test_keep_tags_across_multiple_documents() {
3122        let text = r#"
3123%YAML 1.1
3124%TAG !t! tag:test,2024:
3125--- !t!1 &1
3126foo: "bar"
3127--- !t!2 &2
3128baz: "qux"
3129"#;
3130        for x in Parser::new_from_str(text).keep_tags(true) {
3131            let x = x.unwrap();
3132            if let Event::MappingStart(_, _, tag) = x.0 {
3133                let tag = tag.unwrap();
3134                assert_eq!(tag.handle, "tag:test,2024:");
3135            }
3136        }
3137
3138        for x in Parser::new_from_str(text).keep_tags(false) {
3139            if x.is_err() {
3140                // Test successful
3141                return;
3142            }
3143        }
3144        panic!("Test failed, did not encounter error")
3145    }
3146
3147    #[test]
3148    fn test_flow_sequence_mapping_allows_empty_key() {
3149        let parser = Parser::new_from_str("[?: value]");
3150        for event in parser {
3151            event.expect("parser should accept flow sequence mappings with empty keys");
3152        }
3153    }
3154
3155    #[test]
3156    fn test_keep_tags_does_not_persist_default_tag_handles() {
3157        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
3158
3159        let mut int_tags = Vec::new();
3160        for event in Parser::new_from_str(text).keep_tags(true) {
3161            let event = event.unwrap().0;
3162            if let Event::Scalar(_, _, _, Some(tag)) = event {
3163                if tag.suffix == "int" {
3164                    int_tags.push(tag.handle.clone());
3165                }
3166            }
3167        }
3168
3169        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
3170    }
3171
3172    #[test]
3173    fn test_resolve_tag_uses_overridden_local_prefix() {
3174        let mut parser = Parser::new_from_str("");
3175        parser
3176            .tags
3177            .insert(String::new(), "tag:local.example,2024:".to_string());
3178
3179        let tag = parser
3180            .resolve_tag(
3181                Span::empty(Marker::new(0, 1, 0)),
3182                &Cow::Borrowed(""),
3183                Cow::Borrowed("!"),
3184            )
3185            .unwrap();
3186
3187        assert_eq!(tag.handle, "tag:local.example,2024:");
3188        assert_eq!(tag.suffix, "!");
3189    }
3190
3191    #[test]
3192    fn test_load_after_peek_stream_start() {
3193        #[derive(Default)]
3194        struct Sink<'input> {
3195            events: Vec<Event<'input>>,
3196        }
3197
3198        impl<'input> EventReceiver<'input> for Sink<'input> {
3199            fn on_event(&mut self, ev: Event<'input>) {
3200                self.events.push(ev);
3201            }
3202        }
3203
3204        let mut parser = Parser::new_from_str("key: value\n");
3205        let mut sink = Sink::default();
3206
3207        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
3208        parser.load(&mut sink, false).unwrap();
3209
3210        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
3211        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
3212    }
3213}