Skip to main content

granit_parser/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{Comment, Placement, ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10    BufferedInput,
11};
12
13use alloc::{
14    borrow::Cow,
15    collections::{BTreeMap, BTreeSet, VecDeque},
16    string::{String, ToString},
17    vec::Vec,
18};
19use core::{
20    convert::Infallible,
21    fmt::{self, Display},
22};
23
/// Internal parser state.
///
/// The parser stores the state it is currently in, plus a stack of the states it should come
/// back to once the current one is done.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockNodeOrIndentlessSequence,
    FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    IndentlessSequenceEntryNode,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingKeyNode,
    BlockMappingValue,
    BlockMappingValueNode,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingValueNode,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingKeyNode,
    FlowMappingValue,
    FlowMappingValueNode,
    FlowMappingEmptyValue,
    BlockSequenceEntryNode,
    End,
}
58
59/// An event generated by the YAML parser.
60///
61/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
62/// [`EventReceiver`] trait.
63#[derive(Clone, PartialEq, Debug, Eq)]
64pub enum Event<'input> {
65    /// Reserved for internal use.
66    Nothing,
67    /// Event generated at the very beginning of parsing.
68    StreamStart,
69    /// Last event that will be generated by the parser. Signals EOF.
70    StreamEnd,
71    /// The start of a YAML document.
72    ///
73    /// When the boolean is `true`, it is an explicit document start
74    /// directive (`---`).
75    ///
76    /// When the boolean is `false`, it is an implicit document start
77    /// (without `---`).
78    DocumentStart(bool),
79    /// The end of a YAML document.
80    ///
81    /// This event is emitted for both explicit document end markers (`...`) and implicit document
82    /// ends.
83    DocumentEnd,
84    /// A YAML alias.
85    Alias(
86        /// The anchor ID the alias refers to.
87        usize,
88    ),
89    /// A YAML source comment.
90    ///
91    /// Comments are presentation metadata, not YAML data nodes. The payload is the raw text
92    /// exactly after `#`, excluding only the line break. The placement is a best-effort hint for
93    /// correlating the comment with nearby YAML presentation. The companion parser [`Span`] covers
94    /// the whole source comment, including `#` and excluding the line break.
95    Comment(
96        /// Raw comment payload exactly after `#`, excluding only the line break.
97        Cow<'input, str>,
98        /// Best-effort placement relative to nearby YAML content.
99        Placement,
100    ),
101    /// A YAML scalar value.
102    Scalar(
103        /// The scalar value after YAML escape processing.
104        Cow<'input, str>,
105        /// The source notation used for the scalar.
106        ScalarStyle,
107        /// The anchor ID defined on this scalar, or `0` if it has no anchor.
108        usize,
109        /// The resolved tag attached to this scalar, if any.
110        Option<Cow<'input, Tag>>,
111    ),
112    /// The start of a YAML sequence (array).
113    SequenceStart(
114        /// The notation style used for the sequence.
115        StructureStyle,
116        /// The anchor ID defined on this sequence, or `0` if it has no anchor.
117        usize,
118        /// The resolved tag attached to this sequence, if any.
119        Option<Cow<'input, Tag>>,
120    ),
121    /// The end of a YAML sequence (array).
122    SequenceEnd,
123    /// The start of a YAML mapping (object, hash).
124    MappingStart(
125        /// The notation style used for the mapping (Flow or Block).
126        StructureStyle,
127        /// The anchor ID defined on this mapping, or `0` if it has no anchor.
128        usize,
129        /// The resolved tag attached to this mapping, if any.
130        Option<Cow<'input, Tag>>,
131    ),
132    /// The end of a YAML mapping (object, hash).
133    MappingEnd,
134}
135
/// Notation style of a YAML sequence or mapping.
///
/// [`StructureStyle::Block`] is block notation:
///
/// ```yaml
/// items:
///   - milk
///   - bread
/// mapping:
///   name: Ada
///   active: true
/// ```
///
/// [`StructureStyle::Flow`] is flow notation:
///
/// ```yaml
/// items: [milk, bread]
/// mapping: {name: Ada, active: true}
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StructureStyle {
    /// Block notation: `- item` sequences and `key: value` mappings.
    Block,
    /// Flow notation: `[item]` sequences and `{key: value}` mappings.
    Flow,
}
162
/// A YAML tag, stored as a resolved handle (or prefix) plus a suffix.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tag {
    /// Resolved tag handle or prefix.
    ///
    /// For instance `tag:yaml.org,2002:` for core-schema tags, or `!` for local tags.
    pub handle: String,
    /// The part of the tag that follows the resolved handle or prefix.
    pub suffix: String,
}

impl Tag {
    /// Tell whether this tag belongs to the YAML core schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification defines [a list of
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. Only _the handle_
    /// is inspected here; the suffix is not checked.
    ///
    /// # Return
    /// `true` when the handle is exactly `tag:yaml.org,2002:`, `false` otherwise.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle.as_str() == "tag:yaml.org,2002:"
    }

    /// Tell whether this is the YAML core-schema tag with the given suffix.
    ///
    /// After tag resolution this matches core-schema tags such as `!!str`, `!!int`, `!!float`,
    /// `!!bool`, `!!null`, `!!map`, or `!!seq`.
    #[must_use]
    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
        self.suffix == suffix && self.is_yaml_core_schema()
    }

    /// Tell whether this tag lives outside the YAML core-schema namespace.
    ///
    /// Only the handle is considered: any tag whose handle is `tag:yaml.org,2002:` yields
    /// `false`, whatever its suffix.
    #[must_use]
    pub fn is_custom(&self) -> bool {
        self.handle != "tag:yaml.org,2002:"
    }

    /// Borrow the tag as a `(handle, suffix)` pair.
    #[must_use]
    pub fn parts(&self) -> (&str, &str) {
        (self.handle.as_str(), self.suffix.as_str())
    }
}

impl Display for Tag {
    /// Write the handle immediately followed by the suffix.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // A local tag (handle `!`) renders as `!suffix`, which is also handle + suffix, so one
        // format covers every handle.
        let (handle, suffix) = self.parts();
        write!(f, "{handle}{suffix}")
    }
}
222
223impl<'input> Event<'input> {
224    /// Return the anchor ID defined by this event, if any.
225    ///
226    /// Returns `Some(id)` when this event defines an anchor on a scalar, sequence, or mapping
227    /// node. Returns `None` for all other events, including `Alias` (which references an anchor
228    /// rather than defining one; use [`Self::alias_id`] to obtain the target anchor ID).
229    #[must_use]
230    pub fn anchor_id(&self) -> Option<usize> {
231        match self {
232            Self::Scalar(_, _, anchor_id, _)
233            | Self::SequenceStart(_, anchor_id, _)
234            | Self::MappingStart(_, anchor_id, _)
235                if *anchor_id != 0 =>
236            {
237                Some(*anchor_id)
238            }
239            _ => None,
240        }
241    }
242
243    /// Return the target anchor ID referenced by this alias event, if this event is an alias.
244    #[must_use]
245    pub fn alias_id(&self) -> Option<usize> {
246        match self {
247            Self::Alias(anchor_id) => Some(*anchor_id),
248            _ => None,
249        }
250    }
251
252    /// Return the resolved tag carried by this node event, if any.
253    #[must_use]
254    pub fn tag(&self) -> Option<&Tag> {
255        match self {
256            Self::Scalar(_, _, _, tag)
257            | Self::SequenceStart(_, _, tag)
258            | Self::MappingStart(_, _, tag) => tag.as_deref(),
259            _ => None,
260        }
261    }
262
263    /// Return the scalar value and style, if this event is a scalar.
264    #[must_use]
265    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
266        match self {
267            Self::Scalar(value, style, _, _) => Some((value.as_ref(), *style)),
268            _ => None,
269        }
270    }
271
272    /// Return whether this event represents a YAML node (value).
273    ///
274    /// Returns `true` for scalars, collection starts, and aliases — all events that produce a
275    /// value in the document tree. Returns `false` for structural events such as `StreamStart`,
276    /// `DocumentStart`, collection ends, etc.
277    #[must_use]
278    pub fn is_node(&self) -> bool {
279        matches!(
280            self,
281            Self::Alias(_) | Self::Scalar(..) | Self::SequenceStart(..) | Self::MappingStart(..)
282        )
283    }
284
285    /// Create an empty scalar.
286    fn empty_scalar() -> Self {
287        // a null scalar
288        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
289    }
290
291    /// Create an empty scalar with the given anchor.
292    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
293        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
294    }
295}
296
297/// A YAML parser.
298#[derive(Debug)]
299pub struct Parser<'input, T: BorrowedInput<'input>> {
300    /// The underlying scanner from which we pull tokens.
301    scanner: Scanner<'input, T>,
302    /// The stack of _previous_ states we were in.
303    ///
304    /// States are pushed in the context of subobjects to this stack. The top-most element is the
305    /// state in which to come back to when exiting the current state.
306    states: Vec<State>,
307    /// The state in which we currently are.
308    state: State,
309    /// The next token from the scanner.
310    token: Option<Token<'input>>,
311    /// The next YAML event to emit.
312    current: Option<(Event<'input>, Span)>,
313    /// YAML events buffered by parser states that need to emit an earlier synthetic node first.
314    queued_events: VecDeque<(Event<'input>, Span)>,
315
316    /// Pending indentation hint to be attached to the next emitted event span.
317    ///
318    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
319    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
320    /// itself).
321    pending_key_indent: Option<usize>,
322    /// Pending anchor ID to attach to a node after an intervening comment.
323    pending_node_anchor_id: usize,
324    /// Pending tag to attach to a node after an intervening comment.
325    pending_node_tag: Option<Cow<'input, Tag>>,
326    /// Pending empty scalar span captured before an intervening comment.
327    pending_empty_scalar_span: Option<Span>,
328    /// Anchors that have been encountered in the YAML document.
329    anchors: BTreeMap<Cow<'input, str>, usize>,
330    /// Next ID available for an anchor.
331    ///
332    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
333    /// return for the next anchor and the count of anchor IDs emitted.
334    anchor_id_count: usize,
335    /// The tag directives (`%TAG`) the parser has encountered.
336    ///
337    /// Key is the handle, and value is the prefix.
338    tags: BTreeMap<String, String>,
339    /// Whether we have emitted [`Event::StreamEnd`].
340    ///
341    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
342    /// [`Self::token`], this is set to `false`.
343    stream_end_emitted: bool,
344    /// Make tags global across all documents.
345    keep_tags: bool,
346}
347
348/// Trait to be implemented in order to use the low-level parsing API.
349///
350/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
351/// for each YAML [`Event`] that occurs.
352/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
353/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
354/// [`SpannedEventReceiver`] automatically.
355/// Non-spanned receivers receive [`Event::Comment(text, placement)`](Event::Comment) like any
356/// other event, but without source location. Spanned receivers receive the same comment event plus
357/// the comment [`Span`] in [`SpannedEventReceiver::on_event`]. For comments, that span covers the
358/// whole source comment, including `#` and excluding the line break. When parsing from an input
359/// with byte offsets, such as [`Parser::new_from_str`], [`Span::slice`] returns that source
360/// comment text.
361///
362/// # Event hierarchy
363/// The event stream starts with an [`Event::StreamStart`] event followed by an
364/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
365/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
366/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
367///
368/// In a mapping, key-values are sent as consecutive data events. Comments can appear in the raw
369/// event stream between a key and its value; they are presentation metadata, not YAML data nodes.
370/// Consumers building YAML data trees should ignore [`Event::Comment`]. Any key/value alternation
371/// shortcut applies only after filtering out comments and other presentation metadata. After that
372/// filtering, the first event after an [`Event::MappingStart`] will be the key, and the following
373/// event will be its value. If the mapping contains no sub-mapping or sub-sequence, then even events
374/// (starting from 0) will always be keys and odd ones will always be values. The mapping ends when
375/// an [`Event::MappingEnd`] event is received.
376///
377/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
378///
379/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
380/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
381/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
382/// be part of the value and not another key-value pair or element in the sequence.
383///
384/// For instance, the following YAML:
385/// ```yaml
386/// a: b
387/// c:
388///   d: e
389/// f:
390///   - g
391///   - h
392/// ```
393/// will emit (indented and commented for visibility):
394/// ```text
395/// StreamStart, DocumentStart, MappingStart,
396///   Scalar("a", ..), Scalar("b", ..)
397///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
398///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
399/// MappingEnd, DocumentEnd, StreamEnd
400/// ```
401///
402/// # Example
403/// ```
404/// # use granit_parser::{Event, EventReceiver, Parser};
405/// #
406/// /// Sink of events. Collects them into an array.
407/// struct EventSink<'input> {
408///     events: Vec<Event<'input>>,
409/// }
410///
411/// /// Implement `on_event`, pushing into `self.events`.
412/// impl<'input> EventReceiver<'input> for EventSink<'input> {
413///     fn on_event(&mut self, ev: Event<'input>) {
414///         self.events.push(ev);
415///     }
416/// }
417///
418/// /// Load events from a YAML string.
419/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
420///     let mut sink = EventSink { events: Vec::new() };
421///     let mut parser = Parser::new_from_str(yaml);
422///     // Load events using our sink as the receiver.
423///     parser.load(&mut sink, true).unwrap();
424///     sink.events
425/// }
426/// ```
427pub trait EventReceiver<'input> {
428    /// Handler called for each YAML event that is emitted by the parser.
429    fn on_event(&mut self, ev: Event<'input>);
430}
431
432/// Trait to be implemented for using the low-level parsing API.
433///
434/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
435/// For [`Event::Comment`], the span is the source range of the whole comment.
436pub trait SpannedEventReceiver<'input> {
437    /// Handler called for each event that occurs.
438    fn on_event(&mut self, ev: Event<'input>, span: Span);
439}
440
441impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
442    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
443        self.on_event(ev);
444    }
445}
446
447/// Trait to be implemented for fallible event handling without source spans.
448///
449/// This is the fallible counterpart to [`EventReceiver`]. Use it with [`Parser::try_load`] when
450/// event handling may need to stop parsing by returning an application error.
451pub trait TryEventReceiver<'input> {
452    /// Error returned by this receiver.
453    type Error;
454
455    /// Handler called for each YAML event that is emitted by the parser.
456    ///
457    /// Returning an error stops [`Parser::try_load`] immediately.
458    ///
459    /// # Errors
460    /// Returns `Self::Error` when the receiver wants to stop parsing.
461    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
462}
463
464/// Trait to be implemented for fallible event handling with source spans.
465///
466/// This is the fallible counterpart to [`SpannedEventReceiver`]. Use it with
467/// [`Parser::try_load`] when event handling may need to stop parsing by returning an application
468/// error.
469pub trait TrySpannedEventReceiver<'input> {
470    /// Error returned by this receiver.
471    type Error;
472
473    /// Handler called for each event that occurs.
474    ///
475    /// Returning an error stops [`Parser::try_load`] immediately.
476    ///
477    /// # Errors
478    /// Returns `Self::Error` when the receiver wants to stop parsing.
479    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
480}
481
482impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
483    type Error = R::Error;
484
485    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
486        TryEventReceiver::on_event(self, ev)
487    }
488}
489
490/// Error returned by [`Parser::try_load`] and [`ParserTrait::try_load`].
491#[derive(Clone, PartialEq, Debug, Eq)]
492pub enum TryLoadError<E> {
493    /// Scanning or parsing failed.
494    Scan(
495        /// The scanner or parser error.
496        ScanError,
497    ),
498    /// The receiver returned an application error.
499    Receiver(
500        /// The error returned by the receiver.
501        E,
502    ),
503}
504
505impl<E> TryLoadError<E> {
506    #[cold]
507    fn scan(error: ScanError) -> Self {
508        Self::Scan(error)
509    }
510
511    #[cold]
512    fn receiver(error: E) -> Self {
513        Self::Receiver(error)
514    }
515}
516
517impl<E> From<ScanError> for TryLoadError<E> {
518    #[cold]
519    fn from(error: ScanError) -> Self {
520        Self::scan(error)
521    }
522}
523
524impl<E: Display> Display for TryLoadError<E> {
525    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
526        match self {
527            Self::Scan(error) => write!(f, "parser error: {error}"),
528            Self::Receiver(error) => write!(f, "receiver error: {error}"),
529        }
530    }
531}
532
533impl<E> core::error::Error for TryLoadError<E>
534where
535    E: core::error::Error + 'static,
536{
537    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
538        match self {
539            Self::Scan(error) => Some(error),
540            Self::Receiver(error) => Some(error),
541        }
542    }
543}
544
545fn try_emit<'input, R>(
546    recv: &mut R,
547    ev: Event<'input>,
548    span: Span,
549) -> Result<(), TryLoadError<R::Error>>
550where
551    R: TrySpannedEventReceiver<'input>,
552{
553    recv.on_event(ev, span).map_err(TryLoadError::receiver)
554}
555
556struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
557
558impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
559    for InfallibleSpannedReceiver<'_, R>
560{
561    type Error = Infallible;
562
563    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
564        self.0.on_event(ev, span);
565        Ok(())
566    }
567}
568
569fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
570    match result {
571        Ok(()) => Ok(()),
572        Err(TryLoadError::Scan(error)) => error.into_result(),
573        Err(TryLoadError::Receiver(error)) => match error {},
574    }
575}
576
577/// A convenience alias for a parser event result.
578pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
579
580/// Trait extracted from `Parser` to support mocking and alternative implementations.
581pub trait ParserTrait<'input> {
582    /// Try to load the next event and return it without consuming it from `self`.
583    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
584
585    /// Try to load the next event and return it, consuming it from `self`.
586    fn next_event(&mut self) -> Option<ParseResult<'input>>;
587
588    /// Load the YAML from the stream in `self`, pushing events into `recv`.
589    ///
590    /// Use this method when event handling is infallible. If receiver code can return an
591    /// application error and should stop parsing, use [`ParserTrait::try_load`] instead. If the
592    /// caller should directly control when the next event is read, use [`ParserTrait::next_event`]
593    /// or [`Parser`]'s [`core::iter::Iterator`] implementation.
594    ///
595    /// # Errors
596    /// Returns `ScanError` when scanning or parsing the stream fails.
597    fn load<R: SpannedEventReceiver<'input>>(
598        &mut self,
599        recv: &mut R,
600        multi: bool,
601    ) -> Result<(), ScanError>;
602
603    /// Load the YAML from the stream in `self`, stopping if `recv` returns an error.
604    ///
605    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
606    /// inside the stream.
607    ///
608    /// If the receiver returns an error, the parser is left positioned immediately after the event
609    /// that caused the receiver error. Callers should treat the parser as partially consumed.
610    ///
611    /// # Errors
612    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
613    /// [`TryLoadError::Receiver`] when `recv` returns an error.
614    fn try_load<R: TrySpannedEventReceiver<'input>>(
615        &mut self,
616        recv: &mut R,
617        multi: bool,
618    ) -> Result<(), TryLoadError<R::Error>> {
619        while let Some(res) = self.next_event() {
620            let (ev, span) = res?;
621            let is_doc_end = matches!(ev, Event::DocumentEnd);
622            let is_stream_end = matches!(ev, Event::StreamEnd);
623
624            try_emit(recv, ev, span)?;
625
626            if is_stream_end {
627                break;
628            }
629            if !multi && is_doc_end {
630                break;
631            }
632        }
633
634        Ok(())
635    }
636}
637
638impl<'input> Parser<'input, StrInput<'input>> {
639    /// Create a parser over a borrowed string slice.
640    #[must_use]
641    pub fn new_from_str(value: &'input str) -> Self {
642        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
643        Parser::new(StrInput::new(value))
644    }
645}
646
647impl<T> Parser<'static, BufferedInput<T>>
648where
649    T: Iterator<Item = char>,
650{
651    /// Create a parser over an iterator of characters.
652    #[must_use]
653    pub fn new_from_iter(iter: T) -> Self {
654        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
655        Parser::new(BufferedInput::new(iter))
656    }
657}
658
659impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
660    /// Return the next anchor ID that will be assigned by this parser.
661    pub fn get_anchor_offset(&self) -> usize {
662        self.anchor_id_count
663    }
664
665    /// Set the next anchor ID that will be assigned by this parser.
666    pub fn set_anchor_offset(&mut self, offset: usize) {
667        self.anchor_id_count = offset;
668    }
669
670    /// Create a parser over a custom input source.
671    pub fn new(src: T) -> Self {
672        Parser {
673            scanner: Scanner::new(src),
674            states: Vec::new(),
675            state: State::StreamStart,
676            token: None,
677            current: None,
678            queued_events: VecDeque::new(),
679
680            pending_key_indent: None,
681            pending_node_anchor_id: 0,
682            pending_node_tag: None,
683            pending_empty_scalar_span: None,
684
685            anchors: BTreeMap::new(),
686            // valid anchor_id starts from 1
687            anchor_id_count: 1,
688            tags: BTreeMap::new(),
689            stream_end_emitted: false,
690            keep_tags: false,
691        }
692    }
693
694    /// Configure whether tag directives remain active across document boundaries.
695    ///
696    /// This behavior is non-standard as per the YAML specification but can be encountered in the
697    /// wild. Passing `true` enables this non-standard extension and allows the parser to accept
698    /// input from [test
699    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
700    /// of the yaml-test-suite:
701    ///
702    /// ```yaml
703    /// %TAG !prefix! tag:example.com,2011:
704    /// --- !prefix!A
705    /// a: b
706    /// --- !prefix!B
707    /// c: d
708    /// --- !prefix!C
709    /// e: f
710    /// ```
711    ///
712    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
713    /// only apply to the document immediately following them. This would error on `!prefix!B`.
714    ///
715    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
716    #[must_use]
717    pub fn keep_tags(mut self, value: bool) -> Self {
718        self.keep_tags = value;
719        self
720    }
721
722    /// Try to load the next event and return it without consuming it from `self`.
723    ///
724    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
725    /// [`Iterator::next`] or [`Parser::load`].
726    ///
727    /// # Errors
728    /// Returns `ScanError` when loading the next event fails.
729    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
730        ParserTrait::peek(self)
731    }
732
733    /// Try to load the next event and return it, consuming it from `self`.
734    ///
735    /// # Errors
736    /// Returns `ScanError` when loading the next event fails.
737    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
738        ParserTrait::next_event(self)
739    }
740
741    /// Implementation function for [`Self::next_event`] without the `Option`.
742    ///
743    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
744    /// option. This burdens the parser code. This function is used internally when an option is
745    /// undesirable.
746    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
747    where
748        'input: 'a,
749    {
750        match self.current.take() {
751            None => {
752                if let Some(event) = self.queued_events.pop_front() {
753                    Ok(self.apply_pending_key_indent(event))
754                } else if let Some(comment) = self.next_comment_event()? {
755                    Ok(comment)
756                } else {
757                    self.parse()
758                }
759            }
760            Some(v) => Ok(v),
761        }
762    }
763
764    fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
765        if ev.is_node() {
766            if let Some(indent) = self.pending_key_indent.take() {
767                return (ev, span.with_indent(Some(indent)));
768            }
769        }
770
771        (ev, span)
772    }
773
774    /// Peek at the next token from the scanner.
775    fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
776        match self.token {
777            None => {
778                self.token = Some(self.scan_next_token()?);
779                Ok(self.token.as_ref().unwrap())
780            }
781            Some(ref tok) => Ok(tok),
782        }
783    }
784
785    /// Extract and return the next token from the scanner.
786    ///
787    /// This function does _not_ make use of `self.token`.
788    fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
789        match self.scanner.next() {
790            None => match self.scanner.get_error() {
791                None => Err(self.unexpected_eof()),
792                Some(e) => e.into_result(),
793            },
794            Some(tok) => Ok(tok),
795        }
796    }
797
798    fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
799    where
800        'input: 'a,
801    {
802        let is_comment = {
803            let token = self.peek_token()?;
804            matches!(token.1, TokenType::Comment(_))
805        };
806
807        if !is_comment {
808            return Ok(None);
809        }
810
811        let Token(span, token) = self.fetch_token();
812        match token {
813            TokenType::Comment(mut comment) => {
814                comment.placement = self.refined_comment_placement(&comment);
815                Ok(Some((
816                    Event::Comment(comment.text, comment.placement),
817                    span,
818                )))
819            }
820            _ => unreachable!("comment token disappeared after peek"),
821        }
822    }
823
824    fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
825        let mut comments = Vec::new();
826        loop {
827            match self.peek_token() {
828                Ok(token) if matches!(token.1, TokenType::Comment(_)) => {}
829                Err(error) if comments.is_empty() => return Err(error),
830                Ok(_) | Err(_) => return Ok(comments),
831            }
832
833            let comment = self
834                .next_comment_event()?
835                .expect("comment token disappeared after peek");
836            comments.push(comment);
837        }
838    }
839
840    fn queue_tail_and_return_first(
841        &mut self,
842        events: Vec<(Event<'input>, Span)>,
843    ) -> (Event<'input>, Span) {
844        let mut events = events.into_iter();
845        let first = events
846            .next()
847            .expect("event queue must contain at least one event");
848        self.queued_events.extend(events);
849        first
850    }
851
852    fn queue_event_by_span(
853        &mut self,
854        comments: Vec<(Event<'input>, Span)>,
855        event: (Event<'input>, Span),
856    ) -> (Event<'input>, Span) {
857        let insert_at = comments
858            .iter()
859            .position(|(_, comment_span)| {
860                comment_span.start.index() >= event.1.start.index()
861                    && comment_span.end.index() >= event.1.end.index()
862            })
863            .unwrap_or(comments.len());
864        let mut ordered = Vec::with_capacity(comments.len() + 1);
865        let mut comments = comments.into_iter();
866
867        for _ in 0..insert_at {
868            ordered.push(
869                comments
870                    .next()
871                    .expect("comment disappeared while ordering queued events"),
872            );
873        }
874        ordered.push(event);
875        ordered.extend(comments);
876
877        self.queue_tail_and_return_first(ordered)
878    }
879
880    fn refined_comment_placement(&mut self, comment: &Comment<'_>) -> Placement {
881        if comment.placement == Placement::Right {
882            return Placement::Right;
883        }
884
885        let Ok(next) = self.peek_token() else {
886            return comment.placement;
887        };
888        if matches!(next.1, TokenType::StreamEnd) {
889            return Placement::Last;
890        }
891
892        if next.0.start.line() == comment.span.end.line() + 1 {
893            Placement::Above
894        } else {
895            Placement::Free
896        }
897    }
898
899    #[cold]
900    fn unexpected_eof(&self) -> ScanError {
901        let info = match self.state {
902            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
903                "unexpected EOF while parsing a flow sequence"
904            }
905            State::FlowMappingFirstKey
906            | State::FlowMappingKey
907            | State::FlowMappingValue
908            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
909            State::FlowSequenceEntryMappingKey
910            | State::FlowSequenceEntryMappingValue
911            | State::FlowSequenceEntryMappingEnd
912            | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
913            State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
914                "unexpected EOF while parsing a block sequence"
915            }
916            State::BlockMappingFirstKey
917            | State::BlockMappingKey
918            | State::BlockMappingValue
919            | State::BlockNodeOrIndentlessSequence => {
920                "unexpected EOF while parsing a block mapping"
921            }
922            _ => "unexpected eof",
923        };
924        ScanError::new_str(self.scanner.mark(), info)
925    }
926
927    fn fetch_token<'a>(&mut self) -> Token<'a>
928    where
929        'input: 'a,
930    {
931        self.token
932            .take()
933            .expect("fetch_token needs to be preceded by peek_token")
934    }
935
    /// Skip the next token from the scanner.
    ///
    /// Clears `self.token`, the slot `fetch_token` takes its token from.
    fn skip(&mut self) {
        self.token = None;
    }
    /// Pops the top-most state and makes it the current state.
    ///
    /// # Panics
    /// Panics if the state stack is empty.
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap();
    }
    /// Push a new state atop the state stack.
    ///
    /// The current state (`self.state`) is left untouched; only the stack grows.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
948
949    fn defer_parse_node<'a>(
950        &mut self,
951        node_state: State,
952        return_state: State,
953        block: bool,
954        indentless_sequence: bool,
955    ) -> ParseResult<'a>
956    where
957        'input: 'a,
958    {
959        self.push_state(return_state);
960        self.state = node_state;
961        if let Some(comment) = self.next_comment_event()? {
962            Ok(comment)
963        } else {
964            self.parse_node(block, indentless_sequence)
965        }
966    }
967
968    fn parse<'a>(&mut self) -> ParseResult<'a>
969    where
970        'input: 'a,
971    {
972        if self.state == State::End {
973            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
974        }
975        let event = self.state_machine()?;
976        Ok(self.apply_pending_key_indent(event))
977    }
978
    /// Load the YAML from the stream in `self`, pushing events into `recv`.
    ///
    /// The contents of the stream are parsed and the corresponding events are sent into the
    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// Use this method when event handling is infallible. If receiver code can return an
    /// application error and should stop parsing, use [`Parser::try_load`] instead. If the caller
    /// should directly control when the next event is read, use [`Parser`]'s
    /// [`core::iter::Iterator`] implementation.
    ///
    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
    /// former is enough to call this function.
    ///
    /// # Examples
    /// ```
    /// # use granit_parser::{Event, EventReceiver, Parser};
    /// # fn main() -> Result<(), granit_parser::ScanError> {
    /// struct EventSink<'input> {
    ///     events: Vec<Event<'input>>,
    /// }
    ///
    /// impl<'input> EventReceiver<'input> for EventSink<'input> {
    ///     fn on_event(&mut self, ev: Event<'input>) {
    ///         self.events.push(ev);
    ///     }
    /// }
    ///
    /// let mut parser = Parser::new_from_str("a: 1\n");
    /// let mut sink = EventSink { events: Vec::new() };
    ///
    /// parser.load(&mut sink, false)?;
    ///
    /// assert!(sink
    ///     .events
    ///     .iter()
    ///     .any(|ev| matches!(ev, Event::Scalar(value, ..) if value == "a")));
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// # Errors
    /// Returns a [`ScanError`] when loading fails.
    pub fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        // Thin inherent wrapper: the work is done by the `ParserTrait` implementation.
        ParserTrait::load(self, recv, multi)
    }
1031
    /// Load the YAML from the stream in `self`, pushing events into a fallible receiver.
    ///
    /// This is the fallible counterpart to [`Parser::load`]. If `recv` returns an error, parsing
    /// stops immediately and that error is returned as [`TryLoadError::Receiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// If the receiver returns an error, the parser is left positioned immediately after the event
    /// that caused the receiver error. Callers should treat the parser as partially consumed.
    ///
    /// # Examples
    /// ```
    /// # use granit_parser::{Event, Parser, TryEventReceiver, TryLoadError};
    /// #[derive(Debug, PartialEq, Eq)]
    /// enum ValidationError {
    ///     ForbiddenScalar,
    /// }
    ///
    /// struct Validator;
    ///
    /// impl<'input> TryEventReceiver<'input> for Validator {
    ///     type Error = ValidationError;
    ///
    ///     fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
    ///         if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
    ///             Err(ValidationError::ForbiddenScalar)
    ///         } else {
    ///             Ok(())
    ///         }
    ///     }
    /// }
    ///
    /// let mut parser = Parser::new_from_str("value: bad\n");
    /// let mut validator = Validator;
    ///
    /// let err = parser.try_load(&mut validator, false).unwrap_err();
    ///
    /// assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenScalar));
    /// ```
    ///
    /// # Errors
    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
    /// [`TryLoadError::Receiver`] when `recv` returns an error.
    pub fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        // Thin inherent wrapper: the work is done by the `ParserTrait` implementation.
        ParserTrait::try_load(self, recv, multi)
    }
1083
1084    #[cfg(test)]
1085    fn try_load_document<R: TrySpannedEventReceiver<'input>>(
1086        &mut self,
1087        first_ev: Event<'input>,
1088        span: Span,
1089        recv: &mut R,
1090    ) -> Result<(), TryLoadError<R::Error>> {
1091        if !matches!(first_ev, Event::DocumentStart(_)) {
1092            return Err(TryLoadError::scan(ScanError::new_str(
1093                span.start,
1094                "did not find expected <document-start>",
1095            )));
1096        }
1097        try_emit(recv, first_ev, span)?;
1098
1099        let (ev, span) = self.next_event_impl()?;
1100        self.try_load_node(ev, span, recv)?;
1101
1102        // DOCUMENT-END is expected.
1103        let (ev, mark) = self.next_event_impl()?;
1104        assert_eq!(ev, Event::DocumentEnd);
1105        try_emit(recv, ev, mark)?;
1106
1107        Ok(())
1108    }
1109
1110    #[cfg(test)]
1111    fn try_load_node<R: TrySpannedEventReceiver<'input>>(
1112        &mut self,
1113        first_ev: Event<'input>,
1114        span: Span,
1115        recv: &mut R,
1116    ) -> Result<(), TryLoadError<R::Error>> {
1117        match first_ev {
1118            Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
1119            Event::SequenceStart(..) => {
1120                try_emit(recv, first_ev, span)?;
1121                self.try_load_sequence(recv)
1122            }
1123            Event::MappingStart(..) => {
1124                try_emit(recv, first_ev, span)?;
1125                self.try_load_mapping(recv)
1126            }
1127            _ => {
1128                #[cfg(feature = "debug_prints")]
1129                std::println!("UNREACHABLE EVENT: {first_ev:?}");
1130                unreachable!();
1131            }
1132        }
1133    }
1134
1135    #[cfg(test)]
1136    fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
1137        &mut self,
1138        recv: &mut R,
1139    ) -> Result<(), TryLoadError<R::Error>> {
1140        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
1141        while key_ev != Event::MappingEnd {
1142            // key
1143            self.try_load_node(key_ev, key_mark, recv)?;
1144
1145            // value
1146            let (ev, mark) = self.next_event_impl()?;
1147            self.try_load_node(ev, mark, recv)?;
1148
1149            // next event
1150            let (ev, mark) = self.next_event_impl()?;
1151            key_ev = ev;
1152            key_mark = mark;
1153        }
1154        try_emit(recv, key_ev, key_mark)?;
1155        Ok(())
1156    }
1157
1158    #[cfg(test)]
1159    fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
1160        &mut self,
1161        recv: &mut R,
1162    ) -> Result<(), TryLoadError<R::Error>> {
1163        let (mut ev, mut mark) = self.next_event_impl()?;
1164        while ev != Event::SequenceEnd {
1165            self.try_load_node(ev, mark, recv)?;
1166
1167            // next event
1168            let (next_ev, next_mark) = self.next_event_impl()?;
1169            ev = next_ev;
1170            mark = next_mark;
1171        }
1172        try_emit(recv, ev, mark)?;
1173        Ok(())
1174    }
1175
1176    fn state_machine<'a>(&mut self) -> ParseResult<'a>
1177    where
1178        'input: 'a,
1179    {
1180        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
1181
1182        match self.state {
1183            State::StreamStart => self.stream_start(),
1184
1185            State::ImplicitDocumentStart => self.document_start(true),
1186            State::DocumentStart => self.document_start(false),
1187            State::DocumentContent => self.document_content(),
1188            State::DocumentEnd => self.document_end(),
1189
1190            State::BlockNode => self.parse_node(true, false),
1191            State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
1192            State::FlowNode => self.parse_node(false, false),
1193            State::BlockMappingFirstKey => self.block_mapping_key(true),
1194            State::BlockMappingKey => self.block_mapping_key(false),
1195            State::BlockMappingKeyNode => self.block_mapping_key_node(),
1196            State::BlockMappingValue => self.block_mapping_value(),
1197            State::BlockMappingValueNode => self.block_mapping_value_node(),
1198
1199            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
1200            State::BlockSequenceEntry => self.block_sequence_entry(false),
1201            State::BlockSequenceEntryNode => self.block_sequence_entry_node(),
1202
1203            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
1204            State::FlowSequenceEntry => self.flow_sequence_entry(false),
1205
1206            State::FlowMappingFirstKey => self.flow_mapping_key(true),
1207            State::FlowMappingKey => self.flow_mapping_key(false),
1208            State::FlowMappingKeyNode => self.flow_mapping_key_node(),
1209            State::FlowMappingValue => self.flow_mapping_value(false),
1210            State::FlowMappingValueNode => self.flow_mapping_value_node(),
1211
1212            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
1213            State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),
1214
1215            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
1216            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
1217            State::FlowSequenceEntryMappingValueNode => {
1218                self.flow_sequence_entry_mapping_value_node()
1219            }
1220            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
1221            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
1222
1223            /* impossible */
1224            State::End => unreachable!(),
1225        }
1226    }
1227
1228    fn stream_start<'a>(&mut self) -> ParseResult<'a>
1229    where
1230        'input: 'a,
1231    {
1232        match *self.peek_token()? {
1233            Token(span, TokenType::StreamStart(_)) => {
1234                self.state = State::ImplicitDocumentStart;
1235                self.skip();
1236                Ok((Event::StreamStart, span))
1237            }
1238            Token(span, _) => Err(ScanError::new_str(
1239                span.start,
1240                "did not find expected <stream-start>",
1241            )),
1242        }
1243    }
1244
1245    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
1246    where
1247        'input: 'a,
1248    {
1249        while let TokenType::DocumentEnd = self.peek_token()?.1 {
1250            self.skip();
1251        }
1252
1253        // Anchors are scoped to a single document.
1254        self.anchors.clear();
1255
1256        match *self.peek_token()? {
1257            Token(span, TokenType::StreamEnd) => {
1258                self.state = State::End;
1259                self.skip();
1260                Ok((Event::StreamEnd, span))
1261            }
1262            Token(
1263                _,
1264                TokenType::VersionDirective(..)
1265                | TokenType::TagDirective(..)
1266                | TokenType::ReservedDirective(..)
1267                | TokenType::DocumentStart,
1268            ) => {
1269                // explicit document
1270                self.explicit_document_start()
1271            }
1272            Token(span, _) if implicit => {
1273                self.parser_process_directives()?;
1274                self.push_state(State::DocumentEnd);
1275                self.state = State::BlockNode;
1276                Ok((Event::DocumentStart(false), span))
1277            }
1278            _ => {
1279                // explicit document
1280                self.explicit_document_start()
1281            }
1282        }
1283    }
1284
1285    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
1286        let mut version_directive_received = false;
1287        let mut tags = if self.keep_tags {
1288            self.tags.clone()
1289        } else {
1290            BTreeMap::new()
1291        };
1292        let mut document_tag_handles = BTreeSet::new();
1293
1294        loop {
1295            match self.peek_token()? {
1296                Token(span, TokenType::VersionDirective(_, _)) => {
1297                    // YAML version compatibility is non-fatal here. The scanner validates the
1298                    // directive shape, and the parser rejects duplicates below, but it does not
1299                    // expose a warning channel for unsupported versions.
1300                    if version_directive_received {
1301                        return Err(ScanError::new_str(
1302                            span.start,
1303                            "duplicate version directive",
1304                        ));
1305                    }
1306                    version_directive_received = true;
1307                }
1308                Token(mark, TokenType::TagDirective(handle, prefix)) => {
1309                    if !document_tag_handles.insert(handle.to_string()) {
1310                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
1311                    }
1312                    tags.insert(handle.to_string(), prefix.to_string());
1313                }
1314                Token(_, TokenType::ReservedDirective(_, _)) => {
1315                    // Reserved directives are ignored
1316                }
1317                _ => break,
1318            }
1319            self.skip();
1320        }
1321
1322        self.tags = tags;
1323        Ok(())
1324    }
1325
1326    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1327    where
1328        'input: 'a,
1329    {
1330        self.parser_process_directives()?;
1331        if let Some(comment) = self.next_comment_event()? {
1332            return Ok(comment);
1333        }
1334        match *self.peek_token()? {
1335            Token(mark, TokenType::DocumentStart) => {
1336                self.push_state(State::DocumentEnd);
1337                self.state = State::DocumentContent;
1338                self.skip();
1339                Ok((Event::DocumentStart(true), mark))
1340            }
1341            Token(span, _) => Err(ScanError::new_str(
1342                span.start,
1343                "did not find expected <document start>",
1344            )),
1345        }
1346    }
1347
1348    fn document_content<'a>(&mut self) -> ParseResult<'a>
1349    where
1350        'input: 'a,
1351    {
1352        if let Token(
1353            mark,
1354            TokenType::VersionDirective(..)
1355            | TokenType::TagDirective(..)
1356            | TokenType::ReservedDirective(..)
1357            | TokenType::DocumentStart
1358            | TokenType::DocumentEnd
1359            | TokenType::StreamEnd,
1360        ) = *self.peek_token()?
1361        {
1362            self.pop_state();
1363            // empty scalar
1364            Ok((Event::empty_scalar(), mark))
1365        } else {
1366            self.state = State::BlockNode;
1367            self.parse_node(true, false)
1368        }
1369    }
1370
1371    fn document_end<'a>(&mut self) -> ParseResult<'a>
1372    where
1373        'input: 'a,
1374    {
1375        let mut explicit_end = false;
1376        let span: Span = match *self.peek_token()? {
1377            Token(span, TokenType::DocumentEnd) => {
1378                explicit_end = true;
1379                self.skip();
1380                span
1381            }
1382            Token(span, _) => span,
1383        };
1384
1385        if self.keep_tags {
1386            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
1387            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
1388            // explicit tags are interpreted later on.
1389            self.tags.remove("!!");
1390            self.tags.remove("");
1391        } else {
1392            self.tags.clear();
1393        }
1394        if explicit_end {
1395            self.state = State::ImplicitDocumentStart;
1396        } else {
1397            if let Token(
1398                span,
1399                TokenType::VersionDirective(..)
1400                | TokenType::TagDirective(..)
1401                | TokenType::ReservedDirective(..),
1402            ) = *self.peek_token()?
1403            {
1404                return Err(ScanError::new_str(
1405                    span.start,
1406                    "missing explicit document end marker before directive",
1407                ));
1408            }
1409            self.state = State::DocumentStart;
1410        }
1411
1412        Ok((Event::DocumentEnd, span))
1413    }
1414
1415    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1416        // YAML permits anchor names to be reused. Aliases resolve to the most recent definition.
1417        let new_id = self.anchor_id_count;
1418        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1419            ScanError::new_str(
1420                mark.start,
1421                "while parsing anchor, anchor count exceeded supported limit",
1422            )
1423        })?;
1424        self.anchors.insert(name, new_id);
1425        Ok(new_id)
1426    }
1427
1428    fn save_pending_node_properties(&mut self, anchor_id: usize, tag: Option<Cow<'input, Tag>>) {
1429        self.pending_node_anchor_id = anchor_id;
1430        self.pending_node_tag = tag;
1431    }
1432
1433    #[allow(clippy::too_many_lines)]
1434    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
1435    where
1436        'input: 'a,
1437    {
1438        if let Some(comment) = self.next_comment_event()? {
1439            return Ok(comment);
1440        }
1441
1442        let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
1443        let mut tag = self.pending_node_tag.take();
1444        match *self.peek_token()? {
1445            Token(_, TokenType::Alias(_)) => {
1446                self.pop_state();
1447                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
1448                    match self.anchors.get(&*name) {
1449                        None => {
1450                            return Err(ScanError::new_str(
1451                                span.start,
1452                                "while parsing node, found unknown anchor",
1453                            ))
1454                        }
1455                        Some(id) => return Ok((Event::Alias(*id), span)),
1456                    }
1457                }
1458                unreachable!()
1459            }
1460            Token(_, TokenType::Anchor(_)) => {
1461                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
1462                    anchor_id = self.register_anchor(name, &span)?;
1463                    if let TokenType::Tag(..) = self.peek_token()?.1 {
1464                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
1465                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
1466                        } else {
1467                            unreachable!()
1468                        }
1469                    }
1470                    if let Some(comment) = self.next_comment_event()? {
1471                        self.save_pending_node_properties(anchor_id, tag);
1472                        return Ok(comment);
1473                    }
1474                } else {
1475                    unreachable!()
1476                }
1477            }
1478            Token(mark, TokenType::Tag(..)) => {
1479                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
1480                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
1481                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
1482                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
1483                            anchor_id = self.register_anchor(name, &mark)?;
1484                        } else {
1485                            unreachable!()
1486                        }
1487                    }
1488                    if let Some(comment) = self.next_comment_event()? {
1489                        self.save_pending_node_properties(anchor_id, tag);
1490                        return Ok(comment);
1491                    }
1492                } else {
1493                    unreachable!()
1494                }
1495            }
1496            _ => {}
1497        }
1498        match *self.peek_token()? {
1499            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
1500                self.skip();
1501                let comments = self.next_comment_events()?;
1502                self.pending_empty_scalar_span = Some(mark);
1503                self.state = State::IndentlessSequenceEntryNode;
1504                let start = (
1505                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
1506                    mark,
1507                );
1508                if comments.is_empty() {
1509                    Ok(start)
1510                } else {
1511                    Ok(self.queue_event_by_span(comments, start))
1512                }
1513            }
1514            Token(_, TokenType::Scalar(..)) => {
1515                self.pop_state();
1516                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
1517                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
1518                } else {
1519                    unreachable!()
1520                }
1521            }
1522            Token(mark, TokenType::FlowSequenceStart) => {
1523                self.state = State::FlowSequenceFirstEntry;
1524                self.skip();
1525                Ok((
1526                    Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
1527                    mark,
1528                ))
1529            }
1530            Token(mark, TokenType::FlowMappingStart) => {
1531                self.state = State::FlowMappingFirstKey;
1532                self.skip();
1533                Ok((
1534                    Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
1535                    mark,
1536                ))
1537            }
1538            Token(mark, TokenType::BlockSequenceStart) if block => {
1539                self.state = State::BlockSequenceFirstEntry;
1540                self.skip();
1541                Ok((
1542                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
1543                    mark,
1544                ))
1545            }
1546            Token(mark, TokenType::BlockMappingStart) if block => {
1547                self.state = State::BlockMappingFirstKey;
1548                self.skip();
1549                Ok((
1550                    Event::MappingStart(StructureStyle::Block, anchor_id, tag),
1551                    mark,
1552                ))
1553            }
1554            // ex 7.2, an empty scalar can follow a secondary tag
1555            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
1556                self.pop_state();
1557                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
1558            }
1559            Token(span, _) => {
1560                let info = match self.state {
1561                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1562                        "unexpected EOF while parsing a flow sequence"
1563                    }
1564                    State::FlowMappingFirstKey
1565                    | State::FlowMappingKey
1566                    | State::FlowMappingValue
1567                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1568                    State::FlowSequenceEntryMappingKey
1569                    | State::FlowSequenceEntryMappingValue
1570                    | State::FlowSequenceEntryMappingEnd
1571                    | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
1572                    State::BlockSequenceFirstEntry
1573                    | State::BlockSequenceEntry
1574                    | State::BlockNode => "unexpected EOF while parsing a block sequence",
1575                    State::BlockMappingFirstKey
1576                    | State::BlockMappingKey
1577                    | State::BlockMappingValue
1578                    | State::BlockNodeOrIndentlessSequence => {
1579                        "unexpected EOF while parsing a block mapping"
1580                    }
1581                    _ => "while parsing a node, did not find expected node content",
1582                };
1583                Err(ScanError::new_str(span.start, info))
1584            }
1585        }
1586    }
1587
1588    fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
1589    where
1590        'input: 'a,
1591    {
1592        match *self.peek_token()? {
1593            Token(_, TokenType::Key) => {
1594                // Indentation is only meaningful for block mapping keys.
1595                if let Token(key_span, TokenType::Key) = *self.peek_token()? {
1596                    self.pending_key_indent = Some(key_span.start.col());
1597                }
1598                self.skip();
1599                if let Some(comment) = self.next_comment_event()? {
1600                    self.state = State::BlockMappingKeyNode;
1601                    Ok(comment)
1602                } else {
1603                    self.block_mapping_key_node()
1604                }
1605            }
1606            // A missing block-mapping key before `:` is represented as an empty scalar.
1607            Token(mark, TokenType::Value) => {
1608                self.state = State::BlockMappingValue;
1609                Ok((Event::empty_scalar(), mark))
1610            }
1611            Token(mark, TokenType::BlockEnd) => {
1612                self.pop_state();
1613                self.skip();
1614                Ok((Event::MappingEnd, mark))
1615            }
1616            Token(span, _) => Err(ScanError::new_str(
1617                span.start,
1618                "while parsing a block mapping, did not find expected key",
1619            )),
1620        }
1621    }
1622
1623    fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1624    where
1625        'input: 'a,
1626    {
1627        if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1628            *self.peek_token()?
1629        {
1630            self.state = State::BlockMappingValue;
1631            Ok((Event::empty_scalar(), mark))
1632        } else {
1633            self.defer_parse_node(
1634                State::BlockNodeOrIndentlessSequence,
1635                State::BlockMappingValue,
1636                true,
1637                true,
1638            )
1639        }
1640    }
1641
1642    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1643    where
1644        'input: 'a,
1645    {
1646        match *self.peek_token()? {
1647            Token(mark, TokenType::Value) => {
1648                self.skip();
1649                let comments = self.next_comment_events()?;
1650                if comments.is_empty() {
1651                    self.block_mapping_value_node_with_empty_span(mark)
1652                } else if let Ok(Token(
1653                    _,
1654                    TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1655                )) = self.peek_token()
1656                {
1657                    self.state = State::BlockMappingKey;
1658                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1659                } else {
1660                    self.pending_empty_scalar_span = Some(mark);
1661                    self.state = State::BlockMappingValueNode;
1662                    Ok(self.queue_tail_and_return_first(comments))
1663                }
1664            }
1665            Token(mark, _) => {
1666                self.state = State::BlockMappingKey;
1667                // empty scalar
1668                Ok((Event::empty_scalar(), mark))
1669            }
1670        }
1671    }
1672
1673    fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1674    where
1675        'input: 'a,
1676    {
1677        let mark = match self.pending_empty_scalar_span.take() {
1678            Some(mark) => mark,
1679            None => self.peek_token()?.0,
1680        };
1681        self.block_mapping_value_node_with_empty_span(mark)
1682    }
1683
1684    fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1685    where
1686        'input: 'a,
1687    {
1688        if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1689            *self.peek_token()?
1690        {
1691            self.state = State::BlockMappingKey;
1692            Ok((Event::empty_scalar(), mark))
1693        } else {
1694            self.defer_parse_node(
1695                State::BlockNodeOrIndentlessSequence,
1696                State::BlockMappingKey,
1697                true,
1698                true,
1699            )
1700        }
1701    }
1702
1703    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1704    where
1705        'input: 'a,
1706    {
1707        let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
1708            mark
1709        } else {
1710            if !first {
1711                match *self.peek_token()? {
1712                    Token(_, TokenType::FlowEntry) => {
1713                        self.skip();
1714                        if let Some(comment) = self.next_comment_event()? {
1715                            self.state = State::FlowMappingFirstKey;
1716                            return Ok(comment);
1717                        }
1718                    }
1719                    Token(span, _) => {
1720                        return Err(ScanError::new_str(
1721                            span.start,
1722                            "while parsing a flow mapping, did not find expected ',' or '}'",
1723                        ))
1724                    }
1725                }
1726            }
1727
1728            match *self.peek_token()? {
1729                Token(_, TokenType::Key) => {
1730                    self.skip();
1731                    if let Some(comment) = self.next_comment_event()? {
1732                        self.state = State::FlowMappingKeyNode;
1733                        return Ok(comment);
1734                    }
1735                    return self.flow_mapping_key_node();
1736                }
1737                Token(marker, TokenType::Value) => {
1738                    self.state = State::FlowMappingValue;
1739                    return Ok((Event::empty_scalar(), marker));
1740                }
1741                Token(_, TokenType::FlowMappingEnd) => (),
1742                _ => {
1743                    return self.defer_parse_node(
1744                        State::FlowNode,
1745                        State::FlowMappingEmptyValue,
1746                        false,
1747                        false,
1748                    );
1749                }
1750            }
1751
1752            self.peek_token()?.0
1753        };
1754
1755        self.pop_state();
1756        self.skip();
1757        Ok((Event::MappingEnd, span))
1758    }
1759
1760    fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1761    where
1762        'input: 'a,
1763    {
1764        if let Token(mark, TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd) =
1765            *self.peek_token()?
1766        {
1767            self.state = State::FlowMappingValue;
1768            Ok((Event::empty_scalar(), mark))
1769        } else {
1770            self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false)
1771        }
1772    }
1773
1774    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1775    where
1776        'input: 'a,
1777    {
1778        let span: Span = {
1779            if empty {
1780                let Token(mark, _) = *self.peek_token()?;
1781                self.state = State::FlowMappingKey;
1782                return Ok((Event::empty_scalar(), mark));
1783            }
1784            match *self.peek_token()? {
1785                Token(span, TokenType::Value) => {
1786                    self.skip();
1787                    let comments = self.next_comment_events()?;
1788                    if comments.is_empty() {
1789                        return self.flow_mapping_value_node_with_empty_span(span);
1790                    }
1791                    if let Ok(Token(_, TokenType::FlowEntry | TokenType::FlowMappingEnd)) =
1792                        self.peek_token()
1793                    {
1794                        self.state = State::FlowMappingKey;
1795                        return Ok(
1796                            self.queue_event_by_span(comments, (Event::empty_scalar(), span))
1797                        );
1798                    }
1799
1800                    self.pending_empty_scalar_span = Some(span);
1801                    self.state = State::FlowMappingValueNode;
1802                    return Ok(self.queue_tail_and_return_first(comments));
1803                }
1804                Token(marker, _) => marker,
1805            }
1806        };
1807
1808        self.state = State::FlowMappingKey;
1809        Ok((Event::empty_scalar(), span))
1810    }
1811
1812    fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1813    where
1814        'input: 'a,
1815    {
1816        let mark = match self.pending_empty_scalar_span.take() {
1817            Some(mark) => mark,
1818            None => Span::empty(self.peek_token()?.0.start),
1819        };
1820        self.flow_mapping_value_node_with_empty_span(mark)
1821    }
1822
1823    fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1824    where
1825        'input: 'a,
1826    {
1827        match self.peek_token()?.1 {
1828            TokenType::FlowEntry | TokenType::FlowMappingEnd => {
1829                self.state = State::FlowMappingKey;
1830                Ok((Event::empty_scalar(), mark))
1831            }
1832            _ => self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false),
1833        }
1834    }
1835
1836    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1837    where
1838        'input: 'a,
1839    {
1840        match *self.peek_token()? {
1841            Token(mark, TokenType::FlowSequenceEnd) => {
1842                self.pop_state();
1843                self.skip();
1844                return Ok((Event::SequenceEnd, mark));
1845            }
1846            Token(_, TokenType::FlowEntry) if !first => {
1847                self.skip();
1848                if let Some(comment) = self.next_comment_event()? {
1849                    self.state = State::FlowSequenceFirstEntry;
1850                    return Ok(comment);
1851                }
1852            }
1853            Token(span, _) if !first => {
1854                return Err(ScanError::new_str(
1855                    span.start,
1856                    "while parsing a flow sequence, expected ',' or ']'",
1857                ));
1858            }
1859            _ => { /* next */ }
1860        }
1861        match *self.peek_token()? {
1862            Token(mark, TokenType::FlowSequenceEnd) => {
1863                self.pop_state();
1864                self.skip();
1865                Ok((Event::SequenceEnd, mark))
1866            }
1867            Token(mark, TokenType::Key) => {
1868                self.state = State::FlowSequenceEntryMappingKey;
1869                self.skip();
1870                Ok((Event::MappingStart(StructureStyle::Flow, 0, None), mark))
1871            }
1872            _ => self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false),
1873        }
1874    }
1875
1876    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1877    where
1878        'input: 'a,
1879    {
1880        match *self.peek_token()? {
1881            Token(mark, TokenType::BlockEntry) => {
1882                self.skip();
1883                let comments = self.next_comment_events()?;
1884                if comments.is_empty() {
1885                    self.indentless_sequence_entry_node_with_empty_span(mark)
1886                } else if let Ok(Token(
1887                    _,
1888                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1889                )) = self.peek_token()
1890                {
1891                    self.state = State::IndentlessSequenceEntry;
1892                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1893                } else {
1894                    self.pending_empty_scalar_span = Some(mark);
1895                    self.state = State::IndentlessSequenceEntryNode;
1896                    Ok(self.queue_tail_and_return_first(comments))
1897                }
1898            }
1899            Token(mark, _) => {
1900                self.pop_state();
1901                Ok((Event::SequenceEnd, mark))
1902            }
1903        }
1904    }
1905
1906    fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
1907    where
1908        'input: 'a,
1909    {
1910        let mark = match self.pending_empty_scalar_span.take() {
1911            Some(mark) => mark,
1912            None => self.peek_token()?.0,
1913        };
1914        self.indentless_sequence_entry_node_with_empty_span(mark)
1915    }
1916
1917    fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1918    where
1919        'input: 'a,
1920    {
1921        if let Token(
1922            _,
1923            TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1924        ) = *self.peek_token()?
1925        {
1926            self.state = State::IndentlessSequenceEntry;
1927            Ok((Event::empty_scalar(), mark))
1928        } else {
1929            self.defer_parse_node(
1930                State::BlockNode,
1931                State::IndentlessSequenceEntry,
1932                true,
1933                false,
1934            )
1935        }
1936    }
1937
1938    fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
1939    where
1940        'input: 'a,
1941    {
1942        match *self.peek_token()? {
1943            Token(mark, TokenType::BlockEnd) => {
1944                self.pop_state();
1945                self.skip();
1946                Ok((Event::SequenceEnd, mark))
1947            }
1948            Token(mark, TokenType::BlockEntry) => {
1949                self.skip();
1950                let comments = self.next_comment_events()?;
1951                if comments.is_empty() {
1952                    self.block_sequence_entry_node_with_empty_span(mark)
1953                } else if let Ok(Token(_, TokenType::BlockEntry | TokenType::BlockEnd)) =
1954                    self.peek_token()
1955                {
1956                    self.state = State::BlockSequenceEntry;
1957                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1958                } else {
1959                    self.pending_empty_scalar_span = Some(mark);
1960                    self.state = State::BlockSequenceEntryNode;
1961                    Ok(self.queue_tail_and_return_first(comments))
1962                }
1963            }
1964            Token(span, _) => Err(ScanError::new_str(
1965                span.start,
1966                "while parsing a block collection, did not find expected '-' indicator",
1967            )),
1968        }
1969    }
1970
1971    fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
1972    where
1973        'input: 'a,
1974    {
1975        let mark = match self.pending_empty_scalar_span.take() {
1976            Some(mark) => mark,
1977            None => self.peek_token()?.0,
1978        };
1979        self.block_sequence_entry_node_with_empty_span(mark)
1980    }
1981
1982    fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1983    where
1984        'input: 'a,
1985    {
1986        if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1987            self.state = State::BlockSequenceEntry;
1988            Ok((Event::empty_scalar(), mark))
1989        } else {
1990            self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false)
1991        }
1992    }
1993
1994    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1995    where
1996        'input: 'a,
1997    {
1998        if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1999            *self.peek_token()?
2000        {
2001            self.state = State::FlowSequenceEntryMappingValue;
2002            Ok((Event::empty_scalar(), mark))
2003        } else {
2004            self.defer_parse_node(
2005                State::FlowNode,
2006                State::FlowSequenceEntryMappingValue,
2007                false,
2008                false,
2009            )
2010        }
2011    }
2012
2013    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
2014    where
2015        'input: 'a,
2016    {
2017        match *self.peek_token()? {
2018            Token(_, TokenType::Value) => {
2019                self.skip();
2020                if let Some(comment) = self.next_comment_event()? {
2021                    self.state = State::FlowSequenceEntryMappingValueNode;
2022                    Ok(comment)
2023                } else {
2024                    self.flow_sequence_entry_mapping_value_node()
2025                }
2026            }
2027            Token(mark, _) => {
2028                self.state = State::FlowSequenceEntryMappingEnd;
2029                Ok((Event::empty_scalar(), mark))
2030            }
2031        }
2032    }
2033
2034    fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2035    where
2036        'input: 'a,
2037    {
2038        let Token(span, ref tok) = *self.peek_token()?;
2039        if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
2040            self.state = State::FlowSequenceEntryMappingEnd;
2041            Ok((Event::empty_scalar(), Span::empty(span.start)))
2042        } else {
2043            self.defer_parse_node(
2044                State::FlowNode,
2045                State::FlowSequenceEntryMappingEnd,
2046                false,
2047                false,
2048            )
2049        }
2050    }
2051
2052    #[allow(clippy::unnecessary_wraps)]
2053    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
2054    where
2055        'input: 'a,
2056    {
2057        self.state = State::FlowSequenceEntry;
2058        let Token(span, _) = *self.peek_token()?;
2059        Ok((Event::MappingEnd, Span::empty(span.start)))
2060    }
2061
2062    /// Resolve a tag from the handle and the suffix.
2063    fn resolve_tag(
2064        &self,
2065        span: Span,
2066        handle: &Cow<'input, str>,
2067        suffix: Cow<'input, str>,
2068    ) -> Result<Cow<'input, Tag>, ScanError> {
2069        let suffix = suffix.into_owned();
2070        let tag = if handle == "!!" {
2071            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
2072            // overridden.
2073            Tag {
2074                handle: self
2075                    .tags
2076                    .get("!!")
2077                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
2078                suffix,
2079            }
2080        } else if handle.is_empty() && suffix == "!" {
2081            // "!" introduces a local tag. Local tags may have their prefix overridden.
2082            match self.tags.get("") {
2083                Some(prefix) => Tag {
2084                    handle: prefix.clone(),
2085                    suffix,
2086                },
2087                None => Tag {
2088                    handle: String::new(),
2089                    suffix,
2090                },
2091            }
2092        } else {
2093            // Lookup handle in our tag directives.
2094            let prefix = self.tags.get(&**handle);
2095            if let Some(prefix) = prefix {
2096                Tag {
2097                    handle: prefix.clone(),
2098                    suffix,
2099                }
2100            } else {
2101                // Otherwise, it may be a local handle. With a local handle, the handle is set to
2102                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
2103                // If the handle is of the form "!foo!", this cannot be a local handle and we need
2104                // to error.
2105                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2106                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
2107                }
2108                Tag {
2109                    handle: handle.to_string(),
2110                    suffix,
2111                }
2112            }
2113        };
2114        Ok(Cow::Owned(tag))
2115    }
2116}
2117
2118impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
2119    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
2120        if let Some(ref x) = self.current {
2121            Some(Ok(x))
2122        } else {
2123            if self.stream_end_emitted {
2124                return None;
2125            }
2126            match self.next_event_impl() {
2127                Ok(token) => self.current = Some(token),
2128                Err(e) => return Some(e.into_result()),
2129            }
2130            self.current.as_ref().map(Ok)
2131        }
2132    }
2133
2134    fn next_event(&mut self) -> Option<ParseResult<'input>> {
2135        if self.stream_end_emitted {
2136            return None;
2137        }
2138
2139        let tok = self.next_event_impl();
2140        if matches!(tok, Ok((Event::StreamEnd, _))) {
2141            self.stream_end_emitted = true;
2142        }
2143        Some(tok)
2144    }
2145
2146    fn load<R: SpannedEventReceiver<'input>>(
2147        &mut self,
2148        recv: &mut R,
2149        multi: bool,
2150    ) -> Result<(), ScanError> {
2151        let mut recv = InfallibleSpannedReceiver(recv);
2152        into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
2153    }
2154
2155    fn try_load<R: TrySpannedEventReceiver<'input>>(
2156        &mut self,
2157        recv: &mut R,
2158        multi: bool,
2159    ) -> Result<(), TryLoadError<R::Error>> {
2160        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
2161        if !self.scanner.stream_started() || stream_start_buffered {
2162            let (ev, span) = self.next_event_impl()?;
2163            if ev != Event::StreamStart {
2164                return Err(TryLoadError::scan(ScanError::new_str(
2165                    span.start,
2166                    "did not find expected <stream-start>",
2167                )));
2168            }
2169            try_emit(recv, ev, span)?;
2170        }
2171
2172        if self.scanner.stream_ended() {
2173            // The scanner has already reached EOF before the document loop, so emit the terminal
2174            // event and stop.
2175            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
2176            return Ok(());
2177        }
2178
2179        loop {
2180            let (ev, span) = self.next_event_impl()?;
2181            let is_doc_end = matches!(ev, Event::DocumentEnd);
2182            let is_stream_end = matches!(ev, Event::StreamEnd);
2183
2184            try_emit(recv, ev, span)?;
2185
2186            if is_stream_end {
2187                return Ok(());
2188            }
2189            if !multi && is_doc_end {
2190                return Ok(());
2191            }
2192        }
2193    }
2194}
2195
2196impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
2197    type Item = Result<(Event<'input>, Span), ScanError>;
2198
2199    fn next(&mut self) -> Option<Self::Item> {
2200        self.next_event()
2201    }
2202}
2203
2204#[cfg(test)]
2205mod test {
2206    use alloc::{
2207        borrow::{Cow, ToOwned},
2208        string::{String, ToString},
2209        vec::Vec,
2210    };
2211    use core::{error::Error as _, fmt};
2212
2213    use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
2214
2215    use super::{
2216        Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
2217        TrySpannedEventReceiver,
2218    };
2219
2220    #[derive(Default)]
2221    struct CollectingSink<'input> {
2222        events: Vec<Event<'input>>,
2223    }
2224
2225    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
2226        fn on_event(&mut self, ev: Event<'input>) {
2227            self.events.push(ev);
2228        }
2229    }
2230
2231    fn first_error_info(input: &str) -> String {
2232        for event in Parser::new_from_str(input) {
2233            if let Err(err) = event {
2234                return err.info().to_owned();
2235            }
2236        }
2237        panic!("expected parser error")
2238    }
2239
2240    #[test]
2241    fn deferred_parse_node_can_emit_comment_before_flow_node() {
2242        let mut parser = Parser::new_from_str("# deferred\nvalue\n");
2243        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2244        assert_eq!(
2245            parser.document_start(true).unwrap().0,
2246            Event::DocumentStart(false)
2247        );
2248
2249        let (event, _) = parser
2250            .defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
2251            .unwrap();
2252
2253        assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
2254        assert_eq!(parser.state, State::FlowNode);
2255    }
2256
2257    #[test]
2258    fn queued_node_event_gets_pending_key_indent() {
2259        let mut parser = Parser::new_from_str("");
2260        let span = Span::empty(Marker::new(0, 1, 0));
2261
2262        parser.pending_key_indent = Some(3);
2263        parser
2264            .queued_events
2265            .push_back((Event::SequenceStart(StructureStyle::Block, 0, None), span));
2266
2267        let (event, span) = parser.next_event_impl().unwrap();
2268
2269        assert!(matches!(
2270            event,
2271            Event::SequenceStart(StructureStyle::Block, 0, None)
2272        ));
2273        assert_eq!(span.indent, Some(3));
2274        assert_eq!(parser.pending_key_indent, None);
2275    }
2276
2277    #[test]
2278    fn state_machine_handles_deferred_flow_node_states() {
2279        let mut parser = Parser::new_from_str("value\n");
2280        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2281        assert_eq!(
2282            parser.document_start(true).unwrap().0,
2283            Event::DocumentStart(false)
2284        );
2285        parser.state = State::FlowNode;
2286        parser.push_state(State::End);
2287
2288        let (event, _) = parser.state_machine().unwrap();
2289
2290        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2291
2292        let mut parser = Parser::new_from_str("value\n");
2293        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2294        assert_eq!(
2295            parser.document_start(true).unwrap().0,
2296            Event::DocumentStart(false)
2297        );
2298        parser.state = State::FlowSequenceEntryMappingValueNode;
2299
2300        let (event, _) = parser.state_machine().unwrap();
2301
2302        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2303    }
2304
2305    #[test]
2306    fn display_resolved_core_tag_without_extra_bang() {
2307        let tag = Tag {
2308            handle: "tag:yaml.org,2002:".to_owned(),
2309            suffix: "str".to_owned(),
2310        };
2311
2312        assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
2313    }
2314
2315    #[test]
2316    fn tag_helpers_distinguish_core_and_local_tags() {
2317        let core = Tag {
2318            handle: "tag:yaml.org,2002:".to_owned(),
2319            suffix: "int".to_owned(),
2320        };
2321        let local = Tag {
2322            handle: "!".to_owned(),
2323            suffix: "thing".to_owned(),
2324        };
2325
2326        assert!(core.is_yaml_core_schema());
2327        assert!(core.is_yaml_core_schema_tag("int"));
2328        assert!(!core.is_yaml_core_schema_tag("str"));
2329        assert!(!core.is_custom());
2330        assert_eq!(core.parts(), ("tag:yaml.org,2002:", "int"));
2331
2332        assert!(!local.is_yaml_core_schema());
2333        assert!(!local.is_yaml_core_schema_tag("thing"));
2334        assert!(local.is_custom());
2335        assert_eq!(local.parts(), ("!", "thing"));
2336        assert_eq!(local.to_string(), "!thing");
2337    }
2338
2339    #[test]
2340    fn event_inspection_helpers_report_node_metadata() {
2341        let tag = Tag {
2342            handle: "!".to_owned(),
2343            suffix: "thing".to_owned(),
2344        };
2345        let scalar = Event::Scalar(
2346            "value".into(),
2347            ScalarStyle::DoubleQuoted,
2348            7,
2349            Some(Cow::Borrowed(&tag)),
2350        );
2351        let sequence =
2352            Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
2353        let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));
2354
2355        assert_eq!(scalar.anchor_id(), Some(7));
2356        assert_eq!(scalar.alias_id(), None);
2357        assert_eq!(scalar.tag(), Some(&tag));
2358        assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
2359        assert!(scalar.is_node());
2360
2361        assert_eq!(sequence.anchor_id(), Some(8));
2362        assert_eq!(sequence.alias_id(), None);
2363        assert_eq!(sequence.tag(), Some(&tag));
2364        assert_eq!(sequence.scalar(), None);
2365        assert!(sequence.is_node());
2366
2367        assert_eq!(mapping.anchor_id(), Some(9));
2368        assert_eq!(mapping.alias_id(), None);
2369        assert_eq!(mapping.tag(), Some(&tag));
2370        assert_eq!(mapping.scalar(), None);
2371        assert!(mapping.is_node());
2372
2373        let alias = Event::Alias(10);
2374        assert_eq!(alias.anchor_id(), None);
2375        assert_eq!(alias.alias_id(), Some(10));
2376        assert_eq!(alias.tag(), None);
2377        assert_eq!(alias.scalar(), None);
2378        assert!(alias.is_node());
2379
2380        let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
2381        assert_eq!(unanchored_scalar.anchor_id(), None);
2382        assert_eq!(unanchored_scalar.alias_id(), None);
2383
2384        let stream_start = Event::StreamStart;
2385        assert_eq!(stream_start.anchor_id(), None);
2386        assert_eq!(stream_start.alias_id(), None);
2387        assert_eq!(stream_start.tag(), None);
2388        assert_eq!(stream_start.scalar(), None);
2389        assert!(!stream_start.is_node());
2390    }
2391
2392    #[test]
2393    fn test_peek_eq_parse() {
2394        let s = "
2395a0 bb: val
2396a1: &x
2397    b1: 4
2398    b2: d
2399a2: 4
2400a3: [1, 2, 3]
2401a4:
2402    - [a1, a2]
2403    - 2
2404a5: *x
2405";
2406        let mut p = Parser::new_from_str(s);
2407        loop {
2408            let event_peek = p.peek().unwrap().unwrap().clone();
2409            let event = p.next_event().unwrap().unwrap();
2410            assert_eq!(event, event_peek);
2411            if event.0 == Event::StreamEnd {
2412                break;
2413            }
2414        }
2415    }
2416
2417    #[test]
2418    fn test_repeated_peek_returns_buffered_event() {
2419        let mut parser = Parser::new_from_str("key: value\n");
2420
2421        let first_peek = parser.peek().unwrap().unwrap().clone();
2422        let second_peek = parser.peek().unwrap().unwrap().clone();
2423        let next = parser.next_event().unwrap().unwrap();
2424
2425        assert_eq!(first_peek, second_peek);
2426        assert_eq!(first_peek, next);
2427    }
2428
2429    #[test]
2430    fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
2431        let mut parser = Parser::new_from_str("a: [1, 2");
2432
2433        loop {
2434            match parser.peek() {
2435                Some(Ok(_)) => {
2436                    parser.next_event().unwrap().unwrap();
2437                }
2438                Some(Err(error)) => {
2439                    assert_eq!(error.info(), "unclosed bracket '['");
2440                    break;
2441                }
2442                None => panic!("expected parse error"),
2443            }
2444        }
2445    }
2446
2447    #[test]
2448    fn test_peek_and_next_return_none_after_stream_end() {
2449        let mut parser = Parser::new_from_str("");
2450
2451        assert!(matches!(
2452            parser.next_event().unwrap().unwrap().0,
2453            Event::StreamStart
2454        ));
2455        assert!(matches!(
2456            parser.next_event().unwrap().unwrap().0,
2457            Event::StreamEnd
2458        ));
2459        assert!(parser.next_event().is_none());
2460        assert!(parser.peek().is_none());
2461    }
2462
2463    #[test]
2464    fn test_load_after_stream_already_ended_emits_stream_end() {
2465        let mut parser = Parser::new_from_str("");
2466        while parser.next_event().is_some() {}
2467
2468        let mut sink = CollectingSink::default();
2469        parser.load(&mut sink, true).unwrap();
2470
2471        assert_eq!(sink.events, vec![Event::StreamEnd]);
2472    }
2473
2474    #[test]
2475    fn test_load_visits_nested_collection_events() {
2476        let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
2477        let mut sink = CollectingSink::default();
2478
2479        parser.load(&mut sink, true).unwrap();
2480
2481        assert_eq!(
2482            sink.events,
2483            vec![
2484                Event::StreamStart,
2485                Event::DocumentStart(false),
2486                Event::MappingStart(StructureStyle::Block, 0, None),
2487                Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
2488                Event::SequenceStart(StructureStyle::Block, 0, None),
2489                Event::MappingStart(StructureStyle::Block, 0, None),
2490                Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
2491                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2492                Event::MappingEnd,
2493                Event::SequenceStart(StructureStyle::Flow, 0, None),
2494                Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
2495                Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
2496                Event::SequenceEnd,
2497                Event::SequenceEnd,
2498                Event::MappingEnd,
2499                Event::DocumentEnd,
2500                Event::StreamEnd,
2501            ]
2502        );
2503    }
2504
2505    #[derive(Clone, Debug, PartialEq, Eq)]
2506    enum ValidationError {
2507        ForbiddenValue,
2508    }
2509
2510    #[derive(Debug)]
2511    struct ReceiverFailure;
2512
2513    impl fmt::Display for ReceiverFailure {
2514        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2515            write!(f, "receiver failed")
2516        }
2517    }
2518
2519    impl core::error::Error for ReceiverFailure {}
2520
2521    struct FailingSink<'input> {
2522        events: Vec<Event<'input>>,
2523    }
2524
2525    impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
2526        type Error = ValidationError;
2527
2528        fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
2529            let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
2530            self.events.push(ev);
2531            if should_fail {
2532                Err(ValidationError::ForbiddenValue)
2533            } else {
2534                Ok(())
2535            }
2536        }
2537    }
2538
2539    #[test]
2540    fn test_try_load_stops_on_receiver_error() {
2541        let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
2542        let mut sink = FailingSink { events: Vec::new() };
2543
2544        let err = parser.try_load(&mut sink, true).unwrap_err();
2545
2546        assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
2547        assert!(sink
2548            .events
2549            .iter()
2550            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
2551        assert!(sink
2552            .events
2553            .iter()
2554            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
2555        assert!(!sink
2556            .events
2557            .iter()
2558            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
2559    }
2560
2561    struct SpannedFailingSink {
2562        failed_span: Option<Span>,
2563    }
2564
2565    impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
2566        type Error = Span;
2567
2568        fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
2569            if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
2570                self.failed_span = Some(span);
2571                Err(span)
2572            } else {
2573                Ok(())
2574            }
2575        }
2576    }
2577
2578    #[test]
2579    fn test_try_load_spanned_receiver_gets_span() {
2580        let mut parser = Parser::new_from_str("value: bad\n");
2581        let mut sink = SpannedFailingSink { failed_span: None };
2582
2583        let err = parser.try_load(&mut sink, false).unwrap_err();
2584
2585        let TryLoadError::Receiver(span) = err else {
2586            panic!("expected receiver error");
2587        };
2588
2589        assert_eq!(Some(span), sink.failed_span);
2590        assert!(!span.is_empty());
2591    }
2592
2593    struct NeverFails {
2594        count: usize,
2595    }
2596
2597    impl<'input> TryEventReceiver<'input> for NeverFails {
2598        type Error = ValidationError;
2599
2600        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
2601            self.count += 1;
2602            Ok(())
2603        }
2604    }
2605
2606    #[test]
2607    fn test_try_load_returns_scan_error() {
2608        let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
2609        let mut sink = NeverFails { count: 0 };
2610
2611        let err = parser.try_load(&mut sink, true).unwrap_err();
2612
2613        let TryLoadError::Scan(err) = err else {
2614            panic!("expected scan error");
2615        };
2616        assert_eq!(err.info(), "duplicate version directive");
2617    }
2618
2619    #[test]
2620    fn test_try_load_error_display_and_source_cover_both_variants() {
2621        let scan = ScanError::new_str(Marker::new(3, 1, 3), "bad yaml");
2622        let scan_err: TryLoadError<ReceiverFailure> = scan.into();
2623
2624        assert!(scan_err.to_string().starts_with("parser error: bad yaml"));
2625        assert!(scan_err.source().is_some());
2626
2627        let receiver_err = TryLoadError::Receiver(ReceiverFailure);
2628
2629        assert_eq!(receiver_err.to_string(), "receiver error: receiver failed");
2630        assert!(receiver_err.source().is_some());
2631    }
2632
2633    #[test]
2634    fn test_try_load_document_rejects_non_document_start_event() {
2635        let mut parser = Parser::new_from_str("");
2636        let span = Span::empty(Marker::new(0, 1, 0));
2637        let mut sink = NeverFails { count: 0 };
2638
2639        let err = parser
2640            .try_load_document(
2641                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2642                span,
2643                &mut sink,
2644            )
2645            .unwrap_err();
2646
2647        let TryLoadError::Scan(err) = err else {
2648            panic!("expected scan error");
2649        };
2650        assert_eq!(err.info(), "did not find expected <document-start>");
2651    }
2652
2653    #[test]
2654    fn test_try_load_requires_buffered_stream_start() {
2655        let mut parser = Parser::new_from_str("");
2656        let span = Span::empty(Marker::new(0, 1, 0));
2657        parser.current = Some((
2658            Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2659            span,
2660        ));
2661        let mut sink = NeverFails { count: 0 };
2662
2663        let err = parser.try_load(&mut sink, true).unwrap_err();
2664
2665        let TryLoadError::Scan(err) = err else {
2666            panic!("expected scan error");
2667        };
2668        assert_eq!(err.info(), "did not find expected <stream-start>");
2669    }
2670
2671    #[test]
2672    fn test_try_load_after_stream_already_ended_emits_stream_end() {
2673        let mut parser = Parser::new_from_str("");
2674        while parser.next_event().is_some() {}
2675
2676        let mut sink = FailingSink { events: Vec::new() };
2677        parser.try_load(&mut sink, true).unwrap();
2678
2679        assert_eq!(sink.events, vec![Event::StreamEnd]);
2680    }
2681
2682    #[test]
2683    fn test_load_single_document_stops_before_next_document() {
2684        let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
2685        let mut sink = CollectingSink::default();
2686
2687        parser.load(&mut sink, false).unwrap();
2688
2689        assert!(sink
2690            .events
2691            .iter()
2692            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
2693        assert!(!sink
2694            .events
2695            .iter()
2696            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
2697        assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
2698    }
2699
2700    #[test]
2701    fn test_duplicate_version_directive_errors() {
2702        assert_eq!(
2703            first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
2704            "duplicate version directive"
2705        );
2706    }
2707
2708    #[test]
2709    fn test_duplicate_tag_directive_errors() {
2710        assert_eq!(
2711            first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
2712            "the TAG directive must only be given at most once per handle in the same document"
2713        );
2714    }
2715
2716    #[test]
2717    fn test_directive_after_implicit_document_requires_explicit_end() {
2718        assert_eq!(
2719            first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
2720            "missing explicit document end marker before directive"
2721        );
2722    }
2723
2724    #[test]
2725    fn test_anchor_offset_overflow_reports_error() {
2726        let mut parser = Parser::new_from_str("&a value");
2727        parser.set_anchor_offset(usize::MAX);
2728
2729        let err = parser
2730            .find_map(Result::err)
2731            .expect("anchor registration should overflow");
2732
2733        assert_eq!(
2734            err.info(),
2735            "while parsing anchor, anchor count exceeded supported limit"
2736        );
2737    }
2738
2739    #[test]
2740    fn test_alias_resolves_to_registered_anchor_id() {
2741        let events = Parser::new_from_str("- &a value\n- *a\n")
2742            .map(|event| event.unwrap().0)
2743            .collect::<Vec<_>>();
2744
2745        assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
2746    }
2747
2748    #[test]
2749    fn test_anchor_then_tag_applies_both_to_scalar() {
2750        let events = Parser::new_from_str("&a !!str value")
2751            .map(|event| event.unwrap().0)
2752            .collect::<Vec<_>>();
2753
2754        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2755            .iter()
2756            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2757        else {
2758            panic!("expected tagged anchored scalar");
2759        };
2760
2761        assert_eq!(value, "value");
2762        assert_eq!(*anchor_id, 1);
2763        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2764        assert_eq!(tag.suffix, "str");
2765    }
2766
2767    #[test]
2768    fn test_tag_then_anchor_applies_both_to_scalar() {
2769        let events = Parser::new_from_str("!!str &a value")
2770            .map(|event| event.unwrap().0)
2771            .collect::<Vec<_>>();
2772
2773        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2774            .iter()
2775            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2776        else {
2777            panic!("expected tagged anchored scalar");
2778        };
2779
2780        assert_eq!(value, "value");
2781        assert_eq!(*anchor_id, 1);
2782        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2783        assert_eq!(tag.suffix, "str");
2784    }
2785
2786    #[test]
2787    fn test_multiple_tag_directives_are_kept_within_document() {
2788        let text = r"
2789%TAG !a! tag:a,2024:
2790%TAG !b! tag:b,2024:
2791---
2792first: !a!x foo
2793second: !b!y bar
2794";
2795
2796        let mut seen_a = false;
2797        let mut seen_b = false;
2798        for event in Parser::new_from_str(text) {
2799            let (event, _) = event.unwrap();
2800            if let Event::Scalar(_, _, _, Some(tag)) = event {
2801                if tag.handle == "tag:a,2024:" {
2802                    seen_a = true;
2803                } else if tag.handle == "tag:b,2024:" {
2804                    seen_b = true;
2805                }
2806            }
2807        }
2808
2809        assert!(seen_a);
2810        assert!(seen_b);
2811    }
2812
2813    #[test]
2814    fn test_tags_are_cleared_when_next_document_has_no_directives() {
2815        let text = r"
2816%TAG !t! tag:test,2024:
2817--- !t!1
2818foo
2819--- !t!2
2820bar
2821";
2822
2823        let mut parser = Parser::new_from_str(text);
2824        for event in parser.by_ref() {
2825            let (event, _) = event.unwrap();
2826            if let Event::DocumentEnd = event {
2827                break;
2828            }
2829        }
2830
2831        match parser.next().unwrap().unwrap().0 {
2832            Event::DocumentStart(true) => {}
2833            _ => panic!("expected explicit second document start"),
2834        }
2835
2836        let err = parser.next().unwrap().unwrap_err();
2837        assert!(format!("{err}").contains("the handle wasn't declared"));
2838    }
2839
2840    #[test]
2841    fn test_pull_parser_clears_anchors_between_documents() {
2842        let mut parser = Parser::new_from_str(
2843            "--- &a value
2844--- *a
2845",
2846        );
2847
2848        for event in parser.by_ref() {
2849            let (event, _) = event.unwrap();
2850            if matches!(event, Event::DocumentEnd) {
2851                break;
2852            }
2853        }
2854
2855        match parser.next().unwrap().unwrap().0 {
2856            Event::DocumentStart(true) => {}
2857            _ => panic!("expected explicit second document start"),
2858        }
2859
2860        let err = parser.next().unwrap().unwrap_err();
2861        assert!(format!("{err}").contains("unknown anchor"));
2862    }
2863
2864    #[test]
2865    fn test_keep_tags_across_multiple_documents() {
2866        let text = r#"
2867%YAML 1.1
2868%TAG !t! tag:test,2024:
2869--- !t!1 &1
2870foo: "bar"
2871--- !t!2 &2
2872baz: "qux"
2873"#;
2874        for x in Parser::new_from_str(text).keep_tags(true) {
2875            let x = x.unwrap();
2876            if let Event::MappingStart(_, _, tag) = x.0 {
2877                let tag = tag.unwrap();
2878                assert_eq!(tag.handle, "tag:test,2024:");
2879            }
2880        }
2881
2882        for x in Parser::new_from_str(text).keep_tags(false) {
2883            if x.is_err() {
2884                // Test successful
2885                return;
2886            }
2887        }
2888        panic!("Test failed, did not encounter error")
2889    }
2890
2891    #[test]
2892    fn test_flow_sequence_mapping_allows_empty_key() {
2893        let parser = Parser::new_from_str("[?: value]");
2894        for event in parser {
2895            event.expect("parser should accept flow sequence mappings with empty keys");
2896        }
2897    }
2898
2899    #[test]
2900    fn test_keep_tags_does_not_persist_default_tag_handles() {
2901        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
2902
2903        let mut int_tags = Vec::new();
2904        for event in Parser::new_from_str(text).keep_tags(true) {
2905            let event = event.unwrap().0;
2906            if let Event::Scalar(_, _, _, Some(tag)) = event {
2907                if tag.suffix == "int" {
2908                    int_tags.push(tag.handle.clone());
2909                }
2910            }
2911        }
2912
2913        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
2914    }
2915
2916    #[test]
2917    fn test_resolve_tag_uses_overridden_local_prefix() {
2918        let mut parser = Parser::new_from_str("");
2919        parser
2920            .tags
2921            .insert(String::new(), "tag:local.example,2024:".to_string());
2922
2923        let tag = parser
2924            .resolve_tag(
2925                Span::empty(Marker::new(0, 1, 0)),
2926                &Cow::Borrowed(""),
2927                Cow::Borrowed("!"),
2928            )
2929            .unwrap();
2930
2931        assert_eq!(tag.handle, "tag:local.example,2024:");
2932        assert_eq!(tag.suffix, "!");
2933    }
2934
2935    #[test]
2936    fn test_load_after_peek_stream_start() {
2937        #[derive(Default)]
2938        struct Sink<'input> {
2939            events: Vec<Event<'input>>,
2940        }
2941
2942        impl<'input> EventReceiver<'input> for Sink<'input> {
2943            fn on_event(&mut self, ev: Event<'input>) {
2944                self.events.push(ev);
2945            }
2946        }
2947
2948        let mut parser = Parser::new_from_str("key: value\n");
2949        let mut sink = Sink::default();
2950
2951        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
2952        parser.load(&mut sink, false).unwrap();
2953
2954        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
2955        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
2956    }
2957}