// granit_parser/parser.rs
//! Home to the YAML Parser.
//!
//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{
10        Marker, Placement, QueuedToken, QueuedTokenType, ScalarStyle, ScanError, Scanner, Span,
11    },
12    BufferedInput,
13};
14
15use alloc::{
16    borrow::Cow,
17    collections::{BTreeMap, BTreeSet, VecDeque},
18    string::{String, ToString},
19    vec::Vec,
20};
21use core::{
22    convert::Infallible,
23    fmt::{self, Display},
24};
25
/// Internal states of the parser's state machine.
///
/// The parser keeps its current state in `Parser::state` and a stack of states to return to in
/// `Parser::states`.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockNodeOrIndentlessSequence,
    FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    IndentlessSequenceEntryNode,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingKeyNode,
    BlockMappingValue,
    BlockMappingValueNode,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingValueNode,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingKeyNode,
    FlowMappingValue,
    FlowMappingValueNode,
    FlowMappingEmptyValue,
    BlockSequenceEntryNode,
    End,
}
60
61/// An event generated by the YAML parser.
62///
63/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
64/// [`EventReceiver`] trait.
65#[derive(Clone, PartialEq, Debug, Eq)]
66pub enum Event<'input> {
67    /// Reserved for internal use.
68    Nothing,
69    /// Event generated at the very beginning of parsing.
70    StreamStart,
71    /// Last event that will be generated by the parser. Signals EOF.
72    StreamEnd,
73    /// The start of a YAML document.
74    ///
75    /// When the boolean is `true`, it is an explicit document start
76    /// directive (`---`).
77    ///
78    /// When the boolean is `false`, it is an implicit document start
79    /// (without `---`).
80    DocumentStart(bool),
81    /// The end of a YAML document.
82    ///
83    /// This event is emitted for both explicit document end markers (`...`) and implicit document
84    /// ends.
85    DocumentEnd,
86    /// A YAML alias.
87    Alias(
88        /// The anchor ID the alias refers to.
89        usize,
90    ),
91    /// A YAML source comment.
92    ///
93    /// Comments are presentation metadata, not YAML data nodes. The payload is the raw text
94    /// exactly after `#`, excluding only the line break. The placement is a best-effort hint for
95    /// correlating the comment with nearby YAML presentation. The companion parser [`Span`] covers
96    /// the whole source comment, including `#` and excluding the line break.
97    Comment(
98        /// Raw comment payload exactly after `#`, excluding only the line break.
99        Cow<'input, str>,
100        /// Best-effort placement relative to nearby YAML content.
101        Placement,
102    ),
103    /// A YAML scalar value.
104    Scalar(
105        /// The scalar value after YAML escape processing.
106        Cow<'input, str>,
107        /// The source notation used for the scalar.
108        ScalarStyle,
109        /// The anchor ID defined on this scalar, or `0` if it has no anchor.
110        usize,
111        /// The resolved tag attached to this scalar, if any.
112        Option<Cow<'input, Tag>>,
113    ),
114    /// The start of a YAML sequence (array).
115    SequenceStart(
116        /// The notation style used for the sequence.
117        StructureStyle,
118        /// The anchor ID defined on this sequence, or `0` if it has no anchor.
119        usize,
120        /// The resolved tag attached to this sequence, if any.
121        Option<Cow<'input, Tag>>,
122    ),
123    /// The end of a YAML sequence (array).
124    SequenceEnd,
125    /// The start of a YAML mapping (object, hash).
126    MappingStart(
127        /// The notation style used for the mapping (Flow or Block).
128        StructureStyle,
129        /// The anchor ID defined on this mapping, or `0` if it has no anchor.
130        usize,
131        /// The resolved tag attached to this mapping, if any.
132        Option<Cow<'input, Tag>>,
133    ),
134    /// The end of a YAML mapping (object, hash).
135    MappingEnd,
136}
137
/// The notation style used for a YAML sequence or mapping.
///
/// [`StructureStyle::Block`] means block notation:
///
/// ```yaml
/// items:
///   - milk
///   - bread
/// mapping:
///   name: Ada
///   active: true
/// ```
///
/// [`StructureStyle::Flow`] means flow notation:
///
/// ```yaml
/// items: [milk, bread]
/// mapping: {name: Ada, active: true}
/// ```
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum StructureStyle {
    /// Block notation, such as `- item` sequences and `key: value` mappings.
    Block,
    /// Flow notation, such as `[item]` sequences and `{key: value}` mappings.
    Flow,
}
164
/// A YAML tag.
///
/// A tag stores both its resolved form (`handle` + `suffix`) and the handle as the author wrote
/// it (`original_handle`).
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Resolved tag handle or prefix.
    ///
    /// Examples include `tag:yaml.org,2002:` for core-schema tags and `!` for local tags.
    pub handle: String,
    /// Tag suffix following the resolved handle or prefix.
    pub suffix: String,
    /// Tag handle as written in the source before `%TAG` directive resolution.
    ///
    /// For example, with `%TAG !e! tag:example.com,2000:`, a source tag `!e!keep` is resolved
    /// as `handle = "tag:example.com,2000:"` and `suffix = "keep"`, while
    /// `original_handle = "!e!"`.
    pub original_handle: String,
}
181
/// URI prefix shared by all tags in the `yaml.org,2002` tag namespace.
const YAML_CORE_SCHEMA_PREFIX: &str = "tag:yaml.org,2002:";

// YAML 1.2.2 defines Core Schema tags by reference:
// - §10.3.1 says Core Schema uses the same tags as YAML's JSON Schema.
// - §10.2.1 adds null/bool/int/float to the Failsafe Schema.
// - §10.1.1 defines the Failsafe Schema tags map/seq/str.
// Therefore the YAML 1.2.2 Core Schema tag suffixes are:
// bool, float, int, map, null, seq, and str.
const YAML_CORE_SCHEMA_SUFFIXES: [&str; 7] = ["bool", "float", "int", "map", "null", "seq", "str"];

/// Return `suffix` unchanged when it is one of the known Core Schema tag suffixes.
///
/// Returns `None` for any other string, including the empty string.
fn known_yaml_core_schema_suffix(suffix: &str) -> Option<&str> {
    YAML_CORE_SCHEMA_SUFFIXES
        .contains(&suffix)
        .then_some(suffix)
}

/// Find the known Core Schema suffix that equals `handle_tail` followed by `suffix`.
///
/// This covers tags whose suffix text was split across the handle and the suffix, for instance
/// `handle_tail = "i"` and `suffix = "nt"` for `int`. The match is made without allocating the
/// concatenated string: each candidate must start with `handle_tail` and its remainder must
/// equal `suffix`.
fn known_yaml_core_schema_suffix_from_split(
    handle_tail: &str,
    suffix: &str,
) -> Option<&'static str> {
    YAML_CORE_SCHEMA_SUFFIXES.iter().copied().find(|candidate| {
        candidate
            .strip_prefix(handle_tail)
            .is_some_and(|candidate_tail| candidate_tail == suffix)
    })
}
208
209impl Tag {
210    /// Create a tag from resolved parts.
211    ///
212    /// This is mainly useful for tests and consumers constructing parser-compatible tags by hand.
213    /// When the original source handle matters, use [`Self::with_original_handle`].
214    #[must_use]
215    pub fn new(handle: impl Into<String>, suffix: impl Into<String>) -> Self {
216        let handle = handle.into();
217        Self {
218            original_handle: handle.clone(),
219            handle,
220            suffix: suffix.into(),
221        }
222    }
223
224    /// Create a tag from resolved parts and the handle as written in the source.
225    #[must_use]
226    pub fn with_original_handle(
227        handle: impl Into<String>,
228        suffix: impl Into<String>,
229        original_handle: impl Into<String>,
230    ) -> Self {
231        Self {
232            handle: handle.into(),
233            suffix: suffix.into(),
234            original_handle: original_handle.into(),
235        }
236    }
237
238    /// Return the resolved YAML core-schema suffix for this tag, if it is a known core tag.
239    ///
240    /// The tag is matched by its resolved URI, not by the source handle spelling. For example,
241    /// `!!int`, `!<tag:yaml.org,2002:int>`, and a `%TAG` split such as
242    /// `%TAG !m! tag:yaml.org,2002:i` followed by `!m!nt` all return `Some("int")`.
243    ///
244    /// Authored tag parts are left unchanged; use [`Self::parts`], [`Self::original_parts`], or
245    /// [`Self::original`] to inspect those spellings.
246    #[must_use]
247    pub fn core_suffix(&self) -> Option<&str> {
248        if self.handle.len() <= YAML_CORE_SCHEMA_PREFIX.len() {
249            let remaining_prefix = YAML_CORE_SCHEMA_PREFIX.strip_prefix(&self.handle)?;
250            let suffix = self.suffix.strip_prefix(remaining_prefix)?;
251            return known_yaml_core_schema_suffix(suffix);
252        }
253
254        let handle_tail = self.handle.strip_prefix(YAML_CORE_SCHEMA_PREFIX)?;
255        known_yaml_core_schema_suffix_from_split(handle_tail, &self.suffix)
256    }
257
258    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
259    ///
260    /// The YAML specification specifies [a list of
261    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. This function uses
262    /// the resolved tag URI, so it is independent of how the tag was split between handle and
263    /// suffix.
264    ///
265    /// # Return
266    /// Returns `true` if the resolved tag is a known YAML 1.2.2 Core Schema tag.
267    #[must_use]
268    pub fn is_yaml_core_schema(&self) -> bool {
269        self.core_suffix().is_some()
270    }
271
272    /// Return true for a YAML core-schema tag with the given suffix.
273    ///
274    /// For example, this matches core-schema tags such as `!!str`, `!!int`, `!!float`, `!!bool`,
275    /// `!!null`, `!!map`, or `!!seq` after tag resolution.
276    #[must_use]
277    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
278        self.core_suffix()
279            .is_some_and(|core_suffix| core_suffix == suffix)
280    }
281
282    /// Return true for a tag outside the YAML 1.2.2 Core Schema tag set.
283    ///
284    /// This checks the resolved tag URI, not just the tag handle spelling. For example,
285    /// `tag:yaml.org,2002:timestamp` is in the YAML tag namespace, but it is not a YAML 1.2.2
286    /// Core Schema tag.
287    #[must_use]
288    pub fn is_custom(&self) -> bool {
289        !self.is_yaml_core_schema()
290    }
291
292    /// Return the tag as `(handle, suffix)`.
293    #[must_use]
294    pub fn parts(&self) -> (&str, &str) {
295        (&self.handle, &self.suffix)
296    }
297
298    /// Return the tag as `(original_handle, suffix)` using the handle from the source token.
299    ///
300    /// This is useful when a consumer needs author spelling such as `!e!keep` instead of the
301    /// resolved URI tag `tag:example.com,2000:keep`.
302    #[must_use]
303    pub fn original_parts(&self) -> (&str, &str) {
304        (&self.original_handle, &self.suffix)
305    }
306
307    /// Return the tag spelling reconstructed from the source handle and suffix.
308    ///
309    /// For ordinary shorthand tags this returns the author-facing spelling, such as `!e!keep` or
310    /// `!!str`. For verbatim tags this returns a normalized verbatim spelling such as
311    /// `!<tag:example.com,2000:thing>`, not necessarily the byte-exact source token.
312    #[must_use]
313    pub fn original(&self) -> String {
314        if self.original_handle.is_empty() && self.suffix != "!" {
315            let mut tag = String::with_capacity(self.suffix.len() + 3);
316            tag.push_str("!<");
317            tag.push_str(&self.suffix);
318            tag.push('>');
319            return tag;
320        }
321
322        let mut tag = String::with_capacity(self.original_handle.len() + self.suffix.len());
323        tag.push_str(&self.original_handle);
324        tag.push_str(&self.suffix);
325        tag
326    }
327}
328
329impl Display for Tag {
330    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
331        if self.handle == "!" {
332            write!(f, "!{}", self.suffix)
333        } else {
334            write!(f, "{}{}", self.handle, self.suffix)
335        }
336    }
337}
338
339impl<'input> Event<'input> {
340    /// Return the anchor ID defined by this event, if any.
341    ///
342    /// Returns `Some(id)` when this event defines an anchor on a scalar, sequence, or mapping
343    /// node. Returns `None` for all other events, including `Alias` (which references an anchor
344    /// rather than defining one; use [`Self::alias_id`] to obtain the target anchor ID).
345    #[must_use]
346    pub fn anchor_id(&self) -> Option<usize> {
347        match self {
348            Self::Scalar(_, _, anchor_id, _)
349            | Self::SequenceStart(_, anchor_id, _)
350            | Self::MappingStart(_, anchor_id, _)
351                if *anchor_id != 0 =>
352            {
353                Some(*anchor_id)
354            }
355            _ => None,
356        }
357    }
358
359    /// Return the target anchor ID referenced by this alias event, if this event is an alias.
360    #[must_use]
361    pub fn alias_id(&self) -> Option<usize> {
362        match self {
363            Self::Alias(anchor_id) => Some(*anchor_id),
364            _ => None,
365        }
366    }
367
368    /// Return the resolved tag carried by this node event, if any.
369    #[must_use]
370    pub fn tag(&self) -> Option<&Tag> {
371        match self {
372            Self::Scalar(_, _, _, tag)
373            | Self::SequenceStart(_, _, tag)
374            | Self::MappingStart(_, _, tag) => tag.as_deref(),
375            _ => None,
376        }
377    }
378
379    /// Return the scalar value and style, if this event is a scalar.
380    #[must_use]
381    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
382        match self {
383            Self::Scalar(value, style, _, _) => Some((value.as_ref(), *style)),
384            _ => None,
385        }
386    }
387
388    /// Return whether this event represents a YAML node (value).
389    ///
390    /// Returns `true` for scalars, collection starts, and aliases — all events that produce a
391    /// value in the document tree. Returns `false` for structural events such as `StreamStart`,
392    /// `DocumentStart`, collection ends, etc.
393    #[must_use]
394    pub fn is_node(&self) -> bool {
395        matches!(
396            self,
397            Self::Alias(_) | Self::Scalar(..) | Self::SequenceStart(..) | Self::MappingStart(..)
398        )
399    }
400
401    /// Create an empty scalar.
402    fn empty_scalar() -> Self {
403        // a null scalar
404        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
405    }
406
407    /// Create an empty scalar with the given anchor.
408    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
409        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
410    }
411}
412
// Preserve span ordering for normal-sized comment groups. Longer runs in syntactically ambiguous
// positions are rejected before they can grow the parser queue without bound.
const MAX_BUFFERED_COMMENT_EVENTS: usize = 32;
416
417/// A YAML parser.
418#[derive(Debug)]
419pub struct Parser<'input, T: BorrowedInput<'input>> {
420    /// The underlying scanner from which we pull tokens.
421    scanner: Scanner<'input, T>,
422    /// The stack of _previous_ states we were in.
423    ///
424    /// States are pushed in the context of subobjects to this stack. The top-most element is the
425    /// state in which to come back to when exiting the current state.
426    states: Vec<State>,
427    /// The state in which we currently are.
428    state: State,
429    /// The next token from the scanner.
430    token: Option<QueuedToken<'input>>,
431    /// The next YAML event to emit.
432    current: Option<(Event<'input>, Span)>,
433    /// YAML events buffered by parser states that need to emit an earlier synthetic node first.
434    queued_events: VecDeque<(Event<'input>, Span)>,
435
436    /// Pending indentation hint to be attached to the next emitted event span.
437    ///
438    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
439    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
440    /// itself).
441    pending_key_indent: Option<usize>,
442    /// Pending anchor ID to attach to a node after an intervening comment.
443    pending_node_anchor_id: usize,
444    /// Pending tag to attach to a node after an intervening comment.
445    pending_node_tag: Option<Cow<'input, Tag>>,
446    /// Pending explicit tag token start to attach to a node after an intervening comment.
447    pending_node_tag_start: Option<Marker>,
448    /// Pending empty scalar span captured before an intervening comment.
449    pending_empty_scalar_span: Option<Span>,
450    /// Anchors that have been encountered in the YAML document.
451    anchors: BTreeMap<Cow<'input, str>, usize>,
452    /// Next ID available for an anchor.
453    ///
454    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
455    /// return for the next anchor and the count of anchor IDs emitted.
456    anchor_id_count: usize,
457    /// The tag directives (`%TAG`) the parser has encountered.
458    ///
459    /// Key is the handle, and value is the prefix.
460    tags: BTreeMap<String, String>,
461    /// Whether we have emitted [`Event::StreamEnd`].
462    ///
463    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
464    /// [`Self::token`], this is set to `false`.
465    stream_end_emitted: bool,
466    /// Make tags global across all documents.
467    keep_tags: bool,
468}
469
470/// Trait to be implemented in order to use the low-level parsing API.
471///
472/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
473/// for each YAML [`Event`] that occurs.
474/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
475/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
476/// [`SpannedEventReceiver`] automatically.
477/// Non-spanned receivers receive [`Event::Comment(text, placement)`](Event::Comment) like any
478/// other event, but without source location. Spanned receivers receive the same comment event plus
479/// the comment [`Span`] in [`SpannedEventReceiver::on_event`]. For comments, that span covers the
480/// whole source comment, including `#` and excluding the line break. When parsing from an input
481/// with byte offsets, such as [`Parser::new_from_str`], [`Span::slice`] returns that source
482/// comment text.
483///
484/// # Event hierarchy
485/// The event stream starts with an [`Event::StreamStart`] event followed by an
486/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
487/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
488/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
489///
490/// In a mapping, key-values are sent as consecutive data events. Comments can appear in the raw
491/// event stream between a key and its value; they are presentation metadata, not YAML data nodes.
492/// Consumers building YAML data trees should ignore [`Event::Comment`]. Any key/value alternation
493/// shortcut applies only after filtering out comments and other presentation metadata. After that
494/// filtering, the first event after an [`Event::MappingStart`] will be the key, and the following
495/// event will be its value. If the mapping contains no sub-mapping or sub-sequence, then even events
496/// (starting from 0) will always be keys and odd ones will always be values. The mapping ends when
497/// an [`Event::MappingEnd`] event is received.
498///
499/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
500///
501/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
502/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
503/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
504/// be part of the value and not another key-value pair or element in the sequence.
505///
506/// For instance, the following YAML:
507/// ```yaml
508/// a: b
509/// c:
510///   d: e
511/// f:
512///   - g
513///   - h
514/// ```
515/// will emit (indented and commented for visibility):
516/// ```text
517/// StreamStart, DocumentStart, MappingStart,
518///   Scalar("a", ..), Scalar("b", ..)
519///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
520///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
521/// MappingEnd, DocumentEnd, StreamEnd
522/// ```
523///
524/// # Example
525/// ```
526/// # use granit_parser::{Event, EventReceiver, Parser};
527/// #
528/// /// Sink of events. Collects them into an array.
529/// struct EventSink<'input> {
530///     events: Vec<Event<'input>>,
531/// }
532///
533/// /// Implement `on_event`, pushing into `self.events`.
534/// impl<'input> EventReceiver<'input> for EventSink<'input> {
535///     fn on_event(&mut self, ev: Event<'input>) {
536///         self.events.push(ev);
537///     }
538/// }
539///
540/// /// Load events from a YAML string.
541/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
542///     let mut sink = EventSink { events: Vec::new() };
543///     let mut parser = Parser::new_from_str(yaml);
544///     // Load events using our sink as the receiver.
545///     parser.load(&mut sink, true).unwrap();
546///     sink.events
547/// }
548/// ```
549pub trait EventReceiver<'input> {
550    /// Handler called for each YAML event that is emitted by the parser.
551    fn on_event(&mut self, ev: Event<'input>);
552}
553
554/// Trait to be implemented for using the low-level parsing API.
555///
556/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
557/// For [`Event::Comment`], the span is the source range of the whole comment.
558pub trait SpannedEventReceiver<'input> {
559    /// Handler called for each event that occurs.
560    fn on_event(&mut self, ev: Event<'input>, span: Span);
561}
562
563impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
564    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
565        self.on_event(ev);
566    }
567}
568
569/// Trait to be implemented for fallible event handling without source spans.
570///
571/// This is the fallible counterpart to [`EventReceiver`]. Use it with [`Parser::try_load`] when
572/// event handling may need to stop parsing by returning an application error.
573pub trait TryEventReceiver<'input> {
574    /// Error returned by this receiver.
575    type Error;
576
577    /// Handler called for each YAML event that is emitted by the parser.
578    ///
579    /// Returning an error stops [`Parser::try_load`] immediately.
580    ///
581    /// # Errors
582    /// Returns `Self::Error` when the receiver wants to stop parsing.
583    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
584}
585
586/// Trait to be implemented for fallible event handling with source spans.
587///
588/// This is the fallible counterpart to [`SpannedEventReceiver`]. Use it with
589/// [`Parser::try_load`] when event handling may need to stop parsing by returning an application
590/// error.
591pub trait TrySpannedEventReceiver<'input> {
592    /// Error returned by this receiver.
593    type Error;
594
595    /// Handler called for each event that occurs.
596    ///
597    /// Returning an error stops [`Parser::try_load`] immediately.
598    ///
599    /// # Errors
600    /// Returns `Self::Error` when the receiver wants to stop parsing.
601    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
602}
603
604impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
605    type Error = R::Error;
606
607    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
608        TryEventReceiver::on_event(self, ev)
609    }
610}
611
612/// Error returned by [`Parser::try_load`] and [`ParserTrait::try_load`].
613#[derive(Clone, PartialEq, Debug, Eq)]
614pub enum TryLoadError<E> {
615    /// Scanning or parsing failed.
616    Scan(
617        /// The scanner or parser error.
618        ScanError,
619    ),
620    /// The receiver returned an application error.
621    Receiver(
622        /// The error returned by the receiver.
623        E,
624    ),
625}
626
627impl<E> TryLoadError<E> {
628    #[cold]
629    fn scan(error: ScanError) -> Self {
630        Self::Scan(error)
631    }
632
633    #[cold]
634    fn receiver(error: E) -> Self {
635        Self::Receiver(error)
636    }
637}
638
639impl<E> From<ScanError> for TryLoadError<E> {
640    #[cold]
641    fn from(error: ScanError) -> Self {
642        Self::scan(error)
643    }
644}
645
646impl<E: Display> Display for TryLoadError<E> {
647    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648        match self {
649            Self::Scan(error) => write!(f, "parser error: {error}"),
650            Self::Receiver(error) => write!(f, "receiver error: {error}"),
651        }
652    }
653}
654
655impl<E> core::error::Error for TryLoadError<E>
656where
657    E: core::error::Error + 'static,
658{
659    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
660        match self {
661            Self::Scan(error) => Some(error),
662            Self::Receiver(error) => Some(error),
663        }
664    }
665}
666
667fn try_emit<'input, R>(
668    recv: &mut R,
669    ev: Event<'input>,
670    span: Span,
671) -> Result<(), TryLoadError<R::Error>>
672where
673    R: TrySpannedEventReceiver<'input>,
674{
675    recv.on_event(ev, span).map_err(TryLoadError::receiver)
676}
677
678struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
679
680impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
681    for InfallibleSpannedReceiver<'_, R>
682{
683    type Error = Infallible;
684
685    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
686        self.0.on_event(ev, span);
687        Ok(())
688    }
689}
690
691fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
692    match result {
693        Ok(()) => Ok(()),
694        Err(TryLoadError::Scan(error)) => error.into_result(),
695        Err(TryLoadError::Receiver(error)) => match error {},
696    }
697}
698
699/// A convenience alias for a parser event result.
700pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
701
702/// Trait extracted from `Parser` to support mocking and alternative implementations.
703pub trait ParserTrait<'input> {
704    /// Try to load the next event and return it without consuming it from `self`.
705    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
706
707    /// Try to load the next event and return it, consuming it from `self`.
708    fn next_event(&mut self) -> Option<ParseResult<'input>>;
709
710    /// Load the YAML from the stream in `self`, pushing events into `recv`.
711    ///
712    /// Use this method when event handling is infallible. If receiver code can return an
713    /// application error and should stop parsing, use [`ParserTrait::try_load`] instead. If the
714    /// caller should directly control when the next event is read, use [`ParserTrait::next_event`]
715    /// or [`Parser`]'s [`core::iter::Iterator`] implementation.
716    ///
717    /// # Errors
718    /// Returns `ScanError` when scanning or parsing the stream fails.
719    fn load<R: SpannedEventReceiver<'input>>(
720        &mut self,
721        recv: &mut R,
722        multi: bool,
723    ) -> Result<(), ScanError>;
724
725    /// Load the YAML from the stream in `self`, stopping if `recv` returns an error.
726    ///
727    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
728    /// inside the stream.
729    ///
730    /// If the receiver returns an error, the parser is left positioned immediately after the event
731    /// that caused the receiver error. Callers should treat the parser as partially consumed.
732    ///
733    /// # Errors
734    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
735    /// [`TryLoadError::Receiver`] when `recv` returns an error.
736    fn try_load<R: TrySpannedEventReceiver<'input>>(
737        &mut self,
738        recv: &mut R,
739        multi: bool,
740    ) -> Result<(), TryLoadError<R::Error>> {
741        while let Some(res) = self.next_event() {
742            let (ev, span) = res?;
743            let is_doc_end = matches!(ev, Event::DocumentEnd);
744            let is_stream_end = matches!(ev, Event::StreamEnd);
745
746            try_emit(recv, ev, span)?;
747
748            if is_stream_end {
749                break;
750            }
751            if !multi && is_doc_end {
752                break;
753            }
754        }
755
756        Ok(())
757    }
758}
759
760impl<'input> Parser<'input, StrInput<'input>> {
761    /// Create a parser over a borrowed string slice.
762    #[must_use]
763    pub fn new_from_str(value: &'input str) -> Self {
764        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
765        Parser::new(StrInput::new(value))
766    }
767}
768
769impl<T> Parser<'static, BufferedInput<T>>
770where
771    T: Iterator<Item = char>,
772{
773    /// Create a parser over an iterator of characters.
774    #[must_use]
775    pub fn new_from_iter(iter: T) -> Self {
776        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
777        Parser::new(BufferedInput::new(iter))
778    }
779}
780
781impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
782    /// Return the next anchor ID that will be assigned by this parser.
783    pub fn get_anchor_offset(&self) -> usize {
784        self.anchor_id_count
785    }
786
787    /// Set the next anchor ID that will be assigned by this parser.
788    pub fn set_anchor_offset(&mut self, offset: usize) {
789        self.anchor_id_count = offset;
790    }
791
792    /// Create a parser over a custom input source.
793    pub fn new(src: T) -> Self {
794        Parser {
795            scanner: Scanner::new(src),
796            states: Vec::new(),
797            state: State::StreamStart,
798            token: None,
799            current: None,
800            queued_events: VecDeque::new(),
801
802            pending_key_indent: None,
803            pending_node_anchor_id: 0,
804            pending_node_tag: None,
805            pending_node_tag_start: None,
806            pending_empty_scalar_span: None,
807
808            anchors: BTreeMap::new(),
809            // valid anchor_id starts from 1
810            anchor_id_count: 1,
811            tags: BTreeMap::new(),
812            stream_end_emitted: false,
813            keep_tags: false,
814        }
815    }
816
817    /// Configure whether tag directives remain active across document boundaries.
818    ///
819    /// This behavior is non-standard as per the YAML specification but can be encountered in the
820    /// wild. Passing `true` enables this non-standard extension and allows the parser to accept
821    /// input from [test
822    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
823    /// of the yaml-test-suite:
824    ///
825    /// ```yaml
826    /// %TAG !prefix! tag:example.com,2011:
827    /// --- !prefix!A
828    /// a: b
829    /// --- !prefix!B
830    /// c: d
831    /// --- !prefix!C
832    /// e: f
833    /// ```
834    ///
835    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
836    /// only apply to the document immediately following them. This would error on `!prefix!B`.
837    ///
838    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
839    #[must_use]
840    pub fn keep_tags(mut self, value: bool) -> Self {
841        self.keep_tags = value;
842        self
843    }
844
    /// Try to load the next event and return a reference to it without consuming it from `self`.
    ///
    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
    /// [`Iterator::next`] or [`Parser::load`].
    ///
    /// This inherent method simply forwards to the [`ParserTrait`] implementation so callers do
    /// not need the trait in scope.
    ///
    /// # Errors
    /// Returns `ScanError` when loading the next event fails.
    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        ParserTrait::peek(self)
    }
855
    /// Try to load the next event and return it, consuming it from `self`.
    ///
    /// This inherent method simply forwards to the [`ParserTrait`] implementation so callers do
    /// not need the trait in scope.
    ///
    /// # Errors
    /// Returns `ScanError` when loading the next event fails.
    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
        ParserTrait::next_event(self)
    }
863
864    /// Implementation function for [`Self::next_event`] without the `Option`.
865    ///
866    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
867    /// option. This burdens the parser code. This function is used internally when an option is
868    /// undesirable.
869    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
870    where
871        'input: 'a,
872    {
873        match self.current.take() {
874            None => {
875                if let Some(event) = self.queued_events.pop_front() {
876                    Ok(self.apply_pending_key_indent(event))
877                } else if let Some(comment) = self.maybe_next_comment_event()? {
878                    Ok(comment)
879                } else {
880                    self.parse()
881                }
882            }
883            Some(v) => Ok(v),
884        }
885    }
886
887    fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
888        if ev.is_node() {
889            if let Some(indent) = self.pending_key_indent.take() {
890                return (ev, span.with_indent(Some(indent)));
891            }
892        }
893
894        (ev, span)
895    }
896
897    /// Peek at the next token from the scanner.
898    fn peek_token(&mut self) -> Result<&QueuedToken<'_>, ScanError> {
899        match self.token {
900            None => {
901                self.token = Some(self.scan_next_token()?);
902                Ok(self.token.as_ref().unwrap())
903            }
904            Some(ref tok) => Ok(tok),
905        }
906    }
907
908    /// Extract and return the next token from the scanner.
909    ///
910    /// This function does _not_ make use of `self.token`.
911    fn scan_next_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
912        match self.scanner.next_queued_token()? {
913            None => match self.scanner.get_error() {
914                None => Err(self.unexpected_eof()),
915                Some(e) => e.into_result(),
916            },
917            Some(tok) => Ok(tok),
918        }
919    }
920
921    #[inline]
922    fn maybe_next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
923    where
924        'input: 'a,
925    {
926        if self.scanner.comments_possible() {
927            self.next_comment_event()
928        } else {
929            Ok(None)
930        }
931    }
932
933    fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
934    where
935        'input: 'a,
936    {
937        let is_comment = {
938            let token = self.peek_token()?;
939            matches!(token.1, QueuedTokenType::Comment(_))
940        };
941
942        if !is_comment {
943            return Ok(None);
944        }
945
946        let QueuedToken(span, token) = self.fetch_token();
947        match token {
948            QueuedTokenType::Comment(mut comment) => {
949                comment.placement = self.refined_comment_placement(span, comment.placement);
950                Ok(Some((
951                    Event::Comment(comment.text, comment.placement),
952                    span,
953                )))
954            }
955            _ => unreachable!("comment token disappeared after peek"),
956        }
957    }
958
959    #[inline]
960    fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
961        if !self.scanner.comments_possible() {
962            return Ok(Vec::new());
963        }
964
965        let mut events = Vec::new();
966        loop {
967            match self.peek_token() {
968                Ok(token) if matches!(token.1, QueuedTokenType::Comment(_)) => {}
969                Err(error) if events.is_empty() => return Err(error),
970                Ok(_) | Err(_) => return Ok(events),
971            }
972
973            if events.len() == MAX_BUFFERED_COMMENT_EVENTS {
974                return Err(ScanError::new_str(
975                    self.peek_token()?.0.start,
976                    "too many consecutive comments before resolving collection entry",
977                ));
978            }
979
980            let comment = self
981                .next_comment_event()?
982                .expect("comment token disappeared after peek");
983            events.push(comment);
984        }
985    }
986
987    fn queue_tail_and_return_first(
988        &mut self,
989        events: Vec<(Event<'input>, Span)>,
990    ) -> (Event<'input>, Span) {
991        let mut events = events.into_iter();
992        let first = events
993            .next()
994            .expect("event queue must contain at least one event");
995        self.queued_events.extend(events);
996        first
997    }
998
999    fn queue_event_by_span(
1000        &mut self,
1001        comments: Vec<(Event<'input>, Span)>,
1002        event: (Event<'input>, Span),
1003    ) -> (Event<'input>, Span) {
1004        let insert_at = comments
1005            .iter()
1006            .position(|(_, comment_span)| {
1007                comment_span.start.index() >= event.1.start.index()
1008                    && comment_span.end.index() >= event.1.end.index()
1009            })
1010            .unwrap_or(comments.len());
1011        let mut ordered = Vec::with_capacity(comments.len() + 1);
1012        let mut comments = comments.into_iter();
1013
1014        for _ in 0..insert_at {
1015            ordered.push(
1016                comments
1017                    .next()
1018                    .expect("comment disappeared while ordering queued events"),
1019            );
1020        }
1021        ordered.push(event);
1022        ordered.extend(comments);
1023
1024        self.queue_tail_and_return_first(ordered)
1025    }
1026
1027    fn queue_two_events_by_span(
1028        &mut self,
1029        comments: Vec<(Event<'input>, Span)>,
1030        first: (Event<'input>, Span),
1031        second: (Event<'input>, Span),
1032    ) -> (Event<'input>, Span) {
1033        let insert_at = comments
1034            .iter()
1035            .position(|(_, comment_span)| {
1036                comment_span.start.index() >= first.1.start.index()
1037                    && comment_span.end.index() >= first.1.end.index()
1038            })
1039            .unwrap_or(comments.len());
1040        let mut ordered = Vec::with_capacity(comments.len() + 2);
1041        let mut comments = comments.into_iter();
1042
1043        for _ in 0..insert_at {
1044            ordered.push(
1045                comments
1046                    .next()
1047                    .expect("comment disappeared while ordering queued events"),
1048            );
1049        }
1050        ordered.push(first);
1051        ordered.push(second);
1052        ordered.extend(comments);
1053
1054        self.queue_tail_and_return_first(ordered)
1055    }
1056
1057    fn refined_comment_placement(&mut self, span: Span, placement: Placement) -> Placement {
1058        if placement == Placement::Right {
1059            return Placement::Right;
1060        }
1061
1062        let Ok(next) = self.peek_token() else {
1063            return placement;
1064        };
1065        if matches!(next.1, QueuedTokenType::StreamEnd) {
1066            return Placement::Last;
1067        }
1068
1069        if next.0.start.line() == span.end.line() + 1 {
1070            Placement::Above
1071        } else {
1072            Placement::Free
1073        }
1074    }
1075
1076    #[cold]
1077    fn unexpected_eof(&self) -> ScanError {
1078        let info = match self.state {
1079            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1080                "unexpected EOF while parsing a flow sequence"
1081            }
1082            State::FlowMappingFirstKey
1083            | State::FlowMappingKey
1084            | State::FlowMappingValue
1085            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1086            State::FlowSequenceEntryMappingKey
1087            | State::FlowSequenceEntryMappingValue
1088            | State::FlowSequenceEntryMappingEnd
1089            | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
1090            State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
1091                "unexpected EOF while parsing a block sequence"
1092            }
1093            State::BlockMappingFirstKey
1094            | State::BlockMappingKey
1095            | State::BlockMappingValue
1096            | State::BlockNodeOrIndentlessSequence => {
1097                "unexpected EOF while parsing a block mapping"
1098            }
1099            _ => "unexpected eof",
1100        };
1101        ScanError::new_str(self.scanner.mark(), info)
1102    }
1103
1104    fn fetch_token<'a>(&mut self) -> QueuedToken<'a>
1105    where
1106        'input: 'a,
1107    {
1108        self.token
1109            .take()
1110            .expect("fetch_token needs to be preceded by peek_token")
1111    }
1112
1113    /// Skip the next token from the scanner.
1114    fn skip(&mut self) {
1115        self.token = None;
1116    }
1117    /// Pops the top-most state and make it the current state.
1118    fn pop_state(&mut self) {
1119        self.state = self.states.pop().unwrap();
1120    }
1121    /// Push a new state atop the state stack.
1122    fn push_state(&mut self, state: State) {
1123        self.states.push(state);
1124    }
1125
1126    fn defer_parse_node<'a>(
1127        &mut self,
1128        node_state: State,
1129        return_state: State,
1130        block: bool,
1131        indentless_sequence: bool,
1132    ) -> ParseResult<'a>
1133    where
1134        'input: 'a,
1135    {
1136        self.push_state(return_state);
1137        self.state = node_state;
1138        if let Some(comment) = self.maybe_next_comment_event()? {
1139            Ok(comment)
1140        } else {
1141            self.parse_node(block, indentless_sequence)
1142        }
1143    }
1144
1145    fn parse<'a>(&mut self) -> ParseResult<'a>
1146    where
1147        'input: 'a,
1148    {
1149        if self.state == State::End {
1150            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
1151        }
1152        let event = self.state_machine()?;
1153        Ok(self.apply_pending_key_indent(event))
1154    }
1155
    /// Load the YAML from the stream in `self`, pushing events into `recv`.
    ///
    /// The contents of the stream are parsed and the corresponding events are sent into the
    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// Use this method when event handling is infallible. If receiver code can return an
    /// application error and should stop parsing, use [`Parser::try_load`] instead. If the caller
    /// should directly control when the next event is read, use [`Parser`]'s
    /// [`core::iter::Iterator`] implementation.
    ///
    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
    /// former is enough to call this function.
    ///
    /// This inherent method forwards to the [`ParserTrait`] implementation so callers do not
    /// need the trait in scope.
    ///
    /// # Example
    /// ```
    /// # use granit_parser::{Event, EventReceiver, Parser};
    /// # fn main() -> Result<(), granit_parser::ScanError> {
    /// struct EventSink<'input> {
    ///     events: Vec<Event<'input>>,
    /// }
    ///
    /// impl<'input> EventReceiver<'input> for EventSink<'input> {
    ///     fn on_event(&mut self, ev: Event<'input>) {
    ///         self.events.push(ev);
    ///     }
    /// }
    ///
    /// let mut parser = Parser::new_from_str("a: 1\n");
    /// let mut sink = EventSink { events: Vec::new() };
    ///
    /// parser.load(&mut sink, false)?;
    ///
    /// assert!(sink
    ///     .events
    ///     .iter()
    ///     .any(|ev| matches!(ev, Event::Scalar(value, ..) if value == "a")));
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// # Errors
    /// Returns `ScanError` when loading fails.
    pub fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        ParserTrait::load(self, recv, multi)
    }
1208
    /// Load the YAML from the stream in `self`, pushing events into a fallible receiver.
    ///
    /// This is the fallible counterpart to [`Parser::load`]. If `recv` returns an error, parsing
    /// stops immediately and that error is returned as [`TryLoadError::Receiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// If the receiver returns an error, the parser is left positioned immediately after the event
    /// that caused the receiver error. Callers should treat the parser as partially consumed.
    ///
    /// This inherent method forwards to the [`ParserTrait`] implementation so callers do not
    /// need the trait in scope.
    ///
    /// # Example
    /// ```
    /// # use granit_parser::{Event, Parser, TryEventReceiver, TryLoadError};
    /// #[derive(Debug, PartialEq, Eq)]
    /// enum ValidationError {
    ///     ForbiddenScalar,
    /// }
    ///
    /// struct Validator;
    ///
    /// impl<'input> TryEventReceiver<'input> for Validator {
    ///     type Error = ValidationError;
    ///
    ///     fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
    ///         if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
    ///             Err(ValidationError::ForbiddenScalar)
    ///         } else {
    ///             Ok(())
    ///         }
    ///     }
    /// }
    ///
    /// let mut parser = Parser::new_from_str("value: bad\n");
    /// let mut validator = Validator;
    ///
    /// let err = parser.try_load(&mut validator, false).unwrap_err();
    ///
    /// assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenScalar));
    /// ```
    ///
    /// # Errors
    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
    /// [`TryLoadError::Receiver`] when `recv` returns an error.
    pub fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        ParserTrait::try_load(self, recv, multi)
    }
1260
1261    #[cfg(test)]
1262    fn try_load_document<R: TrySpannedEventReceiver<'input>>(
1263        &mut self,
1264        first_ev: Event<'input>,
1265        span: Span,
1266        recv: &mut R,
1267    ) -> Result<(), TryLoadError<R::Error>> {
1268        if !matches!(first_ev, Event::DocumentStart(_)) {
1269            return Err(TryLoadError::scan(ScanError::new_str(
1270                span.start,
1271                "did not find expected <document-start>",
1272            )));
1273        }
1274        try_emit(recv, first_ev, span)?;
1275
1276        let (ev, span) = self.next_event_impl()?;
1277        self.try_load_node(ev, span, recv)?;
1278
1279        // DOCUMENT-END is expected.
1280        let (ev, mark) = self.next_event_impl()?;
1281        assert_eq!(ev, Event::DocumentEnd);
1282        try_emit(recv, ev, mark)?;
1283
1284        Ok(())
1285    }
1286
1287    #[cfg(test)]
1288    fn try_load_node<R: TrySpannedEventReceiver<'input>>(
1289        &mut self,
1290        first_ev: Event<'input>,
1291        span: Span,
1292        recv: &mut R,
1293    ) -> Result<(), TryLoadError<R::Error>> {
1294        match first_ev {
1295            Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
1296            Event::SequenceStart(..) => {
1297                try_emit(recv, first_ev, span)?;
1298                self.try_load_sequence(recv)
1299            }
1300            Event::MappingStart(..) => {
1301                try_emit(recv, first_ev, span)?;
1302                self.try_load_mapping(recv)
1303            }
1304            _ => {
1305                #[cfg(feature = "debug_prints")]
1306                std::println!("UNREACHABLE EVENT: {first_ev:?}");
1307                unreachable!();
1308            }
1309        }
1310    }
1311
1312    #[cfg(test)]
1313    fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
1314        &mut self,
1315        recv: &mut R,
1316    ) -> Result<(), TryLoadError<R::Error>> {
1317        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
1318        while key_ev != Event::MappingEnd {
1319            // key
1320            self.try_load_node(key_ev, key_mark, recv)?;
1321
1322            // value
1323            let (ev, mark) = self.next_event_impl()?;
1324            self.try_load_node(ev, mark, recv)?;
1325
1326            // next event
1327            let (ev, mark) = self.next_event_impl()?;
1328            key_ev = ev;
1329            key_mark = mark;
1330        }
1331        try_emit(recv, key_ev, key_mark)?;
1332        Ok(())
1333    }
1334
1335    #[cfg(test)]
1336    fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
1337        &mut self,
1338        recv: &mut R,
1339    ) -> Result<(), TryLoadError<R::Error>> {
1340        let (mut ev, mut mark) = self.next_event_impl()?;
1341        while ev != Event::SequenceEnd {
1342            self.try_load_node(ev, mark, recv)?;
1343
1344            // next event
1345            let (next_ev, next_mark) = self.next_event_impl()?;
1346            ev = next_ev;
1347            mark = next_mark;
1348        }
1349        try_emit(recv, ev, mark)?;
1350        Ok(())
1351    }
1352
1353    fn state_machine<'a>(&mut self) -> ParseResult<'a>
1354    where
1355        'input: 'a,
1356    {
1357        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
1358
1359        match self.state {
1360            State::StreamStart => self.stream_start(),
1361
1362            State::ImplicitDocumentStart => self.document_start(true),
1363            State::DocumentStart => self.document_start(false),
1364            State::DocumentContent => self.document_content(),
1365            State::DocumentEnd => self.document_end(),
1366
1367            State::BlockNode => self.parse_node(true, false),
1368            State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
1369            State::FlowNode => self.parse_node(false, false),
1370            State::BlockMappingFirstKey => self.block_mapping_key(true),
1371            State::BlockMappingKey => self.block_mapping_key(false),
1372            State::BlockMappingKeyNode => self.block_mapping_key_node(),
1373            State::BlockMappingValue => self.block_mapping_value(),
1374            State::BlockMappingValueNode => self.block_mapping_value_node(),
1375
1376            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
1377            State::BlockSequenceEntry => self.block_sequence_entry(false),
1378            State::BlockSequenceEntryNode => self.block_sequence_entry_node(),
1379
1380            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
1381            State::FlowSequenceEntry => self.flow_sequence_entry(false),
1382
1383            State::FlowMappingFirstKey => self.flow_mapping_key(true),
1384            State::FlowMappingKey => self.flow_mapping_key(false),
1385            State::FlowMappingKeyNode => self.flow_mapping_key_node(),
1386            State::FlowMappingValue => self.flow_mapping_value(false),
1387            State::FlowMappingValueNode => self.flow_mapping_value_node(),
1388
1389            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
1390            State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),
1391
1392            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
1393            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
1394            State::FlowSequenceEntryMappingValueNode => {
1395                self.flow_sequence_entry_mapping_value_node()
1396            }
1397            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
1398            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
1399
1400            /* impossible */
1401            State::End => unreachable!(),
1402        }
1403    }
1404
1405    fn stream_start<'a>(&mut self) -> ParseResult<'a>
1406    where
1407        'input: 'a,
1408    {
1409        match *self.peek_token()? {
1410            QueuedToken(span, QueuedTokenType::StreamStart(_)) => {
1411                self.state = State::ImplicitDocumentStart;
1412                self.skip();
1413                Ok((Event::StreamStart, span))
1414            }
1415            QueuedToken(span, _) => Err(ScanError::new_str(
1416                span.start,
1417                "did not find expected <stream-start>",
1418            )),
1419        }
1420    }
1421
1422    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
1423    where
1424        'input: 'a,
1425    {
1426        while let QueuedTokenType::DocumentEnd = self.peek_token()?.1 {
1427            self.skip();
1428        }
1429
1430        // Anchors are scoped to a single document.
1431        self.anchors.clear();
1432
1433        match *self.peek_token()? {
1434            QueuedToken(span, QueuedTokenType::StreamEnd) => {
1435                self.state = State::End;
1436                self.skip();
1437                Ok((Event::StreamEnd, span))
1438            }
1439            QueuedToken(
1440                _,
1441                QueuedTokenType::VersionDirective(..)
1442                | QueuedTokenType::TagDirective(..)
1443                | QueuedTokenType::ReservedDirective(..)
1444                | QueuedTokenType::DocumentStart,
1445            ) => {
1446                // explicit document
1447                self.explicit_document_start()
1448            }
1449            QueuedToken(span, _) if implicit => {
1450                self.parser_process_directives()?;
1451                self.push_state(State::DocumentEnd);
1452                self.state = State::BlockNode;
1453                Ok((Event::DocumentStart(false), span))
1454            }
1455            _ => {
1456                // explicit document
1457                self.explicit_document_start()
1458            }
1459        }
1460    }
1461
1462    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
1463        let mut version_directive_received = false;
1464        let mut tags = if self.keep_tags {
1465            self.tags.clone()
1466        } else {
1467            BTreeMap::new()
1468        };
1469        let mut document_tag_handles = BTreeSet::new();
1470
1471        loop {
1472            match self.peek_token()? {
1473                QueuedToken(span, QueuedTokenType::VersionDirective(_, _)) => {
1474                    // YAML version compatibility is non-fatal here. The scanner validates the
1475                    // directive shape, and the parser rejects duplicates below, but it does not
1476                    // expose a warning channel for unsupported versions.
1477                    if version_directive_received {
1478                        return Err(ScanError::new_str(
1479                            span.start,
1480                            "duplicate version directive",
1481                        ));
1482                    }
1483                    version_directive_received = true;
1484                }
1485                QueuedToken(mark, QueuedTokenType::TagDirective(handle, prefix)) => {
1486                    if !document_tag_handles.insert(handle.to_string()) {
1487                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
1488                    }
1489                    tags.insert(handle.to_string(), prefix.to_string());
1490                }
1491                QueuedToken(_, QueuedTokenType::ReservedDirective(_, _)) => {
1492                    // Reserved directives are ignored
1493                }
1494                _ => break,
1495            }
1496            self.skip();
1497        }
1498
1499        self.tags = tags;
1500        Ok(())
1501    }
1502
1503    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1504    where
1505        'input: 'a,
1506    {
1507        self.parser_process_directives()?;
1508        if let Some(comment) = self.maybe_next_comment_event()? {
1509            return Ok(comment);
1510        }
1511        match *self.peek_token()? {
1512            QueuedToken(mark, QueuedTokenType::DocumentStart) => {
1513                self.push_state(State::DocumentEnd);
1514                self.state = State::DocumentContent;
1515                self.skip();
1516                Ok((Event::DocumentStart(true), mark))
1517            }
1518            QueuedToken(span, _) => Err(ScanError::new_str(
1519                span.start,
1520                "did not find expected <document start>",
1521            )),
1522        }
1523    }
1524
1525    fn document_content<'a>(&mut self) -> ParseResult<'a>
1526    where
1527        'input: 'a,
1528    {
1529        if let QueuedToken(
1530            mark,
1531            QueuedTokenType::VersionDirective(..)
1532            | QueuedTokenType::TagDirective(..)
1533            | QueuedTokenType::ReservedDirective(..)
1534            | QueuedTokenType::DocumentStart
1535            | QueuedTokenType::DocumentEnd
1536            | QueuedTokenType::StreamEnd,
1537        ) = *self.peek_token()?
1538        {
1539            self.pop_state();
1540            // empty scalar
1541            Ok((Event::empty_scalar(), mark))
1542        } else {
1543            self.state = State::BlockNode;
1544            self.parse_node(true, false)
1545        }
1546    }
1547
1548    fn document_end<'a>(&mut self) -> ParseResult<'a>
1549    where
1550        'input: 'a,
1551    {
1552        let mut explicit_end = false;
1553        let span: Span = match *self.peek_token()? {
1554            QueuedToken(span, QueuedTokenType::DocumentEnd) => {
1555                explicit_end = true;
1556                self.skip();
1557                span
1558            }
1559            QueuedToken(span, _) => span,
1560        };
1561
1562        if self.keep_tags {
1563            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
1564            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
1565            // explicit tags are interpreted later on.
1566            self.tags.remove("!!");
1567            self.tags.remove("");
1568        } else {
1569            self.tags.clear();
1570        }
1571        if explicit_end {
1572            self.state = State::ImplicitDocumentStart;
1573        } else {
1574            if let QueuedToken(
1575                span,
1576                QueuedTokenType::VersionDirective(..)
1577                | QueuedTokenType::TagDirective(..)
1578                | QueuedTokenType::ReservedDirective(..),
1579            ) = *self.peek_token()?
1580            {
1581                return Err(ScanError::new_str(
1582                    span.start,
1583                    "missing explicit document end marker before directive",
1584                ));
1585            }
1586            self.state = State::DocumentStart;
1587        }
1588
1589        Ok((Event::DocumentEnd, span))
1590    }
1591
1592    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1593        // YAML permits anchor names to be reused. Aliases resolve to the most recent definition.
1594        let new_id = self.anchor_id_count;
1595        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1596            ScanError::new_str(
1597                mark.start,
1598                "while parsing anchor, anchor count exceeded supported limit",
1599            )
1600        })?;
1601        self.anchors.insert(name, new_id);
1602        Ok(new_id)
1603    }
1604
1605    fn save_pending_node_properties(
1606        &mut self,
1607        anchor_id: usize,
1608        tag: Option<Cow<'input, Tag>>,
1609        tag_start: Option<Marker>,
1610    ) {
1611        self.pending_node_anchor_id = anchor_id;
1612        self.pending_node_tag = tag;
1613        self.pending_node_tag_start = tag_start;
1614    }
1615
1616    fn attach_tag_start(event: Event<'_>, span: Span, start: Option<Marker>) -> (Event<'_>, Span) {
1617        (event, span.with_tag_start(start))
1618    }
1619
    /// Parse one node: an alias, or optional node properties (anchor and/or tag, in either
    /// order) followed by the node content.
    ///
    /// A comment found before the node, or between the properties and the content, is returned
    /// as its own event. In the latter case the already-consumed properties are stashed with
    /// `save_pending_node_properties` and picked up again at the start of the next call.
    ///
    /// * `block` - enables the `BlockSequenceStart` / `BlockMappingStart` arms.
    /// * `indentless_sequence` - enables the arm that starts a block sequence directly on a
    ///   `BlockEntry` token.
    ///
    /// # Errors
    /// Returns a `ScanError` when an alias names an unknown anchor, when any helper invoked with
    /// `?` fails (peeking a token, reading comments, registering an anchor, resolving a tag), or
    /// when the next token cannot start a node and no anchor or tag was collected.
    #[allow(clippy::too_many_lines)]
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if let Some(comment) = self.maybe_next_comment_event()? {
            return Ok(comment);
        }

        // Start from the properties a previous call may have stashed; taking them resets the
        // pending fields to their defaults.
        let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
        let mut tag = self.pending_node_tag.take();
        let mut tag_start = self.pending_node_tag_start.take();
        // First pass: alias, or node properties (anchor then optional tag, or tag then optional
        // anchor).
        match *self.peek_token()? {
            QueuedToken(_, QueuedTokenType::Alias(_)) => {
                self.pop_state();
                if let QueuedToken(span, QueuedTokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                // The token was just peeked as an alias, so the fetch above must match.
                unreachable!()
            }
            QueuedToken(_, QueuedTokenType::Anchor(_)) => {
                if let QueuedToken(span, QueuedTokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span)?;
                    if matches!(self.peek_token()?.1, QueuedTokenType::Tag(..)) {
                        if let QueuedToken(tag_span, QueuedTokenType::Tag(handle, suffix)) =
                            self.fetch_token()
                        {
                            tag_start = Some(tag_span.start);
                            tag = Some(self.resolve_tag(tag_span, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                    // A comment between the properties and the content is emitted first; keep
                    // the properties for the next call.
                    if let Some(comment) = self.maybe_next_comment_event()? {
                        self.save_pending_node_properties(anchor_id, tag, tag_start);
                        return Ok(comment);
                    }
                } else {
                    unreachable!()
                }
            }
            QueuedToken(mark, QueuedTokenType::Tag(..)) => {
                if let QueuedTokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag_start = Some(mark.start);
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let QueuedTokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let QueuedToken(mark, QueuedTokenType::Anchor(name)) = self.fetch_token()
                        {
                            anchor_id = self.register_anchor(name, &mark)?;
                        } else {
                            unreachable!()
                        }
                    }
                    // Same comment handling as in the anchor-first arm.
                    if let Some(comment) = self.maybe_next_comment_event()? {
                        self.save_pending_node_properties(anchor_id, tag, tag_start);
                        return Ok(comment);
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Second pass: the node content itself.
        match *self.peek_token()? {
            QueuedToken(mark, QueuedTokenType::BlockEntry) if indentless_sequence => {
                self.skip();
                let comments = self.next_comment_events()?;
                let start = (
                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
                    mark.with_tag_start(tag_start),
                );
                if comments.is_empty() {
                    self.pending_empty_scalar_span = Some(mark);
                    self.state = State::IndentlessSequenceEntryNode;
                    Ok(start)
                } else if let Ok(QueuedToken(
                    _,
                    QueuedTokenType::BlockEntry
                    | QueuedTokenType::Key
                    | QueuedTokenType::Value
                    | QueuedTokenType::BlockEnd,
                )) = self.peek_token()
                {
                    // The first entry has no content: hand the sequence start and an empty
                    // scalar to `queue_two_events_by_span` together with the comments.
                    self.state = State::IndentlessSequenceEntry;
                    Ok(self.queue_two_events_by_span(
                        comments,
                        start,
                        (Event::empty_scalar(), mark),
                    ))
                } else {
                    self.pending_empty_scalar_span = Some(mark);
                    self.state = State::IndentlessSequenceEntryNode;
                    Ok(self.queue_event_by_span(comments, start))
                }
            }
            QueuedToken(_, QueuedTokenType::Scalar(..)) => {
                self.pop_state();
                if let QueuedToken(mark, QueuedTokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok(Self::attach_tag_start(
                        Event::Scalar(v, style, anchor_id, tag),
                        mark,
                        tag_start,
                    ))
                } else {
                    unreachable!()
                }
            }
            QueuedToken(mark, QueuedTokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            QueuedToken(mark, QueuedTokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            QueuedToken(mark, QueuedTokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            QueuedToken(mark, QueuedTokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::MappingStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            // ex 7.2, an empty scalar can follow a secondary tag
            // (an anchor id of 0 appears to mean "no anchor" - TODO confirm against
            // `register_anchor`)
            QueuedToken(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok(Self::attach_tag_start(
                    Event::empty_scalar_with_anchor(anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            // NOTE(review): this arm is reached for any token that cannot start a node, not only
            // at end of input, although most messages below say "unexpected EOF".
            QueuedToken(span, _) => {
                let info = match self.state {
                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                        "unexpected EOF while parsing a flow sequence"
                    }
                    State::FlowMappingFirstKey
                    | State::FlowMappingKey
                    | State::FlowMappingValue
                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                    State::FlowSequenceEntryMappingKey
                    | State::FlowSequenceEntryMappingValue
                    | State::FlowSequenceEntryMappingEnd
                    | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
                    State::BlockSequenceFirstEntry
                    | State::BlockSequenceEntry
                    | State::BlockNode => "unexpected EOF while parsing a block sequence",
                    State::BlockMappingFirstKey
                    | State::BlockMappingKey
                    | State::BlockMappingValue
                    | State::BlockNodeOrIndentlessSequence => {
                        "unexpected EOF while parsing a block mapping"
                    }
                    _ => "while parsing a node, did not find expected node content",
                };
                Err(ScanError::new_str(span.start, info))
            }
        }
    }
1808
1809    fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
1810    where
1811        'input: 'a,
1812    {
1813        match *self.peek_token()? {
1814            QueuedToken(_, QueuedTokenType::Key) => {
1815                // Indentation is only meaningful for block mapping keys.
1816                if let QueuedToken(key_span, QueuedTokenType::Key) = *self.peek_token()? {
1817                    self.pending_key_indent = Some(key_span.start.col());
1818                }
1819                self.skip();
1820                if let Some(comment) = self.maybe_next_comment_event()? {
1821                    self.state = State::BlockMappingKeyNode;
1822                    Ok(comment)
1823                } else {
1824                    self.block_mapping_key_node()
1825                }
1826            }
1827            // A missing block-mapping key before `:` is represented as an empty scalar.
1828            QueuedToken(mark, QueuedTokenType::Value) => {
1829                self.state = State::BlockMappingValue;
1830                Ok((Event::empty_scalar(), mark))
1831            }
1832            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
1833                self.pop_state();
1834                self.skip();
1835                Ok((Event::MappingEnd, mark))
1836            }
1837            QueuedToken(span, _) => Err(ScanError::new_str(
1838                span.start,
1839                "while parsing a block mapping, did not find expected key",
1840            )),
1841        }
1842    }
1843
1844    fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1845    where
1846        'input: 'a,
1847    {
1848        if let QueuedToken(
1849            mark,
1850            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1851        ) = *self.peek_token()?
1852        {
1853            self.state = State::BlockMappingValue;
1854            Ok((Event::empty_scalar(), mark))
1855        } else {
1856            self.defer_parse_node(
1857                State::BlockNodeOrIndentlessSequence,
1858                State::BlockMappingValue,
1859                true,
1860                true,
1861            )
1862        }
1863    }
1864
1865    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1866    where
1867        'input: 'a,
1868    {
1869        match *self.peek_token()? {
1870            QueuedToken(mark, QueuedTokenType::Value) => {
1871                self.skip();
1872                let comments = self.next_comment_events()?;
1873                if comments.is_empty() {
1874                    self.block_mapping_value_node_with_empty_span(mark)
1875                } else if let Ok(QueuedToken(
1876                    _,
1877                    QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1878                )) = self.peek_token()
1879                {
1880                    self.state = State::BlockMappingKey;
1881                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1882                } else {
1883                    self.pending_empty_scalar_span = Some(mark);
1884                    self.state = State::BlockMappingValueNode;
1885                    Ok(self.queue_tail_and_return_first(comments))
1886                }
1887            }
1888            QueuedToken(mark, _) => {
1889                self.state = State::BlockMappingKey;
1890                // empty scalar
1891                Ok((Event::empty_scalar(), mark))
1892            }
1893        }
1894    }
1895
1896    fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1897    where
1898        'input: 'a,
1899    {
1900        let mark = match self.pending_empty_scalar_span.take() {
1901            Some(mark) => mark,
1902            None => self.peek_token()?.0,
1903        };
1904        self.block_mapping_value_node_with_empty_span(mark)
1905    }
1906
1907    fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1908    where
1909        'input: 'a,
1910    {
1911        if let QueuedToken(
1912            _,
1913            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1914        ) = *self.peek_token()?
1915        {
1916            self.state = State::BlockMappingKey;
1917            Ok((Event::empty_scalar(), mark))
1918        } else {
1919            self.defer_parse_node(
1920                State::BlockNodeOrIndentlessSequence,
1921                State::BlockMappingKey,
1922                true,
1923                true,
1924            )
1925        }
1926    }
1927
1928    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1929    where
1930        'input: 'a,
1931    {
1932        let span: Span =
1933            if let QueuedToken(mark, QueuedTokenType::FlowMappingEnd) = *self.peek_token()? {
1934                mark
1935            } else {
1936                if !first {
1937                    match *self.peek_token()? {
1938                        QueuedToken(_, QueuedTokenType::FlowEntry) => {
1939                            self.skip();
1940                            if let Some(comment) = self.maybe_next_comment_event()? {
1941                                self.state = State::FlowMappingFirstKey;
1942                                return Ok(comment);
1943                            }
1944                        }
1945                        QueuedToken(span, _) => {
1946                            return Err(ScanError::new_str(
1947                                span.start,
1948                                "while parsing a flow mapping, did not find expected ',' or '}'",
1949                            ))
1950                        }
1951                    }
1952                }
1953
1954                match *self.peek_token()? {
1955                    QueuedToken(_, QueuedTokenType::Key) => {
1956                        self.skip();
1957                        if let Some(comment) = self.maybe_next_comment_event()? {
1958                            self.state = State::FlowMappingKeyNode;
1959                            return Ok(comment);
1960                        }
1961                        return self.flow_mapping_key_node();
1962                    }
1963                    QueuedToken(marker, QueuedTokenType::Value) => {
1964                        self.state = State::FlowMappingValue;
1965                        return Ok((Event::empty_scalar(), marker));
1966                    }
1967                    QueuedToken(_, QueuedTokenType::FlowMappingEnd) => (),
1968                    _ => {
1969                        return self.defer_parse_node(
1970                            State::FlowNode,
1971                            State::FlowMappingEmptyValue,
1972                            false,
1973                            false,
1974                        );
1975                    }
1976                }
1977
1978                self.peek_token()?.0
1979            };
1980
1981        self.pop_state();
1982        self.skip();
1983        Ok((Event::MappingEnd, span))
1984    }
1985
1986    fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1987    where
1988        'input: 'a,
1989    {
1990        if let QueuedToken(
1991            mark,
1992            QueuedTokenType::Value | QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
1993        ) = *self.peek_token()?
1994        {
1995            self.state = State::FlowMappingValue;
1996            Ok((Event::empty_scalar(), mark))
1997        } else {
1998            self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false)
1999        }
2000    }
2001
2002    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
2003    where
2004        'input: 'a,
2005    {
2006        let span: Span = {
2007            if empty {
2008                let QueuedToken(mark, _) = *self.peek_token()?;
2009                self.state = State::FlowMappingKey;
2010                return Ok((Event::empty_scalar(), mark));
2011            }
2012            match *self.peek_token()? {
2013                QueuedToken(span, QueuedTokenType::Value) => {
2014                    self.skip();
2015                    let comments = self.next_comment_events()?;
2016                    if comments.is_empty() {
2017                        return self.flow_mapping_value_node_with_empty_span(span);
2018                    }
2019                    if let Ok(QueuedToken(
2020                        _,
2021                        QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
2022                    )) = self.peek_token()
2023                    {
2024                        self.state = State::FlowMappingKey;
2025                        return Ok(
2026                            self.queue_event_by_span(comments, (Event::empty_scalar(), span))
2027                        );
2028                    }
2029
2030                    self.pending_empty_scalar_span = Some(span);
2031                    self.state = State::FlowMappingValueNode;
2032                    return Ok(self.queue_tail_and_return_first(comments));
2033                }
2034                QueuedToken(marker, _) => marker,
2035            }
2036        };
2037
2038        self.state = State::FlowMappingKey;
2039        Ok((Event::empty_scalar(), span))
2040    }
2041
2042    fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2043    where
2044        'input: 'a,
2045    {
2046        let mark = match self.pending_empty_scalar_span.take() {
2047            Some(mark) => mark,
2048            None => Span::empty(self.peek_token()?.0.start),
2049        };
2050        self.flow_mapping_value_node_with_empty_span(mark)
2051    }
2052
2053    fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2054    where
2055        'input: 'a,
2056    {
2057        match self.peek_token()?.1 {
2058            QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd => {
2059                self.state = State::FlowMappingKey;
2060                Ok((Event::empty_scalar(), mark))
2061            }
2062            _ => self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false),
2063        }
2064    }
2065
2066    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
2067    where
2068        'input: 'a,
2069    {
2070        match *self.peek_token()? {
2071            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
2072                self.pop_state();
2073                self.skip();
2074                return Ok((Event::SequenceEnd, mark));
2075            }
2076            QueuedToken(_, QueuedTokenType::FlowEntry) if !first => {
2077                self.skip();
2078                if let Some(comment) = self.maybe_next_comment_event()? {
2079                    self.state = State::FlowSequenceFirstEntry;
2080                    return Ok(comment);
2081                }
2082            }
2083            QueuedToken(span, _) if !first => {
2084                return Err(ScanError::new_str(
2085                    span.start,
2086                    "while parsing a flow sequence, expected ',' or ']'",
2087                ));
2088            }
2089            _ => { /* next */ }
2090        }
2091        match *self.peek_token()? {
2092            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
2093                self.pop_state();
2094                self.skip();
2095                Ok((Event::SequenceEnd, mark))
2096            }
2097            QueuedToken(mark, QueuedTokenType::Key) => {
2098                self.state = State::FlowSequenceEntryMappingKey;
2099                self.skip();
2100                Ok((Event::MappingStart(StructureStyle::Flow, 0, None), mark))
2101            }
2102            _ => self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false),
2103        }
2104    }
2105
2106    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
2107    where
2108        'input: 'a,
2109    {
2110        match *self.peek_token()? {
2111            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2112                self.skip();
2113                let comments = self.next_comment_events()?;
2114                if comments.is_empty() {
2115                    self.indentless_sequence_entry_node_with_empty_span(mark)
2116                } else if let Ok(QueuedToken(
2117                    _,
2118                    QueuedTokenType::BlockEntry
2119                    | QueuedTokenType::Key
2120                    | QueuedTokenType::Value
2121                    | QueuedTokenType::BlockEnd,
2122                )) = self.peek_token()
2123                {
2124                    self.state = State::IndentlessSequenceEntry;
2125                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2126                } else {
2127                    self.pending_empty_scalar_span = Some(mark);
2128                    self.state = State::IndentlessSequenceEntryNode;
2129                    Ok(self.queue_tail_and_return_first(comments))
2130                }
2131            }
2132            QueuedToken(mark, _) => {
2133                self.pop_state();
2134                Ok((Event::SequenceEnd, mark))
2135            }
2136        }
2137    }
2138
2139    fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2140    where
2141        'input: 'a,
2142    {
2143        let mark = match self.pending_empty_scalar_span.take() {
2144            Some(mark) => mark,
2145            None => self.peek_token()?.0,
2146        };
2147        self.indentless_sequence_entry_node_with_empty_span(mark)
2148    }
2149
2150    fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2151    where
2152        'input: 'a,
2153    {
2154        if let QueuedToken(
2155            _,
2156            QueuedTokenType::BlockEntry
2157            | QueuedTokenType::Key
2158            | QueuedTokenType::Value
2159            | QueuedTokenType::BlockEnd,
2160        ) = *self.peek_token()?
2161        {
2162            self.state = State::IndentlessSequenceEntry;
2163            Ok((Event::empty_scalar(), mark))
2164        } else {
2165            self.defer_parse_node(
2166                State::BlockNode,
2167                State::IndentlessSequenceEntry,
2168                true,
2169                false,
2170            )
2171        }
2172    }
2173
2174    fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
2175    where
2176        'input: 'a,
2177    {
2178        match *self.peek_token()? {
2179            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
2180                self.pop_state();
2181                self.skip();
2182                Ok((Event::SequenceEnd, mark))
2183            }
2184            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2185                self.skip();
2186                let comments = self.next_comment_events()?;
2187                if comments.is_empty() {
2188                    self.block_sequence_entry_node_with_empty_span(mark)
2189                } else if let Ok(QueuedToken(
2190                    _,
2191                    QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd,
2192                )) = self.peek_token()
2193                {
2194                    self.state = State::BlockSequenceEntry;
2195                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2196                } else {
2197                    self.pending_empty_scalar_span = Some(mark);
2198                    self.state = State::BlockSequenceEntryNode;
2199                    Ok(self.queue_tail_and_return_first(comments))
2200                }
2201            }
2202            QueuedToken(span, _) => Err(ScanError::new_str(
2203                span.start,
2204                "while parsing a block collection, did not find expected '-' indicator",
2205            )),
2206        }
2207    }
2208
2209    fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2210    where
2211        'input: 'a,
2212    {
2213        let mark = match self.pending_empty_scalar_span.take() {
2214            Some(mark) => mark,
2215            None => self.peek_token()?.0,
2216        };
2217        self.block_sequence_entry_node_with_empty_span(mark)
2218    }
2219
2220    fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2221    where
2222        'input: 'a,
2223    {
2224        if let QueuedToken(_, QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd) =
2225            *self.peek_token()?
2226        {
2227            self.state = State::BlockSequenceEntry;
2228            Ok((Event::empty_scalar(), mark))
2229        } else {
2230            self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false)
2231        }
2232    }
2233
2234    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
2235    where
2236        'input: 'a,
2237    {
2238        if let QueuedToken(mark, QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd) =
2239            *self.peek_token()?
2240        {
2241            self.state = State::FlowSequenceEntryMappingValue;
2242            Ok((Event::empty_scalar(), mark))
2243        } else {
2244            self.defer_parse_node(
2245                State::FlowNode,
2246                State::FlowSequenceEntryMappingValue,
2247                false,
2248                false,
2249            )
2250        }
2251    }
2252
2253    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
2254    where
2255        'input: 'a,
2256    {
2257        match *self.peek_token()? {
2258            QueuedToken(_, QueuedTokenType::Value) => {
2259                self.skip();
2260                if let Some(comment) = self.maybe_next_comment_event()? {
2261                    self.state = State::FlowSequenceEntryMappingValueNode;
2262                    Ok(comment)
2263                } else {
2264                    self.flow_sequence_entry_mapping_value_node()
2265                }
2266            }
2267            QueuedToken(mark, _) => {
2268                self.state = State::FlowSequenceEntryMappingEnd;
2269                Ok((Event::empty_scalar(), mark))
2270            }
2271        }
2272    }
2273
2274    fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2275    where
2276        'input: 'a,
2277    {
2278        let QueuedToken(span, ref tok) = *self.peek_token()?;
2279        if matches!(
2280            tok,
2281            QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd
2282        ) {
2283            self.state = State::FlowSequenceEntryMappingEnd;
2284            Ok((Event::empty_scalar(), Span::empty(span.start)))
2285        } else {
2286            self.defer_parse_node(
2287                State::FlowNode,
2288                State::FlowSequenceEntryMappingEnd,
2289                false,
2290                false,
2291            )
2292        }
2293    }
2294
2295    #[allow(clippy::unnecessary_wraps)]
2296    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
2297    where
2298        'input: 'a,
2299    {
2300        self.state = State::FlowSequenceEntry;
2301        let QueuedToken(span, _) = *self.peek_token()?;
2302        Ok((Event::MappingEnd, Span::empty(span.start)))
2303    }
2304
2305    /// Resolve a tag from the handle and the suffix.
2306    fn resolve_tag(
2307        &self,
2308        span: Span,
2309        handle: &Cow<'input, str>,
2310        suffix: Cow<'input, str>,
2311    ) -> Result<Cow<'input, Tag>, ScanError> {
2312        let original_handle = handle.to_string();
2313        let suffix = suffix.into_owned();
2314        let tag = if handle == "!!" {
2315            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
2316            // overridden.
2317            Tag::with_original_handle(
2318                self.tags
2319                    .get("!!")
2320                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
2321                suffix,
2322                original_handle,
2323            )
2324        } else if handle.is_empty() && suffix == "!" {
2325            // "!" introduces a local tag. Local tags may have their prefix overridden.
2326            match self.tags.get("") {
2327                Some(prefix) => Tag::with_original_handle(prefix.clone(), suffix, original_handle),
2328                None => Tag::with_original_handle(String::new(), suffix, original_handle),
2329            }
2330        } else {
2331            // Lookup handle in our tag directives.
2332            let prefix = self.tags.get(&**handle);
2333            if let Some(prefix) = prefix {
2334                Tag::with_original_handle(prefix.clone(), suffix, original_handle)
2335            } else {
2336                // Otherwise, it may be a local handle. With a local handle, the handle is set to
2337                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
2338                // If the handle is of the form "!foo!", this cannot be a local handle and we need
2339                // to error.
2340                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2341                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
2342                }
2343                Tag::with_original_handle(handle.to_string(), suffix, original_handle)
2344            }
2345        };
2346        Ok(Cow::Owned(tag))
2347    }
2348}
2349
2350impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
2351    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
2352        if let Some(ref x) = self.current {
2353            Some(Ok(x))
2354        } else {
2355            if self.stream_end_emitted {
2356                return None;
2357            }
2358            match self.next_event_impl() {
2359                Ok(token) => self.current = Some(token),
2360                Err(e) => return Some(e.into_result()),
2361            }
2362            self.current.as_ref().map(Ok)
2363        }
2364    }
2365
2366    fn next_event(&mut self) -> Option<ParseResult<'input>> {
2367        if self.stream_end_emitted {
2368            return None;
2369        }
2370
2371        let tok = self.next_event_impl();
2372        if matches!(tok, Ok((Event::StreamEnd, _))) {
2373            self.stream_end_emitted = true;
2374        }
2375        Some(tok)
2376    }
2377
2378    fn load<R: SpannedEventReceiver<'input>>(
2379        &mut self,
2380        recv: &mut R,
2381        multi: bool,
2382    ) -> Result<(), ScanError> {
2383        let mut recv = InfallibleSpannedReceiver(recv);
2384        into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
2385    }
2386
2387    fn try_load<R: TrySpannedEventReceiver<'input>>(
2388        &mut self,
2389        recv: &mut R,
2390        multi: bool,
2391    ) -> Result<(), TryLoadError<R::Error>> {
2392        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
2393        if !self.scanner.stream_started() || stream_start_buffered {
2394            let (ev, span) = self.next_event_impl()?;
2395            if ev != Event::StreamStart {
2396                return Err(TryLoadError::scan(ScanError::new_str(
2397                    span.start,
2398                    "did not find expected <stream-start>",
2399                )));
2400            }
2401            try_emit(recv, ev, span)?;
2402        }
2403
2404        if self.scanner.stream_ended() {
2405            // The scanner has already reached EOF before the document loop, so emit the terminal
2406            // event and stop.
2407            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
2408            return Ok(());
2409        }
2410
2411        loop {
2412            let (ev, span) = self.next_event_impl()?;
2413            let is_doc_end = matches!(ev, Event::DocumentEnd);
2414            let is_stream_end = matches!(ev, Event::StreamEnd);
2415
2416            try_emit(recv, ev, span)?;
2417
2418            if is_stream_end {
2419                return Ok(());
2420            }
2421            if !multi && is_doc_end {
2422                return Ok(());
2423            }
2424        }
2425    }
2426}
2427
2428impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
2429    type Item = Result<(Event<'input>, Span), ScanError>;
2430
2431    fn next(&mut self) -> Option<Self::Item> {
2432        self.next_event()
2433    }
2434}
2435
2436#[cfg(test)]
2437mod test {
2438    use alloc::{
2439        borrow::{Cow, ToOwned},
2440        string::{String, ToString},
2441        vec::Vec,
2442    };
2443    use core::{error::Error as _, fmt};
2444
2445    use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
2446
2447    use super::{
2448        Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
2449        TrySpannedEventReceiver,
2450    };
2451
2452    #[derive(Default)]
2453    struct CollectingSink<'input> {
2454        events: Vec<Event<'input>>,
2455    }
2456
2457    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
2458        fn on_event(&mut self, ev: Event<'input>) {
2459            self.events.push(ev);
2460        }
2461    }
2462
2463    fn first_error_info(input: &str) -> String {
2464        for event in Parser::new_from_str(input) {
2465            if let Err(err) = event {
2466                return err.info().to_owned();
2467            }
2468        }
2469        panic!("expected parser error")
2470    }
2471
2472    fn first_tagged_scalar_tag(input: &str) -> Tag {
2473        Parser::new_from_str(input)
2474            .find_map(|event| match event.expect("input should parse").0 {
2475                Event::Scalar(_, _, _, Some(tag)) => Some(tag.into_owned()),
2476                _ => None,
2477            })
2478            .expect("expected tagged scalar")
2479    }
2480
2481    #[test]
2482    fn deferred_parse_node_can_emit_comment_before_flow_node() {
2483        let mut parser = Parser::new_from_str("# deferred\nvalue\n");
2484        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2485        assert_eq!(
2486            parser.document_start(true).unwrap().0,
2487            Event::DocumentStart(false)
2488        );
2489
2490        let (event, _) = parser
2491            .defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
2492            .unwrap();
2493
2494        assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
2495        assert_eq!(parser.state, State::FlowNode);
2496    }
2497
2498    #[test]
2499    fn queued_node_event_gets_pending_key_indent() {
2500        let mut parser = Parser::new_from_str("");
2501        let span = Span::empty(Marker::new(0, 1, 0));
2502
2503        parser.pending_key_indent = Some(3);
2504        parser
2505            .queued_events
2506            .push_back((Event::SequenceStart(StructureStyle::Block, 0, None), span));
2507
2508        let (event, span) = parser.next_event_impl().unwrap();
2509
2510        assert!(matches!(
2511            event,
2512            Event::SequenceStart(StructureStyle::Block, 0, None)
2513        ));
2514        assert_eq!(span.indent, Some(3));
2515        assert_eq!(parser.pending_key_indent, None);
2516    }
2517
2518    #[test]
2519    fn state_machine_handles_deferred_flow_node_states() {
2520        let mut parser = Parser::new_from_str("value\n");
2521        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2522        assert_eq!(
2523            parser.document_start(true).unwrap().0,
2524            Event::DocumentStart(false)
2525        );
2526        parser.state = State::FlowNode;
2527        parser.push_state(State::End);
2528
2529        let (event, _) = parser.state_machine().unwrap();
2530
2531        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2532
2533        let mut parser = Parser::new_from_str("value\n");
2534        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2535        assert_eq!(
2536            parser.document_start(true).unwrap().0,
2537            Event::DocumentStart(false)
2538        );
2539        parser.state = State::FlowSequenceEntryMappingValueNode;
2540
2541        let (event, _) = parser.state_machine().unwrap();
2542
2543        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2544    }
2545
2546    #[test]
2547    fn display_resolved_core_tag_without_extra_bang() {
2548        let tag = Tag::with_original_handle("tag:yaml.org,2002:", "str", "!!");
2549
2550        assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
2551    }
2552
2553    #[test]
2554    fn tag_helpers_distinguish_core_and_local_tags() {
2555        let core = Tag::with_original_handle("tag:yaml.org,2002:", "int", "!!");
2556        let local = Tag::new("!", "thing");
2557        let non_specific = Tag::with_original_handle("", "!", "");
2558        let verbatim = Tag::with_original_handle("", "tag:example.com,2000:thing", "");
2559        let unknown_yaml_org = Tag::with_original_handle("", "tag:yaml.org,2002:application", "");
2560
2561        assert_eq!(core.core_suffix(), Some("int"));
2562        assert!(core.is_yaml_core_schema());
2563        assert!(core.is_yaml_core_schema_tag("int"));
2564        assert!(!core.is_yaml_core_schema_tag("str"));
2565        assert!(!core.is_custom());
2566        assert_eq!(core.parts(), ("tag:yaml.org,2002:", "int"));
2567        assert_eq!(core.original_parts(), ("!!", "int"));
2568        assert_eq!(core.original(), "!!int");
2569
2570        assert_eq!(local.core_suffix(), None);
2571        assert!(!local.is_yaml_core_schema());
2572        assert!(!local.is_yaml_core_schema_tag("thing"));
2573        assert!(local.is_custom());
2574        assert_eq!(local.parts(), ("!", "thing"));
2575        assert_eq!(local.original_parts(), ("!", "thing"));
2576        assert_eq!(local.original(), "!thing");
2577        assert_eq!(local.to_string(), "!thing");
2578
2579        assert_eq!(non_specific.parts(), ("", "!"));
2580        assert_eq!(non_specific.original_parts(), ("", "!"));
2581        assert_eq!(non_specific.original(), "!");
2582
2583        assert_eq!(verbatim.parts(), ("", "tag:example.com,2000:thing"));
2584        assert_eq!(
2585            verbatim.original_parts(),
2586            ("", "tag:example.com,2000:thing")
2587        );
2588        assert_eq!(verbatim.original(), "!<tag:example.com,2000:thing>");
2589
2590        assert_eq!(unknown_yaml_org.core_suffix(), None);
2591        assert!(!unknown_yaml_org.is_yaml_core_schema());
2592        assert!(unknown_yaml_org.is_custom());
2593    }
2594
2595    #[test]
2596    fn core_suffix_uses_resolved_tag_uri_for_common_spellings() {
2597        let cases = [
2598            ("shorthand", "v: !!int 1\n", ("tag:yaml.org,2002:", "int")),
2599            (
2600                "verbatim",
2601                "v: !<tag:yaml.org,2002:int> 1\n",
2602                ("", "tag:yaml.org,2002:int"),
2603            ),
2604            (
2605                "full prefix",
2606                "%TAG !e! tag:yaml.org,2002:\n---\nv: !e!int 1\n",
2607                ("tag:yaml.org,2002:", "int"),
2608            ),
2609            (
2610                "mid-split",
2611                "%TAG !m! tag:yaml.org,2002:i\n---\nv: !m!nt 1\n",
2612                ("tag:yaml.org,2002:i", "nt"),
2613            ),
2614        ];
2615
2616        for (label, input, expected_parts) in cases {
2617            let tag = first_tagged_scalar_tag(input);
2618
2619            assert_eq!(tag.parts(), expected_parts, "{label}");
2620            assert_eq!(tag.core_suffix(), Some("int"), "{label}");
2621            assert!(tag.is_yaml_core_schema(), "{label}");
2622            assert!(tag.is_yaml_core_schema_tag("int"), "{label}");
2623            assert!(!tag.is_yaml_core_schema_tag("str"), "{label}");
2624            assert!(!tag.is_custom(), "{label}");
2625        }
2626    }
2627
2628    #[test]
2629    fn core_suffix_rejects_non_core_yaml_org_tags() {
2630        let cases = [
2631            "binary",
2632            "merge",
2633            "omap",
2634            "pairs",
2635            "set",
2636            "timestamp",
2637            "value",
2638            "yaml",
2639        ];
2640
2641        for suffix in cases {
2642            let tag = Tag::with_original_handle("tag:yaml.org,2002:", suffix, "!!");
2643
2644            assert_eq!(tag.core_suffix(), None, "{suffix}");
2645            assert!(!tag.is_yaml_core_schema(), "{suffix}");
2646            assert!(tag.is_custom(), "{suffix}");
2647        }
2648    }
2649
2650    #[test]
2651    fn core_suffix_rejects_non_core_tags() {
2652        let cases = [
2653            ("local", "v: !local 1\n"),
2654            ("verbatim custom", "v: !<tag:example.com,2000:int> 1\n"),
2655            (
2656                "custom directive",
2657                "%TAG !e! tag:example.com,2000:\n---\nv: !e!int 1\n",
2658            ),
2659            (
2660                "overridden secondary handle",
2661                "%TAG !! tag:example.com,2000:app/\n---\nv: !!int 1\n",
2662            ),
2663        ];
2664
2665        for (label, input) in cases {
2666            let tag = first_tagged_scalar_tag(input);
2667
2668            assert_eq!(tag.core_suffix(), None, "{label}");
2669            assert!(!tag.is_yaml_core_schema(), "{label}");
2670            assert!(!tag.is_yaml_core_schema_tag("int"), "{label}");
2671            assert!(tag.is_custom(), "{label}");
2672        }
2673    }
2674
2675    #[test]
2676    fn attach_tag_start_applies_marker_to_span() {
2677        let event = Event::Scalar("value".into(), ScalarStyle::Plain, 0, None);
2678        let span = Span::new(Marker::new(6, 1, 6), Marker::new(11, 1, 11));
2679        let tag_start = Marker::new(0, 1, 0);
2680
2681        let (attached_event, attached_span) =
2682            Parser::<crate::input::str::StrInput<'_>>::attach_tag_start(
2683                event.clone(),
2684                span,
2685                Some(tag_start),
2686            );
2687
2688        assert_eq!(attached_event, event);
2689        assert_eq!(attached_span.start, span.start);
2690        assert_eq!(attached_span.end, span.end);
2691        assert_eq!(attached_span.tag_start(), Some(tag_start));
2692    }
2693
2694    #[test]
2695    fn event_inspection_helpers_report_node_metadata() {
2696        let tag = Tag::new("!", "thing");
2697        let scalar = Event::Scalar(
2698            "value".into(),
2699            ScalarStyle::DoubleQuoted,
2700            7,
2701            Some(Cow::Borrowed(&tag)),
2702        );
2703        let sequence =
2704            Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
2705        let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));
2706
2707        assert_eq!(scalar.anchor_id(), Some(7));
2708        assert_eq!(scalar.alias_id(), None);
2709        assert_eq!(scalar.tag(), Some(&tag));
2710        assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
2711        assert!(scalar.is_node());
2712
2713        assert_eq!(sequence.anchor_id(), Some(8));
2714        assert_eq!(sequence.alias_id(), None);
2715        assert_eq!(sequence.tag(), Some(&tag));
2716        assert_eq!(sequence.scalar(), None);
2717        assert!(sequence.is_node());
2718
2719        assert_eq!(mapping.anchor_id(), Some(9));
2720        assert_eq!(mapping.alias_id(), None);
2721        assert_eq!(mapping.tag(), Some(&tag));
2722        assert_eq!(mapping.scalar(), None);
2723        assert!(mapping.is_node());
2724
2725        let alias = Event::Alias(10);
2726        assert_eq!(alias.anchor_id(), None);
2727        assert_eq!(alias.alias_id(), Some(10));
2728        assert_eq!(alias.tag(), None);
2729        assert_eq!(alias.scalar(), None);
2730        assert!(alias.is_node());
2731
2732        let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
2733        assert_eq!(unanchored_scalar.anchor_id(), None);
2734        assert_eq!(unanchored_scalar.alias_id(), None);
2735
2736        let stream_start = Event::StreamStart;
2737        assert_eq!(stream_start.anchor_id(), None);
2738        assert_eq!(stream_start.alias_id(), None);
2739        assert_eq!(stream_start.tag(), None);
2740        assert_eq!(stream_start.scalar(), None);
2741        assert!(!stream_start.is_node());
2742    }
2743
2744    #[test]
2745    fn test_peek_eq_parse() {
2746        let s = "
2747a0 bb: val
2748a1: &x
2749    b1: 4
2750    b2: d
2751a2: 4
2752a3: [1, 2, 3]
2753a4:
2754    - [a1, a2]
2755    - 2
2756a5: *x
2757";
2758        let mut p = Parser::new_from_str(s);
2759        loop {
2760            let event_peek = p.peek().unwrap().unwrap().clone();
2761            let event = p.next_event().unwrap().unwrap();
2762            assert_eq!(event, event_peek);
2763            if event.0 == Event::StreamEnd {
2764                break;
2765            }
2766        }
2767    }
2768
2769    #[test]
2770    fn test_repeated_peek_returns_buffered_event() {
2771        let mut parser = Parser::new_from_str("key: value\n");
2772
2773        let first_peek = parser.peek().unwrap().unwrap().clone();
2774        let second_peek = parser.peek().unwrap().unwrap().clone();
2775        let next = parser.next_event().unwrap().unwrap();
2776
2777        assert_eq!(first_peek, second_peek);
2778        assert_eq!(first_peek, next);
2779    }
2780
2781    #[test]
2782    fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
2783        let mut parser = Parser::new_from_str("a: [1, 2");
2784
2785        loop {
2786            match parser.peek() {
2787                Some(Ok(_)) => {
2788                    parser.next_event().unwrap().unwrap();
2789                }
2790                Some(Err(error)) => {
2791                    assert_eq!(error.info(), "unclosed bracket '['");
2792                    break;
2793                }
2794                None => panic!("expected parse error"),
2795            }
2796        }
2797    }
2798
2799    #[test]
2800    fn test_peek_and_next_return_none_after_stream_end() {
2801        let mut parser = Parser::new_from_str("");
2802
2803        assert!(matches!(
2804            parser.next_event().unwrap().unwrap().0,
2805            Event::StreamStart
2806        ));
2807        assert!(matches!(
2808            parser.next_event().unwrap().unwrap().0,
2809            Event::StreamEnd
2810        ));
2811        assert!(parser.next_event().is_none());
2812        assert!(parser.peek().is_none());
2813    }
2814
2815    #[test]
2816    fn test_load_after_stream_already_ended_emits_stream_end() {
2817        let mut parser = Parser::new_from_str("");
2818        while parser.next_event().is_some() {}
2819
2820        let mut sink = CollectingSink::default();
2821        parser.load(&mut sink, true).unwrap();
2822
2823        assert_eq!(sink.events, vec![Event::StreamEnd]);
2824    }
2825
2826    #[test]
2827    fn test_load_visits_nested_collection_events() {
2828        let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
2829        let mut sink = CollectingSink::default();
2830
2831        parser.load(&mut sink, true).unwrap();
2832
2833        assert_eq!(
2834            sink.events,
2835            vec![
2836                Event::StreamStart,
2837                Event::DocumentStart(false),
2838                Event::MappingStart(StructureStyle::Block, 0, None),
2839                Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
2840                Event::SequenceStart(StructureStyle::Block, 0, None),
2841                Event::MappingStart(StructureStyle::Block, 0, None),
2842                Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
2843                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2844                Event::MappingEnd,
2845                Event::SequenceStart(StructureStyle::Flow, 0, None),
2846                Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
2847                Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
2848                Event::SequenceEnd,
2849                Event::SequenceEnd,
2850                Event::MappingEnd,
2851                Event::DocumentEnd,
2852                Event::StreamEnd,
2853            ]
2854        );
2855    }
2856
2857    #[derive(Clone, Debug, PartialEq, Eq)]
2858    enum ValidationError {
2859        ForbiddenValue,
2860    }
2861
2862    #[derive(Debug)]
2863    struct ReceiverFailure;
2864
2865    impl fmt::Display for ReceiverFailure {
2866        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2867            write!(f, "receiver failed")
2868        }
2869    }
2870
2871    impl core::error::Error for ReceiverFailure {}
2872
2873    struct FailingSink<'input> {
2874        events: Vec<Event<'input>>,
2875    }
2876
2877    impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
2878        type Error = ValidationError;
2879
2880        fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
2881            let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
2882            self.events.push(ev);
2883            if should_fail {
2884                Err(ValidationError::ForbiddenValue)
2885            } else {
2886                Ok(())
2887            }
2888        }
2889    }
2890
2891    #[test]
2892    fn test_try_load_stops_on_receiver_error() {
2893        let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
2894        let mut sink = FailingSink { events: Vec::new() };
2895
2896        let err = parser.try_load(&mut sink, true).unwrap_err();
2897
2898        assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
2899        assert!(sink
2900            .events
2901            .iter()
2902            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
2903        assert!(sink
2904            .events
2905            .iter()
2906            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
2907        assert!(!sink
2908            .events
2909            .iter()
2910            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
2911    }
2912
2913    struct SpannedFailingSink {
2914        failed_span: Option<Span>,
2915    }
2916
2917    impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
2918        type Error = Span;
2919
2920        fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
2921            if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
2922                self.failed_span = Some(span);
2923                Err(span)
2924            } else {
2925                Ok(())
2926            }
2927        }
2928    }
2929
2930    #[test]
2931    fn test_try_load_spanned_receiver_gets_span() {
2932        let mut parser = Parser::new_from_str("value: bad\n");
2933        let mut sink = SpannedFailingSink { failed_span: None };
2934
2935        let err = parser.try_load(&mut sink, false).unwrap_err();
2936
2937        let TryLoadError::Receiver(span) = err else {
2938            panic!("expected receiver error");
2939        };
2940
2941        assert_eq!(Some(span), sink.failed_span);
2942        assert!(!span.is_empty());
2943    }
2944
2945    struct NeverFails {
2946        count: usize,
2947    }
2948
2949    impl<'input> TryEventReceiver<'input> for NeverFails {
2950        type Error = ValidationError;
2951
2952        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
2953            self.count += 1;
2954            Ok(())
2955        }
2956    }
2957
2958    #[test]
2959    fn test_try_load_returns_scan_error() {
2960        let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
2961        let mut sink = NeverFails { count: 0 };
2962
2963        let err = parser.try_load(&mut sink, true).unwrap_err();
2964
2965        let TryLoadError::Scan(err) = err else {
2966            panic!("expected scan error");
2967        };
2968        assert_eq!(err.info(), "duplicate version directive");
2969    }
2970
2971    #[test]
2972    fn test_try_load_error_display_and_source_cover_both_variants() {
2973        let scan = ScanError::new_str(Marker::new(3, 1, 3), "bad yaml");
2974        let scan_err: TryLoadError<ReceiverFailure> = scan.into();
2975
2976        assert!(scan_err.to_string().starts_with("parser error: bad yaml"));
2977        assert!(scan_err.source().is_some());
2978
2979        let receiver_err = TryLoadError::Receiver(ReceiverFailure);
2980
2981        assert_eq!(receiver_err.to_string(), "receiver error: receiver failed");
2982        assert!(receiver_err.source().is_some());
2983    }
2984
2985    #[test]
2986    fn test_try_load_document_rejects_non_document_start_event() {
2987        let mut parser = Parser::new_from_str("");
2988        let span = Span::empty(Marker::new(0, 1, 0));
2989        let mut sink = NeverFails { count: 0 };
2990
2991        let err = parser
2992            .try_load_document(
2993                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
2994                span,
2995                &mut sink,
2996            )
2997            .unwrap_err();
2998
2999        let TryLoadError::Scan(err) = err else {
3000            panic!("expected scan error");
3001        };
3002        assert_eq!(err.info(), "did not find expected <document-start>");
3003    }
3004
3005    #[test]
3006    fn test_try_load_requires_buffered_stream_start() {
3007        let mut parser = Parser::new_from_str("");
3008        let span = Span::empty(Marker::new(0, 1, 0));
3009        parser.current = Some((
3010            Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
3011            span,
3012        ));
3013        let mut sink = NeverFails { count: 0 };
3014
3015        let err = parser.try_load(&mut sink, true).unwrap_err();
3016
3017        let TryLoadError::Scan(err) = err else {
3018            panic!("expected scan error");
3019        };
3020        assert_eq!(err.info(), "did not find expected <stream-start>");
3021    }
3022
3023    #[test]
3024    fn test_try_load_after_stream_already_ended_emits_stream_end() {
3025        let mut parser = Parser::new_from_str("");
3026        while parser.next_event().is_some() {}
3027
3028        let mut sink = FailingSink { events: Vec::new() };
3029        parser.try_load(&mut sink, true).unwrap();
3030
3031        assert_eq!(sink.events, vec![Event::StreamEnd]);
3032    }
3033
3034    #[test]
3035    fn test_load_single_document_stops_before_next_document() {
3036        let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
3037        let mut sink = CollectingSink::default();
3038
3039        parser.load(&mut sink, false).unwrap();
3040
3041        assert!(sink
3042            .events
3043            .iter()
3044            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
3045        assert!(!sink
3046            .events
3047            .iter()
3048            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
3049        assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
3050    }
3051
3052    #[test]
3053    fn test_duplicate_version_directive_errors() {
3054        assert_eq!(
3055            first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
3056            "duplicate version directive"
3057        );
3058    }
3059
3060    #[test]
3061    fn test_duplicate_tag_directive_errors() {
3062        assert_eq!(
3063            first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
3064            "the TAG directive must only be given at most once per handle in the same document"
3065        );
3066    }
3067
3068    #[test]
3069    fn test_directive_after_implicit_document_requires_explicit_end() {
3070        assert_eq!(
3071            first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
3072            "missing explicit document end marker before directive"
3073        );
3074    }
3075
3076    #[test]
3077    fn test_anchor_offset_overflow_reports_error() {
3078        let mut parser = Parser::new_from_str("&a value");
3079        parser.set_anchor_offset(usize::MAX);
3080
3081        let err = parser
3082            .find_map(Result::err)
3083            .expect("anchor registration should overflow");
3084
3085        assert_eq!(
3086            err.info(),
3087            "while parsing anchor, anchor count exceeded supported limit"
3088        );
3089    }
3090
3091    #[test]
3092    fn test_alias_resolves_to_registered_anchor_id() {
3093        let events = Parser::new_from_str("- &a value\n- *a\n")
3094            .map(|event| event.unwrap().0)
3095            .collect::<Vec<_>>();
3096
3097        assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
3098    }
3099
3100    #[test]
3101    fn test_anchor_then_tag_applies_both_to_scalar() {
3102        let events = Parser::new_from_str("&a !!str value")
3103            .map(|event| event.unwrap().0)
3104            .collect::<Vec<_>>();
3105
3106        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
3107            .iter()
3108            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3109        else {
3110            panic!("expected tagged anchored scalar");
3111        };
3112
3113        assert_eq!(value, "value");
3114        assert_eq!(*anchor_id, 1);
3115        assert_eq!(tag.handle, "tag:yaml.org,2002:");
3116        assert_eq!(tag.suffix, "str");
3117        assert_eq!(tag.original_handle, "!!");
3118        assert_eq!(tag.original(), "!!str");
3119    }
3120
3121    #[test]
3122    fn test_tag_then_anchor_applies_both_to_scalar() {
3123        let events = Parser::new_from_str("!!str &a value")
3124            .map(|event| event.unwrap().0)
3125            .collect::<Vec<_>>();
3126
3127        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
3128            .iter()
3129            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3130        else {
3131            panic!("expected tagged anchored scalar");
3132        };
3133
3134        assert_eq!(value, "value");
3135        assert_eq!(*anchor_id, 1);
3136        assert_eq!(tag.handle, "tag:yaml.org,2002:");
3137        assert_eq!(tag.suffix, "str");
3138        assert_eq!(tag.original_handle, "!!");
3139        assert_eq!(tag.original(), "!!str");
3140    }
3141
3142    #[test]
3143    fn test_tag_directive_preserves_original_handle() {
3144        let events =
3145            Parser::new_from_str("%TAG !e! tag:example.com,2000:\n---\nconfig: !e!keep value\n")
3146                .map(|event| event.unwrap().0)
3147                .collect::<Vec<_>>();
3148
3149        let (value, tag) = events
3150            .iter()
3151            .find_map(|event| match event {
3152                Event::Scalar(value, _, _, Some(tag)) if value == "value" => Some((value, tag)),
3153                _ => None,
3154            })
3155            .expect("expected tagged scalar");
3156
3157        assert_eq!(value, "value");
3158        assert_eq!(tag.handle, "tag:example.com,2000:");
3159        assert_eq!(tag.suffix, "keep");
3160        assert_eq!(tag.original_handle, "!e!");
3161        assert_eq!(tag.parts(), ("tag:example.com,2000:", "keep"));
3162        assert_eq!(tag.original_parts(), ("!e!", "keep"));
3163        assert_eq!(tag.original(), "!e!keep");
3164    }
3165
3166    #[test]
3167    fn test_verbatim_tag_original_is_normalized_author_spelling() {
3168        let events = Parser::new_from_str("key: !<tag:example.com,2000:thing> value\n")
3169            .map(|event| event.unwrap().0)
3170            .collect::<Vec<_>>();
3171
3172        let Some(Event::Scalar(value, _, _, Some(tag))) = events
3173            .iter()
3174            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3175        else {
3176            panic!("expected tagged scalar");
3177        };
3178
3179        assert_eq!(value, "value");
3180        assert_eq!(tag.handle, "");
3181        assert_eq!(tag.suffix, "tag:example.com,2000:thing");
3182        assert_eq!(tag.original_handle, "");
3183        assert_eq!(tag.parts(), ("", "tag:example.com,2000:thing"));
3184        assert_eq!(tag.original_parts(), ("", "tag:example.com,2000:thing"));
3185        assert_eq!(tag.original(), "!<tag:example.com,2000:thing>");
3186    }
3187
3188    #[test]
3189    fn test_multiple_tag_directives_are_kept_within_document() {
3190        let text = r"
3191%TAG !a! tag:a,2024:
3192%TAG !b! tag:b,2024:
3193---
3194first: !a!x foo
3195second: !b!y bar
3196";
3197
3198        let mut seen_a = false;
3199        let mut seen_b = false;
3200        for event in Parser::new_from_str(text) {
3201            let (event, _) = event.unwrap();
3202            if let Event::Scalar(_, _, _, Some(tag)) = event {
3203                if tag.handle == "tag:a,2024:" {
3204                    seen_a = true;
3205                } else if tag.handle == "tag:b,2024:" {
3206                    seen_b = true;
3207                }
3208            }
3209        }
3210
3211        assert!(seen_a);
3212        assert!(seen_b);
3213    }
3214
3215    #[test]
3216    fn test_tags_are_cleared_when_next_document_has_no_directives() {
3217        let text = r"
3218%TAG !t! tag:test,2024:
3219--- !t!1
3220foo
3221--- !t!2
3222bar
3223";
3224
3225        let mut parser = Parser::new_from_str(text);
3226        for event in parser.by_ref() {
3227            let (event, _) = event.unwrap();
3228            if let Event::DocumentEnd = event {
3229                break;
3230            }
3231        }
3232
3233        match parser.next().unwrap().unwrap().0 {
3234            Event::DocumentStart(true) => {}
3235            _ => panic!("expected explicit second document start"),
3236        }
3237
3238        let err = parser.next().unwrap().unwrap_err();
3239        assert!(format!("{err}").contains("the handle wasn't declared"));
3240    }
3241
3242    #[test]
3243    fn test_pull_parser_clears_anchors_between_documents() {
3244        let mut parser = Parser::new_from_str(
3245            "--- &a value
3246--- *a
3247",
3248        );
3249
3250        for event in parser.by_ref() {
3251            let (event, _) = event.unwrap();
3252            if matches!(event, Event::DocumentEnd) {
3253                break;
3254            }
3255        }
3256
3257        match parser.next().unwrap().unwrap().0 {
3258            Event::DocumentStart(true) => {}
3259            _ => panic!("expected explicit second document start"),
3260        }
3261
3262        let err = parser.next().unwrap().unwrap_err();
3263        assert!(format!("{err}").contains("unknown anchor"));
3264    }
3265
/// With `keep_tags(true)` every mapping in the stream resolves its `!t!`
/// handle to the declared prefix; with `keep_tags(false)` the same input
/// must yield at least one error.
#[test]
fn test_keep_tags_across_multiple_documents() {
    // Byte-for-byte the same input as a raw multi-line literal: a leading
    // newline, the directives, then two tagged and anchored mappings.
    let text = concat!(
        "\n",
        "%YAML 1.1\n",
        "%TAG !t! tag:test,2024:\n",
        "--- !t!1 &1\n",
        "foo: \"bar\"\n",
        "--- !t!2 &2\n",
        "baz: \"qux\"\n",
    );

    // Keep only the tag slot of each `MappingStart`; any parse error aborts
    // the test through `unwrap`.
    let mapping_tags = Parser::new_from_str(text)
        .keep_tags(true)
        .filter_map(|item| match item.unwrap().0 {
            Event::MappingStart(_, _, tag) => Some(tag),
            _ => None,
        });
    for tag in mapping_tags {
        // Every mapping in the input is tagged, so a missing tag is a failure.
        let tag = tag.unwrap();
        assert_eq!(tag.handle, "tag:test,2024:");
    }

    // Without kept tags the stream has to report an error; stop at the
    // first one found.
    let saw_error = Parser::new_from_str(text)
        .keep_tags(false)
        .any(|item| item.is_err());
    assert!(saw_error, "Test failed, did not encounter error");
}
3292
/// The input `[?: value]` must be parsed to the end without a single error.
#[test]
fn test_flow_sequence_mapping_allows_empty_key() {
    // Drive the parser to exhaustion; the first `Err` fails the test with
    // the message below.
    Parser::new_from_str("[?: value]").for_each(|event| {
        event.expect("parser should accept flow sequence mappings with empty keys");
    });
}
3300
/// Even with `keep_tags(true)`, a `%TAG !! ...` override only affects the
/// first document: the second `!!int` must resolve to the standard
/// `tag:yaml.org,2002:` prefix again.
#[test]
fn test_keep_tags_does_not_persist_default_tag_handles() {
    let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";

    // Collect, in stream order, the handle of every scalar tagged with the
    // suffix `int`. Parse errors abort the test through `unwrap`.
    let int_tags: Vec<_> = Parser::new_from_str(text)
        .keep_tags(true)
        .filter_map(|item| match item.unwrap().0 {
            Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
                Some(tag.handle.clone())
            }
            _ => None,
        })
        .collect();

    assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
}
3317
/// When the tag table holds an entry under the empty-string key,
/// `resolve_tag` called with an empty handle must use that entry as the
/// resolved handle and keep the suffix untouched.
#[test]
fn test_resolve_tag_uses_overridden_local_prefix() {
    let mut parser = Parser::new_from_str("");

    // Install the override directly in the parser's tag table.
    let local_prefix = String::from("tag:local.example,2024:");
    parser.tags.insert(String::new(), local_prefix);

    // Resolve an empty handle with the suffix `!` at an empty span.
    let span = Span::empty(Marker::new(0, 1, 0));
    let resolved = parser.resolve_tag(span, &Cow::Borrowed(""), Cow::Borrowed("!"));
    let tag = resolved.unwrap();

    assert_eq!(tag.handle, "tag:local.example,2024:");
    assert_eq!(tag.suffix, "!");
}
3336
/// Peeking at `StreamStart` before calling `load` must not swallow it: the
/// receiver still sees `StreamStart` first, followed by a `DocumentStart`.
#[test]
fn test_load_after_peek_stream_start() {
    // Receiver that simply records every event in arrival order.
    struct Sink<'input> {
        events: Vec<Event<'input>>,
    }

    impl<'input> EventReceiver<'input> for Sink<'input> {
        fn on_event(&mut self, event: Event<'input>) {
            self.events.push(event);
        }
    }

    let mut parser = Parser::new_from_str("key: value\n");
    let mut sink = Sink { events: Vec::new() };

    // Look at the first event without consuming it.
    let peeked = parser.peek().unwrap().unwrap();
    assert_eq!(peeked.0, Event::StreamStart);

    parser.load(&mut sink, false).unwrap();

    // Check the first two recorded events, in order.
    let mut received = sink.events.iter();
    assert!(matches!(received.next(), Some(Event::StreamStart)));
    assert!(matches!(received.next(), Some(Event::DocumentStart(_))));
}
3359}