Skip to main content

granit_parser/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{
10        Marker, Placement, QueuedToken, QueuedTokenType, ScalarStyle, ScanError, Scanner, Span,
11    },
12    BufferedInput,
13};
14
15use alloc::{
16    borrow::Cow,
17    collections::{BTreeMap, BTreeSet, VecDeque},
18    string::{String, ToString},
19    vec::Vec,
20};
21use core::{
22    convert::Infallible,
23    fmt::{self, Display},
24};
25
/// Internal state of the parser's state machine.
///
/// The parser keeps one value of this type as its current state and a stack of previous
/// states to return to when the current one is exited (see the `state` and `states` fields of
/// [`Parser`]).
///
/// NOTE(review): the transitions between these states are implemented outside this excerpt;
/// the variant names mirror the YAML construct being parsed (stream, document, block/flow
/// node, sequence entry, mapping key/value) — confirm against the state handlers.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockNodeOrIndentlessSequence,
    FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    IndentlessSequenceEntryNode,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingKeyNode,
    BlockMappingValue,
    BlockMappingValueNode,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingValueNode,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingKeyNode,
    FlowMappingValue,
    FlowMappingValueNode,
    FlowMappingEmptyValue,
    BlockSequenceEntryNode,
    End,
}
60
/// The `major.minor` version announced by a `%YAML` directive.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct YamlVersion {
    /// The number before the dot (the `1` in `%YAML 1.2`).
    pub major: u32,
    /// The number after the dot (the `2` in `%YAML 1.2`).
    pub minor: u32,
}

impl YamlVersion {
    /// Build a version from its major and minor components.
    ///
    /// This is a `const fn`, so it can be used to define constants.
    #[must_use]
    pub const fn new(major: u32, minor: u32) -> Self {
        YamlVersion { major, minor }
    }
}
77
/// An event generated by the YAML parser.
///
/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
/// [`EventReceiver`] trait.
///
/// Node events ([`Event::Scalar`], [`Event::SequenceStart`] and [`Event::MappingStart`]) carry
/// an anchor ID, where `0` means the node has no anchor, and an optional resolved [`Tag`].
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Reserved for internal use.
    Nothing,
    /// Event generated at the very beginning of parsing.
    StreamStart,
    /// Last event that will be generated by the parser. Signals EOF.
    StreamEnd,
    /// The start of a YAML document.
    ///
    /// Emitted for both explicit (`---`) and implicit document starts; the first field tells
    /// the two apart.
    DocumentStart(
        /// Whether this is an explicit document start marker (`---`).
        ///
        /// When `false`, the document start is implicit.
        bool,
        /// YAML version declared by a preceding `%YAML` directive, if any.
        Option<YamlVersion>,
    ),
    /// The end of a YAML document.
    ///
    /// This event is emitted for both explicit document end markers (`...`) and implicit document
    /// ends.
    DocumentEnd,
    /// A YAML alias.
    Alias(
        /// The anchor ID the alias refers to.
        usize,
    ),
    /// A YAML source comment.
    ///
    /// Comments are presentation metadata, not YAML data nodes. The payload is the raw text
    /// exactly after `#`, excluding only the line break. The placement is a best-effort hint for
    /// correlating the comment with nearby YAML presentation. The companion parser [`Span`] covers
    /// the whole source comment, including `#` and excluding the line break.
    Comment(
        /// Raw comment payload exactly after `#`, excluding only the line break.
        Cow<'input, str>,
        /// Best-effort placement relative to nearby YAML content.
        Placement,
    ),
    /// A YAML scalar value.
    Scalar(
        /// The scalar value after YAML escape processing.
        Cow<'input, str>,
        /// The source notation used for the scalar.
        ScalarStyle,
        /// The anchor ID defined on this scalar, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this scalar, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The start of a YAML sequence (array).
    ///
    /// Closed by the matching [`Event::SequenceEnd`].
    SequenceStart(
        /// The notation style used for the sequence.
        StructureStyle,
        /// The anchor ID defined on this sequence, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this sequence, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML sequence (array).
    SequenceEnd,
    /// The start of a YAML mapping (object, hash).
    ///
    /// Closed by the matching [`Event::MappingEnd`].
    MappingStart(
        /// The notation style used for the mapping (Flow or Block).
        StructureStyle,
        /// The anchor ID defined on this mapping, or `0` if it has no anchor.
        usize,
        /// The resolved tag attached to this mapping, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML mapping (object, hash).
    MappingEnd,
}
156
/// How a YAML collection (sequence or mapping) is written in the source.
///
/// Block notation ([`StructureStyle::Block`]) relies on indentation and line structure:
///
/// ```yaml
/// items:
///   - milk
///   - bread
/// mapping:
///   name: Ada
///   active: true
/// ```
///
/// Flow notation ([`StructureStyle::Flow`]) uses explicit brackets and braces:
///
/// ```yaml
/// items: [milk, bread]
/// mapping: {name: Ada, active: true}
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StructureStyle {
    /// Indentation-based notation: `- item` sequences and `key: value` mappings.
    Block,
    /// Bracketed notation: `[item]` sequences and `{key: value}` mappings.
    Flow,
}
183
/// A YAML tag, stored as a resolved `handle` + `suffix` pair plus the handle as authored.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tag {
    /// Resolved tag handle or prefix.
    ///
    /// Examples include `tag:yaml.org,2002:` for core-schema tags and `!` for local tags.
    pub handle: String,
    /// Tag suffix following the resolved handle or prefix.
    pub suffix: String,
    /// Tag handle as written in the source before `%TAG` directive resolution.
    ///
    /// For example, with `%TAG !e! tag:example.com,2000:`, a source tag `!e!keep` is resolved
    /// as `handle = "tag:example.com,2000:"` and `suffix = "keep"`, while
    /// `original_handle = "!e!"`.
    pub original_handle: String,
}

/// Namespace prefix shared by every tag of the YAML core schema.
const YAML_CORE_SCHEMA_PREFIX: &str = "tag:yaml.org,2002:";

// YAML 1.2.2 defines Core Schema tags by reference:
// - §10.3.1 says Core Schema uses the same tags as YAML's JSON Schema.
// - §10.2.1 adds null/bool/int/float to the Failsafe Schema.
// - §10.1.1 defines the Failsafe Schema tags map/seq/str.
// Therefore the YAML 1.2.2 Core Schema tag suffixes are:
// bool, float, int, map, null, seq, and str.
const YAML_CORE_SCHEMA_SUFFIXES: [&str; 7] = ["bool", "float", "int", "map", "null", "seq", "str"];

/// Return `suffix` unchanged when it names one of the seven core-schema types.
fn known_yaml_core_schema_suffix(suffix: &str) -> Option<&str> {
    let is_core = YAML_CORE_SCHEMA_SUFFIXES
        .iter()
        .any(|&known| known == suffix);
    if is_core {
        Some(suffix)
    } else {
        None
    }
}

/// Find the core-schema type name that equals `handle_tail` followed by `suffix`.
///
/// Used when a `%TAG` split left the beginning of the type name in the handle; the name is
/// matched piecewise so no concatenated `String` has to be built.
fn known_yaml_core_schema_suffix_from_split(
    handle_tail: &str,
    suffix: &str,
) -> Option<&'static str> {
    for &candidate in YAML_CORE_SCHEMA_SUFFIXES.iter() {
        if candidate.strip_prefix(handle_tail) == Some(suffix) {
            return Some(candidate);
        }
    }
    None
}

impl Tag {
    /// Build a tag from its resolved handle and suffix.
    ///
    /// The original (source) handle is set to the same value as `handle`. This is mostly
    /// helpful for tests and for consumers assembling parser-compatible tags by hand; when the
    /// authored handle differs, use [`Self::with_original_handle`].
    #[must_use]
    pub fn new(handle: impl Into<String>, suffix: impl Into<String>) -> Self {
        let resolved: String = handle.into();
        let authored = resolved.clone();
        Self::with_original_handle(resolved, suffix, authored)
    }

    /// Build a tag from its resolved parts together with the handle as authored in the source.
    #[must_use]
    pub fn with_original_handle(
        handle: impl Into<String>,
        suffix: impl Into<String>,
        original_handle: impl Into<String>,
    ) -> Self {
        let handle = handle.into();
        let suffix = suffix.into();
        let original_handle = original_handle.into();
        Self {
            handle,
            suffix,
            original_handle,
        }
    }

    /// Return the core-schema type name this tag resolves to, if it is a known core tag.
    ///
    /// Matching is done on the resolved URI (`handle` followed by `suffix`), not on how the
    /// source spelled the tag. `!!int`, `!<tag:yaml.org,2002:int>`, and a `%TAG` split such as
    /// `%TAG !m! tag:yaml.org,2002:i` followed by `!m!nt` all yield `Some("int")`.
    ///
    /// The stored parts are not modified; see [`Self::parts`], [`Self::original_parts`], or
    /// [`Self::original`] for the authored spellings.
    #[must_use]
    pub fn core_suffix(&self) -> Option<&str> {
        let handle = self.handle.as_str();
        match YAML_CORE_SCHEMA_PREFIX.strip_prefix(handle) {
            // The handle stops at or before the namespace boundary: `suffix` holds whatever
            // is left of the namespace prefix, followed by the complete type name.
            Some(missing_prefix) => self
                .suffix
                .strip_prefix(missing_prefix)
                .and_then(known_yaml_core_schema_suffix),
            // The handle reaches past the namespace boundary, so the type name straddles
            // `handle` and `suffix`. Compare against the fixed names without allocating.
            None => {
                let handle_tail = handle.strip_prefix(YAML_CORE_SCHEMA_PREFIX)?;
                known_yaml_core_schema_suffix_from_split(handle_tail, &self.suffix)
            }
        }
    }

    /// Return the name this tag resolves to inside the namespace `prefix`, or `None` if the
    /// resolved tag does not start with `prefix`.
    ///
    /// As with [`Self::core_suffix`], the resolved `handle ++ suffix` URI is what gets matched,
    /// so `!!omap`, `!<tag:yaml.org,2002:omap>`, and `%TAG !o! tag:yaml.org,2002:o` followed by
    /// `!o!map` all give `Some("omap")` for the `tag:yaml.org,2002:` prefix. Unlike
    /// [`Self::core_suffix`], the name is not restricted to the seven core types.
    ///
    /// The result borrows from `self` unless the handle extends past `prefix`, in which case
    /// the name has to be assembled into an owned string.
    #[must_use]
    pub fn suffix_in_namespace(&self, prefix: &str) -> Option<Cow<'_, str>> {
        let handle = self.handle.as_str();
        let suffix = self.suffix.as_str();

        match handle.strip_prefix(prefix) {
            // Handle is exactly the prefix: the suffix already is the name.
            Some("") => Some(Cow::Borrowed(suffix)),
            // A `%TAG` split pushed the start of the name into the handle: glue both halves.
            Some(handle_tail) => {
                let mut joined = String::with_capacity(handle_tail.len() + suffix.len());
                joined += handle_tail;
                joined += suffix;
                Some(Cow::Owned(joined))
            }
            // Handle ends inside the prefix: the suffix must supply the rest of the prefix
            // before the name starts.
            None => {
                let prefix_tail = prefix.strip_prefix(handle)?;
                suffix.strip_prefix(prefix_tail).map(Cow::Borrowed)
            }
        }
    }

    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification lists [the Core Schema
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema). The check is made on the resolved
    /// tag URI and therefore does not depend on where the handle/suffix split falls.
    ///
    /// # Return
    /// `true` if the resolved tag is a known YAML 1.2.2 Core Schema tag.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        matches!(self.core_suffix(), Some(_))
    }

    /// Return true when this is the YAML core-schema tag named `suffix`.
    ///
    /// For instance this recognises `!!str`, `!!int`, `!!float`, `!!bool`, `!!null`, `!!map`,
    /// or `!!seq` once the tag has been resolved.
    #[must_use]
    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
        self.core_suffix() == Some(suffix)
    }

    /// Return true for any tag that is not part of the YAML 1.2.2 Core Schema tag set.
    ///
    /// The resolved tag URI is inspected, not only the handle spelling:
    /// `tag:yaml.org,2002:timestamp` lives in the YAML tag namespace but is not a YAML 1.2.2
    /// Core Schema tag, so it counts as custom.
    #[must_use]
    pub fn is_custom(&self) -> bool {
        self.core_suffix().is_none()
    }

    /// Return the resolved tag as a `(handle, suffix)` pair.
    #[must_use]
    pub fn parts(&self) -> (&str, &str) {
        (self.handle.as_str(), self.suffix.as_str())
    }

    /// Return the tag as `(original_handle, suffix)`, with the handle taken from the source.
    ///
    /// Use this when the author's spelling, such as `!e!keep`, is wanted rather than the
    /// resolved URI tag `tag:example.com,2000:keep`.
    #[must_use]
    pub fn original_parts(&self) -> (&str, &str) {
        (self.original_handle.as_str(), self.suffix.as_str())
    }

    /// Rebuild the tag spelling from the source handle and the suffix.
    ///
    /// Ordinary shorthand tags give back the author-facing spelling (`!e!keep`, `!!str`).
    /// Verbatim tags give a normalized verbatim form such as `!<tag:example.com,2000:thing>`,
    /// which is not necessarily byte-identical to the source token.
    #[must_use]
    pub fn original(&self) -> String {
        // An empty source handle denotes a verbatim tag, except for the lone `!` suffix,
        // which is emitted as-is.
        let verbatim = self.original_handle.is_empty() && self.suffix != "!";
        let (opening, closing) = if verbatim {
            ("!<", ">")
        } else {
            (self.original_handle.as_str(), "")
        };

        let mut spelled =
            String::with_capacity(opening.len() + self.suffix.len() + closing.len());
        spelled.push_str(opening);
        spelled.push_str(&self.suffix);
        spelled.push_str(closing);
        spelled
    }
}

impl Display for Tag {
    /// Write the resolved handle immediately followed by the suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A local `!` handle needs no special case: `!` followed by the suffix is exactly
        // the handle followed by the suffix.
        write!(f, "{}{}", self.handle, self.suffix)
    }
}
392
393impl<'input> Event<'input> {
394    /// Return the anchor ID defined by this event, if any.
395    ///
396    /// Returns `Some(id)` when this event defines an anchor on a scalar, sequence, or mapping
397    /// node. Returns `None` for all other events, including `Alias` (which references an anchor
398    /// rather than defining one; use [`Self::alias_id`] to obtain the target anchor ID).
399    #[must_use]
400    pub fn anchor_id(&self) -> Option<usize> {
401        match self {
402            Self::Scalar(_, _, anchor_id, _)
403            | Self::SequenceStart(_, anchor_id, _)
404            | Self::MappingStart(_, anchor_id, _)
405                if *anchor_id != 0 =>
406            {
407                Some(*anchor_id)
408            }
409            _ => None,
410        }
411    }
412
413    /// Return the target anchor ID referenced by this alias event, if this event is an alias.
414    #[must_use]
415    pub fn alias_id(&self) -> Option<usize> {
416        match self {
417            Self::Alias(anchor_id) => Some(*anchor_id),
418            _ => None,
419        }
420    }
421
422    /// Return the resolved tag carried by this node event, if any.
423    #[must_use]
424    pub fn tag(&self) -> Option<&Tag> {
425        match self {
426            Self::Scalar(_, _, _, tag)
427            | Self::SequenceStart(_, _, tag)
428            | Self::MappingStart(_, _, tag) => tag.as_deref(),
429            _ => None,
430        }
431    }
432
433    /// Return the scalar value and style, if this event is a scalar.
434    #[must_use]
435    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
436        match self {
437            Self::Scalar(value, style, _, _) => Some((value.as_ref(), *style)),
438            _ => None,
439        }
440    }
441
442    /// Return whether this event represents a YAML node (value).
443    ///
444    /// Returns `true` for scalars, collection starts, and aliases — all events that produce a
445    /// value in the document tree. Returns `false` for structural events such as `StreamStart`,
446    /// `DocumentStart`, collection ends, etc.
447    #[must_use]
448    pub fn is_node(&self) -> bool {
449        matches!(
450            self,
451            Self::Alias(_) | Self::Scalar(..) | Self::SequenceStart(..) | Self::MappingStart(..)
452        )
453    }
454
455    /// Create an empty scalar.
456    fn empty_scalar() -> Self {
457        // a null scalar
458        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
459    }
460
461    /// Create an empty scalar with the given node properties.
462    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
463        let value = if tag.is_some() {
464            Cow::default()
465        } else {
466            "~".into()
467        };
468
469        Event::Scalar(value, ScalarStyle::Plain, anchor, tag)
470    }
471}
472
// Limit on how many comment events may be buffered in a row.
//
// Preserve span ordering for normal-sized comment groups. Longer runs in syntactically ambiguous
// positions are rejected before they can grow the parser queue without bound.
// NOTE(review): the code enforcing this limit is outside this excerpt.
const MAX_BUFFERED_COMMENT_EVENTS: usize = 32;
476
/// A YAML parser.
///
/// The parser pulls tokens from a [`Scanner`], tracks its position in the grammar with a
/// current [`State`] plus a stack of states to return to, and produces [`Event`]s paired with
/// their source [`Span`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// The underlying scanner from which we pull tokens.
    scanner: Scanner<'input, T>,
    /// The stack of _previous_ states we were in.
    ///
    /// States are pushed in the context of subobjects to this stack. The top-most element is the
    /// state in which to come back to when exiting the current state.
    states: Vec<State>,
    /// The state in which we currently are.
    state: State,
    /// The next token from the scanner.
    token: Option<QueuedToken<'input>>,
    /// The next YAML event to emit.
    current: Option<(Event<'input>, Span)>,
    /// The next parser error to emit after it has been observed by `peek`.
    current_error: Option<ScanError>,
    /// YAML events buffered by parser states that need to emit an earlier synthetic node first.
    queued_events: VecDeque<(Event<'input>, Span)>,

    /// Pending indentation hint to be attached to the next emitted event span.
    ///
    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
    /// itself).
    pending_key_indent: Option<usize>,
    /// Pending anchor ID to attach to a node after an intervening comment.
    pending_node_anchor_id: usize,
    /// Pending tag to attach to a node after an intervening comment.
    pending_node_tag: Option<Cow<'input, Tag>>,
    /// Pending explicit tag token start to attach to a node after an intervening comment.
    pending_node_tag_start: Option<Marker>,
    /// Pending end marker of the last node-property token before an intervening comment.
    pending_node_property_end: Option<Marker>,
    /// Pending empty scalar span captured before an intervening comment.
    pending_empty_scalar_span: Option<Span>,
    /// End marker of the most recently produced event.
    last_event_end: Option<Marker>,
    /// Pending YAML version captured before comments preceding an explicit document start.
    pending_document_version: Option<YamlVersion>,
    /// Whether document directives were already initialized before comments preceding `---`.
    pending_document_directives: bool,
    /// `%TAG` handles already seen before comments preceding an explicit document start.
    pending_document_tag_handles: BTreeSet<String>,
    /// Anchors that have been encountered in the YAML document.
    ///
    /// Maps the anchor name to the ID assigned to it.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Next ID available for an anchor.
    ///
    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
    /// return for the next anchor and the count of anchor IDs emitted.
    anchor_id_count: usize,
    /// The tag directives (`%TAG`) the parser has encountered.
    ///
    /// Key is the handle, and value is the prefix.
    tags: BTreeMap<String, String>,
    /// Whether we have emitted a terminal iterator result.
    ///
    /// Terminal means either [`Event::StreamEnd`] or a [`ScanError`]. Emitted means that it has
    /// been returned from [`Self::next_event`] or [`Self::next`]. If the terminal result is stored
    /// in [`Self::current`] or [`Self::current_error`], this is set to `false`.
    stream_end_emitted: bool,
    /// Make tags global across all documents.
    keep_tags: bool,
}
542
/// Trait to be implemented in order to use the low-level parsing API.
///
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`SpannedEventReceiver`] automatically.
/// Non-spanned receivers receive [`Event::Comment(text, placement)`](Event::Comment) like any
/// other event, but without source location. Spanned receivers receive the same comment event plus
/// the comment [`Span`] in [`SpannedEventReceiver::on_event`]. For comments, that span covers the
/// whole source comment, including `#` and excluding the line break. When parsing from an input
/// with byte offsets, such as [`Parser::new_from_str`], [`Span::slice`] returns that source
/// comment text.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
///
/// In a mapping, key-values are sent as consecutive data events. Comments can appear in the raw
/// event stream between a key and its value; they are presentation metadata, not YAML data nodes.
/// Consumers building YAML data trees should ignore [`Event::Comment`]. Any key/value alternation
/// shortcut applies only after filtering out comments and other presentation metadata. After that
/// filtering, the first event after an [`Event::MappingStart`] will be the key, and the following
/// event will be its value. If the mapping contains no sub-mapping or sub-sequence, then even events
/// (starting from 0) will always be keys and odd ones will always be values. The mapping ends when
/// an [`Event::MappingEnd`] event is received.
///
/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
///
/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
/// be part of the value and not another key-value pair or element in the sequence.
///
/// For instance, the following YAML:
/// ```yaml
/// a: b
/// c:
///   d: e
/// f:
///   - g
///   - h
/// ```
/// will emit (indented and commented for visibility):
/// ```text
/// StreamStart, DocumentStart, MappingStart,
///   Scalar("a", ..), Scalar("b", ..),
///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
/// MappingEnd, DocumentEnd, StreamEnd
/// ```
///
/// # Example
/// ```
/// # use granit_parser::{Event, EventReceiver, Parser};
/// #
/// /// Sink of events. Collects them into an array.
/// struct EventSink<'input> {
///     events: Vec<Event<'input>>,
/// }
///
/// /// Implement `on_event`, pushing into `self.events`.
/// impl<'input> EventReceiver<'input> for EventSink<'input> {
///     fn on_event(&mut self, ev: Event<'input>) {
///         self.events.push(ev);
///     }
/// }
///
/// /// Load events from a YAML string.
/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
///     let mut sink = EventSink { events: Vec::new() };
///     let mut parser = Parser::new_from_str(yaml);
///     // Load events using our sink as the receiver.
///     parser.load(&mut sink, true).unwrap();
///     sink.events
/// }
/// ```
pub trait EventReceiver<'input> {
    /// Handler called for each YAML event that is emitted by the parser.
    ///
    /// The event is passed by value; the receiver takes ownership of it.
    fn on_event(&mut self, ev: Event<'input>);
}
626
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
/// For [`Event::Comment`], the span is the source range of the whole comment.
///
/// Every [`EventReceiver`] implements this trait automatically through a blanket impl that
/// ignores the span.
pub trait SpannedEventReceiver<'input> {
    /// Handler called for each event that occurs.
    ///
    /// `span` is the source location associated with `ev`.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
635
636impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
637    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
638        self.on_event(ev);
639    }
640}
641
/// Trait to be implemented for fallible event handling without source spans.
///
/// This is the fallible counterpart to [`EventReceiver`]. Use it with [`Parser::try_load`] when
/// event handling may need to stop parsing by returning an application error.
///
/// Every implementor also implements [`TrySpannedEventReceiver`] through a blanket impl that
/// ignores the span and keeps the same error type.
pub trait TryEventReceiver<'input> {
    /// Error returned by this receiver.
    type Error;

    /// Handler called for each YAML event that is emitted by the parser.
    ///
    /// Returning an error stops [`Parser::try_load`] immediately.
    ///
    /// # Errors
    /// Returns `Self::Error` when the receiver wants to stop parsing.
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
}
658
/// Trait to be implemented for fallible event handling with source spans.
///
/// This is the fallible counterpart to [`SpannedEventReceiver`]. Use it with
/// [`Parser::try_load`] when event handling may need to stop parsing by returning an application
/// error.
///
/// A receiver error surfaces to the caller of `try_load` as [`TryLoadError::Receiver`].
pub trait TrySpannedEventReceiver<'input> {
    /// Error returned by this receiver.
    type Error;

    /// Handler called for each event that occurs.
    ///
    /// `span` is the source location associated with `ev`.
    ///
    /// Returning an error stops [`Parser::try_load`] immediately.
    ///
    /// # Errors
    /// Returns `Self::Error` when the receiver wants to stop parsing.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
}
676
677impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
678    type Error = R::Error;
679
680    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
681        TryEventReceiver::on_event(self, ev)
682    }
683}
684
685/// Error returned by [`Parser::try_load`] and [`ParserTrait::try_load`].
686#[derive(Clone, PartialEq, Debug, Eq)]
687pub enum TryLoadError<E> {
688    /// Scanning or parsing failed.
689    Scan(
690        /// The scanner or parser error.
691        ScanError,
692    ),
693    /// The receiver returned an application error.
694    Receiver(
695        /// The error returned by the receiver.
696        E,
697    ),
698}
699
700impl<E> TryLoadError<E> {
701    #[cold]
702    fn scan(error: ScanError) -> Self {
703        Self::Scan(error)
704    }
705
706    #[cold]
707    fn receiver(error: E) -> Self {
708        Self::Receiver(error)
709    }
710}
711
712impl<E> From<ScanError> for TryLoadError<E> {
713    #[cold]
714    fn from(error: ScanError) -> Self {
715        Self::scan(error)
716    }
717}
718
719impl<E: Display> Display for TryLoadError<E> {
720    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
721        match self {
722            Self::Scan(error) => write!(f, "parser error: {error}"),
723            Self::Receiver(error) => write!(f, "receiver error: {error}"),
724        }
725    }
726}
727
728impl<E> core::error::Error for TryLoadError<E>
729where
730    E: core::error::Error + 'static,
731{
732    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
733        match self {
734            Self::Scan(error) => Some(error),
735            Self::Receiver(error) => Some(error),
736        }
737    }
738}
739
740fn try_emit<'input, R>(
741    recv: &mut R,
742    ev: Event<'input>,
743    span: Span,
744) -> Result<(), TryLoadError<R::Error>>
745where
746    R: TrySpannedEventReceiver<'input>,
747{
748    recv.on_event(ev, span).map_err(TryLoadError::receiver)
749}
750
/// Adapter that presents an infallible [`SpannedEventReceiver`] as a
/// [`TrySpannedEventReceiver`] whose error type is [`Infallible`].
struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);

impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
    for InfallibleSpannedReceiver<'_, R>
{
    // The wrapped receiver cannot fail, so no error value can ever be produced.
    type Error = Infallible;

    /// Forward the event and span to the wrapped receiver and always report success.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
        self.0.on_event(ev, span);
        Ok(())
    }
}
763
764fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
765    match result {
766        Ok(()) => Ok(()),
767        Err(TryLoadError::Scan(error)) => error.into_result(),
768        Err(TryLoadError::Receiver(error)) => match error {},
769    }
770}
771
/// A convenience alias for a parser event result.
///
/// `Ok` holds an [`Event`] together with its source [`Span`]; `Err` holds the [`ScanError`]
/// that stopped parsing.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
774
775/// Trait extracted from `Parser` to support mocking and alternative implementations.
776pub trait ParserTrait<'input> {
777    /// Try to load the next event and return it without consuming it from `self`.
778    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
779
780    /// Try to load the next event and return it, consuming it from `self`.
781    fn next_event(&mut self) -> Option<ParseResult<'input>>;
782
783    /// Load the YAML from the stream in `self`, pushing events into `recv`.
784    ///
785    /// Use this method when event handling is infallible. If receiver code can return an
786    /// application error and should stop parsing, use [`ParserTrait::try_load`] instead. If the
787    /// caller should directly control when the next event is read, use [`ParserTrait::next_event`]
788    /// or [`Parser`]'s [`core::iter::Iterator`] implementation.
789    ///
790    /// # Errors
791    /// Returns `ScanError` when scanning or parsing the stream fails.
792    fn load<R: SpannedEventReceiver<'input>>(
793        &mut self,
794        recv: &mut R,
795        multi: bool,
796    ) -> Result<(), ScanError>;
797
798    /// Load the YAML from the stream in `self`, stopping if `recv` returns an error.
799    ///
800    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
801    /// inside the stream.
802    ///
803    /// If the receiver returns an error, the parser is left positioned immediately after the event
804    /// that caused the receiver error. Callers should treat the parser as partially consumed.
805    ///
806    /// # Errors
807    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
808    /// [`TryLoadError::Receiver`] when `recv` returns an error.
809    fn try_load<R: TrySpannedEventReceiver<'input>>(
810        &mut self,
811        recv: &mut R,
812        multi: bool,
813    ) -> Result<(), TryLoadError<R::Error>> {
814        while let Some(res) = self.next_event() {
815            let (ev, span) = res?;
816            let is_doc_end = matches!(ev, Event::DocumentEnd);
817            let is_stream_end = matches!(ev, Event::StreamEnd);
818
819            try_emit(recv, ev, span)?;
820
821            if is_stream_end {
822                break;
823            }
824            if !multi && is_doc_end {
825                break;
826            }
827        }
828
829        Ok(())
830    }
831}
832
833impl<'input> Parser<'input, StrInput<'input>> {
834    /// Create a parser over a borrowed string slice.
835    #[must_use]
836    pub fn new_from_str(value: &'input str) -> Self {
837        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
838        Parser::new(StrInput::new(value))
839    }
840}
841
842impl<T> Parser<'static, BufferedInput<T>>
843where
844    T: Iterator<Item = char>,
845{
846    /// Create a parser over an iterator of characters.
847    #[must_use]
848    pub fn new_from_iter(iter: T) -> Self {
849        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
850        Parser::new(BufferedInput::new(iter))
851    }
852}
853
854impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
855    /// Return the next anchor ID that will be assigned by this parser.
856    pub fn get_anchor_offset(&self) -> usize {
857        self.anchor_id_count
858    }
859
860    /// Set the next anchor ID that will be assigned by this parser.
861    pub fn set_anchor_offset(&mut self, offset: usize) {
862        self.anchor_id_count = offset;
863    }
864
865    /// Create a parser over a custom input source.
866    pub fn new(src: T) -> Self {
867        Parser {
868            scanner: Scanner::new(src),
869            states: Vec::new(),
870            state: State::StreamStart,
871            token: None,
872            current: None,
873            current_error: None,
874            queued_events: VecDeque::new(),
875
876            pending_key_indent: None,
877            pending_node_anchor_id: 0,
878            pending_node_tag: None,
879            pending_node_tag_start: None,
880            pending_node_property_end: None,
881            pending_empty_scalar_span: None,
882            last_event_end: None,
883            pending_document_version: None,
884            pending_document_directives: false,
885            pending_document_tag_handles: BTreeSet::new(),
886
887            anchors: BTreeMap::new(),
888            // valid anchor_id starts from 1
889            anchor_id_count: 1,
890            tags: BTreeMap::new(),
891            stream_end_emitted: false,
892            keep_tags: false,
893        }
894    }
895
896    /// Configure whether tag directives remain active across document boundaries.
897    ///
898    /// This behavior is non-standard as per the YAML specification but can be encountered in the
899    /// wild. Passing `true` enables this non-standard extension and allows the parser to accept
900    /// input from [test
901    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
902    /// of the yaml-test-suite:
903    ///
904    /// ```yaml
905    /// %TAG !prefix! tag:example.com,2011:
906    /// --- !prefix!A
907    /// a: b
908    /// --- !prefix!B
909    /// c: d
910    /// --- !prefix!C
911    /// e: f
912    /// ```
913    ///
914    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
915    /// only apply to the document immediately following them. This would error on `!prefix!B`.
916    ///
917    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
918    #[must_use]
919    pub fn keep_tags(mut self, value: bool) -> Self {
920        self.keep_tags = value;
921        self
922    }
923
924    /// Try to load the next event and return it without consuming it from `self`.
925    ///
926    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
927    /// [`Iterator::next`] or [`Parser::load`].
928    /// If the buffered value is a [`ScanError`], [`Parser::next_event`] returns that error once
929    /// and then the parser is exhausted.
930    ///
931    /// # Errors
932    /// Returns `ScanError` when loading the next event fails.
933    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
934        ParserTrait::peek(self)
935    }
936
937    /// Try to load the next event and return it, consuming it from `self`.
938    ///
939    /// After this returns a [`ScanError`], subsequent calls return [`None`].
940    ///
941    /// # Errors
942    /// Returns `ScanError` when loading the next event fails.
943    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
944        ParserTrait::next_event(self)
945    }
946
947    /// Implementation function for [`Self::next_event`] without the `Option`.
948    ///
949    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
950    /// option. This burdens the parser code. This function is used internally when an option is
951    /// undesirable.
952    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
953    where
954        'input: 'a,
955    {
956        let event = match self.current.take() {
957            None => {
958                if let Some(event) = self.queued_events.pop_front() {
959                    Ok(self.apply_pending_key_indent(event))
960                } else if let Some(comment) = self.maybe_next_comment_event()? {
961                    Ok(comment)
962                } else {
963                    self.parse()
964                }
965            }
966            Some(v) => Ok(v),
967        }?;
968
969        Ok(self.remember_event_end(event))
970    }
971
972    fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
973        if ev.is_node() {
974            if let Some(indent) = self.pending_key_indent.take() {
975                return (ev, span.with_indent(Some(indent)));
976            }
977        }
978
979        (ev, span)
980    }
981
982    fn remember_event_end<'a>(&mut self, (event, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
983        self.last_event_end = Some(span.end);
984        (event, span)
985    }
986
987    /// Peek at the next token from the scanner.
988    fn peek_token(&mut self) -> Result<&QueuedToken<'_>, ScanError> {
989        match self.token {
990            None => {
991                self.token = Some(self.scan_next_token()?);
992                Ok(self.token.as_ref().unwrap())
993            }
994            Some(ref tok) => Ok(tok),
995        }
996    }
997
998    /// Extract and return the next token from the scanner.
999    ///
1000    /// This function does _not_ make use of `self.token`.
1001    fn scan_next_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
1002        match self.scanner.next_queued_token()? {
1003            None => match self.scanner.get_error() {
1004                None => Err(self.unexpected_eof()),
1005                Some(e) => e.into_result(),
1006            },
1007            Some(tok) => Ok(tok),
1008        }
1009    }
1010
1011    #[inline]
1012    fn maybe_next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
1013    where
1014        'input: 'a,
1015    {
1016        if self.scanner.comments_possible() {
1017            self.next_comment_event()
1018        } else {
1019            Ok(None)
1020        }
1021    }
1022
1023    fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
1024    where
1025        'input: 'a,
1026    {
1027        let is_comment = {
1028            let token = self.peek_token()?;
1029            matches!(token.1, QueuedTokenType::Comment(_))
1030        };
1031
1032        if !is_comment {
1033            return Ok(None);
1034        }
1035
1036        let QueuedToken(span, token) = self.fetch_token();
1037        match token {
1038            QueuedTokenType::Comment(mut comment) => {
1039                comment.placement = self.refined_comment_placement(span, comment.placement);
1040                Ok(Some((
1041                    Event::Comment(comment.text, comment.placement),
1042                    span,
1043                )))
1044            }
1045            _ => unreachable!("comment token disappeared after peek"),
1046        }
1047    }
1048
1049    #[inline]
1050    fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
1051        if !self.scanner.comments_possible() {
1052            return Ok(Vec::new());
1053        }
1054
1055        let mut events = Vec::new();
1056        loop {
1057            match self.peek_token() {
1058                Ok(token) if matches!(token.1, QueuedTokenType::Comment(_)) => {}
1059                Err(error) if events.is_empty() => return Err(error),
1060                Ok(_) | Err(_) => return Ok(events),
1061            }
1062
1063            if events.len() == MAX_BUFFERED_COMMENT_EVENTS {
1064                return Err(ScanError::new_str(
1065                    self.peek_token()?.0.start,
1066                    "too many consecutive comments before resolving collection entry",
1067                ));
1068            }
1069
1070            let comment = self
1071                .next_comment_event()?
1072                .expect("comment token disappeared after peek");
1073            events.push(comment);
1074        }
1075    }
1076
1077    fn queue_tail_and_return_first(
1078        &mut self,
1079        events: Vec<(Event<'input>, Span)>,
1080    ) -> (Event<'input>, Span) {
1081        let mut events = events.into_iter();
1082        let first = events
1083            .next()
1084            .expect("event queue must contain at least one event");
1085        self.queued_events.extend(events);
1086        first
1087    }
1088
1089    fn queue_event_by_span(
1090        &mut self,
1091        comments: Vec<(Event<'input>, Span)>,
1092        event: (Event<'input>, Span),
1093    ) -> (Event<'input>, Span) {
1094        let insert_at = comments
1095            .iter()
1096            .position(|(_, comment_span)| {
1097                comment_span.start.index() >= event.1.start.index()
1098                    && comment_span.end.index() >= event.1.end.index()
1099            })
1100            .unwrap_or(comments.len());
1101        let mut ordered = Vec::with_capacity(comments.len() + 1);
1102        let mut comments = comments.into_iter();
1103
1104        for _ in 0..insert_at {
1105            ordered.push(
1106                comments
1107                    .next()
1108                    .expect("comment disappeared while ordering queued events"),
1109            );
1110        }
1111        ordered.push(event);
1112        ordered.extend(comments);
1113
1114        self.queue_tail_and_return_first(ordered)
1115    }
1116
1117    fn queue_two_events_by_span(
1118        &mut self,
1119        comments: Vec<(Event<'input>, Span)>,
1120        first: (Event<'input>, Span),
1121        second: (Event<'input>, Span),
1122    ) -> (Event<'input>, Span) {
1123        let insert_at = comments
1124            .iter()
1125            .position(|(_, comment_span)| {
1126                comment_span.start.index() >= first.1.start.index()
1127                    && comment_span.end.index() >= first.1.end.index()
1128            })
1129            .unwrap_or(comments.len());
1130        let mut ordered = Vec::with_capacity(comments.len() + 2);
1131        let mut comments = comments.into_iter();
1132
1133        for _ in 0..insert_at {
1134            ordered.push(
1135                comments
1136                    .next()
1137                    .expect("comment disappeared while ordering queued events"),
1138            );
1139        }
1140        ordered.push(first);
1141        ordered.push(second);
1142        ordered.extend(comments);
1143
1144        self.queue_tail_and_return_first(ordered)
1145    }
1146
1147    fn refined_comment_placement(&mut self, span: Span, placement: Placement) -> Placement {
1148        if placement == Placement::Right {
1149            return Placement::Right;
1150        }
1151
1152        let Ok(next) = self.peek_token() else {
1153            return placement;
1154        };
1155        if matches!(next.1, QueuedTokenType::StreamEnd) {
1156            return Placement::Last;
1157        }
1158
1159        if next.0.start.line() == span.end.line() + 1 {
1160            Placement::Above
1161        } else {
1162            Placement::Free
1163        }
1164    }
1165
1166    #[cold]
1167    fn unexpected_eof(&self) -> ScanError {
1168        let info = match self.state {
1169            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1170                "unexpected EOF while parsing a flow sequence"
1171            }
1172            State::FlowMappingFirstKey
1173            | State::FlowMappingKey
1174            | State::FlowMappingValue
1175            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1176            State::FlowSequenceEntryMappingKey
1177            | State::FlowSequenceEntryMappingValue
1178            | State::FlowSequenceEntryMappingEnd
1179            | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
1180            State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
1181                "unexpected EOF while parsing a block sequence"
1182            }
1183            State::BlockMappingFirstKey
1184            | State::BlockMappingKey
1185            | State::BlockMappingValue
1186            | State::BlockNodeOrIndentlessSequence => {
1187                "unexpected EOF while parsing a block mapping"
1188            }
1189            _ => "unexpected eof",
1190        };
1191        ScanError::new_str(self.scanner.mark(), info)
1192    }
1193
1194    fn fetch_token<'a>(&mut self) -> QueuedToken<'a>
1195    where
1196        'input: 'a,
1197    {
1198        self.token
1199            .take()
1200            .expect("fetch_token needs to be preceded by peek_token")
1201    }
1202
1203    /// Skip the next token from the scanner.
1204    fn skip(&mut self) {
1205        self.token = None;
1206    }
1207    /// Pops the top-most state and make it the current state.
1208    fn pop_state(&mut self) {
1209        self.state = self.states.pop().unwrap();
1210    }
1211    /// Push a new state atop the state stack.
1212    fn push_state(&mut self, state: State) {
1213        self.states.push(state);
1214    }
1215
1216    fn defer_parse_node<'a>(
1217        &mut self,
1218        node_state: State,
1219        return_state: State,
1220        block: bool,
1221        indentless_sequence: bool,
1222    ) -> ParseResult<'a>
1223    where
1224        'input: 'a,
1225    {
1226        self.push_state(return_state);
1227        self.state = node_state;
1228        if let Some(comment) = self.maybe_next_comment_event()? {
1229            Ok(comment)
1230        } else {
1231            self.parse_node(block, indentless_sequence)
1232        }
1233    }
1234
1235    fn parse<'a>(&mut self) -> ParseResult<'a>
1236    where
1237        'input: 'a,
1238    {
1239        if self.state == State::End {
1240            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
1241        }
1242        let event = self.state_machine()?;
1243        Ok(self.apply_pending_key_indent(event))
1244    }
1245
1246    /// Load the YAML from the stream in `self`, pushing events into `recv`.
1247    ///
1248    /// The contents of the stream are parsed and the corresponding events are sent into the
1249    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
1250    ///
1251    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
1252    /// inside the stream.
1253    ///
1254    /// Use this method when event handling is infallible. If receiver code can return an
1255    /// application error and should stop parsing, use [`Parser::try_load`] instead. If the caller
1256    /// should directly control when the next event is read, use [`Parser`]'s
1257    /// [`core::iter::Iterator`] implementation.
1258    ///
1259    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
1260    /// former is enough to call this function.
1261    ///
1262    /// # Example
1263    /// ```
1264    /// # use granit_parser::{Event, EventReceiver, Parser};
1265    /// # fn main() -> Result<(), granit_parser::ScanError> {
1266    /// struct EventSink<'input> {
1267    ///     events: Vec<Event<'input>>,
1268    /// }
1269    ///
1270    /// impl<'input> EventReceiver<'input> for EventSink<'input> {
1271    ///     fn on_event(&mut self, ev: Event<'input>) {
1272    ///         self.events.push(ev);
1273    ///     }
1274    /// }
1275    ///
1276    /// let mut parser = Parser::new_from_str("a: 1\n");
1277    /// let mut sink = EventSink { events: Vec::new() };
1278    ///
1279    /// parser.load(&mut sink, false)?;
1280    ///
1281    /// assert!(sink
1282    ///     .events
1283    ///     .iter()
1284    ///     .any(|ev| matches!(ev, Event::Scalar(value, ..) if value == "a")));
1285    /// # Ok(())
1286    /// # }
1287    /// ```
1288    ///
1289    /// # Errors
1290    /// Returns `ScanError` when loading fails.
1291    pub fn load<R: SpannedEventReceiver<'input>>(
1292        &mut self,
1293        recv: &mut R,
1294        multi: bool,
1295    ) -> Result<(), ScanError> {
1296        ParserTrait::load(self, recv, multi)
1297    }
1298
1299    /// Load the YAML from the stream in `self`, pushing events into `recv`.
1300    ///
1301    /// This is the fallible counterpart to [`Parser::load`]. If `recv` returns an error, parsing
1302    /// stops immediately and that error is returned as [`TryLoadError::Receiver`].
1303    ///
1304    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
1305    /// inside the stream.
1306    ///
1307    /// If the receiver returns an error, the parser is left positioned immediately after the event
1308    /// that caused the receiver error. Callers should treat the parser as partially consumed.
1309    ///
1310    /// # Example
1311    /// ```
1312    /// # use granit_parser::{Event, Parser, TryEventReceiver, TryLoadError};
1313    /// #[derive(Debug, PartialEq, Eq)]
1314    /// enum ValidationError {
1315    ///     ForbiddenScalar,
1316    /// }
1317    ///
1318    /// struct Validator;
1319    ///
1320    /// impl<'input> TryEventReceiver<'input> for Validator {
1321    ///     type Error = ValidationError;
1322    ///
1323    ///     fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
1324    ///         if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
1325    ///             Err(ValidationError::ForbiddenScalar)
1326    ///         } else {
1327    ///             Ok(())
1328    ///         }
1329    ///     }
1330    /// }
1331    ///
1332    /// let mut parser = Parser::new_from_str("value: bad\n");
1333    /// let mut validator = Validator;
1334    ///
1335    /// let err = parser.try_load(&mut validator, false).unwrap_err();
1336    ///
1337    /// assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenScalar));
1338    /// ```
1339    ///
1340    /// # Errors
1341    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
1342    /// [`TryLoadError::Receiver`] when `recv` returns an error.
1343    pub fn try_load<R: TrySpannedEventReceiver<'input>>(
1344        &mut self,
1345        recv: &mut R,
1346        multi: bool,
1347    ) -> Result<(), TryLoadError<R::Error>> {
1348        ParserTrait::try_load(self, recv, multi)
1349    }
1350
1351    fn state_machine<'a>(&mut self) -> ParseResult<'a>
1352    where
1353        'input: 'a,
1354    {
1355        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
1356
1357        match self.state {
1358            State::StreamStart => self.stream_start(),
1359
1360            State::ImplicitDocumentStart => self.document_start(true),
1361            State::DocumentStart => self.document_start(false),
1362            State::DocumentContent => self.document_content(),
1363            State::DocumentEnd => self.document_end(),
1364
1365            State::BlockNode => self.parse_node(true, false),
1366            State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
1367            State::FlowNode => self.parse_node(false, false),
1368            State::BlockMappingFirstKey => self.block_mapping_key(true),
1369            State::BlockMappingKey => self.block_mapping_key(false),
1370            State::BlockMappingKeyNode => self.block_mapping_key_node(),
1371            State::BlockMappingValue => self.block_mapping_value(),
1372            State::BlockMappingValueNode => self.block_mapping_value_node(),
1373
1374            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
1375            State::BlockSequenceEntry => self.block_sequence_entry(false),
1376            State::BlockSequenceEntryNode => self.block_sequence_entry_node(),
1377
1378            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
1379            State::FlowSequenceEntry => self.flow_sequence_entry(false),
1380
1381            State::FlowMappingFirstKey => self.flow_mapping_key(true),
1382            State::FlowMappingKey => self.flow_mapping_key(false),
1383            State::FlowMappingKeyNode => self.flow_mapping_key_node(),
1384            State::FlowMappingValue => self.flow_mapping_value(false),
1385            State::FlowMappingValueNode => self.flow_mapping_value_node(),
1386
1387            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
1388            State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),
1389
1390            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
1391            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
1392            State::FlowSequenceEntryMappingValueNode => {
1393                self.flow_sequence_entry_mapping_value_node()
1394            }
1395            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
1396            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
1397
1398            /* impossible */
1399            State::End => unreachable!(),
1400        }
1401    }
1402
1403    fn stream_start<'a>(&mut self) -> ParseResult<'a>
1404    where
1405        'input: 'a,
1406    {
1407        match *self.peek_token()? {
1408            QueuedToken(span, QueuedTokenType::StreamStart(_)) => {
1409                self.state = State::ImplicitDocumentStart;
1410                self.skip();
1411                Ok((Event::StreamStart, span))
1412            }
1413            QueuedToken(span, _) => Err(ScanError::new_str(
1414                span.start,
1415                "did not find expected <stream-start>",
1416            )),
1417        }
1418    }
1419
1420    fn has_pending_document_directives(&self) -> bool {
1421        self.pending_document_directives
1422            || self.pending_document_version.is_some()
1423            || !self.pending_document_tag_handles.is_empty()
1424    }
1425
1426    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
1427    where
1428        'input: 'a,
1429    {
1430        while let QueuedTokenType::DocumentEnd = self.peek_token()?.1 {
1431            self.skip();
1432        }
1433
1434        // Anchors are scoped to a single document.
1435        self.anchors.clear();
1436
1437        if self.has_pending_document_directives() {
1438            return self.explicit_document_start();
1439        }
1440
1441        match *self.peek_token()? {
1442            QueuedToken(span, QueuedTokenType::StreamEnd) => {
1443                self.state = State::End;
1444                self.skip();
1445                Ok((Event::StreamEnd, span))
1446            }
1447            QueuedToken(
1448                _,
1449                QueuedTokenType::VersionDirective(..)
1450                | QueuedTokenType::TagDirective(..)
1451                | QueuedTokenType::ReservedDirective(..)
1452                | QueuedTokenType::DocumentStart,
1453            ) => {
1454                // explicit document
1455                self.explicit_document_start()
1456            }
1457            QueuedToken(span, _) if implicit => {
1458                self.parser_process_directives(None, false, BTreeSet::new())?;
1459                self.push_state(State::DocumentEnd);
1460                self.state = State::BlockNode;
1461                Ok((Event::DocumentStart(false, None), span))
1462            }
1463            _ => {
1464                // explicit document
1465                self.explicit_document_start()
1466            }
1467        }
1468    }
1469
1470    fn parser_process_directives(
1471        &mut self,
1472        mut version: Option<YamlVersion>,
1473        continuing: bool,
1474        mut document_tag_handles: BTreeSet<String>,
1475    ) -> Result<(Option<YamlVersion>, BTreeSet<String>), ScanError> {
1476        let mut tags = if continuing || self.keep_tags {
1477            self.tags.clone()
1478        } else {
1479            BTreeMap::new()
1480        };
1481
1482        loop {
1483            match self.peek_token()? {
1484                QueuedToken(span, QueuedTokenType::VersionDirective(major, minor)) => {
1485                    if version.is_some() {
1486                        return Err(ScanError::new_str(
1487                            span.start,
1488                            "duplicate version directive",
1489                        ));
1490                    }
1491                    if *major != 1 {
1492                        return Err(ScanError::new_str(
1493                            span.start,
1494                            "unsupported YAML major version",
1495                        ));
1496                    }
1497                    version = Some(YamlVersion::new(*major, *minor));
1498                }
1499                QueuedToken(mark, QueuedTokenType::TagDirective(handle, prefix)) => {
1500                    if !document_tag_handles.insert(handle.to_string()) {
1501                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
1502                    }
1503                    tags.insert(handle.to_string(), prefix.to_string());
1504                }
1505                QueuedToken(_, QueuedTokenType::ReservedDirective(_, _)) => {
1506                    // Reserved directives are ignored
1507                }
1508                _ => break,
1509            }
1510            self.skip();
1511        }
1512
1513        self.tags = tags;
1514        Ok((version, document_tag_handles))
1515    }
1516
1517    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1518    where
1519        'input: 'a,
1520    {
1521        let pending_version = self.pending_document_version.take();
1522        let continuing_directives = core::mem::take(&mut self.pending_document_directives);
1523        let pending_tag_handles = core::mem::take(&mut self.pending_document_tag_handles);
1524        let (version, document_tag_handles) = self.parser_process_directives(
1525            pending_version,
1526            continuing_directives,
1527            pending_tag_handles,
1528        )?;
1529        if let Some(comment) = self.maybe_next_comment_event()? {
1530            self.pending_document_version = version;
1531            self.pending_document_directives = true;
1532            self.pending_document_tag_handles = document_tag_handles;
1533            return Ok(comment);
1534        }
1535        match *self.peek_token()? {
1536            QueuedToken(mark, QueuedTokenType::DocumentStart) => {
1537                self.push_state(State::DocumentEnd);
1538                self.state = State::DocumentContent;
1539                self.skip();
1540                Ok((Event::DocumentStart(true, version), mark))
1541            }
1542            QueuedToken(span, _) => Err(ScanError::new_str(
1543                span.start,
1544                "did not find expected <document start>",
1545            )),
1546        }
1547    }
1548
1549    fn document_content<'a>(&mut self) -> ParseResult<'a>
1550    where
1551        'input: 'a,
1552    {
1553        if let QueuedToken(
1554            mark,
1555            QueuedTokenType::VersionDirective(..)
1556            | QueuedTokenType::TagDirective(..)
1557            | QueuedTokenType::ReservedDirective(..)
1558            | QueuedTokenType::DocumentStart
1559            | QueuedTokenType::DocumentEnd
1560            | QueuedTokenType::StreamEnd,
1561        ) = *self.peek_token()?
1562        {
1563            self.pop_state();
1564            let span = self
1565                .last_event_end
1566                .map_or_else(|| Span::empty(mark.start), Span::empty);
1567            Ok((Event::empty_scalar(), span))
1568        } else {
1569            self.state = State::BlockNode;
1570            self.parse_node(true, false)
1571        }
1572    }
1573
1574    fn document_end<'a>(&mut self) -> ParseResult<'a>
1575    where
1576        'input: 'a,
1577    {
1578        let mut explicit_end = false;
1579        let span: Span = match *self.peek_token()? {
1580            QueuedToken(span, QueuedTokenType::DocumentEnd) => {
1581                explicit_end = true;
1582                self.skip();
1583                span
1584            }
1585            QueuedToken(span, _) => self
1586                .last_event_end
1587                .map_or_else(|| Span::empty(span.start), Span::empty),
1588        };
1589
1590        if self.keep_tags {
1591            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
1592            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
1593            // explicit tags are interpreted later on.
1594            self.tags.remove("!!");
1595            self.tags.remove("!");
1596        } else {
1597            self.tags.clear();
1598        }
1599        if explicit_end {
1600            self.state = State::ImplicitDocumentStart;
1601        } else {
1602            if let QueuedToken(
1603                span,
1604                QueuedTokenType::VersionDirective(..)
1605                | QueuedTokenType::TagDirective(..)
1606                | QueuedTokenType::ReservedDirective(..),
1607            ) = *self.peek_token()?
1608            {
1609                return Err(ScanError::new_str(
1610                    span.start,
1611                    "missing explicit document end marker before directive",
1612                ));
1613            }
1614            self.state = State::DocumentStart;
1615        }
1616
1617        Ok((Event::DocumentEnd, span))
1618    }
1619
1620    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1621        // YAML permits anchor names to be reused. Aliases resolve to the most recent definition.
1622        let new_id = self.anchor_id_count;
1623        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1624            ScanError::new_str(
1625                mark.start,
1626                "while parsing anchor, anchor count exceeded supported limit",
1627            )
1628        })?;
1629        self.anchors.insert(name, new_id);
1630        Ok(new_id)
1631    }
1632
1633    fn save_pending_node_properties(
1634        &mut self,
1635        anchor_id: usize,
1636        tag: Option<Cow<'input, Tag>>,
1637        tag_start: Option<Marker>,
1638        property_end: Option<Marker>,
1639    ) {
1640        self.pending_node_anchor_id = anchor_id;
1641        self.pending_node_tag = tag;
1642        self.pending_node_tag_start = tag_start;
1643        self.pending_node_property_end = property_end;
1644    }
1645
1646    fn attach_tag_start(event: Event<'_>, span: Span, start: Option<Marker>) -> (Event<'_>, Span) {
1647        (event, span.with_tag_start(start))
1648    }
1649
    /// Parse the start of a node and return the event that opens it.
    ///
    /// The function first forwards a pending comment event, if any. It then collects the
    /// node properties (anchor and/or tag, in either order), and finally dispatches on the
    /// token that starts the node content.
    ///
    /// * `block` - when `true`, `BlockSequenceStart` and `BlockMappingStart` tokens are
    ///   accepted as the start of a node.
    /// * `indentless_sequence` - when `true`, a `BlockEntry` token starts a block sequence.
    ///
    /// If a comment shows up after the properties were read, the properties are stored with
    /// `save_pending_node_properties` and the comment is returned; the next call picks the
    /// properties up again from the `pending_node_*` fields.
    ///
    /// # Errors
    ///
    /// Propagates errors from token lookup, comment handling, `register_anchor` and
    /// `resolve_tag`. Also fails when an alias names an unknown anchor, or when the next
    /// token cannot start a node and no anchor or tag was seen.
    #[allow(clippy::too_many_lines)]
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // Comments are emitted before the pending properties are touched, so the properties
        // stay stored in `self` across this early return.
        if let Some(comment) = self.maybe_next_comment_event()? {
            return Ok(comment);
        }

        // Recover properties saved by an earlier, comment-interrupted call. `mem::take`
        // leaves `0` behind for the anchor id; the `anchor_id > 0` check below treats `0`
        // as "no anchor".
        let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
        let mut tag = self.pending_node_tag.take();
        let mut tag_start = self.pending_node_tag_start.take();
        let mut property_end = self.pending_node_property_end.take();
        // Phase 1: aliases and node properties.
        match *self.peek_token()? {
            QueuedToken(_, QueuedTokenType::Alias(_)) => {
                // An alias is a complete node on its own: leave the current state and
                // resolve the name against the registered anchors.
                self.pop_state();
                if let QueuedToken(span, QueuedTokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                // The peek above already established that the token is an alias.
                unreachable!()
            }
            QueuedToken(_, QueuedTokenType::Anchor(_)) => {
                // Anchor first, optionally followed by a tag.
                if let QueuedToken(span, QueuedTokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span)?;
                    property_end = Some(span.end);
                    if matches!(self.peek_token()?.1, QueuedTokenType::Tag(..)) {
                        if let QueuedToken(tag_span, QueuedTokenType::Tag(handle, suffix)) =
                            self.fetch_token()
                        {
                            tag_start = Some(tag_span.start);
                            tag = Some(self.resolve_tag(tag_span, &handle, suffix)?);
                            property_end = Some(tag_span.end);
                        } else {
                            unreachable!()
                        }
                    }
                    // A comment between the properties and the content is emitted first;
                    // the properties are parked until the next call.
                    if let Some(comment) = self.maybe_next_comment_event()? {
                        self.save_pending_node_properties(anchor_id, tag, tag_start, property_end);
                        return Ok(comment);
                    }
                } else {
                    unreachable!()
                }
            }
            QueuedToken(mark, QueuedTokenType::Tag(..)) => {
                // Tag first, optionally followed by an anchor.
                if let QueuedTokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag_start = Some(mark.start);
                    property_end = Some(mark.end);
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let QueuedTokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let QueuedToken(mark, QueuedTokenType::Anchor(name)) = self.fetch_token()
                        {
                            anchor_id = self.register_anchor(name, &mark)?;
                            property_end = Some(mark.end);
                        } else {
                            unreachable!()
                        }
                    }
                    // Same comment hand-off as in the anchor-first branch.
                    if let Some(comment) = self.maybe_next_comment_event()? {
                        self.save_pending_node_properties(anchor_id, tag, tag_start, property_end);
                        return Ok(comment);
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Phase 2: dispatch on the token that starts the node content.
        match *self.peek_token()? {
            QueuedToken(mark, QueuedTokenType::BlockEntry) if indentless_sequence => {
                // A `-` where an indentless sequence is allowed opens a block sequence.
                self.skip();
                let comments = self.next_comment_events()?;
                let start = (
                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
                    mark.with_tag_start(tag_start),
                );
                if comments.is_empty() {
                    // Remember the entry span so an empty first entry can be reported at it.
                    self.pending_empty_scalar_span = Some(mark);
                    self.state = State::IndentlessSequenceEntryNode;
                    Ok(start)
                } else if let Ok(QueuedToken(
                    _,
                    QueuedTokenType::BlockEntry
                    | QueuedTokenType::Key
                    | QueuedTokenType::Value
                    | QueuedTokenType::BlockEnd,
                )) = self.peek_token()
                {
                    // Comments followed by a token that cannot start the entry's content:
                    // the first entry is empty, so the sequence start and the empty scalar
                    // are queued together with the comments.
                    self.state = State::IndentlessSequenceEntry;
                    Ok(self.queue_two_events_by_span(
                        comments,
                        start,
                        (Event::empty_scalar(), mark),
                    ))
                } else {
                    // Comments followed by content (or by a token lookup error, which is
                    // not reported here): queue the sequence start with the comments.
                    self.pending_empty_scalar_span = Some(mark);
                    self.state = State::IndentlessSequenceEntryNode;
                    Ok(self.queue_event_by_span(comments, start))
                }
            }
            QueuedToken(_, QueuedTokenType::Scalar(..)) => {
                self.pop_state();
                if let QueuedToken(mark, QueuedTokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok(Self::attach_tag_start(
                        Event::Scalar(v, style, anchor_id, tag),
                        mark,
                        tag_start,
                    ))
                } else {
                    unreachable!()
                }
            }
            QueuedToken(mark, QueuedTokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            QueuedToken(mark, QueuedTokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            QueuedToken(mark, QueuedTokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            QueuedToken(mark, QueuedTokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                self.skip();
                Ok(Self::attach_tag_start(
                    Event::MappingStart(StructureStyle::Block, anchor_id, tag),
                    mark,
                    tag_start,
                ))
            }
            // ex 7.2, an empty scalar can follow a secondary tag
            // The token is left in place; the empty scalar is positioned at the end of the
            // last property when known, otherwise at the start of the current token.
            QueuedToken(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                let span = property_end.map_or_else(|| Span::empty(mark.start), Span::empty);
                Ok(Self::attach_tag_start(
                    Event::empty_scalar_with_anchor(anchor_id, tag),
                    span,
                    tag_start,
                ))
            }
            // No properties and no token that can start a node: report an error whose text
            // depends on the current parser state.
            // NOTE(review): this arm matches any remaining token, not only end of input,
            // although most messages say "unexpected EOF" - confirm the wording is intended.
            QueuedToken(span, _) => {
                let info = match self.state {
                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                        "unexpected EOF while parsing a flow sequence"
                    }
                    State::FlowMappingFirstKey
                    | State::FlowMappingKey
                    | State::FlowMappingValue
                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                    State::FlowSequenceEntryMappingKey
                    | State::FlowSequenceEntryMappingValue
                    | State::FlowSequenceEntryMappingEnd
                    | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
                    State::BlockSequenceFirstEntry
                    | State::BlockSequenceEntry
                    | State::BlockNode => "unexpected EOF while parsing a block sequence",
                    State::BlockMappingFirstKey
                    | State::BlockMappingKey
                    | State::BlockMappingValue
                    | State::BlockNodeOrIndentlessSequence => {
                        "unexpected EOF while parsing a block mapping"
                    }
                    _ => "while parsing a node, did not find expected node content",
                };
                Err(ScanError::new_str(span.start, info))
            }
        }
    }
1844
1845    fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
1846    where
1847        'input: 'a,
1848    {
1849        match *self.peek_token()? {
1850            QueuedToken(_, QueuedTokenType::Key) => {
1851                // Indentation is only meaningful for block mapping keys.
1852                if let QueuedToken(key_span, QueuedTokenType::Key) = *self.peek_token()? {
1853                    self.pending_key_indent = Some(key_span.start.col());
1854                }
1855                self.skip();
1856                if let Some(comment) = self.maybe_next_comment_event()? {
1857                    self.state = State::BlockMappingKeyNode;
1858                    Ok(comment)
1859                } else {
1860                    self.block_mapping_key_node()
1861                }
1862            }
1863            // A missing block-mapping key before `:` is represented as an empty scalar.
1864            QueuedToken(mark, QueuedTokenType::Value) => {
1865                self.state = State::BlockMappingValue;
1866                Ok((Event::empty_scalar(), Span::empty(mark.start)))
1867            }
1868            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
1869                self.pop_state();
1870                self.skip();
1871                Ok((Event::MappingEnd, mark))
1872            }
1873            QueuedToken(span, _) => Err(ScanError::new_str(
1874                span.start,
1875                "while parsing a block mapping, did not find expected key",
1876            )),
1877        }
1878    }
1879
1880    fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
1881    where
1882        'input: 'a,
1883    {
1884        if let QueuedToken(
1885            mark,
1886            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1887        ) = *self.peek_token()?
1888        {
1889            self.state = State::BlockMappingValue;
1890            Ok((Event::empty_scalar(), Span::empty(mark.start)))
1891        } else {
1892            self.defer_parse_node(
1893                State::BlockNodeOrIndentlessSequence,
1894                State::BlockMappingValue,
1895                true,
1896                true,
1897            )
1898        }
1899    }
1900
1901    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1902    where
1903        'input: 'a,
1904    {
1905        match *self.peek_token()? {
1906            QueuedToken(mark, QueuedTokenType::Value) => {
1907                self.skip();
1908                let comments = self.next_comment_events()?;
1909                if comments.is_empty() {
1910                    self.block_mapping_value_node_with_empty_span(mark)
1911                } else if let Ok(QueuedToken(
1912                    _,
1913                    QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1914                )) = self.peek_token()
1915                {
1916                    self.state = State::BlockMappingKey;
1917                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
1918                } else {
1919                    self.pending_empty_scalar_span = Some(mark);
1920                    self.state = State::BlockMappingValueNode;
1921                    Ok(self.queue_tail_and_return_first(comments))
1922                }
1923            }
1924            QueuedToken(mark, _) => {
1925                self.state = State::BlockMappingKey;
1926                Ok((Event::empty_scalar(), Span::empty(mark.start)))
1927            }
1928        }
1929    }
1930
1931    fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
1932    where
1933        'input: 'a,
1934    {
1935        let mark = match self.pending_empty_scalar_span.take() {
1936            Some(mark) => mark,
1937            None => Span::empty(self.peek_token()?.0.start),
1938        };
1939        self.block_mapping_value_node_with_empty_span(mark)
1940    }
1941
1942    fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
1943    where
1944        'input: 'a,
1945    {
1946        if let QueuedToken(
1947            _,
1948            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
1949        ) = *self.peek_token()?
1950        {
1951            self.state = State::BlockMappingKey;
1952            Ok((Event::empty_scalar(), mark))
1953        } else {
1954            self.defer_parse_node(
1955                State::BlockNodeOrIndentlessSequence,
1956                State::BlockMappingKey,
1957                true,
1958                true,
1959            )
1960        }
1961    }
1962
1963    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1964    where
1965        'input: 'a,
1966    {
1967        let span: Span =
1968            if let QueuedToken(mark, QueuedTokenType::FlowMappingEnd) = *self.peek_token()? {
1969                mark
1970            } else {
1971                if !first {
1972                    match *self.peek_token()? {
1973                        QueuedToken(_, QueuedTokenType::FlowEntry) => {
1974                            self.skip();
1975                            if let Some(comment) = self.maybe_next_comment_event()? {
1976                                self.state = State::FlowMappingFirstKey;
1977                                return Ok(comment);
1978                            }
1979                        }
1980                        QueuedToken(span, _) => {
1981                            return Err(ScanError::new_str(
1982                                span.start,
1983                                "while parsing a flow mapping, did not find expected ',' or '}'",
1984                            ))
1985                        }
1986                    }
1987                }
1988
1989                match *self.peek_token()? {
1990                    QueuedToken(_, QueuedTokenType::Key) => {
1991                        self.skip();
1992                        if let Some(comment) = self.maybe_next_comment_event()? {
1993                            self.state = State::FlowMappingKeyNode;
1994                            return Ok(comment);
1995                        }
1996                        return self.flow_mapping_key_node();
1997                    }
1998                    QueuedToken(marker, QueuedTokenType::Value) => {
1999                        self.state = State::FlowMappingValue;
2000                        return Ok((Event::empty_scalar(), Span::empty(marker.start)));
2001                    }
2002                    QueuedToken(_, QueuedTokenType::FlowMappingEnd) => (),
2003                    _ => {
2004                        return self.defer_parse_node(
2005                            State::FlowNode,
2006                            State::FlowMappingEmptyValue,
2007                            false,
2008                            false,
2009                        );
2010                    }
2011                }
2012
2013                self.peek_token()?.0
2014            };
2015
2016        self.pop_state();
2017        self.skip();
2018        Ok((Event::MappingEnd, span))
2019    }
2020
2021    fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
2022    where
2023        'input: 'a,
2024    {
2025        if let QueuedToken(
2026            mark,
2027            QueuedTokenType::Value | QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
2028        ) = *self.peek_token()?
2029        {
2030            self.state = State::FlowMappingValue;
2031            Ok((Event::empty_scalar(), Span::empty(mark.start)))
2032        } else {
2033            self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false)
2034        }
2035    }
2036
2037    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
2038    where
2039        'input: 'a,
2040    {
2041        let span: Span = {
2042            if empty {
2043                let QueuedToken(mark, _) = *self.peek_token()?;
2044                self.state = State::FlowMappingKey;
2045                return Ok((Event::empty_scalar(), Span::empty(mark.start)));
2046            }
2047            match *self.peek_token()? {
2048                QueuedToken(span, QueuedTokenType::Value) => {
2049                    self.skip();
2050                    let comments = self.next_comment_events()?;
2051                    if comments.is_empty() {
2052                        return self.flow_mapping_value_node_with_empty_span(span);
2053                    }
2054                    if let Ok(QueuedToken(
2055                        _,
2056                        QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
2057                    )) = self.peek_token()
2058                    {
2059                        self.state = State::FlowMappingKey;
2060                        return Ok(
2061                            self.queue_event_by_span(comments, (Event::empty_scalar(), span))
2062                        );
2063                    }
2064
2065                    self.pending_empty_scalar_span = Some(span);
2066                    self.state = State::FlowMappingValueNode;
2067                    return Ok(self.queue_tail_and_return_first(comments));
2068                }
2069                QueuedToken(marker, _) => Span::empty(marker.start),
2070            }
2071        };
2072
2073        self.state = State::FlowMappingKey;
2074        Ok((Event::empty_scalar(), span))
2075    }
2076
2077    fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2078    where
2079        'input: 'a,
2080    {
2081        let mark = match self.pending_empty_scalar_span.take() {
2082            Some(mark) => mark,
2083            None => Span::empty(self.peek_token()?.0.start),
2084        };
2085        self.flow_mapping_value_node_with_empty_span(mark)
2086    }
2087
2088    fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2089    where
2090        'input: 'a,
2091    {
2092        match self.peek_token()?.1 {
2093            QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd => {
2094                self.state = State::FlowMappingKey;
2095                Ok((Event::empty_scalar(), mark))
2096            }
2097            _ => self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false),
2098        }
2099    }
2100
2101    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
2102    where
2103        'input: 'a,
2104    {
2105        match *self.peek_token()? {
2106            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
2107                self.pop_state();
2108                self.skip();
2109                return Ok((Event::SequenceEnd, mark));
2110            }
2111            QueuedToken(_, QueuedTokenType::FlowEntry) if !first => {
2112                self.skip();
2113                if let Some(comment) = self.maybe_next_comment_event()? {
2114                    self.state = State::FlowSequenceFirstEntry;
2115                    return Ok(comment);
2116                }
2117            }
2118            QueuedToken(span, _) if !first => {
2119                return Err(ScanError::new_str(
2120                    span.start,
2121                    "while parsing a flow sequence, expected ',' or ']'",
2122                ));
2123            }
2124            _ => { /* next */ }
2125        }
2126        match *self.peek_token()? {
2127            QueuedToken(mark, QueuedTokenType::FlowSequenceEnd) => {
2128                self.pop_state();
2129                self.skip();
2130                Ok((Event::SequenceEnd, mark))
2131            }
2132            QueuedToken(mark, QueuedTokenType::Key) => {
2133                self.state = State::FlowSequenceEntryMappingKey;
2134                self.skip();
2135                Ok((Event::MappingStart(StructureStyle::Flow, 0, None), mark))
2136            }
2137            _ => self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false),
2138        }
2139    }
2140
2141    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
2142    where
2143        'input: 'a,
2144    {
2145        match *self.peek_token()? {
2146            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2147                self.skip();
2148                let comments = self.next_comment_events()?;
2149                if comments.is_empty() {
2150                    self.indentless_sequence_entry_node_with_empty_span(mark)
2151                } else if let Ok(QueuedToken(
2152                    _,
2153                    QueuedTokenType::BlockEntry
2154                    | QueuedTokenType::Key
2155                    | QueuedTokenType::Value
2156                    | QueuedTokenType::BlockEnd,
2157                )) = self.peek_token()
2158                {
2159                    self.state = State::IndentlessSequenceEntry;
2160                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2161                } else {
2162                    self.pending_empty_scalar_span = Some(mark);
2163                    self.state = State::IndentlessSequenceEntryNode;
2164                    Ok(self.queue_tail_and_return_first(comments))
2165                }
2166            }
2167            QueuedToken(mark, _) => {
2168                self.pop_state();
2169                Ok((Event::SequenceEnd, mark))
2170            }
2171        }
2172    }
2173
2174    fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2175    where
2176        'input: 'a,
2177    {
2178        let mark = match self.pending_empty_scalar_span.take() {
2179            Some(mark) => mark,
2180            None => Span::empty(self.peek_token()?.0.start),
2181        };
2182        self.indentless_sequence_entry_node_with_empty_span(mark)
2183    }
2184
2185    fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2186    where
2187        'input: 'a,
2188    {
2189        if let QueuedToken(
2190            _,
2191            QueuedTokenType::BlockEntry
2192            | QueuedTokenType::Key
2193            | QueuedTokenType::Value
2194            | QueuedTokenType::BlockEnd,
2195        ) = *self.peek_token()?
2196        {
2197            self.state = State::IndentlessSequenceEntry;
2198            Ok((Event::empty_scalar(), mark))
2199        } else {
2200            self.defer_parse_node(
2201                State::BlockNode,
2202                State::IndentlessSequenceEntry,
2203                true,
2204                false,
2205            )
2206        }
2207    }
2208
2209    fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
2210    where
2211        'input: 'a,
2212    {
2213        match *self.peek_token()? {
2214            QueuedToken(mark, QueuedTokenType::BlockEnd) => {
2215                self.pop_state();
2216                self.skip();
2217                Ok((Event::SequenceEnd, mark))
2218            }
2219            QueuedToken(mark, QueuedTokenType::BlockEntry) => {
2220                self.skip();
2221                let comments = self.next_comment_events()?;
2222                if comments.is_empty() {
2223                    self.block_sequence_entry_node_with_empty_span(mark)
2224                } else if let Ok(QueuedToken(
2225                    _,
2226                    QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd,
2227                )) = self.peek_token()
2228                {
2229                    self.state = State::BlockSequenceEntry;
2230                    Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
2231                } else {
2232                    self.pending_empty_scalar_span = Some(mark);
2233                    self.state = State::BlockSequenceEntryNode;
2234                    Ok(self.queue_tail_and_return_first(comments))
2235                }
2236            }
2237            QueuedToken(span, _) => Err(ScanError::new_str(
2238                span.start,
2239                "while parsing a block collection, did not find expected '-' indicator",
2240            )),
2241        }
2242    }
2243
2244    fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
2245    where
2246        'input: 'a,
2247    {
2248        let mark = match self.pending_empty_scalar_span.take() {
2249            Some(mark) => mark,
2250            None => Span::empty(self.peek_token()?.0.start),
2251        };
2252        self.block_sequence_entry_node_with_empty_span(mark)
2253    }
2254
2255    fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
2256    where
2257        'input: 'a,
2258    {
2259        if let QueuedToken(_, QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd) =
2260            *self.peek_token()?
2261        {
2262            self.state = State::BlockSequenceEntry;
2263            Ok((Event::empty_scalar(), mark))
2264        } else {
2265            self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false)
2266        }
2267    }
2268
2269    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
2270    where
2271        'input: 'a,
2272    {
2273        if let QueuedToken(mark, QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd) =
2274            *self.peek_token()?
2275        {
2276            self.state = State::FlowSequenceEntryMappingValue;
2277            Ok((Event::empty_scalar(), Span::empty(mark.start)))
2278        } else {
2279            self.defer_parse_node(
2280                State::FlowNode,
2281                State::FlowSequenceEntryMappingValue,
2282                false,
2283                false,
2284            )
2285        }
2286    }
2287
2288    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
2289    where
2290        'input: 'a,
2291    {
2292        match *self.peek_token()? {
2293            QueuedToken(_, QueuedTokenType::Value) => {
2294                self.skip();
2295                if let Some(comment) = self.maybe_next_comment_event()? {
2296                    self.state = State::FlowSequenceEntryMappingValueNode;
2297                    Ok(comment)
2298                } else {
2299                    self.flow_sequence_entry_mapping_value_node()
2300                }
2301            }
2302            QueuedToken(mark, _) => {
2303                self.state = State::FlowSequenceEntryMappingEnd;
2304                Ok((Event::empty_scalar(), Span::empty(mark.start)))
2305            }
2306        }
2307    }
2308
2309    fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
2310    where
2311        'input: 'a,
2312    {
2313        let QueuedToken(span, ref tok) = *self.peek_token()?;
2314        if matches!(
2315            tok,
2316            QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd
2317        ) {
2318            self.state = State::FlowSequenceEntryMappingEnd;
2319            Ok((Event::empty_scalar(), Span::empty(span.start)))
2320        } else {
2321            self.defer_parse_node(
2322                State::FlowNode,
2323                State::FlowSequenceEntryMappingEnd,
2324                false,
2325                false,
2326            )
2327        }
2328    }
2329
2330    #[allow(clippy::unnecessary_wraps)]
2331    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
2332    where
2333        'input: 'a,
2334    {
2335        self.state = State::FlowSequenceEntry;
2336        let QueuedToken(span, _) = *self.peek_token()?;
2337        Ok((Event::MappingEnd, Span::empty(span.start)))
2338    }
2339
2340    /// Resolve a tag from the handle and the suffix.
2341    fn resolve_tag(
2342        &self,
2343        span: Span,
2344        handle: &Cow<'input, str>,
2345        suffix: Cow<'input, str>,
2346    ) -> Result<Cow<'input, Tag>, ScanError> {
2347        let original_handle = handle.to_string();
2348        let suffix = suffix.into_owned();
2349        let tag = if handle == "!!" {
2350            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
2351            // overridden.
2352            Tag::with_original_handle(
2353                self.tags
2354                    .get("!!")
2355                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
2356                suffix,
2357                original_handle,
2358            )
2359        } else if handle.is_empty() && suffix == "!" {
2360            // "!" introduces a local tag. Local tags may have their prefix overridden.
2361            match self.tags.get("!") {
2362                Some(prefix) => Tag::with_original_handle(prefix.clone(), suffix, original_handle),
2363                None => Tag::with_original_handle(String::new(), suffix, original_handle),
2364            }
2365        } else {
2366            // Lookup handle in our tag directives.
2367            let prefix = self.tags.get(&**handle);
2368            if let Some(prefix) = prefix {
2369                Tag::with_original_handle(prefix.clone(), suffix, original_handle)
2370            } else {
2371                // Otherwise, it may be a local handle. With a local handle, the handle is set to
2372                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
2373                // If the handle is of the form "!foo!", this cannot be a local handle and we need
2374                // to error.
2375                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2376                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
2377                }
2378                Tag::with_original_handle(handle.to_string(), suffix, original_handle)
2379            }
2380        };
2381        Ok(Cow::Owned(tag))
2382    }
2383}
2384
2385impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
2386    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
2387        if let Some(ref x) = self.current {
2388            Some(Ok(x))
2389        } else if let Some(error) = &self.current_error {
2390            Some(Err(error.clone()))
2391        } else {
2392            if self.stream_end_emitted {
2393                return None;
2394            }
2395            match self.next_event_impl() {
2396                Ok(token) => self.current = Some(token),
2397                Err(error) => {
2398                    self.current_error = Some(error.clone());
2399                    return Some(Err(error));
2400                }
2401            }
2402            self.current.as_ref().map(Ok)
2403        }
2404    }
2405
2406    fn next_event(&mut self) -> Option<ParseResult<'input>> {
2407        if let Some(error) = self.current_error.take() {
2408            self.stream_end_emitted = true;
2409            return Some(Err(error));
2410        }
2411
2412        if self.stream_end_emitted {
2413            return None;
2414        }
2415
2416        let tok = self.next_event_impl();
2417        if matches!(tok, Ok((Event::StreamEnd, _)) | Err(_)) {
2418            self.stream_end_emitted = true;
2419        }
2420        Some(tok)
2421    }
2422
2423    fn load<R: SpannedEventReceiver<'input>>(
2424        &mut self,
2425        recv: &mut R,
2426        multi: bool,
2427    ) -> Result<(), ScanError> {
2428        let mut recv = InfallibleSpannedReceiver(recv);
2429        into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
2430    }
2431
2432    fn try_load<R: TrySpannedEventReceiver<'input>>(
2433        &mut self,
2434        recv: &mut R,
2435        multi: bool,
2436    ) -> Result<(), TryLoadError<R::Error>> {
2437        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
2438        if !self.scanner.stream_started() || stream_start_buffered {
2439            let (ev, span) = self.next_event_impl()?;
2440            if ev != Event::StreamStart {
2441                return Err(TryLoadError::scan(ScanError::new_str(
2442                    span.start,
2443                    "did not find expected <stream-start>",
2444                )));
2445            }
2446            try_emit(recv, ev, span)?;
2447        }
2448
2449        let has_buffered_result = self.current.is_some()
2450            || self.current_error.is_some()
2451            || !self.queued_events.is_empty();
2452        if self.scanner.stream_ended() && !has_buffered_result {
2453            // The scanner has already reached EOF before the document loop, so emit the terminal
2454            // event and stop.
2455            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
2456            self.stream_end_emitted = true;
2457            return Ok(());
2458        }
2459
2460        loop {
2461            let (ev, span) = if let Some(error) = self.current_error.take() {
2462                self.stream_end_emitted = true;
2463                return Err(TryLoadError::scan(error));
2464            } else {
2465                self.next_event_impl()?
2466            };
2467            let is_doc_end = matches!(ev, Event::DocumentEnd);
2468            let is_stream_end = matches!(ev, Event::StreamEnd);
2469
2470            try_emit(recv, ev, span)?;
2471
2472            if is_stream_end {
2473                self.stream_end_emitted = true;
2474                return Ok(());
2475            }
2476            if !multi && is_doc_end {
2477                return Ok(());
2478            }
2479        }
2480    }
2481}
2482
2483impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
2484    type Item = Result<(Event<'input>, Span), ScanError>;
2485
2486    fn next(&mut self) -> Option<Self::Item> {
2487        self.next_event()
2488    }
2489}
2490
2491#[cfg(test)]
2492mod test {
2493    use alloc::{
2494        borrow::{Cow, ToOwned},
2495        string::{String, ToString},
2496        vec::Vec,
2497    };
2498    use core::{error::Error as _, fmt};
2499
2500    use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
2501
2502    use super::{
2503        Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
2504        TrySpannedEventReceiver, YamlVersion,
2505    };
2506
2507    #[derive(Default)]
2508    struct CollectingSink<'input> {
2509        events: Vec<Event<'input>>,
2510    }
2511
2512    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
2513        fn on_event(&mut self, ev: Event<'input>) {
2514            self.events.push(ev);
2515        }
2516    }
2517
2518    fn first_error_info(input: &str) -> String {
2519        for event in Parser::new_from_str(input) {
2520            if let Err(err) = event {
2521                return err.info().to_owned();
2522            }
2523        }
2524        panic!("expected parser error")
2525    }
2526
2527    fn first_tagged_scalar_tag(input: &str) -> Tag {
2528        Parser::new_from_str(input)
2529            .find_map(|event| match event.expect("input should parse").0 {
2530                Event::Scalar(_, _, _, Some(tag)) => Some(tag.into_owned()),
2531                _ => None,
2532            })
2533            .expect("expected tagged scalar")
2534    }
2535
2536    #[test]
2537    fn deferred_parse_node_can_emit_comment_before_flow_node() {
2538        let mut parser = Parser::new_from_str("# deferred\nvalue\n");
2539        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2540        assert_eq!(
2541            parser.document_start(true).unwrap().0,
2542            Event::DocumentStart(false, None)
2543        );
2544
2545        let (event, _) = parser
2546            .defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
2547            .unwrap();
2548
2549        assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
2550        assert_eq!(parser.state, State::FlowNode);
2551    }
2552
2553    #[test]
2554    fn queued_node_event_gets_pending_key_indent() {
2555        let mut parser = Parser::new_from_str("");
2556        let span = Span::empty(Marker::new(0, 1, 0));
2557
2558        parser.pending_key_indent = Some(3);
2559        parser
2560            .queued_events
2561            .push_back((Event::SequenceStart(StructureStyle::Block, 0, None), span));
2562
2563        let (event, span) = parser.next_event_impl().unwrap();
2564
2565        assert!(matches!(
2566            event,
2567            Event::SequenceStart(StructureStyle::Block, 0, None)
2568        ));
2569        assert_eq!(span.indent, Some(3));
2570        assert_eq!(parser.pending_key_indent, None);
2571    }
2572
2573    #[test]
2574    fn state_machine_handles_deferred_flow_node_states() {
2575        let mut parser = Parser::new_from_str("value\n");
2576        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2577        assert_eq!(
2578            parser.document_start(true).unwrap().0,
2579            Event::DocumentStart(false, None)
2580        );
2581        parser.state = State::FlowNode;
2582        parser.push_state(State::End);
2583
2584        let (event, _) = parser.state_machine().unwrap();
2585
2586        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2587
2588        let mut parser = Parser::new_from_str("value\n");
2589        assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
2590        assert_eq!(
2591            parser.document_start(true).unwrap().0,
2592            Event::DocumentStart(false, None)
2593        );
2594        parser.state = State::FlowSequenceEntryMappingValueNode;
2595
2596        let (event, _) = parser.state_machine().unwrap();
2597
2598        assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
2599    }
2600
2601    #[test]
2602    fn display_resolved_core_tag_without_extra_bang() {
2603        let tag = Tag::with_original_handle("tag:yaml.org,2002:", "str", "!!");
2604
2605        assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
2606    }
2607
2608    #[test]
2609    fn tag_helpers_distinguish_core_and_local_tags() {
2610        let core = Tag::with_original_handle("tag:yaml.org,2002:", "int", "!!");
2611        let local = Tag::new("!", "thing");
2612        let non_specific = Tag::with_original_handle("", "!", "");
2613        let verbatim = Tag::with_original_handle("", "tag:example.com,2000:thing", "");
2614        let unknown_yaml_org = Tag::with_original_handle("", "tag:yaml.org,2002:application", "");
2615
2616        assert_eq!(core.core_suffix(), Some("int"));
2617        assert!(core.is_yaml_core_schema());
2618        assert!(core.is_yaml_core_schema_tag("int"));
2619        assert!(!core.is_yaml_core_schema_tag("str"));
2620        assert!(!core.is_custom());
2621        assert_eq!(core.parts(), ("tag:yaml.org,2002:", "int"));
2622        assert_eq!(core.original_parts(), ("!!", "int"));
2623        assert_eq!(core.original(), "!!int");
2624
2625        assert_eq!(local.core_suffix(), None);
2626        assert!(!local.is_yaml_core_schema());
2627        assert!(!local.is_yaml_core_schema_tag("thing"));
2628        assert!(local.is_custom());
2629        assert_eq!(local.parts(), ("!", "thing"));
2630        assert_eq!(local.original_parts(), ("!", "thing"));
2631        assert_eq!(local.original(), "!thing");
2632        assert_eq!(local.to_string(), "!thing");
2633
2634        assert_eq!(non_specific.parts(), ("", "!"));
2635        assert_eq!(non_specific.original_parts(), ("", "!"));
2636        assert_eq!(non_specific.original(), "!");
2637
2638        assert_eq!(verbatim.parts(), ("", "tag:example.com,2000:thing"));
2639        assert_eq!(
2640            verbatim.original_parts(),
2641            ("", "tag:example.com,2000:thing")
2642        );
2643        assert_eq!(verbatim.original(), "!<tag:example.com,2000:thing>");
2644
2645        assert_eq!(unknown_yaml_org.core_suffix(), None);
2646        assert!(!unknown_yaml_org.is_yaml_core_schema());
2647        assert!(unknown_yaml_org.is_custom());
2648    }
2649
2650    #[test]
2651    fn core_suffix_uses_resolved_tag_uri_for_common_spellings() {
2652        let cases = [
2653            ("shorthand", "v: !!int 1\n", ("tag:yaml.org,2002:", "int")),
2654            (
2655                "verbatim",
2656                "v: !<tag:yaml.org,2002:int> 1\n",
2657                ("", "tag:yaml.org,2002:int"),
2658            ),
2659            (
2660                "full prefix",
2661                "%TAG !e! tag:yaml.org,2002:\n---\nv: !e!int 1\n",
2662                ("tag:yaml.org,2002:", "int"),
2663            ),
2664            (
2665                "mid-split",
2666                "%TAG !m! tag:yaml.org,2002:i\n---\nv: !m!nt 1\n",
2667                ("tag:yaml.org,2002:i", "nt"),
2668            ),
2669        ];
2670
2671        for (label, input, expected_parts) in cases {
2672            let tag = first_tagged_scalar_tag(input);
2673
2674            assert_eq!(tag.parts(), expected_parts, "{label}");
2675            assert_eq!(tag.core_suffix(), Some("int"), "{label}");
2676            assert!(tag.is_yaml_core_schema(), "{label}");
2677            assert!(tag.is_yaml_core_schema_tag("int"), "{label}");
2678            assert!(!tag.is_yaml_core_schema_tag("str"), "{label}");
2679            assert!(!tag.is_custom(), "{label}");
2680        }
2681    }
2682
2683    #[test]
2684    fn core_suffix_rejects_non_core_yaml_org_tags() {
2685        let cases = [
2686            "binary",
2687            "merge",
2688            "omap",
2689            "pairs",
2690            "set",
2691            "timestamp",
2692            "value",
2693            "yaml",
2694        ];
2695
2696        for suffix in cases {
2697            let tag = Tag::with_original_handle("tag:yaml.org,2002:", suffix, "!!");
2698
2699            assert_eq!(tag.core_suffix(), None, "{suffix}");
2700            assert!(!tag.is_yaml_core_schema(), "{suffix}");
2701            assert!(tag.is_custom(), "{suffix}");
2702        }
2703    }
2704
2705    #[test]
2706    fn core_suffix_rejects_non_core_tags() {
2707        let cases = [
2708            ("local", "v: !local 1\n"),
2709            ("verbatim custom", "v: !<tag:example.com,2000:int> 1\n"),
2710            (
2711                "custom directive",
2712                "%TAG !e! tag:example.com,2000:\n---\nv: !e!int 1\n",
2713            ),
2714            (
2715                "overridden secondary handle",
2716                "%TAG !! tag:example.com,2000:app/\n---\nv: !!int 1\n",
2717            ),
2718        ];
2719
2720        for (label, input) in cases {
2721            let tag = first_tagged_scalar_tag(input);
2722
2723            assert_eq!(tag.core_suffix(), None, "{label}");
2724            assert!(!tag.is_yaml_core_schema(), "{label}");
2725            assert!(!tag.is_yaml_core_schema_tag("int"), "{label}");
2726            assert!(tag.is_custom(), "{label}");
2727        }
2728    }
2729
2730    #[test]
2731    fn suffix_in_namespace_resolves_across_spellings() {
2732        const NS: &str = "tag:yaml.org,2002:";
2733
2734        // Every spelling of `tag:yaml.org,2002:omap` resolves to the same name, even though
2735        // `omap` is not a Core Schema type (so `core_suffix` reports `None`). `mid_split` cuts
2736        // the URI after the namespace prefix (handle longer than `NS`); `inside_split` cuts it
2737        // before the prefix ends (handle a non-empty prefix of `NS`), exercising both branches.
2738        let shorthand = Tag::with_original_handle(NS, "omap", "!!");
2739        let verbatim = Tag::with_original_handle("", "tag:yaml.org,2002:omap", "");
2740        let mid_split = Tag::with_original_handle("tag:yaml.org,2002:o", "map", "!o!");
2741        let inside_split = Tag::with_original_handle("tag:yaml.org,", "2002:omap", "!y!");
2742        for tag in [&shorthand, &verbatim, &mid_split, &inside_split] {
2743            assert_eq!(tag.suffix_in_namespace(NS).as_deref(), Some("omap"));
2744            assert_eq!(tag.core_suffix(), None);
2745        }
2746
2747        // Borrow whenever the resolved name is a contiguous slice of `handle` or `suffix`;
2748        // allocate only when a split lands inside the name itself (handle extends past `NS`).
2749        assert!(matches!(
2750            shorthand.suffix_in_namespace(NS),
2751            Some(Cow::Borrowed(_))
2752        ));
2753        assert!(matches!(
2754            verbatim.suffix_in_namespace(NS),
2755            Some(Cow::Borrowed(_))
2756        ));
2757        assert!(matches!(
2758            inside_split.suffix_in_namespace(NS),
2759            Some(Cow::Borrowed(_))
2760        ));
2761        assert!(matches!(
2762            mid_split.suffix_in_namespace(NS),
2763            Some(Cow::Owned(_))
2764        ));
2765
2766        // The non-core `merge` type resolves the same way; core types still flow through
2767        // `core_suffix`.
2768        let merge = Tag::with_original_handle(NS, "merge", "!!");
2769        assert_eq!(merge.suffix_in_namespace(NS).as_deref(), Some("merge"));
2770        assert_eq!(merge.core_suffix(), None);
2771        assert_eq!(
2772            Tag::new(NS, "int").suffix_in_namespace(NS).as_deref(),
2773            Some("int")
2774        );
2775
2776        // Tags outside the namespace do not resolve into it.
2777        assert_eq!(Tag::new("!", "omap").suffix_in_namespace(NS), None);
2778        assert_eq!(
2779            Tag::with_original_handle("", "tag:example.com,2000:omap", "").suffix_in_namespace(NS),
2780            None
2781        );
2782    }
2783
2784    #[test]
2785    fn attach_tag_start_applies_marker_to_span() {
2786        let event = Event::Scalar("value".into(), ScalarStyle::Plain, 0, None);
2787        let span = Span::new(Marker::new(6, 1, 6), Marker::new(11, 1, 11));
2788        let tag_start = Marker::new(0, 1, 0);
2789
2790        let (attached_event, attached_span) =
2791            Parser::<crate::input::str::StrInput<'_>>::attach_tag_start(
2792                event.clone(),
2793                span,
2794                Some(tag_start),
2795            );
2796
2797        assert_eq!(attached_event, event);
2798        assert_eq!(attached_span.start, span.start);
2799        assert_eq!(attached_span.end, span.end);
2800        assert_eq!(attached_span.tag_start(), Some(tag_start));
2801    }
2802
2803    #[test]
2804    fn event_inspection_helpers_report_node_metadata() {
2805        let tag = Tag::new("!", "thing");
2806        let scalar = Event::Scalar(
2807            "value".into(),
2808            ScalarStyle::DoubleQuoted,
2809            7,
2810            Some(Cow::Borrowed(&tag)),
2811        );
2812        let sequence =
2813            Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
2814        let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));
2815
2816        assert_eq!(scalar.anchor_id(), Some(7));
2817        assert_eq!(scalar.alias_id(), None);
2818        assert_eq!(scalar.tag(), Some(&tag));
2819        assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
2820        assert!(scalar.is_node());
2821
2822        assert_eq!(sequence.anchor_id(), Some(8));
2823        assert_eq!(sequence.alias_id(), None);
2824        assert_eq!(sequence.tag(), Some(&tag));
2825        assert_eq!(sequence.scalar(), None);
2826        assert!(sequence.is_node());
2827
2828        assert_eq!(mapping.anchor_id(), Some(9));
2829        assert_eq!(mapping.alias_id(), None);
2830        assert_eq!(mapping.tag(), Some(&tag));
2831        assert_eq!(mapping.scalar(), None);
2832        assert!(mapping.is_node());
2833
2834        let alias = Event::Alias(10);
2835        assert_eq!(alias.anchor_id(), None);
2836        assert_eq!(alias.alias_id(), Some(10));
2837        assert_eq!(alias.tag(), None);
2838        assert_eq!(alias.scalar(), None);
2839        assert!(alias.is_node());
2840
2841        let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
2842        assert_eq!(unanchored_scalar.anchor_id(), None);
2843        assert_eq!(unanchored_scalar.alias_id(), None);
2844
2845        let stream_start = Event::StreamStart;
2846        assert_eq!(stream_start.anchor_id(), None);
2847        assert_eq!(stream_start.alias_id(), None);
2848        assert_eq!(stream_start.tag(), None);
2849        assert_eq!(stream_start.scalar(), None);
2850        assert!(!stream_start.is_node());
2851    }
2852
2853    #[test]
2854    fn test_peek_eq_parse() {
2855        let s = "
2856a0 bb: val
2857a1: &x
2858    b1: 4
2859    b2: d
2860a2: 4
2861a3: [1, 2, 3]
2862a4:
2863    - [a1, a2]
2864    - 2
2865a5: *x
2866";
2867        let mut p = Parser::new_from_str(s);
2868        loop {
2869            let event_peek = p.peek().unwrap().unwrap().clone();
2870            let event = p.next_event().unwrap().unwrap();
2871            assert_eq!(event, event_peek);
2872            if event.0 == Event::StreamEnd {
2873                break;
2874            }
2875        }
2876    }
2877
2878    #[test]
2879    fn test_repeated_peek_returns_buffered_event() {
2880        let mut parser = Parser::new_from_str("key: value\n");
2881
2882        let first_peek = parser.peek().unwrap().unwrap().clone();
2883        let second_peek = parser.peek().unwrap().unwrap().clone();
2884        let next = parser.next_event().unwrap().unwrap();
2885
2886        assert_eq!(first_peek, second_peek);
2887        assert_eq!(first_peek, next);
2888    }
2889
2890    #[test]
2891    fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
2892        let mut parser = Parser::new_from_str("a: [1, 2");
2893
2894        loop {
2895            match parser.peek() {
2896                Some(Ok(_)) => {
2897                    parser.next_event().unwrap().unwrap();
2898                }
2899                Some(Err(error)) => {
2900                    assert_eq!(error.info(), "unclosed bracket '['");
2901                    break;
2902                }
2903                None => panic!("expected parse error"),
2904            }
2905        }
2906    }
2907
2908    #[test]
2909    fn test_iterator_terminates_after_scan_error() {
2910        let parser = Parser::new_from_str("foo:\n  bar\ninvalid\n");
2911        let mut errors = 0usize;
2912        let mut events = 0usize;
2913
2914        for item in parser {
2915            events += 1;
2916            if item.is_err() {
2917                errors += 1;
2918            }
2919            assert!(
2920                events < 1000,
2921                "parser iterator did not terminate after a scan error"
2922            );
2923        }
2924
2925        assert_eq!(errors, 1);
2926    }
2927
2928    #[test]
2929    fn test_iterator_terminates_after_node_property_error() {
2930        let parser = Parser::new_from_str("- *nope\n- 2\n");
2931        let mut errors = 0usize;
2932        let mut saw_later_node = false;
2933        let mut events = 0usize;
2934
2935        for item in parser {
2936            events += 1;
2937            match item {
2938                Ok((Event::Scalar(value, ..), _)) if value == "2" => saw_later_node = true,
2939                Ok(_) => {}
2940                Err(error) => {
2941                    assert_eq!(error.info(), "while parsing node, found unknown anchor");
2942                    errors += 1;
2943                }
2944            }
2945            assert!(
2946                events < 1000,
2947                "parser iterator did not terminate after a node-property error"
2948            );
2949        }
2950
2951        assert_eq!(errors, 1);
2952        assert!(!saw_later_node, "parser resumed after the alias error");
2953    }
2954
2955    #[test]
2956    fn test_peeked_scan_error_is_returned_once_by_next_event() {
2957        let mut parser = Parser::new_from_str("a: [1, 2");
2958
2959        let first_error = loop {
2960            match parser.peek() {
2961                Some(Ok(_)) => {
2962                    parser.next_event().unwrap().unwrap();
2963                }
2964                Some(Err(error)) => break error,
2965                None => panic!("expected parse error"),
2966            }
2967        };
2968        let Some(Err(second_error)) = parser.peek() else {
2969            panic!("expected cached parse error");
2970        };
2971
2972        assert_eq!(first_error, second_error);
2973        assert_eq!(parser.next_event().unwrap().unwrap_err(), first_error);
2974        assert!(parser.next_event().is_none());
2975        assert!(parser.peek().is_none());
2976    }
2977
2978    #[test]
2979    fn test_peeked_node_property_error_is_stable_and_terminal() {
2980        let mut parser = Parser::new_from_str("a: *nope\nb: 2\n");
2981
2982        for _ in 0..4 {
2983            parser.next_event().unwrap().unwrap();
2984        }
2985
2986        let Some(Err(first_error)) = parser.peek() else {
2987            panic!("expected unknown alias error");
2988        };
2989        let Some(Err(second_error)) = parser.peek() else {
2990            panic!("expected cached unknown alias error");
2991        };
2992
2993        assert_eq!(first_error, second_error);
2994        assert_eq!(
2995            first_error.info(),
2996            "while parsing node, found unknown anchor"
2997        );
2998        assert_eq!(parser.next_event().unwrap().unwrap_err(), first_error);
2999        assert!(parser.next_event().is_none());
3000        assert!(parser.peek().is_none());
3001    }
3002
3003    #[test]
3004    fn test_peek_and_next_return_none_after_stream_end() {
3005        let mut parser = Parser::new_from_str("");
3006
3007        assert!(matches!(
3008            parser.next_event().unwrap().unwrap().0,
3009            Event::StreamStart
3010        ));
3011        assert!(matches!(
3012            parser.next_event().unwrap().unwrap().0,
3013            Event::StreamEnd
3014        ));
3015        assert!(parser.next_event().is_none());
3016        assert!(parser.peek().is_none());
3017    }
3018
3019    #[test]
3020    fn test_load_after_stream_already_ended_emits_stream_end() {
3021        let mut parser = Parser::new_from_str("");
3022        while parser.next_event().is_some() {}
3023
3024        let mut sink = CollectingSink::default();
3025        parser.load(&mut sink, true).unwrap();
3026
3027        assert_eq!(sink.events, vec![Event::StreamEnd]);
3028    }
3029
3030    #[test]
3031    fn test_load_full_stream_fuses_iterator_after_stream_end() {
3032        let mut parser = Parser::new_from_str("a: 1\n");
3033        let mut sink = CollectingSink::default();
3034
3035        parser.load(&mut sink, true).unwrap();
3036
3037        assert!(matches!(sink.events.last(), Some(Event::StreamEnd)));
3038        assert!(parser.next_event().is_none());
3039        assert!(parser.peek().is_none());
3040    }
3041
3042    #[test]
3043    fn test_load_after_peek_delivers_buffered_document_end_before_stream_end() {
3044        let mut parser = Parser::new_from_str("a");
3045        for _ in 0..3 {
3046            parser.next_event().unwrap().unwrap();
3047        }
3048
3049        assert_eq!(parser.peek().unwrap().unwrap().0, Event::DocumentEnd);
3050
3051        let mut sink = CollectingSink::default();
3052        parser.load(&mut sink, true).unwrap();
3053
3054        assert_eq!(sink.events, vec![Event::DocumentEnd, Event::StreamEnd]);
3055        assert!(parser.next_event().is_none());
3056    }
3057
3058    #[test]
3059    fn test_load_visits_nested_collection_events() {
3060        let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
3061        let mut sink = CollectingSink::default();
3062
3063        parser.load(&mut sink, true).unwrap();
3064
3065        assert_eq!(
3066            sink.events,
3067            vec![
3068                Event::StreamStart,
3069                Event::DocumentStart(false, None),
3070                Event::MappingStart(StructureStyle::Block, 0, None),
3071                Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
3072                Event::SequenceStart(StructureStyle::Block, 0, None),
3073                Event::MappingStart(StructureStyle::Block, 0, None),
3074                Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
3075                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
3076                Event::MappingEnd,
3077                Event::SequenceStart(StructureStyle::Flow, 0, None),
3078                Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
3079                Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
3080                Event::SequenceEnd,
3081                Event::SequenceEnd,
3082                Event::MappingEnd,
3083                Event::DocumentEnd,
3084                Event::StreamEnd,
3085            ]
3086        );
3087    }
3088
3089    #[derive(Clone, Debug, PartialEq, Eq)]
3090    enum ValidationError {
3091        ForbiddenValue,
3092    }
3093
3094    #[derive(Debug)]
3095    struct ReceiverFailure;
3096
3097    impl fmt::Display for ReceiverFailure {
3098        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3099            write!(f, "receiver failed")
3100        }
3101    }
3102
3103    impl core::error::Error for ReceiverFailure {}
3104
3105    struct FailingSink<'input> {
3106        events: Vec<Event<'input>>,
3107    }
3108
3109    impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
3110        type Error = ValidationError;
3111
3112        fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
3113            let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
3114            self.events.push(ev);
3115            if should_fail {
3116                Err(ValidationError::ForbiddenValue)
3117            } else {
3118                Ok(())
3119            }
3120        }
3121    }
3122
3123    #[test]
3124    fn test_try_load_stops_on_receiver_error() {
3125        let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
3126        let mut sink = FailingSink { events: Vec::new() };
3127
3128        let err = parser.try_load(&mut sink, true).unwrap_err();
3129
3130        assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
3131        assert!(sink
3132            .events
3133            .iter()
3134            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
3135        assert!(sink
3136            .events
3137            .iter()
3138            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
3139        assert!(!sink
3140            .events
3141            .iter()
3142            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
3143    }
3144
3145    struct SpannedFailingSink {
3146        failed_span: Option<Span>,
3147    }
3148
3149    impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
3150        type Error = Span;
3151
3152        fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
3153            if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
3154                self.failed_span = Some(span);
3155                Err(span)
3156            } else {
3157                Ok(())
3158            }
3159        }
3160    }
3161
3162    #[test]
3163    fn test_try_load_spanned_receiver_gets_span() {
3164        let mut parser = Parser::new_from_str("value: bad\n");
3165        let mut sink = SpannedFailingSink { failed_span: None };
3166
3167        let err = parser.try_load(&mut sink, false).unwrap_err();
3168
3169        let TryLoadError::Receiver(span) = err else {
3170            panic!("expected receiver error");
3171        };
3172
3173        assert_eq!(Some(span), sink.failed_span);
3174        assert!(!span.is_empty());
3175    }
3176
3177    struct NeverFails {
3178        count: usize,
3179    }
3180
3181    impl<'input> TryEventReceiver<'input> for NeverFails {
3182        type Error = ValidationError;
3183
3184        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
3185            self.count += 1;
3186            Ok(())
3187        }
3188    }
3189
3190    #[test]
3191    fn test_try_load_returns_scan_error() {
3192        let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
3193        let mut sink = NeverFails { count: 0 };
3194
3195        let err = parser.try_load(&mut sink, true).unwrap_err();
3196
3197        let TryLoadError::Scan(err) = err else {
3198            panic!("expected scan error");
3199        };
3200        assert_eq!(err.info(), "duplicate version directive");
3201    }
3202
3203    #[test]
3204    fn test_try_load_error_display_and_source_cover_both_variants() {
3205        let scan = ScanError::new_str(Marker::new(3, 1, 3), "bad yaml");
3206        let scan_err: TryLoadError<ReceiverFailure> = scan.into();
3207
3208        assert!(scan_err.to_string().starts_with("parser error: bad yaml"));
3209        assert!(scan_err.source().is_some());
3210
3211        let receiver_err = TryLoadError::Receiver(ReceiverFailure);
3212
3213        assert_eq!(receiver_err.to_string(), "receiver error: receiver failed");
3214        assert!(receiver_err.source().is_some());
3215    }
3216
3217    #[test]
3218    fn test_try_load_requires_buffered_stream_start() {
3219        let mut parser = Parser::new_from_str("");
3220        let span = Span::empty(Marker::new(0, 1, 0));
3221        parser.current = Some((
3222            Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
3223            span,
3224        ));
3225        let mut sink = NeverFails { count: 0 };
3226
3227        let err = parser.try_load(&mut sink, true).unwrap_err();
3228
3229        let TryLoadError::Scan(err) = err else {
3230            panic!("expected scan error");
3231        };
3232        assert_eq!(err.info(), "did not find expected <stream-start>");
3233    }
3234
3235    #[test]
3236    fn test_try_load_after_stream_already_ended_emits_stream_end() {
3237        let mut parser = Parser::new_from_str("");
3238        while parser.next_event().is_some() {}
3239
3240        let mut sink = FailingSink { events: Vec::new() };
3241        parser.try_load(&mut sink, true).unwrap();
3242
3243        assert_eq!(sink.events, vec![Event::StreamEnd]);
3244    }
3245
3246    #[test]
3247    fn test_try_load_full_stream_fuses_iterator_after_stream_end() {
3248        let mut parser = Parser::new_from_str("a: 1\n");
3249        let mut sink = FailingSink { events: Vec::new() };
3250
3251        parser.try_load(&mut sink, true).unwrap();
3252
3253        assert!(matches!(sink.events.last(), Some(Event::StreamEnd)));
3254        assert!(parser.next_event().is_none());
3255        assert!(parser.peek().is_none());
3256    }
3257
3258    #[test]
3259    fn test_try_load_after_peek_delivers_buffered_document_end_before_stream_end() {
3260        let mut parser = Parser::new_from_str("a");
3261        for _ in 0..3 {
3262            parser.next_event().unwrap().unwrap();
3263        }
3264
3265        assert_eq!(parser.peek().unwrap().unwrap().0, Event::DocumentEnd);
3266
3267        let mut sink = FailingSink { events: Vec::new() };
3268        parser.try_load(&mut sink, true).unwrap();
3269
3270        assert_eq!(sink.events, vec![Event::DocumentEnd, Event::StreamEnd]);
3271        assert!(parser.next_event().is_none());
3272    }
3273
3274    #[test]
3275    fn test_load_single_document_stops_before_next_document() {
3276        let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
3277        let mut sink = CollectingSink::default();
3278
3279        parser.load(&mut sink, false).unwrap();
3280
3281        assert!(sink
3282            .events
3283            .iter()
3284            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
3285        assert!(!sink
3286            .events
3287            .iter()
3288            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
3289        assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
3290    }
3291
3292    #[test]
3293    fn test_duplicate_version_directive_errors() {
3294        assert_eq!(
3295            first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
3296            "duplicate version directive"
3297        );
3298    }
3299
3300    #[test]
3301    fn test_unsupported_yaml_major_version_errors() {
3302        assert_eq!(
3303            first_error_info("%YAML 9.9\n--- a\n"),
3304            "unsupported YAML major version"
3305        );
3306    }
3307
3308    #[test]
3309    fn test_document_start_emits_yaml_version() {
3310        let events = Parser::new_from_str("%YAML 1.2\n---\nvalue\n")
3311            .map(|event| event.unwrap().0)
3312            .collect::<Vec<_>>();
3313
3314        assert!(matches!(
3315            events.get(1),
3316            Some(Event::DocumentStart(
3317                true,
3318                Some(YamlVersion { major: 1, minor: 2 })
3319            ))
3320        ));
3321    }
3322
3323    #[test]
3324    fn test_document_start_allows_supported_major_future_minor_version() {
3325        let events = Parser::new_from_str("%YAML 1.9\n---\nvalue\n")
3326            .map(|event| event.unwrap().0)
3327            .collect::<Vec<_>>();
3328
3329        assert!(matches!(
3330            events.get(1),
3331            Some(Event::DocumentStart(
3332                true,
3333                Some(YamlVersion { major: 1, minor: 9 })
3334            ))
3335        ));
3336    }
3337
3338    #[test]
3339    fn test_document_start_keeps_version_and_tags_across_comment() {
3340        let events = Parser::new_from_str(
3341            "%YAML 1.2\n# directive comment\n%TAG !e! tag:example.com,2026:\n---\nkey: !e!thing value\n",
3342        )
3343        .map(|event| event.unwrap().0)
3344        .collect::<Vec<_>>();
3345
3346        assert!(matches!(
3347            events.get(2),
3348            Some(Event::DocumentStart(
3349                true,
3350                Some(YamlVersion { major: 1, minor: 2 })
3351            ))
3352        ));
3353
3354        let tag = events
3355            .iter()
3356            .find_map(|event| match event {
3357                Event::Scalar(value, _, _, Some(tag)) if value == "value" => Some(tag),
3358                _ => None,
3359            })
3360            .expect("expected tagged scalar after comment-separated directives");
3361
3362        assert_eq!(tag.handle, "tag:example.com,2026:");
3363        assert_eq!(tag.suffix, "thing");
3364    }
3365
3366    #[test]
3367    fn test_each_document_can_declare_own_yaml_version() {
3368        let document_starts = Parser::new_from_str(
3369            "%YAML 1.2\n---\na\n...\n%YAML 1.2\n---\nb\n...\n%YAML 1.1\n---\nc\n",
3370        )
3371        .filter_map(|event| match event.unwrap().0 {
3372            Event::DocumentStart(explicit, version) => Some((explicit, version)),
3373            _ => None,
3374        })
3375        .collect::<Vec<_>>();
3376
3377        assert_eq!(
3378            document_starts,
3379            vec![
3380                (true, Some(YamlVersion::new(1, 2))),
3381                (true, Some(YamlVersion::new(1, 2))),
3382                (true, Some(YamlVersion::new(1, 1))),
3383            ]
3384        );
3385    }
3386
3387    #[test]
3388    fn test_duplicate_tag_directive_errors() {
3389        assert_eq!(
3390            first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
3391            "the TAG directive must only be given at most once per handle in the same document"
3392        );
3393    }
3394
3395    #[test]
3396    fn duplicate_tag_directive_across_comment_is_rejected() {
3397        let input = concat!(
3398            "%TAG !e! tag:example.com,2000:one/\n",
3399            "# separator\n",
3400            "%TAG !e! tag:example.com,2000:two/\n",
3401            "---\n",
3402        );
3403
3404        assert_eq!(
3405            first_error_info(input),
3406            "the TAG directive must only be given at most once per handle in the same document"
3407        );
3408    }
3409
3410    #[test]
3411    fn test_keep_tags_inherited_handle_can_be_redeclared_in_next_document() {
3412        let input = concat!(
3413            "%TAG !e! tag:example.com,2000:one/\n",
3414            "---\n",
3415            "first: !e!thing value\n",
3416            "...\n",
3417            "%TAG !e! tag:example.com,2000:two/\n",
3418            "---\n",
3419            "second: !e!thing value\n",
3420        );
3421
3422        let tags = Parser::new_from_str(input)
3423            .keep_tags(true)
3424            .filter_map(|event| match event.expect("input should parse").0 {
3425                Event::Scalar(value, _, _, Some(tag)) if value == "value" => {
3426                    Some(tag.handle.clone())
3427                }
3428                _ => None,
3429            })
3430            .collect::<Vec<_>>();
3431
3432        assert_eq!(
3433            tags,
3434            vec!["tag:example.com,2000:one/", "tag:example.com,2000:two/"]
3435        );
3436    }
3437
3438    #[test]
3439    fn test_directive_after_implicit_document_requires_explicit_end() {
3440        assert_eq!(
3441            first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
3442            "missing explicit document end marker before directive"
3443        );
3444    }
3445
3446    #[test]
3447    fn test_anchor_offset_overflow_reports_error() {
3448        let mut parser = Parser::new_from_str("&a value");
3449        parser.set_anchor_offset(usize::MAX);
3450
3451        let err = parser
3452            .find_map(Result::err)
3453            .expect("anchor registration should overflow");
3454
3455        assert_eq!(
3456            err.info(),
3457            "while parsing anchor, anchor count exceeded supported limit"
3458        );
3459    }
3460
3461    #[test]
3462    fn test_alias_resolves_to_registered_anchor_id() {
3463        let events = Parser::new_from_str("- &a value\n- *a\n")
3464            .map(|event| event.unwrap().0)
3465            .collect::<Vec<_>>();
3466
3467        assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
3468    }
3469
3470    #[test]
3471    fn test_anchor_then_tag_applies_both_to_scalar() {
3472        let events = Parser::new_from_str("&a !!str value")
3473            .map(|event| event.unwrap().0)
3474            .collect::<Vec<_>>();
3475
3476        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
3477            .iter()
3478            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3479        else {
3480            panic!("expected tagged anchored scalar");
3481        };
3482
3483        assert_eq!(value, "value");
3484        assert_eq!(*anchor_id, 1);
3485        assert_eq!(tag.handle, "tag:yaml.org,2002:");
3486        assert_eq!(tag.suffix, "str");
3487        assert_eq!(tag.original_handle, "!!");
3488        assert_eq!(tag.original(), "!!str");
3489    }
3490
3491    #[test]
3492    fn test_tag_then_anchor_applies_both_to_scalar() {
3493        let events = Parser::new_from_str("!!str &a value")
3494            .map(|event| event.unwrap().0)
3495            .collect::<Vec<_>>();
3496
3497        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
3498            .iter()
3499            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3500        else {
3501            panic!("expected tagged anchored scalar");
3502        };
3503
3504        assert_eq!(value, "value");
3505        assert_eq!(*anchor_id, 1);
3506        assert_eq!(tag.handle, "tag:yaml.org,2002:");
3507        assert_eq!(tag.suffix, "str");
3508        assert_eq!(tag.original_handle, "!!");
3509        assert_eq!(tag.original(), "!!str");
3510    }
3511
3512    #[test]
3513    fn test_tag_directive_preserves_original_handle() {
3514        let events =
3515            Parser::new_from_str("%TAG !e! tag:example.com,2000:\n---\nconfig: !e!keep value\n")
3516                .map(|event| event.unwrap().0)
3517                .collect::<Vec<_>>();
3518
3519        let (value, tag) = events
3520            .iter()
3521            .find_map(|event| match event {
3522                Event::Scalar(value, _, _, Some(tag)) if value == "value" => Some((value, tag)),
3523                _ => None,
3524            })
3525            .expect("expected tagged scalar");
3526
3527        assert_eq!(value, "value");
3528        assert_eq!(tag.handle, "tag:example.com,2000:");
3529        assert_eq!(tag.suffix, "keep");
3530        assert_eq!(tag.original_handle, "!e!");
3531        assert_eq!(tag.parts(), ("tag:example.com,2000:", "keep"));
3532        assert_eq!(tag.original_parts(), ("!e!", "keep"));
3533        assert_eq!(tag.original(), "!e!keep");
3534    }
3535
3536    #[test]
3537    fn test_verbatim_tag_original_is_normalized_author_spelling() {
3538        let events = Parser::new_from_str("key: !<tag:example.com,2000:thing> value\n")
3539            .map(|event| event.unwrap().0)
3540            .collect::<Vec<_>>();
3541
3542        let Some(Event::Scalar(value, _, _, Some(tag))) = events
3543            .iter()
3544            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
3545        else {
3546            panic!("expected tagged scalar");
3547        };
3548
3549        assert_eq!(value, "value");
3550        assert_eq!(tag.handle, "");
3551        assert_eq!(tag.suffix, "tag:example.com,2000:thing");
3552        assert_eq!(tag.original_handle, "");
3553        assert_eq!(tag.parts(), ("", "tag:example.com,2000:thing"));
3554        assert_eq!(tag.original_parts(), ("", "tag:example.com,2000:thing"));
3555        assert_eq!(tag.original(), "!<tag:example.com,2000:thing>");
3556    }
3557
3558    #[test]
3559    fn test_multiple_tag_directives_are_kept_within_document() {
3560        let text = r"
3561%TAG !a! tag:a,2024:
3562%TAG !b! tag:b,2024:
3563---
3564first: !a!x foo
3565second: !b!y bar
3566";
3567
3568        let mut seen_a = false;
3569        let mut seen_b = false;
3570        for event in Parser::new_from_str(text) {
3571            let (event, _) = event.unwrap();
3572            if let Event::Scalar(_, _, _, Some(tag)) = event {
3573                if tag.handle == "tag:a,2024:" {
3574                    seen_a = true;
3575                } else if tag.handle == "tag:b,2024:" {
3576                    seen_b = true;
3577                }
3578            }
3579        }
3580
3581        assert!(seen_a);
3582        assert!(seen_b);
3583    }
3584
3585    #[test]
3586    fn test_tags_are_cleared_when_next_document_has_no_directives() {
3587        let text = r"
3588%TAG !t! tag:test,2024:
3589--- !t!1
3590foo
3591--- !t!2
3592bar
3593";
3594
3595        let mut parser = Parser::new_from_str(text);
3596        for event in parser.by_ref() {
3597            let (event, _) = event.unwrap();
3598            if let Event::DocumentEnd = event {
3599                break;
3600            }
3601        }
3602
3603        match parser.next().unwrap().unwrap().0 {
3604            Event::DocumentStart(true, None) => {}
3605            _ => panic!("expected explicit second document start"),
3606        }
3607
3608        let err = parser.next().unwrap().unwrap_err();
3609        assert!(format!("{err}").contains("the handle wasn't declared"));
3610    }
3611
3612    #[test]
3613    fn test_pull_parser_clears_anchors_between_documents() {
3614        let mut parser = Parser::new_from_str(
3615            "--- &a value
3616--- *a
3617",
3618        );
3619
3620        for event in parser.by_ref() {
3621            let (event, _) = event.unwrap();
3622            if matches!(event, Event::DocumentEnd) {
3623                break;
3624            }
3625        }
3626
3627        match parser.next().unwrap().unwrap().0 {
3628            Event::DocumentStart(true, None) => {}
3629            _ => panic!("expected explicit second document start"),
3630        }
3631
3632        let err = parser.next().unwrap().unwrap_err();
3633        assert!(format!("{err}").contains("unknown anchor"));
3634    }
3635
3636    #[test]
3637    fn test_keep_tags_across_multiple_documents() {
3638        let text = r#"
3639%YAML 1.1
3640%TAG !t! tag:test,2024:
3641--- !t!1 &1
3642foo: "bar"
3643--- !t!2 &2
3644baz: "qux"
3645"#;
3646        for x in Parser::new_from_str(text).keep_tags(true) {
3647            let x = x.unwrap();
3648            if let Event::MappingStart(_, _, tag) = x.0 {
3649                let tag = tag.unwrap();
3650                assert_eq!(tag.handle, "tag:test,2024:");
3651            }
3652        }
3653
3654        for x in Parser::new_from_str(text).keep_tags(false) {
3655            if x.is_err() {
3656                // Test successful
3657                return;
3658            }
3659        }
3660        panic!("Test failed, did not encounter error")
3661    }
3662
3663    #[test]
3664    fn test_flow_sequence_mapping_allows_empty_key() {
3665        let parser = Parser::new_from_str("[?: value]");
3666        for event in parser {
3667            event.expect("parser should accept flow sequence mappings with empty keys");
3668        }
3669    }
3670
3671    #[test]
3672    fn test_keep_tags_does_not_persist_default_tag_handles() {
3673        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
3674
3675        let mut int_tags = Vec::new();
3676        for event in Parser::new_from_str(text).keep_tags(true) {
3677            let event = event.unwrap().0;
3678            if let Event::Scalar(_, _, _, Some(tag)) = event {
3679                if tag.suffix == "int" {
3680                    int_tags.push(tag.handle.clone());
3681                }
3682            }
3683        }
3684
3685        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
3686    }
3687
3688    #[test]
3689    fn test_keep_tags_does_not_persist_primary_tag_handle() {
3690        let text = "%TAG ! tag:evil,2024:\n--- !int 1\n--- !int 2\n";
3691
3692        let tags = Parser::new_from_str(text)
3693            .keep_tags(true)
3694            .filter_map(|event| match event.expect("input should parse").0 {
3695                Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
3696                    Some(tag.handle.clone())
3697                }
3698                _ => None,
3699            })
3700            .collect::<Vec<_>>();
3701
3702        assert_eq!(tags, vec!["tag:evil,2024:", "!"]);
3703    }
3704
3705    #[test]
3706    fn test_resolve_tag_uses_overridden_local_prefix() {
3707        let mut parser = Parser::new_from_str("");
3708        parser
3709            .tags
3710            .insert("!".to_string(), "tag:local.example,2024:".to_string());
3711
3712        let tag = parser
3713            .resolve_tag(
3714                Span::empty(Marker::new(0, 1, 0)),
3715                &Cow::Borrowed(""),
3716                Cow::Borrowed("!"),
3717            )
3718            .unwrap();
3719
3720        assert_eq!(tag.handle, "tag:local.example,2024:");
3721        assert_eq!(tag.suffix, "!");
3722    }
3723
3724    #[test]
3725    fn test_load_after_peek_stream_start() {
3726        #[derive(Default)]
3727        struct Sink<'input> {
3728            events: Vec<Event<'input>>,
3729        }
3730
3731        impl<'input> EventReceiver<'input> for Sink<'input> {
3732            fn on_event(&mut self, ev: Event<'input>) {
3733                self.events.push(ev);
3734            }
3735        }
3736
3737        let mut parser = Parser::new_from_str("key: value\n");
3738        let mut sink = Sink::default();
3739
3740        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
3741        parser.load(&mut sink, false).unwrap();
3742
3743        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
3744        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(..))));
3745    }
3746}