Skip to main content

granit_parser/
parser.rs

//! Home to the YAML Parser.
//!
//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10    BufferedInput,
11};
12
13use alloc::{
14    borrow::Cow,
15    collections::{BTreeMap, BTreeSet},
16    string::{String, ToString},
17    vec::Vec,
18};
19use core::{
20    convert::Infallible,
21    fmt::{self, Display},
22};
23
/// Internal states of the parser's state machine.
///
/// The parser keeps one current state plus a stack of states to return to. Each variant names
/// the construct that is handled on the next step; `End` is the terminal state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    // Stream / document level.
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    // Block style.
    BlockNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingValue,
    // Flow style.
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingValue,
    FlowMappingEmptyValue,
    // Terminal state.
    End,
}
49
50/// An event generated by the YAML parser.
51///
52/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
53/// [`EventReceiver`] trait.
54#[derive(Clone, PartialEq, Debug, Eq)]
55pub enum Event<'input> {
56    /// Reserved for internal use.
57    Nothing,
58    /// Event generated at the very beginning of parsing.
59    StreamStart,
60    /// Last event that will be generated by the parser. Signals EOF.
61    StreamEnd,
62    /// The start of a YAML document.
63    ///
64    /// When the boolean is `true`, it is an explicit document start
65    /// directive (`---`).
66    ///
67    /// When the boolean is `false`, it is an implicit document start
68    /// (without `---`).
69    DocumentStart(bool),
70    /// The YAML end document directive (`...`).
71    DocumentEnd,
72    /// A YAML Alias.
73    Alias(
74        /// The anchor ID the alias refers to.
75        usize,
76    ),
77    /// Value, style, `anchor_id`, tag
78    Scalar(
79        Cow<'input, str>,
80        ScalarStyle,
81        usize,
82        Option<Cow<'input, Tag>>,
83    ),
84    /// The start of a YAML sequence (array).
85    SequenceStart(
86        /// The anchor ID of the start of the sequence.
87        usize,
88        /// An optional tag
89        Option<Cow<'input, Tag>>,
90    ),
91    /// The end of a YAML sequence (array).
92    SequenceEnd,
93    /// The start of a YAML mapping (object, hash).
94    MappingStart(
95        /// The anchor ID of the start of the mapping.
96        usize,
97        /// An optional tag
98        Option<Cow<'input, Tag>>,
99    ),
100    /// The end of a YAML mapping (object, hash).
101    MappingEnd,
102}
103
/// A YAML tag.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Handle of the tag (`!` included).
    pub handle: String,
    /// The suffix of the tag.
    pub suffix: String,
}

impl Tag {
    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification specifies [a list of
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. This function
    /// checks whether _the handle_ (but not the suffix) is the handle for the YAML Core Schema.
    ///
    /// # Return
    /// Returns `true` if the handle is exactly `tag:yaml.org,2002:` (trailing colon included),
    /// `false` otherwise.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle == "tag:yaml.org,2002:"
    }
}

impl Display for Tag {
    /// Writes the handle immediately followed by the suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The handle already contains its `!` characters, so plain concatenation is correct for
        // every handle, the primary handle `!` included; no special case is needed.
        write!(f, "{}{}", self.handle, self.suffix)
    }
}
137
138impl<'input> Event<'input> {
139    /// Create an empty scalar.
140    fn empty_scalar() -> Self {
141        // a null scalar
142        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
143    }
144
145    /// Create an empty scalar with the given anchor.
146    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
147        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
148    }
149}
150
151/// A YAML parser.
152#[derive(Debug)]
153pub struct Parser<'input, T: BorrowedInput<'input>> {
154    /// The underlying scanner from which we pull tokens.
155    scanner: Scanner<'input, T>,
156    /// The stack of _previous_ states we were in.
157    ///
158    /// States are pushed in the context of subobjects to this stack. The top-most element is the
159    /// state in which to come back to when exiting the current state.
160    states: Vec<State>,
161    /// The state in which we currently are.
162    state: State,
163    /// The next token from the scanner.
164    token: Option<Token<'input>>,
165    /// The next YAML event to emit.
166    current: Option<(Event<'input>, Span)>,
167
168    /// Pending indentation hint to be attached to the next emitted event span.
169    ///
170    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
171    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
172    /// itself).
173    pending_key_indent: Option<usize>,
174    /// Anchors that have been encountered in the YAML document.
175    anchors: BTreeMap<Cow<'input, str>, usize>,
176    /// Next ID available for an anchor.
177    ///
178    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
179    /// return for the next anchor and the count of anchor IDs emitted.
180    anchor_id_count: usize,
181    /// The tag directives (`%TAG`) the parser has encountered.
182    ///
183    /// Key is the handle, and value is the prefix.
184    tags: BTreeMap<String, String>,
185    /// Whether we have emitted [`Event::StreamEnd`].
186    ///
187    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
188    /// [`Self::token`], this is set to `false`.
189    stream_end_emitted: bool,
190    /// Make tags global across all documents.
191    keep_tags: bool,
192}
193
194/// Trait to be implemented in order to use the low-level parsing API.
195///
196/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
197/// for each YAML [`Event`] that occurs.
198/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
199/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
200/// [`SpannedEventReceiver`] automatically.
201///
202/// # Event hierarchy
203/// The event stream starts with an [`Event::StreamStart`] event followed by an
204/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
205/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
206/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
207///
208/// In a mapping, key-values are sent as consecutive events. The first event after an
209/// [`Event::MappingStart`] will be the key, and following its value. If the mapping contains no
210/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd
211/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received.
212///
213/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
214///
215/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
216/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
217/// [`Event::MappingStart`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
218/// be part of the value and not another key-value pair or element in the sequence.
219///
220/// For instance, the following yaml:
221/// ```yaml
222/// a: b
223/// c:
224///   d: e
225/// f:
226///   - g
227///   - h
228/// ```
229/// will emit (indented and commented for visibility):
230/// ```text
231/// StreamStart, DocumentStart, MappingStart,
232///   Scalar("a", ..), Scalar("b", ..)
233///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
234///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
235/// MappingEnd, DocumentEnd, StreamEnd
236/// ```
237///
238/// # Example
239/// ```
240/// # use granit_parser::{Event, EventReceiver, Parser};
241/// #
242/// /// Sink of events. Collects them into an array.
243/// struct EventSink<'input> {
244///     events: Vec<Event<'input>>,
245/// }
246///
247/// /// Implement `on_event`, pushing into `self.events`.
248/// impl<'input> EventReceiver<'input> for EventSink<'input> {
249///     fn on_event(&mut self, ev: Event<'input>) {
250///         self.events.push(ev);
251///     }
252/// }
253///
254/// /// Load events from a yaml string.
255/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
256///     let mut sink = EventSink { events: Vec::new() };
257///     let mut parser = Parser::new_from_str(yaml);
258///     // Load events using our sink as the receiver.
259///     parser.load(&mut sink, true).unwrap();
260///     sink.events
261/// }
262/// ```
263pub trait EventReceiver<'input> {
264    /// Handler called for each YAML event that is emitted by the parser.
265    fn on_event(&mut self, ev: Event<'input>);
266}
267
268/// Trait to be implemented for using the low-level parsing API.
269///
270/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
271pub trait SpannedEventReceiver<'input> {
272    /// Handler called for each event that occurs.
273    fn on_event(&mut self, ev: Event<'input>, span: Span);
274}
275
276impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
277    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
278        self.on_event(ev);
279    }
280}
281
282/// Trait to be implemented for fallible event handling without source spans.
283///
284/// This is the fallible counterpart to [`EventReceiver`]. Use it with [`Parser::try_load`] when
285/// event handling may need to stop parsing by returning an application error.
286pub trait TryEventReceiver<'input> {
287    /// Error returned by this receiver.
288    type Error;
289
290    /// Handler called for each YAML event that is emitted by the parser.
291    ///
292    /// Returning an error stops [`Parser::try_load`] immediately.
293    ///
294    /// # Errors
295    /// Returns `Self::Error` when the receiver wants to stop parsing.
296    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
297}
298
299/// Trait to be implemented for fallible event handling with source spans.
300///
301/// This is the fallible counterpart to [`SpannedEventReceiver`]. Use it with
302/// [`Parser::try_load`] when event handling may need to stop parsing by returning an application
303/// error.
304pub trait TrySpannedEventReceiver<'input> {
305    /// Error returned by this receiver.
306    type Error;
307
308    /// Handler called for each event that occurs.
309    ///
310    /// Returning an error stops [`Parser::try_load`] immediately.
311    ///
312    /// # Errors
313    /// Returns `Self::Error` when the receiver wants to stop parsing.
314    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
315}
316
317impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
318    type Error = R::Error;
319
320    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
321        TryEventReceiver::on_event(self, ev)
322    }
323}
324
325/// Error returned by [`Parser::try_load`] and [`ParserTrait::try_load`].
326#[derive(Clone, PartialEq, Debug, Eq)]
327pub enum TryLoadError<E> {
328    /// Scanning or parsing failed.
329    Scan(
330        /// The scanner or parser error.
331        ScanError,
332    ),
333    /// The receiver returned an application error.
334    Receiver(
335        /// The error returned by the receiver.
336        E,
337    ),
338}
339
340impl<E> From<ScanError> for TryLoadError<E> {
341    fn from(error: ScanError) -> Self {
342        Self::Scan(error)
343    }
344}
345
346impl<E: Display> Display for TryLoadError<E> {
347    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
348        match self {
349            Self::Scan(error) => write!(f, "parser error: {error}"),
350            Self::Receiver(error) => write!(f, "receiver error: {error}"),
351        }
352    }
353}
354
355impl<E> core::error::Error for TryLoadError<E>
356where
357    E: core::error::Error + 'static,
358{
359    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
360        match self {
361            Self::Scan(error) => Some(error),
362            Self::Receiver(error) => Some(error),
363        }
364    }
365}
366
367fn try_emit<'input, R>(
368    recv: &mut R,
369    ev: Event<'input>,
370    span: Span,
371) -> Result<(), TryLoadError<R::Error>>
372where
373    R: TrySpannedEventReceiver<'input>,
374{
375    recv.on_event(ev, span).map_err(TryLoadError::Receiver)
376}
377
378struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
379
380impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
381    for InfallibleSpannedReceiver<'_, R>
382{
383    type Error = Infallible;
384
385    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
386        self.0.on_event(ev, span);
387        Ok(())
388    }
389}
390
391fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
392    match result {
393        Ok(()) => Ok(()),
394        Err(TryLoadError::Scan(error)) => Err(error),
395        Err(TryLoadError::Receiver(error)) => match error {},
396    }
397}
398
399/// A convenience alias for a `Result` of a parser event.
400pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
401
402/// Trait extracted from `Parser` to support mocking and alternative implementations.
403pub trait ParserTrait<'input> {
404    /// Try to load the next event and return it, but do not consuming it from `self`.
405    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
406
407    /// Try to load the next event and return it, consuming it from `self`.
408    fn next_event(&mut self) -> Option<ParseResult<'input>>;
409
410    /// Load the YAML from the stream in `self`, pushing events into `recv`.
411    ///
412    /// Use this method when event handling is infallible. If receiver code can return an
413    /// application error and should stop parsing, use [`ParserTrait::try_load`] instead. If the
414    /// caller should directly control when the next event is read, use [`ParserTrait::next_event`]
415    /// or [`Parser`]'s [`core::iter::Iterator`] implementation.
416    ///
417    /// # Errors
418    /// Returns `ScanError` when scanning or parsing the stream fails.
419    fn load<R: SpannedEventReceiver<'input>>(
420        &mut self,
421        recv: &mut R,
422        multi: bool,
423    ) -> Result<(), ScanError>;
424
425    /// Load the YAML from the stream in `self`, stopping if `recv` returns an error.
426    ///
427    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
428    /// inside the stream.
429    ///
430    /// If the receiver returns an error, the parser is left positioned immediately after the event
431    /// that caused the receiver error. Callers should treat the parser as partially consumed.
432    ///
433    /// # Errors
434    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
435    /// [`TryLoadError::Receiver`] when `recv` returns an error.
436    fn try_load<R: TrySpannedEventReceiver<'input>>(
437        &mut self,
438        recv: &mut R,
439        multi: bool,
440    ) -> Result<(), TryLoadError<R::Error>> {
441        while let Some(res) = self.next_event() {
442            let (ev, span) = res?;
443            let is_doc_end = matches!(ev, Event::DocumentEnd);
444            let is_stream_end = matches!(ev, Event::StreamEnd);
445
446            try_emit(recv, ev, span)?;
447
448            if is_stream_end {
449                break;
450            }
451            if !multi && is_doc_end {
452                break;
453            }
454        }
455
456        Ok(())
457    }
458}
459
460impl<'input> Parser<'input, StrInput<'input>> {
461    /// Create a new instance of a parser from a &str.
462    #[must_use]
463    pub fn new_from_str(value: &'input str) -> Self {
464        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
465        Parser::new(StrInput::new(value))
466    }
467}
468
469impl<T> Parser<'static, BufferedInput<T>>
470where
471    T: Iterator<Item = char>,
472{
473    /// Create a new instance of a parser from an iterator of `char`s.
474    #[must_use]
475    pub fn new_from_iter(iter: T) -> Self {
476        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
477        Parser::new(BufferedInput::new(iter))
478    }
479}
480
481impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
482    /// Get the current anchor offset count.
483    pub fn get_anchor_offset(&self) -> usize {
484        self.anchor_id_count
485    }
486
487    /// Set the current anchor offset count.
488    pub fn set_anchor_offset(&mut self, offset: usize) {
489        self.anchor_id_count = offset;
490    }
491
492    /// Create a new instance of a parser from the given input of characters.
493    pub fn new(src: T) -> Self {
494        Parser {
495            scanner: Scanner::new(src),
496            states: Vec::new(),
497            state: State::StreamStart,
498            token: None,
499            current: None,
500
501            pending_key_indent: None,
502
503            anchors: BTreeMap::new(),
504            // valid anchor_id starts from 1
505            anchor_id_count: 1,
506            tags: BTreeMap::new(),
507            stream_end_emitted: false,
508            keep_tags: false,
509        }
510    }
511
512    /// Whether to keep tags across multiple documents when parsing.
513    ///
514    /// This behavior is non-standard as per the YAML specification but can be encountered in the
515    /// wild. This boolean allows enabling this non-standard extension. This would result in the
516    /// parser accepting input from [test
517    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
518    /// of the yaml-test-suite:
519    ///
520    /// ```yaml
521    /// %TAG !prefix! tag:example.com,2011:
522    /// --- !prefix!A
523    /// a: b
524    /// --- !prefix!B
525    /// c: d
526    /// --- !prefix!C
527    /// e: f
528    /// ```
529    ///
530    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
531    /// only apply to the document immediately following them. This would error on `!prefix!B`.
532    ///
533    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
534    #[must_use]
535    pub fn keep_tags(mut self, value: bool) -> Self {
536        self.keep_tags = value;
537        self
538    }
539
540    /// Try to load the next event and return it, but do not consuming it from `self`.
541    ///
542    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
543    /// [`Iterator::next`] or [`Parser::load`].
544    ///
545    /// # Errors
546    /// Returns `ScanError` when loading the next event fails.
547    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
548        ParserTrait::peek(self)
549    }
550
551    /// Try to load the next event and return it, consuming it from `self`.
552    ///
553    /// # Errors
554    /// Returns `ScanError` when loading the next event fails.
555    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
556        ParserTrait::next_event(self)
557    }
558
559    /// Implementation function for [`Self::next_event`] without the `Option`.
560    ///
561    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
562    /// option. This burdens the parser code. This function is used internally when an option is
563    /// undesirable.
564    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
565    where
566        'input: 'a,
567    {
568        match self.current.take() {
569            None => self.parse(),
570            Some(v) => Ok(v),
571        }
572    }
573
574    /// Peek at the next token from the scanner.
575    fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
576        match self.token {
577            None => {
578                self.token = Some(self.scan_next_token()?);
579                Ok(self.token.as_ref().unwrap())
580            }
581            Some(ref tok) => Ok(tok),
582        }
583    }
584
585    /// Extract and return the next token from the scanner.
586    ///
587    /// This function does _not_ make use of `self.token`.
588    fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
589        let token = self.scanner.next();
590        match token {
591            None => match self.scanner.get_error() {
592                None => Err(self.unexpected_eof()),
593                Some(e) => Err(e),
594            },
595            Some(tok) => Ok(tok),
596        }
597    }
598
599    #[cold]
600    fn unexpected_eof(&self) -> ScanError {
601        let info = match self.state {
602            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
603                "unexpected EOF while parsing a flow sequence"
604            }
605            State::FlowMappingFirstKey
606            | State::FlowMappingKey
607            | State::FlowMappingValue
608            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
609            State::FlowSequenceEntryMappingKey
610            | State::FlowSequenceEntryMappingValue
611            | State::FlowSequenceEntryMappingEnd => {
612                "unexpected EOF while parsing an implicit flow mapping"
613            }
614            State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
615                "unexpected EOF while parsing a block sequence"
616            }
617            State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
618                "unexpected EOF while parsing a block mapping"
619            }
620            _ => "unexpected eof",
621        };
622        ScanError::new_str(self.scanner.mark(), info)
623    }
624
625    fn fetch_token<'a>(&mut self) -> Token<'a>
626    where
627        'input: 'a,
628    {
629        self.token
630            .take()
631            .expect("fetch_token needs to be preceded by peek_token")
632    }
633
634    /// Skip the next token from the scanner.
635    fn skip(&mut self) {
636        self.token = None;
637    }
638    /// Pops the top-most state and make it the current state.
639    fn pop_state(&mut self) {
640        self.state = self.states.pop().unwrap();
641    }
642    /// Push a new state atop the state stack.
643    fn push_state(&mut self, state: State) {
644        self.states.push(state);
645    }
646
647    fn parse<'a>(&mut self) -> ParseResult<'a>
648    where
649        'input: 'a,
650    {
651        if self.state == State::End {
652            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
653        }
654        let (ev, span) = self.state_machine()?;
655        if let Some(indent) = self.pending_key_indent.take() {
656            Ok((ev, span.with_indent(Some(indent))))
657        } else {
658            Ok((ev, span))
659        }
660    }
661
662    /// Load the YAML from the stream in `self`, pushing events into `recv`.
663    ///
664    /// The contents of the stream are parsed and the corresponding events are sent into the
665    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
666    ///
667    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
668    /// inside the stream.
669    ///
670    /// Use this method when event handling is infallible. If receiver code can return an
671    /// application error and should stop parsing, use [`Parser::try_load`] instead. If the caller
672    /// should directly control when the next event is read, use [`Parser`]'s
673    /// [`core::iter::Iterator`] implementation.
674    ///
675    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
676    /// former is enough to call this function.
677    ///
678    /// # Example
679    /// ```
680    /// # use granit_parser::{Event, EventReceiver, Parser};
681    /// # fn main() -> Result<(), granit_parser::ScanError> {
682    /// struct EventSink<'input> {
683    ///     events: Vec<Event<'input>>,
684    /// }
685    ///
686    /// impl<'input> EventReceiver<'input> for EventSink<'input> {
687    ///     fn on_event(&mut self, ev: Event<'input>) {
688    ///         self.events.push(ev);
689    ///     }
690    /// }
691    ///
692    /// let mut parser = Parser::new_from_str("a: 1\n");
693    /// let mut sink = EventSink { events: Vec::new() };
694    ///
695    /// parser.load(&mut sink, false)?;
696    ///
697    /// assert!(sink
698    ///     .events
699    ///     .iter()
700    ///     .any(|ev| matches!(ev, Event::Scalar(value, ..) if value == "a")));
701    /// # Ok(())
702    /// # }
703    /// ```
704    ///
705    /// # Errors
706    /// Returns `ScanError` when loading fails.
707    pub fn load<R: SpannedEventReceiver<'input>>(
708        &mut self,
709        recv: &mut R,
710        multi: bool,
711    ) -> Result<(), ScanError> {
712        ParserTrait::load(self, recv, multi)
713    }
714
715    /// Load the YAML from the stream in `self`, pushing events into `recv`.
716    ///
717    /// This is the fallible counterpart to [`Parser::load`]. If `recv` returns an error, parsing
718    /// stops immediately and that error is returned as [`TryLoadError::Receiver`].
719    ///
720    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
721    /// inside the stream.
722    ///
723    /// If the receiver returns an error, the parser is left positioned immediately after the event
724    /// that caused the receiver error. Callers should treat the parser as partially consumed.
725    ///
726    /// # Example
727    /// ```
728    /// # use granit_parser::{Event, Parser, TryEventReceiver, TryLoadError};
729    /// #[derive(Debug, PartialEq, Eq)]
730    /// enum ValidationError {
731    ///     ForbiddenScalar,
732    /// }
733    ///
734    /// struct Validator;
735    ///
736    /// impl<'input> TryEventReceiver<'input> for Validator {
737    ///     type Error = ValidationError;
738    ///
739    ///     fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
740    ///         if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
741    ///             Err(ValidationError::ForbiddenScalar)
742    ///         } else {
743    ///             Ok(())
744    ///         }
745    ///     }
746    /// }
747    ///
748    /// let mut parser = Parser::new_from_str("value: bad\n");
749    /// let mut validator = Validator;
750    ///
751    /// let err = parser.try_load(&mut validator, false).unwrap_err();
752    ///
753    /// assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenScalar));
754    /// ```
755    ///
756    /// # Errors
757    /// Returns [`TryLoadError::Scan`] when scanning or parsing the stream fails. Returns
758    /// [`TryLoadError::Receiver`] when `recv` returns an error.
759    pub fn try_load<R: TrySpannedEventReceiver<'input>>(
760        &mut self,
761        recv: &mut R,
762        multi: bool,
763    ) -> Result<(), TryLoadError<R::Error>> {
764        ParserTrait::try_load(self, recv, multi)
765    }
766
767    fn try_load_document<R: TrySpannedEventReceiver<'input>>(
768        &mut self,
769        first_ev: Event<'input>,
770        span: Span,
771        recv: &mut R,
772    ) -> Result<(), TryLoadError<R::Error>> {
773        if !matches!(first_ev, Event::DocumentStart(_)) {
774            return Err(TryLoadError::Scan(ScanError::new_str(
775                span.start,
776                "did not find expected <document-start>",
777            )));
778        }
779        try_emit(recv, first_ev, span)?;
780
781        let (ev, span) = self.next_event_impl()?;
782        self.try_load_node(ev, span, recv)?;
783
784        // DOCUMENT-END is expected.
785        let (ev, mark) = self.next_event_impl()?;
786        assert_eq!(ev, Event::DocumentEnd);
787        try_emit(recv, ev, mark)?;
788
789        Ok(())
790    }
791
792    fn try_load_node<R: TrySpannedEventReceiver<'input>>(
793        &mut self,
794        first_ev: Event<'input>,
795        span: Span,
796        recv: &mut R,
797    ) -> Result<(), TryLoadError<R::Error>> {
798        match first_ev {
799            Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
800            Event::SequenceStart(..) => {
801                try_emit(recv, first_ev, span)?;
802                self.try_load_sequence(recv)
803            }
804            Event::MappingStart(..) => {
805                try_emit(recv, first_ev, span)?;
806                self.try_load_mapping(recv)
807            }
808            _ => {
809                #[cfg(feature = "debug_prints")]
810                std::println!("UNREACHABLE EVENT: {first_ev:?}");
811                unreachable!();
812            }
813        }
814    }
815
816    fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
817        &mut self,
818        recv: &mut R,
819    ) -> Result<(), TryLoadError<R::Error>> {
820        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
821        while key_ev != Event::MappingEnd {
822            // key
823            self.try_load_node(key_ev, key_mark, recv)?;
824
825            // value
826            let (ev, mark) = self.next_event_impl()?;
827            self.try_load_node(ev, mark, recv)?;
828
829            // next event
830            let (ev, mark) = self.next_event_impl()?;
831            key_ev = ev;
832            key_mark = mark;
833        }
834        try_emit(recv, key_ev, key_mark)?;
835        Ok(())
836    }
837
838    fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
839        &mut self,
840        recv: &mut R,
841    ) -> Result<(), TryLoadError<R::Error>> {
842        let (mut ev, mut mark) = self.next_event_impl()?;
843        while ev != Event::SequenceEnd {
844            self.try_load_node(ev, mark, recv)?;
845
846            // next event
847            let (next_ev, next_mark) = self.next_event_impl()?;
848            ev = next_ev;
849            mark = next_mark;
850        }
851        try_emit(recv, ev, mark)?;
852        Ok(())
853    }
854
855    fn state_machine<'a>(&mut self) -> ParseResult<'a>
856    where
857        'input: 'a,
858    {
859        // let next_tok = self.peek_token().cloned()?;
860        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
861        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
862
863        match self.state {
864            State::StreamStart => self.stream_start(),
865
866            State::ImplicitDocumentStart => self.document_start(true),
867            State::DocumentStart => self.document_start(false),
868            State::DocumentContent => self.document_content(),
869            State::DocumentEnd => self.document_end(),
870
871            State::BlockNode => self.parse_node(true, false),
872            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
873            // State::FlowNode => self.parse_node(false, false),
874            State::BlockMappingFirstKey => self.block_mapping_key(true),
875            State::BlockMappingKey => self.block_mapping_key(false),
876            State::BlockMappingValue => self.block_mapping_value(),
877
878            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
879            State::BlockSequenceEntry => self.block_sequence_entry(false),
880
881            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
882            State::FlowSequenceEntry => self.flow_sequence_entry(false),
883
884            State::FlowMappingFirstKey => self.flow_mapping_key(true),
885            State::FlowMappingKey => self.flow_mapping_key(false),
886            State::FlowMappingValue => self.flow_mapping_value(false),
887
888            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
889
890            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
891            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
892            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
893            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
894
895            /* impossible */
896            State::End => unreachable!(),
897        }
898    }
899
900    fn stream_start<'a>(&mut self) -> ParseResult<'a>
901    where
902        'input: 'a,
903    {
904        match *self.peek_token()? {
905            Token(span, TokenType::StreamStart(_)) => {
906                self.state = State::ImplicitDocumentStart;
907                self.skip();
908                Ok((Event::StreamStart, span))
909            }
910            Token(span, _) => Err(ScanError::new_str(
911                span.start,
912                "did not find expected <stream-start>",
913            )),
914        }
915    }
916
917    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
918    where
919        'input: 'a,
920    {
921        while let TokenType::DocumentEnd = self.peek_token()?.1 {
922            self.skip();
923        }
924
925        // Anchors are scoped to a single document.
926        self.anchors.clear();
927
928        match *self.peek_token()? {
929            Token(span, TokenType::StreamEnd) => {
930                self.state = State::End;
931                self.skip();
932                Ok((Event::StreamEnd, span))
933            }
934            Token(
935                _,
936                TokenType::VersionDirective(..)
937                | TokenType::TagDirective(..)
938                | TokenType::ReservedDirective(..)
939                | TokenType::DocumentStart,
940            ) => {
941                // explicit document
942                self.explicit_document_start()
943            }
944            Token(span, _) if implicit => {
945                self.parser_process_directives()?;
946                self.push_state(State::DocumentEnd);
947                self.state = State::BlockNode;
948                Ok((Event::DocumentStart(false), span))
949            }
950            _ => {
951                // explicit document
952                self.explicit_document_start()
953            }
954        }
955    }
956
957    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
958        let mut version_directive_received = false;
959        let mut tags = if self.keep_tags {
960            self.tags.clone()
961        } else {
962            BTreeMap::new()
963        };
964        let mut document_tag_handles = BTreeSet::new();
965
966        loop {
967            match self.peek_token()? {
968                Token(span, TokenType::VersionDirective(_, _)) => {
969                    // XXX parsing with warning according to spec
970                    //if major != 1 || minor > 2 {
971                    //    return Err(ScanError::new_str(tok.0,
972                    //        "found incompatible YAML document"));
973                    //}
974                    if version_directive_received {
975                        return Err(ScanError::new_str(
976                            span.start,
977                            "duplicate version directive",
978                        ));
979                    }
980                    version_directive_received = true;
981                }
982                Token(mark, TokenType::TagDirective(handle, prefix)) => {
983                    if !document_tag_handles.insert(handle.to_string()) {
984                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
985                    }
986                    tags.insert(handle.to_string(), prefix.to_string());
987                }
988                Token(_, TokenType::ReservedDirective(_, _)) => {
989                    // Reserved directives are ignored
990                }
991                _ => break,
992            }
993            self.skip();
994        }
995
996        self.tags = tags;
997        Ok(())
998    }
999
1000    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1001    where
1002        'input: 'a,
1003    {
1004        self.parser_process_directives()?;
1005        match *self.peek_token()? {
1006            Token(mark, TokenType::DocumentStart) => {
1007                self.push_state(State::DocumentEnd);
1008                self.state = State::DocumentContent;
1009                self.skip();
1010                Ok((Event::DocumentStart(true), mark))
1011            }
1012            Token(span, _) => Err(ScanError::new_str(
1013                span.start,
1014                "did not find expected <document start>",
1015            )),
1016        }
1017    }
1018
1019    fn document_content<'a>(&mut self) -> ParseResult<'a>
1020    where
1021        'input: 'a,
1022    {
1023        match *self.peek_token()? {
1024            Token(
1025                mark,
1026                TokenType::VersionDirective(..)
1027                | TokenType::TagDirective(..)
1028                | TokenType::ReservedDirective(..)
1029                | TokenType::DocumentStart
1030                | TokenType::DocumentEnd
1031                | TokenType::StreamEnd,
1032            ) => {
1033                self.pop_state();
1034                // empty scalar
1035                Ok((Event::empty_scalar(), mark))
1036            }
1037            _ => self.parse_node(true, false),
1038        }
1039    }
1040
1041    fn document_end<'a>(&mut self) -> ParseResult<'a>
1042    where
1043        'input: 'a,
1044    {
1045        let mut explicit_end = false;
1046        let span: Span = match *self.peek_token()? {
1047            Token(span, TokenType::DocumentEnd) => {
1048                explicit_end = true;
1049                self.skip();
1050                span
1051            }
1052            Token(span, _) => span,
1053        };
1054
1055        if self.keep_tags {
1056            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
1057            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
1058            // explicit tags are interpreted later on.
1059            self.tags.remove("!!");
1060            self.tags.remove("");
1061        } else {
1062            self.tags.clear();
1063        }
1064        if explicit_end {
1065            self.state = State::ImplicitDocumentStart;
1066        } else {
1067            if let Token(
1068                span,
1069                TokenType::VersionDirective(..)
1070                | TokenType::TagDirective(..)
1071                | TokenType::ReservedDirective(..),
1072            ) = *self.peek_token()?
1073            {
1074                return Err(ScanError::new_str(
1075                    span.start,
1076                    "missing explicit document end marker before directive",
1077                ));
1078            }
1079            self.state = State::DocumentStart;
1080        }
1081
1082        Ok((Event::DocumentEnd, span))
1083    }
1084
1085    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1086        // anchors can be overridden/reused
1087        // if self.anchors.contains_key(name) {
1088        //     return Err(ScanError::new_str(*mark,
1089        //         "while parsing anchor, found duplicated anchor"));
1090        // }
1091        let new_id = self.anchor_id_count;
1092        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1093            ScanError::new_str(
1094                mark.start,
1095                "while parsing anchor, anchor count exceeded supported limit",
1096            )
1097        })?;
1098        self.anchors.insert(name, new_id);
1099        Ok(new_id)
1100    }
1101
1102    #[allow(clippy::too_many_lines)]
1103    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
1104    where
1105        'input: 'a,
1106    {
1107        let mut anchor_id = 0;
1108        let mut tag = None;
1109        match *self.peek_token()? {
1110            Token(_, TokenType::Alias(_)) => {
1111                self.pop_state();
1112                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
1113                    match self.anchors.get(&*name) {
1114                        None => {
1115                            return Err(ScanError::new_str(
1116                                span.start,
1117                                "while parsing node, found unknown anchor",
1118                            ))
1119                        }
1120                        Some(id) => return Ok((Event::Alias(*id), span)),
1121                    }
1122                }
1123                unreachable!()
1124            }
1125            Token(_, TokenType::Anchor(_)) => {
1126                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
1127                    anchor_id = self.register_anchor(name, &span)?;
1128                    if let TokenType::Tag(..) = self.peek_token()?.1 {
1129                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
1130                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
1131                        } else {
1132                            unreachable!()
1133                        }
1134                    }
1135                } else {
1136                    unreachable!()
1137                }
1138            }
1139            Token(mark, TokenType::Tag(..)) => {
1140                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
1141                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
1142                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
1143                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
1144                            anchor_id = self.register_anchor(name, &mark)?;
1145                        } else {
1146                            unreachable!()
1147                        }
1148                    }
1149                } else {
1150                    unreachable!()
1151                }
1152            }
1153            _ => {}
1154        }
1155        match *self.peek_token()? {
1156            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
1157                self.state = State::IndentlessSequenceEntry;
1158                Ok((Event::SequenceStart(anchor_id, tag), mark))
1159            }
1160            Token(_, TokenType::Scalar(..)) => {
1161                self.pop_state();
1162                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
1163                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
1164                } else {
1165                    unreachable!()
1166                }
1167            }
1168            Token(mark, TokenType::FlowSequenceStart) => {
1169                self.state = State::FlowSequenceFirstEntry;
1170                Ok((Event::SequenceStart(anchor_id, tag), mark))
1171            }
1172            Token(mark, TokenType::FlowMappingStart) => {
1173                self.state = State::FlowMappingFirstKey;
1174                Ok((Event::MappingStart(anchor_id, tag), mark))
1175            }
1176            Token(mark, TokenType::BlockSequenceStart) if block => {
1177                self.state = State::BlockSequenceFirstEntry;
1178                Ok((Event::SequenceStart(anchor_id, tag), mark))
1179            }
1180            Token(mark, TokenType::BlockMappingStart) if block => {
1181                self.state = State::BlockMappingFirstKey;
1182                Ok((Event::MappingStart(anchor_id, tag), mark))
1183            }
1184            // ex 7.2, an empty scalar can follow a secondary tag
1185            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
1186                self.pop_state();
1187                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
1188            }
1189            Token(span, _) => {
1190                let info = match self.state {
1191                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1192                        "unexpected EOF while parsing a flow sequence"
1193                    }
1194                    State::FlowMappingFirstKey
1195                    | State::FlowMappingKey
1196                    | State::FlowMappingValue
1197                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1198                    State::FlowSequenceEntryMappingKey
1199                    | State::FlowSequenceEntryMappingValue
1200                    | State::FlowSequenceEntryMappingEnd => {
1201                        "unexpected EOF while parsing an implicit flow mapping"
1202                    }
1203                    State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
1204                        "unexpected EOF while parsing a block sequence"
1205                    }
1206                    State::BlockMappingFirstKey
1207                    | State::BlockMappingKey
1208                    | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
1209                    _ => "while parsing a node, did not find expected node content",
1210                };
1211                Err(ScanError::new_str(span.start, info))
1212            }
1213        }
1214    }
1215
1216    fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1217    where
1218        'input: 'a,
1219    {
1220        // skip BlockMappingStart
1221        if first {
1222            let _ = self.peek_token()?;
1223            //self.marks.push(tok.0);
1224            self.skip();
1225        }
1226        match *self.peek_token()? {
1227            Token(_, TokenType::Key) => {
1228                // Indentation is only meaningful for block mapping keys.
1229                if let Token(key_span, TokenType::Key) = *self.peek_token()? {
1230                    self.pending_key_indent = Some(key_span.start.col());
1231                }
1232                self.skip();
1233                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1234                    *self.peek_token()?
1235                {
1236                    self.state = State::BlockMappingValue;
1237                    // empty scalar
1238                    Ok((Event::empty_scalar(), mark))
1239                } else {
1240                    self.push_state(State::BlockMappingValue);
1241                    self.parse_node(true, true)
1242                }
1243            }
1244            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
1245            Token(mark, TokenType::Value) => {
1246                self.state = State::BlockMappingValue;
1247                Ok((Event::empty_scalar(), mark))
1248            }
1249            Token(mark, TokenType::BlockEnd) => {
1250                self.pop_state();
1251                self.skip();
1252                Ok((Event::MappingEnd, mark))
1253            }
1254            Token(span, _) => Err(ScanError::new_str(
1255                span.start,
1256                "while parsing a block mapping, did not find expected key",
1257            )),
1258        }
1259    }
1260
1261    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1262    where
1263        'input: 'a,
1264    {
1265        match *self.peek_token()? {
1266            Token(mark, TokenType::Value) => {
1267                self.skip();
1268                if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1269                    *self.peek_token()?
1270                {
1271                    self.state = State::BlockMappingKey;
1272                    // empty scalar
1273                    Ok((Event::empty_scalar(), mark))
1274                } else {
1275                    self.push_state(State::BlockMappingKey);
1276                    self.parse_node(true, true)
1277                }
1278            }
1279            Token(mark, _) => {
1280                self.state = State::BlockMappingKey;
1281                // empty scalar
1282                Ok((Event::empty_scalar(), mark))
1283            }
1284        }
1285    }
1286
1287    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1288    where
1289        'input: 'a,
1290    {
1291        if first {
1292            let _ = self.peek_token()?;
1293            self.skip();
1294        }
1295        let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
1296            mark
1297        } else {
1298            if !first {
1299                match *self.peek_token()? {
1300                    Token(_, TokenType::FlowEntry) => self.skip(),
1301                    Token(span, _) => {
1302                        return Err(ScanError::new_str(
1303                            span.start,
1304                            "while parsing a flow mapping, did not find expected ',' or '}'",
1305                        ))
1306                    }
1307                }
1308            }
1309
1310            match *self.peek_token()? {
1311                Token(_, TokenType::Key) => {
1312                    self.skip();
1313                    if let Token(
1314                        mark,
1315                        TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
1316                    ) = *self.peek_token()?
1317                    {
1318                        self.state = State::FlowMappingValue;
1319                        return Ok((Event::empty_scalar(), mark));
1320                    }
1321                    self.push_state(State::FlowMappingValue);
1322                    return self.parse_node(false, false);
1323                }
1324                Token(marker, TokenType::Value) => {
1325                    self.state = State::FlowMappingValue;
1326                    return Ok((Event::empty_scalar(), marker));
1327                }
1328                Token(_, TokenType::FlowMappingEnd) => (),
1329                _ => {
1330                    self.push_state(State::FlowMappingEmptyValue);
1331                    return self.parse_node(false, false);
1332                }
1333            }
1334
1335            self.peek_token()?.0
1336        };
1337
1338        self.pop_state();
1339        self.skip();
1340        Ok((Event::MappingEnd, span))
1341    }
1342
1343    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1344    where
1345        'input: 'a,
1346    {
1347        let span: Span = {
1348            if empty {
1349                let Token(mark, _) = *self.peek_token()?;
1350                self.state = State::FlowMappingKey;
1351                return Ok((Event::empty_scalar(), mark));
1352            }
1353            match *self.peek_token()? {
1354                Token(span, TokenType::Value) => {
1355                    self.skip();
1356                    match self.peek_token()?.1 {
1357                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
1358                        _ => {
1359                            self.push_state(State::FlowMappingKey);
1360                            return self.parse_node(false, false);
1361                        }
1362                    }
1363                    span
1364                }
1365                Token(marker, _) => marker,
1366            }
1367        };
1368
1369        self.state = State::FlowMappingKey;
1370        Ok((Event::empty_scalar(), span))
1371    }
1372
1373    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1374    where
1375        'input: 'a,
1376    {
1377        // skip FlowMappingStart
1378        if first {
1379            let _ = self.peek_token()?;
1380            //self.marks.push(tok.0);
1381            self.skip();
1382        }
1383        match *self.peek_token()? {
1384            Token(mark, TokenType::FlowSequenceEnd) => {
1385                self.pop_state();
1386                self.skip();
1387                return Ok((Event::SequenceEnd, mark));
1388            }
1389            Token(_, TokenType::FlowEntry) if !first => {
1390                self.skip();
1391            }
1392            Token(span, _) if !first => {
1393                return Err(ScanError::new_str(
1394                    span.start,
1395                    "while parsing a flow sequence, expected ',' or ']'",
1396                ));
1397            }
1398            _ => { /* next */ }
1399        }
1400        match *self.peek_token()? {
1401            Token(mark, TokenType::FlowSequenceEnd) => {
1402                self.pop_state();
1403                self.skip();
1404                Ok((Event::SequenceEnd, mark))
1405            }
1406            Token(mark, TokenType::Key) => {
1407                self.state = State::FlowSequenceEntryMappingKey;
1408                self.skip();
1409                Ok((Event::MappingStart(0, None), mark))
1410            }
1411            _ => {
1412                self.push_state(State::FlowSequenceEntry);
1413                self.parse_node(false, false)
1414            }
1415        }
1416    }
1417
1418    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1419    where
1420        'input: 'a,
1421    {
1422        match *self.peek_token()? {
1423            Token(mark, TokenType::BlockEntry) => {
1424                self.skip();
1425                if let Token(
1426                    _,
1427                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1428                ) = *self.peek_token()?
1429                {
1430                    self.state = State::IndentlessSequenceEntry;
1431                    Ok((Event::empty_scalar(), mark))
1432                } else {
1433                    self.push_state(State::IndentlessSequenceEntry);
1434                    self.parse_node(true, false)
1435                }
1436            }
1437            Token(mark, _) => {
1438                self.pop_state();
1439                Ok((Event::SequenceEnd, mark))
1440            }
1441        }
1442    }
1443
1444    fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1445    where
1446        'input: 'a,
1447    {
1448        // BLOCK-SEQUENCE-START
1449        if first {
1450            let _ = self.peek_token()?;
1451            //self.marks.push(tok.0);
1452            self.skip();
1453        }
1454        match *self.peek_token()? {
1455            Token(mark, TokenType::BlockEnd) => {
1456                self.pop_state();
1457                self.skip();
1458                Ok((Event::SequenceEnd, mark))
1459            }
1460            Token(mark, TokenType::BlockEntry) => {
1461                self.skip();
1462                if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1463                    self.state = State::BlockSequenceEntry;
1464                    Ok((Event::empty_scalar(), mark))
1465                } else {
1466                    self.push_state(State::BlockSequenceEntry);
1467                    self.parse_node(true, false)
1468                }
1469            }
1470            Token(span, _) => Err(ScanError::new_str(
1471                span.start,
1472                "while parsing a block collection, did not find expected '-' indicator",
1473            )),
1474        }
1475    }
1476
1477    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1478    where
1479        'input: 'a,
1480    {
1481        if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1482            *self.peek_token()?
1483        {
1484            self.state = State::FlowSequenceEntryMappingValue;
1485            Ok((Event::empty_scalar(), mark))
1486        } else {
1487            self.push_state(State::FlowSequenceEntryMappingValue);
1488            self.parse_node(false, false)
1489        }
1490    }
1491
1492    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
1493    where
1494        'input: 'a,
1495    {
1496        match *self.peek_token()? {
1497            Token(_, TokenType::Value) => {
1498                self.skip();
1499                self.state = State::FlowSequenceEntryMappingValue;
1500                let Token(span, ref tok) = *self.peek_token()?;
1501                if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1502                    self.state = State::FlowSequenceEntryMappingEnd;
1503                    Ok((Event::empty_scalar(), Span::empty(span.start)))
1504                } else {
1505                    self.push_state(State::FlowSequenceEntryMappingEnd);
1506                    self.parse_node(false, false)
1507                }
1508            }
1509            Token(mark, _) => {
1510                self.state = State::FlowSequenceEntryMappingEnd;
1511                Ok((Event::empty_scalar(), mark))
1512            }
1513        }
1514    }
1515
1516    #[allow(clippy::unnecessary_wraps)]
1517    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
1518    where
1519        'input: 'a,
1520    {
1521        self.state = State::FlowSequenceEntry;
1522        let Token(span, _) = *self.peek_token()?;
1523        Ok((Event::MappingEnd, Span::empty(span.start)))
1524    }
1525
1526    /// Resolve a tag from the handle and the suffix.
1527    fn resolve_tag(
1528        &self,
1529        span: Span,
1530        handle: &Cow<'input, str>,
1531        suffix: Cow<'input, str>,
1532    ) -> Result<Cow<'input, Tag>, ScanError> {
1533        let suffix = suffix.into_owned();
1534        let tag = if handle == "!!" {
1535            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
1536            // overridden.
1537            Tag {
1538                handle: self
1539                    .tags
1540                    .get("!!")
1541                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1542                suffix,
1543            }
1544        } else if handle.is_empty() && suffix == "!" {
1545            // "!" introduces a local tag. Local tags may have their prefix overridden.
1546            match self.tags.get("") {
1547                Some(prefix) => Tag {
1548                    handle: prefix.clone(),
1549                    suffix,
1550                },
1551                None => Tag {
1552                    handle: String::new(),
1553                    suffix,
1554                },
1555            }
1556        } else {
1557            // Lookup handle in our tag directives.
1558            let prefix = self.tags.get(&**handle);
1559            if let Some(prefix) = prefix {
1560                Tag {
1561                    handle: prefix.clone(),
1562                    suffix,
1563                }
1564            } else {
1565                // Otherwise, it may be a local handle. With a local handle, the handle is set to
1566                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
1567                // If the handle is of the form "!foo!", this cannot be a local handle and we need
1568                // to error.
1569                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1570                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
1571                }
1572                Tag {
1573                    handle: handle.to_string(),
1574                    suffix,
1575                }
1576            }
1577        };
1578        Ok(Cow::Owned(tag))
1579    }
1580}
1581
1582impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
1583    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
1584        if let Some(ref x) = self.current {
1585            Some(Ok(x))
1586        } else {
1587            if self.stream_end_emitted {
1588                return None;
1589            }
1590            match self.next_event_impl() {
1591                Ok(token) => self.current = Some(token),
1592                Err(e) => return Some(Err(e)),
1593            }
1594            self.current.as_ref().map(Ok)
1595        }
1596    }
1597
1598    fn next_event(&mut self) -> Option<ParseResult<'input>> {
1599        if self.stream_end_emitted {
1600            return None;
1601        }
1602
1603        let tok = self.next_event_impl();
1604        if matches!(tok, Ok((Event::StreamEnd, _))) {
1605            self.stream_end_emitted = true;
1606        }
1607        Some(tok)
1608    }
1609
1610    fn load<R: SpannedEventReceiver<'input>>(
1611        &mut self,
1612        recv: &mut R,
1613        multi: bool,
1614    ) -> Result<(), ScanError> {
1615        let mut recv = InfallibleSpannedReceiver(recv);
1616        into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
1617    }
1618
1619    fn try_load<R: TrySpannedEventReceiver<'input>>(
1620        &mut self,
1621        recv: &mut R,
1622        multi: bool,
1623    ) -> Result<(), TryLoadError<R::Error>> {
1624        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
1625        if !self.scanner.stream_started() || stream_start_buffered {
1626            let (ev, span) = self.next_event_impl()?;
1627            if ev != Event::StreamStart {
1628                return Err(TryLoadError::Scan(ScanError::new_str(
1629                    span.start,
1630                    "did not find expected <stream-start>",
1631                )));
1632            }
1633            try_emit(recv, ev, span)?;
1634        }
1635
1636        if self.scanner.stream_ended() {
1637            // XXX has parsed?
1638            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
1639            return Ok(());
1640        }
1641        loop {
1642            let (ev, span) = self.next_event_impl()?;
1643            if ev == Event::StreamEnd {
1644                try_emit(recv, ev, span)?;
1645                return Ok(());
1646            }
1647            // clear anchors before a new document
1648            self.anchors.clear();
1649            self.try_load_document(ev, span, recv)?;
1650            if !multi {
1651                break;
1652            }
1653        }
1654        Ok(())
1655    }
1656}
1657
1658impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
1659    type Item = Result<(Event<'input>, Span), ScanError>;
1660
1661    fn next(&mut self) -> Option<Self::Item> {
1662        self.next_event()
1663    }
1664}
1665
1666#[cfg(test)]
1667mod test {
1668    use alloc::{
1669        borrow::ToOwned,
1670        string::{String, ToString},
1671        vec::Vec,
1672    };
1673
1674    use crate::scanner::{ScalarStyle, Span};
1675
1676    use super::{
1677        Event, EventReceiver, Parser, Tag, TryEventReceiver, TryLoadError, TrySpannedEventReceiver,
1678    };
1679
1680    #[derive(Default)]
1681    struct CollectingSink<'input> {
1682        events: Vec<Event<'input>>,
1683    }
1684
1685    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
1686        fn on_event(&mut self, ev: Event<'input>) {
1687            self.events.push(ev);
1688        }
1689    }
1690
1691    fn first_error_info(input: &str) -> String {
1692        for event in Parser::new_from_str(input) {
1693            if let Err(err) = event {
1694                return err.info().to_owned();
1695            }
1696        }
1697        panic!("expected parser error")
1698    }
1699
1700    #[test]
1701    fn display_resolved_core_tag_without_extra_bang() {
1702        let tag = Tag {
1703            handle: "tag:yaml.org,2002:".to_owned(),
1704            suffix: "str".to_owned(),
1705        };
1706
1707        assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
1708    }
1709
1710    #[test]
1711    fn tag_helpers_distinguish_core_and_local_tags() {
1712        let core = Tag {
1713            handle: "tag:yaml.org,2002:".to_owned(),
1714            suffix: "int".to_owned(),
1715        };
1716        let local = Tag {
1717            handle: "!".to_owned(),
1718            suffix: "thing".to_owned(),
1719        };
1720
1721        assert!(core.is_yaml_core_schema());
1722        assert!(!local.is_yaml_core_schema());
1723        assert_eq!(local.to_string(), "!thing");
1724    }
1725
1726    #[test]
1727    fn test_peek_eq_parse() {
1728        let s = "
1729a0 bb: val
1730a1: &x
1731    b1: 4
1732    b2: d
1733a2: 4
1734a3: [1, 2, 3]
1735a4:
1736    - [a1, a2]
1737    - 2
1738a5: *x
1739";
1740        let mut p = Parser::new_from_str(s);
1741        loop {
1742            let event_peek = p.peek().unwrap().unwrap().clone();
1743            let event = p.next_event().unwrap().unwrap();
1744            assert_eq!(event, event_peek);
1745            if event.0 == Event::StreamEnd {
1746                break;
1747            }
1748        }
1749    }
1750
1751    #[test]
1752    fn test_peek_and_next_return_none_after_stream_end() {
1753        let mut parser = Parser::new_from_str("");
1754
1755        assert!(matches!(
1756            parser.next_event().unwrap().unwrap().0,
1757            Event::StreamStart
1758        ));
1759        assert!(matches!(
1760            parser.next_event().unwrap().unwrap().0,
1761            Event::StreamEnd
1762        ));
1763        assert!(parser.next_event().is_none());
1764        assert!(parser.peek().is_none());
1765    }
1766
1767    #[test]
1768    fn test_load_after_stream_already_ended_emits_stream_end() {
1769        let mut parser = Parser::new_from_str("");
1770        while parser.next_event().is_some() {}
1771
1772        let mut sink = CollectingSink::default();
1773        parser.load(&mut sink, true).unwrap();
1774
1775        assert_eq!(sink.events, vec![Event::StreamEnd]);
1776    }
1777
1778    #[test]
1779    fn test_load_visits_nested_collection_events() {
1780        let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
1781        let mut sink = CollectingSink::default();
1782
1783        parser.load(&mut sink, true).unwrap();
1784
1785        assert_eq!(
1786            sink.events,
1787            vec![
1788                Event::StreamStart,
1789                Event::DocumentStart(false),
1790                Event::MappingStart(0, None),
1791                Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
1792                Event::SequenceStart(0, None),
1793                Event::MappingStart(0, None),
1794                Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
1795                Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
1796                Event::MappingEnd,
1797                Event::SequenceStart(0, None),
1798                Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
1799                Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
1800                Event::SequenceEnd,
1801                Event::SequenceEnd,
1802                Event::MappingEnd,
1803                Event::DocumentEnd,
1804                Event::StreamEnd,
1805            ]
1806        );
1807    }
1808
1809    #[derive(Clone, Debug, PartialEq, Eq)]
1810    enum ValidationError {
1811        ForbiddenValue,
1812    }
1813
1814    struct FailingSink<'input> {
1815        events: Vec<Event<'input>>,
1816    }
1817
1818    impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
1819        type Error = ValidationError;
1820
1821        fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
1822            let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
1823            self.events.push(ev);
1824            if should_fail {
1825                Err(ValidationError::ForbiddenValue)
1826            } else {
1827                Ok(())
1828            }
1829        }
1830    }
1831
1832    #[test]
1833    fn test_try_load_stops_on_receiver_error() {
1834        let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
1835        let mut sink = FailingSink { events: Vec::new() };
1836
1837        let err = parser.try_load(&mut sink, true).unwrap_err();
1838
1839        assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
1840        assert!(sink
1841            .events
1842            .iter()
1843            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
1844        assert!(sink
1845            .events
1846            .iter()
1847            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
1848        assert!(!sink
1849            .events
1850            .iter()
1851            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
1852    }
1853
1854    struct SpannedFailingSink {
1855        failed_span: Option<Span>,
1856    }
1857
1858    impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
1859        type Error = Span;
1860
1861        fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
1862            if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
1863                self.failed_span = Some(span);
1864                Err(span)
1865            } else {
1866                Ok(())
1867            }
1868        }
1869    }
1870
1871    #[test]
1872    fn test_try_load_spanned_receiver_gets_span() {
1873        let mut parser = Parser::new_from_str("value: bad\n");
1874        let mut sink = SpannedFailingSink { failed_span: None };
1875
1876        let err = parser.try_load(&mut sink, false).unwrap_err();
1877
1878        let TryLoadError::Receiver(span) = err else {
1879            panic!("expected receiver error");
1880        };
1881
1882        assert_eq!(Some(span), sink.failed_span);
1883        assert!(!span.is_empty());
1884    }
1885
1886    struct NeverFails {
1887        count: usize,
1888    }
1889
1890    impl<'input> TryEventReceiver<'input> for NeverFails {
1891        type Error = ValidationError;
1892
1893        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
1894            self.count += 1;
1895            Ok(())
1896        }
1897    }
1898
1899    #[test]
1900    fn test_try_load_returns_scan_error() {
1901        let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
1902        let mut sink = NeverFails { count: 0 };
1903
1904        let err = parser.try_load(&mut sink, true).unwrap_err();
1905
1906        let TryLoadError::Scan(err) = err else {
1907            panic!("expected scan error");
1908        };
1909        assert_eq!(err.info(), "duplicate version directive");
1910    }
1911
1912    #[test]
1913    fn test_try_load_after_stream_already_ended_emits_stream_end() {
1914        let mut parser = Parser::new_from_str("");
1915        while parser.next_event().is_some() {}
1916
1917        let mut sink = FailingSink { events: Vec::new() };
1918        parser.try_load(&mut sink, true).unwrap();
1919
1920        assert_eq!(sink.events, vec![Event::StreamEnd]);
1921    }
1922
1923    #[test]
1924    fn test_load_single_document_stops_before_next_document() {
1925        let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
1926        let mut sink = CollectingSink::default();
1927
1928        parser.load(&mut sink, false).unwrap();
1929
1930        assert!(sink
1931            .events
1932            .iter()
1933            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
1934        assert!(!sink
1935            .events
1936            .iter()
1937            .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
1938        assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
1939    }
1940
1941    #[test]
1942    fn test_duplicate_version_directive_errors() {
1943        assert_eq!(
1944            first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
1945            "duplicate version directive"
1946        );
1947    }
1948
1949    #[test]
1950    fn test_duplicate_tag_directive_errors() {
1951        assert_eq!(
1952            first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
1953            "the TAG directive must only be given at most once per handle in the same document"
1954        );
1955    }
1956
1957    #[test]
1958    fn test_directive_after_implicit_document_requires_explicit_end() {
1959        assert_eq!(
1960            first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
1961            "missing explicit document end marker before directive"
1962        );
1963    }
1964
1965    #[test]
1966    fn test_anchor_offset_overflow_reports_error() {
1967        let mut parser = Parser::new_from_str("&a value");
1968        parser.set_anchor_offset(usize::MAX);
1969
1970        let err = parser
1971            .find_map(Result::err)
1972            .expect("anchor registration should overflow");
1973
1974        assert_eq!(
1975            err.info(),
1976            "while parsing anchor, anchor count exceeded supported limit"
1977        );
1978    }
1979
1980    #[test]
1981    fn test_alias_resolves_to_registered_anchor_id() {
1982        let events = Parser::new_from_str("- &a value\n- *a\n")
1983            .map(|event| event.unwrap().0)
1984            .collect::<Vec<_>>();
1985
1986        assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
1987    }
1988
1989    #[test]
1990    fn test_anchor_then_tag_applies_both_to_scalar() {
1991        let events = Parser::new_from_str("&a !!str value")
1992            .map(|event| event.unwrap().0)
1993            .collect::<Vec<_>>();
1994
1995        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
1996            .iter()
1997            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
1998        else {
1999            panic!("expected tagged anchored scalar");
2000        };
2001
2002        assert_eq!(value, "value");
2003        assert_eq!(*anchor_id, 1);
2004        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2005        assert_eq!(tag.suffix, "str");
2006    }
2007
2008    #[test]
2009    fn test_tag_then_anchor_applies_both_to_scalar() {
2010        let events = Parser::new_from_str("!!str &a value")
2011            .map(|event| event.unwrap().0)
2012            .collect::<Vec<_>>();
2013
2014        let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2015            .iter()
2016            .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2017        else {
2018            panic!("expected tagged anchored scalar");
2019        };
2020
2021        assert_eq!(value, "value");
2022        assert_eq!(*anchor_id, 1);
2023        assert_eq!(tag.handle, "tag:yaml.org,2002:");
2024        assert_eq!(tag.suffix, "str");
2025    }
2026
2027    #[test]
2028    fn test_multiple_tag_directives_are_kept_within_document() {
2029        let text = r"
2030%TAG !a! tag:a,2024:
2031%TAG !b! tag:b,2024:
2032---
2033first: !a!x foo
2034second: !b!y bar
2035";
2036
2037        let mut seen_a = false;
2038        let mut seen_b = false;
2039        for event in Parser::new_from_str(text) {
2040            let (event, _) = event.unwrap();
2041            if let Event::Scalar(_, _, _, Some(tag)) = event {
2042                if tag.handle == "tag:a,2024:" {
2043                    seen_a = true;
2044                } else if tag.handle == "tag:b,2024:" {
2045                    seen_b = true;
2046                }
2047            }
2048        }
2049
2050        assert!(seen_a);
2051        assert!(seen_b);
2052    }
2053
2054    #[test]
2055    fn test_tags_are_cleared_when_next_document_has_no_directives() {
2056        let text = r"
2057%TAG !t! tag:test,2024:
2058--- !t!1
2059foo
2060--- !t!2
2061bar
2062";
2063
2064        let mut parser = Parser::new_from_str(text);
2065        for event in parser.by_ref() {
2066            let (event, _) = event.unwrap();
2067            if let Event::DocumentEnd = event {
2068                break;
2069            }
2070        }
2071
2072        match parser.next().unwrap().unwrap().0 {
2073            Event::DocumentStart(true) => {}
2074            _ => panic!("expected explicit second document start"),
2075        }
2076
2077        let err = parser.next().unwrap().unwrap_err();
2078        assert!(format!("{err}").contains("the handle wasn't declared"));
2079    }
2080
2081    #[test]
2082    fn test_pull_parser_clears_anchors_between_documents() {
2083        let mut parser = Parser::new_from_str(
2084            "--- &a value
2085--- *a
2086",
2087        );
2088
2089        for event in parser.by_ref() {
2090            let (event, _) = event.unwrap();
2091            if matches!(event, Event::DocumentEnd) {
2092                break;
2093            }
2094        }
2095
2096        match parser.next().unwrap().unwrap().0 {
2097            Event::DocumentStart(true) => {}
2098            _ => panic!("expected explicit second document start"),
2099        }
2100
2101        let err = parser.next().unwrap().unwrap_err();
2102        assert!(format!("{err}").contains("unknown anchor"));
2103    }
2104
2105    #[test]
2106    fn test_keep_tags_across_multiple_documents() {
2107        let text = r#"
2108%YAML 1.1
2109%TAG !t! tag:test,2024:
2110--- !t!1 &1
2111foo: "bar"
2112--- !t!2 &2
2113baz: "qux"
2114"#;
2115        for x in Parser::new_from_str(text).keep_tags(true) {
2116            let x = x.unwrap();
2117            if let Event::MappingStart(_, tag) = x.0 {
2118                let tag = tag.unwrap();
2119                assert_eq!(tag.handle, "tag:test,2024:");
2120            }
2121        }
2122
2123        for x in Parser::new_from_str(text).keep_tags(false) {
2124            if x.is_err() {
2125                // Test successful
2126                return;
2127            }
2128        }
2129        panic!("Test failed, did not encounter error")
2130    }
2131
2132    #[test]
2133    fn test_flow_sequence_mapping_allows_empty_key() {
2134        let parser = Parser::new_from_str("[?: value]");
2135        for event in parser {
2136            event.expect("parser should accept flow sequence mappings with empty keys");
2137        }
2138    }
2139
2140    #[test]
2141    fn test_keep_tags_does_not_persist_default_tag_handles() {
2142        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
2143
2144        let mut int_tags = Vec::new();
2145        for event in Parser::new_from_str(text).keep_tags(true) {
2146            let event = event.unwrap().0;
2147            if let Event::Scalar(_, _, _, Some(tag)) = event {
2148                if tag.suffix == "int" {
2149                    int_tags.push(tag.handle.clone());
2150                }
2151            }
2152        }
2153
2154        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
2155    }
2156
2157    #[test]
2158    fn test_load_after_peek_stream_start() {
2159        #[derive(Default)]
2160        struct Sink<'input> {
2161            events: Vec<Event<'input>>,
2162        }
2163
2164        impl<'input> EventReceiver<'input> for Sink<'input> {
2165            fn on_event(&mut self, ev: Event<'input>) {
2166                self.events.push(ev);
2167            }
2168        }
2169
2170        let mut parser = Parser::new_from_str("key: value\n");
2171        let mut sink = Sink::default();
2172
2173        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
2174        parser.load(&mut sink, false).unwrap();
2175
2176        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
2177        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
2178    }
2179}