Skip to main content

granit_parser/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10    BufferedInput,
11};
12
13use alloc::{
14    borrow::Cow,
15    collections::{BTreeMap, BTreeSet},
16    string::{String, ToString},
17    vec::Vec,
18};
19use core::fmt::Display;
20
/// Internal states of the parser's state machine.
///
/// The current state selects which handler `state_machine` dispatches to when the next event is
/// requested.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    // Stream and document level.
    /// Initial state; handled by `stream_start`.
    StreamStart,
    /// Handled by `document_start(true)`: a document may begin without `---`.
    ImplicitDocumentStart,
    /// Handled by `document_start(false)`.
    DocumentStart,
    /// Handled by `document_content`.
    DocumentContent,
    /// Handled by `document_end`.
    DocumentEnd,

    // Block style.
    /// Handled by `parse_node(true, false)`.
    BlockNode,
    /// Handled by `block_sequence_entry(true)`.
    BlockSequenceFirstEntry,
    /// Handled by `block_sequence_entry(false)`.
    BlockSequenceEntry,
    /// Handled by `indentless_sequence_entry`.
    IndentlessSequenceEntry,
    /// Handled by `block_mapping_key(true)`.
    BlockMappingFirstKey,
    /// Handled by `block_mapping_key(false)`.
    BlockMappingKey,
    /// Handled by `block_mapping_value`.
    BlockMappingValue,

    // Flow style.
    /// Handled by `flow_sequence_entry(true)`.
    FlowSequenceFirstEntry,
    /// Handled by `flow_sequence_entry(false)`.
    FlowSequenceEntry,
    /// Handled by `flow_sequence_entry_mapping_key`.
    FlowSequenceEntryMappingKey,
    /// Handled by `flow_sequence_entry_mapping_value`.
    FlowSequenceEntryMappingValue,
    /// Handled by `flow_sequence_entry_mapping_end`.
    FlowSequenceEntryMappingEnd,
    /// Handled by `flow_mapping_key(true)`.
    FlowMappingFirstKey,
    /// Handled by `flow_mapping_key(false)`.
    FlowMappingKey,
    /// Handled by `flow_mapping_value(false)`.
    FlowMappingValue,
    /// Handled by `flow_mapping_value(true)`.
    FlowMappingEmptyValue,

    /// Terminal state: `parse` answers with `Event::StreamEnd` without consulting the state
    /// machine.
    End,
}
46
47/// An event generated by the YAML parser.
48///
49/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
50/// [`EventReceiver`] trait.
51#[derive(Clone, PartialEq, Debug, Eq)]
52pub enum Event<'input> {
53    /// Reserved for internal use.
54    Nothing,
55    /// Event generated at the very beginning of parsing.
56    StreamStart,
57    /// Last event that will be generated by the parser. Signals EOF.
58    StreamEnd,
59    /// The start of a YAML document.
60    ///
61    /// When the boolean is `true`, it is an explicit document start
62    /// directive (`---`).
63    ///
64    /// When the boolean is `false`, it is an implicit document start
65    /// (without `---`).
66    DocumentStart(bool),
67    /// The YAML end document directive (`...`).
68    DocumentEnd,
69    /// A YAML Alias.
70    Alias(
71        /// The anchor ID the alias refers to.
72        usize,
73    ),
74    /// Value, style, `anchor_id`, tag
75    Scalar(
76        Cow<'input, str>,
77        ScalarStyle,
78        usize,
79        Option<Cow<'input, Tag>>,
80    ),
81    /// The start of a YAML sequence (array).
82    SequenceStart(
83        /// The anchor ID of the start of the sequence.
84        usize,
85        /// An optional tag
86        Option<Cow<'input, Tag>>,
87    ),
88    /// The end of a YAML sequence (array).
89    SequenceEnd,
90    /// The start of a YAML mapping (object, hash).
91    MappingStart(
92        /// The anchor ID of the start of the mapping.
93        usize,
94        /// An optional tag
95        Option<Cow<'input, Tag>>,
96    ),
97    /// The end of a YAML mapping (object, hash).
98    MappingEnd,
99}
100
/// A YAML tag.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Handle of the tag (`!` included).
    pub handle: String,
    /// The suffix of the tag.
    pub suffix: String,
}

impl Tag {
    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification specifies [a list of
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. This function
    /// checks whether _the handle_ (but not the suffix) is the handle for the YAML Core Schema.
    ///
    /// # Return
    /// Returns `true` if the handle is exactly `tag:yaml.org,2002:` (trailing colon included),
    /// `false` otherwise.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle == "tag:yaml.org,2002:"
    }
}

impl Display for Tag {
    /// Writes the handle immediately followed by the suffix, with no separator.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // A `!` handle needs no special case: `"!" + suffix` is exactly what plain
        // concatenation produces.
        write!(f, "{}{}", self.handle, self.suffix)
    }
}
134
135impl<'input> Event<'input> {
136    /// Create an empty scalar.
137    fn empty_scalar() -> Self {
138        // a null scalar
139        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
140    }
141
142    /// Create an empty scalar with the given anchor.
143    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
144        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
145    }
146}
147
148/// A YAML parser.
149#[derive(Debug)]
150pub struct Parser<'input, T: BorrowedInput<'input>> {
151    /// The underlying scanner from which we pull tokens.
152    scanner: Scanner<'input, T>,
153    /// The stack of _previous_ states we were in.
154    ///
155    /// States are pushed in the context of subobjects to this stack. The top-most element is the
156    /// state in which to come back to when exiting the current state.
157    states: Vec<State>,
158    /// The state in which we currently are.
159    state: State,
160    /// The next token from the scanner.
161    token: Option<Token<'input>>,
162    /// The next YAML event to emit.
163    current: Option<(Event<'input>, Span)>,
164
165    /// Pending indentation hint to be attached to the next emitted event span.
166    ///
167    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
168    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
169    /// itself).
170    pending_key_indent: Option<usize>,
171    /// Anchors that have been encountered in the YAML document.
172    anchors: BTreeMap<Cow<'input, str>, usize>,
173    /// Next ID available for an anchor.
174    ///
175    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
176    /// return for the next anchor and the count of anchor IDs emitted.
177    anchor_id_count: usize,
178    /// The tag directives (`%TAG`) the parser has encountered.
179    ///
180    /// Key is the handle, and value is the prefix.
181    tags: BTreeMap<String, String>,
182    /// Whether we have emitted [`Event::StreamEnd`].
183    ///
184    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
185    /// [`Self::token`], this is set to `false`.
186    stream_end_emitted: bool,
187    /// Make tags global across all documents.
188    keep_tags: bool,
189}
190
191/// Trait to be implemented in order to use the low-level parsing API.
192///
193/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
194/// for each YAML [`Event`] that occurs.
195/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
196/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
197/// [`SpannedEventReceiver`] automatically.
198///
199/// # Event hierarchy
200/// The event stream starts with an [`Event::StreamStart`] event followed by an
201/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
202/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
203/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
204///
205/// In a mapping, key-values are sent as consecutive events. The first event after an
206/// [`Event::MappingStart`] will be the key, and following its value. If the mapping contains no
207/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd
208/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received.
209///
210/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
211///
212/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
213/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
214/// [`Event::MappingStart`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
215/// be part of the value and not another key-value pair or element in the sequence.
216///
217/// For instance, the following yaml:
218/// ```yaml
219/// a: b
220/// c:
221///   d: e
222/// f:
223///   - g
224///   - h
225/// ```
226/// will emit (indented and commented for visibility):
227/// ```text
228/// StreamStart, DocumentStart, MappingStart,
229///   Scalar("a", ..), Scalar("b", ..)
230///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
231///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
232/// MappingEnd, DocumentEnd, StreamEnd
233/// ```
234///
235/// # Example
236/// ```
237/// # use granit_parser::{Event, EventReceiver, Parser};
238/// #
239/// /// Sink of events. Collects them into an array.
240/// struct EventSink<'input> {
241///     events: Vec<Event<'input>>,
242/// }
243///
244/// /// Implement `on_event`, pushing into `self.events`.
245/// impl<'input> EventReceiver<'input> for EventSink<'input> {
246///     fn on_event(&mut self, ev: Event<'input>) {
247///         self.events.push(ev);
248///     }
249/// }
250///
251/// /// Load events from a yaml string.
252/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
253///     let mut sink = EventSink { events: Vec::new() };
254///     let mut parser = Parser::new_from_str(yaml);
255///     // Load events using our sink as the receiver.
256///     parser.load(&mut sink, true).unwrap();
257///     sink.events
258/// }
259/// ```
260pub trait EventReceiver<'input> {
261    /// Handler called for each YAML event that is emitted by the parser.
262    fn on_event(&mut self, ev: Event<'input>);
263}
264
265/// Trait to be implemented for using the low-level parsing API.
266///
267/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
268pub trait SpannedEventReceiver<'input> {
269    /// Handler called for each event that occurs.
270    fn on_event(&mut self, ev: Event<'input>, span: Span);
271}
272
273impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
274    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
275        self.on_event(ev);
276    }
277}
278
279/// A convenience alias for a `Result` of a parser event.
280pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
281
282/// Trait extracted from `Parser` to support mocking and alternative implementations.
283pub trait ParserTrait<'input> {
284    /// Try to load the next event and return it, but do not consuming it from `self`.
285    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
286
287    /// Try to load the next event and return it, consuming it from `self`.
288    fn next_event(&mut self) -> Option<ParseResult<'input>>;
289
290    /// Load the YAML from the stream in `self`, pushing events into `recv`.
291    ///
292    /// # Errors
293    /// Returns `ScanError` when scanning or parsing the stream fails.
294    fn load<R: SpannedEventReceiver<'input>>(
295        &mut self,
296        recv: &mut R,
297        multi: bool,
298    ) -> Result<(), ScanError>;
299}
300
301impl<'input> Parser<'input, StrInput<'input>> {
302    /// Create a new instance of a parser from a &str.
303    #[must_use]
304    pub fn new_from_str(value: &'input str) -> Self {
305        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
306        Parser::new(StrInput::new(value))
307    }
308}
309
310impl<T> Parser<'static, BufferedInput<T>>
311where
312    T: Iterator<Item = char>,
313{
314    /// Create a new instance of a parser from an iterator of `char`s.
315    #[must_use]
316    pub fn new_from_iter(iter: T) -> Self {
317        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
318        Parser::new(BufferedInput::new(iter))
319    }
320}
321
322impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
323    /// Get the current anchor offset count.
324    pub fn get_anchor_offset(&self) -> usize {
325        self.anchor_id_count
326    }
327
328    /// Set the current anchor offset count.
329    pub fn set_anchor_offset(&mut self, offset: usize) {
330        self.anchor_id_count = offset;
331    }
332
333    /// Create a new instance of a parser from the given input of characters.
334    pub fn new(src: T) -> Self {
335        Parser {
336            scanner: Scanner::new(src),
337            states: Vec::new(),
338            state: State::StreamStart,
339            token: None,
340            current: None,
341
342            pending_key_indent: None,
343
344            anchors: BTreeMap::new(),
345            // valid anchor_id starts from 1
346            anchor_id_count: 1,
347            tags: BTreeMap::new(),
348            stream_end_emitted: false,
349            keep_tags: false,
350        }
351    }
352
353    /// Whether to keep tags across multiple documents when parsing.
354    ///
355    /// This behavior is non-standard as per the YAML specification but can be encountered in the
356    /// wild. This boolean allows enabling this non-standard extension. This would result in the
357    /// parser accepting input from [test
358    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
359    /// of the yaml-test-suite:
360    ///
361    /// ```yaml
362    /// %TAG !prefix! tag:example.com,2011:
363    /// --- !prefix!A
364    /// a: b
365    /// --- !prefix!B
366    /// c: d
367    /// --- !prefix!C
368    /// e: f
369    /// ```
370    ///
371    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
372    /// only apply to the document immediately following them. This would error on `!prefix!B`.
373    ///
374    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
375    #[must_use]
376    pub fn keep_tags(mut self, value: bool) -> Self {
377        self.keep_tags = value;
378        self
379    }
380
381    /// Try to load the next event and return it, but do not consuming it from `self`.
382    ///
383    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
384    /// [`Iterator::next`] or [`Parser::load`].
385    ///
386    /// # Errors
387    /// Returns `ScanError` when loading the next event fails.
388    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
389        ParserTrait::peek(self)
390    }
391
392    /// Try to load the next event and return it, consuming it from `self`.
393    ///
394    /// # Errors
395    /// Returns `ScanError` when loading the next event fails.
396    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
397        ParserTrait::next_event(self)
398    }
399
400    /// Implementation function for [`Self::next_event`] without the `Option`.
401    ///
402    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
403    /// option. This burdens the parser code. This function is used internally when an option is
404    /// undesirable.
405    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
406    where
407        'input: 'a,
408    {
409        match self.current.take() {
410            None => self.parse(),
411            Some(v) => Ok(v),
412        }
413    }
414
415    /// Peek at the next token from the scanner.
416    fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
417        match self.token {
418            None => {
419                self.token = Some(self.scan_next_token()?);
420                Ok(self.token.as_ref().unwrap())
421            }
422            Some(ref tok) => Ok(tok),
423        }
424    }
425
426    /// Extract and return the next token from the scanner.
427    ///
428    /// This function does _not_ make use of `self.token`.
429    fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
430        let token = self.scanner.next();
431        match token {
432            None => match self.scanner.get_error() {
433                None => Err(self.unexpected_eof()),
434                Some(e) => Err(e),
435            },
436            Some(tok) => Ok(tok),
437        }
438    }
439
440    #[cold]
441    fn unexpected_eof(&self) -> ScanError {
442        let info = match self.state {
443            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
444                "unexpected EOF while parsing a flow sequence"
445            }
446            State::FlowMappingFirstKey
447            | State::FlowMappingKey
448            | State::FlowMappingValue
449            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
450            State::FlowSequenceEntryMappingKey
451            | State::FlowSequenceEntryMappingValue
452            | State::FlowSequenceEntryMappingEnd => {
453                "unexpected EOF while parsing an implicit flow mapping"
454            }
455            State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
456                "unexpected EOF while parsing a block sequence"
457            }
458            State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
459                "unexpected EOF while parsing a block mapping"
460            }
461            _ => "unexpected eof",
462        };
463        ScanError::new_str(self.scanner.mark(), info)
464    }
465
466    fn fetch_token<'a>(&mut self) -> Token<'a>
467    where
468        'input: 'a,
469    {
470        self.token
471            .take()
472            .expect("fetch_token needs to be preceded by peek_token")
473    }
474
475    /// Skip the next token from the scanner.
476    fn skip(&mut self) {
477        self.token = None;
478    }
479    /// Pops the top-most state and make it the current state.
480    fn pop_state(&mut self) {
481        self.state = self.states.pop().unwrap();
482    }
483    /// Push a new state atop the state stack.
484    fn push_state(&mut self, state: State) {
485        self.states.push(state);
486    }
487
488    fn parse<'a>(&mut self) -> ParseResult<'a>
489    where
490        'input: 'a,
491    {
492        if self.state == State::End {
493            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
494        }
495        let (ev, span) = self.state_machine()?;
496        if let Some(indent) = self.pending_key_indent.take() {
497            Ok((ev, span.with_indent(Some(indent))))
498        } else {
499            Ok((ev, span))
500        }
501    }
502
503    /// Load the YAML from the stream in `self`, pushing events into `recv`.
504    ///
505    /// The contents of the stream are parsed and the corresponding events are sent into the
506    /// recveiver. For detailed explanations about how events work, see [`EventReceiver`].
507    ///
508    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
509    /// inside the stream.
510    ///
511    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
512    /// former is enough to call this function.
513    /// # Errors
514    /// Returns `ScanError` when loading fails.
515    pub fn load<R: SpannedEventReceiver<'input>>(
516        &mut self,
517        recv: &mut R,
518        multi: bool,
519    ) -> Result<(), ScanError> {
520        ParserTrait::load(self, recv, multi)
521    }
522
523    fn load_document<R: SpannedEventReceiver<'input>>(
524        &mut self,
525        first_ev: Event<'input>,
526        span: Span,
527        recv: &mut R,
528    ) -> Result<(), ScanError> {
529        if !matches!(first_ev, Event::DocumentStart(_)) {
530            return Err(ScanError::new_str(
531                span.start,
532                "did not find expected <document-start>",
533            ));
534        }
535        recv.on_event(first_ev, span);
536
537        let (ev, span) = self.next_event_impl()?;
538        self.load_node(ev, span, recv)?;
539
540        // DOCUMENT-END is expected.
541        let (ev, mark) = self.next_event_impl()?;
542        assert_eq!(ev, Event::DocumentEnd);
543        recv.on_event(ev, mark);
544
545        Ok(())
546    }
547
548    fn load_node<R: SpannedEventReceiver<'input>>(
549        &mut self,
550        first_ev: Event<'input>,
551        span: Span,
552        recv: &mut R,
553    ) -> Result<(), ScanError> {
554        match first_ev {
555            Event::Alias(..) | Event::Scalar(..) => {
556                recv.on_event(first_ev, span);
557                Ok(())
558            }
559            Event::SequenceStart(..) => {
560                recv.on_event(first_ev, span);
561                self.load_sequence(recv)
562            }
563            Event::MappingStart(..) => {
564                recv.on_event(first_ev, span);
565                self.load_mapping(recv)
566            }
567            _ => {
568                #[cfg(feature = "debug_prints")]
569                std::println!("UNREACHABLE EVENT: {first_ev:?}");
570                unreachable!();
571            }
572        }
573    }
574
575    fn load_mapping<R: SpannedEventReceiver<'input>>(
576        &mut self,
577        recv: &mut R,
578    ) -> Result<(), ScanError> {
579        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
580        while key_ev != Event::MappingEnd {
581            // key
582            self.load_node(key_ev, key_mark, recv)?;
583
584            // value
585            let (ev, mark) = self.next_event_impl()?;
586            self.load_node(ev, mark, recv)?;
587
588            // next event
589            let (ev, mark) = self.next_event_impl()?;
590            key_ev = ev;
591            key_mark = mark;
592        }
593        recv.on_event(key_ev, key_mark);
594        Ok(())
595    }
596
597    fn load_sequence<R: SpannedEventReceiver<'input>>(
598        &mut self,
599        recv: &mut R,
600    ) -> Result<(), ScanError> {
601        let (mut ev, mut mark) = self.next_event_impl()?;
602        while ev != Event::SequenceEnd {
603            self.load_node(ev, mark, recv)?;
604
605            // next event
606            let (next_ev, next_mark) = self.next_event_impl()?;
607            ev = next_ev;
608            mark = next_mark;
609        }
610        recv.on_event(ev, mark);
611        Ok(())
612    }
613
614    fn state_machine<'a>(&mut self) -> ParseResult<'a>
615    where
616        'input: 'a,
617    {
618        // let next_tok = self.peek_token().cloned()?;
619        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
620        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
621
622        match self.state {
623            State::StreamStart => self.stream_start(),
624
625            State::ImplicitDocumentStart => self.document_start(true),
626            State::DocumentStart => self.document_start(false),
627            State::DocumentContent => self.document_content(),
628            State::DocumentEnd => self.document_end(),
629
630            State::BlockNode => self.parse_node(true, false),
631            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
632            // State::FlowNode => self.parse_node(false, false),
633            State::BlockMappingFirstKey => self.block_mapping_key(true),
634            State::BlockMappingKey => self.block_mapping_key(false),
635            State::BlockMappingValue => self.block_mapping_value(),
636
637            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
638            State::BlockSequenceEntry => self.block_sequence_entry(false),
639
640            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
641            State::FlowSequenceEntry => self.flow_sequence_entry(false),
642
643            State::FlowMappingFirstKey => self.flow_mapping_key(true),
644            State::FlowMappingKey => self.flow_mapping_key(false),
645            State::FlowMappingValue => self.flow_mapping_value(false),
646
647            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
648
649            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
650            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
651            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
652            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
653
654            /* impossible */
655            State::End => unreachable!(),
656        }
657    }
658
659    fn stream_start<'a>(&mut self) -> ParseResult<'a>
660    where
661        'input: 'a,
662    {
663        match *self.peek_token()? {
664            Token(span, TokenType::StreamStart(_)) => {
665                self.state = State::ImplicitDocumentStart;
666                self.skip();
667                Ok((Event::StreamStart, span))
668            }
669            Token(span, _) => Err(ScanError::new_str(
670                span.start,
671                "did not find expected <stream-start>",
672            )),
673        }
674    }
675
676    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
677    where
678        'input: 'a,
679    {
680        while let TokenType::DocumentEnd = self.peek_token()?.1 {
681            self.skip();
682        }
683
684        // Anchors are scoped to a single document.
685        self.anchors.clear();
686
687        match *self.peek_token()? {
688            Token(span, TokenType::StreamEnd) => {
689                self.state = State::End;
690                self.skip();
691                Ok((Event::StreamEnd, span))
692            }
693            Token(
694                _,
695                TokenType::VersionDirective(..)
696                | TokenType::TagDirective(..)
697                | TokenType::ReservedDirective(..)
698                | TokenType::DocumentStart,
699            ) => {
700                // explicit document
701                self.explicit_document_start()
702            }
703            Token(span, _) if implicit => {
704                self.parser_process_directives()?;
705                self.push_state(State::DocumentEnd);
706                self.state = State::BlockNode;
707                Ok((Event::DocumentStart(false), span))
708            }
709            _ => {
710                // explicit document
711                self.explicit_document_start()
712            }
713        }
714    }
715
716    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
717        let mut version_directive_received = false;
718        let mut tags = if self.keep_tags {
719            self.tags.clone()
720        } else {
721            BTreeMap::new()
722        };
723        let mut document_tag_handles = BTreeSet::new();
724
725        loop {
726            match self.peek_token()? {
727                Token(span, TokenType::VersionDirective(_, _)) => {
728                    // XXX parsing with warning according to spec
729                    //if major != 1 || minor > 2 {
730                    //    return Err(ScanError::new_str(tok.0,
731                    //        "found incompatible YAML document"));
732                    //}
733                    if version_directive_received {
734                        return Err(ScanError::new_str(
735                            span.start,
736                            "duplicate version directive",
737                        ));
738                    }
739                    version_directive_received = true;
740                }
741                Token(mark, TokenType::TagDirective(handle, prefix)) => {
742                    if !document_tag_handles.insert(handle.to_string()) {
743                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
744                    }
745                    tags.insert(handle.to_string(), prefix.to_string());
746                }
747                Token(_, TokenType::ReservedDirective(_, _)) => {
748                    // Reserved directives are ignored
749                }
750                _ => break,
751            }
752            self.skip();
753        }
754
755        self.tags = tags;
756        Ok(())
757    }
758
759    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
760    where
761        'input: 'a,
762    {
763        self.parser_process_directives()?;
764        match *self.peek_token()? {
765            Token(mark, TokenType::DocumentStart) => {
766                self.push_state(State::DocumentEnd);
767                self.state = State::DocumentContent;
768                self.skip();
769                Ok((Event::DocumentStart(true), mark))
770            }
771            Token(span, _) => Err(ScanError::new_str(
772                span.start,
773                "did not find expected <document start>",
774            )),
775        }
776    }
777
778    fn document_content<'a>(&mut self) -> ParseResult<'a>
779    where
780        'input: 'a,
781    {
782        match *self.peek_token()? {
783            Token(
784                mark,
785                TokenType::VersionDirective(..)
786                | TokenType::TagDirective(..)
787                | TokenType::ReservedDirective(..)
788                | TokenType::DocumentStart
789                | TokenType::DocumentEnd
790                | TokenType::StreamEnd,
791            ) => {
792                self.pop_state();
793                // empty scalar
794                Ok((Event::empty_scalar(), mark))
795            }
796            _ => self.parse_node(true, false),
797        }
798    }
799
800    fn document_end<'a>(&mut self) -> ParseResult<'a>
801    where
802        'input: 'a,
803    {
804        let mut explicit_end = false;
805        let span: Span = match *self.peek_token()? {
806            Token(span, TokenType::DocumentEnd) => {
807                explicit_end = true;
808                self.skip();
809                span
810            }
811            Token(span, _) => span,
812        };
813
814        if self.keep_tags {
815            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
816            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
817            // explicit tags are interpreted later on.
818            self.tags.remove("!!");
819            self.tags.remove("");
820        } else {
821            self.tags.clear();
822        }
823        if explicit_end {
824            self.state = State::ImplicitDocumentStart;
825        } else {
826            if let Token(
827                span,
828                TokenType::VersionDirective(..)
829                | TokenType::TagDirective(..)
830                | TokenType::ReservedDirective(..),
831            ) = *self.peek_token()?
832            {
833                return Err(ScanError::new_str(
834                    span.start,
835                    "missing explicit document end marker before directive",
836                ));
837            }
838            self.state = State::DocumentStart;
839        }
840
841        Ok((Event::DocumentEnd, span))
842    }
843
844    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
845        // anchors can be overridden/reused
846        // if self.anchors.contains_key(name) {
847        //     return Err(ScanError::new_str(*mark,
848        //         "while parsing anchor, found duplicated anchor"));
849        // }
850        let new_id = self.anchor_id_count;
851        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
852            ScanError::new_str(
853                mark.start,
854                "while parsing anchor, anchor count exceeded supported limit",
855            )
856        })?;
857        self.anchors.insert(name, new_id);
858        Ok(new_id)
859    }
860
861    #[allow(clippy::too_many_lines)]
862    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
863    where
864        'input: 'a,
865    {
866        let mut anchor_id = 0;
867        let mut tag = None;
868        match *self.peek_token()? {
869            Token(_, TokenType::Alias(_)) => {
870                self.pop_state();
871                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
872                    match self.anchors.get(&*name) {
873                        None => {
874                            return Err(ScanError::new_str(
875                                span.start,
876                                "while parsing node, found unknown anchor",
877                            ))
878                        }
879                        Some(id) => return Ok((Event::Alias(*id), span)),
880                    }
881                }
882                unreachable!()
883            }
884            Token(_, TokenType::Anchor(_)) => {
885                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
886                    anchor_id = self.register_anchor(name, &span)?;
887                    if let TokenType::Tag(..) = self.peek_token()?.1 {
888                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
889                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
890                        } else {
891                            unreachable!()
892                        }
893                    }
894                } else {
895                    unreachable!()
896                }
897            }
898            Token(mark, TokenType::Tag(..)) => {
899                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
900                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
901                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
902                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
903                            anchor_id = self.register_anchor(name, &mark)?;
904                        } else {
905                            unreachable!()
906                        }
907                    }
908                } else {
909                    unreachable!()
910                }
911            }
912            _ => {}
913        }
914        match *self.peek_token()? {
915            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
916                self.state = State::IndentlessSequenceEntry;
917                Ok((Event::SequenceStart(anchor_id, tag), mark))
918            }
919            Token(_, TokenType::Scalar(..)) => {
920                self.pop_state();
921                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
922                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
923                } else {
924                    unreachable!()
925                }
926            }
927            Token(mark, TokenType::FlowSequenceStart) => {
928                self.state = State::FlowSequenceFirstEntry;
929                Ok((Event::SequenceStart(anchor_id, tag), mark))
930            }
931            Token(mark, TokenType::FlowMappingStart) => {
932                self.state = State::FlowMappingFirstKey;
933                Ok((Event::MappingStart(anchor_id, tag), mark))
934            }
935            Token(mark, TokenType::BlockSequenceStart) if block => {
936                self.state = State::BlockSequenceFirstEntry;
937                Ok((Event::SequenceStart(anchor_id, tag), mark))
938            }
939            Token(mark, TokenType::BlockMappingStart) if block => {
940                self.state = State::BlockMappingFirstKey;
941                Ok((Event::MappingStart(anchor_id, tag), mark))
942            }
943            // ex 7.2, an empty scalar can follow a secondary tag
944            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
945                self.pop_state();
946                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
947            }
948            Token(span, _) => {
949                let info = match self.state {
950                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
951                        "unexpected EOF while parsing a flow sequence"
952                    }
953                    State::FlowMappingFirstKey
954                    | State::FlowMappingKey
955                    | State::FlowMappingValue
956                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
957                    State::FlowSequenceEntryMappingKey
958                    | State::FlowSequenceEntryMappingValue
959                    | State::FlowSequenceEntryMappingEnd => {
960                        "unexpected EOF while parsing an implicit flow mapping"
961                    }
962                    State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
963                        "unexpected EOF while parsing a block sequence"
964                    }
965                    State::BlockMappingFirstKey
966                    | State::BlockMappingKey
967                    | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
968                    _ => "while parsing a node, did not find expected node content",
969                };
970                Err(ScanError::new_str(span.start, info))
971            }
972        }
973    }
974
975    fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
976    where
977        'input: 'a,
978    {
979        // skip BlockMappingStart
980        if first {
981            let _ = self.peek_token()?;
982            //self.marks.push(tok.0);
983            self.skip();
984        }
985        match *self.peek_token()? {
986            Token(_, TokenType::Key) => {
987                // Indentation is only meaningful for block mapping keys.
988                if let Token(key_span, TokenType::Key) = *self.peek_token()? {
989                    self.pending_key_indent = Some(key_span.start.col());
990                }
991                self.skip();
992                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
993                    *self.peek_token()?
994                {
995                    self.state = State::BlockMappingValue;
996                    // empty scalar
997                    Ok((Event::empty_scalar(), mark))
998                } else {
999                    self.push_state(State::BlockMappingValue);
1000                    self.parse_node(true, true)
1001                }
1002            }
1003            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
1004            Token(mark, TokenType::Value) => {
1005                self.state = State::BlockMappingValue;
1006                Ok((Event::empty_scalar(), mark))
1007            }
1008            Token(mark, TokenType::BlockEnd) => {
1009                self.pop_state();
1010                self.skip();
1011                Ok((Event::MappingEnd, mark))
1012            }
1013            Token(span, _) => Err(ScanError::new_str(
1014                span.start,
1015                "while parsing a block mapping, did not find expected key",
1016            )),
1017        }
1018    }
1019
1020    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1021    where
1022        'input: 'a,
1023    {
1024        match *self.peek_token()? {
1025            Token(mark, TokenType::Value) => {
1026                self.skip();
1027                if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1028                    *self.peek_token()?
1029                {
1030                    self.state = State::BlockMappingKey;
1031                    // empty scalar
1032                    Ok((Event::empty_scalar(), mark))
1033                } else {
1034                    self.push_state(State::BlockMappingKey);
1035                    self.parse_node(true, true)
1036                }
1037            }
1038            Token(mark, _) => {
1039                self.state = State::BlockMappingKey;
1040                // empty scalar
1041                Ok((Event::empty_scalar(), mark))
1042            }
1043        }
1044    }
1045
1046    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1047    where
1048        'input: 'a,
1049    {
1050        if first {
1051            let _ = self.peek_token()?;
1052            self.skip();
1053        }
1054        let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
1055            mark
1056        } else {
1057            if !first {
1058                match *self.peek_token()? {
1059                    Token(_, TokenType::FlowEntry) => self.skip(),
1060                    Token(span, _) => {
1061                        return Err(ScanError::new_str(
1062                            span.start,
1063                            "while parsing a flow mapping, did not find expected ',' or '}'",
1064                        ))
1065                    }
1066                }
1067            }
1068
1069            match *self.peek_token()? {
1070                Token(_, TokenType::Key) => {
1071                    self.skip();
1072                    if let Token(
1073                        mark,
1074                        TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
1075                    ) = *self.peek_token()?
1076                    {
1077                        self.state = State::FlowMappingValue;
1078                        return Ok((Event::empty_scalar(), mark));
1079                    }
1080                    self.push_state(State::FlowMappingValue);
1081                    return self.parse_node(false, false);
1082                }
1083                Token(marker, TokenType::Value) => {
1084                    self.state = State::FlowMappingValue;
1085                    return Ok((Event::empty_scalar(), marker));
1086                }
1087                Token(_, TokenType::FlowMappingEnd) => (),
1088                _ => {
1089                    self.push_state(State::FlowMappingEmptyValue);
1090                    return self.parse_node(false, false);
1091                }
1092            }
1093
1094            self.peek_token()?.0
1095        };
1096
1097        self.pop_state();
1098        self.skip();
1099        Ok((Event::MappingEnd, span))
1100    }
1101
1102    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1103    where
1104        'input: 'a,
1105    {
1106        let span: Span = {
1107            if empty {
1108                let Token(mark, _) = *self.peek_token()?;
1109                self.state = State::FlowMappingKey;
1110                return Ok((Event::empty_scalar(), mark));
1111            }
1112            match *self.peek_token()? {
1113                Token(span, TokenType::Value) => {
1114                    self.skip();
1115                    match self.peek_token()?.1 {
1116                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
1117                        _ => {
1118                            self.push_state(State::FlowMappingKey);
1119                            return self.parse_node(false, false);
1120                        }
1121                    }
1122                    span
1123                }
1124                Token(marker, _) => marker,
1125            }
1126        };
1127
1128        self.state = State::FlowMappingKey;
1129        Ok((Event::empty_scalar(), span))
1130    }
1131
1132    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1133    where
1134        'input: 'a,
1135    {
1136        // skip FlowMappingStart
1137        if first {
1138            let _ = self.peek_token()?;
1139            //self.marks.push(tok.0);
1140            self.skip();
1141        }
1142        match *self.peek_token()? {
1143            Token(mark, TokenType::FlowSequenceEnd) => {
1144                self.pop_state();
1145                self.skip();
1146                return Ok((Event::SequenceEnd, mark));
1147            }
1148            Token(_, TokenType::FlowEntry) if !first => {
1149                self.skip();
1150            }
1151            Token(span, _) if !first => {
1152                return Err(ScanError::new_str(
1153                    span.start,
1154                    "while parsing a flow sequence, expected ',' or ']'",
1155                ));
1156            }
1157            _ => { /* next */ }
1158        }
1159        match *self.peek_token()? {
1160            Token(mark, TokenType::FlowSequenceEnd) => {
1161                self.pop_state();
1162                self.skip();
1163                Ok((Event::SequenceEnd, mark))
1164            }
1165            Token(mark, TokenType::Key) => {
1166                self.state = State::FlowSequenceEntryMappingKey;
1167                self.skip();
1168                Ok((Event::MappingStart(0, None), mark))
1169            }
1170            _ => {
1171                self.push_state(State::FlowSequenceEntry);
1172                self.parse_node(false, false)
1173            }
1174        }
1175    }
1176
1177    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1178    where
1179        'input: 'a,
1180    {
1181        match *self.peek_token()? {
1182            Token(mark, TokenType::BlockEntry) => {
1183                self.skip();
1184                if let Token(
1185                    _,
1186                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1187                ) = *self.peek_token()?
1188                {
1189                    self.state = State::IndentlessSequenceEntry;
1190                    Ok((Event::empty_scalar(), mark))
1191                } else {
1192                    self.push_state(State::IndentlessSequenceEntry);
1193                    self.parse_node(true, false)
1194                }
1195            }
1196            Token(mark, _) => {
1197                self.pop_state();
1198                Ok((Event::SequenceEnd, mark))
1199            }
1200        }
1201    }
1202
1203    fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1204    where
1205        'input: 'a,
1206    {
1207        // BLOCK-SEQUENCE-START
1208        if first {
1209            let _ = self.peek_token()?;
1210            //self.marks.push(tok.0);
1211            self.skip();
1212        }
1213        match *self.peek_token()? {
1214            Token(mark, TokenType::BlockEnd) => {
1215                self.pop_state();
1216                self.skip();
1217                Ok((Event::SequenceEnd, mark))
1218            }
1219            Token(mark, TokenType::BlockEntry) => {
1220                self.skip();
1221                if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1222                    self.state = State::BlockSequenceEntry;
1223                    Ok((Event::empty_scalar(), mark))
1224                } else {
1225                    self.push_state(State::BlockSequenceEntry);
1226                    self.parse_node(true, false)
1227                }
1228            }
1229            Token(span, _) => Err(ScanError::new_str(
1230                span.start,
1231                "while parsing a block collection, did not find expected '-' indicator",
1232            )),
1233        }
1234    }
1235
1236    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1237    where
1238        'input: 'a,
1239    {
1240        if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1241            *self.peek_token()?
1242        {
1243            self.state = State::FlowSequenceEntryMappingValue;
1244            Ok((Event::empty_scalar(), mark))
1245        } else {
1246            self.push_state(State::FlowSequenceEntryMappingValue);
1247            self.parse_node(false, false)
1248        }
1249    }
1250
1251    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
1252    where
1253        'input: 'a,
1254    {
1255        match *self.peek_token()? {
1256            Token(_, TokenType::Value) => {
1257                self.skip();
1258                self.state = State::FlowSequenceEntryMappingValue;
1259                let Token(span, ref tok) = *self.peek_token()?;
1260                if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1261                    self.state = State::FlowSequenceEntryMappingEnd;
1262                    Ok((Event::empty_scalar(), Span::empty(span.start)))
1263                } else {
1264                    self.push_state(State::FlowSequenceEntryMappingEnd);
1265                    self.parse_node(false, false)
1266                }
1267            }
1268            Token(mark, _) => {
1269                self.state = State::FlowSequenceEntryMappingEnd;
1270                Ok((Event::empty_scalar(), mark))
1271            }
1272        }
1273    }
1274
1275    #[allow(clippy::unnecessary_wraps)]
1276    fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
1277    where
1278        'input: 'a,
1279    {
1280        self.state = State::FlowSequenceEntry;
1281        let Token(span, _) = *self.peek_token()?;
1282        Ok((Event::MappingEnd, Span::empty(span.start)))
1283    }
1284
1285    /// Resolve a tag from the handle and the suffix.
1286    fn resolve_tag(
1287        &self,
1288        span: Span,
1289        handle: &Cow<'input, str>,
1290        suffix: Cow<'input, str>,
1291    ) -> Result<Cow<'input, Tag>, ScanError> {
1292        let suffix = suffix.into_owned();
1293        let tag = if handle == "!!" {
1294            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
1295            // overridden.
1296            Tag {
1297                handle: self
1298                    .tags
1299                    .get("!!")
1300                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1301                suffix,
1302            }
1303        } else if handle.is_empty() && suffix == "!" {
1304            // "!" introduces a local tag. Local tags may have their prefix overridden.
1305            match self.tags.get("") {
1306                Some(prefix) => Tag {
1307                    handle: prefix.clone(),
1308                    suffix,
1309                },
1310                None => Tag {
1311                    handle: String::new(),
1312                    suffix,
1313                },
1314            }
1315        } else {
1316            // Lookup handle in our tag directives.
1317            let prefix = self.tags.get(&**handle);
1318            if let Some(prefix) = prefix {
1319                Tag {
1320                    handle: prefix.clone(),
1321                    suffix,
1322                }
1323            } else {
1324                // Otherwise, it may be a local handle. With a local handle, the handle is set to
1325                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
1326                // If the handle is of the form "!foo!", this cannot be a local handle and we need
1327                // to error.
1328                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1329                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
1330                }
1331                Tag {
1332                    handle: handle.to_string(),
1333                    suffix,
1334                }
1335            }
1336        };
1337        Ok(Cow::Owned(tag))
1338    }
1339}
1340
1341impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
1342    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
1343        if let Some(ref x) = self.current {
1344            Some(Ok(x))
1345        } else {
1346            if self.stream_end_emitted {
1347                return None;
1348            }
1349            match self.next_event_impl() {
1350                Ok(token) => self.current = Some(token),
1351                Err(e) => return Some(Err(e)),
1352            }
1353            self.current.as_ref().map(Ok)
1354        }
1355    }
1356
1357    fn next_event(&mut self) -> Option<ParseResult<'input>> {
1358        if self.stream_end_emitted {
1359            return None;
1360        }
1361
1362        let tok = self.next_event_impl();
1363        if matches!(tok, Ok((Event::StreamEnd, _))) {
1364            self.stream_end_emitted = true;
1365        }
1366        Some(tok)
1367    }
1368
1369    fn load<R: SpannedEventReceiver<'input>>(
1370        &mut self,
1371        recv: &mut R,
1372        multi: bool,
1373    ) -> Result<(), ScanError> {
1374        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
1375        if !self.scanner.stream_started() || stream_start_buffered {
1376            let (ev, span) = self.next_event_impl()?;
1377            if ev != Event::StreamStart {
1378                return Err(ScanError::new_str(
1379                    span.start,
1380                    "did not find expected <stream-start>",
1381                ));
1382            }
1383            recv.on_event(ev, span);
1384        }
1385
1386        if self.scanner.stream_ended() {
1387            // XXX has parsed?
1388            recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
1389            return Ok(());
1390        }
1391        loop {
1392            let (ev, span) = self.next_event_impl()?;
1393            if ev == Event::StreamEnd {
1394                recv.on_event(ev, span);
1395                return Ok(());
1396            }
1397            // clear anchors before a new document
1398            self.anchors.clear();
1399            self.load_document(ev, span, recv)?;
1400            if !multi {
1401                break;
1402            }
1403        }
1404        Ok(())
1405    }
1406}
1407
1408impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
1409    type Item = Result<(Event<'input>, Span), ScanError>;
1410
1411    fn next(&mut self) -> Option<Self::Item> {
1412        self.next_event()
1413    }
1414}
1415
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};

    use super::{Event, EventReceiver, Parser, Tag};

    /// A resolved core tag is displayed as prefix + suffix, with no additional `!`.
    #[test]
    fn display_resolved_core_tag_without_extra_bang() {
        let core_str = Tag {
            handle: "tag:yaml.org,2002:".to_owned(),
            suffix: "str".to_owned(),
        };

        assert_eq!(core_str.to_string(), "tag:yaml.org,2002:str");
    }

    /// `peek` must always report the very event that `next_event` returns afterwards.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut p = Parser::new_from_str(s);
        loop {
            let peeked = p.peek().unwrap().unwrap().clone();
            let produced = p.next_event().unwrap().unwrap();
            assert_eq!(produced, peeked);
            if matches!(produced.0, Event::StreamEnd) {
                break;
            }
        }
    }

    /// Several `%TAG` directives in front of one document are all usable inside it.
    #[test]
    fn test_multiple_tag_directives_are_kept_within_document() {
        let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";

        let mut seen_a = false;
        let mut seen_b = false;
        for event in Parser::new_from_str(text) {
            if let (Event::Scalar(_, _, _, Some(tag)), _) = event.unwrap() {
                match tag.handle.as_str() {
                    "tag:a,2024:" => seen_a = true,
                    "tag:b,2024:" => seen_b = true,
                    _ => {}
                }
            }
        }

        assert!(seen_a);
        assert!(seen_b);
    }

    /// A handle declared for the first document is unknown in the second one.
    #[test]
    fn test_tags_are_cleared_when_next_document_has_no_directives() {
        let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";

        let mut parser = Parser::new_from_str(text);
        // Consume everything up to and including the end of the first document.
        for event in parser.by_ref() {
            if matches!(event.unwrap().0, Event::DocumentEnd) {
                break;
            }
        }

        assert!(
            matches!(parser.next().unwrap().unwrap().0, Event::DocumentStart(true)),
            "expected explicit second document start"
        );

        let err = parser.next().unwrap().unwrap_err();
        assert!(err.to_string().contains("the handle wasn't declared"));
    }

    /// An anchor defined in the first document cannot be aliased from the second one.
    #[test]
    fn test_pull_parser_clears_anchors_between_documents() {
        let mut parser = Parser::new_from_str(
            "--- &a value
--- *a
",
        );

        // Consume everything up to and including the end of the first document.
        for event in parser.by_ref() {
            if matches!(event.unwrap().0, Event::DocumentEnd) {
                break;
            }
        }

        assert!(
            matches!(parser.next().unwrap().unwrap().0, Event::DocumentStart(true)),
            "expected explicit second document start"
        );

        let err = parser.next().unwrap().unwrap_err();
        assert!(err.to_string().contains("unknown anchor"));
    }

    /// With `keep_tags(true)` a declared handle survives into the next document; with
    /// `keep_tags(false)` using it there is an error.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        for item in Parser::new_from_str(text).keep_tags(true) {
            let (event, _) = item.unwrap();
            if let Event::MappingStart(_, tag) = event {
                assert_eq!(tag.unwrap().handle, "tag:test,2024:");
            }
        }

        let hit_error = Parser::new_from_str(text)
            .keep_tags(false)
            .any(|item| item.is_err());
        assert!(hit_error, "Test failed, did not encounter error");
    }

    /// The input `[?: value]` must parse without any error.
    #[test]
    fn test_flow_sequence_mapping_allows_empty_key() {
        for event in Parser::new_from_str("[?: value]") {
            event.expect("parser should accept flow sequence mappings with empty keys");
        }
    }

    /// Even with `keep_tags(true)`, an overridden `!!` handle only affects its own document.
    #[test]
    fn test_keep_tags_does_not_persist_default_tag_handles() {
        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";

        let int_tags: Vec<_> = Parser::new_from_str(text)
            .keep_tags(true)
            .filter_map(|event| match event.unwrap().0 {
                Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
                    Some(tag.handle.clone())
                }
                _ => None,
            })
            .collect();

        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
    }

    /// `load` still forwards `StreamStart` when that event was already peeked.
    #[test]
    fn test_load_after_peek_stream_start() {
        /// Receiver that records every event it is given.
        #[derive(Default)]
        struct Sink<'input> {
            events: Vec<Event<'input>>,
        }

        impl<'input> EventReceiver<'input> for Sink<'input> {
            fn on_event(&mut self, ev: Event<'input>) {
                self.events.push(ev);
            }
        }

        let mut parser = Parser::new_from_str("key: value\n");
        let mut sink = Sink::default();

        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
        parser.load(&mut sink, false).unwrap();

        let recorded = &sink.events;
        assert!(matches!(recorded.first(), Some(Event::StreamStart)));
        assert!(matches!(recorded.get(1), Some(Event::DocumentStart(_))));
    }
}