Skip to main content

saphyr_parser_bw/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, BorrowedInput},
9    scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10    BufferedInput, Marker,
11};
12
13use alloc::{
14    borrow::Cow,
15    collections::{BTreeMap, BTreeSet},
16    string::{String, ToString},
17    vec::Vec,
18};
19use core::fmt::Display;
20
/// The states of the parser's state machine.
///
/// `Parser::state_machine` dispatches on the current state to the handler method named in each
/// variant's documentation below.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// Initial state of a freshly created parser; handled by `stream_start`.
    StreamStart,
    /// Handled by `document_start(true)`: a document may begin without a `---` marker.
    ImplicitDocumentStart,
    /// Handled by `document_start(false)`.
    DocumentStart,
    /// Handled by `document_content`.
    DocumentContent,
    /// Handled by `document_end`.
    DocumentEnd,
    /// Handled by `parse_node(true, false)`.
    BlockNode,
    /// Handled by `block_sequence_entry(true)`.
    BlockSequenceFirstEntry,
    /// Handled by `block_sequence_entry(false)`.
    BlockSequenceEntry,
    /// Handled by `indentless_sequence_entry`.
    IndentlessSequenceEntry,
    /// Handled by `block_mapping_key(true)`.
    BlockMappingFirstKey,
    /// Handled by `block_mapping_key(false)`.
    BlockMappingKey,
    /// Handled by `block_mapping_value`.
    BlockMappingValue,
    /// Handled by `flow_sequence_entry(true)`.
    FlowSequenceFirstEntry,
    /// Handled by `flow_sequence_entry(false)`.
    FlowSequenceEntry,
    /// Handled by `flow_sequence_entry_mapping_key`.
    FlowSequenceEntryMappingKey,
    /// Handled by `flow_sequence_entry_mapping_value`.
    FlowSequenceEntryMappingValue,
    /// Handled by `flow_sequence_entry_mapping_end`, which receives the stored marker.
    FlowSequenceEntryMappingEnd(Marker),
    /// Handled by `flow_mapping_key(true)`.
    FlowMappingFirstKey,
    /// Handled by `flow_mapping_key(false)`.
    FlowMappingKey,
    /// Handled by `flow_mapping_value(false)`.
    FlowMappingValue,
    /// Handled by `flow_mapping_value(true)`.
    FlowMappingEmptyValue,
    /// Terminal state: `parse` returns [`Event::StreamEnd`] without running the state machine.
    End,
}
46
/// An event generated by the YAML parser.
///
/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
/// [`EventReceiver`] trait.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Reserved for internal use.
    Nothing,
    /// Event generated at the very beginning of parsing.
    StreamStart,
    /// Last event that will be generated by the parser. Signals EOF.
    StreamEnd,
    /// The start of a YAML document.
    ///
    /// When the boolean is `true`, it is an explicit document start
    /// directive (`---`).
    ///
    /// When the boolean is `false`, it is an implicit document start
    /// (without `---`).
    DocumentStart(bool),
    /// The end of a YAML document.
    ///
    /// This is emitted at the end of every document, whether or not the explicit end marker
    /// (`...`) is present in the input.
    DocumentEnd,
    /// A YAML Alias.
    Alias(
        /// The anchor ID the alias refers to.
        usize,
    ),
    /// A YAML scalar: value, style, `anchor_id`, tag.
    Scalar(
        /// The value of the scalar.
        Cow<'input, str>,
        /// The style in which the scalar was written.
        ScalarStyle,
        /// The anchor ID of the scalar. The parser uses `0` when it builds a scalar without an
        /// anchor; IDs handed out for anchors start at 1.
        usize,
        /// An optional tag
        Option<Cow<'input, Tag>>,
    ),
    /// The start of a YAML sequence (array).
    SequenceStart(
        /// The anchor ID of the start of the sequence.
        usize,
        /// An optional tag
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML sequence (array).
    SequenceEnd,
    /// The start of a YAML mapping (object, hash).
    MappingStart(
        /// The anchor ID of the start of the mapping.
        usize,
        /// An optional tag
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML mapping (object, hash).
    MappingEnd,
}
100
/// A YAML tag, split into its handle and its suffix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Handle of the tag (`!` included).
    pub handle: String,
    /// The suffix of the tag.
    pub suffix: String,
}

impl Tag {
    /// Tells whether this tag belongs to the YAML Core Schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification defines [a list of
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. Only _the
    /// handle_ is inspected here; the suffix is not validated.
    ///
    /// # Return
    /// Returns `true` if the handle is exactly `tag:yaml.org,2002:` (trailing colon included),
    /// `false` otherwise.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        matches!(self.handle.as_str(), "tag:yaml.org,2002:")
    }
}

impl Display for Tag {
    /// Writes the tag as `<handle>!<suffix>`, except that a bare `!` handle is not repeated and
    /// yields `!<suffix>`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Tag { handle, suffix } = self;
        match handle.as_str() {
            "!" => write!(f, "!{}", suffix),
            other => write!(f, "{}!{}", other, suffix),
        }
    }
}
134
135impl<'input> Event<'input> {
136    /// Create an empty scalar.
137    fn empty_scalar() -> Self {
138        // a null scalar
139        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
140    }
141
142    /// Create an empty scalar with the given anchor.
143    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
144        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
145    }
146}
147
/// A YAML parser.
///
/// The parser pulls tokens from a [`Scanner`] over the input `T` and turns them into
/// [`Event`]s, driven by an internal state machine.
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// The underlying scanner from which we pull tokens.
    scanner: Scanner<'input, T>,
    /// The stack of _previous_ states we were in.
    ///
    /// States are pushed in the context of subobjects to this stack. The top-most element is the
    /// state in which to come back to when exiting the current state.
    states: Vec<State>,
    /// The state in which we currently are.
    state: State,
    /// The next token from the scanner.
    ///
    /// This is a one-token lookahead: it is filled by `peek_token` and emptied by
    /// `fetch_token` or `skip`.
    token: Option<Token<'input>>,
    /// The next YAML event to emit.
    ///
    /// When set, `next_event_impl` returns this buffered event instead of parsing a new one.
    current: Option<(Event<'input>, Span)>,

    /// Pending indentation hint to be attached to the next emitted event span.
    ///
    /// This is used to communicate indentation for block mapping keys. It is set when consuming a
    /// `TokenType::Key` in block style, and is applied to the next emitted node event (the key
    /// itself).
    pending_key_indent: Option<usize>,
    /// Anchors that have been encountered in the YAML document.
    ///
    /// Maps the anchor name to the ID it was given. Registering a name again replaces the
    /// previous ID.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Next ID available for an anchor.
    ///
    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
    /// return for the next anchor and the count of anchor IDs emitted.
    anchor_id_count: usize,
    /// The tag directives (`%TAG`) the parser has encountered.
    ///
    /// Key is the handle, and value is the prefix.
    tags: BTreeMap<String, String>,
    /// Whether we have emitted [`Event::StreamEnd`].
    ///
    /// Emitted means that it has been returned from [`Self::next`]. If it is stored in
    /// [`Self::token`], this is set to `false`.
    stream_end_emitted: bool,
    /// Make tags global across all documents.
    keep_tags: bool,
}
190
/// Trait to be implemented in order to use the low-level parsing API.
///
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`SpannedEventReceiver`] automatically.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
///
/// In a mapping, key-values are sent as consecutive events. The first event after an
/// [`Event::MappingStart`] will be the key, followed by its value. If the mapping contains no
/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd
/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received.
///
/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
///
/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
/// be part of the value and not another key-value pair or element in the sequence.
///
/// For instance, the following yaml:
/// ```yaml
/// a: b
/// c:
///   d: e
/// f:
///   - g
///   - h
/// ```
/// will emit (indented and commented for visibility):
/// ```text
/// StreamStart, DocumentStart, MappingStart,
///   Scalar("a", ..), Scalar("b", ..)
///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
/// MappingEnd, DocumentEnd, StreamEnd
/// ```
///
/// # Example
/// ```
/// # use saphyr_parser_bw::{Event, EventReceiver, Parser};
/// #
/// /// Sink of events. Collects them into an array.
/// struct EventSink<'input> {
///     events: Vec<Event<'input>>,
/// }
///
/// /// Implement `on_event`, pushing into `self.events`.
/// impl<'input> EventReceiver<'input> for EventSink<'input> {
///     fn on_event(&mut self, ev: Event<'input>) {
///         self.events.push(ev);
///     }
/// }
///
/// /// Load events from a yaml string.
/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
///     let mut sink = EventSink { events: Vec::new() };
///     let mut parser = Parser::new_from_str(yaml);
///     // Load events using our sink as the receiver.
///     parser.load(&mut sink, true).unwrap();
///     sink.events
/// }
/// ```
pub trait EventReceiver<'input> {
    /// Handler called for each YAML event that is emitted by the parser.
    fn on_event(&mut self, ev: Event<'input>);
}
264
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
/// Every [`EventReceiver`] gets this trait through a blanket implementation that drops the span.
pub trait SpannedEventReceiver<'input> {
    /// Handler called for each event that occurs.
    ///
    /// `span` is the span the parser attached to `ev`.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
272
273impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
274    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
275        self.on_event(ev);
276    }
277}
278
/// A convenience alias for a `Result` of a parser event.
///
/// On success it holds the event together with its [`Span`]; on failure, a [`ScanError`].
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
281
/// Trait extracted from `Parser` to support mocking and alternative implementations.
pub trait ParserTrait<'input> {
    /// Try to load the next event and return it, without consuming it from `self`.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;

    /// Try to load the next event and return it, consuming it from `self`.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;

    /// Load the YAML from the stream in `self`, pushing events into `recv`.
    ///
    /// `multi` controls whether more than one YAML document may be loaded from the stream.
    ///
    /// # Errors
    /// Returns `ScanError` when scanning or parsing the stream fails.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;
}
300
301impl<'input> Parser<'input, StrInput<'input>> {
302    /// Create a new instance of a parser from a &str.
303    #[must_use]
304    pub fn new_from_str(value: &'input str) -> Self {
305        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
306        Parser::new(StrInput::new(value))
307    }
308}
309
310impl<T> Parser<'static, BufferedInput<T>>
311where
312    T: Iterator<Item = char>,
313{
314    /// Create a new instance of a parser from an iterator of `char`s.
315    #[must_use]
316    pub fn new_from_iter(iter: T) -> Self {
317        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
318        Parser::new(BufferedInput::new(iter))
319    }
320}
321
322impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
    /// Get the current anchor offset count.
    ///
    /// This is the ID that will be given to the next anchor the parser registers.
    pub fn get_anchor_offset(&self) -> usize {
        self.anchor_id_count
    }
327
    /// Set the current anchor offset count.
    ///
    /// `offset` becomes the ID given to the next anchor the parser registers. A new parser
    /// starts at 1.
    pub fn set_anchor_offset(&mut self, offset: usize) {
        self.anchor_id_count = offset;
    }
332
333    /// Create a new instance of a parser from the given input of characters.
334    pub fn new(src: T) -> Self {
335        Parser {
336            scanner: Scanner::new(src),
337            states: Vec::new(),
338            state: State::StreamStart,
339            token: None,
340            current: None,
341
342            pending_key_indent: None,
343
344            anchors: BTreeMap::new(),
345            // valid anchor_id starts from 1
346            anchor_id_count: 1,
347            tags: BTreeMap::new(),
348            stream_end_emitted: false,
349            keep_tags: false,
350        }
351    }
352
    /// Whether to keep tags across multiple documents when parsing.
    ///
    /// This behavior is non-standard as per the YAML specification but can be encountered in the
    /// wild. This boolean allows enabling this non-standard extension. This would result in the
    /// parser accepting input from [test
    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
    /// of the yaml-test-suite:
    ///
    /// ```yaml
    /// %TAG !prefix! tag:example.com,2011:
    /// --- !prefix!A
    /// a: b
    /// --- !prefix!B
    /// c: d
    /// --- !prefix!C
    /// e: f
    /// ```
    ///
    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
    /// only apply to the document immediately following them. This would error on `!prefix!B`.
    ///
    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
    ///
    /// Even with `keep_tags` set to `true`, the entries stored under the `!!` handle and under
    /// the empty handle are dropped at the end of each document.
    ///
    /// This is a builder-style method: it consumes the parser and returns it with the option set.
    #[must_use]
    pub fn keep_tags(mut self, value: bool) -> Self {
        self.keep_tags = value;
        self
    }
380
    /// Try to load the next event and return it, without consuming it from `self`.
    ///
    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
    /// [`Iterator::next`] or [`Parser::load`].
    ///
    /// This inherent method forwards to the [`ParserTrait`] implementation.
    ///
    /// # Errors
    /// Returns `ScanError` when loading the next event fails.
    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        ParserTrait::peek(self)
    }
391
    /// Try to load the next event and return it, consuming it from `self`.
    ///
    /// This inherent method forwards to the [`ParserTrait`] implementation.
    ///
    /// # Errors
    /// Returns `ScanError` when loading the next event fails.
    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
        ParserTrait::next_event(self)
    }
399
400    /// Implementation function for [`Self::next_event`] without the `Option`.
401    ///
402    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
403    /// option. This burdens the parser code. This function is used internally when an option is
404    /// undesirable.
405    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
406    where
407        'input: 'a,
408    {
409        match self.current.take() {
410            None => self.parse(),
411            Some(v) => Ok(v),
412        }
413    }
414
415    /// Peek at the next token from the scanner.
416    fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
417        match self.token {
418            None => {
419                self.token = Some(self.scan_next_token()?);
420                Ok(self.token.as_ref().unwrap())
421            }
422            Some(ref tok) => Ok(tok),
423        }
424    }
425
426    /// Extract and return the next token from the scanner.
427    ///
428    /// This function does _not_ make use of `self.token`.
429    fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
430        let token = self.scanner.next();
431        match token {
432            None => match self.scanner.get_error() {
433                None => Err(self.unexpected_eof()),
434                Some(e) => Err(e),
435            },
436            Some(tok) => Ok(tok),
437        }
438    }
439
440    #[cold]
441    fn unexpected_eof(&self) -> ScanError {
442        let info = match self.state {
443            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
444                "unexpected EOF while parsing a flow sequence"
445            }
446            State::FlowMappingFirstKey
447            | State::FlowMappingKey
448            | State::FlowMappingValue
449            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
450            State::FlowSequenceEntryMappingKey
451            | State::FlowSequenceEntryMappingValue
452            | State::FlowSequenceEntryMappingEnd(_) => {
453                "unexpected EOF while parsing an implicit flow mapping"
454            }
455            State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
456                "unexpected EOF while parsing a block sequence"
457            }
458            State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
459                "unexpected EOF while parsing a block mapping"
460            }
461            _ => "unexpected eof",
462        };
463        ScanError::new_str(self.scanner.mark(), info)
464    }
465
466    fn fetch_token<'a>(&mut self) -> Token<'a>
467    where
468        'input: 'a,
469    {
470        self.token
471            .take()
472            .expect("fetch_token needs to be preceded by peek_token")
473    }
474
475    /// Skip the next token from the scanner.
476    fn skip(&mut self) {
477        self.token = None;
478    }
    /// Pops the top-most state and make it the current state.
    ///
    /// # Panics
    /// Panics if the state stack is empty.
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap();
    }
    /// Push a new state atop the state stack.
    ///
    /// The current state (`self.state`) is left untouched; the pushed state is the one a later
    /// `pop_state` will switch to.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
487
488    fn parse<'a>(&mut self) -> ParseResult<'a>
489    where
490        'input: 'a,
491    {
492        if self.state == State::End {
493            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
494        }
495        let (ev, span) = self.state_machine()?;
496        if let Some(indent) = self.pending_key_indent.take() {
497            Ok((ev, span.with_indent(Some(indent))))
498        } else {
499            Ok((ev, span))
500        }
501    }
502
    /// Load the YAML from the stream in `self`, pushing events into `recv`.
    ///
    /// The contents of the stream are parsed and the corresponding events are sent into the
    /// receiver. For detailed explanations about how events work, see [`EventReceiver`].
    ///
    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
    /// inside the stream.
    ///
    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
    /// former is enough to call this function.
    ///
    /// This inherent method forwards to the [`ParserTrait`] implementation.
    ///
    /// # Errors
    /// Returns `ScanError` when loading fails.
    pub fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        ParserTrait::load(self, recv, multi)
    }
522
523    fn load_document<R: SpannedEventReceiver<'input>>(
524        &mut self,
525        first_ev: Event<'input>,
526        span: Span,
527        recv: &mut R,
528    ) -> Result<(), ScanError> {
529        if !matches!(first_ev, Event::DocumentStart(_)) {
530            return Err(ScanError::new_str(
531                span.start,
532                "did not find expected <document-start>",
533            ));
534        }
535        recv.on_event(first_ev, span);
536
537        let (ev, span) = self.next_event_impl()?;
538        self.load_node(ev, span, recv)?;
539
540        // DOCUMENT-END is expected.
541        let (ev, mark) = self.next_event_impl()?;
542        assert_eq!(ev, Event::DocumentEnd);
543        recv.on_event(ev, mark);
544
545        Ok(())
546    }
547
548    fn load_node<R: SpannedEventReceiver<'input>>(
549        &mut self,
550        first_ev: Event<'input>,
551        span: Span,
552        recv: &mut R,
553    ) -> Result<(), ScanError> {
554        match first_ev {
555            Event::Alias(..) | Event::Scalar(..) => {
556                recv.on_event(first_ev, span);
557                Ok(())
558            }
559            Event::SequenceStart(..) => {
560                recv.on_event(first_ev, span);
561                self.load_sequence(recv)
562            }
563            Event::MappingStart(..) => {
564                recv.on_event(first_ev, span);
565                self.load_mapping(recv)
566            }
567            _ => {
568                #[cfg(feature = "debug_prints")]
569                std::println!("UNREACHABLE EVENT: {first_ev:?}");
570                unreachable!();
571            }
572        }
573    }
574
575    fn load_mapping<R: SpannedEventReceiver<'input>>(
576        &mut self,
577        recv: &mut R,
578    ) -> Result<(), ScanError> {
579        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
580        while key_ev != Event::MappingEnd {
581            // key
582            self.load_node(key_ev, key_mark, recv)?;
583
584            // value
585            let (ev, mark) = self.next_event_impl()?;
586            self.load_node(ev, mark, recv)?;
587
588            // next event
589            let (ev, mark) = self.next_event_impl()?;
590            key_ev = ev;
591            key_mark = mark;
592        }
593        recv.on_event(key_ev, key_mark);
594        Ok(())
595    }
596
597    fn load_sequence<R: SpannedEventReceiver<'input>>(
598        &mut self,
599        recv: &mut R,
600    ) -> Result<(), ScanError> {
601        let (mut ev, mut mark) = self.next_event_impl()?;
602        while ev != Event::SequenceEnd {
603            self.load_node(ev, mark, recv)?;
604
605            // next event
606            let (next_ev, next_mark) = self.next_event_impl()?;
607            ev = next_ev;
608            mark = next_mark;
609        }
610        recv.on_event(ev, mark);
611        Ok(())
612    }
613
614    fn state_machine<'a>(&mut self) -> ParseResult<'a>
615    where
616        'input: 'a,
617    {
618        // let next_tok = self.peek_token().cloned()?;
619        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
620        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
621
622        match self.state {
623            State::StreamStart => self.stream_start(),
624
625            State::ImplicitDocumentStart => self.document_start(true),
626            State::DocumentStart => self.document_start(false),
627            State::DocumentContent => self.document_content(),
628            State::DocumentEnd => self.document_end(),
629
630            State::BlockNode => self.parse_node(true, false),
631            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
632            // State::FlowNode => self.parse_node(false, false),
633            State::BlockMappingFirstKey => self.block_mapping_key(true),
634            State::BlockMappingKey => self.block_mapping_key(false),
635            State::BlockMappingValue => self.block_mapping_value(),
636
637            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
638            State::BlockSequenceEntry => self.block_sequence_entry(false),
639
640            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
641            State::FlowSequenceEntry => self.flow_sequence_entry(false),
642
643            State::FlowMappingFirstKey => self.flow_mapping_key(true),
644            State::FlowMappingKey => self.flow_mapping_key(false),
645            State::FlowMappingValue => self.flow_mapping_value(false),
646
647            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
648
649            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
650            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
651            State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
652            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
653
654            /* impossible */
655            State::End => unreachable!(),
656        }
657    }
658
659    fn stream_start<'a>(&mut self) -> ParseResult<'a>
660    where
661        'input: 'a,
662    {
663        match *self.peek_token()? {
664            Token(span, TokenType::StreamStart(_)) => {
665                self.state = State::ImplicitDocumentStart;
666                self.skip();
667                Ok((Event::StreamStart, span))
668            }
669            Token(span, _) => Err(ScanError::new_str(
670                span.start,
671                "did not find expected <stream-start>",
672            )),
673        }
674    }
675
676    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
677    where
678        'input: 'a,
679    {
680        while let TokenType::DocumentEnd = self.peek_token()?.1 {
681            self.skip();
682        }
683
684        match *self.peek_token()? {
685            Token(span, TokenType::StreamEnd) => {
686                self.state = State::End;
687                self.skip();
688                Ok((Event::StreamEnd, span))
689            }
690            Token(
691                _,
692                TokenType::VersionDirective(..)
693                | TokenType::TagDirective(..)
694                | TokenType::ReservedDirective(..)
695                | TokenType::DocumentStart,
696            ) => {
697                // explicit document
698                self.explicit_document_start()
699            }
700            Token(span, _) if implicit => {
701                self.parser_process_directives()?;
702                self.push_state(State::DocumentEnd);
703                self.state = State::BlockNode;
704                Ok((Event::DocumentStart(false), span))
705            }
706            _ => {
707                // explicit document
708                self.explicit_document_start()
709            }
710        }
711    }
712
713    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
714        let mut version_directive_received = false;
715        let mut tags = if self.keep_tags {
716            self.tags.clone()
717        } else {
718            BTreeMap::new()
719        };
720        let mut document_tag_handles = BTreeSet::new();
721
722        loop {
723            match self.peek_token()? {
724                Token(span, TokenType::VersionDirective(_, _)) => {
725                    // XXX parsing with warning according to spec
726                    //if major != 1 || minor > 2 {
727                    //    return Err(ScanError::new_str(tok.0,
728                    //        "found incompatible YAML document"));
729                    //}
730                    if version_directive_received {
731                        return Err(ScanError::new_str(
732                            span.start,
733                            "duplicate version directive",
734                        ));
735                    }
736                    version_directive_received = true;
737                }
738                Token(mark, TokenType::TagDirective(handle, prefix)) => {
739                    if !document_tag_handles.insert(handle.to_string()) {
740                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
741                    }
742                    tags.insert(handle.to_string(), prefix.to_string());
743                }
744                Token(_, TokenType::ReservedDirective(_, _)) => {
745                    // Reserved directives are ignored
746                }
747                _ => break,
748            }
749            self.skip();
750        }
751
752        self.tags = tags;
753        Ok(())
754    }
755
756    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
757    where
758        'input: 'a,
759    {
760        self.parser_process_directives()?;
761        match *self.peek_token()? {
762            Token(mark, TokenType::DocumentStart) => {
763                self.push_state(State::DocumentEnd);
764                self.state = State::DocumentContent;
765                self.skip();
766                Ok((Event::DocumentStart(true), mark))
767            }
768            Token(span, _) => Err(ScanError::new_str(
769                span.start,
770                "did not find expected <document start>",
771            )),
772        }
773    }
774
775    fn document_content<'a>(&mut self) -> ParseResult<'a>
776    where
777        'input: 'a,
778    {
779        match *self.peek_token()? {
780            Token(
781                mark,
782                TokenType::VersionDirective(..)
783                | TokenType::TagDirective(..)
784                | TokenType::ReservedDirective(..)
785                | TokenType::DocumentStart
786                | TokenType::DocumentEnd
787                | TokenType::StreamEnd,
788            ) => {
789                self.pop_state();
790                // empty scalar
791                Ok((Event::empty_scalar(), mark))
792            }
793            _ => self.parse_node(true, false),
794        }
795    }
796
797    fn document_end<'a>(&mut self) -> ParseResult<'a>
798    where
799        'input: 'a,
800    {
801        let mut explicit_end = false;
802        let span: Span = match *self.peek_token()? {
803            Token(span, TokenType::DocumentEnd) => {
804                explicit_end = true;
805                self.skip();
806                span
807            }
808            Token(span, _) => span,
809        };
810
811        if self.keep_tags {
812            // Never persist default handles across document boundaries. Allowing `%TAG !! ...`
813            // or `%TAG ! ...` to leak into following documents lets earlier documents alter how
814            // explicit tags are interpreted later on.
815            self.tags.remove("!!");
816            self.tags.remove("");
817        } else {
818            self.tags.clear();
819        }
820        if explicit_end {
821            self.state = State::ImplicitDocumentStart;
822        } else {
823            if let Token(
824                span,
825                TokenType::VersionDirective(..)
826                | TokenType::TagDirective(..)
827                | TokenType::ReservedDirective(..),
828            ) = *self.peek_token()?
829            {
830                return Err(ScanError::new_str(
831                    span.start,
832                    "missing explicit document end marker before directive",
833                ));
834            }
835            self.state = State::DocumentStart;
836        }
837
838        Ok((Event::DocumentEnd, span))
839    }
840
841    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
842        // anchors can be overridden/reused
843        // if self.anchors.contains_key(name) {
844        //     return Err(ScanError::new_str(*mark,
845        //         "while parsing anchor, found duplicated anchor"));
846        // }
847        let new_id = self.anchor_id_count;
848        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
849            ScanError::new_str(
850                mark.start,
851                "while parsing anchor, anchor count exceeded supported limit",
852            )
853        })?;
854        self.anchors.insert(name, new_id);
855        Ok(new_id)
856    }
857
858    #[allow(clippy::too_many_lines)]
859    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
860    where
861        'input: 'a,
862    {
863        let mut anchor_id = 0;
864        let mut tag = None;
865        match *self.peek_token()? {
866            Token(_, TokenType::Alias(_)) => {
867                self.pop_state();
868                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
869                    match self.anchors.get(&*name) {
870                        None => {
871                            return Err(ScanError::new_str(
872                                span.start,
873                                "while parsing node, found unknown anchor",
874                            ))
875                        }
876                        Some(id) => return Ok((Event::Alias(*id), span)),
877                    }
878                }
879                unreachable!()
880            }
881            Token(_, TokenType::Anchor(_)) => {
882                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
883                    anchor_id = self.register_anchor(name, &span)?;
884                    if let TokenType::Tag(..) = self.peek_token()?.1 {
885                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
886                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
887                        } else {
888                            unreachable!()
889                        }
890                    }
891                } else {
892                    unreachable!()
893                }
894            }
895            Token(mark, TokenType::Tag(..)) => {
896                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
897                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
898                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
899                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
900                            anchor_id = self.register_anchor(name, &mark)?;
901                        } else {
902                            unreachable!()
903                        }
904                    }
905                } else {
906                    unreachable!()
907                }
908            }
909            _ => {}
910        }
911        match *self.peek_token()? {
912            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
913                self.state = State::IndentlessSequenceEntry;
914                Ok((Event::SequenceStart(anchor_id, tag), mark))
915            }
916            Token(_, TokenType::Scalar(..)) => {
917                self.pop_state();
918                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
919                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
920                } else {
921                    unreachable!()
922                }
923            }
924            Token(mark, TokenType::FlowSequenceStart) => {
925                self.state = State::FlowSequenceFirstEntry;
926                Ok((Event::SequenceStart(anchor_id, tag), mark))
927            }
928            Token(mark, TokenType::FlowMappingStart) => {
929                self.state = State::FlowMappingFirstKey;
930                Ok((Event::MappingStart(anchor_id, tag), mark))
931            }
932            Token(mark, TokenType::BlockSequenceStart) if block => {
933                self.state = State::BlockSequenceFirstEntry;
934                Ok((Event::SequenceStart(anchor_id, tag), mark))
935            }
936            Token(mark, TokenType::BlockMappingStart) if block => {
937                self.state = State::BlockMappingFirstKey;
938                Ok((Event::MappingStart(anchor_id, tag), mark))
939            }
940            // ex 7.2, an empty scalar can follow a secondary tag
941            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
942                self.pop_state();
943                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
944            }
945            Token(span, _) => {
946                let info = match self.state {
947                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
948                        "unexpected EOF while parsing a flow sequence"
949                    }
950                    State::FlowMappingFirstKey
951                    | State::FlowMappingKey
952                    | State::FlowMappingValue
953                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
954                    State::FlowSequenceEntryMappingKey
955                    | State::FlowSequenceEntryMappingValue
956                    | State::FlowSequenceEntryMappingEnd(_) => {
957                        "unexpected EOF while parsing an implicit flow mapping"
958                    }
959                    State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
960                        "unexpected EOF while parsing a block sequence"
961                    }
962                    State::BlockMappingFirstKey
963                    | State::BlockMappingKey
964                    | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
965                    _ => "while parsing a node, did not find expected node content",
966                };
967                Err(ScanError::new_str(span.start, info))
968            }
969        }
970    }
971
972    fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
973    where
974        'input: 'a,
975    {
976        // skip BlockMappingStart
977        if first {
978            let _ = self.peek_token()?;
979            //self.marks.push(tok.0);
980            self.skip();
981        }
982        match *self.peek_token()? {
983            Token(_, TokenType::Key) => {
984                // Indentation is only meaningful for block mapping keys.
985                if let Token(key_span, TokenType::Key) = *self.peek_token()? {
986                    self.pending_key_indent = Some(key_span.start.col());
987                }
988                self.skip();
989                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
990                    *self.peek_token()?
991                {
992                    self.state = State::BlockMappingValue;
993                    // empty scalar
994                    Ok((Event::empty_scalar(), mark))
995                } else {
996                    self.push_state(State::BlockMappingValue);
997                    self.parse_node(true, true)
998                }
999            }
1000            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
1001            Token(mark, TokenType::Value) => {
1002                self.state = State::BlockMappingValue;
1003                Ok((Event::empty_scalar(), mark))
1004            }
1005            Token(mark, TokenType::BlockEnd) => {
1006                self.pop_state();
1007                self.skip();
1008                Ok((Event::MappingEnd, mark))
1009            }
1010            Token(span, _) => Err(ScanError::new_str(
1011                span.start,
1012                "while parsing a block mapping, did not find expected key",
1013            )),
1014        }
1015    }
1016
1017    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1018    where
1019        'input: 'a,
1020    {
1021        match *self.peek_token()? {
1022            Token(mark, TokenType::Value) => {
1023                self.skip();
1024                if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1025                    *self.peek_token()?
1026                {
1027                    self.state = State::BlockMappingKey;
1028                    // empty scalar
1029                    Ok((Event::empty_scalar(), mark))
1030                } else {
1031                    self.push_state(State::BlockMappingKey);
1032                    self.parse_node(true, true)
1033                }
1034            }
1035            Token(mark, _) => {
1036                self.state = State::BlockMappingKey;
1037                // empty scalar
1038                Ok((Event::empty_scalar(), mark))
1039            }
1040        }
1041    }
1042
1043    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1044    where
1045        'input: 'a,
1046    {
1047        if first {
1048            let _ = self.peek_token()?;
1049            self.skip();
1050        }
1051        let span: Span = {
1052            match *self.peek_token()? {
1053                Token(mark, TokenType::FlowMappingEnd) => mark,
1054                Token(mark, _) => {
1055                    if !first {
1056                        match *self.peek_token()? {
1057                            Token(_, TokenType::FlowEntry) => self.skip(),
1058                            Token(span, _) => return Err(ScanError::new_str(
1059                                span.start,
1060                                "while parsing a flow mapping, did not find expected ',' or '}'",
1061                            )),
1062                        }
1063                    }
1064
1065                    match *self.peek_token()? {
1066                        Token(_, TokenType::Key) => {
1067                            self.skip();
1068                            if let Token(
1069                                mark,
1070                                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
1071                            ) = *self.peek_token()?
1072                            {
1073                                self.state = State::FlowMappingValue;
1074                                return Ok((Event::empty_scalar(), mark));
1075                            }
1076                            self.push_state(State::FlowMappingValue);
1077                            return self.parse_node(false, false);
1078                        }
1079                        Token(marker, TokenType::Value) => {
1080                            self.state = State::FlowMappingValue;
1081                            return Ok((Event::empty_scalar(), marker));
1082                        }
1083                        Token(_, TokenType::FlowMappingEnd) => (),
1084                        _ => {
1085                            self.push_state(State::FlowMappingEmptyValue);
1086                            return self.parse_node(false, false);
1087                        }
1088                    }
1089
1090                    mark
1091                }
1092            }
1093        };
1094
1095        self.pop_state();
1096        self.skip();
1097        Ok((Event::MappingEnd, span))
1098    }
1099
1100    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1101    where
1102        'input: 'a,
1103    {
1104        let span: Span = {
1105            if empty {
1106                let Token(mark, _) = *self.peek_token()?;
1107                self.state = State::FlowMappingKey;
1108                return Ok((Event::empty_scalar(), mark));
1109            }
1110            match *self.peek_token()? {
1111                Token(span, TokenType::Value) => {
1112                    self.skip();
1113                    match self.peek_token()?.1 {
1114                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
1115                        _ => {
1116                            self.push_state(State::FlowMappingKey);
1117                            return self.parse_node(false, false);
1118                        }
1119                    }
1120                    span
1121                }
1122                Token(marker, _) => marker,
1123            }
1124        };
1125
1126        self.state = State::FlowMappingKey;
1127        Ok((Event::empty_scalar(), span))
1128    }
1129
1130    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1131    where
1132        'input: 'a,
1133    {
1134        // skip FlowMappingStart
1135        if first {
1136            let _ = self.peek_token()?;
1137            //self.marks.push(tok.0);
1138            self.skip();
1139        }
1140        match *self.peek_token()? {
1141            Token(mark, TokenType::FlowSequenceEnd) => {
1142                self.pop_state();
1143                self.skip();
1144                return Ok((Event::SequenceEnd, mark));
1145            }
1146            Token(_, TokenType::FlowEntry) if !first => {
1147                self.skip();
1148            }
1149            Token(span, _) if !first => {
1150                return Err(ScanError::new_str(
1151                    span.start,
1152                    "while parsing a flow sequence, expected ',' or ']'",
1153                ));
1154            }
1155            _ => { /* next */ }
1156        }
1157        match *self.peek_token()? {
1158            Token(mark, TokenType::FlowSequenceEnd) => {
1159                self.pop_state();
1160                self.skip();
1161                Ok((Event::SequenceEnd, mark))
1162            }
1163            Token(mark, TokenType::Key) => {
1164                self.state = State::FlowSequenceEntryMappingKey;
1165                self.skip();
1166                Ok((Event::MappingStart(0, None), mark))
1167            }
1168            _ => {
1169                self.push_state(State::FlowSequenceEntry);
1170                self.parse_node(false, false)
1171            }
1172        }
1173    }
1174
1175    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1176    where
1177        'input: 'a,
1178    {
1179        match *self.peek_token()? {
1180            Token(mark, TokenType::BlockEntry) => {
1181                self.skip();
1182                if let Token(
1183                    _,
1184                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1185                ) = *self.peek_token()?
1186                {
1187                    self.state = State::IndentlessSequenceEntry;
1188                    Ok((Event::empty_scalar(), mark))
1189                } else {
1190                    self.push_state(State::IndentlessSequenceEntry);
1191                    self.parse_node(true, false)
1192                }
1193            }
1194            Token(mark, _) => {
1195                self.pop_state();
1196                Ok((Event::SequenceEnd, mark))
1197            }
1198        }
1199    }
1200
1201    fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1202    where
1203        'input: 'a,
1204    {
1205        // BLOCK-SEQUENCE-START
1206        if first {
1207            let _ = self.peek_token()?;
1208            //self.marks.push(tok.0);
1209            self.skip();
1210        }
1211        match *self.peek_token()? {
1212            Token(mark, TokenType::BlockEnd) => {
1213                self.pop_state();
1214                self.skip();
1215                Ok((Event::SequenceEnd, mark))
1216            }
1217            Token(mark, TokenType::BlockEntry) => {
1218                self.skip();
1219                if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1220                    self.state = State::BlockSequenceEntry;
1221                    Ok((Event::empty_scalar(), mark))
1222                } else {
1223                    self.push_state(State::BlockSequenceEntry);
1224                    self.parse_node(true, false)
1225                }
1226            }
1227            Token(span, _) => Err(ScanError::new_str(
1228                span.start,
1229                "while parsing a block collection, did not find expected '-' indicator",
1230            )),
1231        }
1232    }
1233
1234    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1235    where
1236        'input: 'a,
1237    {
1238        if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1239            *self.peek_token()?
1240        {
1241            self.state = State::FlowSequenceEntryMappingValue;
1242            Ok((Event::empty_scalar(), mark))
1243        } else {
1244            self.push_state(State::FlowSequenceEntryMappingValue);
1245            self.parse_node(false, false)
1246        }
1247    }
1248
1249    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
1250    where
1251        'input: 'a,
1252    {
1253        match *self.peek_token()? {
1254            Token(_, TokenType::Value) => {
1255                self.skip();
1256                self.state = State::FlowSequenceEntryMappingValue;
1257                let Token(span, ref tok) = *self.peek_token()?;
1258                if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1259                    self.state = State::FlowSequenceEntryMappingEnd(span.end);
1260                    Ok((Event::empty_scalar(), span))
1261                } else {
1262                    self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
1263                    self.parse_node(false, false)
1264                }
1265            }
1266            Token(mark, _) => {
1267                self.state = State::FlowSequenceEntryMappingEnd(mark.end);
1268                Ok((Event::empty_scalar(), mark))
1269            }
1270        }
1271    }
1272
1273    #[allow(clippy::unnecessary_wraps)]
1274    fn flow_sequence_entry_mapping_end<'a>(&mut self, mark: Marker) -> ParseResult<'a>
1275    where
1276        'input: 'a,
1277    {
1278        self.state = State::FlowSequenceEntry;
1279        Ok((Event::MappingEnd, Span::empty(mark)))
1280    }
1281
1282    /// Resolve a tag from the handle and the suffix.
1283    fn resolve_tag(
1284        &self,
1285        span: Span,
1286        handle: &Cow<'input, str>,
1287        suffix: Cow<'input, str>,
1288    ) -> Result<Cow<'input, Tag>, ScanError> {
1289        let suffix = suffix.into_owned();
1290        let tag = if handle == "!!" {
1291            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
1292            // overridden.
1293            Tag {
1294                handle: self
1295                    .tags
1296                    .get("!!")
1297                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1298                suffix,
1299            }
1300        } else if handle.is_empty() && suffix == "!" {
1301            // "!" introduces a local tag. Local tags may have their prefix overridden.
1302            match self.tags.get("") {
1303                Some(prefix) => Tag {
1304                    handle: prefix.clone(),
1305                    suffix,
1306                },
1307                None => Tag {
1308                    handle: String::new(),
1309                    suffix,
1310                },
1311            }
1312        } else {
1313            // Lookup handle in our tag directives.
1314            let prefix = self.tags.get(&**handle);
1315            if let Some(prefix) = prefix {
1316                Tag {
1317                    handle: prefix.clone(),
1318                    suffix,
1319                }
1320            } else {
1321                // Otherwise, it may be a local handle. With a local handle, the handle is set to
1322                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
1323                // If the handle is of the form "!foo!", this cannot be a local handle and we need
1324                // to error.
1325                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1326                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
1327                }
1328                Tag {
1329                    handle: handle.to_string(),
1330                    suffix,
1331                }
1332            }
1333        };
1334        Ok(Cow::Owned(tag))
1335    }
1336}
1337
1338impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
1339    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
1340        if let Some(ref x) = self.current {
1341            Some(Ok(x))
1342        } else {
1343            if self.stream_end_emitted {
1344                return None;
1345            }
1346            match self.next_event_impl() {
1347                Ok(token) => self.current = Some(token),
1348                Err(e) => return Some(Err(e)),
1349            }
1350            self.current.as_ref().map(Ok)
1351        }
1352    }
1353
1354    fn next_event(&mut self) -> Option<ParseResult<'input>> {
1355        if self.stream_end_emitted {
1356            return None;
1357        }
1358
1359        let tok = self.next_event_impl();
1360        if matches!(tok, Ok((Event::StreamEnd, _))) {
1361            self.stream_end_emitted = true;
1362        }
1363        Some(tok)
1364    }
1365
1366    fn load<R: SpannedEventReceiver<'input>>(
1367        &mut self,
1368        recv: &mut R,
1369        multi: bool,
1370    ) -> Result<(), ScanError> {
1371        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
1372        if !self.scanner.stream_started() || stream_start_buffered {
1373            let (ev, span) = self.next_event_impl()?;
1374            if ev != Event::StreamStart {
1375                return Err(ScanError::new_str(
1376                    span.start,
1377                    "did not find expected <stream-start>",
1378                ));
1379            }
1380            recv.on_event(ev, span);
1381        }
1382
1383        if self.scanner.stream_ended() {
1384            // XXX has parsed?
1385            recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
1386            return Ok(());
1387        }
1388        loop {
1389            let (ev, span) = self.next_event_impl()?;
1390            if ev == Event::StreamEnd {
1391                recv.on_event(ev, span);
1392                return Ok(());
1393            }
1394            // clear anchors before a new document
1395            self.anchors.clear();
1396            self.load_document(ev, span, recv)?;
1397            if !multi {
1398                break;
1399            }
1400        }
1401        Ok(())
1402    }
1403}
1404
1405impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
1406    type Item = Result<(Event<'input>, Span), ScanError>;
1407
1408    fn next(&mut self) -> Option<Self::Item> {
1409        self.next_event()
1410    }
1411}
1412
#[cfg(test)]
mod test {
    use alloc::vec::Vec;

    use super::{Event, EventReceiver, Parser};

    /// Each peeked event must be equal to the event returned by the following `next_event`.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut p = Parser::new_from_str(s);
        loop {
            let peeked = p.peek().unwrap().unwrap().clone();
            let consumed = p.next_event().unwrap().unwrap();
            assert_eq!(consumed, peeked);
            if matches!(consumed.0, Event::StreamEnd) {
                break;
            }
        }
    }

    /// Two `%TAG` directives ahead of one document must both be usable inside it.
    #[test]
    fn test_multiple_tag_directives_are_kept_within_document() {
        let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";

        let scalar_tag_handles: Vec<_> = Parser::new_from_str(text)
            .filter_map(|event| match event.unwrap().0 {
                Event::Scalar(_, _, _, Some(tag)) => Some(tag.handle.clone()),
                _ => None,
            })
            .collect();

        assert!(scalar_tag_handles
            .iter()
            .any(|handle| handle == "tag:a,2024:"));
        assert!(scalar_tag_handles
            .iter()
            .any(|handle| handle == "tag:b,2024:"));
    }

    /// A handle declared for the first document must be unknown in the second one.
    #[test]
    fn test_tags_are_cleared_when_next_document_has_no_directives() {
        let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";

        let mut parser = Parser::new_from_str(text);

        // Consume everything up to and including the end of the first document.
        let _ = parser
            .by_ref()
            .map(|event| event.unwrap().0)
            .find(|event| matches!(event, Event::DocumentEnd));

        assert!(
            matches!(
                parser.next().unwrap().unwrap().0,
                Event::DocumentStart(true)
            ),
            "expected explicit second document start"
        );

        let err = parser.next().unwrap().unwrap_err();
        assert!(format!("{err}").contains("the handle wasn't declared"));
    }

    /// With `keep_tags(true)` declared handles survive into later documents; with
    /// `keep_tags(false)` using them there is an error.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        for event in Parser::new_from_str(text).keep_tags(true) {
            if let (Event::MappingStart(_, tag), _) = event.unwrap() {
                let tag = tag.unwrap();
                assert_eq!(tag.handle, "tag:test,2024:");
            }
        }

        let hit_error = Parser::new_from_str(text)
            .keep_tags(false)
            .any(|event| event.is_err());
        assert!(hit_error, "Test failed, did not encounter error");
    }

    /// The input below must be parsed without producing any error.
    #[test]
    fn test_flow_sequence_mapping_allows_empty_key() {
        Parser::new_from_str("[?: value]").for_each(|event| {
            event.expect("parser should accept flow sequence mappings with empty keys");
        });
    }

    /// An overridden `!!` handle applies to its own document only, even with `keep_tags(true)`.
    #[test]
    fn test_keep_tags_does_not_persist_default_tag_handles() {
        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";

        let int_tags: Vec<_> = Parser::new_from_str(text)
            .keep_tags(true)
            .filter_map(|event| match event.unwrap().0 {
                Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
                    Some(tag.handle.clone())
                }
                _ => None,
            })
            .collect();

        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
    }

    /// Peeking the stream start must not prevent `load` from forwarding it to the receiver.
    #[test]
    fn test_load_after_peek_stream_start() {
        /// Receiver that records every event it is given.
        #[derive(Default)]
        struct Sink<'input> {
            events: Vec<Event<'input>>,
        }

        impl<'input> EventReceiver<'input> for Sink<'input> {
            fn on_event(&mut self, ev: Event<'input>) {
                self.events.push(ev);
            }
        }

        let mut sink = Sink::default();
        let mut parser = Parser::new_from_str("key: value\n");

        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
        parser.load(&mut sink, false).unwrap();

        let mut received = sink.events.iter();
        assert!(matches!(received.next(), Some(Event::StreamStart)));
        assert!(matches!(received.next(), Some(Event::DocumentStart(_))));
    }
}