saphyr_parser/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, Input},
9    scanner::{ScanError, Scanner, Span, TScalarStyle, Token, TokenType},
10    BufferedInput, Marker,
11};
12
13use std::collections::HashMap;
14
15#[derive(Clone, Copy, PartialEq, Debug, Eq)]
16enum State {
17    StreamStart,
18    ImplicitDocumentStart,
19    DocumentStart,
20    DocumentContent,
21    DocumentEnd,
22    BlockNode,
23    BlockSequenceFirstEntry,
24    BlockSequenceEntry,
25    IndentlessSequenceEntry,
26    BlockMappingFirstKey,
27    BlockMappingKey,
28    BlockMappingValue,
29    FlowSequenceFirstEntry,
30    FlowSequenceEntry,
31    FlowSequenceEntryMappingKey,
32    FlowSequenceEntryMappingValue,
33    FlowSequenceEntryMappingEnd(Marker),
34    FlowMappingFirstKey,
35    FlowMappingKey,
36    FlowMappingValue,
37    FlowMappingEmptyValue,
38    End,
39}
40
41/// An event generated by the YAML parser.
42///
43/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
44/// [`EventReceiver`] trait.
45#[derive(Clone, PartialEq, Debug, Eq)]
46pub enum Event {
47    /// Reserved for internal use.
48    Nothing,
49    /// Event generated at the very beginning of parsing.
50    StreamStart,
51    /// Last event that will be generated by the parser. Signals EOF.
52    StreamEnd,
53    /// The start of a YAML document.
54    ///
55    /// When the boolean is `true`, it is an explicit document start
56    /// directive (`---`).
57    ///
58    /// When the boolean is `false`, it is an implicit document start
59    /// (without `---`).
60    DocumentStart(bool),
61    /// The YAML end document directive (`...`).
62    DocumentEnd,
63    /// A YAML Alias.
64    Alias(
65        /// The anchor ID the alias refers to.
66        usize,
67    ),
68    /// Value, style, `anchor_id`, tag
69    Scalar(String, TScalarStyle, usize, Option<Tag>),
70    /// The start of a YAML sequence (array).
71    SequenceStart(
72        /// The anchor ID of the start of the sequence.
73        usize,
74        /// An optional tag
75        Option<Tag>,
76    ),
77    /// The end of a YAML sequence (array).
78    SequenceEnd,
79    /// The start of a YAML mapping (object, hash).
80    MappingStart(
81        /// The anchor ID of the start of the mapping.
82        usize,
83        /// An optional tag
84        Option<Tag>,
85    ),
86    /// The end of a YAML mapping (object, hash).
87    MappingEnd,
88}
89
/// A YAML tag, split into its handle and its suffix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tag {
    /// The tag handle (`!` included).
    pub handle: String,
    /// The tag suffix.
    pub suffix: String,
}
98
99impl Event {
100    /// Create an empty scalar.
101    fn empty_scalar() -> Event {
102        // a null scalar
103        Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None)
104    }
105
106    /// Create an empty scalar with the given anchor.
107    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Tag>) -> Event {
108        Event::Scalar(String::new(), TScalarStyle::Plain, anchor, tag)
109    }
110}
111
112/// A YAML parser.
113#[derive(Debug)]
114pub struct Parser<T: Input> {
115    /// The underlying scanner from which we pull tokens.
116    scanner: Scanner<T>,
117    /// The stack of _previous_ states we were in.
118    ///
119    /// States are pushed in the context of subobjects to this stack. The top-most element is the
120    /// state in which to come back to when exiting the current state.
121    states: Vec<State>,
122    /// The state in which we currently are.
123    state: State,
124    /// The next token from the scanner.
125    token: Option<Token>,
126    /// The next YAML event to emit.
127    current: Option<(Event, Span)>,
128    /// Anchors that have been encountered in the YAML document.
129    anchors: HashMap<String, usize>,
130    /// Next ID available for an anchor.
131    ///
132    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
133    /// return for the next anchor and the count of anchor IDs emitted.
134    anchor_id_count: usize,
135    /// The tag directives (`%TAG`) the parser has encountered.
136    ///
137    /// Key is the handle, and value is the prefix.
138    tags: HashMap<String, String>,
139    /// Whether we have emitted [`Event::StreamEnd`].
140    ///
141    /// Emitted means that it has been returned from [`Self::next_token`]. If it is stored in
142    /// [`Self::token`], this is set to `false`.
143    stream_end_emitted: bool,
144    /// Make tags global across all documents.
145    keep_tags: bool,
146}
147
148/// Trait to be implemented in order to use the low-level parsing API.
149///
150/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
151/// for each YAML [`Event`] that occurs.
152/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
153/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
154/// [`SpannedEventReceiver`] automatically.
155///
156/// # Event hierarchy
157/// The event stream starts with an [`Event::StreamStart`] event followed by an
158/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
159/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
160/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
161///
162/// In a mapping, key-values are sent as consecutive events. The first event after an
163/// [`Event::MappingStart`] will be the key, and following its value. If the mapping contains no
164/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd
165/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received.
166///
167/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
168///
169/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
170/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
171/// [`Event::MappingStart`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
172/// be part of the value and not another key-value pair or element in the sequence.
173///
174/// For instance, the following yaml:
175/// ```yaml
176/// a: b
177/// c:
178///   d: e
179/// f:
180///   - g
181///   - h
182/// ```
183/// will emit (indented and commented for lisibility):
184/// ```text
185/// StreamStart, DocumentStart, MappingStart,
186///   Scalar("a", ..), Scalar("b", ..)
187///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
188///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
189/// MappingEnd, DocumentEnd, StreamEnd
190/// ```
191///
192/// # Example
193/// ```
194/// # use saphyr_parser::{Event, EventReceiver, Parser};
195/// #
196/// /// Sink of events. Collects them into an array.
197/// struct EventSink {
198///     events: Vec<Event>,
199/// }
200///
201/// /// Implement `on_event`, pushing into `self.events`.
202/// impl EventReceiver for EventSink {
203///     fn on_event(&mut self, ev: Event) {
204///         self.events.push(ev);
205///     }
206/// }
207///
208/// /// Load events from a yaml string.
209/// fn str_to_events(yaml: &str) -> Vec<Event> {
210///     let mut sink = EventSink { events: Vec::new() };
211///     let mut parser = Parser::new_from_str(yaml);
212///     // Load events using our sink as the receiver.
213///     parser.load(&mut sink, true).unwrap();
214///     sink.events
215/// }
216/// ```
217pub trait EventReceiver {
218    /// Handler called for each YAML event that is emitted by the parser.
219    fn on_event(&mut self, ev: Event);
220}
221
222/// Trait to be implemented for using the low-level parsing API.
223///
224/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event.
225pub trait SpannedEventReceiver {
226    /// Handler called for each event that occurs.
227    fn on_event(&mut self, ev: Event, span: Span);
228}
229
230impl<R: EventReceiver> SpannedEventReceiver for R {
231    fn on_event(&mut self, ev: Event, _span: Span) {
232        self.on_event(ev);
233    }
234}
235
236/// A convenience alias for a `Result` of a parser event.
237pub type ParseResult = Result<(Event, Span), ScanError>;
238
239impl<'a> Parser<StrInput<'a>> {
240    /// Create a new instance of a parser from a &str.
241    #[must_use]
242    pub fn new_from_str(value: &'a str) -> Self {
243        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
244        Parser::new(StrInput::new(value))
245    }
246}
247
248impl<T> Parser<BufferedInput<T>>
249where
250    T: Iterator<Item = char>,
251{
252    /// Create a new instance of a parser from an iterator of `char`s.
253    #[must_use]
254    pub fn new_from_iter(iter: T) -> Self {
255        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
256        Parser::new(BufferedInput::new(iter))
257    }
258}
259
260impl<T: Input> Parser<T> {
261    /// Create a new instance of a parser from the given input of characters.
262    pub fn new(src: T) -> Parser<T> {
263        Parser {
264            scanner: Scanner::new(src),
265            states: Vec::new(),
266            state: State::StreamStart,
267            token: None,
268            current: None,
269
270            anchors: HashMap::new(),
271            // valid anchor_id starts from 1
272            anchor_id_count: 1,
273            tags: HashMap::new(),
274            stream_end_emitted: false,
275            keep_tags: false,
276        }
277    }
278
279    /// Whether to keep tags across multiple documents when parsing.
280    ///
281    /// This behavior is non-standard as per the YAML specification but can be encountered in the
282    /// wild. This boolean allows enabling this non-standard extension. This would result in the
283    /// parser accepting input from [test
284    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
285    /// of the yaml-test-suite:
286    ///
287    /// ```yaml
288    /// %TAG !prefix! tag:example.com,2011:
289    /// --- !prefix!A
290    /// a: b
291    /// --- !prefix!B
292    /// c: d
293    /// --- !prefix!C
294    /// e: f
295    /// ```
296    ///
297    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
298    /// only apply to the document immediately following them. This would error on `!prefix!B`.
299    ///
300    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
301    #[must_use]
302    pub fn keep_tags(mut self, value: bool) -> Self {
303        self.keep_tags = value;
304        self
305    }
306
307    /// Try to load the next event and return it, but do not consuming it from `self`.
308    ///
309    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
310    /// [`Iterator::next`] or [`Parser::load`].
311    ///
312    /// # Errors
313    /// Returns `ScanError` when loading the next event fails.
314    pub fn peek(&mut self) -> Option<Result<&(Event, Span), ScanError>> {
315        if let Some(ref x) = self.current {
316            Some(Ok(x))
317        } else {
318            if self.stream_end_emitted {
319                return None;
320            }
321            match self.next_event_impl() {
322                Ok(token) => self.current = Some(token),
323                Err(e) => return Some(Err(e)),
324            }
325            self.current.as_ref().map(Ok)
326        }
327    }
328
329    /// Try to load the next event and return it, consuming it from `self`.
330    ///
331    /// # Errors
332    /// Returns `ScanError` when loading the next event fails.
333    pub fn next_event(&mut self) -> Option<ParseResult> {
334        if self.stream_end_emitted {
335            return None;
336        }
337
338        let tok = self.next_event_impl();
339        if matches!(tok, Ok((Event::StreamEnd, _))) {
340            self.stream_end_emitted = true;
341        }
342        Some(tok)
343    }
344
345    /// Implementation function for [`Self::next_event`] without the `Option`.
346    ///
347    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
348    /// option. This burdens the parser code. This function is used internally when an option is
349    /// undesirable.
350    fn next_event_impl(&mut self) -> ParseResult {
351        match self.current.take() {
352            None => self.parse(),
353            Some(v) => Ok(v),
354        }
355    }
356
357    /// Peek at the next token from the scanner.
358    fn peek_token(&mut self) -> Result<&Token, ScanError> {
359        match self.token {
360            None => {
361                self.token = Some(self.scan_next_token()?);
362                Ok(self.token.as_ref().unwrap())
363            }
364            Some(ref tok) => Ok(tok),
365        }
366    }
367
368    /// Extract and return the next token from the scanner.
369    ///
370    /// This function does _not_ make use of `self.token`.
371    fn scan_next_token(&mut self) -> Result<Token, ScanError> {
372        let token = self.scanner.next();
373        match token {
374            None => match self.scanner.get_error() {
375                None => Err(ScanError::new_str(self.scanner.mark(), "unexpected eof")),
376                Some(e) => Err(e),
377            },
378            Some(tok) => Ok(tok),
379        }
380    }
381
382    fn fetch_token(&mut self) -> Token {
383        self.token
384            .take()
385            .expect("fetch_token needs to be preceded by peek_token")
386    }
387
388    /// Skip the next token from the scanner.
389    fn skip(&mut self) {
390        self.token = None;
391    }
392    /// Pops the top-most state and make it the current state.
393    fn pop_state(&mut self) {
394        self.state = self.states.pop().unwrap();
395    }
396    /// Push a new state atop the state stack.
397    fn push_state(&mut self, state: State) {
398        self.states.push(state);
399    }
400
401    fn parse(&mut self) -> ParseResult {
402        if self.state == State::End {
403            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
404        }
405        let (ev, mark) = self.state_machine()?;
406        Ok((ev, mark))
407    }
408
409    /// Load the YAML from the stream in `self`, pushing events into `recv`.
410    ///
411    /// The contents of the stream are parsed and the corresponding events are sent into the
412    /// recveiver. For detailed explanations about how events work, see [`EventReceiver`].
413    ///
414    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
415    /// inside the stream.
416    ///
417    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
418    /// former is enough to call this function.
419    /// # Errors
420    /// Returns `ScanError` when loading fails.
421    pub fn load<R: SpannedEventReceiver>(
422        &mut self,
423        recv: &mut R,
424        multi: bool,
425    ) -> Result<(), ScanError> {
426        if !self.scanner.stream_started() {
427            let (ev, span) = self.next_event_impl()?;
428            if ev != Event::StreamStart {
429                return Err(ScanError::new_str(
430                    span.start,
431                    "did not find expected <stream-start>",
432                ));
433            }
434            recv.on_event(ev, span);
435        }
436
437        if self.scanner.stream_ended() {
438            // XXX has parsed?
439            recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
440            return Ok(());
441        }
442        loop {
443            let (ev, span) = self.next_event_impl()?;
444            if ev == Event::StreamEnd {
445                recv.on_event(ev, span);
446                return Ok(());
447            }
448            // clear anchors before a new document
449            self.anchors.clear();
450            self.load_document(ev, span, recv)?;
451            if !multi {
452                break;
453            }
454        }
455        Ok(())
456    }
457
458    fn load_document<R: SpannedEventReceiver>(
459        &mut self,
460        first_ev: Event,
461        span: Span,
462        recv: &mut R,
463    ) -> Result<(), ScanError> {
464        if !matches!(first_ev, Event::DocumentStart(_)) {
465            return Err(ScanError::new_str(
466                span.start,
467                "did not find expected <document-start>",
468            ));
469        }
470        recv.on_event(first_ev, span);
471
472        let (ev, span) = self.next_event_impl()?;
473        self.load_node(ev, span, recv)?;
474
475        // DOCUMENT-END is expected.
476        let (ev, mark) = self.next_event_impl()?;
477        assert_eq!(ev, Event::DocumentEnd);
478        recv.on_event(ev, mark);
479
480        Ok(())
481    }
482
483    fn load_node<R: SpannedEventReceiver>(
484        &mut self,
485        first_ev: Event,
486        span: Span,
487        recv: &mut R,
488    ) -> Result<(), ScanError> {
489        match first_ev {
490            Event::Alias(..) | Event::Scalar(..) => {
491                recv.on_event(first_ev, span);
492                Ok(())
493            }
494            Event::SequenceStart(..) => {
495                recv.on_event(first_ev, span);
496                self.load_sequence(recv)
497            }
498            Event::MappingStart(..) => {
499                recv.on_event(first_ev, span);
500                self.load_mapping(recv)
501            }
502            _ => {
503                println!("UNREACHABLE EVENT: {first_ev:?}");
504                unreachable!();
505            }
506        }
507    }
508
509    fn load_mapping<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
510        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
511        while key_ev != Event::MappingEnd {
512            // key
513            self.load_node(key_ev, key_mark, recv)?;
514
515            // value
516            let (ev, mark) = self.next_event_impl()?;
517            self.load_node(ev, mark, recv)?;
518
519            // next event
520            let (ev, mark) = self.next_event_impl()?;
521            key_ev = ev;
522            key_mark = mark;
523        }
524        recv.on_event(key_ev, key_mark);
525        Ok(())
526    }
527
528    fn load_sequence<R: SpannedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
529        let (mut ev, mut mark) = self.next_event_impl()?;
530        while ev != Event::SequenceEnd {
531            self.load_node(ev, mark, recv)?;
532
533            // next event
534            let (next_ev, next_mark) = self.next_event_impl()?;
535            ev = next_ev;
536            mark = next_mark;
537        }
538        recv.on_event(ev, mark);
539        Ok(())
540    }
541
542    fn state_machine(&mut self) -> ParseResult {
543        // let next_tok = self.peek_token().cloned()?;
544        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
545        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
546
547        match self.state {
548            State::StreamStart => self.stream_start(),
549
550            State::ImplicitDocumentStart => self.document_start(true),
551            State::DocumentStart => self.document_start(false),
552            State::DocumentContent => self.document_content(),
553            State::DocumentEnd => self.document_end(),
554
555            State::BlockNode => self.parse_node(true, false),
556            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
557            // State::FlowNode => self.parse_node(false, false),
558            State::BlockMappingFirstKey => self.block_mapping_key(true),
559            State::BlockMappingKey => self.block_mapping_key(false),
560            State::BlockMappingValue => self.block_mapping_value(),
561
562            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
563            State::BlockSequenceEntry => self.block_sequence_entry(false),
564
565            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
566            State::FlowSequenceEntry => self.flow_sequence_entry(false),
567
568            State::FlowMappingFirstKey => self.flow_mapping_key(true),
569            State::FlowMappingKey => self.flow_mapping_key(false),
570            State::FlowMappingValue => self.flow_mapping_value(false),
571
572            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
573
574            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
575            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
576            State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
577            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
578
579            /* impossible */
580            State::End => unreachable!(),
581        }
582    }
583
584    fn stream_start(&mut self) -> ParseResult {
585        match *self.peek_token()? {
586            Token(span, TokenType::StreamStart(_)) => {
587                self.state = State::ImplicitDocumentStart;
588                self.skip();
589                Ok((Event::StreamStart, span))
590            }
591            Token(span, _) => Err(ScanError::new_str(
592                span.start,
593                "did not find expected <stream-start>",
594            )),
595        }
596    }
597
598    fn document_start(&mut self, implicit: bool) -> ParseResult {
599        while let TokenType::DocumentEnd = self.peek_token()?.1 {
600            self.skip();
601        }
602
603        match *self.peek_token()? {
604            Token(span, TokenType::StreamEnd) => {
605                self.state = State::End;
606                self.skip();
607                Ok((Event::StreamEnd, span))
608            }
609            Token(
610                _,
611                TokenType::VersionDirective(..)
612                | TokenType::TagDirective(..)
613                | TokenType::DocumentStart,
614            ) => {
615                // explicit document
616                self.explicit_document_start()
617            }
618            Token(span, _) if implicit => {
619                self.parser_process_directives()?;
620                self.push_state(State::DocumentEnd);
621                self.state = State::BlockNode;
622                Ok((Event::DocumentStart(false), span))
623            }
624            _ => {
625                // explicit document
626                self.explicit_document_start()
627            }
628        }
629    }
630
631    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
632        let mut version_directive_received = false;
633        loop {
634            let mut tags = HashMap::new();
635            match self.peek_token()? {
636                Token(span, TokenType::VersionDirective(_, _)) => {
637                    // XXX parsing with warning according to spec
638                    //if major != 1 || minor > 2 {
639                    //    return Err(ScanError::new_str(tok.0,
640                    //        "found incompatible YAML document"));
641                    //}
642                    if version_directive_received {
643                        return Err(ScanError::new_str(
644                            span.start,
645                            "duplicate version directive",
646                        ));
647                    }
648                    version_directive_received = true;
649                }
650                Token(mark, TokenType::TagDirective(handle, prefix)) => {
651                    if tags.contains_key(handle) {
652                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
653                    }
654                    tags.insert(handle.to_string(), prefix.to_string());
655                }
656                _ => break,
657            }
658            self.tags = tags;
659            self.skip();
660        }
661        Ok(())
662    }
663
664    fn explicit_document_start(&mut self) -> ParseResult {
665        self.parser_process_directives()?;
666        match *self.peek_token()? {
667            Token(mark, TokenType::DocumentStart) => {
668                self.push_state(State::DocumentEnd);
669                self.state = State::DocumentContent;
670                self.skip();
671                Ok((Event::DocumentStart(true), mark))
672            }
673            Token(span, _) => Err(ScanError::new_str(
674                span.start,
675                "did not find expected <document start>",
676            )),
677        }
678    }
679
680    fn document_content(&mut self) -> ParseResult {
681        match *self.peek_token()? {
682            Token(
683                mark,
684                TokenType::VersionDirective(..)
685                | TokenType::TagDirective(..)
686                | TokenType::DocumentStart
687                | TokenType::DocumentEnd
688                | TokenType::StreamEnd,
689            ) => {
690                self.pop_state();
691                // empty scalar
692                Ok((Event::empty_scalar(), mark))
693            }
694            _ => self.parse_node(true, false),
695        }
696    }
697
698    fn document_end(&mut self) -> ParseResult {
699        let mut explicit_end = false;
700        let span: Span = match *self.peek_token()? {
701            Token(span, TokenType::DocumentEnd) => {
702                explicit_end = true;
703                self.skip();
704                span
705            }
706            Token(span, _) => span,
707        };
708
709        if !self.keep_tags {
710            self.tags.clear();
711        }
712        if explicit_end {
713            self.state = State::ImplicitDocumentStart;
714        } else {
715            if let Token(span, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
716                *self.peek_token()?
717            {
718                return Err(ScanError::new_str(
719                    span.start,
720                    "missing explicit document end marker before directive",
721                ));
722            }
723            self.state = State::DocumentStart;
724        }
725
726        Ok((Event::DocumentEnd, span))
727    }
728
729    fn register_anchor(&mut self, name: String, _: &Span) -> usize {
730        // anchors can be overridden/reused
731        // if self.anchors.contains_key(name) {
732        //     return Err(ScanError::new_str(*mark,
733        //         "while parsing anchor, found duplicated anchor"));
734        // }
735        let new_id = self.anchor_id_count;
736        self.anchor_id_count += 1;
737        self.anchors.insert(name, new_id);
738        new_id
739    }
740
741    fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
742        let mut anchor_id = 0;
743        let mut tag = None;
744        match *self.peek_token()? {
745            Token(_, TokenType::Alias(_)) => {
746                self.pop_state();
747                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
748                    match self.anchors.get(&name) {
749                        None => {
750                            return Err(ScanError::new_str(
751                                span.start,
752                                "while parsing node, found unknown anchor",
753                            ))
754                        }
755                        Some(id) => return Ok((Event::Alias(*id), span)),
756                    }
757                }
758                unreachable!()
759            }
760            Token(_, TokenType::Anchor(_)) => {
761                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
762                    anchor_id = self.register_anchor(name, &span);
763                    if let TokenType::Tag(..) = self.peek_token()?.1 {
764                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
765                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
766                        } else {
767                            unreachable!()
768                        }
769                    }
770                } else {
771                    unreachable!()
772                }
773            }
774            Token(mark, TokenType::Tag(..)) => {
775                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
776                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
777                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
778                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
779                            anchor_id = self.register_anchor(name, &mark);
780                        } else {
781                            unreachable!()
782                        }
783                    }
784                } else {
785                    unreachable!()
786                }
787            }
788            _ => {}
789        }
790        match *self.peek_token()? {
791            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
792                self.state = State::IndentlessSequenceEntry;
793                Ok((Event::SequenceStart(anchor_id, tag), mark))
794            }
795            Token(_, TokenType::Scalar(..)) => {
796                self.pop_state();
797                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
798                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
799                } else {
800                    unreachable!()
801                }
802            }
803            Token(mark, TokenType::FlowSequenceStart) => {
804                self.state = State::FlowSequenceFirstEntry;
805                Ok((Event::SequenceStart(anchor_id, tag), mark))
806            }
807            Token(mark, TokenType::FlowMappingStart) => {
808                self.state = State::FlowMappingFirstKey;
809                Ok((Event::MappingStart(anchor_id, tag), mark))
810            }
811            Token(mark, TokenType::BlockSequenceStart) if block => {
812                self.state = State::BlockSequenceFirstEntry;
813                Ok((Event::SequenceStart(anchor_id, tag), mark))
814            }
815            Token(mark, TokenType::BlockMappingStart) if block => {
816                self.state = State::BlockMappingFirstKey;
817                Ok((Event::MappingStart(anchor_id, tag), mark))
818            }
819            // ex 7.2, an empty scalar can follow a secondary tag
820            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
821                self.pop_state();
822                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
823            }
824            Token(span, _) => Err(ScanError::new_str(
825                span.start,
826                "while parsing a node, did not find expected node content",
827            )),
828        }
829    }
830
831    fn block_mapping_key(&mut self, first: bool) -> ParseResult {
832        // skip BlockMappingStart
833        if first {
834            let _ = self.peek_token()?;
835            //self.marks.push(tok.0);
836            self.skip();
837        }
838        match *self.peek_token()? {
839            Token(_, TokenType::Key) => {
840                self.skip();
841                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
842                    *self.peek_token()?
843                {
844                    self.state = State::BlockMappingValue;
845                    // empty scalar
846                    Ok((Event::empty_scalar(), mark))
847                } else {
848                    self.push_state(State::BlockMappingValue);
849                    self.parse_node(true, true)
850                }
851            }
852            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
853            Token(mark, TokenType::Value) => {
854                self.state = State::BlockMappingValue;
855                Ok((Event::empty_scalar(), mark))
856            }
857            Token(mark, TokenType::BlockEnd) => {
858                self.pop_state();
859                self.skip();
860                Ok((Event::MappingEnd, mark))
861            }
862            Token(span, _) => Err(ScanError::new_str(
863                span.start,
864                "while parsing a block mapping, did not find expected key",
865            )),
866        }
867    }
868
869    fn block_mapping_value(&mut self) -> ParseResult {
870        match *self.peek_token()? {
871            Token(_, TokenType::Value) => {
872                self.skip();
873                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
874                    *self.peek_token()?
875                {
876                    self.state = State::BlockMappingKey;
877                    // empty scalar
878                    Ok((Event::empty_scalar(), mark))
879                } else {
880                    self.push_state(State::BlockMappingKey);
881                    self.parse_node(true, true)
882                }
883            }
884            Token(mark, _) => {
885                self.state = State::BlockMappingKey;
886                // empty scalar
887                Ok((Event::empty_scalar(), mark))
888            }
889        }
890    }
891
892    fn flow_mapping_key(&mut self, first: bool) -> ParseResult {
893        if first {
894            let _ = self.peek_token()?;
895            self.skip();
896        }
897        let span: Span = {
898            match *self.peek_token()? {
899                Token(mark, TokenType::FlowMappingEnd) => mark,
900                Token(mark, _) => {
901                    if !first {
902                        match *self.peek_token()? {
903                            Token(_, TokenType::FlowEntry) => self.skip(),
904                            Token(span, _) => return Err(ScanError::new_str(
905                                span.start,
906                                "while parsing a flow mapping, did not find expected ',' or '}'",
907                            )),
908                        }
909                    }
910
911                    match *self.peek_token()? {
912                        Token(_, TokenType::Key) => {
913                            self.skip();
914                            if let Token(
915                                mark,
916                                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
917                            ) = *self.peek_token()?
918                            {
919                                self.state = State::FlowMappingValue;
920                                return Ok((Event::empty_scalar(), mark));
921                            }
922                            self.push_state(State::FlowMappingValue);
923                            return self.parse_node(false, false);
924                        }
925                        Token(marker, TokenType::Value) => {
926                            self.state = State::FlowMappingValue;
927                            return Ok((Event::empty_scalar(), marker));
928                        }
929                        Token(_, TokenType::FlowMappingEnd) => (),
930                        _ => {
931                            self.push_state(State::FlowMappingEmptyValue);
932                            return self.parse_node(false, false);
933                        }
934                    }
935
936                    mark
937                }
938            }
939        };
940
941        self.pop_state();
942        self.skip();
943        Ok((Event::MappingEnd, span))
944    }
945
946    fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
947        let span: Span = {
948            if empty {
949                let Token(mark, _) = *self.peek_token()?;
950                self.state = State::FlowMappingKey;
951                return Ok((Event::empty_scalar(), mark));
952            }
953            match *self.peek_token()? {
954                Token(span, TokenType::Value) => {
955                    self.skip();
956                    match self.peek_token()?.1 {
957                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
958                        _ => {
959                            self.push_state(State::FlowMappingKey);
960                            return self.parse_node(false, false);
961                        }
962                    }
963                    span
964                }
965                Token(marker, _) => marker,
966            }
967        };
968
969        self.state = State::FlowMappingKey;
970        Ok((Event::empty_scalar(), span))
971    }
972
973    fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
974        // skip FlowMappingStart
975        if first {
976            let _ = self.peek_token()?;
977            //self.marks.push(tok.0);
978            self.skip();
979        }
980        match *self.peek_token()? {
981            Token(mark, TokenType::FlowSequenceEnd) => {
982                self.pop_state();
983                self.skip();
984                return Ok((Event::SequenceEnd, mark));
985            }
986            Token(_, TokenType::FlowEntry) if !first => {
987                self.skip();
988            }
989            Token(span, _) if !first => {
990                return Err(ScanError::new_str(
991                    span.start,
992                    "while parsing a flow sequence, expected ',' or ']'",
993                ));
994            }
995            _ => { /* next */ }
996        }
997        match *self.peek_token()? {
998            Token(mark, TokenType::FlowSequenceEnd) => {
999                self.pop_state();
1000                self.skip();
1001                Ok((Event::SequenceEnd, mark))
1002            }
1003            Token(mark, TokenType::Key) => {
1004                self.state = State::FlowSequenceEntryMappingKey;
1005                self.skip();
1006                Ok((Event::MappingStart(0, None), mark))
1007            }
1008            _ => {
1009                self.push_state(State::FlowSequenceEntry);
1010                self.parse_node(false, false)
1011            }
1012        }
1013    }
1014
1015    fn indentless_sequence_entry(&mut self) -> ParseResult {
1016        match *self.peek_token()? {
1017            Token(_, TokenType::BlockEntry) => (),
1018            Token(mark, _) => {
1019                self.pop_state();
1020                return Ok((Event::SequenceEnd, mark));
1021            }
1022        }
1023        self.skip();
1024        if let Token(
1025            mark,
1026            TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1027        ) = *self.peek_token()?
1028        {
1029            self.state = State::IndentlessSequenceEntry;
1030            Ok((Event::empty_scalar(), mark))
1031        } else {
1032            self.push_state(State::IndentlessSequenceEntry);
1033            self.parse_node(true, false)
1034        }
1035    }
1036
1037    fn block_sequence_entry(&mut self, first: bool) -> ParseResult {
1038        // BLOCK-SEQUENCE-START
1039        if first {
1040            let _ = self.peek_token()?;
1041            //self.marks.push(tok.0);
1042            self.skip();
1043        }
1044        match *self.peek_token()? {
1045            Token(mark, TokenType::BlockEnd) => {
1046                self.pop_state();
1047                self.skip();
1048                Ok((Event::SequenceEnd, mark))
1049            }
1050            Token(_, TokenType::BlockEntry) => {
1051                self.skip();
1052                if let Token(mark, TokenType::BlockEntry | TokenType::BlockEnd) =
1053                    *self.peek_token()?
1054                {
1055                    self.state = State::BlockSequenceEntry;
1056                    Ok((Event::empty_scalar(), mark))
1057                } else {
1058                    self.push_state(State::BlockSequenceEntry);
1059                    self.parse_node(true, false)
1060                }
1061            }
1062            Token(span, _) => Err(ScanError::new_str(
1063                span.start,
1064                "while parsing a block collection, did not find expected '-' indicator",
1065            )),
1066        }
1067    }
1068
1069    fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult {
1070        if let Token(mark, TokenType::Value | TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1071            *self.peek_token()?
1072        {
1073            self.skip();
1074            self.state = State::FlowSequenceEntryMappingValue;
1075            Ok((Event::empty_scalar(), mark))
1076        } else {
1077            self.push_state(State::FlowSequenceEntryMappingValue);
1078            self.parse_node(false, false)
1079        }
1080    }
1081
1082    fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult {
1083        match *self.peek_token()? {
1084            Token(_, TokenType::Value) => {
1085                self.skip();
1086                self.state = State::FlowSequenceEntryMappingValue;
1087                let Token(span, ref tok) = *self.peek_token()?;
1088                if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1089                    self.state = State::FlowSequenceEntryMappingEnd(span.end);
1090                    Ok((Event::empty_scalar(), span))
1091                } else {
1092                    self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
1093                    self.parse_node(false, false)
1094                }
1095            }
1096            Token(mark, _) => {
1097                self.state = State::FlowSequenceEntryMappingEnd(mark.end);
1098                Ok((Event::empty_scalar(), mark))
1099            }
1100        }
1101    }
1102
1103    #[allow(clippy::unnecessary_wraps)]
1104    fn flow_sequence_entry_mapping_end(&mut self, mark: Marker) -> ParseResult {
1105        self.state = State::FlowSequenceEntry;
1106        Ok((Event::MappingEnd, Span::empty(mark)))
1107    }
1108
1109    /// Resolve a tag from the handle and the suffix.
1110    fn resolve_tag(&self, span: Span, handle: &str, suffix: String) -> Result<Tag, ScanError> {
1111        if handle == "!!" {
1112            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
1113            // overridden.
1114            Ok(Tag {
1115                handle: self
1116                    .tags
1117                    .get("!!")
1118                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1119                suffix,
1120            })
1121        } else if handle.is_empty() && suffix == "!" {
1122            // "!" introduces a local tag. Local tags may have their prefix overridden.
1123            match self.tags.get("") {
1124                Some(prefix) => Ok(Tag {
1125                    handle: prefix.to_string(),
1126                    suffix,
1127                }),
1128                None => Ok(Tag {
1129                    handle: String::new(),
1130                    suffix,
1131                }),
1132            }
1133        } else {
1134            // Lookup handle in our tag directives.
1135            let prefix = self.tags.get(handle);
1136            if let Some(prefix) = prefix {
1137                Ok(Tag {
1138                    handle: prefix.to_string(),
1139                    suffix,
1140                })
1141            } else {
1142                // Otherwise, it may be a local handle. With a local handle, the handle is set to
1143                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
1144                // If the handle is of the form "!foo!", this cannot be a local handle and we need
1145                // to error.
1146                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1147                    Err(ScanError::new_str(span.start, "the handle wasn't declared"))
1148                } else {
1149                    Ok(Tag {
1150                        handle: handle.to_string(),
1151                        suffix,
1152                    })
1153                }
1154            }
1155        }
1156    }
1157}
1158
1159impl<T: Input> Iterator for Parser<T> {
1160    type Item = Result<(Event, Span), ScanError>;
1161
1162    fn next(&mut self) -> Option<Self::Item> {
1163        self.next_event()
1164    }
1165}
1166
#[cfg(test)]
mod test {
    use super::{Event, Parser};

    /// Peeking at the next event must yield exactly what parsing the next event yields.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut parser = Parser::new_from_str(s);
        let mut reached_end = false;
        while !reached_end {
            let peeked = parser.peek().unwrap().unwrap().clone();
            let parsed = parser.next_event().unwrap().unwrap();
            assert_eq!(parsed, peeked);
            reached_end = parsed.0 == Event::StreamEnd;
        }
    }

    /// With `keep_tags(true)`, the `%TAG` directive still applies to the second document; with
    /// `keep_tags(false)`, parsing the same input must report an error.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        for item in Parser::new_from_str(text).keep_tags(true) {
            let (event, _) = item.unwrap();
            if let Event::MappingStart(_, tag) = event {
                assert_eq!(tag.unwrap().handle, "tag:test,2024:");
            }
        }

        // The test passes as soon as one error shows up.
        let saw_error = Parser::new_from_str(text)
            .keep_tags(false)
            .any(|item| item.is_err());
        assert!(saw_error, "Test failed, did not encounter error");
    }
}