saphyr_parser/
parser.rs

1//! Home to the YAML Parser.
2//!
3//! The parser takes input from the [`crate::scanner::Scanner`], performs final checks for YAML
4//! compliance, and emits a stream of YAML events. This stream can for instance be used to create
5//! YAML objects.
6
7use crate::{
8    input::{str::StrInput, Input},
9    scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10    BufferedInput, Marker,
11};
12
13use std::{borrow::Cow, collections::HashMap, fmt::Display};
14
/// The states of the parser's state machine.
///
/// `Parser::state_machine` dispatches on the current state to the handler that produces the next
/// event. States that must be resumed later are kept on the parser's `states` stack.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// Initial state of a freshly created parser.
    StreamStart,
    /// Start of a document that may begin without a `---` marker.
    ImplicitDocumentStart,
    /// Start of a document handled by `document_start(false)`.
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingValue,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    /// Carries the [`Marker`] that is handed to the handler of this state.
    FlowSequenceEntryMappingEnd(Marker),
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingValue,
    FlowMappingEmptyValue,
    /// Terminal state: once reached, `Parser::parse` only ever yields [`Event::StreamEnd`].
    End,
}
40
/// An event generated by the YAML parser.
///
/// Events are used in the low-level event-based API (push parser). The API entrypoint is the
/// [`EventReceiver`] trait.
///
/// Anchor IDs carried by events start at 1; an anchor ID of `0` means that the node has no
/// anchor.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Reserved for internal use.
    Nothing,
    /// Event generated at the very beginning of parsing.
    StreamStart,
    /// Last event that will be generated by the parser. Signals EOF.
    StreamEnd,
    /// The start of a YAML document.
    ///
    /// When the boolean is `true`, it is an explicit document start
    /// directive (`---`).
    ///
    /// When the boolean is `false`, it is an implicit document start
    /// (without `---`).
    DocumentStart(bool),
    /// The end of a YAML document.
    ///
    /// This event is emitted whether or not the document was closed by an explicit end marker
    /// (`...`).
    DocumentEnd,
    /// A YAML Alias.
    Alias(
        /// The anchor ID the alias refers to.
        usize,
    ),
    /// A YAML scalar.
    Scalar(
        /// The value of the scalar.
        Cow<'input, str>,
        /// The style in which the scalar was written.
        ScalarStyle,
        /// The anchor ID of the scalar (`0` if it has none).
        usize,
        /// An optional tag
        Option<Cow<'input, Tag>>,
    ),
    /// The start of a YAML sequence (array).
    SequenceStart(
        /// The anchor ID of the start of the sequence.
        usize,
        /// An optional tag
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML sequence (array).
    SequenceEnd,
    /// The start of a YAML mapping (object, hash).
    MappingStart(
        /// The anchor ID of the start of the mapping.
        usize,
        /// An optional tag
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a YAML mapping (object, hash).
    MappingEnd,
}
94
/// A YAML tag, split into its handle and its suffix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Handle of the tag (`!` included).
    pub handle: String,
    /// The suffix of the tag.
    pub suffix: String,
}

impl Tag {
    /// Returns whether the tag is a YAML tag from the core schema (`!!str`, `!!int`, ...).
    ///
    /// The YAML specification specifies [a list of
    /// tags](https://yaml.org/spec/1.2.2/#103-core-schema) for the Core Schema. Only _the
    /// handle_ is inspected here; the suffix is not checked against that list.
    ///
    /// # Return
    /// Returns `true` if the handle is exactly `tag:yaml.org,2002:` (trailing colon included),
    /// `false` otherwise.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        const CORE_SCHEMA_HANDLE: &str = "tag:yaml.org,2002:";
        self.handle.as_str() == CORE_SCHEMA_HANDLE
    }
}

impl Display for Tag {
    /// Writes the tag as `!<suffix>` when the handle is the lone `!`, and as
    /// `<handle>!<suffix>` for any other handle.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { handle, suffix } = self;
        match handle.as_str() {
            "!" => write!(f, "!{suffix}"),
            _ => write!(f, "{handle}!{suffix}"),
        }
    }
}
128
129impl<'input> Event<'input> {
130    /// Create an empty scalar.
131    fn empty_scalar() -> Self {
132        // a null scalar
133        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
134    }
135
136    /// Create an empty scalar with the given anchor.
137    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
138        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
139    }
140}
141
/// A YAML parser.
///
/// The parser pulls tokens from a [`Scanner`] and turns them into a stream of [`Event`]s.
#[derive(Debug)]
pub struct Parser<'input, T: Input> {
    /// The underlying scanner from which we pull tokens.
    scanner: Scanner<'input, T>,
    /// The stack of _previous_ states we were in.
    ///
    /// States are pushed in the context of subobjects to this stack. The top-most element is the
    /// state in which to come back to when exiting the current state.
    states: Vec<State>,
    /// The state in which we currently are.
    state: State,
    /// The next token from the scanner.
    ///
    /// This is a one-token lookahead buffer: it is filled when peeking and emptied when the token
    /// is fetched or skipped.
    token: Option<Token<'input>>,
    /// The next YAML event to emit.
    ///
    /// This is filled by [`Self::peek`] and drained by the next request for an event.
    current: Option<(Event<'input>, Span)>,
    /// Anchors that have been encountered in the YAML document.
    ///
    /// Key is the anchor name, and value is the ID it was given.
    anchors: HashMap<Cow<'input, str>, usize>,
    /// Next ID available for an anchor.
    ///
    /// Every anchor is given a unique ID. We use an incrementing ID and this is both the ID to
    /// return for the next anchor and the count of anchor IDs emitted.
    anchor_id_count: usize,
    /// The tag directives (`%TAG`) the parser has encountered.
    ///
    /// Key is the handle, and value is the prefix.
    tags: HashMap<String, String>,
    /// Whether we have emitted [`Event::StreamEnd`].
    ///
    /// Emitted means that it has been returned from [`Self::next_event`]. While the event is only
    /// buffered in [`Self::current`] (after a call to [`Self::peek`]), this stays `false`.
    stream_end_emitted: bool,
    /// Make tags global across all documents.
    keep_tags: bool,
}
177
/// Trait to be implemented in order to use the low-level parsing API.
///
/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`]
/// for each YAML [`Event`] that occurs.
/// The [`EventReceiver`] trait only receives events. In order to receive both events and their
/// location in the source, use [`SpannedEventReceiver`]. Note that [`EventReceiver`]s implement
/// [`SpannedEventReceiver`] automatically.
///
/// # Event hierarchy
/// The event stream starts with an [`Event::StreamStart`] event followed by an
/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an
/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an
/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted.
///
/// In a mapping, key-values are sent as consecutive events. The first event after an
/// [`Event::MappingStart`] will be the key, and the following one its value. If the mapping
/// contains no sub-mapping or sub-sequence, then even events (starting from 0) will always be
/// keys and odd ones will always be values. The mapping ends when an [`Event::MappingEnd`] event
/// is received.
///
/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event.
///
/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or
/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated
/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will
/// be part of the value and not another key-value pair or element in the sequence.
///
/// For instance, the following yaml:
/// ```yaml
/// a: b
/// c:
///   d: e
/// f:
///   - g
///   - h
/// ```
/// will emit (indented and commented for readability):
/// ```text
/// StreamStart, DocumentStart, MappingStart,
///   Scalar("a", ..), Scalar("b", ..)
///   Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd,
///   Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd,
/// MappingEnd, DocumentEnd, StreamEnd
/// ```
///
/// # Example
/// ```
/// # use saphyr_parser::{Event, EventReceiver, Parser};
/// #
/// /// Sink of events. Collects them into an array.
/// struct EventSink<'input> {
///     events: Vec<Event<'input>>,
/// }
///
/// /// Implement `on_event`, pushing into `self.events`.
/// impl<'input> EventReceiver<'input> for EventSink<'input> {
///     fn on_event(&mut self, ev: Event<'input>) {
///         self.events.push(ev);
///     }
/// }
///
/// /// Load events from a yaml string.
/// fn str_to_events(yaml: &str) -> Vec<Event<'_>> {
///     let mut sink = EventSink { events: Vec::new() };
///     let mut parser = Parser::new_from_str(yaml);
///     // Load events using our sink as the receiver.
///     parser.load(&mut sink, true).unwrap();
///     sink.events
/// }
/// ```
pub trait EventReceiver<'input> {
    /// Handler called for each YAML event that is emitted by the parser.
    fn on_event(&mut self, ev: Event<'input>);
}
251
/// Trait to be implemented for using the low-level parsing API.
///
/// Functionally similar to [`EventReceiver`], but receives a [`Span`] as well as the event. Every
/// [`EventReceiver`] implements this trait automatically, in which case the span is discarded.
pub trait SpannedEventReceiver<'input> {
    /// Handler called for each event that occurs, along with the span attached to the event.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
259
260impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
261    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
262        self.on_event(ev);
263    }
264}
265
/// A convenience alias for a `Result` of a parser event.
///
/// On success, it holds the event together with its [`Span`]; on failure, a [`ScanError`].
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
268
269impl<'input> Parser<'input, StrInput<'input>> {
270    /// Create a new instance of a parser from a &str.
271    #[must_use]
272    pub fn new_from_str(value: &'input str) -> Self {
273        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
274        Parser::new(StrInput::new(value))
275    }
276}
277
278impl<'input, T> Parser<'input, BufferedInput<T>>
279where
280    T: Iterator<Item = char> + 'input,
281{
282    /// Create a new instance of a parser from an iterator of `char`s.
283    #[must_use]
284    pub fn new_from_iter(iter: T) -> Self {
285        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
286        Parser::new(BufferedInput::new(iter))
287    }
288}
289
290impl<'input, T: Input> Parser<'input, T> {
291    /// Create a new instance of a parser from the given input of characters.
292    pub fn new(src: T) -> Self {
293        Parser {
294            scanner: Scanner::new(src),
295            states: Vec::new(),
296            state: State::StreamStart,
297            token: None,
298            current: None,
299
300            anchors: HashMap::new(),
301            // valid anchor_id starts from 1
302            anchor_id_count: 1,
303            tags: HashMap::new(),
304            stream_end_emitted: false,
305            keep_tags: false,
306        }
307    }
308
309    /// Whether to keep tags across multiple documents when parsing.
310    ///
311    /// This behavior is non-standard as per the YAML specification but can be encountered in the
312    /// wild. This boolean allows enabling this non-standard extension. This would result in the
313    /// parser accepting input from [test
314    /// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml)
315    /// of the yaml-test-suite:
316    ///
317    /// ```yaml
318    /// %TAG !prefix! tag:example.com,2011:
319    /// --- !prefix!A
320    /// a: b
321    /// --- !prefix!B
322    /// c: d
323    /// --- !prefix!C
324    /// e: f
325    /// ```
326    ///
327    /// With `keep_tags` set to `false`, the above YAML is rejected. As per the specification, tags
328    /// only apply to the document immediately following them. This would error on `!prefix!B`.
329    ///
330    /// With `keep_tags` set to `true`, the above YAML is accepted by the parser.
331    #[must_use]
332    pub fn keep_tags(mut self, value: bool) -> Self {
333        self.keep_tags = value;
334        self
335    }
336
337    /// Try to load the next event and return it, but do not consuming it from `self`.
338    ///
339    /// Any subsequent call to [`Parser::peek`] will return the same value, until a call to
340    /// [`Iterator::next`] or [`Parser::load`].
341    ///
342    /// # Errors
343    /// Returns `ScanError` when loading the next event fails.
344    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
345        if let Some(ref x) = self.current {
346            Some(Ok(x))
347        } else {
348            if self.stream_end_emitted {
349                return None;
350            }
351            match self.next_event_impl() {
352                Ok(token) => self.current = Some(token),
353                Err(e) => return Some(Err(e)),
354            }
355            self.current.as_ref().map(Ok)
356        }
357    }
358
359    /// Try to load the next event and return it, consuming it from `self`.
360    ///
361    /// # Errors
362    /// Returns `ScanError` when loading the next event fails.
363    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
364        if self.stream_end_emitted {
365            return None;
366        }
367
368        let tok = self.next_event_impl();
369        if matches!(tok, Ok((Event::StreamEnd, _))) {
370            self.stream_end_emitted = true;
371        }
372        Some(tok)
373    }
374
375    /// Implementation function for [`Self::next_event`] without the `Option`.
376    ///
377    /// [`Self::next_event`] should conform to the expectations of an [`Iterator`] and return an
378    /// option. This burdens the parser code. This function is used internally when an option is
379    /// undesirable.
380    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
381    where
382        'input: 'a,
383    {
384        match self.current.take() {
385            None => self.parse(),
386            Some(v) => Ok(v),
387        }
388    }
389
390    /// Peek at the next token from the scanner.
391    fn peek_token(&mut self) -> Result<&Token, ScanError> {
392        match self.token {
393            None => {
394                self.token = Some(self.scan_next_token()?);
395                Ok(self.token.as_ref().unwrap())
396            }
397            Some(ref tok) => Ok(tok),
398        }
399    }
400
401    /// Extract and return the next token from the scanner.
402    ///
403    /// This function does _not_ make use of `self.token`.
404    fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
405        let token = self.scanner.next();
406        match token {
407            None => match self.scanner.get_error() {
408                None => Err(ScanError::new_str(self.scanner.mark(), "unexpected eof")),
409                Some(e) => Err(e),
410            },
411            Some(tok) => Ok(tok),
412        }
413    }
414
415    fn fetch_token<'a>(&mut self) -> Token<'a>
416    where
417        'input: 'a,
418    {
419        self.token
420            .take()
421            .expect("fetch_token needs to be preceded by peek_token")
422    }
423
424    /// Skip the next token from the scanner.
425    fn skip(&mut self) {
426        self.token = None;
427    }
428    /// Pops the top-most state and make it the current state.
429    fn pop_state(&mut self) {
430        self.state = self.states.pop().unwrap();
431    }
    /// Push a new state atop the state stack.
    ///
    /// The current state (`self.state`) is left untouched; the pushed state is the one that
    /// [`Self::pop_state`] will later restore.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
436
437    fn parse<'a>(&mut self) -> ParseResult<'a>
438    where
439        'input: 'a,
440    {
441        if self.state == State::End {
442            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
443        }
444        let (ev, mark) = self.state_machine()?;
445        Ok((ev, mark))
446    }
447
448    /// Load the YAML from the stream in `self`, pushing events into `recv`.
449    ///
450    /// The contents of the stream are parsed and the corresponding events are sent into the
451    /// recveiver. For detailed explanations about how events work, see [`EventReceiver`].
452    ///
453    /// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents
454    /// inside the stream.
455    ///
456    /// Note that any [`EventReceiver`] is also a [`SpannedEventReceiver`], so implementing the
457    /// former is enough to call this function.
458    /// # Errors
459    /// Returns `ScanError` when loading fails.
460    pub fn load<R: SpannedEventReceiver<'input>>(
461        &mut self,
462        recv: &mut R,
463        multi: bool,
464    ) -> Result<(), ScanError> {
465        if !self.scanner.stream_started() {
466            let (ev, span) = self.next_event_impl()?;
467            if ev != Event::StreamStart {
468                return Err(ScanError::new_str(
469                    span.start,
470                    "did not find expected <stream-start>",
471                ));
472            }
473            recv.on_event(ev, span);
474        }
475
476        if self.scanner.stream_ended() {
477            // XXX has parsed?
478            recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
479            return Ok(());
480        }
481        loop {
482            let (ev, span) = self.next_event_impl()?;
483            if ev == Event::StreamEnd {
484                recv.on_event(ev, span);
485                return Ok(());
486            }
487            // clear anchors before a new document
488            self.anchors.clear();
489            self.load_document(ev, span, recv)?;
490            if !multi {
491                break;
492            }
493        }
494        Ok(())
495    }
496
497    fn load_document<R: SpannedEventReceiver<'input>>(
498        &mut self,
499        first_ev: Event<'input>,
500        span: Span,
501        recv: &mut R,
502    ) -> Result<(), ScanError> {
503        if !matches!(first_ev, Event::DocumentStart(_)) {
504            return Err(ScanError::new_str(
505                span.start,
506                "did not find expected <document-start>",
507            ));
508        }
509        recv.on_event(first_ev, span);
510
511        let (ev, span) = self.next_event_impl()?;
512        self.load_node(ev, span, recv)?;
513
514        // DOCUMENT-END is expected.
515        let (ev, mark) = self.next_event_impl()?;
516        assert_eq!(ev, Event::DocumentEnd);
517        recv.on_event(ev, mark);
518
519        Ok(())
520    }
521
522    fn load_node<R: SpannedEventReceiver<'input>>(
523        &mut self,
524        first_ev: Event<'input>,
525        span: Span,
526        recv: &mut R,
527    ) -> Result<(), ScanError> {
528        match first_ev {
529            Event::Alias(..) | Event::Scalar(..) => {
530                recv.on_event(first_ev, span);
531                Ok(())
532            }
533            Event::SequenceStart(..) => {
534                recv.on_event(first_ev, span);
535                self.load_sequence(recv)
536            }
537            Event::MappingStart(..) => {
538                recv.on_event(first_ev, span);
539                self.load_mapping(recv)
540            }
541            _ => {
542                println!("UNREACHABLE EVENT: {first_ev:?}");
543                unreachable!();
544            }
545        }
546    }
547
548    fn load_mapping<R: SpannedEventReceiver<'input>>(
549        &mut self,
550        recv: &mut R,
551    ) -> Result<(), ScanError> {
552        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
553        while key_ev != Event::MappingEnd {
554            // key
555            self.load_node(key_ev, key_mark, recv)?;
556
557            // value
558            let (ev, mark) = self.next_event_impl()?;
559            self.load_node(ev, mark, recv)?;
560
561            // next event
562            let (ev, mark) = self.next_event_impl()?;
563            key_ev = ev;
564            key_mark = mark;
565        }
566        recv.on_event(key_ev, key_mark);
567        Ok(())
568    }
569
570    fn load_sequence<R: SpannedEventReceiver<'input>>(
571        &mut self,
572        recv: &mut R,
573    ) -> Result<(), ScanError> {
574        let (mut ev, mut mark) = self.next_event_impl()?;
575        while ev != Event::SequenceEnd {
576            self.load_node(ev, mark, recv)?;
577
578            // next event
579            let (next_ev, next_mark) = self.next_event_impl()?;
580            ev = next_ev;
581            mark = next_mark;
582        }
583        recv.on_event(ev, mark);
584        Ok(())
585    }
586
587    fn state_machine<'a>(&mut self) -> ParseResult<'a>
588    where
589        'input: 'a,
590    {
591        // let next_tok = self.peek_token().cloned()?;
592        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
593        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
594
595        match self.state {
596            State::StreamStart => self.stream_start(),
597
598            State::ImplicitDocumentStart => self.document_start(true),
599            State::DocumentStart => self.document_start(false),
600            State::DocumentContent => self.document_content(),
601            State::DocumentEnd => self.document_end(),
602
603            State::BlockNode => self.parse_node(true, false),
604            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
605            // State::FlowNode => self.parse_node(false, false),
606            State::BlockMappingFirstKey => self.block_mapping_key(true),
607            State::BlockMappingKey => self.block_mapping_key(false),
608            State::BlockMappingValue => self.block_mapping_value(),
609
610            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
611            State::BlockSequenceEntry => self.block_sequence_entry(false),
612
613            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
614            State::FlowSequenceEntry => self.flow_sequence_entry(false),
615
616            State::FlowMappingFirstKey => self.flow_mapping_key(true),
617            State::FlowMappingKey => self.flow_mapping_key(false),
618            State::FlowMappingValue => self.flow_mapping_value(false),
619
620            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
621
622            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
623            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
624            State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
625            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
626
627            /* impossible */
628            State::End => unreachable!(),
629        }
630    }
631
632    fn stream_start<'a>(&mut self) -> ParseResult<'a>
633    where
634        'input: 'a,
635    {
636        match *self.peek_token()? {
637            Token(span, TokenType::StreamStart(_)) => {
638                self.state = State::ImplicitDocumentStart;
639                self.skip();
640                Ok((Event::StreamStart, span))
641            }
642            Token(span, _) => Err(ScanError::new_str(
643                span.start,
644                "did not find expected <stream-start>",
645            )),
646        }
647    }
648
649    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
650    where
651        'input: 'a,
652    {
653        while let TokenType::DocumentEnd = self.peek_token()?.1 {
654            self.skip();
655        }
656
657        match *self.peek_token()? {
658            Token(span, TokenType::StreamEnd) => {
659                self.state = State::End;
660                self.skip();
661                Ok((Event::StreamEnd, span))
662            }
663            Token(
664                _,
665                TokenType::VersionDirective(..)
666                | TokenType::TagDirective(..)
667                | TokenType::DocumentStart,
668            ) => {
669                // explicit document
670                self.explicit_document_start()
671            }
672            Token(span, _) if implicit => {
673                self.parser_process_directives()?;
674                self.push_state(State::DocumentEnd);
675                self.state = State::BlockNode;
676                Ok((Event::DocumentStart(false), span))
677            }
678            _ => {
679                // explicit document
680                self.explicit_document_start()
681            }
682        }
683    }
684
685    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
686        let mut version_directive_received = false;
687        loop {
688            let mut tags = HashMap::new();
689            match self.peek_token()? {
690                Token(span, TokenType::VersionDirective(_, _)) => {
691                    // XXX parsing with warning according to spec
692                    //if major != 1 || minor > 2 {
693                    //    return Err(ScanError::new_str(tok.0,
694                    //        "found incompatible YAML document"));
695                    //}
696                    if version_directive_received {
697                        return Err(ScanError::new_str(
698                            span.start,
699                            "duplicate version directive",
700                        ));
701                    }
702                    version_directive_received = true;
703                }
704                Token(mark, TokenType::TagDirective(handle, prefix)) => {
705                    if tags.contains_key(&**handle) {
706                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
707                    }
708                    tags.insert(handle.to_string(), prefix.to_string());
709                }
710                _ => break,
711            }
712            self.tags = tags;
713            self.skip();
714        }
715        Ok(())
716    }
717
718    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
719    where
720        'input: 'a,
721    {
722        self.parser_process_directives()?;
723        match *self.peek_token()? {
724            Token(mark, TokenType::DocumentStart) => {
725                self.push_state(State::DocumentEnd);
726                self.state = State::DocumentContent;
727                self.skip();
728                Ok((Event::DocumentStart(true), mark))
729            }
730            Token(span, _) => Err(ScanError::new_str(
731                span.start,
732                "did not find expected <document start>",
733            )),
734        }
735    }
736
737    fn document_content<'a>(&mut self) -> ParseResult<'a>
738    where
739        'input: 'a,
740    {
741        match *self.peek_token()? {
742            Token(
743                mark,
744                TokenType::VersionDirective(..)
745                | TokenType::TagDirective(..)
746                | TokenType::DocumentStart
747                | TokenType::DocumentEnd
748                | TokenType::StreamEnd,
749            ) => {
750                self.pop_state();
751                // empty scalar
752                Ok((Event::empty_scalar(), mark))
753            }
754            _ => self.parse_node(true, false),
755        }
756    }
757
758    fn document_end<'a>(&mut self) -> ParseResult<'a>
759    where
760        'input: 'a,
761    {
762        let mut explicit_end = false;
763        let span: Span = match *self.peek_token()? {
764            Token(span, TokenType::DocumentEnd) => {
765                explicit_end = true;
766                self.skip();
767                span
768            }
769            Token(span, _) => span,
770        };
771
772        if !self.keep_tags {
773            self.tags.clear();
774        }
775        if explicit_end {
776            self.state = State::ImplicitDocumentStart;
777        } else {
778            if let Token(span, TokenType::VersionDirective(..) | TokenType::TagDirective(..)) =
779                *self.peek_token()?
780            {
781                return Err(ScanError::new_str(
782                    span.start,
783                    "missing explicit document end marker before directive",
784                ));
785            }
786            self.state = State::DocumentStart;
787        }
788
789        Ok((Event::DocumentEnd, span))
790    }
791
792    fn register_anchor(&mut self, name: Cow<'input, str>, _: &Span) -> usize {
793        // anchors can be overridden/reused
794        // if self.anchors.contains_key(name) {
795        //     return Err(ScanError::new_str(*mark,
796        //         "while parsing anchor, found duplicated anchor"));
797        // }
798        let new_id = self.anchor_id_count;
799        self.anchor_id_count += 1;
800        self.anchors.insert(name, new_id);
801        new_id
802    }
803
    /// Parse a node and return the event that starts it (or, for aliases and scalars, the event
    /// that makes up the whole node).
    ///
    /// The node properties (an anchor and/or a tag, in either order) are consumed first, then the
    /// event is chosen from the token that follows.
    ///
    /// # Parameters
    /// - `block`: whether block sequences and block mappings are accepted as the node content.
    /// - `indentless_sequence`: whether a block entry token may start an indentless sequence.
    ///
    /// # Errors
    /// Returns `ScanError` if scanning fails, if an alias refers to an unknown anchor, if
    /// resolving a tag fails, or if no node content is found and the node has neither a tag nor
    /// an anchor.
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // 0 means "no anchor": valid anchor IDs start from 1.
        let mut anchor_id = 0;
        let mut tag = None;
        match *self.peek_token()? {
            Token(_, TokenType::Alias(_)) => {
                // An alias is a complete node: go back to the previous state right away.
                self.pop_state();
                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                unreachable!()
            }
            Token(_, TokenType::Anchor(_)) => {
                // Anchor first, optionally followed by a tag.
                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span);
                    if let TokenType::Tag(..) = self.peek_token()?.1 {
                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                            // NOTE(review): `span` is the span of the anchor token, not of the
                            // tag token -- confirm this is what `resolve_tag` should receive.
                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::Tag(..)) => {
                // Tag first, optionally followed by an anchor.
                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                            anchor_id = self.register_anchor(name, &mark);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // The properties are consumed; pick the event from the token that follows. Tokens that
        // open a collection are left in the buffer (only scalars are fetched here).
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(_, TokenType::Scalar(..)) => {
                // A scalar is a complete node: go back to the previous state.
                self.pop_state();
                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            // ex 7.2, an empty scalar can follow a secondary tag
            // (more generally: a node that has a tag or an anchor but no content is an empty
            // scalar carrying those properties)
            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "while parsing a node, did not find expected node content",
            )),
        }
    }
896
897    fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
898    where
899        'input: 'a,
900    {
901        // skip BlockMappingStart
902        if first {
903            let _ = self.peek_token()?;
904            //self.marks.push(tok.0);
905            self.skip();
906        }
907        match *self.peek_token()? {
908            Token(_, TokenType::Key) => {
909                self.skip();
910                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
911                    *self.peek_token()?
912                {
913                    self.state = State::BlockMappingValue;
914                    // empty scalar
915                    Ok((Event::empty_scalar(), mark))
916                } else {
917                    self.push_state(State::BlockMappingValue);
918                    self.parse_node(true, true)
919                }
920            }
921            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
922            Token(mark, TokenType::Value) => {
923                self.state = State::BlockMappingValue;
924                Ok((Event::empty_scalar(), mark))
925            }
926            Token(mark, TokenType::BlockEnd) => {
927                self.pop_state();
928                self.skip();
929                Ok((Event::MappingEnd, mark))
930            }
931            Token(span, _) => Err(ScanError::new_str(
932                span.start,
933                "while parsing a block mapping, did not find expected key",
934            )),
935        }
936    }
937
938    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
939    where
940        'input: 'a,
941    {
942        match *self.peek_token()? {
943            Token(mark, TokenType::Value) => {
944                self.skip();
945                if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
946                    *self.peek_token()?
947                {
948                    self.state = State::BlockMappingKey;
949                    // empty scalar
950                    Ok((Event::empty_scalar(), mark))
951                } else {
952                    self.push_state(State::BlockMappingKey);
953                    self.parse_node(true, true)
954                }
955            }
956            Token(mark, _) => {
957                self.state = State::BlockMappingKey;
958                // empty scalar
959                Ok((Event::empty_scalar(), mark))
960            }
961        }
962    }
963
964    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
965    where
966        'input: 'a,
967    {
968        if first {
969            let _ = self.peek_token()?;
970            self.skip();
971        }
972        let span: Span = {
973            match *self.peek_token()? {
974                Token(mark, TokenType::FlowMappingEnd) => mark,
975                Token(mark, _) => {
976                    if !first {
977                        match *self.peek_token()? {
978                            Token(_, TokenType::FlowEntry) => self.skip(),
979                            Token(span, _) => return Err(ScanError::new_str(
980                                span.start,
981                                "while parsing a flow mapping, did not find expected ',' or '}'",
982                            )),
983                        }
984                    }
985
986                    match *self.peek_token()? {
987                        Token(_, TokenType::Key) => {
988                            self.skip();
989                            if let Token(
990                                mark,
991                                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
992                            ) = *self.peek_token()?
993                            {
994                                self.state = State::FlowMappingValue;
995                                return Ok((Event::empty_scalar(), mark));
996                            }
997                            self.push_state(State::FlowMappingValue);
998                            return self.parse_node(false, false);
999                        }
1000                        Token(marker, TokenType::Value) => {
1001                            self.state = State::FlowMappingValue;
1002                            return Ok((Event::empty_scalar(), marker));
1003                        }
1004                        Token(_, TokenType::FlowMappingEnd) => (),
1005                        _ => {
1006                            self.push_state(State::FlowMappingEmptyValue);
1007                            return self.parse_node(false, false);
1008                        }
1009                    }
1010
1011                    mark
1012                }
1013            }
1014        };
1015
1016        self.pop_state();
1017        self.skip();
1018        Ok((Event::MappingEnd, span))
1019    }
1020
1021    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1022    where
1023        'input: 'a,
1024    {
1025        let span: Span = {
1026            if empty {
1027                let Token(mark, _) = *self.peek_token()?;
1028                self.state = State::FlowMappingKey;
1029                return Ok((Event::empty_scalar(), mark));
1030            }
1031            match *self.peek_token()? {
1032                Token(span, TokenType::Value) => {
1033                    self.skip();
1034                    match self.peek_token()?.1 {
1035                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
1036                        _ => {
1037                            self.push_state(State::FlowMappingKey);
1038                            return self.parse_node(false, false);
1039                        }
1040                    }
1041                    span
1042                }
1043                Token(marker, _) => marker,
1044            }
1045        };
1046
1047        self.state = State::FlowMappingKey;
1048        Ok((Event::empty_scalar(), span))
1049    }
1050
1051    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1052    where
1053        'input: 'a,
1054    {
1055        // skip FlowMappingStart
1056        if first {
1057            let _ = self.peek_token()?;
1058            //self.marks.push(tok.0);
1059            self.skip();
1060        }
1061        match *self.peek_token()? {
1062            Token(mark, TokenType::FlowSequenceEnd) => {
1063                self.pop_state();
1064                self.skip();
1065                return Ok((Event::SequenceEnd, mark));
1066            }
1067            Token(_, TokenType::FlowEntry) if !first => {
1068                self.skip();
1069            }
1070            Token(span, _) if !first => {
1071                return Err(ScanError::new_str(
1072                    span.start,
1073                    "while parsing a flow sequence, expected ',' or ']'",
1074                ));
1075            }
1076            _ => { /* next */ }
1077        }
1078        match *self.peek_token()? {
1079            Token(mark, TokenType::FlowSequenceEnd) => {
1080                self.pop_state();
1081                self.skip();
1082                Ok((Event::SequenceEnd, mark))
1083            }
1084            Token(mark, TokenType::Key) => {
1085                self.state = State::FlowSequenceEntryMappingKey;
1086                self.skip();
1087                Ok((Event::MappingStart(0, None), mark))
1088            }
1089            _ => {
1090                self.push_state(State::FlowSequenceEntry);
1091                self.parse_node(false, false)
1092            }
1093        }
1094    }
1095
1096    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1097    where
1098        'input: 'a,
1099    {
1100        match *self.peek_token()? {
1101            Token(mark, TokenType::BlockEntry) => {
1102                self.skip();
1103                if let Token(
1104                    _,
1105                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1106                ) = *self.peek_token()?
1107                {
1108                    self.state = State::IndentlessSequenceEntry;
1109                    Ok((Event::empty_scalar(), mark))
1110                } else {
1111                    self.push_state(State::IndentlessSequenceEntry);
1112                    self.parse_node(true, false)
1113                }
1114            }
1115            Token(mark, _) => {
1116                self.pop_state();
1117                Ok((Event::SequenceEnd, mark))
1118            }
1119        }
1120    }
1121
1122    fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1123    where
1124        'input: 'a,
1125    {
1126        // BLOCK-SEQUENCE-START
1127        if first {
1128            let _ = self.peek_token()?;
1129            //self.marks.push(tok.0);
1130            self.skip();
1131        }
1132        match *self.peek_token()? {
1133            Token(mark, TokenType::BlockEnd) => {
1134                self.pop_state();
1135                self.skip();
1136                Ok((Event::SequenceEnd, mark))
1137            }
1138            Token(mark, TokenType::BlockEntry) => {
1139                self.skip();
1140                if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1141                    self.state = State::BlockSequenceEntry;
1142                    Ok((Event::empty_scalar(), mark))
1143                } else {
1144                    self.push_state(State::BlockSequenceEntry);
1145                    self.parse_node(true, false)
1146                }
1147            }
1148            Token(span, _) => Err(ScanError::new_str(
1149                span.start,
1150                "while parsing a block collection, did not find expected '-' indicator",
1151            )),
1152        }
1153    }
1154
1155    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1156    where
1157        'input: 'a,
1158    {
1159        if let Token(mark, TokenType::Value | TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1160            *self.peek_token()?
1161        {
1162            self.skip();
1163            self.state = State::FlowSequenceEntryMappingValue;
1164            Ok((Event::empty_scalar(), mark))
1165        } else {
1166            self.push_state(State::FlowSequenceEntryMappingValue);
1167            self.parse_node(false, false)
1168        }
1169    }
1170
1171    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
1172    where
1173        'input: 'a,
1174    {
1175        match *self.peek_token()? {
1176            Token(_, TokenType::Value) => {
1177                self.skip();
1178                self.state = State::FlowSequenceEntryMappingValue;
1179                let Token(span, ref tok) = *self.peek_token()?;
1180                if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1181                    self.state = State::FlowSequenceEntryMappingEnd(span.end);
1182                    Ok((Event::empty_scalar(), span))
1183                } else {
1184                    self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
1185                    self.parse_node(false, false)
1186                }
1187            }
1188            Token(mark, _) => {
1189                self.state = State::FlowSequenceEntryMappingEnd(mark.end);
1190                Ok((Event::empty_scalar(), mark))
1191            }
1192        }
1193    }
1194
1195    #[allow(clippy::unnecessary_wraps)]
1196    fn flow_sequence_entry_mapping_end<'a>(&mut self, mark: Marker) -> ParseResult<'a>
1197    where
1198        'input: 'a,
1199    {
1200        self.state = State::FlowSequenceEntry;
1201        Ok((Event::MappingEnd, Span::empty(mark)))
1202    }
1203
1204    /// Resolve a tag from the handle and the suffix.
1205    fn resolve_tag(
1206        &self,
1207        span: Span,
1208        handle: &str,
1209        suffix: String,
1210    ) -> Result<Cow<'input, Tag>, ScanError> {
1211        let tag = if handle == "!!" {
1212            // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be
1213            // overridden.
1214            Tag {
1215                handle: self
1216                    .tags
1217                    .get("!!")
1218                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1219                suffix,
1220            }
1221        } else if handle.is_empty() && suffix == "!" {
1222            // "!" introduces a local tag. Local tags may have their prefix overridden.
1223            match self.tags.get("") {
1224                Some(prefix) => Tag {
1225                    handle: prefix.to_string(),
1226                    suffix,
1227                },
1228                None => Tag {
1229                    handle: String::new(),
1230                    suffix,
1231                },
1232            }
1233        } else {
1234            // Lookup handle in our tag directives.
1235            let prefix = self.tags.get(handle);
1236            if let Some(prefix) = prefix {
1237                Tag {
1238                    handle: prefix.to_string(),
1239                    suffix,
1240                }
1241            } else {
1242                // Otherwise, it may be a local handle. With a local handle, the handle is set to
1243                // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")).
1244                // If the handle is of the form "!foo!", this cannot be a local handle and we need
1245                // to error.
1246                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1247                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
1248                }
1249                Tag {
1250                    handle: handle.to_string(),
1251                    suffix,
1252                }
1253            }
1254        };
1255        Ok(Cow::Owned(tag))
1256    }
1257}
1258
1259impl<'input, T: Input> Iterator for Parser<'input, T> {
1260    type Item = Result<(Event<'input>, Span), ScanError>;
1261
1262    fn next(&mut self) -> Option<Self::Item> {
1263        self.next_event()
1264    }
1265}
1266
#[cfg(test)]
mod test {
    use super::{Event, Parser};

    /// Peeking at the next event must return the same event that is then produced by
    /// `next_event`, all the way to the end of the stream.
    #[test]
    fn test_peek_eq_parse() {
        let input = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut parser = Parser::new_from_str(input);
        let mut reached_end = false;
        while !reached_end {
            let peeked = parser.peek().unwrap().unwrap().clone();
            let parsed = parser.next_event().unwrap().unwrap();
            assert_eq!(parsed, peeked);
            reached_end = matches!(parsed.0, Event::StreamEnd);
        }
    }

    /// With `keep_tags(true)` the `%TAG` directive of the first document still resolves in
    /// the second one; with `keep_tags(false)` parsing the same input must report an error.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        // Every mapping must carry a tag resolved through the `!t!` handle.
        for item in Parser::new_from_str(text).keep_tags(true) {
            let (event, _span) = item.unwrap();
            if let Event::MappingStart(_, tag) = event {
                assert_eq!(tag.unwrap().handle, "tag:test,2024:");
            }
        }

        // Stop at the first error; reaching the end of the stream without one is a failure.
        let saw_error = Parser::new_from_str(text)
            .keep_tags(false)
            .any(|item| item.is_err());
        assert!(saw_error, "Test failed, did not encounter error");
    }
}