1use crate::{
8 input::{str::StrInput, BorrowedInput},
9 scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10 BufferedInput,
11};
12
13use alloc::{
14 borrow::Cow,
15 collections::{BTreeMap, BTreeSet},
16 string::{String, ToString},
17 vec::Vec,
18};
19use core::fmt::Display;
20
/// Position of the parser within the YAML grammar.
///
/// `Parser::state_machine` dispatches on the current state to select the handler that
/// produces the next event.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// Initial state of a freshly created parser; handled by `stream_start`.
    StreamStart,
    /// Handled by `document_start(true)`: the next document may start without a marker.
    ImplicitDocumentStart,
    /// Handled by `document_start(false)`.
    DocumentStart,
    /// Handled by `document_content`; entered after an explicit document start.
    DocumentContent,
    /// Handled by `document_end`.
    DocumentEnd,
    /// Handled by `parse_node(true, false)`.
    BlockNode,
    /// Handled by `block_sequence_entry(true)`, which first skips the pending start token.
    BlockSequenceFirstEntry,
    /// Handled by `block_sequence_entry(false)`.
    BlockSequenceEntry,
    /// Handled by `indentless_sequence_entry`.
    IndentlessSequenceEntry,
    /// Handled by `block_mapping_key(true)`, which first skips the pending start token.
    BlockMappingFirstKey,
    /// Handled by `block_mapping_key(false)`.
    BlockMappingKey,
    /// Handled by `block_mapping_value`.
    BlockMappingValue,
    /// Handled by `flow_sequence_entry(true)`, which first skips the pending start token.
    FlowSequenceFirstEntry,
    /// Handled by `flow_sequence_entry(false)`.
    FlowSequenceEntry,
    /// Handled by `flow_sequence_entry_mapping_key`.
    FlowSequenceEntryMappingKey,
    /// Handled by `flow_sequence_entry_mapping_value`.
    FlowSequenceEntryMappingValue,
    /// Handled by `flow_sequence_entry_mapping_end`.
    FlowSequenceEntryMappingEnd,
    /// Handled by `flow_mapping_key(true)`, which first skips the pending start token.
    FlowMappingFirstKey,
    /// Handled by `flow_mapping_key(false)`.
    FlowMappingKey,
    /// Handled by `flow_mapping_value(false)`.
    FlowMappingValue,
    /// Handled by `flow_mapping_value(true)`: the value is emitted as an empty scalar.
    FlowMappingEmptyValue,
    /// Terminal state: once reached, `parse` keeps returning `Event::StreamEnd`.
    End,
}
46
/// An event produced by the [`Parser`] while walking a YAML stream.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Placeholder variant; no code in this file emits it.
    Nothing,
    /// The stream has started.
    StreamStart,
    /// The stream has ended.
    StreamEnd,
    /// A document has started.
    ///
    /// The flag is `true` when the document was opened by an explicit document-start token
    /// and `false` when it started implicitly.
    DocumentStart(bool),
    /// The current document has ended.
    DocumentEnd,
    /// A reference to a previously anchored node.
    Alias(
        /// Id under which the referenced anchor was registered.
        usize,
    ),
    /// A scalar value.
    Scalar(
        /// Text of the scalar.
        Cow<'input, str>,
        /// Style the scalar was written in.
        ScalarStyle,
        /// Anchor id of the node, or `0` when the node has no anchor.
        usize,
        /// Resolved tag of the node, if one was given.
        Option<Cow<'input, Tag>>,
    ),
    /// Start of a sequence.
    SequenceStart(
        /// Anchor id of the node, or `0` when the node has no anchor.
        usize,
        /// Resolved tag of the node, if one was given.
        Option<Cow<'input, Tag>>,
    ),
    /// End of the current sequence.
    SequenceEnd,
    /// Start of a mapping.
    MappingStart(
        /// Anchor id of the node, or `0` when the node has no anchor.
        usize,
        /// Resolved tag of the node, if one was given.
        Option<Cow<'input, Tag>>,
    ),
    /// End of the current mapping.
    MappingEnd,
}
100
/// A YAML tag, stored as a handle (or the prefix it resolved to) plus a suffix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// Handle of the tag, or the prefix the handle was resolved to.
    pub handle: String,
    /// Part of the tag that follows the handle.
    pub suffix: String,
}

impl Tag {
    /// Returns `true` when the handle is exactly the YAML core schema prefix
    /// `tag:yaml.org,2002:`.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        matches!(self.handle.as_str(), "tag:yaml.org,2002:")
    }
}

impl Display for Tag {
    /// Writes the tag as `!suffix` for the bare `!` handle and as handle immediately followed
    /// by suffix otherwise.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self.handle.as_str() {
            "!" => write!(f, "!{}", self.suffix),
            prefix => write!(f, "{}{}", prefix, self.suffix),
        }
    }
}
134
135impl<'input> Event<'input> {
136 fn empty_scalar() -> Self {
138 Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
140 }
141
142 fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
144 Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
145 }
146}
147
/// An event-based YAML parser driven by a [`Scanner`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// Source of tokens.
    scanner: Scanner<'input, T>,
    /// Stack of states to return to; see `push_state` / `pop_state`.
    states: Vec<State>,
    /// State the next call to `state_machine` dispatches on.
    state: State,
    /// One-token lookahead buffer filled by `peek_token`.
    token: Option<Token<'input>>,
    /// Event buffered by `peek`, returned by the next `next_event_impl` call.
    current: Option<(Event<'input>, Span)>,

    /// Column of the most recent block mapping key token; `parse` attaches it to the span of
    /// the next event it returns.
    pending_key_indent: Option<usize>,
    /// Anchor name to anchor id; cleared whenever a new document starts.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Id that will be given to the next registered anchor.
    anchor_id_count: usize,
    /// Tag directives currently in effect, mapping handle to prefix.
    tags: BTreeMap<String, String>,
    /// Set once `next_event` has returned `Event::StreamEnd`; afterwards `peek` and
    /// `next_event` return `None`.
    stream_end_emitted: bool,
    /// Whether tag directives are carried over from one document to the next.
    keep_tags: bool,
}
190
/// Receiver of parser events that does not need source positions.
///
/// Every implementor also gets [`SpannedEventReceiver`] through the blanket implementation in
/// this file, which drops the span.
pub trait EventReceiver<'input> {
    /// Called once for each event.
    fn on_event(&mut self, ev: Event<'input>);
}
264
/// Receiver of parser events together with the span each event was produced at.
pub trait SpannedEventReceiver<'input> {
    /// Called once for each event, with the span the parser associated with it.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
272
273impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
274 fn on_event(&mut self, ev: Event<'input>, _span: Span) {
275 self.on_event(ev);
276 }
277}
278
/// Result of producing one event: the event and its span, or the error that stopped parsing.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
281
/// Common interface of event-producing YAML parsers.
pub trait ParserTrait<'input> {
    /// Returns a reference to the next event without consuming it, an error if producing it
    /// failed, or `None` when no further event is available.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;

    /// Produces and consumes the next event, or returns `None` when no further event is
    /// available.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;

    /// Drives the parser and forwards every event to `recv`.
    ///
    /// `multi` selects whether all documents of the stream are loaded or only the first one.
    ///
    /// # Errors
    /// Returns the first `ScanError` encountered.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;
}
300
301impl<'input> Parser<'input, StrInput<'input>> {
302 #[must_use]
304 pub fn new_from_str(value: &'input str) -> Self {
305 debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
306 Parser::new(StrInput::new(value))
307 }
308}
309
310impl<T> Parser<'static, BufferedInput<T>>
311where
312 T: Iterator<Item = char>,
313{
314 #[must_use]
316 pub fn new_from_iter(iter: T) -> Self {
317 debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
318 Parser::new(BufferedInput::new(iter))
319 }
320}
321
322impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
    /// Returns the id that will be assigned to the next registered anchor.
    pub fn get_anchor_offset(&self) -> usize {
        self.anchor_id_count
    }
327
    /// Sets the id that will be assigned to the next registered anchor.
    ///
    /// Anchors already registered keep their ids; `0` is what node events use to mean
    /// "no anchor".
    pub fn set_anchor_offset(&mut self, offset: usize) {
        self.anchor_id_count = offset;
    }
332
333 pub fn new(src: T) -> Self {
335 Parser {
336 scanner: Scanner::new(src),
337 states: Vec::new(),
338 state: State::StreamStart,
339 token: None,
340 current: None,
341
342 pending_key_indent: None,
343
344 anchors: BTreeMap::new(),
345 anchor_id_count: 1,
347 tags: BTreeMap::new(),
348 stream_end_emitted: false,
349 keep_tags: false,
350 }
351 }
352
    /// Builder-style setter for whether tag directives persist across documents.
    ///
    /// When enabled, each document starts from the previous document's directives and only
    /// the `!!` and empty handles are dropped at document end; when disabled, all directives
    /// are cleared at document end.
    #[must_use]
    pub fn keep_tags(mut self, value: bool) -> Self {
        self.keep_tags = value;
        self
    }
380
381 pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
389 ParserTrait::peek(self)
390 }
391
392 pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
397 ParserTrait::next_event(self)
398 }
399
400 fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
406 where
407 'input: 'a,
408 {
409 match self.current.take() {
410 None => self.parse(),
411 Some(v) => Ok(v),
412 }
413 }
414
415 fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
417 match self.token {
418 None => {
419 self.token = Some(self.scan_next_token()?);
420 Ok(self.token.as_ref().unwrap())
421 }
422 Some(ref tok) => Ok(tok),
423 }
424 }
425
426 fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
430 let token = self.scanner.next();
431 match token {
432 None => match self.scanner.get_error() {
433 None => Err(self.unexpected_eof()),
434 Some(e) => Err(e),
435 },
436 Some(tok) => Ok(tok),
437 }
438 }
439
440 #[cold]
441 fn unexpected_eof(&self) -> ScanError {
442 let info = match self.state {
443 State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
444 "unexpected EOF while parsing a flow sequence"
445 }
446 State::FlowMappingFirstKey
447 | State::FlowMappingKey
448 | State::FlowMappingValue
449 | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
450 State::FlowSequenceEntryMappingKey
451 | State::FlowSequenceEntryMappingValue
452 | State::FlowSequenceEntryMappingEnd => {
453 "unexpected EOF while parsing an implicit flow mapping"
454 }
455 State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
456 "unexpected EOF while parsing a block sequence"
457 }
458 State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
459 "unexpected EOF while parsing a block mapping"
460 }
461 _ => "unexpected eof",
462 };
463 ScanError::new_str(self.scanner.mark(), info)
464 }
465
466 fn fetch_token<'a>(&mut self) -> Token<'a>
467 where
468 'input: 'a,
469 {
470 self.token
471 .take()
472 .expect("fetch_token needs to be preceded by peek_token")
473 }
474
    /// Discards the buffered lookahead token so the next `peek_token` scans a new one.
    fn skip(&mut self) {
        self.token = None;
    }
    /// Makes the most recently pushed state the current state.
    ///
    /// # Panics
    /// Panics if the state stack is empty.
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap();
    }
    /// Remembers `state` so a later `pop_state` can return to it.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
487
488 fn parse<'a>(&mut self) -> ParseResult<'a>
489 where
490 'input: 'a,
491 {
492 if self.state == State::End {
493 return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
494 }
495 let (ev, span) = self.state_machine()?;
496 if let Some(indent) = self.pending_key_indent.take() {
497 Ok((ev, span.with_indent(Some(indent))))
498 } else {
499 Ok((ev, span))
500 }
501 }
502
503 pub fn load<R: SpannedEventReceiver<'input>>(
516 &mut self,
517 recv: &mut R,
518 multi: bool,
519 ) -> Result<(), ScanError> {
520 ParserTrait::load(self, recv, multi)
521 }
522
523 fn load_document<R: SpannedEventReceiver<'input>>(
524 &mut self,
525 first_ev: Event<'input>,
526 span: Span,
527 recv: &mut R,
528 ) -> Result<(), ScanError> {
529 if !matches!(first_ev, Event::DocumentStart(_)) {
530 return Err(ScanError::new_str(
531 span.start,
532 "did not find expected <document-start>",
533 ));
534 }
535 recv.on_event(first_ev, span);
536
537 let (ev, span) = self.next_event_impl()?;
538 self.load_node(ev, span, recv)?;
539
540 let (ev, mark) = self.next_event_impl()?;
542 assert_eq!(ev, Event::DocumentEnd);
543 recv.on_event(ev, mark);
544
545 Ok(())
546 }
547
548 fn load_node<R: SpannedEventReceiver<'input>>(
549 &mut self,
550 first_ev: Event<'input>,
551 span: Span,
552 recv: &mut R,
553 ) -> Result<(), ScanError> {
554 match first_ev {
555 Event::Alias(..) | Event::Scalar(..) => {
556 recv.on_event(first_ev, span);
557 Ok(())
558 }
559 Event::SequenceStart(..) => {
560 recv.on_event(first_ev, span);
561 self.load_sequence(recv)
562 }
563 Event::MappingStart(..) => {
564 recv.on_event(first_ev, span);
565 self.load_mapping(recv)
566 }
567 _ => {
568 #[cfg(feature = "debug_prints")]
569 std::println!("UNREACHABLE EVENT: {first_ev:?}");
570 unreachable!();
571 }
572 }
573 }
574
575 fn load_mapping<R: SpannedEventReceiver<'input>>(
576 &mut self,
577 recv: &mut R,
578 ) -> Result<(), ScanError> {
579 let (mut key_ev, mut key_mark) = self.next_event_impl()?;
580 while key_ev != Event::MappingEnd {
581 self.load_node(key_ev, key_mark, recv)?;
583
584 let (ev, mark) = self.next_event_impl()?;
586 self.load_node(ev, mark, recv)?;
587
588 let (ev, mark) = self.next_event_impl()?;
590 key_ev = ev;
591 key_mark = mark;
592 }
593 recv.on_event(key_ev, key_mark);
594 Ok(())
595 }
596
597 fn load_sequence<R: SpannedEventReceiver<'input>>(
598 &mut self,
599 recv: &mut R,
600 ) -> Result<(), ScanError> {
601 let (mut ev, mut mark) = self.next_event_impl()?;
602 while ev != Event::SequenceEnd {
603 self.load_node(ev, mark, recv)?;
604
605 let (next_ev, next_mark) = self.next_event_impl()?;
607 ev = next_ev;
608 mark = next_mark;
609 }
610 recv.on_event(ev, mark);
611 Ok(())
612 }
613
614 fn state_machine<'a>(&mut self) -> ParseResult<'a>
615 where
616 'input: 'a,
617 {
618 debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
621
622 match self.state {
623 State::StreamStart => self.stream_start(),
624
625 State::ImplicitDocumentStart => self.document_start(true),
626 State::DocumentStart => self.document_start(false),
627 State::DocumentContent => self.document_content(),
628 State::DocumentEnd => self.document_end(),
629
630 State::BlockNode => self.parse_node(true, false),
631 State::BlockMappingFirstKey => self.block_mapping_key(true),
634 State::BlockMappingKey => self.block_mapping_key(false),
635 State::BlockMappingValue => self.block_mapping_value(),
636
637 State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
638 State::BlockSequenceEntry => self.block_sequence_entry(false),
639
640 State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
641 State::FlowSequenceEntry => self.flow_sequence_entry(false),
642
643 State::FlowMappingFirstKey => self.flow_mapping_key(true),
644 State::FlowMappingKey => self.flow_mapping_key(false),
645 State::FlowMappingValue => self.flow_mapping_value(false),
646
647 State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
648
649 State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
650 State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
651 State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
652 State::FlowMappingEmptyValue => self.flow_mapping_value(true),
653
654 State::End => unreachable!(),
656 }
657 }
658
659 fn stream_start<'a>(&mut self) -> ParseResult<'a>
660 where
661 'input: 'a,
662 {
663 match *self.peek_token()? {
664 Token(span, TokenType::StreamStart(_)) => {
665 self.state = State::ImplicitDocumentStart;
666 self.skip();
667 Ok((Event::StreamStart, span))
668 }
669 Token(span, _) => Err(ScanError::new_str(
670 span.start,
671 "did not find expected <stream-start>",
672 )),
673 }
674 }
675
676 fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
677 where
678 'input: 'a,
679 {
680 while let TokenType::DocumentEnd = self.peek_token()?.1 {
681 self.skip();
682 }
683
684 self.anchors.clear();
686
687 match *self.peek_token()? {
688 Token(span, TokenType::StreamEnd) => {
689 self.state = State::End;
690 self.skip();
691 Ok((Event::StreamEnd, span))
692 }
693 Token(
694 _,
695 TokenType::VersionDirective(..)
696 | TokenType::TagDirective(..)
697 | TokenType::ReservedDirective(..)
698 | TokenType::DocumentStart,
699 ) => {
700 self.explicit_document_start()
702 }
703 Token(span, _) if implicit => {
704 self.parser_process_directives()?;
705 self.push_state(State::DocumentEnd);
706 self.state = State::BlockNode;
707 Ok((Event::DocumentStart(false), span))
708 }
709 _ => {
710 self.explicit_document_start()
712 }
713 }
714 }
715
    /// Consumes the directive tokens that precede a document and installs the resulting tag
    /// table in `self.tags`.
    ///
    /// The new table starts from the current one when `keep_tags` is set and from an empty
    /// one otherwise. `self.tags` is only replaced when the whole run of directives was
    /// processed without error.
    ///
    /// # Errors
    /// Fails on a second version directive, or on a tag directive whose handle was already
    /// declared among this document's directives.
    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
        let mut version_directive_received = false;
        let mut tags = if self.keep_tags {
            self.tags.clone()
        } else {
            BTreeMap::new()
        };
        // Handles declared by this document only, so that a handle inherited through
        // `keep_tags` may be redeclared once without being reported as a duplicate.
        let mut document_tag_handles = BTreeSet::new();

        loop {
            match self.peek_token()? {
                Token(span, TokenType::VersionDirective(_, _)) => {
                    // The version numbers themselves are not checked here.
                    if version_directive_received {
                        return Err(ScanError::new_str(
                            span.start,
                            "duplicate version directive",
                        ));
                    }
                    version_directive_received = true;
                }
                Token(mark, TokenType::TagDirective(handle, prefix)) => {
                    if !document_tag_handles.insert(handle.to_string()) {
                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
                    }
                    tags.insert(handle.to_string(), prefix.to_string());
                }
                Token(_, TokenType::ReservedDirective(_, _)) => {
                    // Reserved directives are consumed and otherwise ignored.
                }
                // First token that is not a directive: leave it buffered for the caller.
                _ => break,
            }
            self.skip();
        }

        self.tags = tags;
        Ok(())
    }
758
759 fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
760 where
761 'input: 'a,
762 {
763 self.parser_process_directives()?;
764 match *self.peek_token()? {
765 Token(mark, TokenType::DocumentStart) => {
766 self.push_state(State::DocumentEnd);
767 self.state = State::DocumentContent;
768 self.skip();
769 Ok((Event::DocumentStart(true), mark))
770 }
771 Token(span, _) => Err(ScanError::new_str(
772 span.start,
773 "did not find expected <document start>",
774 )),
775 }
776 }
777
778 fn document_content<'a>(&mut self) -> ParseResult<'a>
779 where
780 'input: 'a,
781 {
782 match *self.peek_token()? {
783 Token(
784 mark,
785 TokenType::VersionDirective(..)
786 | TokenType::TagDirective(..)
787 | TokenType::ReservedDirective(..)
788 | TokenType::DocumentStart
789 | TokenType::DocumentEnd
790 | TokenType::StreamEnd,
791 ) => {
792 self.pop_state();
793 Ok((Event::empty_scalar(), mark))
795 }
796 _ => self.parse_node(true, false),
797 }
798 }
799
800 fn document_end<'a>(&mut self) -> ParseResult<'a>
801 where
802 'input: 'a,
803 {
804 let mut explicit_end = false;
805 let span: Span = match *self.peek_token()? {
806 Token(span, TokenType::DocumentEnd) => {
807 explicit_end = true;
808 self.skip();
809 span
810 }
811 Token(span, _) => span,
812 };
813
814 if self.keep_tags {
815 self.tags.remove("!!");
819 self.tags.remove("");
820 } else {
821 self.tags.clear();
822 }
823 if explicit_end {
824 self.state = State::ImplicitDocumentStart;
825 } else {
826 if let Token(
827 span,
828 TokenType::VersionDirective(..)
829 | TokenType::TagDirective(..)
830 | TokenType::ReservedDirective(..),
831 ) = *self.peek_token()?
832 {
833 return Err(ScanError::new_str(
834 span.start,
835 "missing explicit document end marker before directive",
836 ));
837 }
838 self.state = State::DocumentStart;
839 }
840
841 Ok((Event::DocumentEnd, span))
842 }
843
844 fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
845 let new_id = self.anchor_id_count;
851 self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
852 ScanError::new_str(
853 mark.start,
854 "while parsing anchor, anchor count exceeded supported limit",
855 )
856 })?;
857 self.anchors.insert(name, new_id);
858 Ok(new_id)
859 }
860
    /// Parses the start of a node: an alias, a scalar, or the opening of a collection.
    ///
    /// An optional anchor and tag (in either order) are consumed first. Then:
    /// - an alias is resolved against the anchor table and emitted as `Event::Alias`;
    /// - a scalar is emitted and the previous state is popped;
    /// - a flow or block collection start emits the matching start event and switches to the
    ///   collection's "first entry" state without consuming the start token;
    /// - if only an anchor and/or tag was present, an empty scalar carrying them is emitted.
    ///
    /// `block` allows block sequences and block mappings to start here. `indentless_sequence`
    /// allows a block-entry token to start a sequence directly.
    ///
    /// # Errors
    /// Fails on an alias to an unknown anchor, on an undeclared tag handle, on anchor id
    /// overflow, or when the next token cannot start a node.
    #[allow(clippy::too_many_lines)]
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // 0 means "no anchor"; registered anchors get ids from `register_anchor`.
        let mut anchor_id = 0;
        let mut tag = None;
        // Phase 1: alias, or node properties (anchor and/or tag).
        match *self.peek_token()? {
            Token(_, TokenType::Alias(_)) => {
                self.pop_state();
                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                // The peek above established that the buffered token is an alias.
                unreachable!()
            }
            Token(_, TokenType::Anchor(_)) => {
                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span)?;
                    // Anchor first, optionally followed by a tag.
                    if let TokenType::Tag(..) = self.peek_token()?.1 {
                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                            // Tag resolution errors are reported at the anchor's position.
                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::Tag(..)) => {
                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    // Tag first, optionally followed by an anchor.
                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                            anchor_id = self.register_anchor(name, &mark)?;
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Phase 2: the node content itself.
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(_, TokenType::Scalar(..)) => {
                self.pop_state();
                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            // Properties without content: the node is an empty scalar carrying them.
            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            // Nothing that can start a node: pick a message from the enclosing collection.
            Token(span, _) => {
                let info = match self.state {
                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                        "unexpected EOF while parsing a flow sequence"
                    }
                    State::FlowMappingFirstKey
                    | State::FlowMappingKey
                    | State::FlowMappingValue
                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                    State::FlowSequenceEntryMappingKey
                    | State::FlowSequenceEntryMappingValue
                    | State::FlowSequenceEntryMappingEnd => {
                        "unexpected EOF while parsing an implicit flow mapping"
                    }
                    State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
                        "unexpected EOF while parsing a block sequence"
                    }
                    State::BlockMappingFirstKey
                    | State::BlockMappingKey
                    | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
                    _ => "while parsing a node, did not find expected node content",
                };
                Err(ScanError::new_str(span.start, info))
            }
        }
    }
974
    /// Parses the key of the next block mapping entry, or ends the mapping.
    ///
    /// When `first` is `true`, the token still buffered from opening the mapping is skipped
    /// before anything else.
    ///
    /// # Errors
    /// Fails if the next token is not a key, a value indicator or the end of the block.
    fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            // Skip the mapping-start token left in the lookahead buffer by `parse_node`.
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(_, TokenType::Key) => {
                // Remember the key's column; `parse` attaches it to the next emitted span.
                if let Token(key_span, TokenType::Key) = *self.peek_token()? {
                    self.pending_key_indent = Some(key_span.start.col());
                }
                self.skip();
                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
                    *self.peek_token()?
                {
                    // The key indicator is not followed by a node: the key is empty.
                    self.state = State::BlockMappingValue;
                    Ok((Event::empty_scalar(), mark))
                } else {
                    self.push_state(State::BlockMappingValue);
                    self.parse_node(true, true)
                }
            }
            // A value indicator without a key: the key is empty. The token is left in place
            // for `block_mapping_value`.
            Token(mark, TokenType::Value) => {
                self.state = State::BlockMappingValue;
                Ok((Event::empty_scalar(), mark))
            }
            Token(mark, TokenType::BlockEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::MappingEnd, mark))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "while parsing a block mapping, did not find expected key",
            )),
        }
    }
1019
1020 fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1021 where
1022 'input: 'a,
1023 {
1024 match *self.peek_token()? {
1025 Token(mark, TokenType::Value) => {
1026 self.skip();
1027 if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1028 *self.peek_token()?
1029 {
1030 self.state = State::BlockMappingKey;
1031 Ok((Event::empty_scalar(), mark))
1033 } else {
1034 self.push_state(State::BlockMappingKey);
1035 self.parse_node(true, true)
1036 }
1037 }
1038 Token(mark, _) => {
1039 self.state = State::BlockMappingKey;
1040 Ok((Event::empty_scalar(), mark))
1042 }
1043 }
1044 }
1045
    /// Parses the key of the next flow mapping entry, or ends the mapping.
    ///
    /// When `first` is `true`, the token still buffered from opening the mapping is skipped
    /// and no entry separator is required; otherwise a flow-entry token must precede the key.
    ///
    /// # Errors
    /// Fails if a later entry is not introduced by a flow-entry token or the mapping end.
    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            // Skip the mapping-start token left in the lookahead buffer by `parse_node`.
            let _ = self.peek_token()?;
            self.skip();
        }
        // Every path that produces a key returns early from inside this expression; it only
        // yields a span when the mapping ends.
        let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
            mark
        } else {
            if !first {
                match *self.peek_token()? {
                    Token(_, TokenType::FlowEntry) => self.skip(),
                    Token(span, _) => {
                        return Err(ScanError::new_str(
                            span.start,
                            "while parsing a flow mapping, did not find expected ',' or '}'",
                        ))
                    }
                }
            }

            match *self.peek_token()? {
                Token(_, TokenType::Key) => {
                    self.skip();
                    if let Token(
                        mark,
                        TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
                    ) = *self.peek_token()?
                    {
                        // Key indicator without a node: the key is empty.
                        self.state = State::FlowMappingValue;
                        return Ok((Event::empty_scalar(), mark));
                    }
                    self.push_state(State::FlowMappingValue);
                    return self.parse_node(false, false);
                }
                // Value indicator without a key: the key is empty.
                Token(marker, TokenType::Value) => {
                    self.state = State::FlowMappingValue;
                    return Ok((Event::empty_scalar(), marker));
                }
                // The mapping ends right after an entry separator.
                Token(_, TokenType::FlowMappingEnd) => (),
                // A node without a key indicator: it is the key, and its value is empty.
                _ => {
                    self.push_state(State::FlowMappingEmptyValue);
                    return self.parse_node(false, false);
                }
            }

            // Span of the mapping-end token that is now buffered.
            self.peek_token()?.0
        };

        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, span))
    }
1101
1102 fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1103 where
1104 'input: 'a,
1105 {
1106 let span: Span = {
1107 if empty {
1108 let Token(mark, _) = *self.peek_token()?;
1109 self.state = State::FlowMappingKey;
1110 return Ok((Event::empty_scalar(), mark));
1111 }
1112 match *self.peek_token()? {
1113 Token(span, TokenType::Value) => {
1114 self.skip();
1115 match self.peek_token()?.1 {
1116 TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
1117 _ => {
1118 self.push_state(State::FlowMappingKey);
1119 return self.parse_node(false, false);
1120 }
1121 }
1122 span
1123 }
1124 Token(marker, _) => marker,
1125 }
1126 };
1127
1128 self.state = State::FlowMappingKey;
1129 Ok((Event::empty_scalar(), span))
1130 }
1131
    /// Parses the next entry of a flow sequence, or ends the sequence.
    ///
    /// When `first` is `true`, the token still buffered from opening the sequence is skipped
    /// and no entry separator is required. A key indicator inside the sequence opens an
    /// implicit single-pair mapping (`MappingStart(0, None)`).
    ///
    /// # Errors
    /// Fails if a later entry is not introduced by a flow-entry token or the sequence end.
    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            // Skip the sequence-start token left in the lookahead buffer by `parse_node`.
            let _ = self.peek_token()?;
            self.skip();
        }
        // Phase 1: sequence end, or the separator required before every entry but the first.
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                return Ok((Event::SequenceEnd, mark));
            }
            Token(_, TokenType::FlowEntry) if !first => {
                self.skip();
            }
            Token(span, _) if !first => {
                return Err(ScanError::new_str(
                    span.start,
                    "while parsing a flow sequence, expected ',' or ']'",
                ));
            }
            // First entry: nothing to consume before it.
            _ => { }
        }
        // Phase 2: the entry itself. The sequence may also end right after a separator.
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(mark, TokenType::Key) => {
                self.state = State::FlowSequenceEntryMappingKey;
                self.skip();
                Ok((Event::MappingStart(0, None), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntry);
                self.parse_node(false, false)
            }
        }
    }
1176
1177 fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1178 where
1179 'input: 'a,
1180 {
1181 match *self.peek_token()? {
1182 Token(mark, TokenType::BlockEntry) => {
1183 self.skip();
1184 if let Token(
1185 _,
1186 TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1187 ) = *self.peek_token()?
1188 {
1189 self.state = State::IndentlessSequenceEntry;
1190 Ok((Event::empty_scalar(), mark))
1191 } else {
1192 self.push_state(State::IndentlessSequenceEntry);
1193 self.parse_node(true, false)
1194 }
1195 }
1196 Token(mark, _) => {
1197 self.pop_state();
1198 Ok((Event::SequenceEnd, mark))
1199 }
1200 }
1201 }
1202
1203 fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1204 where
1205 'input: 'a,
1206 {
1207 if first {
1209 let _ = self.peek_token()?;
1210 self.skip();
1212 }
1213 match *self.peek_token()? {
1214 Token(mark, TokenType::BlockEnd) => {
1215 self.pop_state();
1216 self.skip();
1217 Ok((Event::SequenceEnd, mark))
1218 }
1219 Token(mark, TokenType::BlockEntry) => {
1220 self.skip();
1221 if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1222 self.state = State::BlockSequenceEntry;
1223 Ok((Event::empty_scalar(), mark))
1224 } else {
1225 self.push_state(State::BlockSequenceEntry);
1226 self.parse_node(true, false)
1227 }
1228 }
1229 Token(span, _) => Err(ScanError::new_str(
1230 span.start,
1231 "while parsing a block collection, did not find expected '-' indicator",
1232 )),
1233 }
1234 }
1235
1236 fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1237 where
1238 'input: 'a,
1239 {
1240 if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1241 *self.peek_token()?
1242 {
1243 self.state = State::FlowSequenceEntryMappingValue;
1244 Ok((Event::empty_scalar(), mark))
1245 } else {
1246 self.push_state(State::FlowSequenceEntryMappingValue);
1247 self.parse_node(false, false)
1248 }
1249 }
1250
1251 fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
1252 where
1253 'input: 'a,
1254 {
1255 match *self.peek_token()? {
1256 Token(_, TokenType::Value) => {
1257 self.skip();
1258 self.state = State::FlowSequenceEntryMappingValue;
1259 let Token(span, ref tok) = *self.peek_token()?;
1260 if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1261 self.state = State::FlowSequenceEntryMappingEnd;
1262 Ok((Event::empty_scalar(), Span::empty(span.start)))
1263 } else {
1264 self.push_state(State::FlowSequenceEntryMappingEnd);
1265 self.parse_node(false, false)
1266 }
1267 }
1268 Token(mark, _) => {
1269 self.state = State::FlowSequenceEntryMappingEnd;
1270 Ok((Event::empty_scalar(), mark))
1271 }
1272 }
1273 }
1274
1275 #[allow(clippy::unnecessary_wraps)]
1276 fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
1277 where
1278 'input: 'a,
1279 {
1280 self.state = State::FlowSequenceEntry;
1281 let Token(span, _) = *self.peek_token()?;
1282 Ok((Event::MappingEnd, Span::empty(span.start)))
1283 }
1284
1285 fn resolve_tag(
1287 &self,
1288 span: Span,
1289 handle: &Cow<'input, str>,
1290 suffix: Cow<'input, str>,
1291 ) -> Result<Cow<'input, Tag>, ScanError> {
1292 let suffix = suffix.into_owned();
1293 let tag = if handle == "!!" {
1294 Tag {
1297 handle: self
1298 .tags
1299 .get("!!")
1300 .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1301 suffix,
1302 }
1303 } else if handle.is_empty() && suffix == "!" {
1304 match self.tags.get("") {
1306 Some(prefix) => Tag {
1307 handle: prefix.clone(),
1308 suffix,
1309 },
1310 None => Tag {
1311 handle: String::new(),
1312 suffix,
1313 },
1314 }
1315 } else {
1316 let prefix = self.tags.get(&**handle);
1318 if let Some(prefix) = prefix {
1319 Tag {
1320 handle: prefix.clone(),
1321 suffix,
1322 }
1323 } else {
1324 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1329 return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
1330 }
1331 Tag {
1332 handle: handle.to_string(),
1333 suffix,
1334 }
1335 }
1336 };
1337 Ok(Cow::Owned(tag))
1338 }
1339}
1340
1341impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
1342 fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
1343 if let Some(ref x) = self.current {
1344 Some(Ok(x))
1345 } else {
1346 if self.stream_end_emitted {
1347 return None;
1348 }
1349 match self.next_event_impl() {
1350 Ok(token) => self.current = Some(token),
1351 Err(e) => return Some(Err(e)),
1352 }
1353 self.current.as_ref().map(Ok)
1354 }
1355 }
1356
1357 fn next_event(&mut self) -> Option<ParseResult<'input>> {
1358 if self.stream_end_emitted {
1359 return None;
1360 }
1361
1362 let tok = self.next_event_impl();
1363 if matches!(tok, Ok((Event::StreamEnd, _))) {
1364 self.stream_end_emitted = true;
1365 }
1366 Some(tok)
1367 }
1368
1369 fn load<R: SpannedEventReceiver<'input>>(
1370 &mut self,
1371 recv: &mut R,
1372 multi: bool,
1373 ) -> Result<(), ScanError> {
1374 let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
1375 if !self.scanner.stream_started() || stream_start_buffered {
1376 let (ev, span) = self.next_event_impl()?;
1377 if ev != Event::StreamStart {
1378 return Err(ScanError::new_str(
1379 span.start,
1380 "did not find expected <stream-start>",
1381 ));
1382 }
1383 recv.on_event(ev, span);
1384 }
1385
1386 if self.scanner.stream_ended() {
1387 recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
1389 return Ok(());
1390 }
1391 loop {
1392 let (ev, span) = self.next_event_impl()?;
1393 if ev == Event::StreamEnd {
1394 recv.on_event(ev, span);
1395 return Ok(());
1396 }
1397 self.anchors.clear();
1399 self.load_document(ev, span, recv)?;
1400 if !multi {
1401 break;
1402 }
1403 }
1404 Ok(())
1405 }
1406}
1407
1408impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
1409 type Item = Result<(Event<'input>, Span), ScanError>;
1410
1411 fn next(&mut self) -> Option<Self::Item> {
1412 self.next_event()
1413 }
1414}
1415
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, string::ToString, vec::Vec};

    use super::{Event, EventReceiver, Parser, Tag};

    /// A tag whose handle is already a resolved prefix is displayed as prefix followed
    /// directly by the suffix.
    #[test]
    fn display_resolved_core_tag_without_extra_bang() {
        let tag = Tag {
            handle: "tag:yaml.org,2002:".to_owned(),
            suffix: "str".to_owned(),
        };

        assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
    }

    /// `peek` must return exactly the event that the following `next_event` returns.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut p = Parser::new_from_str(s);
        loop {
            let event_peek = p.peek().unwrap().unwrap().clone();
            let event = p.next_event().unwrap().unwrap();
            assert_eq!(event, event_peek);
            if event.0 == Event::StreamEnd {
                break;
            }
        }
    }

    /// Several tag directives with different handles are all usable in the same document.
    #[test]
    fn test_multiple_tag_directives_are_kept_within_document() {
        let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";

        let mut seen_a = false;
        let mut seen_b = false;
        for event in Parser::new_from_str(text) {
            let (event, _) = event.unwrap();
            if let Event::Scalar(_, _, _, Some(tag)) = event {
                if tag.handle == "tag:a,2024:" {
                    seen_a = true;
                } else if tag.handle == "tag:b,2024:" {
                    seen_b = true;
                }
            }
        }

        assert!(seen_a);
        assert!(seen_b);
    }

    /// Without `keep_tags`, a handle declared for the first document is unknown in the
    /// second one.
    #[test]
    fn test_tags_are_cleared_when_next_document_has_no_directives() {
        let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";

        let mut parser = Parser::new_from_str(text);
        // Consume the first document.
        for event in parser.by_ref() {
            let (event, _) = event.unwrap();
            if let Event::DocumentEnd = event {
                break;
            }
        }

        match parser.next().unwrap().unwrap().0 {
            Event::DocumentStart(true) => {}
            _ => panic!("expected explicit second document start"),
        }

        let err = parser.next().unwrap().unwrap_err();
        assert!(format!("{err}").contains("the handle wasn't declared"));
    }

    /// Anchors of one document cannot be referenced from the next one when pulling events.
    #[test]
    fn test_pull_parser_clears_anchors_between_documents() {
        let mut parser = Parser::new_from_str(
            "--- &a value
--- *a
",
        );

        // Consume the first document.
        for event in parser.by_ref() {
            let (event, _) = event.unwrap();
            if matches!(event, Event::DocumentEnd) {
                break;
            }
        }

        match parser.next().unwrap().unwrap().0 {
            Event::DocumentStart(true) => {}
            _ => panic!("expected explicit second document start"),
        }

        let err = parser.next().unwrap().unwrap_err();
        assert!(format!("{err}").contains("unknown anchor"));
    }

    /// With `keep_tags(true)` a declared handle stays usable in later documents; with
    /// `keep_tags(false)` the same input must produce an error.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        for x in Parser::new_from_str(text).keep_tags(true) {
            let x = x.unwrap();
            if let Event::MappingStart(_, tag) = x.0 {
                let tag = tag.unwrap();
                assert_eq!(tag.handle, "tag:test,2024:");
            }
        }

        for x in Parser::new_from_str(text).keep_tags(false) {
            if x.is_err() {
                // Expected: the second document uses a handle that is no longer declared.
                return;
            }
        }
        panic!("Test failed, did not encounter error")
    }

    /// A key indicator directly followed by a value indicator inside a flow sequence is a
    /// single-pair mapping with an empty key.
    #[test]
    fn test_flow_sequence_mapping_allows_empty_key() {
        let parser = Parser::new_from_str("[?: value]");
        for event in parser {
            event.expect("parser should accept flow sequence mappings with empty keys");
        }
    }

    /// Even with `keep_tags(true)`, an override of the `!!` handle applies only to the
    /// document that declared it.
    #[test]
    fn test_keep_tags_does_not_persist_default_tag_handles() {
        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";

        let mut int_tags = Vec::new();
        for event in Parser::new_from_str(text).keep_tags(true) {
            let event = event.unwrap().0;
            if let Event::Scalar(_, _, _, Some(tag)) = event {
                if tag.suffix == "int" {
                    int_tags.push(tag.handle.clone());
                }
            }
        }

        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
    }

    /// `load` must still forward `StreamStart` when that event was already peeked.
    #[test]
    fn test_load_after_peek_stream_start() {
        /// Receiver that records every event it is given.
        #[derive(Default)]
        struct Sink<'input> {
            events: Vec<Event<'input>>,
        }

        impl<'input> EventReceiver<'input> for Sink<'input> {
            fn on_event(&mut self, ev: Event<'input>) {
                self.events.push(ev);
            }
        }

        let mut parser = Parser::new_from_str("key: value\n");
        let mut sink = Sink::default();

        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
        parser.load(&mut sink, false).unwrap();

        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
    }
}