1use crate::{
8 input::{str::StrInput, BorrowedInput},
9 scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10 BufferedInput, Marker,
11};
12
13use alloc::{
14 borrow::Cow,
15 collections::{BTreeMap, BTreeSet},
16 string::{String, ToString},
17 vec::Vec,
18};
19use core::fmt::Display;
20
/// Internal state of the parser's pushdown state machine.
///
/// The parser keeps the current `State` plus a stack of suspended states
/// (`Parser::states`) to return to when a nested construct ends.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// Initial state: expecting the scanner's `StreamStart` token.
    StreamStart,
    /// Expecting a document which may start without an explicit `---`.
    ImplicitDocumentStart,
    /// Expecting an explicit document start (`---`) or directives.
    DocumentStart,
    /// Expecting the root node of the current document.
    DocumentContent,
    /// Expecting the end of the current document.
    DocumentEnd,
    /// Expecting a node in block context.
    BlockNode,
    /// Expecting the first entry of a block sequence (consumes the start token).
    BlockSequenceFirstEntry,
    /// Expecting a subsequent block sequence entry.
    BlockSequenceEntry,
    /// Expecting an entry of a sequence introduced by `-` without extra indent.
    IndentlessSequenceEntry,
    /// Expecting the first key of a block mapping (consumes the start token).
    BlockMappingFirstKey,
    /// Expecting a subsequent block mapping key.
    BlockMappingKey,
    /// Expecting the value of a block mapping entry.
    BlockMappingValue,
    /// Expecting the first entry of a flow sequence (consumes the `[`).
    FlowSequenceFirstEntry,
    /// Expecting a subsequent flow sequence entry.
    FlowSequenceEntry,
    /// Expecting the key of an implicit mapping inside a flow sequence.
    FlowSequenceEntryMappingKey,
    /// Expecting the value of an implicit mapping inside a flow sequence.
    FlowSequenceEntryMappingValue,
    /// Emitting the `MappingEnd` of an implicit mapping inside a flow
    /// sequence; carries the marker to report for the synthetic event.
    FlowSequenceEntryMappingEnd(Marker),
    /// Expecting the first key of a flow mapping (consumes the `{`).
    FlowMappingFirstKey,
    /// Expecting a subsequent flow mapping key.
    FlowMappingKey,
    /// Expecting the value of a flow mapping entry.
    FlowMappingValue,
    /// Expecting a value for a flow mapping key that turned out to be empty.
    FlowMappingEmptyValue,
    /// The stream has ended; only `StreamEnd` events are produced from here.
    End,
}
46
/// An event produced by the parser.
///
/// Anchors are referred to by numeric ids; an id of `0` means "no anchor"
/// (ids handed out by the parser start at 1).
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Placeholder event; not emitted by the parsing code in this module.
    Nothing,
    /// The start of the YAML stream.
    StreamStart,
    /// The end of the YAML stream.
    StreamEnd,
    /// The start of a document. The payload is `true` when the document was
    /// started explicitly (with `---`), `false` for an implicit start.
    DocumentStart(bool),
    /// The end of a document.
    DocumentEnd,
    /// An alias node referring to a previously registered anchor.
    Alias(
        /// The id of the anchor this alias refers to.
        usize,
    ),
    /// A scalar node.
    Scalar(
        /// The scalar's contents.
        Cow<'input, str>,
        /// The style the scalar was written in.
        ScalarStyle,
        /// The scalar's anchor id (0 if unanchored).
        usize,
        /// The scalar's tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The start of a sequence node.
    SequenceStart(
        /// The sequence's anchor id (0 if unanchored).
        usize,
        /// The sequence's tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a sequence node.
    SequenceEnd,
    /// The start of a mapping node.
    MappingStart(
        /// The mapping's anchor id (0 if unanchored).
        usize,
        /// The mapping's tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// The end of a mapping node.
    MappingEnd,
}
100
/// A YAML tag, split into its (resolved) handle and suffix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// The tag's handle, after `%TAG` directive resolution (e.g. the `!!`
    /// handle resolves to `tag:yaml.org,2002:` by default — see `resolve_tag`).
    pub handle: String,
    /// The tag's suffix (the part following the handle).
    pub suffix: String,
}
109
110impl Tag {
111 #[must_use]
120 pub fn is_yaml_core_schema(&self) -> bool {
121 self.handle == "tag:yaml.org,2002:"
122 }
123}
124
125impl Display for Tag {
126 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
127 if self.handle == "!" {
128 write!(f, "!{}", self.suffix)
129 } else {
130 write!(f, "{}!{}", self.handle, self.suffix)
131 }
132 }
133}
134
135impl<'input> Event<'input> {
136 fn empty_scalar() -> Self {
138 Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
140 }
141
142 fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
144 Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
145 }
146}
147
/// A YAML parser: drives a [`Scanner`] token stream through a state machine
/// and produces [`Event`]s.
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// The underlying scanner producing tokens.
    scanner: Scanner<'input, T>,
    /// Stack of suspended states to resume when nested constructs end.
    states: Vec<State>,
    /// The current state of the state machine.
    state: State,
    /// Single-token lookahead buffer filled by `peek_token`.
    token: Option<Token<'input>>,
    /// Single-event lookahead buffer filled by `peek`.
    current: Option<(Event<'input>, Span)>,

    /// Column of the most recently seen mapping key token; attached to the
    /// span of the next event returned from `parse`.
    pending_key_indent: Option<usize>,
    /// Registered anchor names mapped to their numeric ids, for alias
    /// resolution.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// The id the next registered anchor will receive. Starts at 1; id 0 is
    /// reserved to mean "no anchor".
    anchor_id_count: usize,
    /// `%TAG` handle-to-prefix mappings currently in effect.
    tags: BTreeMap<String, String>,
    /// Set once `StreamEnd` has been handed out by `next_event`.
    stream_end_emitted: bool,
    /// Whether `%TAG` directives should be kept across documents (see
    /// [`Parser::keep_tags`]).
    keep_tags: bool,
}
190
/// A receiver of parse events that does not care about source locations.
///
/// Every `EventReceiver` automatically implements [`SpannedEventReceiver`]
/// (the spans are simply dropped).
pub trait EventReceiver<'input> {
    /// Called once for each event produced while loading.
    fn on_event(&mut self, ev: Event<'input>);
}
264
/// A receiver of parse events together with the source span they cover.
pub trait SpannedEventReceiver<'input> {
    /// Called once for each event produced while loading, with its span.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
272
// Blanket impl: any span-unaware receiver works as a spanned receiver by
// discarding the span.
impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
        self.on_event(ev);
    }
}
278
/// The result of a single parsing step: an event with its span, or an error.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
281
/// Common interface over parser implementations.
pub trait ParserTrait<'input> {
    /// Peeks at the next event without consuming it. Returns `None` once the
    /// stream has ended.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;

    /// Consumes and returns the next event. Returns `None` after `StreamEnd`
    /// has been emitted.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;

    /// Loads the stream into `recv`; parses a single document if `multi` is
    /// `false`, all documents otherwise.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;
}
300
impl<'input> Parser<'input, StrInput<'input>> {
    /// Creates a parser that borrows directly from the given `&str`.
    #[must_use]
    pub fn new_from_str(value: &'input str) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
        Parser::new(StrInput::new(value))
    }
}
309
impl<T> Parser<'static, BufferedInput<T>>
where
    T: Iterator<Item = char>,
{
    /// Creates a parser reading from a `char` iterator; the input is buffered
    /// internally, so events own their contents (`'static`).
    #[must_use]
    pub fn new_from_iter(iter: T) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
        Parser::new(BufferedInput::new(iter))
    }
}
321
impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
    /// Returns the current anchor id counter (the id the next registered
    /// anchor would receive).
    pub fn get_anchor_offset(&self) -> usize {
        self.anchor_id_count
    }

    /// Overrides the anchor id counter; subsequently registered anchors are
    /// numbered starting from `offset`.
    pub fn set_anchor_offset(&mut self, offset: usize) {
        self.anchor_id_count = offset;
    }

    /// Creates a parser reading tokens scanned from `src`.
    pub fn new(src: T) -> Self {
        Parser {
            scanner: Scanner::new(src),
            states: Vec::new(),
            state: State::StreamStart,
            token: None,
            current: None,

            pending_key_indent: None,

            anchors: BTreeMap::new(),
            // Anchor ids start at 1: id 0 means "no anchor" (see `parse_node`).
            anchor_id_count: 1,
            tags: BTreeMap::new(),
            stream_end_emitted: false,
            keep_tags: false,
        }
    }

    /// Builder-style setter: whether `%TAG` directives should remain in
    /// effect across documents instead of being reset at each document end.
    #[must_use]
    pub fn keep_tags(mut self, value: bool) -> Self {
        self.keep_tags = value;
        self
    }

    /// Peeks at the next event without consuming it (see [`ParserTrait::peek`]).
    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        ParserTrait::peek(self)
    }

    /// Consumes and returns the next event (see [`ParserTrait::next_event`]).
    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
        ParserTrait::next_event(self)
    }

    /// Returns the event buffered by a previous `peek`, or parses a fresh one.
    fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match self.current.take() {
            None => self.parse(),
            Some(v) => Ok(v),
        }
    }

    /// Peeks at the next scanner token, buffering it in `self.token`.
    fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
        match self.token {
            None => {
                self.token = Some(self.scan_next_token()?);
                Ok(self.token.as_ref().unwrap())
            }
            Some(ref tok) => Ok(tok),
        }
    }

    /// Pulls the next token out of the scanner, converting scanner exhaustion
    /// into an error.
    fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
        let token = self.scanner.next();
        match token {
            None => match self.scanner.get_error() {
                // The scanner ran dry without reporting its own error: the
                // input ended in the middle of a construct.
                None => Err(self.unexpected_eof()),
                Some(e) => Err(e),
            },
            Some(tok) => Ok(tok),
        }
    }

    /// Builds an end-of-file error whose message reflects the construct the
    /// parser was in the middle of.
    #[cold]
    fn unexpected_eof(&self) -> ScanError {
        let info = match self.state {
            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                "unexpected EOF while parsing a flow sequence"
            }
            State::FlowMappingFirstKey
            | State::FlowMappingKey
            | State::FlowMappingValue
            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
            State::FlowSequenceEntryMappingKey
            | State::FlowSequenceEntryMappingValue
            | State::FlowSequenceEntryMappingEnd(_) => {
                "unexpected EOF while parsing an implicit flow mapping"
            }
            State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
                "unexpected EOF while parsing a block sequence"
            }
            State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
                "unexpected EOF while parsing a block mapping"
            }
            _ => "unexpected eof",
        };
        ScanError::new_str(self.scanner.mark(), info)
    }

    /// Takes the buffered token out of the lookahead slot.
    ///
    /// # Panics
    /// Panics if no token has been buffered; callers must `peek_token` first.
    fn fetch_token<'a>(&mut self) -> Token<'a>
    where
        'input: 'a,
    {
        self.token
            .take()
            .expect("fetch_token needs to be preceded by peek_token")
    }

    /// Discards the buffered lookahead token.
    fn skip(&mut self) {
        self.token = None;
    }
    /// Resumes the most recently suspended state.
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap();
    }
    /// Suspends `state` to be resumed later by `pop_state`.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }

    /// Runs one step of the state machine and attaches any pending key
    /// indentation to the resulting span.
    fn parse<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if self.state == State::End {
            // Once ended, keep producing `StreamEnd` at the current mark.
            return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
        }
        let (ev, span) = self.state_machine()?;
        if let Some(indent) = self.pending_key_indent.take() {
            Ok((ev, span.with_indent(Some(indent))))
        } else {
            Ok((ev, span))
        }
    }

    /// Loads the stream into `recv` (see [`ParserTrait::load`]).
    pub fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        ParserTrait::load(self, recv, multi)
    }

    /// Feeds one full document (`DocumentStart` through `DocumentEnd`) to
    /// `recv`; `first_ev` must be the document's start event.
    fn load_document<R: SpannedEventReceiver<'input>>(
        &mut self,
        first_ev: Event<'input>,
        span: Span,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        if !matches!(first_ev, Event::DocumentStart(_)) {
            return Err(ScanError::new_str(
                span.start,
                "did not find expected <document-start>",
            ));
        }
        recv.on_event(first_ev, span);

        // The document's single root node.
        let (ev, span) = self.next_event_impl()?;
        self.load_node(ev, span, recv)?;

        // The state machine guarantees a `DocumentEnd` follows the root node.
        let (ev, mark) = self.next_event_impl()?;
        assert_eq!(ev, Event::DocumentEnd);
        recv.on_event(ev, mark);

        Ok(())
    }

    /// Feeds one node (scalar, alias, sequence or mapping, recursively) to
    /// `recv`; `first_ev` is the node's opening event.
    fn load_node<R: SpannedEventReceiver<'input>>(
        &mut self,
        first_ev: Event<'input>,
        span: Span,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        match first_ev {
            Event::Alias(..) | Event::Scalar(..) => {
                recv.on_event(first_ev, span);
                Ok(())
            }
            Event::SequenceStart(..) => {
                recv.on_event(first_ev, span);
                self.load_sequence(recv)
            }
            Event::MappingStart(..) => {
                recv.on_event(first_ev, span);
                self.load_mapping(recv)
            }
            _ => {
                // The state machine only produces node-opening events here.
                #[cfg(feature = "debug_prints")]
                std::println!("UNREACHABLE EVENT: {first_ev:?}");
                unreachable!();
            }
        }
    }

    /// Feeds a mapping's key/value pairs to `recv` until `MappingEnd`.
    fn load_mapping<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        let (mut key_ev, mut key_mark) = self.next_event_impl()?;
        while key_ev != Event::MappingEnd {
            // Key node.
            self.load_node(key_ev, key_mark, recv)?;

            // Value node.
            let (ev, mark) = self.next_event_impl()?;
            self.load_node(ev, mark, recv)?;

            // Next key, or `MappingEnd`.
            let (ev, mark) = self.next_event_impl()?;
            key_ev = ev;
            key_mark = mark;
        }
        recv.on_event(key_ev, key_mark);
        Ok(())
    }

    /// Feeds a sequence's entries to `recv` until `SequenceEnd`.
    fn load_sequence<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        let (mut ev, mut mark) = self.next_event_impl()?;
        while ev != Event::SequenceEnd {
            self.load_node(ev, mark, recv)?;

            // Next entry, or `SequenceEnd`.
            let (next_ev, next_mark) = self.next_event_impl()?;
            ev = next_ev;
            mark = next_mark;
        }
        recv.on_event(ev, mark);
        Ok(())
    }

    /// Dispatches to the handler for the current state.
    fn state_machine<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);

        match self.state {
            State::StreamStart => self.stream_start(),

            State::ImplicitDocumentStart => self.document_start(true),
            State::DocumentStart => self.document_start(false),
            State::DocumentContent => self.document_content(),
            State::DocumentEnd => self.document_end(),

            State::BlockNode => self.parse_node(true, false),
            State::BlockMappingFirstKey => self.block_mapping_key(true),
            State::BlockMappingKey => self.block_mapping_key(false),
            State::BlockMappingValue => self.block_mapping_value(),

            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
            State::BlockSequenceEntry => self.block_sequence_entry(false),

            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
            State::FlowSequenceEntry => self.flow_sequence_entry(false),

            State::FlowMappingFirstKey => self.flow_mapping_key(true),
            State::FlowMappingKey => self.flow_mapping_key(false),
            State::FlowMappingValue => self.flow_mapping_value(false),

            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),

            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
            State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
            State::FlowMappingEmptyValue => self.flow_mapping_value(true),

            // `parse` short-circuits on `State::End` before reaching here.
            State::End => unreachable!(),
        }
    }

    /// Handles `State::StreamStart`: expects the scanner's `StreamStart` token.
    fn stream_start<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(span, TokenType::StreamStart(_)) => {
                self.state = State::ImplicitDocumentStart;
                self.skip();
                Ok((Event::StreamStart, span))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "did not find expected <stream-start>",
            )),
        }
    }

    /// Handles the start of a document; `implicit` allows a document with no
    /// leading `---`.
    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // Skip stray `...` markers between documents.
        while let TokenType::DocumentEnd = self.peek_token()?.1 {
            self.skip();
        }

        match *self.peek_token()? {
            Token(span, TokenType::StreamEnd) => {
                self.state = State::End;
                self.skip();
                Ok((Event::StreamEnd, span))
            }
            Token(
                _,
                TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..)
                | TokenType::DocumentStart,
            ) => {
                // Directives force an explicit `---` document start.
                self.explicit_document_start()
            }
            Token(span, _) if implicit => {
                // No directive tokens can reach this arm, but processing them
                // still resets/carries over `self.tags` per `keep_tags`.
                self.parser_process_directives()?;
                self.push_state(State::DocumentEnd);
                self.state = State::BlockNode;
                Ok((Event::DocumentStart(false), span))
            }
            _ => {
                self.explicit_document_start()
            }
        }
    }

    /// Consumes all directive tokens before a document and records `%TAG`
    /// handle mappings into `self.tags`.
    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
        let mut version_directive_received = false;
        // With `keep_tags`, start from the previous document's mappings.
        let mut tags = if self.keep_tags {
            self.tags.clone()
        } else {
            BTreeMap::new()
        };
        // Handles declared in *this* document, to reject duplicates even when
        // `keep_tags` carried the handle over.
        let mut document_tag_handles = BTreeSet::new();

        loop {
            match self.peek_token()? {
                Token(span, TokenType::VersionDirective(_, _)) => {
                    // At most one `%YAML` directive per document.
                    if version_directive_received {
                        return Err(ScanError::new_str(
                            span.start,
                            "duplicate version directive",
                        ));
                    }
                    version_directive_received = true;
                }
                Token(mark, TokenType::TagDirective(handle, prefix)) => {
                    if !document_tag_handles.insert(handle.to_string()) {
                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
                    }
                    tags.insert(handle.to_string(), prefix.to_string());
                }
                Token(_, TokenType::ReservedDirective(_, _)) => {
                    // Reserved directives are skipped silently.
                }
                _ => break,
            }
            self.skip();
        }

        self.tags = tags;
        Ok(())
    }

    /// Parses directives then requires an explicit `---` document start.
    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        self.parser_process_directives()?;
        match *self.peek_token()? {
            Token(mark, TokenType::DocumentStart) => {
                self.push_state(State::DocumentEnd);
                self.state = State::DocumentContent;
                self.skip();
                Ok((Event::DocumentStart(true), mark))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "did not find expected <document start>",
            )),
        }
    }

    /// Handles `State::DocumentContent`: parses the document's root node, or
    /// emits an empty scalar if the document has no content.
    fn document_content<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(
                mark,
                TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..)
                | TokenType::DocumentStart
                | TokenType::DocumentEnd
                | TokenType::StreamEnd,
            ) => {
                // Empty document: produce a null scalar as its content.
                self.pop_state();
                Ok((Event::empty_scalar(), mark))
            }
            _ => self.parse_node(true, false),
        }
    }

    /// Handles `State::DocumentEnd`: consumes an optional `...` marker and
    /// resets per-document tag state.
    fn document_end<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        let mut explicit_end = false;
        let span: Span = match *self.peek_token()? {
            Token(span, TokenType::DocumentEnd) => {
                explicit_end = true;
                self.skip();
                span
            }
            Token(span, _) => span,
        };

        if self.keep_tags {
            // Even with `keep_tags`, the default handles (`!!` and the empty
            // primary handle) never persist across documents.
            self.tags.remove("!!");
            self.tags.remove("");
        } else {
            self.tags.clear();
        }
        if explicit_end {
            self.state = State::ImplicitDocumentStart;
        } else {
            // Directives may only follow an *explicit* document end marker.
            if let Token(
                span,
                TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..),
            ) = *self.peek_token()?
            {
                return Err(ScanError::new_str(
                    span.start,
                    "missing explicit document end marker before directive",
                ));
            }
            self.state = State::DocumentStart;
        }

        Ok((Event::DocumentEnd, span))
    }

    /// Registers an anchor name and returns its freshly assigned id.
    ///
    /// Re-registering an existing name rebinds it to the new id.
    ///
    /// # Errors
    /// Fails if the anchor id counter would overflow.
    fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
        let new_id = self.anchor_id_count;
        self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
            ScanError::new_str(
                mark.start,
                "while parsing anchor, anchor count exceeded supported limit",
            )
        })?;
        self.anchors.insert(name, new_id);
        Ok(new_id)
    }

    /// Parses a node: optional anchor/tag properties followed by the node
    /// content. `block` permits block collections; `indentless_sequence`
    /// permits a `-` entry at the current indentation.
    #[allow(clippy::too_many_lines)]
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        let mut anchor_id = 0;
        let mut tag = None;
        // Node properties: an alias ends the node, otherwise anchor and tag
        // may appear in either order.
        match *self.peek_token()? {
            Token(_, TokenType::Alias(_)) => {
                self.pop_state();
                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                unreachable!()
            }
            Token(_, TokenType::Anchor(_)) => {
                // Anchor first, optionally followed by a tag.
                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span)?;
                    if let TokenType::Tag(..) = self.peek_token()?.1 {
                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::Tag(..)) => {
                // Tag first, optionally followed by an anchor.
                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                            anchor_id = self.register_anchor(name, &mark)?;
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Node content.
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(_, TokenType::Scalar(..)) => {
                self.pop_state();
                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            // Properties with no following content: an empty scalar node.
            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            Token(span, _) => {
                // NOTE(review): these messages mirror `unexpected_eof` but are
                // reported for any unexpected token, not only end of input.
                let info = match self.state {
                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                        "unexpected EOF while parsing a flow sequence"
                    }
                    State::FlowMappingFirstKey
                    | State::FlowMappingKey
                    | State::FlowMappingValue
                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                    State::FlowSequenceEntryMappingKey
                    | State::FlowSequenceEntryMappingValue
                    | State::FlowSequenceEntryMappingEnd(_) => {
                        "unexpected EOF while parsing an implicit flow mapping"
                    }
                    State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
                        "unexpected EOF while parsing a block sequence"
                    }
                    State::BlockMappingFirstKey
                    | State::BlockMappingKey
                    | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
                    _ => "while parsing a node, did not find expected node content",
                };
                Err(ScanError::new_str(span.start, info))
            }
        }
    }

    /// Handles a block mapping key; `first` consumes the `BlockMappingStart`
    /// token that opened the mapping.
    fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(_, TokenType::Key) => {
                // Record the key's column so `parse` can attach it to the
                // emitted event's span.
                if let Token(key_span, TokenType::Key) = *self.peek_token()? {
                    self.pending_key_indent = Some(key_span.start.col());
                }
                self.skip();
                if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
                    *self.peek_token()?
                {
                    // `? key` with no key node: the key is an empty scalar.
                    self.state = State::BlockMappingValue;
                    Ok((Event::empty_scalar(), mark))
                } else {
                    self.push_state(State::BlockMappingValue);
                    self.parse_node(true, true)
                }
            }
            Token(mark, TokenType::Value) => {
                // A value with no preceding key: the key is an empty scalar.
                self.state = State::BlockMappingValue;
                Ok((Event::empty_scalar(), mark))
            }
            Token(mark, TokenType::BlockEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::MappingEnd, mark))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "while parsing a block mapping, did not find expected key",
            )),
        }
    }

    /// Handles a block mapping value, emitting an empty scalar when the value
    /// is omitted.
    fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(mark, TokenType::Value) => {
                self.skip();
                if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
                    *self.peek_token()?
                {
                    // `key:` directly followed by another key or the end of
                    // the mapping: the value is an empty scalar.
                    self.state = State::BlockMappingKey;
                    Ok((Event::empty_scalar(), mark))
                } else {
                    self.push_state(State::BlockMappingKey);
                    self.parse_node(true, true)
                }
            }
            Token(mark, _) => {
                // No `:` token at all: treat the value as an empty scalar.
                self.state = State::BlockMappingKey;
                Ok((Event::empty_scalar(), mark))
            }
        }
    }

    /// Handles a flow mapping key; `first` consumes the `{` that opened the
    /// mapping.
    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        let span: Span = {
            match *self.peek_token()? {
                Token(mark, TokenType::FlowMappingEnd) => mark,
                Token(mark, _) => {
                    // Entries after the first must be separated by a `,`.
                    if !first {
                        match *self.peek_token()? {
                            Token(_, TokenType::FlowEntry) => self.skip(),
                            Token(span, _) => return Err(ScanError::new_str(
                                span.start,
                                "while parsing a flow mapping, did not find expected ',' or '}'",
                            )),
                        }
                    }

                    match *self.peek_token()? {
                        Token(_, TokenType::Key) => {
                            self.skip();
                            if let Token(
                                mark,
                                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
                            ) = *self.peek_token()?
                            {
                                // `?` with no key node: emit an empty scalar key.
                                self.state = State::FlowMappingValue;
                                return Ok((Event::empty_scalar(), mark));
                            }
                            self.push_state(State::FlowMappingValue);
                            return self.parse_node(false, false);
                        }
                        Token(marker, TokenType::Value) => {
                            // A value with no key: the key is an empty scalar.
                            self.state = State::FlowMappingValue;
                            return Ok((Event::empty_scalar(), marker));
                        }
                        Token(_, TokenType::FlowMappingEnd) => (),
                        _ => {
                            self.push_state(State::FlowMappingEmptyValue);
                            return self.parse_node(false, false);
                        }
                    }

                    mark
                }
            }
        };

        // Reaching here means the mapping is closed by `}`.
        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, span))
    }

    /// Handles a flow mapping value; `empty` emits an empty scalar without
    /// consuming any token (used for keys known to have no value).
    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        let span: Span = {
            if empty {
                let Token(mark, _) = *self.peek_token()?;
                self.state = State::FlowMappingKey;
                return Ok((Event::empty_scalar(), mark));
            }
            match *self.peek_token()? {
                Token(span, TokenType::Value) => {
                    self.skip();
                    match self.peek_token()?.1 {
                        // `key:` directly followed by `,` or `}`: empty value.
                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
                        _ => {
                            self.push_state(State::FlowMappingKey);
                            return self.parse_node(false, false);
                        }
                    }
                    span
                }
                Token(marker, _) => marker,
            }
        };

        self.state = State::FlowMappingKey;
        Ok((Event::empty_scalar(), span))
    }

    /// Handles a flow sequence entry; `first` consumes the `[` that opened
    /// the sequence.
    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        // Entry separator / terminator handling.
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                return Ok((Event::SequenceEnd, mark));
            }
            Token(_, TokenType::FlowEntry) if !first => {
                self.skip();
            }
            Token(span, _) if !first => {
                return Err(ScanError::new_str(
                    span.start,
                    "while parsing a flow sequence, expected ',' or ']'",
                ));
            }
            _ => { }
        }
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                // Trailing `,` before `]`.
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(mark, TokenType::Key) => {
                // `? key: value` inside a flow sequence starts an implicit
                // single-pair mapping.
                self.state = State::FlowSequenceEntryMappingKey;
                self.skip();
                Ok((Event::MappingStart(0, None), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntry);
                self.parse_node(false, false)
            }
        }
    }

    /// Handles an entry of an indentless (`-` at current indent) sequence.
    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) => {
                self.skip();
                if let Token(
                    _,
                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
                ) = *self.peek_token()?
                {
                    // `-` with no node after it: an empty scalar entry.
                    self.state = State::IndentlessSequenceEntry;
                    Ok((Event::empty_scalar(), mark))
                } else {
                    self.push_state(State::IndentlessSequenceEntry);
                    self.parse_node(true, false)
                }
            }
            Token(mark, _) => {
                // Anything other than `-` ends the indentless sequence.
                self.pop_state();
                Ok((Event::SequenceEnd, mark))
            }
        }
    }

    /// Handles a block sequence entry; `first` consumes the
    /// `BlockSequenceStart` token that opened the sequence.
    fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(mark, TokenType::BlockEntry) => {
                self.skip();
                if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
                    // `-` with no node after it: an empty scalar entry.
                    self.state = State::BlockSequenceEntry;
                    Ok((Event::empty_scalar(), mark))
                } else {
                    self.push_state(State::BlockSequenceEntry);
                    self.parse_node(true, false)
                }
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "while parsing a block collection, did not find expected '-' indicator",
            )),
        }
    }

    /// Handles the key of an implicit mapping inside a flow sequence.
    fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
            *self.peek_token()?
        {
            // `?` immediately followed by `,` or `]`: an empty scalar key.
            self.state = State::FlowSequenceEntryMappingValue;
            Ok((Event::empty_scalar(), mark))
        } else {
            self.push_state(State::FlowSequenceEntryMappingValue);
            self.parse_node(false, false)
        }
    }

    /// Handles the value of an implicit mapping inside a flow sequence.
    fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(_, TokenType::Value) => {
                self.skip();
                self.state = State::FlowSequenceEntryMappingValue;
                let Token(span, ref tok) = *self.peek_token()?;
                if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
                    // `:` immediately followed by `,` or `]`: empty value.
                    self.state = State::FlowSequenceEntryMappingEnd(span.end);
                    Ok((Event::empty_scalar(), span))
                } else {
                    self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
                    self.parse_node(false, false)
                }
            }
            Token(mark, _) => {
                // No `:` at all: the value is an empty scalar.
                self.state = State::FlowSequenceEntryMappingEnd(mark.end);
                Ok((Event::empty_scalar(), mark))
            }
        }
    }

    /// Emits the `MappingEnd` of an implicit mapping inside a flow sequence
    /// and resumes normal flow sequence parsing.
    #[allow(clippy::unnecessary_wraps)]
    fn flow_sequence_entry_mapping_end<'a>(&mut self, mark: Marker) -> ParseResult<'a>
    where
        'input: 'a,
    {
        self.state = State::FlowSequenceEntry;
        Ok((Event::MappingEnd, Span::empty(mark)))
    }

    /// Resolves a tag token's `(handle, suffix)` against the `%TAG`
    /// directives currently in effect, producing a [`Tag`].
    ///
    /// # Errors
    /// Fails if a named handle (`!name!`) was used without being declared.
    fn resolve_tag(
        &self,
        span: Span,
        handle: &Cow<'input, str>,
        suffix: Cow<'input, str>,
    ) -> Result<Cow<'input, Tag>, ScanError> {
        let suffix = suffix.into_owned();
        let tag = if handle == "!!" {
            // The secondary handle defaults to the YAML core schema prefix
            // unless overridden by a `%TAG !! ...` directive.
            Tag {
                handle: self
                    .tags
                    .get("!!")
                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
                suffix,
            }
        } else if handle.is_empty() && suffix == "!" {
            // The non-specific `!` tag; it may be remapped by `%TAG ! ...`.
            match self.tags.get("") {
                Some(prefix) => Tag {
                    handle: prefix.clone(),
                    suffix,
                },
                None => Tag {
                    handle: String::new(),
                    suffix,
                },
            }
        } else {
            let prefix = self.tags.get(&**handle);
            if let Some(prefix) = prefix {
                Tag {
                    handle: prefix.clone(),
                    suffix,
                }
            } else {
                // Named handles (`!name!`) must have been declared by a
                // `%TAG` directive; other handles pass through unchanged.
                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
                }
                Tag {
                    handle: handle.to_string(),
                    suffix,
                }
            }
        };
        Ok(Cow::Owned(tag))
    }
}
1337
impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
    /// Peeks at the next event, buffering it in `self.current`.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        if let Some(ref x) = self.current {
            Some(Ok(x))
        } else {
            // `stream_end_emitted` is only set by `next_event`; once set, the
            // stream is exhausted.
            if self.stream_end_emitted {
                return None;
            }
            match self.next_event_impl() {
                Ok(token) => self.current = Some(token),
                Err(e) => return Some(Err(e)),
            }
            self.current.as_ref().map(Ok)
        }
    }

    /// Consumes the next event; returns `None` once `StreamEnd` has been
    /// handed out.
    fn next_event(&mut self) -> Option<ParseResult<'input>> {
        if self.stream_end_emitted {
            return None;
        }

        let tok = self.next_event_impl();
        if matches!(tok, Ok((Event::StreamEnd, _))) {
            self.stream_end_emitted = true;
        }
        Some(tok)
    }

    /// Drives the whole stream (or a single document if `multi` is false)
    /// into `recv`.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        // If `peek` already buffered the `StreamStart` event, consume it here
        // so the receiver still observes it exactly once.
        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
        if !self.scanner.stream_started() || stream_start_buffered {
            let (ev, span) = self.next_event_impl()?;
            if ev != Event::StreamStart {
                return Err(ScanError::new_str(
                    span.start,
                    "did not find expected <stream-start>",
                ));
            }
            recv.on_event(ev, span);
        }

        if self.scanner.stream_ended() {
            // Loading an already-exhausted stream just reports its end.
            recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
            return Ok(());
        }
        loop {
            let (ev, span) = self.next_event_impl()?;
            if ev == Event::StreamEnd {
                recv.on_event(ev, span);
                return Ok(());
            }
            // Anchors are scoped to a document.
            self.anchors.clear();
            self.load_document(ev, span, recv)?;
            if !multi {
                break;
            }
        }
        Ok(())
    }
}
1404
1405impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
1406 type Item = Result<(Event<'input>, Span), ScanError>;
1407
1408 fn next(&mut self) -> Option<Self::Item> {
1409 self.next_event()
1410 }
1411}
1412
#[cfg(test)]
mod test {
    use alloc::vec::Vec;

    use super::{Event, EventReceiver, Parser};

    /// `peek` must return exactly the event that the following `next_event`
    /// consumes, for every event in the stream.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
  b1: 4
  b2: d
a2: 4
a3: [1, 2, 3]
a4:
  - [a1, a2]
  - 2
a5: *x
";
        let mut p = Parser::new_from_str(s);
        loop {
            let event_peek = p.peek().unwrap().unwrap().clone();
            let event = p.next_event().unwrap().unwrap();
            assert_eq!(event, event_peek);
            if event.0 == Event::StreamEnd {
                break;
            }
        }
    }

    /// Several `%TAG` directives in the same document must all stay in effect.
    #[test]
    fn test_multiple_tag_directives_are_kept_within_document() {
        let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";

        let mut seen_a = false;
        let mut seen_b = false;
        for event in Parser::new_from_str(text) {
            let (event, _) = event.unwrap();
            if let Event::Scalar(_, _, _, Some(tag)) = event {
                if tag.handle == "tag:a,2024:" {
                    seen_a = true;
                } else if tag.handle == "tag:b,2024:" {
                    seen_b = true;
                }
            }
        }

        assert!(seen_a);
        assert!(seen_b);
    }

    /// Without `keep_tags`, a handle declared in the first document must be
    /// unknown in the second, producing an error.
    #[test]
    fn test_tags_are_cleared_when_next_document_has_no_directives() {
        let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";

        // Drain the first document.
        let mut parser = Parser::new_from_str(text);
        for event in parser.by_ref() {
            let (event, _) = event.unwrap();
            if let Event::DocumentEnd = event {
                break;
            }
        }

        match parser.next().unwrap().unwrap().0 {
            Event::DocumentStart(true) => {}
            _ => panic!("expected explicit second document start"),
        }

        // `!t!` is no longer declared in the second document.
        let err = parser.next().unwrap().unwrap_err();
        assert!(format!("{err}").contains("the handle wasn't declared"));
    }

    /// With `keep_tags(true)` the handle survives into later documents; with
    /// `keep_tags(false)` the same input must error.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        for x in Parser::new_from_str(text).keep_tags(true) {
            let x = x.unwrap();
            if let Event::MappingStart(_, tag) = x.0 {
                let tag = tag.unwrap();
                assert_eq!(tag.handle, "tag:test,2024:");
            }
        }

        for x in Parser::new_from_str(text).keep_tags(false) {
            if x.is_err() {
                // The expected "handle wasn't declared" failure.
                return;
            }
        }
        panic!("Test failed, did not encounter error")
    }

    /// An implicit mapping in a flow sequence may have an empty `?` key.
    #[test]
    fn test_flow_sequence_mapping_allows_empty_key() {
        let parser = Parser::new_from_str("[?: value]");
        for event in parser {
            event.expect("parser should accept flow sequence mappings with empty keys");
        }
    }

    /// Even with `keep_tags`, a redefinition of the default `!!` handle must
    /// not leak into the next document (see `document_end`).
    #[test]
    fn test_keep_tags_does_not_persist_default_tag_handles() {
        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";

        let mut int_tags = Vec::new();
        for event in Parser::new_from_str(text).keep_tags(true) {
            let event = event.unwrap().0;
            if let Event::Scalar(_, _, _, Some(tag)) = event {
                if tag.suffix == "int" {
                    int_tags.push(tag.handle.clone());
                }
            }
        }

        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
    }

    /// `load` after a `peek` that buffered `StreamStart` must still deliver
    /// `StreamStart` exactly once to the receiver.
    #[test]
    fn test_load_after_peek_stream_start() {
        #[derive(Default)]
        struct Sink<'input> {
            events: Vec<Event<'input>>,
        }

        impl<'input> EventReceiver<'input> for Sink<'input> {
            fn on_event(&mut self, ev: Event<'input>) {
                self.events.push(ev);
            }
        }

        let mut parser = Parser::new_from_str("key: value\n");
        let mut sink = Sink::default();

        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
        parser.load(&mut sink, false).unwrap();

        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
    }
}