1use crate::{
8 input::{str::StrInput, BorrowedInput},
9 scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
10 BufferedInput,
11};
12
13use alloc::{
14 borrow::Cow,
15 collections::{BTreeMap, BTreeSet},
16 string::{String, ToString},
17 vec::Vec,
18};
19use core::{
20 convert::Infallible,
21 fmt::{self, Display},
22};
23
/// Position of the parser inside the YAML grammar.
///
/// `Parser::state_machine` dispatches on the current state to the routine that produces the
/// next event. States to resume later are kept on the parser's `states` stack.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Nothing has been consumed yet; handled by `stream_start`.
    StreamStart,
    /// Start of a document that may omit its `---` marker; handled by `document_start(true)`.
    ImplicitDocumentStart,
    /// Start of a document; handled by `document_start(false)`.
    DocumentStart,
    /// Root node of an explicitly started document; handled by `document_content`.
    DocumentContent,
    /// End of the current document; handled by `document_end`.
    DocumentEnd,
    /// A node in block context; handled by `parse_node(true, false)`.
    BlockNode,
    /// First entry of a block sequence; handled by `block_sequence_entry(true)`.
    BlockSequenceFirstEntry,
    /// Subsequent entry of a block sequence; handled by `block_sequence_entry(false)`.
    BlockSequenceEntry,
    /// Entry of a sequence that has no start/end tokens of its own; handled by
    /// `indentless_sequence_entry`.
    IndentlessSequenceEntry,
    /// First key of a block mapping; handled by `block_mapping_key(true)`.
    BlockMappingFirstKey,
    /// Subsequent key of a block mapping; handled by `block_mapping_key(false)`.
    BlockMappingKey,
    /// Value of a block mapping entry; handled by `block_mapping_value`.
    BlockMappingValue,
    /// First entry of a flow sequence; handled by `flow_sequence_entry(true)`.
    FlowSequenceFirstEntry,
    /// Subsequent entry of a flow sequence; handled by `flow_sequence_entry(false)`.
    FlowSequenceEntry,
    /// Key of a single-pair mapping written inside a flow sequence; handled by
    /// `flow_sequence_entry_mapping_key`.
    FlowSequenceEntryMappingKey,
    /// Value of a single-pair mapping written inside a flow sequence; handled by
    /// `flow_sequence_entry_mapping_value`.
    FlowSequenceEntryMappingValue,
    /// End of a single-pair mapping written inside a flow sequence; handled by
    /// `flow_sequence_entry_mapping_end`.
    FlowSequenceEntryMappingEnd,
    /// First key of a flow mapping; handled by `flow_mapping_key(true)`.
    FlowMappingFirstKey,
    /// Subsequent key of a flow mapping; handled by `flow_mapping_key(false)`.
    FlowMappingKey,
    /// Value of a flow mapping entry; handled by `flow_mapping_value(false)`.
    FlowMappingValue,
    /// Value of a flow mapping entry that is known to be absent; handled by
    /// `flow_mapping_value(true)`.
    FlowMappingEmptyValue,
    /// The stream is finished; `parse` answers with `StreamEnd` without running the state
    /// machine.
    End,
}
49
50#[derive(Clone, PartialEq, Debug, Eq)]
55pub enum Event<'input> {
56 Nothing,
58 StreamStart,
60 StreamEnd,
62 DocumentStart(bool),
70 DocumentEnd,
72 Alias(
74 usize,
76 ),
77 Scalar(
79 Cow<'input, str>,
80 ScalarStyle,
81 usize,
82 Option<Cow<'input, Tag>>,
83 ),
84 SequenceStart(
86 usize,
88 Option<Cow<'input, Tag>>,
90 ),
91 SequenceEnd,
93 MappingStart(
95 usize,
97 Option<Cow<'input, Tag>>,
99 ),
100 MappingEnd,
102}
103
/// A resolved YAML tag, split into the part contributed by the handle and the remainder.
///
/// Ordering compares `handle` first and `suffix` second (derive order follows field order).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tag {
    /// Prefix the tag handle resolved to, or the handle itself when no prefix was declared.
    pub handle: String,
    /// Remainder of the tag after the handle.
    pub suffix: String,
}
112
113impl Tag {
114 #[must_use]
123 pub fn is_yaml_core_schema(&self) -> bool {
124 self.handle == "tag:yaml.org,2002:"
125 }
126}
127
128impl Display for Tag {
129 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
130 if self.handle == "!" {
131 write!(f, "!{}", self.suffix)
132 } else {
133 write!(f, "{}{}", self.handle, self.suffix)
134 }
135 }
136}
137
138impl<'input> Event<'input> {
139 fn empty_scalar() -> Self {
141 Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
143 }
144
145 fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
147 Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
148 }
149}
150
151#[derive(Debug)]
153pub struct Parser<'input, T: BorrowedInput<'input>> {
154 scanner: Scanner<'input, T>,
156 states: Vec<State>,
161 state: State,
163 token: Option<Token<'input>>,
165 current: Option<(Event<'input>, Span)>,
167
168 pending_key_indent: Option<usize>,
174 anchors: BTreeMap<Cow<'input, str>, usize>,
176 anchor_id_count: usize,
181 tags: BTreeMap<String, String>,
185 stream_end_emitted: bool,
190 keep_tags: bool,
192}
193
194pub trait EventReceiver<'input> {
264 fn on_event(&mut self, ev: Event<'input>);
266}
267
268pub trait SpannedEventReceiver<'input> {
272 fn on_event(&mut self, ev: Event<'input>, span: Span);
274}
275
276impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
277 fn on_event(&mut self, ev: Event<'input>, _span: Span) {
278 self.on_event(ev);
279 }
280}
281
282pub trait TryEventReceiver<'input> {
287 type Error;
289
290 fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
297}
298
299pub trait TrySpannedEventReceiver<'input> {
305 type Error;
307
308 fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
315}
316
317impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
318 type Error = R::Error;
319
320 fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
321 TryEventReceiver::on_event(self, ev)
322 }
323}
324
325#[derive(Clone, PartialEq, Debug, Eq)]
327pub enum TryLoadError<E> {
328 Scan(
330 ScanError,
332 ),
333 Receiver(
335 E,
337 ),
338}
339
340impl<E> From<ScanError> for TryLoadError<E> {
341 fn from(error: ScanError) -> Self {
342 Self::Scan(error)
343 }
344}
345
346impl<E: Display> Display for TryLoadError<E> {
347 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
348 match self {
349 Self::Scan(error) => write!(f, "parser error: {error}"),
350 Self::Receiver(error) => write!(f, "receiver error: {error}"),
351 }
352 }
353}
354
355impl<E> core::error::Error for TryLoadError<E>
356where
357 E: core::error::Error + 'static,
358{
359 fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
360 match self {
361 Self::Scan(error) => Some(error),
362 Self::Receiver(error) => Some(error),
363 }
364 }
365}
366
367fn try_emit<'input, R>(
368 recv: &mut R,
369 ev: Event<'input>,
370 span: Span,
371) -> Result<(), TryLoadError<R::Error>>
372where
373 R: TrySpannedEventReceiver<'input>,
374{
375 recv.on_event(ev, span).map_err(TryLoadError::Receiver)
376}
377
/// Adapter that presents a borrowed [`SpannedEventReceiver`] as a [`TrySpannedEventReceiver`]
/// whose error type is [`Infallible`].
struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
379
380impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
381 for InfallibleSpannedReceiver<'_, R>
382{
383 type Error = Infallible;
384
385 fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
386 self.0.on_event(ev, span);
387 Ok(())
388 }
389}
390
391fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
392 match result {
393 Ok(()) => Ok(()),
394 Err(TryLoadError::Scan(error)) => Err(error),
395 Err(TryLoadError::Receiver(error)) => match error {},
396 }
397}
398
399pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
401
402pub trait ParserTrait<'input> {
404 fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
406
407 fn next_event(&mut self) -> Option<ParseResult<'input>>;
409
410 fn load<R: SpannedEventReceiver<'input>>(
420 &mut self,
421 recv: &mut R,
422 multi: bool,
423 ) -> Result<(), ScanError>;
424
425 fn try_load<R: TrySpannedEventReceiver<'input>>(
437 &mut self,
438 recv: &mut R,
439 multi: bool,
440 ) -> Result<(), TryLoadError<R::Error>> {
441 while let Some(res) = self.next_event() {
442 let (ev, span) = res?;
443 let is_doc_end = matches!(ev, Event::DocumentEnd);
444 let is_stream_end = matches!(ev, Event::StreamEnd);
445
446 try_emit(recv, ev, span)?;
447
448 if is_stream_end {
449 break;
450 }
451 if !multi && is_doc_end {
452 break;
453 }
454 }
455
456 Ok(())
457 }
458}
459
460impl<'input> Parser<'input, StrInput<'input>> {
461 #[must_use]
463 pub fn new_from_str(value: &'input str) -> Self {
464 debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
465 Parser::new(StrInput::new(value))
466 }
467}
468
469impl<T> Parser<'static, BufferedInput<T>>
470where
471 T: Iterator<Item = char>,
472{
473 #[must_use]
475 pub fn new_from_iter(iter: T) -> Self {
476 debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
477 Parser::new(BufferedInput::new(iter))
478 }
479}
480
481impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
482 pub fn get_anchor_offset(&self) -> usize {
484 self.anchor_id_count
485 }
486
487 pub fn set_anchor_offset(&mut self, offset: usize) {
489 self.anchor_id_count = offset;
490 }
491
492 pub fn new(src: T) -> Self {
494 Parser {
495 scanner: Scanner::new(src),
496 states: Vec::new(),
497 state: State::StreamStart,
498 token: None,
499 current: None,
500
501 pending_key_indent: None,
502
503 anchors: BTreeMap::new(),
504 anchor_id_count: 1,
506 tags: BTreeMap::new(),
507 stream_end_emitted: false,
508 keep_tags: false,
509 }
510 }
511
512 #[must_use]
535 pub fn keep_tags(mut self, value: bool) -> Self {
536 self.keep_tags = value;
537 self
538 }
539
540 pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
548 ParserTrait::peek(self)
549 }
550
551 pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
556 ParserTrait::next_event(self)
557 }
558
559 fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
565 where
566 'input: 'a,
567 {
568 match self.current.take() {
569 None => self.parse(),
570 Some(v) => Ok(v),
571 }
572 }
573
574 fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
576 match self.token {
577 None => {
578 self.token = Some(self.scan_next_token()?);
579 Ok(self.token.as_ref().unwrap())
580 }
581 Some(ref tok) => Ok(tok),
582 }
583 }
584
585 fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
589 let token = self.scanner.next();
590 match token {
591 None => match self.scanner.get_error() {
592 None => Err(self.unexpected_eof()),
593 Some(e) => Err(e),
594 },
595 Some(tok) => Ok(tok),
596 }
597 }
598
599 #[cold]
600 fn unexpected_eof(&self) -> ScanError {
601 let info = match self.state {
602 State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
603 "unexpected EOF while parsing a flow sequence"
604 }
605 State::FlowMappingFirstKey
606 | State::FlowMappingKey
607 | State::FlowMappingValue
608 | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
609 State::FlowSequenceEntryMappingKey
610 | State::FlowSequenceEntryMappingValue
611 | State::FlowSequenceEntryMappingEnd => {
612 "unexpected EOF while parsing an implicit flow mapping"
613 }
614 State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
615 "unexpected EOF while parsing a block sequence"
616 }
617 State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
618 "unexpected EOF while parsing a block mapping"
619 }
620 _ => "unexpected eof",
621 };
622 ScanError::new_str(self.scanner.mark(), info)
623 }
624
625 fn fetch_token<'a>(&mut self) -> Token<'a>
626 where
627 'input: 'a,
628 {
629 self.token
630 .take()
631 .expect("fetch_token needs to be preceded by peek_token")
632 }
633
634 fn skip(&mut self) {
636 self.token = None;
637 }
638 fn pop_state(&mut self) {
640 self.state = self.states.pop().unwrap();
641 }
642 fn push_state(&mut self, state: State) {
644 self.states.push(state);
645 }
646
647 fn parse<'a>(&mut self) -> ParseResult<'a>
648 where
649 'input: 'a,
650 {
651 if self.state == State::End {
652 return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
653 }
654 let (ev, span) = self.state_machine()?;
655 if let Some(indent) = self.pending_key_indent.take() {
656 Ok((ev, span.with_indent(Some(indent))))
657 } else {
658 Ok((ev, span))
659 }
660 }
661
662 pub fn load<R: SpannedEventReceiver<'input>>(
708 &mut self,
709 recv: &mut R,
710 multi: bool,
711 ) -> Result<(), ScanError> {
712 ParserTrait::load(self, recv, multi)
713 }
714
715 pub fn try_load<R: TrySpannedEventReceiver<'input>>(
760 &mut self,
761 recv: &mut R,
762 multi: bool,
763 ) -> Result<(), TryLoadError<R::Error>> {
764 ParserTrait::try_load(self, recv, multi)
765 }
766
767 fn try_load_document<R: TrySpannedEventReceiver<'input>>(
768 &mut self,
769 first_ev: Event<'input>,
770 span: Span,
771 recv: &mut R,
772 ) -> Result<(), TryLoadError<R::Error>> {
773 if !matches!(first_ev, Event::DocumentStart(_)) {
774 return Err(TryLoadError::Scan(ScanError::new_str(
775 span.start,
776 "did not find expected <document-start>",
777 )));
778 }
779 try_emit(recv, first_ev, span)?;
780
781 let (ev, span) = self.next_event_impl()?;
782 self.try_load_node(ev, span, recv)?;
783
784 let (ev, mark) = self.next_event_impl()?;
786 assert_eq!(ev, Event::DocumentEnd);
787 try_emit(recv, ev, mark)?;
788
789 Ok(())
790 }
791
792 fn try_load_node<R: TrySpannedEventReceiver<'input>>(
793 &mut self,
794 first_ev: Event<'input>,
795 span: Span,
796 recv: &mut R,
797 ) -> Result<(), TryLoadError<R::Error>> {
798 match first_ev {
799 Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
800 Event::SequenceStart(..) => {
801 try_emit(recv, first_ev, span)?;
802 self.try_load_sequence(recv)
803 }
804 Event::MappingStart(..) => {
805 try_emit(recv, first_ev, span)?;
806 self.try_load_mapping(recv)
807 }
808 _ => {
809 #[cfg(feature = "debug_prints")]
810 std::println!("UNREACHABLE EVENT: {first_ev:?}");
811 unreachable!();
812 }
813 }
814 }
815
816 fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
817 &mut self,
818 recv: &mut R,
819 ) -> Result<(), TryLoadError<R::Error>> {
820 let (mut key_ev, mut key_mark) = self.next_event_impl()?;
821 while key_ev != Event::MappingEnd {
822 self.try_load_node(key_ev, key_mark, recv)?;
824
825 let (ev, mark) = self.next_event_impl()?;
827 self.try_load_node(ev, mark, recv)?;
828
829 let (ev, mark) = self.next_event_impl()?;
831 key_ev = ev;
832 key_mark = mark;
833 }
834 try_emit(recv, key_ev, key_mark)?;
835 Ok(())
836 }
837
838 fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
839 &mut self,
840 recv: &mut R,
841 ) -> Result<(), TryLoadError<R::Error>> {
842 let (mut ev, mut mark) = self.next_event_impl()?;
843 while ev != Event::SequenceEnd {
844 self.try_load_node(ev, mark, recv)?;
845
846 let (next_ev, next_mark) = self.next_event_impl()?;
848 ev = next_ev;
849 mark = next_mark;
850 }
851 try_emit(recv, ev, mark)?;
852 Ok(())
853 }
854
855 fn state_machine<'a>(&mut self) -> ParseResult<'a>
856 where
857 'input: 'a,
858 {
859 debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
862
863 match self.state {
864 State::StreamStart => self.stream_start(),
865
866 State::ImplicitDocumentStart => self.document_start(true),
867 State::DocumentStart => self.document_start(false),
868 State::DocumentContent => self.document_content(),
869 State::DocumentEnd => self.document_end(),
870
871 State::BlockNode => self.parse_node(true, false),
872 State::BlockMappingFirstKey => self.block_mapping_key(true),
875 State::BlockMappingKey => self.block_mapping_key(false),
876 State::BlockMappingValue => self.block_mapping_value(),
877
878 State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
879 State::BlockSequenceEntry => self.block_sequence_entry(false),
880
881 State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
882 State::FlowSequenceEntry => self.flow_sequence_entry(false),
883
884 State::FlowMappingFirstKey => self.flow_mapping_key(true),
885 State::FlowMappingKey => self.flow_mapping_key(false),
886 State::FlowMappingValue => self.flow_mapping_value(false),
887
888 State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
889
890 State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
891 State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
892 State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
893 State::FlowMappingEmptyValue => self.flow_mapping_value(true),
894
895 State::End => unreachable!(),
897 }
898 }
899
900 fn stream_start<'a>(&mut self) -> ParseResult<'a>
901 where
902 'input: 'a,
903 {
904 match *self.peek_token()? {
905 Token(span, TokenType::StreamStart(_)) => {
906 self.state = State::ImplicitDocumentStart;
907 self.skip();
908 Ok((Event::StreamStart, span))
909 }
910 Token(span, _) => Err(ScanError::new_str(
911 span.start,
912 "did not find expected <stream-start>",
913 )),
914 }
915 }
916
917 fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
918 where
919 'input: 'a,
920 {
921 while let TokenType::DocumentEnd = self.peek_token()?.1 {
922 self.skip();
923 }
924
925 self.anchors.clear();
927
928 match *self.peek_token()? {
929 Token(span, TokenType::StreamEnd) => {
930 self.state = State::End;
931 self.skip();
932 Ok((Event::StreamEnd, span))
933 }
934 Token(
935 _,
936 TokenType::VersionDirective(..)
937 | TokenType::TagDirective(..)
938 | TokenType::ReservedDirective(..)
939 | TokenType::DocumentStart,
940 ) => {
941 self.explicit_document_start()
943 }
944 Token(span, _) if implicit => {
945 self.parser_process_directives()?;
946 self.push_state(State::DocumentEnd);
947 self.state = State::BlockNode;
948 Ok((Event::DocumentStart(false), span))
949 }
950 _ => {
951 self.explicit_document_start()
953 }
954 }
955 }
956
957 fn parser_process_directives(&mut self) -> Result<(), ScanError> {
958 let mut version_directive_received = false;
959 let mut tags = if self.keep_tags {
960 self.tags.clone()
961 } else {
962 BTreeMap::new()
963 };
964 let mut document_tag_handles = BTreeSet::new();
965
966 loop {
967 match self.peek_token()? {
968 Token(span, TokenType::VersionDirective(_, _)) => {
969 if version_directive_received {
975 return Err(ScanError::new_str(
976 span.start,
977 "duplicate version directive",
978 ));
979 }
980 version_directive_received = true;
981 }
982 Token(mark, TokenType::TagDirective(handle, prefix)) => {
983 if !document_tag_handles.insert(handle.to_string()) {
984 return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
985 }
986 tags.insert(handle.to_string(), prefix.to_string());
987 }
988 Token(_, TokenType::ReservedDirective(_, _)) => {
989 }
991 _ => break,
992 }
993 self.skip();
994 }
995
996 self.tags = tags;
997 Ok(())
998 }
999
1000 fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
1001 where
1002 'input: 'a,
1003 {
1004 self.parser_process_directives()?;
1005 match *self.peek_token()? {
1006 Token(mark, TokenType::DocumentStart) => {
1007 self.push_state(State::DocumentEnd);
1008 self.state = State::DocumentContent;
1009 self.skip();
1010 Ok((Event::DocumentStart(true), mark))
1011 }
1012 Token(span, _) => Err(ScanError::new_str(
1013 span.start,
1014 "did not find expected <document start>",
1015 )),
1016 }
1017 }
1018
1019 fn document_content<'a>(&mut self) -> ParseResult<'a>
1020 where
1021 'input: 'a,
1022 {
1023 match *self.peek_token()? {
1024 Token(
1025 mark,
1026 TokenType::VersionDirective(..)
1027 | TokenType::TagDirective(..)
1028 | TokenType::ReservedDirective(..)
1029 | TokenType::DocumentStart
1030 | TokenType::DocumentEnd
1031 | TokenType::StreamEnd,
1032 ) => {
1033 self.pop_state();
1034 Ok((Event::empty_scalar(), mark))
1036 }
1037 _ => self.parse_node(true, false),
1038 }
1039 }
1040
1041 fn document_end<'a>(&mut self) -> ParseResult<'a>
1042 where
1043 'input: 'a,
1044 {
1045 let mut explicit_end = false;
1046 let span: Span = match *self.peek_token()? {
1047 Token(span, TokenType::DocumentEnd) => {
1048 explicit_end = true;
1049 self.skip();
1050 span
1051 }
1052 Token(span, _) => span,
1053 };
1054
1055 if self.keep_tags {
1056 self.tags.remove("!!");
1060 self.tags.remove("");
1061 } else {
1062 self.tags.clear();
1063 }
1064 if explicit_end {
1065 self.state = State::ImplicitDocumentStart;
1066 } else {
1067 if let Token(
1068 span,
1069 TokenType::VersionDirective(..)
1070 | TokenType::TagDirective(..)
1071 | TokenType::ReservedDirective(..),
1072 ) = *self.peek_token()?
1073 {
1074 return Err(ScanError::new_str(
1075 span.start,
1076 "missing explicit document end marker before directive",
1077 ));
1078 }
1079 self.state = State::DocumentStart;
1080 }
1081
1082 Ok((Event::DocumentEnd, span))
1083 }
1084
1085 fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
1086 let new_id = self.anchor_id_count;
1092 self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
1093 ScanError::new_str(
1094 mark.start,
1095 "while parsing anchor, anchor count exceeded supported limit",
1096 )
1097 })?;
1098 self.anchors.insert(name, new_id);
1099 Ok(new_id)
1100 }
1101
1102 #[allow(clippy::too_many_lines)]
1103 fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
1104 where
1105 'input: 'a,
1106 {
1107 let mut anchor_id = 0;
1108 let mut tag = None;
1109 match *self.peek_token()? {
1110 Token(_, TokenType::Alias(_)) => {
1111 self.pop_state();
1112 if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
1113 match self.anchors.get(&*name) {
1114 None => {
1115 return Err(ScanError::new_str(
1116 span.start,
1117 "while parsing node, found unknown anchor",
1118 ))
1119 }
1120 Some(id) => return Ok((Event::Alias(*id), span)),
1121 }
1122 }
1123 unreachable!()
1124 }
1125 Token(_, TokenType::Anchor(_)) => {
1126 if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
1127 anchor_id = self.register_anchor(name, &span)?;
1128 if let TokenType::Tag(..) = self.peek_token()?.1 {
1129 if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
1130 tag = Some(self.resolve_tag(span, &handle, suffix)?);
1131 } else {
1132 unreachable!()
1133 }
1134 }
1135 } else {
1136 unreachable!()
1137 }
1138 }
1139 Token(mark, TokenType::Tag(..)) => {
1140 if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
1141 tag = Some(self.resolve_tag(mark, &handle, suffix)?);
1142 if let TokenType::Anchor(_) = &self.peek_token()?.1 {
1143 if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
1144 anchor_id = self.register_anchor(name, &mark)?;
1145 } else {
1146 unreachable!()
1147 }
1148 }
1149 } else {
1150 unreachable!()
1151 }
1152 }
1153 _ => {}
1154 }
1155 match *self.peek_token()? {
1156 Token(mark, TokenType::BlockEntry) if indentless_sequence => {
1157 self.state = State::IndentlessSequenceEntry;
1158 Ok((Event::SequenceStart(anchor_id, tag), mark))
1159 }
1160 Token(_, TokenType::Scalar(..)) => {
1161 self.pop_state();
1162 if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
1163 Ok((Event::Scalar(v, style, anchor_id, tag), mark))
1164 } else {
1165 unreachable!()
1166 }
1167 }
1168 Token(mark, TokenType::FlowSequenceStart) => {
1169 self.state = State::FlowSequenceFirstEntry;
1170 Ok((Event::SequenceStart(anchor_id, tag), mark))
1171 }
1172 Token(mark, TokenType::FlowMappingStart) => {
1173 self.state = State::FlowMappingFirstKey;
1174 Ok((Event::MappingStart(anchor_id, tag), mark))
1175 }
1176 Token(mark, TokenType::BlockSequenceStart) if block => {
1177 self.state = State::BlockSequenceFirstEntry;
1178 Ok((Event::SequenceStart(anchor_id, tag), mark))
1179 }
1180 Token(mark, TokenType::BlockMappingStart) if block => {
1181 self.state = State::BlockMappingFirstKey;
1182 Ok((Event::MappingStart(anchor_id, tag), mark))
1183 }
1184 Token(mark, _) if tag.is_some() || anchor_id > 0 => {
1186 self.pop_state();
1187 Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
1188 }
1189 Token(span, _) => {
1190 let info = match self.state {
1191 State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
1192 "unexpected EOF while parsing a flow sequence"
1193 }
1194 State::FlowMappingFirstKey
1195 | State::FlowMappingKey
1196 | State::FlowMappingValue
1197 | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
1198 State::FlowSequenceEntryMappingKey
1199 | State::FlowSequenceEntryMappingValue
1200 | State::FlowSequenceEntryMappingEnd => {
1201 "unexpected EOF while parsing an implicit flow mapping"
1202 }
1203 State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
1204 "unexpected EOF while parsing a block sequence"
1205 }
1206 State::BlockMappingFirstKey
1207 | State::BlockMappingKey
1208 | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
1209 _ => "while parsing a node, did not find expected node content",
1210 };
1211 Err(ScanError::new_str(span.start, info))
1212 }
1213 }
1214 }
1215
1216 fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1217 where
1218 'input: 'a,
1219 {
1220 if first {
1222 let _ = self.peek_token()?;
1223 self.skip();
1225 }
1226 match *self.peek_token()? {
1227 Token(_, TokenType::Key) => {
1228 if let Token(key_span, TokenType::Key) = *self.peek_token()? {
1230 self.pending_key_indent = Some(key_span.start.col());
1231 }
1232 self.skip();
1233 if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1234 *self.peek_token()?
1235 {
1236 self.state = State::BlockMappingValue;
1237 Ok((Event::empty_scalar(), mark))
1239 } else {
1240 self.push_state(State::BlockMappingValue);
1241 self.parse_node(true, true)
1242 }
1243 }
1244 Token(mark, TokenType::Value) => {
1246 self.state = State::BlockMappingValue;
1247 Ok((Event::empty_scalar(), mark))
1248 }
1249 Token(mark, TokenType::BlockEnd) => {
1250 self.pop_state();
1251 self.skip();
1252 Ok((Event::MappingEnd, mark))
1253 }
1254 Token(span, _) => Err(ScanError::new_str(
1255 span.start,
1256 "while parsing a block mapping, did not find expected key",
1257 )),
1258 }
1259 }
1260
1261 fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
1262 where
1263 'input: 'a,
1264 {
1265 match *self.peek_token()? {
1266 Token(mark, TokenType::Value) => {
1267 self.skip();
1268 if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
1269 *self.peek_token()?
1270 {
1271 self.state = State::BlockMappingKey;
1272 Ok((Event::empty_scalar(), mark))
1274 } else {
1275 self.push_state(State::BlockMappingKey);
1276 self.parse_node(true, true)
1277 }
1278 }
1279 Token(mark, _) => {
1280 self.state = State::BlockMappingKey;
1281 Ok((Event::empty_scalar(), mark))
1283 }
1284 }
1285 }
1286
1287 fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
1288 where
1289 'input: 'a,
1290 {
1291 if first {
1292 let _ = self.peek_token()?;
1293 self.skip();
1294 }
1295 let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
1296 mark
1297 } else {
1298 if !first {
1299 match *self.peek_token()? {
1300 Token(_, TokenType::FlowEntry) => self.skip(),
1301 Token(span, _) => {
1302 return Err(ScanError::new_str(
1303 span.start,
1304 "while parsing a flow mapping, did not find expected ',' or '}'",
1305 ))
1306 }
1307 }
1308 }
1309
1310 match *self.peek_token()? {
1311 Token(_, TokenType::Key) => {
1312 self.skip();
1313 if let Token(
1314 mark,
1315 TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
1316 ) = *self.peek_token()?
1317 {
1318 self.state = State::FlowMappingValue;
1319 return Ok((Event::empty_scalar(), mark));
1320 }
1321 self.push_state(State::FlowMappingValue);
1322 return self.parse_node(false, false);
1323 }
1324 Token(marker, TokenType::Value) => {
1325 self.state = State::FlowMappingValue;
1326 return Ok((Event::empty_scalar(), marker));
1327 }
1328 Token(_, TokenType::FlowMappingEnd) => (),
1329 _ => {
1330 self.push_state(State::FlowMappingEmptyValue);
1331 return self.parse_node(false, false);
1332 }
1333 }
1334
1335 self.peek_token()?.0
1336 };
1337
1338 self.pop_state();
1339 self.skip();
1340 Ok((Event::MappingEnd, span))
1341 }
1342
1343 fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
1344 where
1345 'input: 'a,
1346 {
1347 let span: Span = {
1348 if empty {
1349 let Token(mark, _) = *self.peek_token()?;
1350 self.state = State::FlowMappingKey;
1351 return Ok((Event::empty_scalar(), mark));
1352 }
1353 match *self.peek_token()? {
1354 Token(span, TokenType::Value) => {
1355 self.skip();
1356 match self.peek_token()?.1 {
1357 TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
1358 _ => {
1359 self.push_state(State::FlowMappingKey);
1360 return self.parse_node(false, false);
1361 }
1362 }
1363 span
1364 }
1365 Token(marker, _) => marker,
1366 }
1367 };
1368
1369 self.state = State::FlowMappingKey;
1370 Ok((Event::empty_scalar(), span))
1371 }
1372
1373 fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1374 where
1375 'input: 'a,
1376 {
1377 if first {
1379 let _ = self.peek_token()?;
1380 self.skip();
1382 }
1383 match *self.peek_token()? {
1384 Token(mark, TokenType::FlowSequenceEnd) => {
1385 self.pop_state();
1386 self.skip();
1387 return Ok((Event::SequenceEnd, mark));
1388 }
1389 Token(_, TokenType::FlowEntry) if !first => {
1390 self.skip();
1391 }
1392 Token(span, _) if !first => {
1393 return Err(ScanError::new_str(
1394 span.start,
1395 "while parsing a flow sequence, expected ',' or ']'",
1396 ));
1397 }
1398 _ => { }
1399 }
1400 match *self.peek_token()? {
1401 Token(mark, TokenType::FlowSequenceEnd) => {
1402 self.pop_state();
1403 self.skip();
1404 Ok((Event::SequenceEnd, mark))
1405 }
1406 Token(mark, TokenType::Key) => {
1407 self.state = State::FlowSequenceEntryMappingKey;
1408 self.skip();
1409 Ok((Event::MappingStart(0, None), mark))
1410 }
1411 _ => {
1412 self.push_state(State::FlowSequenceEntry);
1413 self.parse_node(false, false)
1414 }
1415 }
1416 }
1417
1418 fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
1419 where
1420 'input: 'a,
1421 {
1422 match *self.peek_token()? {
1423 Token(mark, TokenType::BlockEntry) => {
1424 self.skip();
1425 if let Token(
1426 _,
1427 TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
1428 ) = *self.peek_token()?
1429 {
1430 self.state = State::IndentlessSequenceEntry;
1431 Ok((Event::empty_scalar(), mark))
1432 } else {
1433 self.push_state(State::IndentlessSequenceEntry);
1434 self.parse_node(true, false)
1435 }
1436 }
1437 Token(mark, _) => {
1438 self.pop_state();
1439 Ok((Event::SequenceEnd, mark))
1440 }
1441 }
1442 }
1443
1444 fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
1445 where
1446 'input: 'a,
1447 {
1448 if first {
1450 let _ = self.peek_token()?;
1451 self.skip();
1453 }
1454 match *self.peek_token()? {
1455 Token(mark, TokenType::BlockEnd) => {
1456 self.pop_state();
1457 self.skip();
1458 Ok((Event::SequenceEnd, mark))
1459 }
1460 Token(mark, TokenType::BlockEntry) => {
1461 self.skip();
1462 if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
1463 self.state = State::BlockSequenceEntry;
1464 Ok((Event::empty_scalar(), mark))
1465 } else {
1466 self.push_state(State::BlockSequenceEntry);
1467 self.parse_node(true, false)
1468 }
1469 }
1470 Token(span, _) => Err(ScanError::new_str(
1471 span.start,
1472 "while parsing a block collection, did not find expected '-' indicator",
1473 )),
1474 }
1475 }
1476
1477 fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
1478 where
1479 'input: 'a,
1480 {
1481 if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
1482 *self.peek_token()?
1483 {
1484 self.state = State::FlowSequenceEntryMappingValue;
1485 Ok((Event::empty_scalar(), mark))
1486 } else {
1487 self.push_state(State::FlowSequenceEntryMappingValue);
1488 self.parse_node(false, false)
1489 }
1490 }
1491
1492 fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
1493 where
1494 'input: 'a,
1495 {
1496 match *self.peek_token()? {
1497 Token(_, TokenType::Value) => {
1498 self.skip();
1499 self.state = State::FlowSequenceEntryMappingValue;
1500 let Token(span, ref tok) = *self.peek_token()?;
1501 if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
1502 self.state = State::FlowSequenceEntryMappingEnd;
1503 Ok((Event::empty_scalar(), Span::empty(span.start)))
1504 } else {
1505 self.push_state(State::FlowSequenceEntryMappingEnd);
1506 self.parse_node(false, false)
1507 }
1508 }
1509 Token(mark, _) => {
1510 self.state = State::FlowSequenceEntryMappingEnd;
1511 Ok((Event::empty_scalar(), mark))
1512 }
1513 }
1514 }
1515
1516 #[allow(clippy::unnecessary_wraps)]
1517 fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
1518 where
1519 'input: 'a,
1520 {
1521 self.state = State::FlowSequenceEntry;
1522 let Token(span, _) = *self.peek_token()?;
1523 Ok((Event::MappingEnd, Span::empty(span.start)))
1524 }
1525
1526 fn resolve_tag(
1528 &self,
1529 span: Span,
1530 handle: &Cow<'input, str>,
1531 suffix: Cow<'input, str>,
1532 ) -> Result<Cow<'input, Tag>, ScanError> {
1533 let suffix = suffix.into_owned();
1534 let tag = if handle == "!!" {
1535 Tag {
1538 handle: self
1539 .tags
1540 .get("!!")
1541 .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
1542 suffix,
1543 }
1544 } else if handle.is_empty() && suffix == "!" {
1545 match self.tags.get("") {
1547 Some(prefix) => Tag {
1548 handle: prefix.clone(),
1549 suffix,
1550 },
1551 None => Tag {
1552 handle: String::new(),
1553 suffix,
1554 },
1555 }
1556 } else {
1557 let prefix = self.tags.get(&**handle);
1559 if let Some(prefix) = prefix {
1560 Tag {
1561 handle: prefix.clone(),
1562 suffix,
1563 }
1564 } else {
1565 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1570 return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
1571 }
1572 Tag {
1573 handle: handle.to_string(),
1574 suffix,
1575 }
1576 }
1577 };
1578 Ok(Cow::Owned(tag))
1579 }
1580}
1581
1582impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
1583 fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
1584 if let Some(ref x) = self.current {
1585 Some(Ok(x))
1586 } else {
1587 if self.stream_end_emitted {
1588 return None;
1589 }
1590 match self.next_event_impl() {
1591 Ok(token) => self.current = Some(token),
1592 Err(e) => return Some(Err(e)),
1593 }
1594 self.current.as_ref().map(Ok)
1595 }
1596 }
1597
1598 fn next_event(&mut self) -> Option<ParseResult<'input>> {
1599 if self.stream_end_emitted {
1600 return None;
1601 }
1602
1603 let tok = self.next_event_impl();
1604 if matches!(tok, Ok((Event::StreamEnd, _))) {
1605 self.stream_end_emitted = true;
1606 }
1607 Some(tok)
1608 }
1609
1610 fn load<R: SpannedEventReceiver<'input>>(
1611 &mut self,
1612 recv: &mut R,
1613 multi: bool,
1614 ) -> Result<(), ScanError> {
1615 let mut recv = InfallibleSpannedReceiver(recv);
1616 into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
1617 }
1618
1619 fn try_load<R: TrySpannedEventReceiver<'input>>(
1620 &mut self,
1621 recv: &mut R,
1622 multi: bool,
1623 ) -> Result<(), TryLoadError<R::Error>> {
1624 let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
1625 if !self.scanner.stream_started() || stream_start_buffered {
1626 let (ev, span) = self.next_event_impl()?;
1627 if ev != Event::StreamStart {
1628 return Err(TryLoadError::Scan(ScanError::new_str(
1629 span.start,
1630 "did not find expected <stream-start>",
1631 )));
1632 }
1633 try_emit(recv, ev, span)?;
1634 }
1635
1636 if self.scanner.stream_ended() {
1637 try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
1639 return Ok(());
1640 }
1641 loop {
1642 let (ev, span) = self.next_event_impl()?;
1643 if ev == Event::StreamEnd {
1644 try_emit(recv, ev, span)?;
1645 return Ok(());
1646 }
1647 self.anchors.clear();
1649 self.try_load_document(ev, span, recv)?;
1650 if !multi {
1651 break;
1652 }
1653 }
1654 Ok(())
1655 }
1656}
1657
/// Lets a [`Parser`] be consumed with the standard iterator adaptors.
impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
    /// Each item is either an `(Event, Span)` pair or a [`ScanError`].
    type Item = Result<(Event<'input>, Span), ScanError>;

    /// Forwards to `next_event`.
    fn next(&mut self) -> Option<Self::Item> {
        self.next_event()
    }
}
1665
1666#[cfg(test)]
1667mod test {
1668 use alloc::{
1669 borrow::ToOwned,
1670 string::{String, ToString},
1671 vec::Vec,
1672 };
1673
1674 use crate::scanner::{ScalarStyle, Span};
1675
1676 use super::{
1677 Event, EventReceiver, Parser, Tag, TryEventReceiver, TryLoadError, TrySpannedEventReceiver,
1678 };
1679
    /// Test receiver that stores every event it is given, in arrival order.
    #[derive(Default)]
    struct CollectingSink<'input> {
        /// Events received so far.
        events: Vec<Event<'input>>,
    }

    impl<'input> EventReceiver<'input> for CollectingSink<'input> {
        /// Append `ev` to the recorded events.
        fn on_event(&mut self, ev: Event<'input>) {
            self.events.push(ev);
        }
    }
1690
1691 fn first_error_info(input: &str) -> String {
1692 for event in Parser::new_from_str(input) {
1693 if let Err(err) = event {
1694 return err.info().to_owned();
1695 }
1696 }
1697 panic!("expected parser error")
1698 }
1699
1700 #[test]
1701 fn display_resolved_core_tag_without_extra_bang() {
1702 let tag = Tag {
1703 handle: "tag:yaml.org,2002:".to_owned(),
1704 suffix: "str".to_owned(),
1705 };
1706
1707 assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
1708 }
1709
1710 #[test]
1711 fn tag_helpers_distinguish_core_and_local_tags() {
1712 let core = Tag {
1713 handle: "tag:yaml.org,2002:".to_owned(),
1714 suffix: "int".to_owned(),
1715 };
1716 let local = Tag {
1717 handle: "!".to_owned(),
1718 suffix: "thing".to_owned(),
1719 };
1720
1721 assert!(core.is_yaml_core_schema());
1722 assert!(!local.is_yaml_core_schema());
1723 assert_eq!(local.to_string(), "!thing");
1724 }
1725
1726 #[test]
1727 fn test_peek_eq_parse() {
1728 let s = "
1729a0 bb: val
1730a1: &x
1731 b1: 4
1732 b2: d
1733a2: 4
1734a3: [1, 2, 3]
1735a4:
1736 - [a1, a2]
1737 - 2
1738a5: *x
1739";
1740 let mut p = Parser::new_from_str(s);
1741 loop {
1742 let event_peek = p.peek().unwrap().unwrap().clone();
1743 let event = p.next_event().unwrap().unwrap();
1744 assert_eq!(event, event_peek);
1745 if event.0 == Event::StreamEnd {
1746 break;
1747 }
1748 }
1749 }
1750
1751 #[test]
1752 fn test_peek_and_next_return_none_after_stream_end() {
1753 let mut parser = Parser::new_from_str("");
1754
1755 assert!(matches!(
1756 parser.next_event().unwrap().unwrap().0,
1757 Event::StreamStart
1758 ));
1759 assert!(matches!(
1760 parser.next_event().unwrap().unwrap().0,
1761 Event::StreamEnd
1762 ));
1763 assert!(parser.next_event().is_none());
1764 assert!(parser.peek().is_none());
1765 }
1766
1767 #[test]
1768 fn test_load_after_stream_already_ended_emits_stream_end() {
1769 let mut parser = Parser::new_from_str("");
1770 while parser.next_event().is_some() {}
1771
1772 let mut sink = CollectingSink::default();
1773 parser.load(&mut sink, true).unwrap();
1774
1775 assert_eq!(sink.events, vec![Event::StreamEnd]);
1776 }
1777
1778 #[test]
1779 fn test_load_visits_nested_collection_events() {
1780 let mut parser = Parser::new_from_str("root:\n - item: value\n - [a, b]\n");
1781 let mut sink = CollectingSink::default();
1782
1783 parser.load(&mut sink, true).unwrap();
1784
1785 assert_eq!(
1786 sink.events,
1787 vec![
1788 Event::StreamStart,
1789 Event::DocumentStart(false),
1790 Event::MappingStart(0, None),
1791 Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
1792 Event::SequenceStart(0, None),
1793 Event::MappingStart(0, None),
1794 Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
1795 Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
1796 Event::MappingEnd,
1797 Event::SequenceStart(0, None),
1798 Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
1799 Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
1800 Event::SequenceEnd,
1801 Event::SequenceEnd,
1802 Event::MappingEnd,
1803 Event::DocumentEnd,
1804 Event::StreamEnd,
1805 ]
1806 );
1807 }
1808
    /// Error type used by the fallible test receivers in this module.
    #[derive(Clone, Debug, PartialEq, Eq)]
    enum ValidationError {
        /// Returned by `FailingSink` when it is handed the scalar `"bad"`.
        ForbiddenValue,
    }
1813
1814 struct FailingSink<'input> {
1815 events: Vec<Event<'input>>,
1816 }
1817
1818 impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
1819 type Error = ValidationError;
1820
1821 fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
1822 let should_fail = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
1823 self.events.push(ev);
1824 if should_fail {
1825 Err(ValidationError::ForbiddenValue)
1826 } else {
1827 Ok(())
1828 }
1829 }
1830 }
1831
1832 #[test]
1833 fn test_try_load_stops_on_receiver_error() {
1834 let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
1835 let mut sink = FailingSink { events: Vec::new() };
1836
1837 let err = parser.try_load(&mut sink, true).unwrap_err();
1838
1839 assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
1840 assert!(sink
1841 .events
1842 .iter()
1843 .any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
1844 assert!(sink
1845 .events
1846 .iter()
1847 .any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
1848 assert!(!sink
1849 .events
1850 .iter()
1851 .any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
1852 }
1853
1854 struct SpannedFailingSink {
1855 failed_span: Option<Span>,
1856 }
1857
1858 impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
1859 type Error = Span;
1860
1861 fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
1862 if matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad") {
1863 self.failed_span = Some(span);
1864 Err(span)
1865 } else {
1866 Ok(())
1867 }
1868 }
1869 }
1870
1871 #[test]
1872 fn test_try_load_spanned_receiver_gets_span() {
1873 let mut parser = Parser::new_from_str("value: bad\n");
1874 let mut sink = SpannedFailingSink { failed_span: None };
1875
1876 let err = parser.try_load(&mut sink, false).unwrap_err();
1877
1878 let TryLoadError::Receiver(span) = err else {
1879 panic!("expected receiver error");
1880 };
1881
1882 assert_eq!(Some(span), sink.failed_span);
1883 assert!(!span.is_empty());
1884 }
1885
    /// Test receiver that accepts every event and only counts them.
    struct NeverFails {
        /// Number of events received so far.
        count: usize,
    }

    impl<'input> TryEventReceiver<'input> for NeverFails {
        type Error = ValidationError;

        /// Count the event and always succeed.
        fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
            self.count += 1;
            Ok(())
        }
    }
1898
1899 #[test]
1900 fn test_try_load_returns_scan_error() {
1901 let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
1902 let mut sink = NeverFails { count: 0 };
1903
1904 let err = parser.try_load(&mut sink, true).unwrap_err();
1905
1906 let TryLoadError::Scan(err) = err else {
1907 panic!("expected scan error");
1908 };
1909 assert_eq!(err.info(), "duplicate version directive");
1910 }
1911
1912 #[test]
1913 fn test_try_load_after_stream_already_ended_emits_stream_end() {
1914 let mut parser = Parser::new_from_str("");
1915 while parser.next_event().is_some() {}
1916
1917 let mut sink = FailingSink { events: Vec::new() };
1918 parser.try_load(&mut sink, true).unwrap();
1919
1920 assert_eq!(sink.events, vec![Event::StreamEnd]);
1921 }
1922
1923 #[test]
1924 fn test_load_single_document_stops_before_next_document() {
1925 let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
1926 let mut sink = CollectingSink::default();
1927
1928 parser.load(&mut sink, false).unwrap();
1929
1930 assert!(sink
1931 .events
1932 .iter()
1933 .any(|event| matches!(event, Event::Scalar(value, ..) if value == "a")));
1934 assert!(!sink
1935 .events
1936 .iter()
1937 .any(|event| matches!(event, Event::Scalar(value, ..) if value == "b")));
1938 assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
1939 }
1940
1941 #[test]
1942 fn test_duplicate_version_directive_errors() {
1943 assert_eq!(
1944 first_error_info("%YAML 1.2\n%YAML 1.2\n---\n"),
1945 "duplicate version directive"
1946 );
1947 }
1948
1949 #[test]
1950 fn test_duplicate_tag_directive_errors() {
1951 assert_eq!(
1952 first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n"),
1953 "the TAG directive must only be given at most once per handle in the same document"
1954 );
1955 }
1956
1957 #[test]
1958 fn test_directive_after_implicit_document_requires_explicit_end() {
1959 assert_eq!(
1960 first_error_info("---\nkey: value\n%YAML 1.2\n---\n"),
1961 "missing explicit document end marker before directive"
1962 );
1963 }
1964
1965 #[test]
1966 fn test_anchor_offset_overflow_reports_error() {
1967 let mut parser = Parser::new_from_str("&a value");
1968 parser.set_anchor_offset(usize::MAX);
1969
1970 let err = parser
1971 .find_map(Result::err)
1972 .expect("anchor registration should overflow");
1973
1974 assert_eq!(
1975 err.info(),
1976 "while parsing anchor, anchor count exceeded supported limit"
1977 );
1978 }
1979
1980 #[test]
1981 fn test_alias_resolves_to_registered_anchor_id() {
1982 let events = Parser::new_from_str("- &a value\n- *a\n")
1983 .map(|event| event.unwrap().0)
1984 .collect::<Vec<_>>();
1985
1986 assert!(events.iter().any(|event| matches!(event, Event::Alias(1))));
1987 }
1988
1989 #[test]
1990 fn test_anchor_then_tag_applies_both_to_scalar() {
1991 let events = Parser::new_from_str("&a !!str value")
1992 .map(|event| event.unwrap().0)
1993 .collect::<Vec<_>>();
1994
1995 let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
1996 .iter()
1997 .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
1998 else {
1999 panic!("expected tagged anchored scalar");
2000 };
2001
2002 assert_eq!(value, "value");
2003 assert_eq!(*anchor_id, 1);
2004 assert_eq!(tag.handle, "tag:yaml.org,2002:");
2005 assert_eq!(tag.suffix, "str");
2006 }
2007
2008 #[test]
2009 fn test_tag_then_anchor_applies_both_to_scalar() {
2010 let events = Parser::new_from_str("!!str &a value")
2011 .map(|event| event.unwrap().0)
2012 .collect::<Vec<_>>();
2013
2014 let Some(Event::Scalar(value, _, anchor_id, Some(tag))) = events
2015 .iter()
2016 .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"))
2017 else {
2018 panic!("expected tagged anchored scalar");
2019 };
2020
2021 assert_eq!(value, "value");
2022 assert_eq!(*anchor_id, 1);
2023 assert_eq!(tag.handle, "tag:yaml.org,2002:");
2024 assert_eq!(tag.suffix, "str");
2025 }
2026
2027 #[test]
2028 fn test_multiple_tag_directives_are_kept_within_document() {
2029 let text = r"
2030%TAG !a! tag:a,2024:
2031%TAG !b! tag:b,2024:
2032---
2033first: !a!x foo
2034second: !b!y bar
2035";
2036
2037 let mut seen_a = false;
2038 let mut seen_b = false;
2039 for event in Parser::new_from_str(text) {
2040 let (event, _) = event.unwrap();
2041 if let Event::Scalar(_, _, _, Some(tag)) = event {
2042 if tag.handle == "tag:a,2024:" {
2043 seen_a = true;
2044 } else if tag.handle == "tag:b,2024:" {
2045 seen_b = true;
2046 }
2047 }
2048 }
2049
2050 assert!(seen_a);
2051 assert!(seen_b);
2052 }
2053
2054 #[test]
2055 fn test_tags_are_cleared_when_next_document_has_no_directives() {
2056 let text = r"
2057%TAG !t! tag:test,2024:
2058--- !t!1
2059foo
2060--- !t!2
2061bar
2062";
2063
2064 let mut parser = Parser::new_from_str(text);
2065 for event in parser.by_ref() {
2066 let (event, _) = event.unwrap();
2067 if let Event::DocumentEnd = event {
2068 break;
2069 }
2070 }
2071
2072 match parser.next().unwrap().unwrap().0 {
2073 Event::DocumentStart(true) => {}
2074 _ => panic!("expected explicit second document start"),
2075 }
2076
2077 let err = parser.next().unwrap().unwrap_err();
2078 assert!(format!("{err}").contains("the handle wasn't declared"));
2079 }
2080
2081 #[test]
2082 fn test_pull_parser_clears_anchors_between_documents() {
2083 let mut parser = Parser::new_from_str(
2084 "--- &a value
2085--- *a
2086",
2087 );
2088
2089 for event in parser.by_ref() {
2090 let (event, _) = event.unwrap();
2091 if matches!(event, Event::DocumentEnd) {
2092 break;
2093 }
2094 }
2095
2096 match parser.next().unwrap().unwrap().0 {
2097 Event::DocumentStart(true) => {}
2098 _ => panic!("expected explicit second document start"),
2099 }
2100
2101 let err = parser.next().unwrap().unwrap_err();
2102 assert!(format!("{err}").contains("unknown anchor"));
2103 }
2104
2105 #[test]
2106 fn test_keep_tags_across_multiple_documents() {
2107 let text = r#"
2108%YAML 1.1
2109%TAG !t! tag:test,2024:
2110--- !t!1 &1
2111foo: "bar"
2112--- !t!2 &2
2113baz: "qux"
2114"#;
2115 for x in Parser::new_from_str(text).keep_tags(true) {
2116 let x = x.unwrap();
2117 if let Event::MappingStart(_, tag) = x.0 {
2118 let tag = tag.unwrap();
2119 assert_eq!(tag.handle, "tag:test,2024:");
2120 }
2121 }
2122
2123 for x in Parser::new_from_str(text).keep_tags(false) {
2124 if x.is_err() {
2125 return;
2127 }
2128 }
2129 panic!("Test failed, did not encounter error")
2130 }
2131
2132 #[test]
2133 fn test_flow_sequence_mapping_allows_empty_key() {
2134 let parser = Parser::new_from_str("[?: value]");
2135 for event in parser {
2136 event.expect("parser should accept flow sequence mappings with empty keys");
2137 }
2138 }
2139
2140 #[test]
2141 fn test_keep_tags_does_not_persist_default_tag_handles() {
2142 let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
2143
2144 let mut int_tags = Vec::new();
2145 for event in Parser::new_from_str(text).keep_tags(true) {
2146 let event = event.unwrap().0;
2147 if let Event::Scalar(_, _, _, Some(tag)) = event {
2148 if tag.suffix == "int" {
2149 int_tags.push(tag.handle.clone());
2150 }
2151 }
2152 }
2153
2154 assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
2155 }
2156
2157 #[test]
2158 fn test_load_after_peek_stream_start() {
2159 #[derive(Default)]
2160 struct Sink<'input> {
2161 events: Vec<Event<'input>>,
2162 }
2163
2164 impl<'input> EventReceiver<'input> for Sink<'input> {
2165 fn on_event(&mut self, ev: Event<'input>) {
2166 self.events.push(ev);
2167 }
2168 }
2169
2170 let mut parser = Parser::new_from_str("key: value\n");
2171 let mut sink = Sink::default();
2172
2173 assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
2174 parser.load(&mut sink, false).unwrap();
2175
2176 assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
2177 assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
2178 }
2179}