1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use alloc::{
13 borrow::{Cow, ToOwned},
14 collections::VecDeque,
15 string::String,
16 vec::Vec,
17};
18use core::{char, fmt};
19
20use crate::{
21 char_traits::{
22 as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
23 is_tag_char, is_uri_char,
24 },
25 input::{BorrowedInput, SkipTabs},
26};
27
/// The character encoding of the scanned stream.
///
/// UTF-8 is the only encoding this enum can express.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8 encoding.
    Utf8,
}
34
/// The presentation style a scalar was written in.
///
/// `PartialOrd`/`Ord` are derived, so variants order by declaration position.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum ScalarStyle {
    /// A plain (unquoted) scalar.
    Plain,
    /// A scalar enclosed in single quotes (`'`).
    SingleQuoted,
    /// A scalar enclosed in double quotes (`"`).
    DoubleQuoted,

    /// A literal block scalar (YAML's `|` style).
    Literal,
    /// A folded block scalar (YAML's `>` style).
    Folded,
}
59
/// Offsets of a position in the input, counted in characters and, when
/// known, in bytes.
#[derive(Debug, Default, Clone, Copy)]
pub struct MarkerOffsets {
    /// Offset counted in characters.
    chars: usize,
    /// Offset counted in bytes; `None` when no byte offset was recorded.
    bytes: Option<usize>,
}

/// Equality looks at the character offset only: two values that differ solely
/// in their byte offset (or in whether one was recorded) compare equal.
impl PartialEq for MarkerOffsets {
    fn eq(&self, rhs: &Self) -> bool {
        let Self {
            chars: lhs_chars, ..
        } = *self;
        lhs_chars == rhs.chars
    }
}

impl Eq for MarkerOffsets {}
84
85#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
87pub struct Marker {
88 offsets: MarkerOffsets,
90 line: usize,
92 col: usize,
94}
95
96impl Marker {
97 #[must_use]
99 pub fn new(index: usize, line: usize, col: usize) -> Marker {
100 Marker {
101 offsets: MarkerOffsets {
102 chars: index,
103 bytes: None,
104 },
105 line,
106 col,
107 }
108 }
109
110 #[must_use]
112 pub fn with_byte_offset(mut self, byte_offset: Option<usize>) -> Marker {
113 self.offsets.bytes = byte_offset;
114 self
115 }
116
117 #[must_use]
119 pub fn index(&self) -> usize {
120 self.offsets.chars
121 }
122
123 #[must_use]
125 pub fn byte_offset(&self) -> Option<usize> {
126 self.offsets.bytes
127 }
128
129 #[must_use]
131 pub fn line(&self) -> usize {
132 self.line
133 }
134
135 #[must_use]
137 pub fn col(&self) -> usize {
138 self.col
139 }
140}
141
142#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
144pub struct Span {
145 pub start: Marker,
147 pub end: Marker,
149
150 pub indent: Option<usize>,
155}
156
157impl Span {
158 #[must_use]
160 pub fn new(start: Marker, end: Marker) -> Span {
161 Span {
162 start,
163 end,
164 indent: None,
165 }
166 }
167
168 #[must_use]
175 pub fn empty(mark: Marker) -> Span {
176 Span {
177 start: mark,
178 end: mark,
179 indent: None,
180 }
181 }
182
183 #[must_use]
185 pub fn with_indent(mut self, indent: Option<usize>) -> Span {
186 self.indent = indent;
187 self
188 }
189
190 #[must_use]
192 pub fn len(&self) -> usize {
193 self.end.index() - self.start.index()
194 }
195
196 #[must_use]
198 pub fn is_empty(&self) -> bool {
199 self.len() == 0
200 }
201
202 #[must_use]
204 pub fn byte_range(&self) -> Option<core::ops::Range<usize>> {
205 let start = self.start.byte_offset()?;
206 let end = self.end.byte_offset()?;
207 Some(start..end)
208 }
209}
210
211#[derive(Clone, PartialEq, Debug, Eq)]
213pub struct ScanError {
214 mark: Marker,
216 info: String,
218}
219
220impl ScanError {
221 #[must_use]
223 #[cold]
224 pub fn new(loc: Marker, info: String) -> ScanError {
225 ScanError { mark: loc, info }
226 }
227
228 #[must_use]
230 #[cold]
231 pub fn new_str(loc: Marker, info: &str) -> ScanError {
232 ScanError {
233 mark: loc,
234 info: info.to_owned(),
235 }
236 }
237
238 #[must_use]
240 pub fn marker(&self) -> &Marker {
241 &self.mark
242 }
243
244 #[must_use]
246 pub fn info(&self) -> &str {
247 self.info.as_ref()
248 }
249}
250
251impl fmt::Display for ScanError {
252 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253 write!(
254 f,
255 "{} at char {} line {} column {}",
256 self.info,
257 self.mark.index(),
258 self.mark.line(),
259 self.mark.col() + 1
260 )
261 }
262}
263
264impl core::error::Error for ScanError {}
265
/// The payload of a [`Token`]: which kind of token it is and the data it carries.
///
/// String payloads are `Cow<'input, str>` so they can either borrow from the
/// input or own their text.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType<'input> {
    /// The start of the stream, with its encoding.
    StreamStart(TEncoding),
    /// The end of the stream.
    StreamEnd,
    /// A `%YAML` version directive.
    VersionDirective(
        /// Major version number.
        u32,
        /// Minor version number.
        u32,
    ),
    /// A `%TAG` directive.
    TagDirective(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag prefix.
        Cow<'input, str>,
    ),
    /// A document start marker.
    DocumentStart,
    /// A document end marker.
    DocumentEnd,
    /// The start of a block sequence.
    BlockSequenceStart,
    /// The start of a block mapping.
    BlockMappingStart,
    /// The end of a block collection.
    BlockEnd,
    /// The start of a flow sequence (`[`).
    FlowSequenceStart,
    /// The end of a flow sequence (`]`).
    FlowSequenceEnd,
    /// The start of a flow mapping (`{`).
    FlowMappingStart,
    /// The end of a flow mapping (`}`).
    FlowMappingEnd,
    /// An entry in a block sequence (`-`).
    BlockEntry,
    /// An entry separator in a flow collection (`,`).
    FlowEntry,
    /// A mapping key.
    Key,
    /// A mapping value.
    Value,
    /// A reference to an anchor, carrying the anchor name.
    Alias(Cow<'input, str>),
    /// An anchor definition, carrying the anchor name.
    Anchor(Cow<'input, str>),
    /// A tag.
    Tag(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag suffix.
        Cow<'input, str>,
    ),
    /// A scalar, with the style it was written in and its text.
    Scalar(ScalarStyle, Cow<'input, str>),
    /// A directive other than `%YAML` and `%TAG`.
    ReservedDirective(
        /// The directive name.
        String,
        /// The directive parameters.
        Vec<String>,
    ),
}
338
/// A scanner token: the [`Span`] it covers in the input (field `0`) and its
/// [`TokenType`] payload (field `1`).
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token<'input>(pub Span, pub TokenType<'input>);
342
343#[derive(Clone, PartialEq, Debug, Eq)]
378struct SimpleKey {
379 possible: bool,
392 required: bool,
400 token_number: usize,
406 mark: Marker,
408}
409
410impl SimpleKey {
411 fn new(mark: Marker) -> SimpleKey {
413 SimpleKey {
414 possible: false,
415 required: false,
416 token_number: 0,
417 mark,
418 }
419 }
420}
421
/// An entry of the scanner's indentation stack.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The indentation level (signed; the scanner uses `-1` for "no indentation").
    indent: isize,
    /// NOTE(review): presumably whether leaving this level must emit a
    /// `BlockEnd` token — the code using it is outside this view; confirm.
    needs_block_end: bool,
}
446
/// State tracked per flow level for implicit mappings.
///
/// NOTE(review): the transitions between these states are implemented outside
/// this view; the variant descriptions below are inferred from the names.
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
    /// An implicit mapping may start here (presumably; confirm).
    Possible,
    /// Currently inside an implicit mapping; the meaning of the `u8` payload
    /// is not visible from here — TODO confirm.
    Inside(u8),
}
480
/// The YAML scanner: turns an input `T` into a stream of [`Token`]s.
///
/// Tokens are obtained through [`Scanner::next_token`] or through the
/// `Iterator` implementation.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<'input, T> {
    /// The input being scanned.
    input: T,
    /// The current position in the input, advanced by the `skip_*` helpers.
    mark: Marker,
    /// Tokens that were scanned but not yet handed out; new tokens are pushed
    /// at the back and `next_token` pops from the front.
    tokens: VecDeque<Token<'input>>,
    /// The error recorded by `Iterator::next`; once set, iteration yields `None`.
    error: Option<ScanError>,

    /// Whether the `StreamStart` token has been produced.
    stream_start_produced: bool,
    /// Whether the `StreamEnd` token has been returned by `next_token`.
    stream_end_produced: bool,
    /// Character index at which a `:` inside a flow collection is accepted as
    /// a value indicator even when not followed by a blank or flow character.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Simple key candidates (one is pushed when the stream starts).
    simple_keys: smallvec::SmallVec<[SimpleKey; 8]>,
    /// The current indentation level; `-1` at stream start.
    indent: isize,
    /// Stack of indentation entries (maintained outside this view).
    indents: smallvec::SmallVec<[Indent; 8]>,
    /// Flow nesting level; `0` is treated as block context by the scanner.
    flow_level: u8,
    /// Number of tokens already returned by `next_token`.
    tokens_parsed: usize,
    /// Set once `fetch_more_tokens` has run; cleared when a token is returned.
    token_available: bool,
    /// `true` after a line break and until a non-blank character is skipped.
    leading_whitespace: bool,
    /// NOTE(review): presumably set when a flow mapping was just opened — it
    /// is only initialised (to `false`) within this view; confirm.
    flow_mapping_started: bool,
    /// Implicit-mapping states (maintained outside this view).
    implicit_flow_mapping_states: smallvec::SmallVec<[ImplicitMappingState; 8]>,
    /// Position recorded for a pending "comment intercepting the multiline
    /// text" check; consumed by `skip_to_next_token`.
    interrupted_plain_by_comment: Option<Marker>,
    /// Recorded (position, bracket character) pairs; any entry left at stream
    /// end is reported as an unclosed bracket.
    flow_markers: smallvec::SmallVec<[(Marker, char); 8]>,
    /// Scratch buffer, preallocated in `new` (used outside this view).
    buf_leading_break: String,
    /// Scratch buffer, preallocated in `new` (used outside this view).
    buf_trailing_breaks: String,
    /// Scratch buffer, preallocated in `new` (used outside this view).
    buf_whitespaces: String,
}
568
569impl<'input, T: BorrowedInput<'input>> Iterator for Scanner<'input, T> {
570 type Item = Token<'input>;
571
572 fn next(&mut self) -> Option<Self::Item> {
573 if self.error.is_some() {
574 return None;
575 }
576 match self.next_token() {
577 Ok(Some(tok)) => {
578 debug_print!(
579 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
580 tok.1,
581 tok.0
582 );
583 Some(tok)
584 }
585 Ok(tok) => tok,
586 Err(e) => {
587 self.error = Some(e);
588 None
589 }
590 }
591 }
592}
593
/// Result type of scanner operations that produce no value: `Ok(())` on
/// success, a [`ScanError`] on failure.
pub type ScanResult = Result<(), ScanError>;
596
/// Accumulator for the text of a flow scalar.
///
/// The text is either described as a byte range of the input (`Borrowed`) or
/// held in an owned `String` (`Owned`). The whitespace bookkeeping methods
/// only affect the `Borrowed` variant and are no-ops on `Owned`.
#[derive(Debug)]
enum FlowScalarBuf {
    /// The scalar is the input range `start..end`, plus an optional run of
    /// trailing whitespace that has been seen but not yet accepted.
    Borrowed {
        /// Offset where the scalar begins.
        start: usize,
        /// Offset where the accepted part of the scalar ends.
        end: usize,
        /// Start of the pending whitespace run, if there is one.
        pending_ws_start: Option<usize>,
        /// End of the pending whitespace run.
        pending_ws_end: usize,
    },
    /// The scalar text is held in an owned string.
    Owned(String),
}

impl FlowScalarBuf {
    /// Creates an empty borrowed buffer anchored at `start`.
    #[inline]
    fn new_borrowed(start: usize) -> Self {
        FlowScalarBuf::Borrowed {
            start,
            end: start,
            pending_ws_start: None,
            pending_ws_end: start,
        }
    }

    /// Creates an empty owned buffer.
    #[inline]
    fn new_owned() -> Self {
        FlowScalarBuf::Owned(String::new())
    }

    /// Returns the owned string, or `None` when the buffer is borrowed.
    #[inline]
    fn as_owned_mut(&mut self) -> Option<&mut String> {
        if let Self::Owned(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Accepts the pending whitespace, if any: the scalar end moves to the end
    /// of the run and the run is forgotten.
    #[inline]
    fn commit_pending_ws(&mut self) {
        match self {
            Self::Borrowed {
                end,
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                if pending_ws_start.take().is_some() {
                    *end = *pending_ws_end;
                }
            }
            Self::Owned(_) => {}
        }
    }

    /// Records a run of whitespace `ws_start..ws_end` as pending. The start of
    /// an already pending run is kept; its end is always moved to `ws_end`.
    #[inline]
    fn note_pending_ws(&mut self, ws_start: usize, ws_end: usize) {
        match self {
            Self::Borrowed {
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                *pending_ws_start = (*pending_ws_start).or(Some(ws_start));
                *pending_ws_end = ws_end;
            }
            Self::Owned(_) => {}
        }
    }

    /// Drops any pending whitespace and resets its end to the scalar end.
    #[inline]
    fn discard_pending_ws(&mut self) {
        match self {
            Self::Borrowed {
                end,
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                *pending_ws_end = *end;
                *pending_ws_start = None;
            }
            Self::Owned(_) => {}
        }
    }
}
682
683impl<'input, T: BorrowedInput<'input>> Scanner<'input, T> {
684 #[inline]
685 fn promote_flow_scalar_buf_to_owned(
686 &self,
687 start_mark: &Marker,
688 buf: &mut FlowScalarBuf,
689 ) -> Result<(), ScanError> {
690 let FlowScalarBuf::Borrowed {
691 start,
692 end,
693 pending_ws_start: _,
694 pending_ws_end: _,
695 } = *buf
696 else {
697 return Ok(());
698 };
699
700 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
701 ScanError::new_str(
702 *start_mark,
703 "internal error: input advertised offsets but did not provide a slice",
704 )
705 })?;
706 *buf = FlowScalarBuf::Owned(slice.to_owned());
707 Ok(())
708 }
    /// Asks the input for the range `start..end` as a string slice living for
    /// `'input`. Returns whatever `slice_borrowed` returns, i.e. `None` when
    /// the input cannot lend such a slice.
    #[inline]
    fn try_borrow_slice(&self, start: usize, end: usize) -> Option<&'input str> {
        self.input.slice_borrowed(start, end)
    }
718
719 fn scan_tag_handle_directive_cow(
724 &mut self,
725 mark: &Marker,
726 ) -> Result<Cow<'input, str>, ScanError> {
727 let Some(start) = self.input.byte_offset() else {
728 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
729 };
730
731 if self.input.look_ch() != '!' {
732 return Err(ScanError::new_str(
733 *mark,
734 "while scanning a tag, did not find expected '!'",
735 ));
736 }
737
738 self.skip_non_blank();
740
741 self.input.lookahead(1);
744 while self.input.next_is_alpha() {
745 self.skip_non_blank();
746 self.input.lookahead(1);
747 }
748
749 if self.input.peek() == '!' {
751 self.skip_non_blank();
752 }
753
754 let Some(end) = self.input.byte_offset() else {
755 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
757 };
758
759 let Some(slice) = self.try_borrow_slice(start, end) else {
760 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
762 ScanError::new_str(
763 *mark,
764 "internal error: input advertised slicing but did not provide a slice",
765 )
766 })?;
767 if !slice.ends_with('!') && slice != "!" {
768 return Err(ScanError::new_str(
769 *mark,
770 "while parsing a tag directive, did not find expected '!'",
771 ));
772 }
773 return Ok(Cow::Owned(slice.to_owned()));
774 };
775
776 if !slice.ends_with('!') && slice != "!" {
777 return Err(ScanError::new_str(
778 *mark,
779 "while parsing a tag directive, did not find expected '!'",
780 ));
781 }
782
783 Ok(Cow::Borrowed(slice))
784 }
785
786 fn scan_tag_prefix_directive_cow(
791 &mut self,
792 start_mark: &Marker,
793 ) -> Result<Cow<'input, str>, ScanError> {
794 let Some(start) = self.input.byte_offset() else {
795 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
796 };
797
798 if self.input.look_ch() == '!' {
800 self.skip_non_blank();
801 } else if !is_tag_char(self.input.peek()) {
802 return Err(ScanError::new_str(
803 *start_mark,
804 "invalid global tag character",
805 ));
806 } else if self.input.peek() == '%' {
807 } else {
809 self.skip_non_blank();
810 }
811
812 while is_uri_char(self.input.look_ch()) {
814 if self.input.peek() == '%' {
815 break;
816 }
817 self.skip_non_blank();
818 }
819
820 if self.input.peek() == '%' {
822 let current = self
823 .input
824 .byte_offset()
825 .expect("byte_offset() must remain available once enabled");
826 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
827 slice.to_owned()
828 } else {
829 String::new()
830 };
831
832 while is_uri_char(self.input.look_ch()) {
833 if self.input.peek() == '%' {
834 out.push(self.scan_uri_escapes(start_mark)?);
835 } else {
836 out.push(self.input.peek());
837 self.skip_non_blank();
838 }
839 }
840 return Ok(Cow::Owned(out));
841 }
842
843 let Some(end) = self.input.byte_offset() else {
844 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
845 };
846
847 let Some(slice) = self.try_borrow_slice(start, end) else {
848 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
850 ScanError::new_str(
851 *start_mark,
852 "internal error: input advertised slicing but did not provide a slice",
853 )
854 })?;
855 return Ok(Cow::Owned(slice.to_owned()));
856 };
857
858 Ok(Cow::Borrowed(slice))
859 }
860 pub fn new(input: T) -> Self {
862 let initial_byte_offset = input.byte_offset();
863 Scanner {
864 input,
865 mark: Marker::new(0, 1, 0).with_byte_offset(initial_byte_offset),
866 tokens: VecDeque::with_capacity(64),
867 error: None,
868
869 stream_start_produced: false,
870 stream_end_produced: false,
871 adjacent_value_allowed_at: 0,
872 simple_key_allowed: true,
873 simple_keys: smallvec::SmallVec::new(),
874 indent: -1,
875 indents: smallvec::SmallVec::new(),
876 flow_level: 0,
877 tokens_parsed: 0,
878 token_available: false,
879 leading_whitespace: true,
880 flow_mapping_started: false,
881 implicit_flow_mapping_states: smallvec::SmallVec::new(),
882 flow_markers: smallvec::SmallVec::new(),
883 interrupted_plain_by_comment: None,
884
885 buf_leading_break: String::with_capacity(128),
886 buf_trailing_breaks: String::with_capacity(128),
887 buf_whitespaces: String::with_capacity(128),
888 }
889 }
890
    /// Returns a copy of the error recorded by the `Iterator` implementation,
    /// or `None` when iteration has not failed.
    #[inline]
    pub fn get_error(&self) -> Option<ScanError> {
        self.error.clone()
    }
899
    /// Builds the "simple key expected" error located at the current position.
    #[cold]
    fn simple_key_expected(&self) -> ScanError {
        ScanError::new_str(self.mark, "simple key expected")
    }
904
905 #[cold]
906 fn unclosed_bracket(mark: Marker, bracket: char) -> ScanError {
907 ScanError::new(mark, format!("unclosed bracket '{bracket}'"))
908 }
909
910 #[inline]
912 fn skip_blank(&mut self) {
913 self.input.skip();
914
915 self.mark.offsets.chars += 1;
916 self.mark.col += 1;
917 self.mark.offsets.bytes = self.input.byte_offset();
918 }
919
920 #[inline]
922 fn skip_non_blank(&mut self) {
923 self.input.skip();
924
925 self.mark.offsets.chars += 1;
926 self.mark.col += 1;
927 self.mark.offsets.bytes = self.input.byte_offset();
928 self.leading_whitespace = false;
929 }
930
931 #[inline]
933 fn skip_n_non_blank(&mut self, count: usize) {
934 for _ in 0..count {
935 self.input.skip();
936 self.mark.offsets.chars += 1;
937 self.mark.col += 1;
938 }
939 self.mark.offsets.bytes = self.input.byte_offset();
940 self.leading_whitespace = false;
941 }
942
943 #[inline]
945 fn skip_nl(&mut self) {
946 self.input.skip();
947
948 self.mark.offsets.chars += 1;
949 self.mark.col = 0;
950 self.mark.line += 1;
951 self.mark.offsets.bytes = self.input.byte_offset();
952 self.leading_whitespace = true;
953 }
954
955 #[inline]
957 fn skip_linebreak(&mut self) {
958 if self.input.next_2_are('\r', '\n') {
959 self.skip_blank();
962 self.skip_nl();
963 } else if self.input.next_is_break() {
964 self.skip_nl();
965 }
966 }
967
    /// Returns whether the `StreamStart` token has been produced.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
973
    /// Returns whether the `StreamEnd` token has been returned by `next_token`.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
979
    /// Returns the current position of the scanner in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
985
    /// Consumes the line break at the current position (see `skip_break`) and
    /// appends a single `'\n'` to `s`, whatever the form of the break was.
    #[inline]
    fn read_break(&mut self, s: &mut String) {
        self.skip_break();
        s.push('\n');
    }
997
998 #[inline]
1003 fn skip_break(&mut self) {
1004 let c = self.input.peek();
1005 let nc = self.input.peek_nth(1);
1006 debug_assert!(is_break(c));
1007 if c == '\r' && nc == '\n' {
1008 self.skip_blank();
1009 }
1010 self.skip_nl();
1011 }
1012
    /// Inserts `tok` into the token queue at position `pos`.
    ///
    /// # Panics
    /// Panics when `pos` is greater than the current length of the queue.
    fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
        let old_len = self.tokens.len();
        assert!(pos <= old_len);
        self.tokens.insert(pos, tok);
    }
1019
    /// Marks that a simple key may start at the current position.
    #[inline]
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
1024
    /// Marks that a simple key may not start at the current position.
    #[inline]
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
1029
    /// Scans the next token(s) from the input and queues them.
    ///
    /// The first call only produces the `StreamStart` token. Later calls skip
    /// whitespace and comments, retire stale simple keys, unroll the
    /// indentation to the current column and then dispatch on the next
    /// character(s) to the matching `fetch_*` method.
    ///
    /// # Errors
    /// Returns an error for content following a document end marker, for
    /// invalid indentation, for the characters `%`, `@` and `` ` `` where a
    /// token must start, and for any error raised by the `fetch_*` method
    /// that was dispatched to.
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.input.lookahead(1);

        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        self.skip_to_next_token()?;

        debug_print!(
            " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
            self.mark,
            self.input.peek()
        );

        self.stale_simple_keys()?;

        let mark = self.mark;
        self.unroll_indent(mark.col as isize);

        self.input.lookahead(4);

        if self.input.next_is_z() {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Directives and document markers are only recognised at column 0.
        if self.mark.col == 0 {
            if self.input.next_char_is('%') {
                return self.fetch_directive();
            } else if self.input.next_is_document_start() {
                return self.fetch_document_indicator(TokenType::DocumentStart);
            } else if self.input.next_is_document_end() {
                self.fetch_document_indicator(TokenType::DocumentEnd)?;
                // Only whitespace (and whatever `skip_ws_to_eol` consumes) may
                // follow the marker on its line.
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid content after document end marker",
                    ));
                }
                return Ok(());
            }
        }

        // Content left of the current indentation is an error, except for the
        // closing/separating flow indicators while inside a flow collection.
        if (self.mark.col as isize) < self.indent {
            self.input.lookahead(1);
            let c = self.input.peek();
            if self.flow_level == 0 || !matches!(c, ']' | '}' | ',') {
                return Err(ScanError::new_str(self.mark, "invalid indentation"));
            }
        }

        let c = self.input.peek();
        let nc = self.input.peek_nth(1);
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            // `-`, `?` and `:` are indicators only when followed by a blank, a
            // line break or the end of input.
            '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
            '?' if is_blank_or_breakz(nc) => self.fetch_key(),
            ':' if is_blank_or_breakz(nc) => self.fetch_value(),
            // Inside a flow collection, `:` is also a value indicator when
            // followed by a flow character or at the recorded adjacent position.
            ':' if self.flow_level > 0
                && (is_flow(nc) || self.mark.index() == self.adjacent_value_allowed_at) =>
            {
                self.fetch_flow_value()
            }
            // `*` is scanned with the flag set, `&` with it cleared.
            '*' => self.fetch_anchor(true),
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Block scalars only exist outside flow collections.
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            // Indicator characters not followed by a blank start a plain scalar.
            '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
            ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
                self.fetch_plain_scalar()
            }
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                format!("unexpected character: `{c}'"),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
1126
1127 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
1131 if self.stream_end_produced {
1132 return Ok(None);
1133 }
1134
1135 if !self.token_available {
1136 self.fetch_more_tokens()?;
1137 }
1138 let Some(t) = self.tokens.pop_front() else {
1139 return Err(ScanError::new_str(
1140 self.mark,
1141 "did not find expected next token",
1142 ));
1143 };
1144 self.token_available = false;
1145 self.tokens_parsed += 1;
1146
1147 if let TokenType::StreamEnd = t.1 {
1148 self.stream_end_produced = true;
1149 }
1150 Ok(Some(t))
1151 }
1152
1153 pub fn fetch_more_tokens(&mut self) -> ScanResult {
1157 let mut need_more;
1158 loop {
1159 if self.tokens.is_empty() {
1160 need_more = true;
1161 } else {
1162 need_more = false;
1163 self.stale_simple_keys()?;
1165 for sk in &self.simple_keys {
1167 if sk.possible && sk.token_number == self.tokens_parsed {
1168 need_more = true;
1169 break;
1170 }
1171 }
1172 }
1173
1174 if let Some(token) = self.tokens.back() {
1177 if matches!(token.1, TokenType::DocumentEnd | TokenType::DocumentStart) {
1178 break;
1179 }
1180 }
1181
1182 if !need_more {
1183 break;
1184 }
1185 self.fetch_next_token()?;
1186 }
1187 self.token_available = true;
1188
1189 Ok(())
1190 }
1191
1192 fn stale_simple_keys(&mut self) -> ScanResult {
1200 for sk in &mut self.simple_keys {
1201 if sk.possible
1202 && self.flow_level == 0
1204 && (sk.mark.line < self.mark.line
1205 || sk.mark.index() + 1024 < self.mark.index())
1206 {
1207 if sk.required {
1208 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
1209 }
1210 sk.possible = false;
1211 }
1212 }
1213 Ok(())
1214 }
1215
    /// Skips whitespace, line breaks and comments up to the next token.
    ///
    /// Every consumed line break re-allows simple keys when outside flow
    /// collections.
    ///
    /// # Errors
    /// Fails when a tab is used where block indentation is expected and is
    /// followed by content on the same line, and when a position recorded in
    /// `interrupted_plain_by_comment` turns out to be followed by what could
    /// be the continuation of a plain scalar.
    fn skip_to_next_token(&mut self) -> ScanResult {
        // Consumes a line break and, in block context, re-allows simple keys.
        // Takes the scanner as an argument so it does not capture `self`.
        let consume_linebreak = |this: &mut Self| {
            this.input.lookahead(2);
            this.skip_linebreak();
            if this.flow_level == 0 {
                this.allow_simple_key();
            }
        };

        loop {
            match self.input.look_ch() {
                '\t' => {
                    // A tab in leading whitespace, left of the current
                    // indentation of a block, is only tolerated on a line that
                    // holds nothing but whitespace.
                    if self.is_within_block()
                        && self.leading_whitespace
                        && (self.mark.col as isize) < self.indent
                    {
                        self.skip_ws_to_eol(SkipTabs::Yes)?;

                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tabs disallowed within this context (block indentation)",
                            ));
                        }

                        if matches!(self.input.look_ch(), '\n' | '\r') {
                            consume_linebreak(self);
                        }
                    } else {
                        self.skip_blank();
                    }
                }

                ' ' => self.skip_blank(),

                '\n' | '\r' => consume_linebreak(self),

                '#' => {
                    // Skip the comment up to (not including) the line break.
                    let n = self.input.skip_while_non_breakz();
                    self.mark.offsets.chars += n;
                    self.mark.col += n;
                    self.mark.offsets.bytes = self.input.byte_offset();

                    if matches!(self.input.look_ch(), '\n' | '\r') {
                        consume_linebreak(self);
                    }
                }

                _ => break,
            }
        }

        // The recorded position is consumed here whether or not it leads to an
        // error.
        if let Some(err_mark) = self.interrupted_plain_by_comment.take() {
            let is_immediate_next_line = self.mark.line == err_mark.line + 1;

            // Only content on the very next line, in block context and
            // indented deeper than the current indentation, is checked.
            if self.flow_level == 0
                && is_immediate_next_line
                && (self.mark.col as isize) > self.indent
            {
                self.input.lookahead(4);

                if !self.input.next_is_z()
                    && !self.input.next_is_document_indicator()
                    && self.input.next_can_be_plain_scalar(false)
                {
                    return Err(ScanError::new_str(
                        err_mark,
                        "comment intercepting the multiline text",
                    ));
                }
            }
        }

        Ok(())
    }
1313
1314 fn skip_yaml_whitespace(&mut self) -> ScanResult {
1319 let mut need_whitespace = true;
1320 loop {
1321 match self.input.look_ch() {
1322 ' ' => {
1323 self.skip_blank();
1324
1325 need_whitespace = false;
1326 }
1327 '\n' | '\r' => {
1328 self.input.lookahead(2);
1329 self.skip_linebreak();
1330 if self.flow_level == 0 {
1331 self.allow_simple_key();
1332 }
1333 need_whitespace = false;
1334 }
1335 '#' => {
1336 let comment_length = self.input.skip_while_non_breakz();
1337 self.mark.offsets.chars += comment_length;
1338 self.mark.col += comment_length;
1339 self.mark.offsets.bytes = self.input.byte_offset();
1340 }
1341 _ => break,
1342 }
1343 }
1344
1345 if need_whitespace {
1346 Err(ScanError::new_str(self.mark(), "expected whitespace"))
1347 } else {
1348 Ok(())
1349 }
1350 }
1351
1352 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
1353 let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
1354 self.mark.col += n_bytes;
1355 self.mark.offsets.chars += n_bytes;
1356 self.mark.offsets.bytes = self.input.byte_offset();
1357 result.map_err(|msg| ScanError::new_str(self.mark, msg))
1358 }
1359
1360 fn fetch_stream_start(&mut self) {
1361 let mark = self.mark;
1362 self.indent = -1;
1363 self.stream_start_produced = true;
1364 self.allow_simple_key();
1365 self.tokens.push_back(Token(
1366 Span::empty(mark),
1367 TokenType::StreamStart(TEncoding::Utf8),
1368 ));
1369 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1370 }
1371
1372 fn fetch_stream_end(&mut self) -> ScanResult {
1373 if self.mark.col != 0 {
1375 self.mark.col = 0;
1376 self.mark.line += 1;
1377 }
1378
1379 if let Some((mark, bracket)) = self.flow_markers.pop() {
1380 return Err(Self::unclosed_bracket(mark, bracket));
1381 }
1382
1383 for sk in &mut self.simple_keys {
1386 if sk.required && sk.possible {
1387 return Err(self.simple_key_expected());
1388 }
1389 sk.possible = false;
1390 }
1391
1392 self.unroll_indent(-1);
1393 self.remove_simple_key()?;
1394 self.disallow_simple_key();
1395
1396 self.tokens
1397 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
1398 Ok(())
1399 }
1400
1401 fn fetch_directive(&mut self) -> ScanResult {
1402 self.unroll_indent(-1);
1403 self.remove_simple_key()?;
1404
1405 self.disallow_simple_key();
1406
1407 let tok = self.scan_directive()?;
1408 self.tokens.push_back(tok);
1409
1410 Ok(())
1411 }
1412
    /// Scans a whole directive line and returns its token.
    ///
    /// `YAML` and `TAG` directives are handled by their dedicated scanners;
    /// any other name yields a `ReservedDirective` with its
    /// whitespace-separated parameters.
    ///
    /// # Errors
    /// Fails when the name or the value is malformed, or when the directive is
    /// followed on its line by something that `skip_ws_to_eol` does not
    /// consume.
    fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;
        // Skip the character that introduced the directive.
        self.skip_non_blank();

        let name = self.scan_directive_name()?;
        let tok = match name.as_ref() {
            "YAML" => self.scan_version_directive_value(&start_mark)?,
            "TAG" => self.scan_tag_directive_value(&start_mark)?,
            _ => {
                // Collect the parameters: runs of non-space characters
                // separated by blanks.
                let mut params = Vec::new();
                while self.input.next_is_blank() {
                    let n_blanks = self.input.skip_while_blank();
                    self.mark.offsets.chars += n_blanks;
                    self.mark.col += n_blanks;
                    self.mark.offsets.bytes = self.input.byte_offset();

                    // Trailing blanks before the end of the line do not start
                    // a parameter.
                    if !is_blank_or_breakz(self.input.peek()) {
                        let mut param = String::new();
                        let n_chars = self.input.fetch_while_is_yaml_non_space(&mut param);
                        self.mark.offsets.chars += n_chars;
                        self.mark.col += n_chars;
                        self.mark.offsets.bytes = self.input.byte_offset();
                        params.push(param);
                    }
                }

                Token(
                    Span::new(start_mark, self.mark),
                    TokenType::ReservedDirective(name, params),
                )
            }
        };

        self.skip_ws_to_eol(SkipTabs::Yes)?;

        if self.input.next_is_breakz() {
            self.input.lookahead(2);
            self.skip_linebreak();
            Ok(tok)
        } else {
            Err(ScanError::new_str(
                start_mark,
                "while scanning a directive, did not find expected comment or line break",
            ))
        }
    }
1459
1460 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1461 let n_blanks = self.input.skip_while_blank();
1462 self.mark.offsets.chars += n_blanks;
1463 self.mark.col += n_blanks;
1464 self.mark.offsets.bytes = self.input.byte_offset();
1465
1466 let major = self.scan_version_directive_number(mark)?;
1467
1468 if self.input.peek() != '.' {
1469 return Err(ScanError::new_str(
1470 *mark,
1471 "while scanning a YAML directive, did not find expected digit or '.' character",
1472 ));
1473 }
1474 self.skip_non_blank();
1475
1476 let minor = self.scan_version_directive_number(mark)?;
1477
1478 Ok(Token(
1479 Span::new(*mark, self.mark),
1480 TokenType::VersionDirective(major, minor),
1481 ))
1482 }
1483
1484 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1485 let start_mark = self.mark;
1486 let mut string = String::new();
1487
1488 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut string);
1489 self.mark.offsets.chars += n_chars;
1490 self.mark.col += n_chars;
1491 self.mark.offsets.bytes = self.input.byte_offset();
1492
1493 if string.is_empty() {
1494 return Err(ScanError::new_str(
1495 start_mark,
1496 "while scanning a directive, could not find expected directive name",
1497 ));
1498 }
1499
1500 if !is_blank_or_breakz(self.input.peek()) {
1501 return Err(ScanError::new_str(
1502 start_mark,
1503 "while scanning a directive, found unexpected non-alphabetical character",
1504 ));
1505 }
1506
1507 Ok(string)
1508 }
1509
1510 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1511 let mut val = 0u32;
1512 let mut length = 0usize;
1513 while let Some(digit) = self.input.look_ch().to_digit(10) {
1514 if length + 1 > 9 {
1515 return Err(ScanError::new_str(
1516 *mark,
1517 "while scanning a YAML directive, found extremely long version number",
1518 ));
1519 }
1520 length += 1;
1521 val = val * 10 + digit;
1522 self.skip_non_blank();
1523 }
1524
1525 if length == 0 {
1526 return Err(ScanError::new_str(
1527 *mark,
1528 "while scanning a YAML directive, did not find expected version number",
1529 ));
1530 }
1531
1532 Ok(val)
1533 }
1534
1535 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1536 let n_blanks = self.input.skip_while_blank();
1537 self.mark.offsets.chars += n_blanks;
1538 self.mark.col += n_blanks;
1539 self.mark.offsets.bytes = self.input.byte_offset();
1540
1541 let handle = self.scan_tag_handle_directive_cow(mark)?;
1542
1543 let n_blanks = self.input.skip_while_blank();
1544 self.mark.offsets.chars += n_blanks;
1545 self.mark.col += n_blanks;
1546 self.mark.offsets.bytes = self.input.byte_offset();
1547
1548 let prefix = self.scan_tag_prefix_directive_cow(mark)?;
1549
1550 self.input.lookahead(1);
1551
1552 if self.input.next_is_blank_or_breakz() {
1553 Ok(Token(
1554 Span::new(*mark, self.mark),
1555 TokenType::TagDirective(handle, prefix),
1556 ))
1557 } else {
1558 Err(ScanError::new_str(
1559 *mark,
1560 "while scanning TAG, did not find expected whitespace or line break",
1561 ))
1562 }
1563 }
1564
1565 fn fetch_tag(&mut self) -> ScanResult {
1566 self.save_simple_key();
1567 self.disallow_simple_key();
1568
1569 let tok = self.scan_tag()?;
1570 self.tokens.push_back(tok);
1571 Ok(())
1572 }
1573
    /// Scans a tag and returns its `Tag(handle, suffix)` token, borrowing from
    /// the input where possible.
    ///
    /// Falls back to `scan_tag_owned` when the input reports no byte offset.
    ///
    /// # Errors
    /// Fails when the handle or suffix is malformed, or when the tag is not
    /// followed by a blank, a line break, the end of input or — inside a flow
    /// collection — a flow character.
    fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;

        self.input.lookahead(2);

        if self.input.byte_offset().is_none() {
            return self.scan_tag_owned(&start_mark);
        }

        let (handle, suffix): (Cow<'input, str>, Cow<'input, str>) =
            if self.input.nth_char_is(1, '<') {
                // Verbatim tag: empty handle, the scanned text is the suffix.
                let suffix = self.scan_verbatim_tag(&start_mark)?;
                (Cow::Owned(String::new()), Cow::Owned(suffix))
            } else {
                let handle = self.scan_tag_handle_cow(&start_mark)?;
                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                    // A complete handle (delimited by '!' on both sides): what
                    // follows is the suffix.
                    let suffix = self.scan_tag_shorthand_suffix_cow(&start_mark)?;
                    (handle, suffix)
                } else {
                    // No closing '!': what was scanned as a handle is really
                    // the beginning of the suffix of a `!`-handle tag.
                    let remaining_suffix = self.scan_tag_shorthand_suffix_cow(&start_mark)?;

                    let suffix = if handle.len() > 1 {
                        if remaining_suffix.is_empty() {
                            // Drop the leading '!' and keep borrowing if we can.
                            match handle {
                                Cow::Borrowed(s) => Cow::Borrowed(&s[1..]),
                                Cow::Owned(s) => Cow::Owned(s[1..].to_owned()),
                            }
                        } else {
                            // The suffix spans both parts: join them.
                            let mut combined = handle[1..].to_owned();
                            combined.push_str(&remaining_suffix);
                            Cow::Owned(combined)
                        }
                    } else {
                        remaining_suffix
                    };

                    if suffix.is_empty() {
                        // A lone '!': empty handle, "!" as the suffix.
                        (Cow::Borrowed(""), Cow::Borrowed("!"))
                    } else {
                        (Cow::Borrowed("!"), suffix)
                    }
                }
            };

        if is_blank_or_breakz(self.input.look_ch())
            || (self.flow_level > 0 && self.input.next_is_flow())
        {
            Ok(Token(
                Span::new(start_mark, self.mark),
                TokenType::Tag(handle, suffix),
            ))
        } else {
            Err(ScanError::new_str(
                start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
1649
    /// Scans a tag into owned strings and returns its `Tag(handle, suffix)`
    /// token. `scan_tag` calls this when the input reports no byte offset.
    ///
    /// # Errors
    /// Fails when the handle or suffix is malformed, or when the tag is not
    /// followed by a blank, a line break, the end of input or — inside a flow
    /// collection — a flow character.
    fn scan_tag_owned(&mut self, start_mark: &Marker) -> Result<Token<'input>, ScanError> {
        let mut handle = String::new();
        let mut suffix;

        if self.input.nth_char_is(1, '<') {
            // Verbatim tag: the handle stays empty.
            suffix = self.scan_verbatim_tag(start_mark)?;
        } else {
            handle = self.scan_tag_handle(false, start_mark)?;
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                // A complete handle, delimited by '!' on both sides.
                let is_secondary_handle = handle == "!!";
                suffix =
                    self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", start_mark)?;
            } else {
                // No closing '!': the scanned text is handed to the suffix
                // scanner and the handle becomes "!".
                suffix = self.scan_tag_shorthand_suffix(false, false, &handle, start_mark)?;
                "!".clone_into(&mut handle);
                if suffix.is_empty() {
                    // A lone '!': empty handle, "!" as the suffix.
                    handle.clear();
                    "!".clone_into(&mut suffix);
                }
            }
        }

        if is_blank_or_breakz(self.input.look_ch())
            || (self.flow_level > 0 && self.input.next_is_flow())
        {
            Ok(Token(
                Span::new(*start_mark, self.mark),
                TokenType::Tag(handle.into(), suffix.into()),
            ))
        } else {
            Err(ScanError::new_str(
                *start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
1693
1694 fn scan_tag_handle_cow(&mut self, mark: &Marker) -> Result<Cow<'input, str>, ScanError> {
1699 let Some(start) = self.input.byte_offset() else {
1700 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1701 };
1702
1703 if self.input.look_ch() != '!' {
1704 return Err(ScanError::new_str(
1705 *mark,
1706 "while scanning a tag, did not find expected '!'",
1707 ));
1708 }
1709
1710 self.skip_non_blank();
1712
1713 self.input.lookahead(1);
1715 while self.input.next_is_alpha() {
1716 self.skip_non_blank();
1717 self.input.lookahead(1);
1718 }
1719
1720 if self.input.peek() == '!' {
1722 self.skip_non_blank();
1723 }
1724
1725 let Some(end) = self.input.byte_offset() else {
1726 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1727 };
1728
1729 if let Some(slice) = self.try_borrow_slice(start, end) {
1730 Ok(Cow::Borrowed(slice))
1731 } else {
1732 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1733 ScanError::new_str(
1734 *mark,
1735 "internal error: input advertised slicing but did not provide a slice",
1736 )
1737 })?;
1738 Ok(Cow::Owned(slice.to_owned()))
1739 }
1740 }
1741
1742 fn scan_tag_shorthand_suffix_cow(
1746 &mut self,
1747 mark: &Marker,
1748 ) -> Result<Cow<'input, str>, ScanError> {
1749 let Some(start) = self.input.byte_offset() else {
1750 return Ok(Cow::Owned(
1751 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
1752 ));
1753 };
1754
1755 while is_tag_char(self.input.look_ch()) {
1757 if self.input.peek() == '%' {
1758 let current = self
1760 .input
1761 .byte_offset()
1762 .expect("byte_offset() must remain available once enabled");
1763 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
1764 slice.to_owned()
1765 } else {
1766 String::new()
1767 };
1768
1769 while is_tag_char(self.input.look_ch()) {
1771 if self.input.peek() == '%' {
1772 out.push(self.scan_uri_escapes(mark)?);
1773 } else {
1774 out.push(self.input.peek());
1775 self.skip_non_blank();
1776 }
1777 }
1778 return Ok(Cow::Owned(out));
1779 }
1780 self.skip_non_blank();
1781 }
1782
1783 let Some(end) = self.input.byte_offset() else {
1784 return Ok(Cow::Owned(
1785 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
1786 ));
1787 };
1788
1789 if let Some(slice) = self.try_borrow_slice(start, end) {
1790 Ok(Cow::Borrowed(slice))
1791 } else {
1792 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1793 ScanError::new_str(
1794 *mark,
1795 "internal error: input advertised slicing but did not provide a slice",
1796 )
1797 })?;
1798 Ok(Cow::Owned(slice.to_owned()))
1799 }
1800 }
1801
1802 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1803 let mut string = String::new();
1804 if self.input.look_ch() != '!' {
1805 return Err(ScanError::new_str(
1806 *mark,
1807 "while scanning a tag, did not find expected '!'",
1808 ));
1809 }
1810
1811 string.push(self.input.peek());
1812 self.skip_non_blank();
1813
1814 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1815 self.mark.offsets.chars += n_chars;
1816 self.mark.col += n_chars;
1817 self.mark.offsets.bytes = self.input.byte_offset();
1818
1819 if self.input.peek() == '!' {
1821 string.push(self.input.peek());
1822 self.skip_non_blank();
1823 } else if directive && string != "!" {
1824 return Err(ScanError::new_str(
1828 *mark,
1829 "while parsing a tag directive, did not find expected '!'",
1830 ));
1831 }
1832 Ok(string)
1833 }
1834
1835 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1841 let mut string = String::new();
1842
1843 if self.input.look_ch() == '!' {
1844 string.push(self.input.peek());
1846 self.skip_non_blank();
1847 } else if !is_tag_char(self.input.peek()) {
1848 return Err(ScanError::new_str(
1850 *start_mark,
1851 "invalid global tag character",
1852 ));
1853 } else if self.input.peek() == '%' {
1854 string.push(self.scan_uri_escapes(start_mark)?);
1856 } else {
1857 string.push(self.input.peek());
1859 self.skip_non_blank();
1860 }
1861
1862 while is_uri_char(self.input.look_ch()) {
1863 if self.input.peek() == '%' {
1864 string.push(self.scan_uri_escapes(start_mark)?);
1865 } else {
1866 string.push(self.input.peek());
1867 self.skip_non_blank();
1868 }
1869 }
1870
1871 Ok(string)
1872 }
1873
1874 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1878 self.skip_non_blank();
1880 self.skip_non_blank();
1881
1882 let mut string = String::new();
1883 while is_uri_char(self.input.look_ch()) {
1884 if self.input.peek() == '%' {
1885 string.push(self.scan_uri_escapes(start_mark)?);
1886 } else {
1887 string.push(self.input.peek());
1888 self.skip_non_blank();
1889 }
1890 }
1891
1892 if self.input.peek() != '>' {
1893 return Err(ScanError::new_str(
1894 *start_mark,
1895 "while scanning a verbatim tag, did not find the expected '>'",
1896 ));
1897 }
1898 self.skip_non_blank();
1899
1900 Ok(string)
1901 }
1902
1903 fn scan_tag_shorthand_suffix(
1904 &mut self,
1905 _directive: bool,
1906 _is_secondary: bool,
1907 head: &str,
1908 mark: &Marker,
1909 ) -> Result<String, ScanError> {
1910 let mut length = head.len();
1911 let mut string = String::new();
1912
1913 if length > 1 {
1916 string.extend(head.chars().skip(1));
1917 }
1918
1919 while is_tag_char(self.input.look_ch()) {
1920 if self.input.peek() == '%' {
1922 string.push(self.scan_uri_escapes(mark)?);
1923 } else {
1924 string.push(self.input.peek());
1925 self.skip_non_blank();
1926 }
1927
1928 length += 1;
1929 }
1930
1931 if length == 0 {
1932 return Err(ScanError::new_str(
1933 *mark,
1934 "while parsing a tag, did not find expected tag URI",
1935 ));
1936 }
1937
1938 Ok(string)
1939 }
1940
1941 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1942 let mut width = 0usize;
1943 let mut code = 0u32;
1944 loop {
1945 self.input.lookahead(3);
1946
1947 let c = self.input.peek_nth(1);
1948 let nc = self.input.peek_nth(2);
1949
1950 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1951 return Err(ScanError::new_str(
1952 *mark,
1953 "while parsing a tag, found an invalid escape sequence",
1954 ));
1955 }
1956
1957 let byte = (as_hex(c) << 4) + as_hex(nc);
1958 if width == 0 {
1959 width = match byte {
1960 _ if byte & 0x80 == 0x00 => 1,
1961 _ if byte & 0xE0 == 0xC0 => 2,
1962 _ if byte & 0xF0 == 0xE0 => 3,
1963 _ if byte & 0xF8 == 0xF0 => 4,
1964 _ => {
1965 return Err(ScanError::new_str(
1966 *mark,
1967 "while parsing a tag, found an incorrect leading UTF-8 byte",
1968 ));
1969 }
1970 };
1971 code = byte;
1972 } else {
1973 if byte & 0xc0 != 0x80 {
1974 return Err(ScanError::new_str(
1975 *mark,
1976 "while parsing a tag, found an incorrect trailing UTF-8 byte",
1977 ));
1978 }
1979 code = (code << 8) + byte;
1980 }
1981
1982 self.skip_n_non_blank(3);
1983
1984 width -= 1;
1985 if width == 0 {
1986 break;
1987 }
1988 }
1989
1990 match char::from_u32(code) {
1991 Some(ch) => Ok(ch),
1992 None => Err(ScanError::new_str(
1993 *mark,
1994 "while parsing a tag, found an invalid UTF-8 codepoint",
1995 )),
1996 }
1997 }
1998
1999 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
2000 self.save_simple_key();
2001 self.disallow_simple_key();
2002
2003 let tok = self.scan_anchor(alias)?;
2004
2005 self.tokens.push_back(tok);
2006
2007 Ok(())
2008 }
2009
2010 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
2011 let start_mark = self.mark;
2012
2013 self.skip_non_blank();
2015
2016 if let Some(start) = self.input.byte_offset() {
2018 while is_anchor_char(self.input.look_ch()) {
2019 self.skip_non_blank();
2020 }
2021
2022 let end = self
2023 .input
2024 .byte_offset()
2025 .expect("byte_offset() must remain available once enabled");
2026
2027 if start == end {
2028 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2029 }
2030
2031 let cow = if let Some(slice) = self.try_borrow_slice(start, end) {
2032 Cow::Borrowed(slice)
2033 } else if let Some(slice) = self.input.slice_bytes(start, end) {
2034 Cow::Owned(slice.to_owned())
2035 } else {
2036 return Err(ScanError::new_str(
2037 start_mark,
2038 "internal error: input advertised slicing but did not provide a slice",
2039 ));
2040 };
2041
2042 let tok = if alias {
2043 TokenType::Alias(cow)
2044 } else {
2045 TokenType::Anchor(cow)
2046 };
2047 return Ok(Token(Span::new(start_mark, self.mark), tok));
2048 }
2049
2050 let mut string = String::new();
2051 while is_anchor_char(self.input.look_ch()) {
2052 string.push(self.input.peek());
2053 self.skip_non_blank();
2054 }
2055
2056 if string.is_empty() {
2057 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2058 }
2059
2060 let tok = if alias {
2061 TokenType::Alias(string.into())
2062 } else {
2063 TokenType::Anchor(string.into())
2064 };
2065 Ok(Token(Span::new(start_mark, self.mark), tok))
2066 }
2067
2068 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
2069 self.save_simple_key();
2071
2072 let start_mark = self.mark;
2073 let indicator = self.input.peek();
2074 self.flow_markers.push((start_mark, indicator));
2075
2076 self.roll_one_col_indent();
2077 self.increase_flow_level()?;
2078
2079 self.allow_simple_key();
2080
2081 self.skip_non_blank();
2082
2083 if tok == TokenType::FlowMappingStart {
2084 self.flow_mapping_started = true;
2085 } else {
2086 self.implicit_flow_mapping_states
2087 .push(ImplicitMappingState::Possible);
2088 }
2089
2090 self.skip_ws_to_eol(SkipTabs::Yes)?;
2091
2092 self.tokens
2093 .push_back(Token(Span::new(start_mark, self.mark), tok));
2094 Ok(())
2095 }
2096
2097 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
2098 if self.flow_level == 0 {
2100 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2101 }
2102
2103 let flow_level = self.flow_level;
2104
2105 self.flow_markers.pop();
2106 self.remove_simple_key()?;
2107
2108 if matches!(tok, TokenType::FlowSequenceEnd) {
2109 self.end_implicit_mapping(self.mark, flow_level);
2110 self.implicit_flow_mapping_states.pop();
2112 }
2113
2114 self.decrease_flow_level();
2115
2116 self.disallow_simple_key();
2117
2118 let start_mark = self.mark;
2119 self.skip_non_blank();
2120 self.skip_ws_to_eol(SkipTabs::Yes)?;
2121
2122 if self.flow_level > 0 {
2128 self.adjacent_value_allowed_at = self.mark.index();
2129 }
2130
2131 self.tokens
2132 .push_back(Token(Span::new(start_mark, self.mark), tok));
2133 Ok(())
2134 }
2135
2136 fn fetch_flow_entry(&mut self) -> ScanResult {
2138 self.remove_simple_key()?;
2139 self.allow_simple_key();
2140
2141 self.end_implicit_mapping(self.mark, self.flow_level);
2142
2143 let start_mark = self.mark;
2144 self.skip_non_blank();
2145 self.skip_ws_to_eol(SkipTabs::Yes)?;
2146
2147 self.tokens.push_back(Token(
2148 Span::new(start_mark, self.mark),
2149 TokenType::FlowEntry,
2150 ));
2151 Ok(())
2152 }
2153
2154 fn increase_flow_level(&mut self) -> ScanResult {
2155 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
2156 self.flow_level = self
2157 .flow_level
2158 .checked_add(1)
2159 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
2160 Ok(())
2161 }
2162
2163 fn decrease_flow_level(&mut self) {
2164 if self.flow_level > 0 {
2165 self.flow_level -= 1;
2166 self.simple_keys.pop().unwrap();
2167 }
2168 }
2169
2170 fn fetch_block_entry(&mut self) -> ScanResult {
2176 if self.flow_level > 0 {
2177 return Err(ScanError::new_str(
2179 self.mark,
2180 r#""-" is only valid inside a block"#,
2181 ));
2182 }
2183 if !self.simple_key_allowed {
2185 return Err(ScanError::new_str(
2186 self.mark,
2187 "block sequence entries are not allowed in this context",
2188 ));
2189 }
2190
2191 if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
2193 if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
2194 return Err(ScanError::new_str(
2195 span.start,
2196 "invalid indentation for anchor",
2197 ));
2198 }
2199 }
2200
2201 let mark = self.mark;
2203 self.skip_non_blank();
2204
2205 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
2207 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
2208 self.input.lookahead(2);
2209 if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
2210 {
2211 return Err(ScanError::new_str(
2212 self.mark,
2213 "'-' must be followed by a valid YAML whitespace",
2214 ));
2215 }
2216
2217 self.skip_ws_to_eol(SkipTabs::No)?;
2218 self.input.lookahead(1);
2219 if self.input.next_is_break() || self.input.next_is_flow() {
2220 self.roll_one_col_indent();
2221 }
2222
2223 self.remove_simple_key()?;
2224 self.allow_simple_key();
2225
2226 self.tokens
2227 .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
2228
2229 Ok(())
2230 }
2231
2232 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
2233 if let Some((mark, bracket)) = self.flow_markers.pop() {
2234 return Err(ScanError::new(
2235 mark,
2236 format!("unclosed bracket '{bracket}'"),
2237 ));
2238 }
2239
2240 self.unroll_indent(-1);
2241 self.remove_simple_key()?;
2242 self.disallow_simple_key();
2243
2244 let mark = self.mark;
2245
2246 self.skip_n_non_blank(3);
2247
2248 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
2249 Ok(())
2250 }
2251
2252 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
2253 self.save_simple_key();
2254 self.allow_simple_key();
2255 let tok = self.scan_block_scalar(literal)?;
2256
2257 self.tokens.push_back(tok);
2258 Ok(())
2259 }
2260
/// Scans a block scalar and returns it as a `Scalar` token.
///
/// `literal` selects the style: `ScalarStyle::Literal` when true, `ScalarStyle::Folded`
/// otherwise. In folded style, a single line break between two lines that do not start with
/// a blank is replaced by a space.
///
/// The header after the indicator may carry, in either order, a chomping indicator (`+`
/// keeps trailing breaks, `-` strips them) and a one-digit indentation indicator (`1`-`9`).
/// Without an indentation indicator, the indentation is detected from the leading lines.
///
/// # Errors
/// Fails when the indentation indicator is `0`, when the header is followed by anything
/// other than whitespace and a line break (or end of input), when the content starts with a
/// tab, or when the first content line is indented less than the block but more than the
/// parent. Errors from `skip_ws_to_eol` are propagated.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    // Value of the explicit indentation indicator; 0 means "none given".
    let mut increment: usize = 0;
    // Column at which content lines start; 0 means "not determined yet".
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        ScalarStyle::Literal
    } else {
        ScalarStyle::Folded
    };

    let mut string = String::new();
    // Line break that ended the previous content line.
    let mut leading_break = String::new();
    // Line breaks of the empty lines that followed it.
    let mut trailing_breaks = String::new();
    // Line break that ended the header line.
    let mut chomping_break = String::new();

    // Skip the block scalar indicator.
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header: chomping indicator then optional digit, or digit then optional chomping.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    self.skip_ws_to_eol(SkipTabs::Yes)?;

    // The header line must end here.
    self.input.lookahead(1);
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indentation indicator is relative to the current indentation level.
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    // Skip leading empty lines, detecting the indentation if it was not given.
    if indent == 0 {
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // The input ended before any content line: the scalar is made of line breaks at most.
    if self.input.next_is_z() {
        let contents = match chomping {
            // Stripping removes every trailing break; nothing remains.
            Chomping::Strip => String::new(),
            // Still on the header line: there was no line break at all.
            _ if self.mark.line == start_mark.line() => String::new(),
            // Clipping keeps only the break that ended the header line.
            Chomping::Clip => chomping_break,
            // Keeping, with nothing after the header line's break.
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            // Keeping, with further empty lines: those breaks are the content.
            Chomping::Keep => trailing_breaks,
        };
        return Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, contents.into()),
        ));
    }

    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        return Err(ScanError::new_str(
            self.mark,
            "wrongly indented line in block scalar",
        ));
    }

    // Scratch buffer reused by `scan_block_scalar_content_line` for every line.
    let mut line_buffer = String::with_capacity(100);
    // From here on, the token span starts at the first content character.
    let start_mark = self.mark;
    while self.mark.col == indent && !self.input.next_is_z() {
        // At column 0, a document end marker terminates the scalar.
        if indent == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        // We are on the first character of a content line.
        trailing_blank = self.input.next_is_blank();
        // Folding: join with the previous line unless either line starts with a blank.
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            string.push_str(&trailing_breaks);
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        // Stop at the end of input.
        self.input.lookahead(2);
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Consume the indentation and empty lines before the next content line.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Chomp the tail.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        // The input ended without a final line break on a line indented at least as much
        // as the scalar (and not at column 0): a line break is added.
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string.into()),
    ))
}
2454
2455 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
2465 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
2467 string.push(self.input.peek());
2468 self.skip_blank();
2474 }
2475
2476 if self.input.buf_is_empty() {
2479 let mut n_chars = 0;
2487 debug_assert!(line_buffer.is_empty());
2488 while let Some(c) = self.input.raw_read_non_breakz_ch() {
2489 line_buffer.push(c);
2490 n_chars += 1;
2491 }
2492
2493 self.mark.col += n_chars;
2495 self.mark.offsets.chars += n_chars;
2496 self.mark.offsets.bytes = self.input.byte_offset();
2497
2498 string.reserve(line_buffer.len());
2500 string.push_str(line_buffer);
2501 line_buffer.clear();
2503 }
2504 }
2505
2506 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
2508 loop {
2509 if indent < self.input.bufmaxlen() - 2 {
2511 self.input.lookahead(self.input.bufmaxlen());
2512 while self.mark.col < indent && self.input.peek() == ' ' {
2513 self.skip_blank();
2514 }
2515 } else {
2516 loop {
2517 self.input.lookahead(self.input.bufmaxlen());
2518 while !self.input.buf_is_empty()
2519 && self.mark.col < indent
2520 && self.input.peek() == ' '
2521 {
2522 self.skip_blank();
2523 }
2524 if self.mark.col == indent
2528 || (!self.input.buf_is_empty() && self.input.peek() != ' ')
2529 {
2530 break;
2531 }
2532 }
2533 self.input.lookahead(2);
2534 }
2535
2536 if self.input.next_is_break() {
2538 self.read_break(breaks);
2539 } else {
2540 break;
2542 }
2543 }
2544 }
2545
2546 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
2551 let mut max_indent = 0;
2552 loop {
2553 while self.input.look_ch() == ' ' {
2555 self.skip_blank();
2556 }
2557
2558 if self.mark.col > max_indent {
2559 max_indent = self.mark.col;
2560 }
2561
2562 if self.input.next_is_break() {
2563 self.input.lookahead(2);
2565 self.read_break(breaks);
2566 } else {
2567 break;
2569 }
2570 }
2571
2572 *indent = max_indent.max((self.indent + 1) as usize);
2581 if self.indent > 0 {
2582 *indent = (*indent).max(1);
2583 }
2584 }
2585
2586 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
2587 self.save_simple_key();
2588 self.disallow_simple_key();
2589
2590 let tok = self.scan_flow_scalar(single)?;
2591
2592 self.skip_to_next_token()?;
2595 self.adjacent_value_allowed_at = self.mark.index();
2596
2597 self.tokens.push_back(tok);
2598 Ok(())
2599 }
2600
/// Scans a quoted scalar and returns it as a `Scalar` token.
///
/// `single` selects single-quoted (`ScalarStyle::SingleQuoted`) or double-quoted
/// (`ScalarStyle::DoubleQuoted`) scanning.
///
/// When the input reports byte offsets, the contents are tracked as a byte range of the
/// input (`FlowScalarBuf::Borrowed`) for as long as they match the input verbatim. The
/// buffer is promoted to an owned string as soon as the contents have to differ from the
/// input (line folding here, escapes in `consume_flow_scalar_non_whitespace_chars`).
///
/// # Errors
/// Fails when a document indicator is found at column 0 inside the scalar, when the input
/// ends before the closing quote, when a tab is used as indentation on a continuation line,
/// when a continuation line in block context is not indented more than the parent, or when
/// the closing quote is followed by unexpected content. Errors from the helper scanners are
/// propagated.
#[allow(clippy::too_many_lines)]
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    let mut buf = match self.input.byte_offset() {
        // The borrowed range starts right after the character we are on (the opening quote).
        Some(off) => FlowScalarBuf::new_borrowed(off + self.input.peek().len_utf8()),
        None => FlowScalarBuf::new_owned(),
    };

    // Receives the first line break of a folded sequence; its contents are never used.
    let mut break_scratch = String::new();

    // Skip the opening quote.
    self.skip_non_blank();

    loop {
        self.input.lookahead(4);

        if self.mark.col == 0 && self.input.next_is_document_indicator() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a quoted scalar, found unexpected document indicator",
            ));
        }

        if self.input.next_is_z() {
            return Err(ScanError::new_str(start_mark, "unclosed quote"));
        }

        // Set once a line break has been seen in the current run of whitespace.
        let mut leading_blanks = false;
        self.consume_flow_scalar_non_whitespace_chars(
            single,
            &mut buf,
            &mut leading_blanks,
            &start_mark,
        )?;

        // Stop on the closing quote.
        match self.input.look_ch() {
            '\'' if single => break,
            '"' if !single => break,
            _ => {}
        }

        // Owned buffer: index in the string where the current run of blanks starts.
        let mut trailing_ws_start: Option<usize> = None;
        let mut has_leading_break = false;
        let mut has_trailing_breaks = false;

        // Borrowed buffer: byte offset where the current run of blanks starts.
        let mut pending_ws_start: Option<usize> = None;

        // Consume blanks and line breaks.
        while self.input.next_is_blank() || self.input.next_is_break() {
            if self.input.next_is_blank() {
                if leading_blanks {
                    // Blanks at the start of a continuation line are dropped.
                    if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tab cannot be used as indentation",
                        ));
                    }
                    self.skip_blank();
                } else {
                    // Blanks after content: kept tentatively, until we know whether a line
                    // break follows.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => {
                            if trailing_ws_start.is_none() {
                                trailing_ws_start = Some(string.len());
                            }
                            string.push(self.input.peek());
                        }
                        FlowScalarBuf::Borrowed { .. } => {
                            if pending_ws_start.is_none() {
                                pending_ws_start = self.input.byte_offset();
                            }
                        }
                    }
                    self.skip_blank();

                    if let (FlowScalarBuf::Borrowed { .. }, Some(ws_start), Some(ws_end)) =
                        (&mut buf, pending_ws_start, self.input.byte_offset())
                    {
                        buf.note_pending_ws(ws_start, ws_end);
                    }
                }
            } else {
                self.input.lookahead(2);

                if leading_blanks {
                    // A further line break: it is kept, which requires an owned buffer.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => self.read_break(string),
                        FlowScalarBuf::Borrowed { .. } => {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                            let Some(string) = buf.as_owned_mut() else {
                                unreachable!()
                            };
                            self.read_break(string);
                        }
                    }
                    has_trailing_breaks = true;
                } else {
                    // First line break of the run: blanks collected before it are dropped.
                    if let Some(pos) = trailing_ws_start.take() {
                        if let FlowScalarBuf::Owned(ref mut string) = buf {
                            string.truncate(pos);
                        }
                    }

                    if pending_ws_start.take().is_some() {
                        if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        }
                        buf.discard_pending_ws();
                    } else {
                        buf.commit_pending_ws();
                    }

                    // The break itself is consumed but not kept.
                    break_scratch.clear();
                    self.read_break(&mut break_scratch);
                    has_leading_break = true;
                    leading_blanks = true;
                }
            }

            self.input.lookahead(1);
        }

        // In block context, a continuation line must be indented more than the parent,
        // unless it only holds the closing quote.
        if leading_blanks && has_leading_break && self.flow_level == 0 {
            let next_ch = self.input.peek();
            let is_closing_quote = (single && next_ch == '\'') || (!single && next_ch == '"');
            if !is_closing_quote && (self.mark.col as isize) <= self.indent {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid indentation in multiline quoted scalar",
                ));
            }
        }

        if leading_blanks {
            // A single line break folds into a space.
            if has_leading_break && !has_trailing_breaks {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => string.push(' '),
                    FlowScalarBuf::Borrowed { .. } => {
                        self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        let Some(string) = buf.as_owned_mut() else {
                            unreachable!()
                        };
                        string.push(' ');
                    }
                }
            }
        }
    }

    // Skip the closing quote.
    self.skip_non_blank();

    // Check what follows the scalar on the same line.
    self.skip_ws_to_eol(SkipTabs::Yes)?;
    match self.input.peek() {
        // Flow indicators, inside a flow collection.
        ',' | '}' | ']' if self.flow_level > 0 => {}
        // A line break or the end of input.
        c if is_breakz(c) => {}
        // In block context, a `:` is accepted only if the scalar fits on one line.
        ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
        // In flow context, a `:` is always accepted.
        ':' if self.flow_level > 0 => {}
        _ => {
            return Err(ScanError::new_str(
                self.mark,
                "invalid trailing content after double-quoted scalar",
            ));
        }
    }

    let style = if single {
        ScalarStyle::SingleQuoted
    } else {
        ScalarStyle::DoubleQuoted
    };

    let contents = match buf {
        FlowScalarBuf::Owned(string) => Cow::Owned(string),
        FlowScalarBuf::Borrowed {
            start,
            mut end,
            pending_ws_start,
            pending_ws_end,
        } => {
            // Blanks still pending when the closing quote was met belong to the contents.
            if pending_ws_start.is_some() {
                end = pending_ws_end;
            }
            if let Some(slice) = self.try_borrow_slice(start, end) {
                Cow::Borrowed(slice)
            } else {
                let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
                    ScanError::new_str(
                        start_mark,
                        "internal error: input advertised offsets but did not provide a slice",
                    )
                })?;
                Cow::Owned(slice.to_owned())
            }
        }
    };

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, contents),
    ))
}
2848
/// Consumes the non-whitespace characters of a quoted scalar into `buf`.
///
/// Scanning stops, without consuming it, on a blank, a line break, the end of input or the
/// closing quote. It also stops after an escaped line break (`\` followed by a break, in
/// double-quoted scalars), in which case both are consumed and `leading_blanks` is set.
///
/// A borrowed `buf` is promoted to an owned one whenever the contents can no longer match
/// the input verbatim: an escaped quote (`''`) in single-quoted scalars, or any `\` escape
/// in double-quoted scalars. Otherwise only the end of the borrowed range is advanced.
///
/// # Errors
/// Propagates errors from `promote_flow_scalar_buf_to_owned` and
/// `resolve_flow_scalar_escape_sequence`.
fn consume_flow_scalar_non_whitespace_chars(
    &mut self,
    single: bool,
    buf: &mut FlowScalarBuf,
    leading_blanks: &mut bool,
    start_mark: &Marker,
) -> Result<(), ScanError> {
    self.input.lookahead(2);
    while !is_blank_or_breakz(self.input.peek()) {
        match self.input.peek() {
            // `''` in a single-quoted scalar stands for one quote character.
            '\'' if self.input.peek_nth(1) == '\'' && single => {
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                let Some(string) = buf.as_owned_mut() else {
                    unreachable!()
                };
                string.push('\'');
                self.skip_n_non_blank(2);
            }
            // Closing quote: left for the caller.
            '\'' if single => break,
            '"' if !single => break,
            // Escaped line break: consumed, and contributes nothing to the contents.
            '\\' if !single && is_break(self.input.peek_nth(1)) => {
                self.input.lookahead(3);
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                self.skip_non_blank();
                self.skip_linebreak();
                *leading_blanks = true;
                break;
            }
            // Any other escape sequence: decoded into the owned buffer.
            '\\' if !single => {
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                let Some(string) = buf.as_owned_mut() else {
                    unreachable!()
                };
                string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
            }
            // Ordinary character.
            c => {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => {
                        string.push(c);
                    }
                    FlowScalarBuf::Borrowed { .. } => {
                        // Blanks seen before this character are now part of the contents.
                        buf.commit_pending_ws();
                    }
                }
                self.skip_non_blank();

                // Extend the borrowed range past the character just consumed.
                if let Some(new_end) = self.input.byte_offset() {
                    if let FlowScalarBuf::Borrowed { end, .. } = buf {
                        *end = new_end;
                    }
                }
            }
        }
        self.input.lookahead(2);
    }
    Ok(())
}
2927
2928 fn resolve_flow_scalar_escape_sequence(
2935 &mut self,
2936 start_mark: &Marker,
2937 ) -> Result<char, ScanError> {
2938 let mut code_length = 0usize;
2939 let mut ret = '\0';
2940
2941 match self.input.peek_nth(1) {
2942 '0' => ret = '\0',
2943 'a' => ret = '\x07',
2944 'b' => ret = '\x08',
2945 't' | '\t' => ret = '\t',
2946 'n' => ret = '\n',
2947 'v' => ret = '\x0b',
2948 'f' => ret = '\x0c',
2949 'r' => ret = '\x0d',
2950 'e' => ret = '\x1b',
2951 ' ' => ret = '\x20',
2952 '"' => ret = '"',
2953 '/' => ret = '/',
2954 '\\' => ret = '\\',
2955 'N' => ret = char::from_u32(0x85).unwrap(),
2957 '_' => ret = char::from_u32(0xA0).unwrap(),
2959 'L' => ret = char::from_u32(0x2028).unwrap(),
2961 'P' => ret = char::from_u32(0x2029).unwrap(),
2963 'x' => code_length = 2,
2964 'u' => code_length = 4,
2965 'U' => code_length = 8,
2966 _ => {
2967 return Err(ScanError::new_str(
2968 *start_mark,
2969 "while parsing a quoted scalar, found unknown escape character",
2970 ))
2971 }
2972 }
2973 self.skip_n_non_blank(2);
2974
2975 if code_length > 0 {
2977 self.input.lookahead(code_length);
2978 let mut value = 0u32;
2979 for i in 0..code_length {
2980 let c = self.input.peek_nth(i);
2981 if !is_hex(c) {
2982 return Err(ScanError::new_str(
2983 *start_mark,
2984 "while parsing a quoted scalar, did not find expected hexadecimal number",
2985 ));
2986 }
2987 value = (value << 4) + as_hex(c);
2988 }
2989
2990 self.skip_n_non_blank(code_length);
2991
2992 if code_length == 4 && (0xD800..=0xDBFF).contains(&value) {
2994 self.input.lookahead(2);
2995 if self.input.peek() == '\\' && self.input.peek_nth(1) == 'u' {
2996 self.skip_n_non_blank(2);
2997 self.input.lookahead(4);
2998 let mut low_value = 0u32;
2999 for i in 0..4 {
3000 let c = self.input.peek_nth(i);
3001 if !is_hex(c) {
3002 return Err(ScanError::new_str(
3003 *start_mark,
3004 "while parsing a quoted scalar, did not find expected hexadecimal number for low surrogate",
3005 ));
3006 }
3007 low_value = (low_value << 4) + as_hex(c);
3008 }
3009 if (0xDC00..=0xDFFF).contains(&low_value) {
3010 value = 0x10000 + (((value - 0xD800) << 10) | (low_value - 0xDC00));
3011 self.skip_n_non_blank(4);
3012 } else {
3013 return Err(ScanError::new_str(
3014 *start_mark,
3015 "while parsing a quoted scalar, found invalid low surrogate",
3016 ));
3017 }
3018 } else {
3019 return Err(ScanError::new_str(
3020 *start_mark,
3021 "while parsing a quoted scalar, found high surrogate without following low surrogate",
3022 ));
3023 }
3024 } else if code_length == 4 && (0xDC00..=0xDFFF).contains(&value) {
3025 return Err(ScanError::new_str(
3026 *start_mark,
3027 "while parsing a quoted scalar, found unpaired low surrogate",
3028 ));
3029 }
3030
3031 let Some(ch) = char::from_u32(value) else {
3032 return Err(ScanError::new_str(
3033 *start_mark,
3034 "while parsing a quoted scalar, found invalid Unicode character escape code",
3035 ));
3036 };
3037 ret = ch;
3038 }
3039 Ok(ret)
3040 }
3041
3042 fn fetch_plain_scalar(&mut self) -> ScanResult {
3043 self.save_simple_key();
3044 self.disallow_simple_key();
3045
3046 let tok = self.scan_plain_scalar()?;
3047
3048 self.tokens.push_back(tok);
3049 Ok(())
3050 }
3051
    #[allow(clippy::too_many_lines)]
    /// Scan a plain (unquoted) scalar starting at the current position.
    ///
    /// Content is accumulated into an owned `String`. Blanks and line breaks found between
    /// runs of content are held back in `buf_whitespaces`, `buf_leading_break` and
    /// `buf_trailing_breaks`, and are only appended once more content follows, so trailing
    /// whitespace never ends up in the scalar. A single line break between two content runs
    /// is folded into one space; when several breaks occur, the first is dropped and the
    /// remaining ones are kept as `'\n'`.
    ///
    /// On success the token spans from the starting mark to the mark just after the last
    /// content character. The contents are returned as `Cow::Borrowed` when both marks carry
    /// a byte offset and `try_borrow_slice` yields a slice equal to the accumulated string;
    /// otherwise the accumulated string is returned as `Cow::Owned`.
    ///
    /// # Errors
    /// Returns a `ScanError` when:
    /// - in flow context, the scalar starts left of the current indentation + 1,
    /// - in flow context, a `-` is immediately followed by a flow indicator,
    /// - a tab is found in the indentation of a continuation line that is not empty,
    /// - no content at all could be consumed.
    fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
        // Drop indent entries that do not require a block end so that `self.indent` reflects
        // the enclosing block before the threshold below is computed.
        self.unroll_non_block_indents();
        // Minimum column for content of this scalar (checked for flow context right below and
        // for block context at the bottom of the main loop).
        let indent = self.indent + 1;
        let start_mark = self.mark;

        if self.flow_level > 0 && (start_mark.col as isize) < indent {
            return Err(ScanError::new_str(
                start_mark,
                "invalid indentation in flow construct",
            ));
        }

        let mut string = String::with_capacity(32);
        self.buf_whitespaces.clear();
        self.buf_leading_break.clear();
        self.buf_trailing_breaks.clear();
        // Updated after every run of content; stays at the start if nothing is consumed.
        let mut end_mark = self.mark;

        loop {
            self.input.lookahead(4);
            // Stop at a document indicator in column 0, or at a `#`.
            if (self.mark.col == 0 && self.input.next_is_document_indicator())
                || self.input.peek() == '#'
            {
                // Record where a `#` cut the scalar short, but only in block context, when
                // some content was already read and blanks separate it from the `#`.
                // NOTE(review): the consumer of `interrupted_plain_by_comment` is not visible
                // in this function.
                if self.input.peek() == '#'
                    && !string.is_empty()
                    && !self.buf_whitespaces.is_empty()
                    && self.flow_level == 0
                {
                    self.interrupted_plain_by_comment = Some(self.mark);
                }
                break;
            }

            if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
                return Err(ScanError::new_str(
                    self.mark,
                    "plain scalar cannot start with '-' followed by ,[]{}",
                ));
            }

            if !self.input.next_is_blank_or_breakz()
                && self.input.next_can_be_plain_scalar(self.flow_level > 0)
            {
                // Content follows: flush whatever separator was buffered since the last run.
                if self.leading_whitespace {
                    if self.buf_leading_break.is_empty() {
                        // No line break was buffered since the last content run:
                        // `buf_leading_break` is empty here, so pushing it adds nothing and
                        // only pending trailing breaks (if any) are appended.
                        string.push_str(&self.buf_leading_break);
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                        self.buf_leading_break.clear();
                    } else {
                        // At least one line break was crossed. A lone break folds into a
                        // space; otherwise the extra breaks are emitted and the first one is
                        // discarded.
                        if self.buf_trailing_breaks.is_empty() {
                            string.push(' ');
                        } else {
                            string.push_str(&self.buf_trailing_breaks);
                            self.buf_trailing_breaks.clear();
                        }
                        self.buf_leading_break.clear();
                    }
                    self.leading_whitespace = false;
                } else if !self.buf_whitespaces.is_empty() {
                    // Same line: keep the blanks between the two content runs verbatim.
                    string.push_str(&self.buf_whitespaces);
                    self.buf_whitespaces.clear();
                }

                // The first character was already validated by the condition above; consume
                // it directly before handing over to the chunked fast path.
                string.push(self.input.peek());
                self.skip_non_blank();
                string.reserve(self.input.bufmaxlen());

                let mut end = false;
                // Consume the rest of the content run one buffer-sized chunk at a time.
                while !end {
                    self.input.lookahead(self.input.bufmaxlen());
                    // The helper receives `&mut string` and reports whether the run is over
                    // and how many characters it consumed; the marker is advanced here.
                    let (stop, chars_consumed) = self.input.fetch_plain_scalar_chunk(
                        &mut string,
                        self.input.bufmaxlen() - 1,
                        self.flow_level > 0,
                    );
                    end = stop;
                    self.mark.offsets.chars += chars_consumed;
                    self.mark.col += chars_consumed;
                    self.mark.offsets.bytes = self.input.byte_offset();
                }
                end_mark = self.mark;
            }

            // Anything other than a blank or a line break ends the scalar here.
            if !(self.input.next_is_blank() || self.input.next_is_break()) {
                break;
            }

            self.input.lookahead(2);
            // Consume blanks and line breaks, buffering them until more content shows up.
            while self.input.next_is_blank_or_break() {
                if self.input.next_is_blank() {
                    if !self.leading_whitespace {
                        // Blank on the same line as content: remember it verbatim.
                        self.buf_whitespaces.push(self.input.peek());
                        self.skip_blank();
                    } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                        // A tab inside the indentation of a continuation line is only
                        // tolerated when the rest of that line is empty.
                        self.skip_ws_to_eol(SkipTabs::Yes)?;
                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                start_mark,
                                "while scanning a plain scalar, found a tab",
                            ));
                        }
                    } else {
                        // Leading blank on a continuation line: dropped.
                        self.skip_blank();
                    }
                } else {
                    if self.leading_whitespace {
                        // Second or later consecutive break: kept as a literal newline.
                        self.skip_break();
                        self.buf_trailing_breaks.push('\n');
                    } else {
                        // First break after content: blanks before it are discarded and the
                        // break itself is a candidate for folding.
                        self.buf_whitespaces.clear();
                        self.skip_break();
                        self.buf_leading_break.push('\n');
                        self.leading_whitespace = true;
                    }
                }
                self.input.lookahead(2);
            }

            // In block context, a line starting left of the threshold ends the scalar.
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        // The scalar ended after blanks/breaks were consumed without content following them;
        // a simple key may start at the new position.
        if self.leading_whitespace {
            self.allow_simple_key();
        }

        if string.is_empty() {
            // Nothing was consumed: report an error instead of returning an empty token.
            Err(ScanError::new_str(
                start_mark,
                "unexpected end of plain scalar",
            ))
        } else {
            // Borrow from the input when the raw slice is exactly the scalar's value (i.e. no
            // folding or whitespace dropping changed it); otherwise hand out the owned copy.
            let contents = if let (Some(start), Some(end)) =
                (start_mark.byte_offset(), end_mark.byte_offset())
            {
                match self.try_borrow_slice(start, end) {
                    Some(slice) if slice == string => Cow::Borrowed(slice),
                    _ => Cow::Owned(string),
                }
            } else {
                Cow::Owned(string)
            };

            Ok(Token(
                Span::new(start_mark, end_mark),
                TokenType::Scalar(ScalarStyle::Plain, contents),
            ))
        }
    }
3229
3230 fn fetch_key(&mut self) -> ScanResult {
3231 let start_mark = self.mark;
3232 if self.flow_level == 0 {
3233 if !self.simple_key_allowed {
3235 return Err(ScanError::new_str(
3236 self.mark,
3237 "mapping keys are not allowed in this context",
3238 ));
3239 }
3240 self.roll_indent(
3241 start_mark.col,
3242 None,
3243 TokenType::BlockMappingStart,
3244 start_mark,
3245 );
3246 } else {
3247 self.flow_mapping_started = true;
3249 }
3250
3251 self.remove_simple_key()?;
3252
3253 if self.flow_level == 0 {
3254 self.allow_simple_key();
3255 } else {
3256 self.disallow_simple_key();
3257 }
3258
3259 self.skip_non_blank();
3260 self.skip_yaml_whitespace()?;
3261 if self.input.peek() == '\t' {
3262 return Err(ScanError::new_str(
3263 self.mark(),
3264 "tabs disallowed in this context",
3265 ));
3266 }
3267 self.tokens
3268 .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
3269 Ok(())
3270 }
3271
3272 fn fetch_flow_value(&mut self) -> ScanResult {
3280 let nc = self.input.peek_nth(1);
3281
3282 if self.mark.index() != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
3294 return Err(ScanError::new_str(
3295 self.mark,
3296 "':' may not precede any of `[{` in flow mapping",
3297 ));
3298 }
3299
3300 self.fetch_value()
3301 }
3302
    /// Handle a `:` value indicator and queue a `Value` token.
    ///
    /// If the most recent simple-key candidate is still possible, a `Key` token (and, for an
    /// implicit flow mapping, a `FlowMappingStart` token) is inserted retroactively at the
    /// position where that candidate began, and a `BlockMappingStart` may be inserted there
    /// too through `roll_indent`. Otherwise the `:` is treated as following a non-simple key
    /// and the tokens are appended at the end of the queue.
    ///
    /// # Errors
    /// Returns a `ScanError` when the `:` is followed by a tab that is not valid YAML
    /// whitespace and then by `-` or an alphabetic character, when the `:` of an implicit
    /// flow mapping is on a later line than its key, or when a mapping value is not allowed
    /// at this point in block context.
    ///
    /// # Panics
    /// Panics if the simple-key stack is empty.
    fn fetch_value(&mut self) -> ScanResult {
        // Work on a snapshot of the candidate: `self` is mutated below.
        let sk = self.simple_keys.last().unwrap().clone();
        let start_mark = self.mark;
        // An implicit flow mapping is in play when an implicit-mapping state exists and no
        // explicit flow mapping start has been recorded.
        let is_implicit_flow_mapping =
            !self.implicit_flow_mapping_states.is_empty() && !self.flow_mapping_started;
        if is_implicit_flow_mapping {
            // Remember at which flow level the implicit mapping was entered.
            *self.implicit_flow_mapping_states.last_mut().unwrap() =
                ImplicitMappingState::Inside(self.flow_level);
        }

        // Step over the ':' indicator.
        self.skip_non_blank();
        // The conditions are evaluated left to right: whitespace is only skipped when a tab
        // directly follows the ':'.
        if self.input.look_ch() == '\t'
            && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
            && (self.input.peek() == '-' || self.input.next_is_alpha())
        {
            return Err(ScanError::new_str(
                self.mark,
                "':' must be followed by a valid YAML whitespace",
            ));
        }

        if sk.possible {
            // The text before the ':' was a simple key: insert its `Key` token at the queue
            // position it had when the candidate was saved.
            let tok = Token(Span::empty(sk.mark), TokenType::Key);
            self.insert_token(sk.token_number - self.tokens_parsed, tok);
            if is_implicit_flow_mapping {
                // The key of an implicit flow mapping must not be on an earlier line than
                // its ':'.
                if sk.mark.line < start_mark.line {
                    return Err(ScanError::new_str(
                        start_mark,
                        "illegal placement of ':' indicator",
                    ));
                }
                // Inserted at the same index as the `Key` above, so it ends up before it.
                self.insert_token(
                    sk.token_number - self.tokens_parsed,
                    Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
                );
            }

            // May insert a `BlockMappingStart` at the key's position (no-op in flow context).
            self.roll_indent(
                sk.mark.col,
                Some(sk.token_number),
                TokenType::BlockMappingStart,
                sk.mark,
            );
            self.roll_one_col_indent();

            // The candidate has been consumed.
            self.simple_keys.last_mut().unwrap().possible = false;
            self.disallow_simple_key();
        } else {
            // No simple key precedes the ':'.
            if is_implicit_flow_mapping {
                self.tokens
                    .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
            }
            if self.flow_level == 0 {
                if !self.simple_key_allowed {
                    return Err(ScanError::new_str(
                        start_mark,
                        "mapping values are not allowed in this context",
                    ));
                }

                self.roll_indent(
                    start_mark.col,
                    None,
                    TokenType::BlockMappingStart,
                    start_mark,
                );
            }
            self.roll_one_col_indent();

            // A simple key may follow the ':' in block context only.
            if self.flow_level == 0 {
                self.allow_simple_key();
            } else {
                self.disallow_simple_key();
            }
        }
        self.tokens
            .push_back(Token(Span::empty(start_mark), TokenType::Value));

        Ok(())
    }
3394
3395 fn roll_indent(
3401 &mut self,
3402 col: usize,
3403 number: Option<usize>,
3404 tok: TokenType<'input>,
3405 mark: Marker,
3406 ) {
3407 if self.flow_level > 0 {
3408 return;
3409 }
3410
3411 if self.indent <= col as isize {
3415 if let Some(indent) = self.indents.last() {
3416 if !indent.needs_block_end {
3417 self.indent = indent.indent;
3418 self.indents.pop();
3419 }
3420 }
3421 }
3422
3423 if self.indent < col as isize {
3424 self.indents.push(Indent {
3425 indent: self.indent,
3426 needs_block_end: true,
3427 });
3428 self.indent = col as isize;
3429 let tokens_parsed = self.tokens_parsed;
3430 match number {
3431 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
3432 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
3433 }
3434 }
3435 }
3436
3437 fn unroll_indent(&mut self, col: isize) {
3443 if self.flow_level > 0 {
3444 return;
3445 }
3446 while self.indent > col {
3447 let indent = self.indents.pop().unwrap();
3448 self.indent = indent.indent;
3449 if indent.needs_block_end {
3450 self.tokens
3451 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
3452 }
3453 }
3454 }
3455
3456 fn roll_one_col_indent(&mut self) {
3462 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
3463 self.indents.push(Indent {
3464 indent: self.indent,
3465 needs_block_end: false,
3466 });
3467 self.indent += 1;
3468 }
3469 }
3470
3471 fn unroll_non_block_indents(&mut self) {
3473 while let Some(indent) = self.indents.last() {
3474 if indent.needs_block_end {
3475 break;
3476 }
3477 self.indent = indent.indent;
3478 self.indents.pop();
3479 }
3480 }
3481
3482 fn save_simple_key(&mut self) {
3484 if self.simple_key_allowed {
3485 let required = self.flow_level == 0
3486 && self.indent == (self.mark.col as isize)
3487 && self.indents.last().unwrap().needs_block_end;
3488
3489 if let Some(last) = self.simple_keys.last_mut() {
3490 *last = SimpleKey {
3491 mark: self.mark,
3492 possible: true,
3493 required,
3494 token_number: self.tokens_parsed + self.tokens.len(),
3495 };
3496 }
3497 }
3498 }
3499
3500 fn remove_simple_key(&mut self) -> ScanResult {
3501 let last = self.simple_keys.last_mut().unwrap();
3502 if last.possible && last.required {
3503 return Err(self.simple_key_expected());
3504 }
3505
3506 last.possible = false;
3507 Ok(())
3508 }
3509
3510 fn is_within_block(&self) -> bool {
3512 !self.indents.is_empty()
3513 }
3514
3515 fn end_implicit_mapping(&mut self, mark: Marker, flow_level: u8) {
3521 if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
3522 if *implicit_mapping == ImplicitMappingState::Inside(flow_level) {
3523 self.flow_mapping_started = false;
3524 *implicit_mapping = ImplicitMappingState::Possible;
3525 self.tokens
3526 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
3527 }
3528 }
3529 }
3530}
3531
/// Chomping method of a block scalar, i.e. how its trailing line breaks are treated.
///
/// The variant names follow the chomping methods of the YAML 1.2 specification
/// (section 8.1.1.2, "Block Chomping Indicator").
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// "Strip" chomping (`-` indicator): the final line break and any trailing empty lines
    /// are excluded from the scalar.
    Strip,
    /// "Clip" chomping (no indicator): the final line break is kept, trailing empty lines
    /// are excluded.
    Clip,
    /// "Keep" chomping (`+` indicator): the final line break and trailing empty lines are
    /// kept.
    Keep,
}
3544
#[cfg(test)]
mod test {
    use alloc::borrow::Cow;

    use crate::{
        input::str::StrInput,
        scanner::{Scanner, TokenType},
    };

    /// An ASCII letter is a valid anchor character.
    #[test]
    fn test_is_anchor_char() {
        use super::is_anchor_char;
        assert!(is_anchor_char('x'));
    }

    /// An anchor name scanned from a `StrInput` borrows from the input.
    #[test]
    fn anchor_name_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("&anch\n"));

        let name = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Anchor(name) = tok.1 {
                break name;
            }
        };

        assert!(matches!(name, Cow::Borrowed("anch")));
    }

    /// An alias name scanned from a `StrInput` borrows from the input.
    #[test]
    fn alias_name_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("*anch\n"));

        let name = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Alias(name) = tok.1 {
                break name;
            }
        };

        assert!(matches!(name, Cow::Borrowed("anch")));
    }

    /// Both parts of a `%TAG` directive borrow from the input.
    #[test]
    fn tag_directive_parts_are_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("%TAG !e! tag:example.com,2000:app/\n"));

        let (handle, prefix) = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::TagDirective(handle, prefix) = tok.1 {
                break (handle, prefix);
            }
        };

        assert!(matches!(handle, Cow::Borrowed("!e!")));
        assert!(matches!(prefix, Cow::Borrowed("tag:example.com,2000:app/")));
    }

    /// A single-word plain scalar borrows from the input.
    #[test]
    fn plain_scalar_is_borrowed_when_whitespace_free_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("foo\n"));

        let value = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                break value;
            }
        };

        assert!(matches!(value, Cow::Borrowed("foo")));
    }

    /// A plain scalar with inner blanks on a single line still borrows from the input.
    #[test]
    fn plain_scalar_is_borrowed_when_whitespace_present_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("foo bar\n"));

        let value = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                break value;
            }
        };

        assert!(matches!(value, Cow::Borrowed("foo bar")));
    }

    /// A single-quoted scalar without escapes borrows from the input.
    #[test]
    fn single_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("'foo bar'\n"));

        let value = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                break value;
            }
        };

        assert!(matches!(value, Cow::Borrowed("foo bar")));
    }

    /// A single-quoted scalar containing an escaped quote has to be owned.
    #[test]
    fn single_quoted_scalar_is_owned_when_quote_is_escaped_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("'foo''bar'\n"));

        let value = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                break value;
            }
        };

        assert!(matches!(value, Cow::Owned(_)));
        assert_eq!(&*value, "foo'bar");
    }

    /// A double-quoted scalar without escapes borrows from the input.
    #[test]
    fn double_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("\"foo bar\"\n"));

        let value = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                break value;
            }
        };

        assert!(matches!(value, Cow::Borrowed("foo bar")));
    }

    /// A double-quoted scalar containing an escape sequence has to be owned.
    #[test]
    fn double_quoted_scalar_is_owned_when_escape_sequence_present_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("\"foo\\nbar\"\n"));

        let value = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                break value;
            }
        };

        assert!(matches!(value, Cow::Owned(_)));
        assert_eq!(&*value, "foo\nbar");
    }

    /// The scalar following a `Key` token borrows from the input for a plain key.
    #[test]
    fn plain_key_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("mykey: value\n"));

        let mut found_key = false;
        let mut scalar_after_key: Option<Cow<'_, str>> = None;

        // Scan until the first scalar that comes after a `Key` token, or until the end.
        while let Some(tok) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
        {
            match tok.1 {
                TokenType::Key => found_key = true,
                TokenType::Scalar(_, value) if found_key => {
                    scalar_after_key = Some(value);
                    break;
                }
                _ => {}
            }
        }

        assert!(found_key, "expected to find a Key token");
        let key_value = scalar_after_key.expect("expected to find a scalar after Key token");
        assert!(
            matches!(key_value, Cow::Borrowed("mykey")),
            "key should be borrowed, got: {key_value:?}"
        );
    }

    /// The scalar following a `Key` token borrows from the input for a verbatim quoted key.
    #[test]
    fn quoted_key_is_borrowed_when_verbatim_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("\"mykey\": value\n"));

        let mut found_key = false;
        let mut scalar_after_key: Option<Cow<'_, str>> = None;

        // Scan until the first scalar that comes after a `Key` token, or until the end.
        while let Some(tok) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
        {
            match tok.1 {
                TokenType::Key => found_key = true,
                TokenType::Scalar(_, value) if found_key => {
                    scalar_after_key = Some(value);
                    break;
                }
                _ => {}
            }
        }

        assert!(found_key, "expected to find a Key token");
        let key_value = scalar_after_key.expect("expected to find a scalar after Key token");
        assert!(
            matches!(key_value, Cow::Borrowed("mykey")),
            "quoted key should be borrowed when verbatim, got: {key_value:?}"
        );
    }

    /// Handle and suffix of a `!!` tag borrow from the input.
    #[test]
    fn tag_handle_and_suffix_are_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("!!str foo\n"));

        let (handle, suffix) = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Tag(handle, suffix) = tok.1 {
                break (handle, suffix);
            }
        };

        assert!(
            matches!(handle, Cow::Borrowed("!!")),
            "tag handle should be borrowed, got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Borrowed("str")),
            "tag suffix should be borrowed, got: {suffix:?}"
        );
    }

    /// Handle and suffix of a local (`!name`) tag borrow from the input.
    #[test]
    fn local_tag_suffix_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("!mytag foo\n"));

        let (handle, suffix) = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Tag(handle, suffix) = tok.1 {
                break (handle, suffix);
            }
        };

        assert!(
            matches!(handle, Cow::Borrowed("!")),
            "local tag handle should be '!', got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Borrowed("mytag")),
            "local tag suffix should be borrowed, got: {suffix:?}"
        );
    }

    /// A tag suffix containing a URI escape is decoded and therefore owned; the handle is
    /// still borrowed.
    #[test]
    fn tag_with_uri_escape_is_owned_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("!!my%20tag foo\n"));

        let (handle, suffix) = loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Tag(handle, suffix) = tok.1 {
                break (handle, suffix);
            }
        };

        assert!(
            matches!(handle, Cow::Borrowed("!!")),
            "tag handle should still be borrowed, got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Owned(_)),
            "tag suffix with URI escape should be owned, got: {suffix:?}"
        );
        assert_eq!(&*suffix, "my tag");
    }
}