1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use alloc::{
13 borrow::{Cow, ToOwned},
14 collections::VecDeque,
15 string::String,
16 vec::Vec,
17};
18use core::{char, fmt};
19
20use crate::{
21 char_traits::{
22 as_hex, is_anchor_char, is_blank_or_breakz, is_bom, is_break, is_breakz, is_flow, is_hex,
23 is_tag_char, is_uri_char,
24 },
25 input::{BorrowedInput, SkipTabs},
26};
27
/// Maximum distance, in characters, between the start of a candidate simple
/// key and the current scanner position.
///
/// `stale_simple_keys` stops treating a key as possible (or reports an error
/// if the key was required) once this distance is exceeded.
const SIMPLE_KEY_MAX_LOOKAHEAD: usize = 1024;
30
/// Character encoding of the scanned stream.
///
/// UTF-8 is the only encoding declared by this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// The stream is UTF-8 encoded.
    Utf8,
}
37
/// Presentation style of a scalar.
///
/// The variant order is significant: the derived `PartialOrd`/`Ord`
/// implementations compare variants by declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ScalarStyle {
    /// An unquoted scalar.
    Plain,
    /// A scalar delimited by single quotes (`'`).
    SingleQuoted,
    /// A scalar delimited by double quotes (`"`).
    DoubleQuoted,

    /// A literal block scalar (YAML `|` indicator).
    Literal,
    /// A folded block scalar (YAML `>` indicator).
    Folded,
}
62
/// Offsets of a position within the input.
///
/// Equality is defined by the hand-written `PartialEq` impl below and looks
/// at the character offset only.
#[derive(Debug, Default, Clone, Copy)]
pub struct MarkerOffsets {
    /// Offset counted in characters.
    chars: usize,
    /// Offset counted in bytes, when it is known.
    bytes: Option<usize>,
}

impl PartialEq for MarkerOffsets {
    /// Two offsets compare equal when their character offsets match; the
    /// optional byte offset does not take part in the comparison.
    fn eq(&self, rhs: &Self) -> bool {
        let Self {
            chars: lhs_chars, ..
        } = *self;
        lhs_chars == rhs.chars
    }
}

impl Eq for MarkerOffsets {}
87
88#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
90pub struct Marker {
91 offsets: MarkerOffsets,
93 line: usize,
95 col: usize,
97}
98
99impl Marker {
100 #[must_use]
102 pub fn new(index: usize, line: usize, col: usize) -> Marker {
103 Marker {
104 offsets: MarkerOffsets {
105 chars: index,
106 bytes: None,
107 },
108 line,
109 col,
110 }
111 }
112
113 #[must_use]
115 pub fn with_byte_offset(mut self, byte_offset: Option<usize>) -> Marker {
116 self.offsets.bytes = byte_offset;
117 self
118 }
119
120 #[must_use]
122 pub fn index(&self) -> usize {
123 self.offsets.chars
124 }
125
126 #[must_use]
128 pub fn byte_offset(&self) -> Option<usize> {
129 self.offsets.bytes
130 }
131
132 #[must_use]
134 pub fn line(&self) -> usize {
135 self.line
136 }
137
138 #[must_use]
140 pub fn col(&self) -> usize {
141 self.col
142 }
143}
144
145#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
147pub struct Span {
148 pub start: Marker,
150 pub end: Marker,
152
153 pub indent: Option<usize>,
158}
159
160impl Span {
161 #[must_use]
163 pub fn new(start: Marker, end: Marker) -> Span {
164 Span {
165 start,
166 end,
167 indent: None,
168 }
169 }
170
171 #[must_use]
178 pub fn empty(mark: Marker) -> Span {
179 Span {
180 start: mark,
181 end: mark,
182 indent: None,
183 }
184 }
185
186 #[must_use]
188 pub fn with_indent(mut self, indent: Option<usize>) -> Span {
189 self.indent = indent;
190 self
191 }
192
193 #[must_use]
195 pub fn len(&self) -> usize {
196 self.end.index() - self.start.index()
197 }
198
199 #[must_use]
201 pub fn is_empty(&self) -> bool {
202 self.len() == 0
203 }
204
205 #[must_use]
207 pub fn byte_range(&self) -> Option<core::ops::Range<usize>> {
208 let start = self.start.byte_offset()?;
209 let end = self.end.byte_offset()?;
210 Some(start..end)
211 }
212
213 #[must_use]
216 pub fn slice<'source>(&self, source: &'source str) -> Option<&'source str> {
217 source.get(self.byte_range()?)
218 }
219}
220
/// Where a comment sits relative to surrounding content.
///
/// The scanner code visible here only assigns [`Placement::Free`] and
/// [`Placement::Right`]; the other variants are presumably assigned by later
/// processing stages (TODO confirm).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum Placement {
    /// Comment placed above the content it relates to.
    Above,
    /// Comment that follows other content on the same line.
    Right,
    /// Comment not attached to anything in particular; this is the default
    /// and is what the scanner assigns to a comment that begins its line.
    #[default]
    Free,
    /// Comment placed last (NOTE(review): exact meaning is not established
    /// by this part of the file).
    Last,
}
259
260#[derive(Clone, PartialEq, Debug, Eq)]
266pub struct Comment<'input> {
267 pub span: Span,
269 pub text: Cow<'input, str>,
273 pub placement: Placement,
275}
276
277impl<'input> Comment<'input> {
278 #[must_use]
283 pub fn new(span: Span, text: impl Into<Cow<'input, str>>) -> Self {
284 Self {
285 span,
286 text: text.into(),
287 placement: Placement::Free,
288 }
289 }
290
291 #[must_use]
293 pub fn with_placement(mut self, placement: Placement) -> Self {
294 self.placement = placement;
295 self
296 }
297
298 #[must_use]
302 pub fn trimmed_text(&self) -> &str {
303 self.text.trim()
304 }
305}
306
307impl AsRef<str> for Comment<'_> {
308 fn as_ref(&self) -> &str {
309 self.text.as_ref()
310 }
311}
312
313#[derive(Clone, PartialEq, Debug, Eq)]
315pub struct ScanError {
316 mark: Marker,
318 info: String,
320}
321
322impl ScanError {
323 #[must_use]
325 #[cold]
326 pub fn new(loc: Marker, info: String) -> ScanError {
327 ScanError { mark: loc, info }
328 }
329
330 #[must_use]
332 #[cold]
333 pub fn new_str(loc: Marker, info: &str) -> ScanError {
334 ScanError {
335 mark: loc,
336 info: info.to_owned(),
337 }
338 }
339
340 #[cold]
341 pub(crate) fn into_result<T>(self) -> Result<T, ScanError> {
342 Err(self)
343 }
344
345 #[must_use]
347 pub fn marker(&self) -> &Marker {
348 &self.mark
349 }
350
351 #[must_use]
353 pub fn info(&self) -> &str {
354 self.info.as_ref()
355 }
356}
357
358impl fmt::Display for ScanError {
359 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
360 write!(
361 f,
362 "{} at char {} line {} column {}",
363 self.info,
364 self.mark.index(),
365 self.mark.line(),
366 self.mark.col() + 1
367 )
368 }
369}
370
371impl core::error::Error for ScanError {}
372
373#[derive(Clone, PartialEq, Debug, Eq)]
375pub enum TokenType<'input> {
376 StreamStart(TEncoding),
378 StreamEnd,
380 VersionDirective(
382 u32,
384 u32,
386 ),
387 TagDirective(
389 Cow<'input, str>,
391 Cow<'input, str>,
393 ),
394 DocumentStart,
396 DocumentEnd,
398 BlockSequenceStart,
402 BlockMappingStart,
406 BlockEnd,
408 FlowSequenceStart,
410 FlowSequenceEnd,
412 FlowMappingStart,
414 FlowMappingEnd,
416 BlockEntry,
418 FlowEntry,
420 Key,
422 Value,
424 Alias(Cow<'input, str>),
426 Anchor(Cow<'input, str>),
428 Tag(
430 Cow<'input, str>,
432 Cow<'input, str>,
434 ),
435 Scalar(ScalarStyle, Cow<'input, str>),
437 Comment(
442 Comment<'input>,
444 ),
445 ReservedDirective(
447 String,
449 Vec<String>,
451 ),
452}
453
454#[derive(Clone, PartialEq, Debug, Eq)]
456pub struct Token<'input>(
457 pub Span,
459 pub TokenType<'input>,
461);
462
463#[derive(Clone, PartialEq, Debug, Eq)]
498struct SimpleKey {
499 possible: bool,
512 required: bool,
521 token_number: usize,
527 mark: Marker,
529}
530
531impl SimpleKey {
532 fn new(mark: Marker) -> SimpleKey {
534 SimpleKey {
535 possible: false,
536 required: false,
537 token_number: 0,
538 mark,
539 }
540 }
541}
542
/// An entry of the scanner's indentation stack.
#[derive(Debug, Default, Clone)]
struct Indent {
    /// The recorded indentation level.
    indent: isize,
    /// Flag stored alongside the level; presumably tells whether leaving
    /// this level must emit a `BlockEnd` token (TODO confirm against the
    /// indentation roll/unroll code).
    needs_block_end: bool,
}
567
/// State tracked for implicit mappings inside flow collections.
///
/// NOTE(review): the code using this type lies outside this part of the
/// file; the variant descriptions below follow the names only.
#[derive(PartialEq, Debug)]
enum ImplicitMappingState {
    /// An implicit mapping may start.
    Possible,
    /// Currently inside an implicit mapping; the payload is a small counter
    /// or level whose exact meaning should be confirmed at the use sites.
    Inside(u8),
}
601
602#[derive(Debug)]
612#[allow(clippy::struct_excessive_bools)]
613pub struct Scanner<'input, T> {
614 input: T,
618 mark: Marker,
620 tokens: VecDeque<Token<'input>>,
627 error: Option<ScanError>,
629 deferred_error: Option<ScanError>,
631
632 stream_start_produced: bool,
634 stream_end_produced: bool,
636 document_prefix_allowed: bool,
642 adjacent_value_allowed_at: usize,
645 simple_key_allowed: bool,
649 simple_keys: smallvec::SmallVec<[SimpleKey; 8]>,
654 indent: isize,
656 indents: smallvec::SmallVec<[Indent; 8]>,
658 flow_level: u8,
660 tokens_parsed: usize,
664 token_available: bool,
666 leading_whitespace: bool,
668 flow_mapping_started: smallvec::SmallVec<[bool; 8]>,
675 implicit_flow_mapping_states: smallvec::SmallVec<[ImplicitMappingState; 8]>,
688 interrupted_plain_by_comment: Option<Marker>,
691 flow_markers: smallvec::SmallVec<[(Marker, char); 8]>,
693 buf_leading_break: String,
694 buf_trailing_breaks: String,
695 buf_whitespaces: String,
696}
697
698impl<'input, T: BorrowedInput<'input>> Iterator for Scanner<'input, T> {
699 type Item = Token<'input>;
700
701 fn next(&mut self) -> Option<Self::Item> {
702 if self.error.is_some() {
703 return None;
704 }
705 match self.next_token() {
706 Ok(Some(tok)) => {
707 debug_print!(
708 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
709 tok.1,
710 tok.0
711 );
712 Some(tok)
713 }
714 Ok(tok) => tok,
715 Err(e) => self.stop_after_error(e),
716 }
717 }
718}
719
/// Result of a scanning step that produces no value on success and a
/// [`ScanError`] on failure.
pub type ScanResult = Result<(), ScanError>;
722
/// Accumulator for the content of a flow scalar.
///
/// While the content can be described as a byte range, only offsets are
/// tracked (`Borrowed`); otherwise the content is collected in an owned
/// string (`Owned`).
#[derive(Debug)]
enum FlowScalarBuf {
    /// Content described by byte offsets.
    Borrowed {
        /// Offset at which the content starts.
        start: usize,
        /// Offset at which the committed content ends.
        end: usize,
        /// Start of a whitespace run not yet committed, if any.
        pending_ws_start: Option<usize>,
        /// End of the whitespace run not yet committed.
        pending_ws_end: usize,
    },
    /// Content collected in an owned string.
    Owned(String),
}

impl FlowScalarBuf {
    /// Creates an empty offset-tracking buffer that begins at `start`.
    #[inline]
    fn new_borrowed(start: usize) -> Self {
        let (end, pending_ws_end) = (start, start);
        Self::Borrowed {
            start,
            end,
            pending_ws_start: None,
            pending_ws_end,
        }
    }

    /// Creates an empty owned buffer.
    #[inline]
    fn new_owned() -> Self {
        Self::Owned(String::new())
    }

    /// Returns the owned string, or `None` for an offset-tracking buffer.
    #[inline]
    fn as_owned_mut(&mut self) -> Option<&mut String> {
        if let Self::Owned(s) = self {
            Some(s)
        } else {
            None
        }
    }

    /// Makes a pending whitespace run part of the content by moving `end`
    /// to the end of the run. No-op without a pending run or when owned.
    #[inline]
    fn commit_pending_ws(&mut self) {
        match self {
            Self::Borrowed {
                end,
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                // `take` clears the pending marker and reports whether a run
                // was pending.
                if pending_ws_start.take().is_some() {
                    *end = *pending_ws_end;
                }
            }
            Self::Owned(_) => {}
        }
    }

    /// Records a whitespace run `ws_start..ws_end` as pending. The start of
    /// the first pending run is kept; the end is always updated. No-op when
    /// owned.
    #[inline]
    fn note_pending_ws(&mut self, ws_start: usize, ws_end: usize) {
        match self {
            Self::Borrowed {
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                *pending_ws_start = pending_ws_start.or(Some(ws_start));
                *pending_ws_end = ws_end;
            }
            Self::Owned(_) => {}
        }
    }

    /// Forgets any pending whitespace run, resetting its end to the end of
    /// the committed content. No-op when owned.
    #[inline]
    fn discard_pending_ws(&mut self) {
        match self {
            Self::Borrowed {
                end,
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                *pending_ws_start = None;
                *pending_ws_end = *end;
            }
            Self::Owned(_) => {}
        }
    }
}
808
809impl<'input, T: BorrowedInput<'input>> Scanner<'input, T> {
810 #[inline]
811 fn promote_flow_scalar_buf_to_owned(
812 &self,
813 start_mark: &Marker,
814 buf: &mut FlowScalarBuf,
815 ) -> Result<(), ScanError> {
816 let FlowScalarBuf::Borrowed {
817 start,
818 end,
819 pending_ws_start: _,
820 pending_ws_end: _,
821 } = *buf
822 else {
823 return Ok(());
824 };
825
826 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
827 ScanError::new_str(
828 *start_mark,
829 "internal error: input advertised offsets but did not provide a slice",
830 )
831 })?;
832 *buf = FlowScalarBuf::Owned(slice.to_owned());
833 Ok(())
834 }
835 #[inline]
841 fn try_borrow_slice(&self, start: usize, end: usize) -> Option<&'input str> {
842 self.input.slice_borrowed(start, end)
843 }
844
845 fn scan_tag_handle_directive_cow(
850 &mut self,
851 mark: &Marker,
852 ) -> Result<Cow<'input, str>, ScanError> {
853 let Some(start) = self.input.byte_offset() else {
854 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
855 };
856
857 if self.input.look_ch() != '!' {
858 return Err(ScanError::new_str(
859 *mark,
860 "while scanning a tag, did not find expected '!'",
861 ));
862 }
863
864 self.skip_non_blank();
866
867 self.input.lookahead(1);
870 while self.input.next_is_alpha() {
871 self.skip_non_blank();
872 self.input.lookahead(1);
873 }
874
875 if self.input.peek() == '!' {
877 self.skip_non_blank();
878 }
879
880 let Some(end) = self.input.byte_offset() else {
881 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
883 };
884
885 let Some(slice) = self.try_borrow_slice(start, end) else {
886 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
888 ScanError::new_str(
889 *mark,
890 "internal error: input advertised slicing but did not provide a slice",
891 )
892 })?;
893 if !slice.ends_with('!') && slice != "!" {
894 return Err(ScanError::new_str(
895 *mark,
896 "while parsing a tag directive, did not find expected '!'",
897 ));
898 }
899 return Ok(Cow::Owned(slice.to_owned()));
900 };
901
902 if !slice.ends_with('!') && slice != "!" {
903 return Err(ScanError::new_str(
904 *mark,
905 "while parsing a tag directive, did not find expected '!'",
906 ));
907 }
908
909 Ok(Cow::Borrowed(slice))
910 }
911
912 fn scan_tag_prefix_directive_cow(
917 &mut self,
918 start_mark: &Marker,
919 ) -> Result<Cow<'input, str>, ScanError> {
920 let Some(start) = self.input.byte_offset() else {
921 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
922 };
923
924 if self.input.look_ch() == '!' {
926 self.skip_non_blank();
927 } else if !is_tag_char(self.input.peek()) {
928 return Err(ScanError::new_str(
929 *start_mark,
930 "invalid global tag character",
931 ));
932 } else if self.input.peek() == '%' {
933 } else {
935 self.skip_non_blank();
936 }
937
938 while is_uri_char(self.input.look_ch()) {
940 if self.input.peek() == '%' {
941 break;
942 }
943 self.skip_non_blank();
944 }
945
946 if self.input.peek() == '%' {
948 let current = self
949 .input
950 .byte_offset()
951 .expect("byte_offset() must remain available once enabled");
952 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
953 slice.to_owned()
954 } else {
955 String::new()
956 };
957
958 while is_uri_char(self.input.look_ch()) {
959 if self.input.peek() == '%' {
960 out.push(self.scan_uri_escapes(start_mark)?);
961 } else {
962 out.push(self.input.peek());
963 self.skip_non_blank();
964 }
965 }
966 return Ok(Cow::Owned(out));
967 }
968
969 let Some(end) = self.input.byte_offset() else {
970 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
971 };
972
973 let Some(slice) = self.try_borrow_slice(start, end) else {
974 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
976 ScanError::new_str(
977 *start_mark,
978 "internal error: input advertised slicing but did not provide a slice",
979 )
980 })?;
981 return Ok(Cow::Owned(slice.to_owned()));
982 };
983
984 Ok(Cow::Borrowed(slice))
985 }
986 pub fn new(input: T) -> Self {
988 let initial_byte_offset = input.byte_offset();
989 Scanner {
990 input,
991 mark: Marker::new(0, 1, 0).with_byte_offset(initial_byte_offset),
992 tokens: VecDeque::with_capacity(64),
993 error: None,
994 deferred_error: None,
995
996 stream_start_produced: false,
997 stream_end_produced: false,
998 document_prefix_allowed: true,
999 adjacent_value_allowed_at: 0,
1000 simple_key_allowed: true,
1001 simple_keys: smallvec::SmallVec::new(),
1002 indent: -1,
1003 indents: smallvec::SmallVec::new(),
1004 flow_level: 0,
1005 tokens_parsed: 0,
1006 token_available: false,
1007 leading_whitespace: true,
1008 flow_mapping_started: smallvec::SmallVec::new(),
1009 implicit_flow_mapping_states: smallvec::SmallVec::new(),
1010 flow_markers: smallvec::SmallVec::new(),
1011 interrupted_plain_by_comment: None,
1012
1013 buf_leading_break: String::with_capacity(128),
1014 buf_trailing_breaks: String::with_capacity(128),
1015 buf_whitespaces: String::with_capacity(128),
1016 }
1017 }
1018
1019 #[inline]
1024 pub fn get_error(&self) -> Option<ScanError> {
1025 self.error.clone().or_else(|| self.deferred_error.clone())
1026 }
1027
1028 #[cold]
1029 fn stop_after_error(&mut self, error: ScanError) -> Option<Token<'input>> {
1030 self.error = Some(error);
1031 None
1032 }
1033
1034 #[cold]
1035 fn simple_key_expected(&self) -> ScanError {
1036 ScanError::new_str(self.mark, "simple key expected")
1037 }
1038
1039 #[cold]
1040 fn unclosed_bracket(mark: Marker, bracket: char) -> ScanError {
1041 ScanError::new(mark, format!("unclosed bracket '{bracket}'"))
1042 }
1043
1044 #[inline]
1046 fn skip_blank(&mut self) {
1047 self.input.skip();
1048
1049 self.mark.offsets.chars += 1;
1050 self.mark.col += 1;
1051 self.mark.offsets.bytes = self.input.byte_offset();
1052 }
1053
1054 #[inline]
1056 fn skip_non_blank(&mut self) {
1057 self.input.skip();
1058
1059 self.mark.offsets.chars += 1;
1060 self.mark.col += 1;
1061 self.mark.offsets.bytes = self.input.byte_offset();
1062 self.leading_whitespace = false;
1063 }
1064
1065 #[inline]
1070 fn skip_bom(&mut self) {
1071 self.input.skip();
1072
1073 self.mark.offsets.chars += 1;
1074 self.mark.offsets.bytes = self.input.byte_offset();
1075 }
1076
1077 #[inline]
1083 fn skip_comment_char(&mut self) {
1084 self.input.skip();
1085
1086 self.mark.offsets.chars += 1;
1087 self.mark.col += 1;
1088 self.mark.offsets.bytes = self.input.byte_offset();
1089 }
1090
1091 #[inline]
1093 fn skip_n_non_blank(&mut self, count: usize) {
1094 for _ in 0..count {
1095 self.input.skip();
1096 self.mark.offsets.chars += 1;
1097 self.mark.col += 1;
1098 }
1099 self.mark.offsets.bytes = self.input.byte_offset();
1100 self.leading_whitespace = false;
1101 }
1102
1103 #[inline]
1105 fn skip_nl(&mut self) {
1106 self.input.skip();
1107
1108 self.mark.offsets.chars += 1;
1109 self.mark.col = 0;
1110 self.mark.line += 1;
1111 self.mark.offsets.bytes = self.input.byte_offset();
1112 self.leading_whitespace = true;
1113 }
1114
1115 #[inline]
1117 fn skip_linebreak(&mut self) {
1118 if self.input.next_2_are('\r', '\n') {
1119 self.skip_blank();
1122 self.skip_nl();
1123 } else if self.input.next_is_break() {
1124 self.skip_nl();
1125 }
1126 }
1127
1128 fn scan_comment_token(&mut self) -> Result<Token<'input>, ScanError> {
1129 let start_mark = self.mark;
1130 debug_assert_eq!(self.input.peek(), '#');
1131 let placement = if self.leading_whitespace {
1132 Placement::Free
1133 } else {
1134 Placement::Right
1135 };
1136
1137 self.skip_comment_char();
1138
1139 let text = if let Some(start) = self.input.byte_offset() {
1140 let n = self.input.skip_while_non_breakz();
1142 self.mark.offsets.chars += n;
1143 self.mark.col += n;
1144 let byte_offset = self.input.byte_offset();
1145 self.mark.offsets.bytes = byte_offset;
1146 let end = byte_offset.expect("byte_offset must remain available once enabled");
1147
1148 if let Some(slice) = self.try_borrow_slice(start, end) {
1149 Cow::Borrowed(slice)
1150 } else if let Some(slice) = self.input.slice_bytes(start, end) {
1151 Cow::Owned(slice.to_owned())
1153 } else {
1154 return Err(ScanError::new_str(
1155 start_mark,
1156 "internal error: input advertised offsets but did not provide a slice",
1157 ));
1158 }
1159 } else {
1160 let mut owned = String::new();
1162 while !is_breakz(self.input.look_ch()) {
1163 owned.push(self.input.peek());
1164 self.skip_comment_char();
1165 }
1166 Cow::Owned(owned)
1167 };
1168
1169 let end_mark = self.mark;
1170 let span = Span::new(start_mark, end_mark);
1171 Ok(Token(
1172 span,
1173 TokenType::Comment(Comment::new(span, text).with_placement(placement)),
1174 ))
1175 }
1176
1177 fn push_comment_token(&mut self) -> ScanResult {
1178 let token = self.scan_comment_token()?;
1179 self.tokens.push_back(token);
1180 Ok(())
1181 }
1182
1183 fn skip_comment(&mut self) {
1184 debug_assert_eq!(self.input.peek(), '#');
1185
1186 self.skip_comment_char();
1187 let n = self.input.skip_while_non_breakz();
1188 self.mark.offsets.chars += n;
1189 self.mark.col += n;
1190 self.mark.offsets.bytes = self.input.byte_offset();
1191 }
1192
1193 #[inline]
1195 pub fn stream_started(&self) -> bool {
1196 self.stream_start_produced
1197 }
1198
1199 #[inline]
1201 pub fn stream_ended(&self) -> bool {
1202 self.stream_end_produced
1203 }
1204
1205 #[inline]
1207 pub fn mark(&self) -> Marker {
1208 self.mark
1209 }
1210
1211 #[inline]
1218 fn read_break(&mut self, s: &mut String) {
1219 self.skip_break();
1220 s.push('\n');
1221 }
1222
1223 #[inline]
1228 fn skip_break(&mut self) {
1229 let c = self.input.peek();
1230 let nc = self.input.peek_nth(1);
1231 debug_assert!(is_break(c));
1232 if c == '\r' && nc == '\n' {
1233 self.skip_blank();
1234 }
1235 self.skip_nl();
1236 }
1237
1238 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
1240 let old_len = self.tokens.len();
1241 assert!(pos <= old_len);
1242 self.tokens.insert(pos, tok);
1243 }
1244
1245 #[inline]
1246 fn allow_simple_key(&mut self) {
1247 self.simple_key_allowed = true;
1248 }
1249
1250 #[inline]
1251 fn disallow_simple_key(&mut self) {
1252 self.simple_key_allowed = false;
1253 }
1254
1255 pub fn fetch_next_token(&mut self) -> ScanResult {
1260 self.input.lookahead(1);
1261
1262 if !self.stream_start_produced {
1263 self.fetch_stream_start();
1264 return Ok(());
1265 }
1266 self.skip_to_next_token()?;
1267
1268 debug_print!(
1269 " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
1270 self.mark,
1271 self.input.peek()
1272 );
1273
1274 self.stale_simple_keys()?;
1275
1276 let mark = self.mark;
1277 self.unroll_indent(mark.col as isize);
1278
1279 self.input.lookahead(4);
1280
1281 if self.input.next_is_z() {
1282 self.fetch_stream_end()?;
1283 return Ok(());
1284 }
1285
1286 if self.mark.col == 0 {
1287 if self.input.next_char_is('%') {
1288 return self.fetch_directive();
1289 } else if self.input.next_is_document_start() {
1290 return self.fetch_document_indicator(TokenType::DocumentStart);
1291 } else if self.input.next_is_document_end() {
1292 self.fetch_document_indicator(TokenType::DocumentEnd)?;
1293 self.skip_ws_to_eol(SkipTabs::Yes)?;
1294 if !self.input.next_is_breakz() {
1295 return Err(ScanError::new_str(
1296 self.mark,
1297 "invalid content after document end marker",
1298 ));
1299 }
1300 return Ok(());
1301 }
1302 }
1303
1304 if self.document_prefix_allowed {
1305 self.document_prefix_allowed = false;
1306 }
1307
1308 if (self.mark.col as isize) < self.indent {
1309 self.input.lookahead(1);
1310 let c = self.input.peek();
1311 if self.flow_level == 0 || !matches!(c, ']' | '}' | ',') {
1312 return Err(ScanError::new_str(self.mark, "invalid indentation"));
1313 }
1314 }
1315
1316 let c = self.input.peek();
1317 let nc = self.input.peek_nth(1);
1318 match c {
1319 '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
1320 '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
1321 ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
1322 '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
1323 ',' => self.fetch_flow_entry(),
1324 '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
1325 '?' if is_blank_or_breakz(nc) => self.fetch_key(),
1326 ':' if is_blank_or_breakz(nc) => self.fetch_value(),
1327 ':' if self.flow_level > 0
1328 && (is_flow(nc) || self.mark.index() == self.adjacent_value_allowed_at) =>
1329 {
1330 self.fetch_flow_value()
1331 }
1332 '*' => self.fetch_anchor(true),
1334 '&' => self.fetch_anchor(false),
1336 '!' => self.fetch_tag(),
1337 '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
1339 '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
1341 '\'' => self.fetch_flow_scalar(true),
1342 '"' => self.fetch_flow_scalar(false),
1343 '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
1345 ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
1346 self.fetch_plain_scalar()
1347 }
1348 c if is_bom(c) => Err(ScanError::new_str(
1349 self.mark,
1350 "a BOM must not appear inside a document",
1351 )),
1352 '%' | '@' | '`' => Err(ScanError::new(
1353 self.mark,
1354 format!("unexpected character: `{c}'"),
1355 )),
1356 _ => self.fetch_plain_scalar(),
1357 }
1358 }
1359
1360 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
1365 if self.deferred_error.is_some() {
1366 if !matches!(
1367 self.tokens.front().map(|token| &token.1),
1368 Some(TokenType::Comment(_))
1369 ) {
1370 if let Some(error) = self.deferred_error.take() {
1371 return error.into_result();
1372 }
1373 }
1374 self.token_available = true;
1375 }
1376
1377 if self.stream_end_produced {
1378 return Ok(None);
1379 }
1380
1381 if !self.token_available {
1382 if let Err(error) = self.fetch_more_tokens() {
1383 if matches!(
1384 self.tokens.front().map(|token| &token.1),
1385 Some(TokenType::Comment(_))
1386 ) {
1387 self.deferred_error = Some(error);
1388 } else {
1389 return Err(error);
1390 }
1391 }
1392 }
1393 let Some(t) = self.tokens.pop_front() else {
1394 return Err(ScanError::new_str(
1395 self.mark,
1396 "did not find expected next token",
1397 ));
1398 };
1399 self.token_available = false;
1400 self.tokens_parsed += 1;
1401
1402 if let TokenType::StreamEnd = t.1 {
1403 self.stream_end_produced = true;
1404 }
1405 Ok(Some(t))
1406 }
1407
1408 pub fn fetch_more_tokens(&mut self) -> ScanResult {
1413 let mut need_more;
1414 loop {
1415 if self.tokens.is_empty() {
1416 need_more = true;
1417 } else {
1418 need_more = false;
1419 self.stale_simple_keys()?;
1421 for sk in &self.simple_keys {
1423 if sk.possible && sk.token_number == self.tokens_parsed {
1424 need_more = true;
1425 break;
1426 }
1427 }
1428 }
1429
1430 if let Some(token) = self.tokens.back() {
1433 if matches!(token.1, TokenType::DocumentEnd | TokenType::DocumentStart) {
1434 break;
1435 }
1436 }
1437
1438 if !need_more {
1439 break;
1440 }
1441 self.fetch_next_token()?;
1442 }
1443 self.token_available = true;
1444
1445 Ok(())
1446 }
1447
1448 fn stale_simple_keys(&mut self) -> ScanResult {
1457 for sk in &mut self.simple_keys {
1458 let is_line_stale = self.flow_level == 0 && sk.mark.line < self.mark.line;
1459 let is_length_stale =
1462 self.mark.index().saturating_sub(sk.mark.index()) > SIMPLE_KEY_MAX_LOOKAHEAD;
1463
1464 if sk.possible && (is_line_stale || is_length_stale) {
1465 if sk.required {
1466 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
1467 }
1468 sk.possible = false;
1469 }
1470 }
1471 Ok(())
1472 }
1473
1474 fn skip_to_next_token(&mut self) -> ScanResult {
1483 let consume_linebreak = |this: &mut Self| {
1486 this.input.lookahead(2);
1487 this.skip_linebreak();
1488 if this.flow_level == 0 {
1489 this.allow_simple_key();
1490 }
1491 };
1492
1493 loop {
1494 match self.input.look_ch() {
1495 '\t' => {
1497 if self.is_within_block()
1498 && self.leading_whitespace
1499 && (self.mark.col as isize) < self.indent
1500 {
1501 self.skip_ws_to_eol(SkipTabs::Yes)?;
1502
1503 if !self.input.next_is_breakz() {
1505 return Err(ScanError::new_str(
1506 self.mark,
1507 "tabs disallowed within this context (block indentation)",
1508 ));
1509 }
1510
1511 if matches!(self.input.look_ch(), '\n' | '\r') {
1513 consume_linebreak(self);
1514 }
1515 } else {
1516 self.skip_blank();
1518 }
1519 }
1520
1521 ' ' => self.skip_blank(),
1522
1523 '\n' | '\r' => consume_linebreak(self),
1524
1525 c if is_bom(c)
1526 && self.document_prefix_allowed
1527 && self.flow_level == 0
1528 && self.mark.col == 0 =>
1529 {
1530 self.skip_bom();
1531 }
1532
1533 '#' => {
1534 self.push_comment_token()?;
1535
1536 if matches!(self.input.look_ch(), '\n' | '\r') {
1538 consume_linebreak(self);
1539 }
1540 }
1541
1542 _ => break,
1543 }
1544 }
1545
1546 if let Some(err_mark) = self.interrupted_plain_by_comment.take() {
1549 let is_immediate_next_line = self.mark.line == err_mark.line + 1;
1553
1554 if self.flow_level == 0
1556 && is_immediate_next_line
1557 && (self.mark.col as isize) > self.indent
1558 {
1559 self.input.lookahead(4);
1563
1564 if !self.input.next_is_z()
1565 && !self.input.next_is_document_indicator()
1566 && self.input.next_can_be_plain_scalar(false)
1567 {
1568 return Err(ScanError::new_str(
1569 err_mark,
1570 "comment intercepting the multiline text",
1571 ));
1572 }
1573 }
1574 }
1575
1576 Ok(())
1577 }
1578
1579 fn skip_yaml_whitespace(&mut self) -> ScanResult {
1584 let mut need_whitespace = true;
1585 loop {
1586 match self.input.look_ch() {
1587 ' ' => {
1588 self.skip_blank();
1589
1590 need_whitespace = false;
1591 }
1592 '\n' | '\r' => {
1593 self.input.lookahead(2);
1594 self.skip_linebreak();
1595 if self.flow_level == 0 {
1596 self.allow_simple_key();
1597 }
1598 need_whitespace = false;
1599 }
1600 '#' => {
1601 if need_whitespace {
1602 self.skip_comment();
1603 } else {
1604 self.push_comment_token()?;
1605 }
1606 }
1607 _ => break,
1608 }
1609 }
1610
1611 if need_whitespace {
1612 Err(ScanError::new_str(self.mark(), "expected whitespace"))
1613 } else {
1614 Ok(())
1615 }
1616 }
1617
1618 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
1619 debug_assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
1620
1621 let mut encountered_tab = false;
1622 let mut has_yaml_ws = false;
1623
1624 loop {
1625 match self.input.look_ch() {
1626 ' ' => {
1627 has_yaml_ws = true;
1628 self.skip_blank();
1629 }
1630 '\t' if skip_tabs != SkipTabs::No => {
1631 encountered_tab = true;
1632 self.skip_blank();
1633 }
1634 '#' if !encountered_tab && !has_yaml_ws => {
1635 return Err(ScanError::new_str(
1636 self.mark,
1637 "comments must be separated from other tokens by whitespace",
1638 ));
1639 }
1640 '#' => self.push_comment_token()?,
1641 _ => break,
1642 }
1643 }
1644
1645 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws))
1646 }
1647
1648 fn fetch_stream_start(&mut self) {
1649 let mark = self.mark;
1650 self.indent = -1;
1651 self.stream_start_produced = true;
1652 self.allow_simple_key();
1653 self.tokens.push_back(Token(
1654 Span::empty(mark),
1655 TokenType::StreamStart(TEncoding::Utf8),
1656 ));
1657 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1658 }
1659
1660 fn fetch_stream_end(&mut self) -> ScanResult {
1661 if self.mark.col != 0 {
1663 self.mark.col = 0;
1664 self.mark.line += 1;
1665 }
1666
1667 if let Some((mark, bracket)) = self.flow_markers.pop() {
1668 return Err(Self::unclosed_bracket(mark, bracket));
1669 }
1670
1671 for sk in &mut self.simple_keys {
1674 if sk.required && sk.possible {
1675 return Err(self.simple_key_expected());
1676 }
1677 sk.possible = false;
1678 }
1679
1680 self.unroll_indent(-1);
1681 self.remove_simple_key()?;
1682 self.disallow_simple_key();
1683
1684 self.tokens
1685 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
1686 Ok(())
1687 }
1688
1689 fn fetch_directive(&mut self) -> ScanResult {
1690 self.unroll_indent(-1);
1691 self.remove_simple_key()?;
1692
1693 self.disallow_simple_key();
1694
1695 let token_index = self.tokens.len();
1696 let tok = self.scan_directive()?;
1697 self.insert_token(token_index, tok);
1698
1699 Ok(())
1700 }
1701
1702 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
1703 let start_mark = self.mark;
1704 self.skip_non_blank();
1705
1706 let name = self.scan_directive_name()?;
1707 let tok = match name.as_ref() {
1708 "YAML" => self.scan_version_directive_value(&start_mark)?,
1709 "TAG" => self.scan_tag_directive_value(&start_mark)?,
1710 _ => {
1711 let mut params = Vec::new();
1712 while self.input.next_is_blank() {
1713 let n_blanks = self.input.skip_while_blank();
1714 self.mark.offsets.chars += n_blanks;
1715 self.mark.col += n_blanks;
1716 self.mark.offsets.bytes = self.input.byte_offset();
1717
1718 if !is_blank_or_breakz(self.input.peek()) {
1719 let mut param = String::new();
1720 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut param);
1721 self.mark.offsets.chars += n_chars;
1722 self.mark.col += n_chars;
1723 self.mark.offsets.bytes = self.input.byte_offset();
1724 params.push(param);
1725 }
1726 }
1727
1728 Token(
1729 Span::new(start_mark, self.mark),
1730 TokenType::ReservedDirective(name, params),
1731 )
1732 }
1733 };
1734
1735 self.skip_ws_to_eol(SkipTabs::Yes)?;
1736
1737 if self.input.next_is_breakz() {
1738 self.input.lookahead(2);
1739 self.skip_linebreak();
1740 Ok(tok)
1741 } else {
1742 Err(ScanError::new_str(
1743 start_mark,
1744 "while scanning a directive, did not find expected comment or line break",
1745 ))
1746 }
1747 }
1748
1749 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1750 let n_blanks = self.input.skip_while_blank();
1751 self.mark.offsets.chars += n_blanks;
1752 self.mark.col += n_blanks;
1753 self.mark.offsets.bytes = self.input.byte_offset();
1754
1755 let major = self.scan_version_directive_number(mark)?;
1756
1757 if self.input.peek() != '.' {
1758 return Err(ScanError::new_str(
1759 *mark,
1760 "while scanning a YAML directive, did not find expected digit or '.' character",
1761 ));
1762 }
1763 self.skip_non_blank();
1764
1765 let minor = self.scan_version_directive_number(mark)?;
1766
1767 Ok(Token(
1768 Span::new(*mark, self.mark),
1769 TokenType::VersionDirective(major, minor),
1770 ))
1771 }
1772
1773 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1774 let start_mark = self.mark;
1775 let mut string = String::new();
1776
1777 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut string);
1778 self.mark.offsets.chars += n_chars;
1779 self.mark.col += n_chars;
1780 self.mark.offsets.bytes = self.input.byte_offset();
1781
1782 if string.is_empty() {
1783 return Err(ScanError::new_str(
1784 start_mark,
1785 "while scanning a directive, could not find expected directive name",
1786 ));
1787 }
1788
1789 if !is_blank_or_breakz(self.input.peek()) {
1790 return Err(ScanError::new_str(
1791 start_mark,
1792 "while scanning a directive, found unexpected non-alphabetical character",
1793 ));
1794 }
1795
1796 Ok(string)
1797 }
1798
1799 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1800 let mut val = 0u32;
1801 let mut length = 0usize;
1802 while let Some(digit) = self.input.look_ch().to_digit(10) {
1803 if length + 1 > 9 {
1804 return Err(ScanError::new_str(
1805 *mark,
1806 "while scanning a YAML directive, found extremely long version number",
1807 ));
1808 }
1809 length += 1;
1810 val = val * 10 + digit;
1811 self.skip_non_blank();
1812 }
1813
1814 if length == 0 {
1815 return Err(ScanError::new_str(
1816 *mark,
1817 "while scanning a YAML directive, did not find expected version number",
1818 ));
1819 }
1820
1821 Ok(val)
1822 }
1823
1824 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1825 let n_blanks = self.input.skip_while_blank();
1826 self.mark.offsets.chars += n_blanks;
1827 self.mark.col += n_blanks;
1828 self.mark.offsets.bytes = self.input.byte_offset();
1829
1830 let handle = self.scan_tag_handle_directive_cow(mark)?;
1831
1832 let n_blanks = self.input.skip_while_blank();
1833 self.mark.offsets.chars += n_blanks;
1834 self.mark.col += n_blanks;
1835 self.mark.offsets.bytes = self.input.byte_offset();
1836
1837 let prefix = self.scan_tag_prefix_directive_cow(mark)?;
1838
1839 self.input.lookahead(1);
1840
1841 if self.input.next_is_blank_or_breakz() {
1842 Ok(Token(
1843 Span::new(*mark, self.mark),
1844 TokenType::TagDirective(handle, prefix),
1845 ))
1846 } else {
1847 Err(ScanError::new_str(
1848 *mark,
1849 "while scanning TAG, did not find expected whitespace or line break",
1850 ))
1851 }
1852 }
1853
1854 fn fetch_tag(&mut self) -> ScanResult {
1855 self.save_simple_key();
1856 self.disallow_simple_key();
1857
1858 let tok = self.scan_tag()?;
1859 self.tokens.push_back(tok);
1860 Ok(())
1861 }
1862
/// Scan a tag at the current position and return it as a `TokenType::Tag(handle, suffix)`.
///
/// When the input reports no byte offset the work is delegated to `scan_tag_owned`.
/// Otherwise the handle and suffix are scanned with the `_cow` helpers.
///
/// # Errors
/// Propagates errors from the helper scanners. Also fails if the tag is not followed by a
/// character accepted by `is_blank_or_breakz` or, inside a flow collection, by one of
/// `,`, `]`, `}`.
fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    // Two characters are needed to test the one right after the current one below.
    self.input.lookahead(2);

    // Without byte offsets nothing can be sliced out of the input: use the owned scanner.
    if self.input.byte_offset().is_none() {
        return self.scan_tag_owned(&start_mark);
    }

    let (handle, suffix): (Cow<'input, str>, Cow<'input, str>) =
        if self.input.nth_char_is(1, '<') {
            // Verbatim form: the handle is left empty and the scanned URI is the suffix.
            let suffix = self.scan_verbatim_tag(&start_mark)?;
            (Cow::Owned(String::new()), Cow::Owned(suffix))
        } else {
            let handle = self.scan_tag_handle_cow(&start_mark)?;
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                // `!!` or `!name!`: a real handle, which must be followed by a suffix.
                let suffix = self.scan_tag_shorthand_suffix_cow(&start_mark, true)?;
                (handle, suffix)
            } else {
                // What was scanned as a "handle" had no closing `!`; everything after its
                // leading `!` is actually the beginning of the suffix.
                let remaining_suffix =
                    self.scan_tag_shorthand_suffix_cow(&start_mark, false)?;

                let suffix = if handle.len() > 1 {
                    if remaining_suffix.is_empty() {
                        // Only the handle part carries suffix text: drop its leading `!`,
                        // staying borrowed when the handle was borrowed.
                        match handle {
                            Cow::Borrowed(s) => Cow::Borrowed(&s[1..]),
                            Cow::Owned(s) => Cow::Owned(s[1..].to_owned()),
                        }
                    } else {
                        // Both parts carry suffix text: they have to be joined in a new
                        // string.
                        let mut combined = handle[1..].to_owned();
                        combined.push_str(&remaining_suffix);
                        Cow::Owned(combined)
                    }
                } else {
                    remaining_suffix
                };

                if suffix.is_empty() {
                    // A lone `!`: empty handle, `!` as the suffix.
                    (Cow::Borrowed(""), Cow::Borrowed("!"))
                } else {
                    (Cow::Borrowed("!"), suffix)
                }
            }
        };

    // The tag must be properly terminated.
    if is_blank_or_breakz(self.input.look_ch())
        || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
    {
        Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Tag(handle, suffix),
        ))
    } else {
        Err(ScanError::new_str(
            start_mark,
            "while scanning a tag, did not find expected whitespace or line break",
        ))
    }
}
1940
1941 fn scan_tag_owned(&mut self, start_mark: &Marker) -> Result<Token<'input>, ScanError> {
1943 let mut handle = String::new();
1944 let mut suffix;
1945
1946 if self.input.nth_char_is(1, '<') {
1947 suffix = self.scan_verbatim_tag(start_mark)?;
1948 } else {
1949 handle = self.scan_tag_handle(false, start_mark)?;
1951 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1953 let is_secondary_handle = handle == "!!";
1955 suffix =
1956 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", start_mark)?;
1957 } else {
1958 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, start_mark)?;
1959 "!".clone_into(&mut handle);
1960 if suffix.is_empty() {
1963 handle.clear();
1964 "!".clone_into(&mut suffix);
1965 }
1966 }
1967 }
1968
1969 if is_blank_or_breakz(self.input.look_ch())
1970 || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
1971 {
1972 Ok(Token(
1975 Span::new(*start_mark, self.mark),
1976 TokenType::Tag(handle.into(), suffix.into()),
1977 ))
1978 } else {
1979 Err(ScanError::new_str(
1980 *start_mark,
1981 "while scanning a tag, did not find expected whitespace or line break",
1982 ))
1983 }
1984 }
1985
1986 fn scan_tag_handle_cow(&mut self, mark: &Marker) -> Result<Cow<'input, str>, ScanError> {
1991 let Some(start) = self.input.byte_offset() else {
1992 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1993 };
1994
1995 if self.input.look_ch() != '!' {
1996 return Err(ScanError::new_str(
1997 *mark,
1998 "while scanning a tag, did not find expected '!'",
1999 ));
2000 }
2001
2002 self.skip_non_blank();
2004
2005 self.input.lookahead(1);
2007 while self.input.next_is_alpha() {
2008 self.skip_non_blank();
2009 self.input.lookahead(1);
2010 }
2011
2012 if self.input.peek() == '!' {
2014 self.skip_non_blank();
2015 }
2016
2017 let Some(end) = self.input.byte_offset() else {
2018 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
2019 };
2020
2021 if let Some(slice) = self.try_borrow_slice(start, end) {
2022 Ok(Cow::Borrowed(slice))
2023 } else {
2024 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
2025 ScanError::new_str(
2026 *mark,
2027 "internal error: input advertised slicing but did not provide a slice",
2028 )
2029 })?;
2030 Ok(Cow::Owned(slice.to_owned()))
2031 }
2032 }
2033
2034 fn scan_tag_shorthand_suffix_cow(
2038 &mut self,
2039 mark: &Marker,
2040 require_non_empty: bool,
2041 ) -> Result<Cow<'input, str>, ScanError> {
2042 let Some(start) = self.input.byte_offset() else {
2043 return Ok(Cow::Owned(
2044 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
2045 ));
2046 };
2047
2048 while is_tag_char(self.input.look_ch()) {
2050 if self.input.peek() == '%' {
2051 let current = self
2053 .input
2054 .byte_offset()
2055 .expect("byte_offset() must remain available once enabled");
2056 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
2057 slice.to_owned()
2058 } else {
2059 String::new()
2060 };
2061
2062 while is_tag_char(self.input.look_ch()) {
2064 if self.input.peek() == '%' {
2065 out.push(self.scan_uri_escapes(mark)?);
2066 } else {
2067 out.push(self.input.peek());
2068 self.skip_non_blank();
2069 }
2070 }
2071 return Ok(Cow::Owned(out));
2072 }
2073 self.skip_non_blank();
2074 }
2075
2076 let Some(end) = self.input.byte_offset() else {
2077 return Ok(Cow::Owned(
2078 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
2079 ));
2080 };
2081
2082 if require_non_empty && start == end {
2083 return Err(ScanError::new_str(
2084 *mark,
2085 "while parsing a tag, did not find expected tag URI",
2086 ));
2087 }
2088
2089 if let Some(slice) = self.try_borrow_slice(start, end) {
2090 Ok(Cow::Borrowed(slice))
2091 } else {
2092 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
2093 ScanError::new_str(
2094 *mark,
2095 "internal error: input advertised slicing but did not provide a slice",
2096 )
2097 })?;
2098 Ok(Cow::Owned(slice.to_owned()))
2099 }
2100 }
2101
2102 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
2103 let mut string = String::new();
2104 if self.input.look_ch() != '!' {
2105 return Err(ScanError::new_str(
2106 *mark,
2107 "while scanning a tag, did not find expected '!'",
2108 ));
2109 }
2110
2111 string.push(self.input.peek());
2112 self.skip_non_blank();
2113
2114 let n_chars = self.input.fetch_while_is_alpha(&mut string);
2115 self.mark.offsets.chars += n_chars;
2116 self.mark.col += n_chars;
2117 self.mark.offsets.bytes = self.input.byte_offset();
2118
2119 if self.input.peek() == '!' {
2121 string.push(self.input.peek());
2122 self.skip_non_blank();
2123 } else if directive && string != "!" {
2124 return Err(ScanError::new_str(
2128 *mark,
2129 "while parsing a tag directive, did not find expected '!'",
2130 ));
2131 }
2132 Ok(string)
2133 }
2134
2135 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
2141 let mut string = String::new();
2142
2143 if self.input.look_ch() == '!' {
2144 string.push(self.input.peek());
2146 self.skip_non_blank();
2147 } else if !is_tag_char(self.input.peek()) {
2148 return Err(ScanError::new_str(
2150 *start_mark,
2151 "invalid global tag character",
2152 ));
2153 } else if self.input.peek() == '%' {
2154 string.push(self.scan_uri_escapes(start_mark)?);
2156 } else {
2157 string.push(self.input.peek());
2159 self.skip_non_blank();
2160 }
2161
2162 while is_uri_char(self.input.look_ch()) {
2163 if self.input.peek() == '%' {
2164 string.push(self.scan_uri_escapes(start_mark)?);
2165 } else {
2166 string.push(self.input.peek());
2167 self.skip_non_blank();
2168 }
2169 }
2170
2171 Ok(string)
2172 }
2173
2174 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
2178 self.skip_non_blank();
2180 self.skip_non_blank();
2181
2182 let mut string = String::new();
2183 while is_uri_char(self.input.look_ch()) {
2184 if self.input.peek() == '%' {
2185 string.push(self.scan_uri_escapes(start_mark)?);
2186 } else {
2187 string.push(self.input.peek());
2188 self.skip_non_blank();
2189 }
2190 }
2191
2192 if string.is_empty() {
2193 return Err(ScanError::new_str(
2194 *start_mark,
2195 "while parsing a tag, did not find expected tag URI",
2196 ));
2197 }
2198
2199 if self.input.peek() != '>' {
2200 return Err(ScanError::new_str(
2201 *start_mark,
2202 "while scanning a verbatim tag, did not find the expected '>'",
2203 ));
2204 }
2205 self.skip_non_blank();
2206
2207 Ok(string)
2208 }
2209
2210 fn scan_tag_shorthand_suffix(
2211 &mut self,
2212 _directive: bool,
2213 _is_secondary: bool,
2214 head: &str,
2215 mark: &Marker,
2216 ) -> Result<String, ScanError> {
2217 let mut length = head.len();
2218 let mut string = String::new();
2219
2220 if length > 1 {
2223 string.extend(head.chars().skip(1));
2224 }
2225
2226 while is_tag_char(self.input.look_ch()) {
2227 if self.input.peek() == '%' {
2229 string.push(self.scan_uri_escapes(mark)?);
2230 } else {
2231 string.push(self.input.peek());
2232 self.skip_non_blank();
2233 }
2234
2235 length += 1;
2236 }
2237
2238 if length == 0 {
2239 return Err(ScanError::new_str(
2240 *mark,
2241 "while parsing a tag, did not find expected tag URI",
2242 ));
2243 }
2244
2245 Ok(string)
2246 }
2247
2248 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
2249 let mut width = 0usize;
2250 let mut bytes = [0u8; 4];
2251 let mut bytes_len = 0usize;
2252 loop {
2253 self.input.lookahead(3);
2254
2255 let c = self.input.peek_nth(1);
2256 let nc = self.input.peek_nth(2);
2257
2258 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
2259 return Err(ScanError::new_str(
2260 *mark,
2261 "while parsing a tag, found an invalid escape sequence",
2262 ));
2263 }
2264
2265 let byte = u8::try_from((as_hex(c) << 4) + as_hex(nc))
2266 .expect("two hex nibbles always fit in a byte");
2267 if width == 0 {
2268 width = match byte {
2269 _ if byte & 0x80 == 0x00 => 1,
2270 _ if byte & 0xE0 == 0xC0 => 2,
2271 _ if byte & 0xF0 == 0xE0 => 3,
2272 _ if byte & 0xF8 == 0xF0 => 4,
2273 _ => {
2274 return Err(ScanError::new_str(
2275 *mark,
2276 "while parsing a tag, found an incorrect leading UTF-8 byte",
2277 ));
2278 }
2279 };
2280 } else if byte & 0xc0 != 0x80 {
2281 return Err(ScanError::new_str(
2282 *mark,
2283 "while parsing a tag, found an incorrect trailing UTF-8 byte",
2284 ));
2285 }
2286
2287 bytes[bytes_len] = byte;
2288 bytes_len += 1;
2289
2290 self.skip_n_non_blank(3);
2291
2292 width -= 1;
2293 if width == 0 {
2294 break;
2295 }
2296 }
2297
2298 let s = core::str::from_utf8(&bytes[..bytes_len]).map_err(|_| {
2299 ScanError::new_str(
2300 *mark,
2301 "while parsing a tag, found an invalid UTF-8 codepoint",
2302 )
2303 })?;
2304
2305 let mut chars = s.chars();
2306 match (chars.next(), chars.next()) {
2307 (Some(ch), None) => Ok(ch),
2308 _ => Err(ScanError::new_str(
2309 *mark,
2310 "while parsing a tag, found an invalid UTF-8 codepoint",
2311 )),
2312 }
2313 }
2314
2315 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
2316 self.save_simple_key();
2317 self.disallow_simple_key();
2318
2319 let tok = self.scan_anchor(alias)?;
2320
2321 self.tokens.push_back(tok);
2322
2323 Ok(())
2324 }
2325
2326 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
2327 let start_mark = self.mark;
2328
2329 self.skip_non_blank();
2331
2332 if let Some(start) = self.input.byte_offset() {
2334 while is_anchor_char(self.input.look_ch()) {
2335 self.skip_non_blank();
2336 }
2337
2338 let end = self
2339 .input
2340 .byte_offset()
2341 .expect("byte_offset() must remain available once enabled");
2342
2343 if start == end {
2344 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2345 }
2346
2347 let cow = if let Some(slice) = self.try_borrow_slice(start, end) {
2348 Cow::Borrowed(slice)
2349 } else if let Some(slice) = self.input.slice_bytes(start, end) {
2350 Cow::Owned(slice.to_owned())
2351 } else {
2352 return Err(ScanError::new_str(
2353 start_mark,
2354 "internal error: input advertised slicing but did not provide a slice",
2355 ));
2356 };
2357
2358 let tok = if alias {
2359 TokenType::Alias(cow)
2360 } else {
2361 TokenType::Anchor(cow)
2362 };
2363 return Ok(Token(Span::new(start_mark, self.mark), tok));
2364 }
2365
2366 let mut string = String::new();
2367 while is_anchor_char(self.input.look_ch()) {
2368 string.push(self.input.peek());
2369 self.skip_non_blank();
2370 }
2371
2372 if string.is_empty() {
2373 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2374 }
2375
2376 let tok = if alias {
2377 TokenType::Alias(string.into())
2378 } else {
2379 TokenType::Anchor(string.into())
2380 };
2381 Ok(Token(Span::new(start_mark, self.mark), tok))
2382 }
2383
2384 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
2385 self.save_simple_key();
2387
2388 let start_mark = self.mark;
2389 let indicator = self.input.peek();
2390 self.flow_markers.push((start_mark, indicator));
2391
2392 self.roll_one_col_indent();
2393 self.increase_flow_level()?;
2394
2395 self.allow_simple_key();
2396
2397 self.skip_non_blank();
2398
2399 if tok == TokenType::FlowMappingStart {
2400 self.flow_mapping_started.push(true);
2401 } else {
2402 self.flow_mapping_started.push(false);
2403 self.implicit_flow_mapping_states
2404 .push(ImplicitMappingState::Possible);
2405 }
2406
2407 let token_index = self.tokens.len();
2408 self.skip_ws_to_eol(SkipTabs::Yes)?;
2409
2410 self.insert_token(token_index, Token(Span::new(start_mark, self.mark), tok));
2411 Ok(())
2412 }
2413
2414 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
2415 if self.flow_level == 0 {
2417 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2418 }
2419
2420 let Some((open_mark, open_ch)) = self.flow_markers.pop() else {
2421 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2422 };
2423
2424 let (expected_open, actual_close) = match tok {
2425 TokenType::FlowSequenceEnd => ('[', ']'),
2426 TokenType::FlowMappingEnd => ('{', '}'),
2427 _ => unreachable!("flow collection end called with non-closing token"),
2428 };
2429
2430 if open_ch != expected_open {
2431 return Err(ScanError::new(
2432 open_mark,
2433 format!("mismatched bracket '{open_ch}' closed by '{actual_close}'"),
2434 ));
2435 }
2436
2437 let flow_level = self.flow_level;
2438
2439 self.remove_simple_key()?;
2440
2441 if matches!(tok, TokenType::FlowSequenceEnd) {
2442 self.end_implicit_mapping(self.mark, flow_level);
2443 self.implicit_flow_mapping_states.pop();
2445 }
2446 self.flow_mapping_started.pop();
2447
2448 self.decrease_flow_level();
2449
2450 self.disallow_simple_key();
2451
2452 let start_mark = self.mark;
2453 self.skip_non_blank();
2454 let token_index = self.tokens.len();
2455 self.skip_ws_to_eol(SkipTabs::Yes)?;
2456
2457 if self.flow_level > 0 {
2463 self.adjacent_value_allowed_at = self.mark.index();
2464 }
2465
2466 self.insert_token(token_index, Token(Span::new(start_mark, self.mark), tok));
2467 Ok(())
2468 }
2469
2470 fn fetch_flow_entry(&mut self) -> ScanResult {
2472 self.remove_simple_key()?;
2473 self.allow_simple_key();
2474
2475 self.end_implicit_mapping(self.mark, self.flow_level);
2476 if self.current_flow_collection_is_sequence() {
2477 self.set_current_flow_mapping_started(false);
2478 }
2479
2480 let start_mark = self.mark;
2481 self.skip_non_blank();
2482 let token_index = self.tokens.len();
2483 self.skip_ws_to_eol(SkipTabs::Yes)?;
2484
2485 self.insert_token(
2486 token_index,
2487 Token(Span::new(start_mark, self.mark), TokenType::FlowEntry),
2488 );
2489 Ok(())
2490 }
2491
2492 fn increase_flow_level(&mut self) -> ScanResult {
2493 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
2494 self.flow_level = self
2495 .flow_level
2496 .checked_add(1)
2497 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
2498 Ok(())
2499 }
2500
2501 fn decrease_flow_level(&mut self) {
2502 if self.flow_level > 0 {
2503 self.flow_level -= 1;
2504 self.simple_keys.pop().unwrap();
2505 }
2506 }
2507
2508 fn fetch_block_entry(&mut self) -> ScanResult {
2514 if self.flow_level > 0 {
2515 return Err(ScanError::new_str(
2517 self.mark,
2518 r#""-" is only valid inside a block"#,
2519 ));
2520 }
2521 if !self.simple_key_allowed {
2523 return Err(ScanError::new_str(
2524 self.mark,
2525 "block sequence entries are not allowed in this context",
2526 ));
2527 }
2528
2529 if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
2531 if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
2532 return Err(ScanError::new_str(
2533 span.start,
2534 "invalid indentation for anchor",
2535 ));
2536 }
2537 }
2538
2539 let mark = self.mark;
2541 self.skip_non_blank();
2542
2543 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
2545 let token_index = self.tokens.len();
2546 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
2547 self.input.lookahead(2);
2548 if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
2549 {
2550 return Err(ScanError::new_str(
2551 self.mark,
2552 "'-' must be followed by a valid YAML whitespace",
2553 ));
2554 }
2555
2556 self.skip_ws_to_eol(SkipTabs::No)?;
2557 self.input.lookahead(1);
2558 if self.input.next_is_break() || self.input.next_is_flow() {
2559 self.roll_one_col_indent();
2560 }
2561
2562 self.remove_simple_key()?;
2563 self.allow_simple_key();
2564
2565 self.insert_token(
2566 token_index,
2567 Token(Span::empty(self.mark), TokenType::BlockEntry),
2568 );
2569
2570 Ok(())
2571 }
2572
2573 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
2574 if let Some((mark, bracket)) = self.flow_markers.pop() {
2575 return Err(ScanError::new(
2576 mark,
2577 format!("unclosed bracket '{bracket}'"),
2578 ));
2579 }
2580
2581 self.unroll_indent(-1);
2582 self.remove_simple_key()?;
2583 self.disallow_simple_key();
2584
2585 let mark = self.mark;
2586
2587 self.skip_n_non_blank(3);
2588
2589 self.document_prefix_allowed = matches!(t, TokenType::DocumentEnd);
2590 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
2591 Ok(())
2592 }
2593
2594 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
2595 self.save_simple_key();
2596 self.allow_simple_key();
2597 let tok = self.scan_block_scalar(literal)?;
2598
2599 self.tokens.push_back(tok);
2600 Ok(())
2601 }
2602
/// Scan a block scalar and return it as a `TokenType::Scalar`.
///
/// `literal` selects `ScalarStyle::Literal`; otherwise the scalar is `ScalarStyle::Folded`
/// and single line breaks between non-blank-leading lines are folded into a space.
///
/// The header may contain a chomping indicator (`+` keeps, `-` strips) and an indentation
/// indicator (a non-zero digit), in either order. Without an indentation indicator, the
/// content indentation is detected by `skip_block_scalar_first_line_indent`.
///
/// # Errors
/// Fails when the indentation indicator is `0`, when the header is followed by something
/// other than whitespace, a comment and a line break (or the end of input), when the
/// content starts with a tab, or when a content line is wrongly indented.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    // Value of the explicit indentation indicator; 0 when there is none.
    let mut increment: usize = 0;
    // Column at which content lines start; 0 until it is known.
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        ScalarStyle::Literal
    } else {
        ScalarStyle::Folded
    };

    let mut string = String::new();
    // Line break that ended the previous content line.
    let mut leading_break = String::new();
    // Line breaks of the empty lines following `leading_break`.
    let mut trailing_breaks = String::new();
    // Line break that ended the header line.
    let mut chomping_break = String::new();

    // Skip the character that introduced the block scalar.
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header: chomping indicator then optional digit, or digit then optional chomping
    // indicator.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    // Rest of the header line.
    self.skip_ws_to_eol(SkipTabs::Yes)?;

    self.input.lookahead(1);
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indentation indicator is relative to the current indentation, when
    // there is one.
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    // Skip the indentation (and empty lines) in front of the first content line,
    // detecting the content indentation if it is not known yet.
    if indent == 0 {
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // End of input before any content: the result only depends on the chomping mode and
    // on the line breaks seen so far.
    if self.input.next_is_z() {
        let contents = match chomping {
            Chomping::Strip => String::new(),
            // Still on the header line: there is no line break to keep.
            _ if self.mark.line == start_mark.line() => String::new(),
            Chomping::Clip => chomping_break,
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            Chomping::Keep => trailing_breaks,
        };
        return Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, contents.into()),
        ));
    }

    // A line indented less than the content but more than the enclosing block is invalid.
    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        if self.indent < 0 && self.mark.col == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_start()
                || self.input.next_is_document_end()
                || self.input.peek() == '#'
            {
                // A document indicator or a comment in column 0 is accepted here.
            } else {
                return Err(ScanError::new_str(
                    self.mark,
                    "wrongly indented line in block scalar",
                ));
            }
        } else {
            return Err(ScanError::new_str(
                self.mark,
                "wrongly indented line in block scalar",
            ));
        }
    }

    let mut line_buffer = String::with_capacity(100);
    // From here on `start_mark` is the position of the first content line; it is the start
    // of the span of the token returned at the end of the function.
    let start_mark = self.mark;
    while self.mark.col == indent && !self.input.next_is_z() {
        if indent == 0 {
            // With no indentation, a document end indicator terminates the scalar.
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        trailing_blank = self.input.next_is_blank();
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            // Folding: a single line break becomes a space, several line breaks lose the
            // first one.
            string.push_str(&trailing_breaks);
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            // No folding: all line breaks are kept.
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        self.input.lookahead(2);
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Indentation and empty lines in front of the next content line.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Final line break, unless it is stripped.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        // The input ended right after content, without a line break: add one.
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    // Trailing empty lines are only kept in "keep" mode.
    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string.into()),
    ))
}
2813
2814 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
2824 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
2826 string.push(self.input.peek());
2827 self.skip_blank();
2833 }
2834
2835 if self.input.buf_is_empty() {
2838 let mut n_chars = 0;
2846 debug_assert!(line_buffer.is_empty());
2847 while let Some(c) = self.input.raw_read_non_breakz_ch() {
2848 line_buffer.push(c);
2849 n_chars += 1;
2850 }
2851
2852 self.mark.col += n_chars;
2854 self.mark.offsets.chars += n_chars;
2855 self.mark.offsets.bytes = self.input.byte_offset();
2856
2857 string.reserve(line_buffer.len());
2859 string.push_str(line_buffer);
2860 line_buffer.clear();
2862 }
2863 }
2864
/// Skip the indentation of block scalar lines, collecting the breaks of empty lines.
///
/// For each line, spaces are skipped until column `indent` is reached or a non-space
/// character is found. If the line then ends with a line break, the break is appended to
/// `breaks` and the next line is processed; otherwise the function returns with the input
/// positioned on the first character that was not skipped.
fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
    loop {
        if indent < self.input.bufmaxlen() - 2 {
            // The whole indentation fits in one lookahead of the input buffer.
            self.input.lookahead(self.input.bufmaxlen());
            while self.mark.col < indent && self.input.peek() == ' ' {
                self.skip_blank();
            }
        } else {
            // The indentation may be longer than what one lookahead provides: refill the
            // buffer each time it has been drained.
            loop {
                self.input.lookahead(self.input.bufmaxlen());
                while !self.input.buf_is_empty()
                    && self.mark.col < indent
                    && self.input.peek() == ' '
                {
                    self.skip_blank();
                }
                // Stop once the indentation is reached or a non-space character is next;
                // otherwise the buffer was merely drained and must be refilled.
                if self.mark.col == indent
                    || (!self.input.buf_is_empty() && self.input.peek() != ' ')
                {
                    break;
                }
            }
            self.input.lookahead(2);
        }

        if self.input.next_is_break() {
            // Empty line: record its break and continue with the next line.
            self.read_break(breaks);
        } else {
            break;
        }
    }
}
2904
2905 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
2910 let mut max_indent = 0;
2911 loop {
2912 while self.input.look_ch() == ' ' {
2914 self.skip_blank();
2915 }
2916
2917 if self.mark.col > max_indent {
2918 max_indent = self.mark.col;
2919 }
2920
2921 if self.input.next_is_break() {
2922 self.input.lookahead(2);
2924 self.read_break(breaks);
2925 } else {
2926 break;
2928 }
2929 }
2930
2931 *indent = max_indent.max((self.indent + 1) as usize);
2940 if self.indent > 0 {
2941 *indent = (*indent).max(1);
2942 }
2943 }
2944
2945 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
2946 self.save_simple_key();
2947 self.disallow_simple_key();
2948
2949 let token_index = self.tokens.len();
2950 let tok = self.scan_flow_scalar(single)?;
2951
2952 self.skip_to_next_token()?;
2955 self.adjacent_value_allowed_at = self.mark.index();
2956
2957 self.insert_token(token_index, tok);
2958 Ok(())
2959 }
2960
/// Scan a quoted scalar and return it as a `TokenType::Scalar`.
///
/// `single` selects the single-quoted style; otherwise the scalar is double-quoted. When
/// the input reports byte offsets, the content is tracked as a byte range of the input
/// for as long as possible and only promoted to an owned string when it has to differ
/// from the raw input (e.g. for line folding). The returned span covers the scalar
/// including its quotes, but not the whitespace following it.
///
/// # Errors
/// Fails on a document indicator in column 0 inside the scalar, on an unclosed quote, on
/// a tab used as indentation, on an under-indented continuation line in block context,
/// and on unexpected content after the closing quote. Errors from the helpers are
/// propagated.
#[allow(clippy::too_many_lines)]
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    // With byte offsets, start borrowed right after the current character (the opening
    // quote); otherwise accumulate into an owned string.
    let mut buf = match self.input.byte_offset() {
        Some(off) => FlowScalarBuf::new_borrowed(off + self.input.peek().len_utf8()),
        None => FlowScalarBuf::new_owned(),
    };

    // Receives line breaks that are read but must not end up in the scalar.
    let mut break_scratch = String::new();

    // Skip the opening quote.
    self.skip_non_blank();

    loop {
        self.input.lookahead(4);

        if self.mark.col == 0 && self.input.next_is_document_indicator() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a quoted scalar, found unexpected document indicator",
            ));
        }

        if self.input.next_is_z() {
            return Err(ScanError::new_str(start_mark, "unclosed quote"));
        }

        // Set once the first line break of the upcoming whitespace run has been read.
        let mut leading_blanks = false;
        self.consume_flow_scalar_non_whitespace_chars(
            single,
            &mut buf,
            &mut leading_blanks,
            &start_mark,
        )?;

        // Stop on the closing quote matching the style.
        match self.input.look_ch() {
            '\'' if single => break,
            '"' if !single => break,
            _ => {}
        }

        // Owned buffer: length of the string before the current run of blanks.
        let mut trailing_ws_start: Option<usize> = None;
        let mut has_leading_break = false;
        let mut has_trailing_breaks = false;

        // Borrowed buffer: byte offset at which the current run of blanks started.
        let mut pending_ws_start: Option<usize> = None;

        // Consume blanks and line breaks up to the next content character.
        while self.input.next_is_blank() || self.input.next_is_break() {
            if self.input.next_is_blank() {
                if leading_blanks {
                    // Blanks at the start of a continuation line are dropped.
                    if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tab cannot be used as indentation",
                        ));
                    }
                    self.skip_blank();
                } else {
                    // Blanks after content are kept tentatively: they are part of the
                    // scalar unless a line break follows.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => {
                            if trailing_ws_start.is_none() {
                                trailing_ws_start = Some(string.len());
                            }
                            string.push(self.input.peek());
                        }
                        FlowScalarBuf::Borrowed { .. } => {
                            if pending_ws_start.is_none() {
                                pending_ws_start = self.input.byte_offset();
                            }
                        }
                    }
                    self.skip_blank();

                    if let (FlowScalarBuf::Borrowed { .. }, Some(ws_start), Some(ws_end)) =
                        (&mut buf, pending_ws_start, self.input.byte_offset())
                    {
                        buf.note_pending_ws(ws_start, ws_end);
                    }
                }
            } else {
                self.input.lookahead(2);

                if leading_blanks {
                    // Second and later line breaks of the run are kept in the scalar,
                    // which can therefore no longer be a slice of the input.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => self.read_break(string),
                        FlowScalarBuf::Borrowed { .. } => {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                            let Some(string) = buf.as_owned_mut() else {
                                unreachable!()
                            };
                            self.read_break(string);
                        }
                    }
                    has_trailing_breaks = true;
                } else {
                    // First line break of the run: blanks in front of it are dropped.
                    if let Some(pos) = trailing_ws_start.take() {
                        if let FlowScalarBuf::Owned(ref mut string) = buf {
                            string.truncate(pos);
                        }
                    }

                    if pending_ws_start.take().is_some() {
                        if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        }
                        buf.discard_pending_ws();
                    } else {
                        buf.commit_pending_ws();
                    }

                    // The break itself is read into the scratch buffer, not the scalar.
                    break_scratch.clear();
                    self.read_break(&mut break_scratch);
                    has_leading_break = true;
                    leading_blanks = true;
                }
            }

            self.input.lookahead(1);
        }

        // In block context, continuation lines must be indented more than the current
        // block, unless the line only holds the closing quote.
        if leading_blanks && has_leading_break && self.flow_level == 0 {
            let next_ch = self.input.peek();
            let is_closing_quote = (single && next_ch == '\'') || (!single && next_ch == '"');
            if !is_closing_quote && (self.mark.col as isize) <= self.indent {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid indentation in multiline quoted scalar",
                ));
            }
        }

        if leading_blanks {
            // A single line break is folded into a space.
            if has_leading_break && !has_trailing_breaks {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => string.push(' '),
                    FlowScalarBuf::Borrowed { .. } => {
                        self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        let Some(string) = buf.as_owned_mut() else {
                            unreachable!()
                        };
                        string.push(' ');
                    }
                }
            }
        }
    }

    // Skip the closing quote; the token's span ends right after it.
    self.skip_non_blank();
    let end_mark = self.mark;

    self.skip_ws_to_eol(SkipTabs::Yes)?;
    // Only a limited set of characters may follow the scalar.
    // NOTE(review): the error message below mentions "double-quoted" even when `single`
    // is true.
    match self.input.peek() {
        ',' | '}' | ']' if self.flow_level > 0 => {}
        c if is_breakz(c) => {}
        // In block context, `:` is only accepted if the scalar started on this very line.
        ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
        ':' if self.flow_level > 0 => {}
        _ => {
            return Err(ScanError::new_str(
                self.mark,
                "invalid trailing content after double-quoted scalar",
            ));
        }
    }

    let style = if single {
        ScalarStyle::SingleQuoted
    } else {
        ScalarStyle::DoubleQuoted
    };

    let contents = match buf {
        FlowScalarBuf::Owned(string) => Cow::Owned(string),
        FlowScalarBuf::Borrowed {
            start,
            mut end,
            pending_ws_start,
            pending_ws_end,
        } => {
            // Blanks still pending at this point are part of the content.
            if pending_ws_start.is_some() {
                end = pending_ws_end;
            }
            if let Some(slice) = self.try_borrow_slice(start, end) {
                Cow::Borrowed(slice)
            } else {
                let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
                    ScanError::new_str(
                        start_mark,
                        "internal error: input advertised offsets but did not provide a slice",
                    )
                })?;
                Cow::Owned(slice.to_owned())
            }
        }
    };

    Ok(Token(
        Span::new(start_mark, end_mark),
        TokenType::Scalar(style, contents),
    ))
}
3209
3210 fn consume_flow_scalar_non_whitespace_chars(
3219 &mut self,
3220 single: bool,
3221 buf: &mut FlowScalarBuf,
3222 leading_blanks: &mut bool,
3223 start_mark: &Marker,
3224 ) -> Result<(), ScanError> {
3225 self.input.lookahead(2);
3226 while !is_blank_or_breakz(self.input.peek()) {
3227 match self.input.peek() {
3228 '\'' if self.input.peek_nth(1) == '\'' && single => {
3230 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3231 buf.commit_pending_ws();
3232 self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
3233 }
3234 let Some(string) = buf.as_owned_mut() else {
3235 unreachable!()
3236 };
3237 string.push('\'');
3238 self.skip_n_non_blank(2);
3239 }
3240 '\'' if single => break,
3242 '"' if !single => break,
3243 '\\' if !single && is_break(self.input.peek_nth(1)) => {
3245 self.input.lookahead(3);
3246 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3247 buf.commit_pending_ws();
3248 self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
3249 }
3250 self.skip_non_blank();
3251 self.skip_linebreak();
3252 *leading_blanks = true;
3253 break;
3254 }
3255 '\\' if !single => {
3257 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3258 buf.commit_pending_ws();
3259 self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
3260 }
3261 let Some(string) = buf.as_owned_mut() else {
3262 unreachable!()
3263 };
3264 string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
3265 }
3266 c => {
3267 match buf {
3268 FlowScalarBuf::Owned(ref mut string) => {
3269 string.push(c);
3270 }
3271 FlowScalarBuf::Borrowed { .. } => {
3272 buf.commit_pending_ws();
3273 }
3274 }
3275 self.skip_non_blank();
3276
3277 if let Some(new_end) = self.input.byte_offset() {
3278 if let FlowScalarBuf::Borrowed { end, .. } = buf {
3279 *end = new_end;
3280 }
3281 }
3282 }
3283 }
3284 self.input.lookahead(2);
3285 }
3286 Ok(())
3287 }
3288
3289 fn resolve_flow_scalar_escape_sequence(
3296 &mut self,
3297 start_mark: &Marker,
3298 ) -> Result<char, ScanError> {
3299 let mut code_length = 0usize;
3300 let mut ret = '\0';
3301
3302 match self.input.peek_nth(1) {
3303 '0' => ret = '\0',
3304 'a' => ret = '\x07',
3305 'b' => ret = '\x08',
3306 't' | '\t' => ret = '\t',
3307 'n' => ret = '\n',
3308 'v' => ret = '\x0b',
3309 'f' => ret = '\x0c',
3310 'r' => ret = '\x0d',
3311 'e' => ret = '\x1b',
3312 ' ' => ret = '\x20',
3313 '"' => ret = '"',
3314 '/' => ret = '/',
3315 '\\' => ret = '\\',
3316 'N' => ret = char::from_u32(0x85).unwrap(),
3318 '_' => ret = char::from_u32(0xA0).unwrap(),
3320 'L' => ret = char::from_u32(0x2028).unwrap(),
3322 'P' => ret = char::from_u32(0x2029).unwrap(),
3324 'x' => code_length = 2,
3325 'u' => code_length = 4,
3326 'U' => code_length = 8,
3327 _ => {
3328 return Err(ScanError::new_str(
3329 *start_mark,
3330 "while parsing a quoted scalar, found unknown escape character",
3331 ))
3332 }
3333 }
3334 self.skip_n_non_blank(2);
3335
3336 if code_length > 0 {
3338 self.input.lookahead(code_length);
3339 let mut value = 0u32;
3340 for i in 0..code_length {
3341 let c = self.input.peek_nth(i);
3342 if !is_hex(c) {
3343 return Err(ScanError::new_str(
3344 *start_mark,
3345 "while parsing a quoted scalar, did not find expected hexadecimal number",
3346 ));
3347 }
3348 value = (value << 4) + as_hex(c);
3349 }
3350
3351 self.skip_n_non_blank(code_length);
3352
3353 if code_length == 4 && (0xD800..=0xDBFF).contains(&value) {
3355 self.input.lookahead(2);
3356 if self.input.peek() == '\\' && self.input.peek_nth(1) == 'u' {
3357 self.skip_n_non_blank(2);
3358 self.input.lookahead(4);
3359 let mut low_value = 0u32;
3360 for i in 0..4 {
3361 let c = self.input.peek_nth(i);
3362 if !is_hex(c) {
3363 return Err(ScanError::new_str(
3364 *start_mark,
3365 "while parsing a quoted scalar, did not find expected hexadecimal number for low surrogate",
3366 ));
3367 }
3368 low_value = (low_value << 4) + as_hex(c);
3369 }
3370 if (0xDC00..=0xDFFF).contains(&low_value) {
3371 value = 0x10000 + (((value - 0xD800) << 10) | (low_value - 0xDC00));
3372 self.skip_n_non_blank(4);
3373 } else {
3374 return Err(ScanError::new_str(
3375 *start_mark,
3376 "while parsing a quoted scalar, found invalid low surrogate",
3377 ));
3378 }
3379 } else {
3380 return Err(ScanError::new_str(
3381 *start_mark,
3382 "while parsing a quoted scalar, found high surrogate without following low surrogate",
3383 ));
3384 }
3385 } else if code_length == 4 && (0xDC00..=0xDFFF).contains(&value) {
3386 return Err(ScanError::new_str(
3387 *start_mark,
3388 "while parsing a quoted scalar, found unpaired low surrogate",
3389 ));
3390 }
3391
3392 let Some(ch) = char::from_u32(value) else {
3393 return Err(ScanError::new_str(
3394 *start_mark,
3395 "while parsing a quoted scalar, found invalid Unicode character escape code",
3396 ));
3397 };
3398 ret = ch;
3399 }
3400 Ok(ret)
3401 }
3402
3403 fn fetch_plain_scalar(&mut self) -> ScanResult {
3404 self.save_simple_key();
3405 self.disallow_simple_key();
3406
3407 let token_index = self.tokens.len();
3408 let tok = self.scan_plain_scalar()?;
3409
3410 self.insert_token(token_index, tok);
3411 Ok(())
3412 }
3413
/// Scans a plain (unquoted) scalar starting at the current position.
///
/// Text is accumulated into a scratch `String`, folding line breaks on the way. The returned
/// [`TokenType::Scalar`] (style [`ScalarStyle::Plain`]) borrows from the input when the
/// accumulated text is identical to the input slice between the start and end marks, and owns
/// the accumulated text otherwise.
///
/// # Errors
/// Returns a [`ScanError`] when:
/// - inside a flow collection the scalar starts left of the required indentation,
/// - inside a flow collection a `-` is directly followed by a flow indicator,
/// - a tab is found in the indentation of a continuation line that is not otherwise empty,
/// - no character at all could be accumulated.
#[allow(clippy::too_many_lines)]
fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
    self.unroll_non_block_indents();
    // Minimum column for the scalar (flow context) and for its continuation lines (block context).
    let indent = self.indent + 1;
    let start_mark = self.mark;

    if self.flow_level > 0 && (start_mark.col as isize) < indent {
        return Err(ScanError::new_str(
            start_mark,
            "invalid indentation in flow construct",
        ));
    }

    let mut string = String::with_capacity(32);
    // Reset the scratch buffers that carry whitespace and breaks between text runs.
    self.buf_whitespaces.clear();
    self.buf_leading_break.clear();
    self.buf_trailing_breaks.clear();
    let mut end_mark = self.mark;

    loop {
        self.input.lookahead(4);
        // A document indicator at column 0 or a `#` ends the scalar.
        if (self.mark.col == 0 && self.input.next_is_document_indicator())
            || self.input.peek() == '#'
        {
            // Remember where a comment cut off an already started scalar (block context only,
            // and only when whitespace separated the text from the `#`).
            if self.input.peek() == '#'
                && !string.is_empty()
                && !self.buf_whitespaces.is_empty()
                && self.flow_level == 0
            {
                self.interrupted_plain_by_comment = Some(self.mark);
            }
            break;
        }

        if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
            return Err(ScanError::new_str(
                self.mark,
                "plain scalar cannot start with '-' followed by ,[]{}",
            ));
        }

        if !self.input.next_is_blank_or_breakz()
            && self.input.next_can_be_plain_scalar(self.flow_level > 0)
        {
            // Flush what was buffered since the previous text run before appending more text.
            if self.leading_whitespace {
                if self.buf_leading_break.is_empty() {
                    // No leading break recorded: append the buffered breaks as they are.
                    string.push_str(&self.buf_leading_break);
                    string.push_str(&self.buf_trailing_breaks);
                    self.buf_trailing_breaks.clear();
                    self.buf_leading_break.clear();
                } else {
                    // Line folding: a lone break becomes a space, further breaks are kept.
                    if self.buf_trailing_breaks.is_empty() {
                        string.push(' ');
                    } else {
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                    }
                    self.buf_leading_break.clear();
                }
                self.leading_whitespace = false;
            } else if !self.buf_whitespaces.is_empty() {
                // Blanks inside a line are preserved verbatim.
                string.push_str(&self.buf_whitespaces);
                self.buf_whitespaces.clear();
            }

            // Consume the first character here, then let the input copy the rest in chunks.
            string.push(self.input.peek());
            self.skip_non_blank();
            string.reserve(self.input.bufmaxlen());

            let mut end = false;
            while !end {
                self.input.lookahead(self.input.bufmaxlen());
                let (stop, chars_consumed) = self.input.fetch_plain_scalar_chunk(
                    &mut string,
                    self.input.bufmaxlen() - 1,
                    self.flow_level > 0,
                );
                end = stop;
                // The chunk was consumed without going through the skip helpers, so the
                // mark is advanced by hand.
                self.mark.offsets.chars += chars_consumed;
                self.mark.col += chars_consumed;
                self.mark.offsets.bytes = self.input.byte_offset();
            }
            end_mark = self.mark;
        }

        // Anything other than a blank or a break ends the scalar.
        if !(self.input.next_is_blank() || self.input.next_is_break()) {
            break;
        }

        self.input.lookahead(2);
        // Consume the blanks and breaks that follow, buffering them for the next text run.
        while self.input.next_is_blank_or_break() {
            if self.input.next_is_blank() {
                if !self.leading_whitespace {
                    self.buf_whitespaces.push(self.input.peek());
                    self.skip_blank();
                } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                    // A tab inside the indentation is accepted only when the rest of the
                    // line is empty.
                    self.skip_ws_to_eol(SkipTabs::Yes)?;
                    if !self.input.next_is_breakz() {
                        return Err(ScanError::new_str(
                            start_mark,
                            "while scanning a plain scalar, found a tab",
                        ));
                    }
                } else {
                    self.skip_blank();
                }
            } else {
                if self.leading_whitespace {
                    // Second or later break in a row.
                    self.skip_break();
                    self.buf_trailing_breaks.push('\n');
                } else {
                    // First break after text: blanks before it are dropped.
                    self.buf_whitespaces.clear();
                    self.skip_break();
                    self.buf_leading_break.push('\n');
                    self.leading_whitespace = true;
                }
            }
            self.input.lookahead(2);
        }

        // In block context a line indented less than `indent` no longer belongs to the scalar.
        if self.flow_level == 0 && (self.mark.col as isize) < indent {
            break;
        }
    }

    if self.leading_whitespace {
        self.allow_simple_key();
    }

    if string.is_empty() {
        Err(ScanError::new_str(
            start_mark,
            "unexpected end of plain scalar",
        ))
    } else {
        // Borrow only when the input slice is exactly the accumulated text (nothing folded).
        let contents = if let (Some(start), Some(end)) =
            (start_mark.byte_offset(), end_mark.byte_offset())
        {
            match self.try_borrow_slice(start, end) {
                Some(slice) if slice == string => Cow::Borrowed(slice),
                _ => Cow::Owned(string),
            }
        } else {
            Cow::Owned(string)
        };

        Ok(Token(
            Span::new(start_mark, end_mark),
            TokenType::Scalar(ScalarStyle::Plain, contents),
        ))
    }
}
3591
3592 fn fetch_key(&mut self) -> ScanResult {
3593 let start_mark = self.mark;
3594 if self.flow_level == 0 {
3595 if !self.simple_key_allowed {
3597 return Err(ScanError::new_str(
3598 self.mark,
3599 "mapping keys are not allowed in this context",
3600 ));
3601 }
3602 self.roll_indent(
3603 start_mark.col,
3604 None,
3605 TokenType::BlockMappingStart,
3606 start_mark,
3607 );
3608 } else {
3609 self.set_current_flow_mapping_started(true);
3611 }
3612
3613 self.remove_simple_key()?;
3614
3615 if self.flow_level == 0 {
3616 self.allow_simple_key();
3617 } else {
3618 self.disallow_simple_key();
3619 }
3620
3621 self.skip_non_blank();
3622 let token_index = self.tokens.len();
3623 self.skip_yaml_whitespace()?;
3624 if self.input.peek() == '\t' {
3625 return Err(ScanError::new_str(
3626 self.mark(),
3627 "tabs disallowed in this context",
3628 ));
3629 }
3630 self.insert_token(
3631 token_index,
3632 Token(Span::new(start_mark, self.mark), TokenType::Key),
3633 );
3634 Ok(())
3635 }
3636
3637 fn fetch_flow_value(&mut self) -> ScanResult {
3645 let nc = self.input.peek_nth(1);
3646
3647 if self.mark.index() != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
3659 return Err(ScanError::new_str(
3660 self.mark,
3661 "':' may not precede any of `[{` in flow mapping",
3662 ));
3663 }
3664
3665 self.fetch_value()
3666 }
3667
3668 fn fetch_value(&mut self) -> ScanResult {
3670 let sk = self.simple_keys.last().unwrap().clone();
3671 let start_mark = self.mark;
3672 let is_implicit_flow_mapping = self.current_flow_collection_is_sequence()
3673 && !self.current_flow_mapping_started()
3674 && !self.implicit_flow_mapping_states.is_empty();
3675 if is_implicit_flow_mapping {
3676 *self.implicit_flow_mapping_states.last_mut().unwrap() =
3677 ImplicitMappingState::Inside(self.flow_level);
3678 }
3679
3680 self.skip_non_blank();
3682 let mut trailing_tokens = VecDeque::new();
3689 if self.input.look_ch() == '\t' {
3690 let trailing_token_index = self.tokens.len();
3691 let whitespace = self.skip_ws_to_eol(SkipTabs::Yes)?;
3692 trailing_tokens = self.tokens.split_off(trailing_token_index);
3693
3694 if !whitespace.has_valid_yaml_ws()
3695 && (self.input.peek() == '-' || self.input.next_is_alpha())
3696 {
3697 return Err(ScanError::new_str(
3698 self.mark,
3699 "':' must be followed by a valid YAML whitespace",
3700 ));
3701 }
3702 }
3703
3704 if sk.possible {
3705 let tok = Token(Span::empty(sk.mark), TokenType::Key);
3707 self.insert_token(sk.token_number - self.tokens_parsed, tok);
3708 if is_implicit_flow_mapping {
3709 if sk.mark.line < start_mark.line {
3710 return Err(ScanError::new_str(
3711 start_mark,
3712 "illegal placement of ':' indicator",
3713 ));
3714 }
3715 self.insert_token(
3716 sk.token_number - self.tokens_parsed,
3717 Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
3718 );
3719 }
3720
3721 self.roll_indent(
3723 sk.mark.col,
3724 Some(sk.token_number),
3725 TokenType::BlockMappingStart,
3726 sk.mark,
3727 );
3728 self.roll_one_col_indent();
3729
3730 self.simple_keys.last_mut().unwrap().possible = false;
3731 self.disallow_simple_key();
3732 } else {
3733 if is_implicit_flow_mapping {
3734 self.tokens
3735 .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
3736 }
3737 if self.flow_level == 0 {
3739 if !self.simple_key_allowed {
3740 return Err(ScanError::new_str(
3741 start_mark,
3742 "mapping values are not allowed in this context",
3743 ));
3744 }
3745
3746 self.roll_indent(
3747 start_mark.col,
3748 None,
3749 TokenType::BlockMappingStart,
3750 start_mark,
3751 );
3752 }
3753 self.roll_one_col_indent();
3754
3755 if self.flow_level == 0 {
3756 self.allow_simple_key();
3757 } else {
3758 self.disallow_simple_key();
3759 }
3760 }
3761 self.tokens
3762 .push_back(Token(Span::empty(start_mark), TokenType::Value));
3763 self.tokens.append(&mut trailing_tokens);
3764
3765 Ok(())
3766 }
3767
3768 fn roll_indent(
3774 &mut self,
3775 col: usize,
3776 number: Option<usize>,
3777 tok: TokenType<'input>,
3778 mark: Marker,
3779 ) {
3780 if self.flow_level > 0 {
3781 return;
3782 }
3783
3784 if self.indent <= col as isize {
3788 if let Some(indent) = self.indents.last() {
3789 if !indent.needs_block_end {
3790 self.indent = indent.indent;
3791 self.indents.pop();
3792 }
3793 }
3794 }
3795
3796 if self.indent < col as isize {
3797 self.indents.push(Indent {
3798 indent: self.indent,
3799 needs_block_end: true,
3800 });
3801 self.indent = col as isize;
3802 let tokens_parsed = self.tokens_parsed;
3803 match number {
3804 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
3805 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
3806 }
3807 }
3808 }
3809
3810 fn unroll_indent(&mut self, col: isize) {
3816 if self.flow_level > 0 {
3817 return;
3818 }
3819 while self.indent > col {
3820 let indent = self.indents.pop().unwrap();
3821 self.indent = indent.indent;
3822 if indent.needs_block_end {
3823 self.tokens
3824 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
3825 }
3826 }
3827 }
3828
3829 fn roll_one_col_indent(&mut self) {
3835 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
3836 self.indents.push(Indent {
3837 indent: self.indent,
3838 needs_block_end: false,
3839 });
3840 self.indent += 1;
3841 }
3842 }
3843
3844 fn unroll_non_block_indents(&mut self) {
3846 while let Some(indent) = self.indents.last() {
3847 if indent.needs_block_end {
3848 break;
3849 }
3850 self.indent = indent.indent;
3851 self.indents.pop();
3852 }
3853 }
3854
3855 fn save_simple_key(&mut self) {
3857 if self.simple_key_allowed {
3858 let required = self.flow_level == 0
3859 && self.indent == (self.mark.col as isize)
3860 && self.indents.last().unwrap().needs_block_end;
3861
3862 if let Some(last) = self.simple_keys.last_mut() {
3863 *last = SimpleKey {
3864 mark: self.mark,
3865 possible: true,
3866 required,
3867 token_number: self.tokens_parsed + self.tokens.len(),
3868 };
3869 }
3870 }
3871 }
3872
3873 fn remove_simple_key(&mut self) -> ScanResult {
3874 let last = self.simple_keys.last_mut().unwrap();
3875 if last.possible && last.required {
3876 return Err(self.simple_key_expected());
3877 }
3878
3879 last.possible = false;
3880 Ok(())
3881 }
3882
3883 fn is_within_block(&self) -> bool {
3885 !self.indents.is_empty()
3886 }
3887
3888 fn end_implicit_mapping(&mut self, mark: Marker, flow_level: u8) {
3894 if self
3895 .implicit_flow_mapping_states
3896 .last()
3897 .is_some_and(|state| *state == ImplicitMappingState::Inside(flow_level))
3898 {
3899 *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Possible;
3900 self.set_current_flow_mapping_started(false);
3901 self.tokens
3902 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
3903 }
3904 }
3905
3906 fn current_flow_collection_is_sequence(&self) -> bool {
3907 self.flow_markers
3908 .last()
3909 .is_some_and(|(_, bracket)| *bracket == '[')
3910 }
3911
3912 fn current_flow_mapping_started(&self) -> bool {
3913 self.flow_mapping_started.last().copied().unwrap_or(false)
3914 }
3915
3916 fn set_current_flow_mapping_started(&mut self, started: bool) {
3917 if let Some(current) = self.flow_mapping_started.last_mut() {
3918 *current = started;
3919 }
3920 }
3921}
3922
/// Chomping mode of a block scalar: how the final line break and trailing empty lines are
/// interpreted.
///
/// The variant names follow the YAML specification's block chomping indicator (section
/// 8.1.1.2). The enum derives the same common traits as the other public enums of this module
/// so that it can be copied, compared and printed in diagnostics.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// The final line break and any trailing empty lines are excluded from the content.
    Strip,
    /// The final line break is preserved, but trailing empty lines are excluded.
    Clip,
    /// The final line break and any trailing empty lines are part of the content.
    Keep,
}
3935
3936#[cfg(test)]
3937mod test {
3938 use alloc::{
3939 borrow::{Cow, ToOwned},
3940 rc::Rc,
3941 string::String,
3942 vec::Vec,
3943 };
3944 use core::cell::Cell;
3945
3946 use crate::{
3947 input::{str::StrInput, BorrowedInput, BufferedInput, Input},
3948 scanner::{Scanner, Token, TokenType},
3949 };
3950
/// Character iterator that reports, through a shared counter, how many characters have been
/// pulled out of it so far.
struct CountingChars {
    /// Remaining characters to hand out.
    chars: alloc::vec::IntoIter<char>,
    /// Number of characters yielded; shared with the test that observes it.
    read: Rc<Cell<usize>>,
}

impl Iterator for CountingChars {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        // Exhaustion is not counted: bail out before touching the counter.
        let ch = self.chars.next()?;
        self.read.set(self.read.get() + 1);
        Some(ch)
    }
}
3967
/// Test input wrapping a [`StrInput`] whose slicing capabilities can be restricted.
struct SlicingOnlyInput<'input> {
    /// The real input every operation is forwarded to.
    inner: StrInput<'input>,
    /// Whether `slice_bytes` forwards to `inner` (`true`) or reports `None` (`false`).
    expose_slice: bool,
}

impl<'input> SlicingOnlyInput<'input> {
    /// Wraps `source`; `expose_slice` selects whether `slice_bytes` is made available.
    fn new(source: &'input str, expose_slice: bool) -> Self {
        Self {
            inner: StrInput::new(source),
            expose_slice,
        }
    }
}
3981
/// Forwards every [`Input`] operation to the wrapped [`StrInput`], except `slice_bytes`, which
/// is gated by `expose_slice`.
impl Input for SlicingOnlyInput<'_> {
    fn lookahead(&mut self, count: usize) {
        self.inner.lookahead(count);
    }

    fn buflen(&self) -> usize {
        self.inner.buflen()
    }

    fn bufmaxlen(&self) -> usize {
        self.inner.bufmaxlen()
    }

    fn raw_read_ch(&mut self) -> char {
        self.inner.raw_read_ch()
    }

    fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
        self.inner.raw_read_non_breakz_ch()
    }

    fn skip(&mut self) {
        self.inner.skip();
    }

    fn skip_n(&mut self, count: usize) {
        self.inner.skip_n(count);
    }

    fn peek(&self) -> char {
        self.inner.peek()
    }

    fn peek_nth(&self, n: usize) -> char {
        self.inner.peek_nth(n)
    }

    // Byte offsets are always advertised, even when slicing is withheld below.
    fn byte_offset(&self) -> Option<usize> {
        self.inner.byte_offset()
    }

    // With `expose_slice == false` this simulates an input that advertises offsets but cannot
    // provide the corresponding slice.
    fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
        if self.expose_slice {
            self.inner.slice_bytes(start, end)
        } else {
            None
        }
    }
}
4031
/// Never hands out slices borrowed for `'input`.
impl<'input> BorrowedInput<'input> for SlicingOnlyInput<'input> {
    // Always `None`, regardless of the requested range.
    fn slice_borrowed(&self, _start: usize, _end: usize) -> Option<&'input str> {
        None
    }
}
4037
/// A plain ASCII letter is accepted as an anchor character.
#[test]
fn test_is_anchor_char() {
    assert!(super::is_anchor_char('x'));
}
4043
/// The scanner must yield the first scalar of a long flow sequence without reading the whole
/// input: the amount read is bounded by the simple-key lookahead limit plus some slack.
#[test]
fn flow_simple_key_length_limit_bounds_buffering() {
    let mut yaml = String::from("[\n\"start\"\n");
    yaml.push_str(&"\"x\"\n".repeat(600));
    let total_chars = yaml.chars().count();

    let read = Rc::new(Cell::new(0));
    let source = CountingChars {
        chars: yaml.chars().collect::<Vec<_>>().into_iter(),
        read: Rc::clone(&read),
    };
    let mut scanner = Scanner::new(BufferedInput::new(source));

    let stream_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(stream_start.1, TokenType::StreamStart(_)));

    let sequence_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(sequence_start.1, TokenType::FlowSequenceStart));

    let first_scalar = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        first_scalar.1,
        TokenType::Scalar(_, ref value) if value == "start"
    ));

    let chars_read = read.get();
    assert!(
        chars_read < total_chars,
        "scanner consumed all {total_chars} chars before yielding the first flow scalar"
    );
    assert!(
        chars_read <= super::SIMPLE_KEY_MAX_LOOKAHEAD + 128,
        "scanner read {} chars before yielding the first flow scalar",
        read.get()
    );
}
4081
/// Capturing a comment leaves `leading_whitespace` set, for both a string input and a
/// streaming (buffered) input.
#[test]
fn comment_capture_does_not_change_leading_whitespace() {
    // String input.
    let mut str_scanner = Scanner::new(StrInput::new("# comment\n"));
    let str_token = str_scanner.scan_comment_token().unwrap();
    assert!(str_scanner.leading_whitespace);
    assert!(
        matches!(str_token.1, TokenType::Comment(ref comment) if comment.text == " comment")
    );

    // Streaming input, primed with one character of lookahead.
    let mut stream_scanner = Scanner::new(BufferedInput::new("# streaming\n".chars()));
    stream_scanner.input.lookahead(1);
    let stream_token = stream_scanner.scan_comment_token().unwrap();
    assert!(stream_scanner.leading_whitespace);
    assert!(
        matches!(stream_token.1, TokenType::Comment(ref comment) if comment.text == " streaming")
    );
}
4099
/// When the input can slice but not lend for `'input`, the comment text is an owned copy.
#[test]
fn comment_capture_falls_back_to_owned_slice_when_borrow_unavailable() {
    let input = SlicingOnlyInput::new("# sliced\n", true);
    let mut scanner = Scanner::new(input);
    scanner.input.lookahead(2);
    assert_eq!(scanner.input.peek_nth(1), ' ');

    let token = scanner.scan_comment_token().unwrap();

    let text_is_owned_copy = matches!(
        token.1,
        TokenType::Comment(ref comment)
            if matches!(comment.text, Cow::Owned(ref text) if text == " sliced")
    );
    assert!(text_is_owned_copy);
}
4111
/// An input that advertises byte offsets but refuses to slice makes comment capture fail with
/// an internal error.
#[test]
fn comment_capture_errors_when_offsets_have_no_slice() {
    let input = SlicingOnlyInput::new("# broken\n", false);
    let mut scanner = Scanner::new(input);

    let failure = scanner.scan_comment_token().unwrap_err();

    assert_eq!(
        failure.info(),
        "internal error: input advertised offsets but did not provide a slice"
    );
}
4123
/// Skipping YAML whitespace swallows a comment line without queueing any token and leaves the
/// mark at the start of the following line.
#[test]
fn comment_skipping_path_consumes_comment_without_tokenizing_it() {
    let source = "# skipped\nnext: value\n";
    let mut scanner = Scanner::new(StrInput::new(source));

    scanner.skip_yaml_whitespace().unwrap();

    assert!(scanner.tokens.is_empty());
    let position = (scanner.mark.line(), scanner.mark.col());
    assert_eq!(position.0, 2);
    assert_eq!(position.1, 0);
}
4134
/// An error caused by content after comments is reported only once every preceding comment
/// token has been delivered.
#[test]
fn deferred_error_waits_for_all_comment_tokens() {
    let mut scanner = Scanner::new(StrInput::new("# first\n# second\n@\n"));

    let stream_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(stream_start.1, TokenType::StreamStart(_)));

    for expected in [" first", " second"] {
        let token = scanner.next_token().unwrap().unwrap();
        assert!(matches!(
            token.1,
            TokenType::Comment(ref comment) if comment.text == expected
        ));
    }

    let failure = scanner.next_token().unwrap_err();
    assert!(failure.info().contains("unexpected character"));
}
4156
/// An anchor name scanned from a string input borrows from that input.
#[test]
fn anchor_name_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("&anch\n"));

    // Advance to the first anchor token.
    let name = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Anchor(name) = tok.1 {
            break name;
        }
    };

    assert!(matches!(name, Cow::Borrowed("anch")));
}
4173
/// A control character ends the anchor name; if a scalar follows, it starts with that
/// control character.
#[test]
fn anchor_name_rejects_non_printable_control_chars() {
    let mut scanner = Scanner::new(StrInput::new("&foo\u{0001}\n"));

    // Advance to the first anchor token.
    let name = loop {
        let tok = scanner
            .next_token()
            .expect("scanning should not fail")
            .expect("scanner must eventually produce a token");
        if let TokenType::Anchor(name) = tok.1 {
            break name;
        }
    };
    assert!(matches!(name, Cow::Borrowed("foo")));

    let next = scanner.next_token().expect("scanning should not fail");
    if let Some(Token(_, TokenType::Scalar(_, rest))) = next {
        assert!(rest.starts_with('\u{0001}'));
    }
}
4194
/// A control character ends the alias name; if a scalar follows, it starts with that
/// control character.
#[test]
fn alias_name_rejects_non_printable_control_chars() {
    let mut scanner = Scanner::new(StrInput::new("*foo\u{0001}\n"));

    // Advance to the first alias token.
    let name = loop {
        let tok = scanner
            .next_token()
            .expect("scanning should not fail")
            .expect("scanner must eventually produce a token");
        if let TokenType::Alias(name) = tok.1 {
            break name;
        }
    };
    assert!(matches!(name, Cow::Borrowed("foo")));

    let next = scanner.next_token().expect("scanning should not fail");
    if let Some(Token(_, TokenType::Scalar(_, rest))) = next {
        assert!(rest.starts_with('\u{0001}'));
    }
}
4214
/// An alias name scanned from a string input borrows from that input.
#[test]
fn alias_name_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("*anch\n"));

    // Advance to the first alias token.
    let name = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Alias(name) = tok.1 {
            break name;
        }
    };

    assert!(matches!(name, Cow::Borrowed("anch")));
}
4230
/// Both the handle and the prefix of a `%TAG` directive borrow from a string input.
#[test]
fn tag_directive_parts_are_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("%TAG !e! tag:example.com,2000:app/\n"));

    // Advance to the tag directive token.
    let (handle, prefix) = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::TagDirective(handle, prefix) = tok.1 {
            break (handle, prefix);
        }
    };

    assert!(matches!(handle, Cow::Borrowed("!e!")));
    assert!(matches!(prefix, Cow::Borrowed("tag:example.com,2000:app/")));
}
4248
/// A single-word plain scalar borrows from a string input.
#[test]
fn plain_scalar_is_borrowed_when_whitespace_free_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("foo\n"));

    // Advance to the first scalar token.
    let value = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = tok.1 {
            break value;
        }
    };

    assert!(matches!(value, Cow::Borrowed("foo")));
}
4264
/// A single-line plain scalar containing an inner space still borrows from a string input.
#[test]
fn plain_scalar_is_borrowed_when_whitespace_present_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("foo bar\n"));

    // Advance to the first scalar token.
    let value = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = tok.1 {
            break value;
        }
    };

    assert!(matches!(value, Cow::Borrowed("foo bar")));
}
4280
/// A single-quoted scalar without escapes borrows its contents from a string input.
#[test]
fn single_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("'foo bar'\n"));

    // Advance to the first scalar token.
    let value = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = tok.1 {
            break value;
        }
    };

    assert!(matches!(value, Cow::Borrowed("foo bar")));
}
4296
/// A doubled quote inside a single-quoted scalar forces an owned, unescaped copy.
#[test]
fn single_quoted_scalar_is_owned_when_quote_is_escaped_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("'foo''bar'\n"));

    // Advance to the first scalar token.
    let value = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = tok.1 {
            break value;
        }
    };

    assert!(matches!(value, Cow::Owned(_)));
    assert_eq!(&*value, "foo'bar");
}
4313
/// A double-quoted scalar without escapes borrows its contents from a string input.
#[test]
fn double_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"foo bar\"\n"));

    // Advance to the first scalar token.
    let value = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = tok.1 {
            break value;
        }
    };

    assert!(matches!(value, Cow::Borrowed("foo bar")));
}
4329
/// A backslash escape inside a double-quoted scalar forces an owned, decoded copy.
#[test]
fn double_quoted_scalar_is_owned_when_escape_sequence_present_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"foo\\nbar\"\n"));

    // Advance to the first scalar token.
    let value = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = tok.1 {
            break value;
        }
    };

    assert!(matches!(value, Cow::Owned(_)));
    assert_eq!(&*value, "foo\nbar");
}
4346
/// The scalar following the `Key` token of a plain mapping key borrows from a string input.
#[test]
fn plain_key_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("mykey: value\n"));

    let mut found_key = false;
    let mut key_value: Option<Cow<'_, str>> = None;

    // Walk the token stream until the first scalar that comes after a `Key` token.
    while let Some(tok) = scanner
        .next_token()
        .expect("valid YAML must scan without errors")
    {
        match tok.1 {
            TokenType::Key => found_key = true,
            TokenType::Scalar(_, value) if found_key => {
                key_value = Some(value);
                break;
            }
            _ => {}
        }
    }

    assert!(found_key, "expected to find a Key token");
    let key_value = key_value.expect("expected to find a scalar after Key token");
    assert!(
        matches!(key_value, Cow::Borrowed("mykey")),
        "key should be borrowed, got: {key_value:?}"
    );
}
4378
/// The scalar following the `Key` token of an escape-free quoted key borrows from a string
/// input.
#[test]
fn quoted_key_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"mykey\": value\n"));

    let mut found_key = false;
    let mut key_value: Option<Cow<'_, str>> = None;

    // Walk the token stream until the first scalar that comes after a `Key` token.
    while let Some(tok) = scanner
        .next_token()
        .expect("valid YAML must scan without errors")
    {
        match tok.1 {
            TokenType::Key => found_key = true,
            TokenType::Scalar(_, value) if found_key => {
                key_value = Some(value);
                break;
            }
            _ => {}
        }
    }

    assert!(found_key, "expected to find a Key token");
    let key_value = key_value.expect("expected to find a scalar after Key token");
    assert!(
        matches!(key_value, Cow::Borrowed("mykey")),
        "quoted key should be borrowed when verbatim, got: {key_value:?}"
    );
}
4409
/// Handle and suffix of a `!!str` tag both borrow from a string input.
#[test]
fn tag_handle_and_suffix_are_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!!str foo\n"));

    // Advance to the first tag token.
    let (handle, suffix) = loop {
        let tok = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Tag(handle, suffix) = tok.1 {
            break (handle, suffix);
        }
    };

    assert!(
        matches!(handle, Cow::Borrowed("!!")),
        "tag handle should be borrowed, got: {handle:?}"
    );
    assert!(
        matches!(suffix, Cow::Borrowed("str")),
        "tag suffix should be borrowed, got: {suffix:?}"
    );
}
4433
// For the local tag `!mytag`, the handle is expected to be a borrowed `!` and
// the suffix a borrowed `mytag`.
#[test]
fn local_tag_suffix_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!mytag foo\n"));

    // Pull tokens until the tag appears, then check it outside the loop.
    let (handle, suffix) = loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Tag(handle, suffix) = kind {
            break (handle, suffix);
        }
    };

    assert!(
        matches!(handle, Cow::Borrowed("!")),
        "local tag handle should be '!', got: {handle:?}"
    );
    assert!(
        matches!(suffix, Cow::Borrowed("mytag")),
        "local tag suffix should be borrowed, got: {suffix:?}"
    );
}
4457
// A tag suffix containing a `%20` escape is expected to be `Cow::Owned` and
// decoded to "my tag", while the `!!` handle stays borrowed.
#[test]
fn tag_with_uri_escape_is_owned_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!!my%20tag foo\n"));

    // Pull tokens until the tag appears, then check it outside the loop.
    let (handle, suffix) = loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Tag(handle, suffix) = kind {
            break (handle, suffix);
        }
    };

    assert!(
        matches!(handle, Cow::Borrowed("!!")),
        "tag handle should still be borrowed, got: {handle:?}"
    );
    assert!(
        matches!(suffix, Cow::Owned(_)),
        "tag suffix with URI escape should be owned, got: {suffix:?}"
    );
    assert_eq!(&*suffix, "my tag");
}
4482
// Exercises the pending-whitespace bookkeeping of `FlowScalarBuf` directly,
// for both the borrowed and the owned variant.
#[test]
fn flow_scalar_buffer_tracks_pending_whitespace() {
    use super::FlowScalarBuf;

    let mut buf = FlowScalarBuf::new_borrowed(2);

    // Noting a whitespace run and committing it: `end` becomes 8 and nothing stays pending.
    buf.note_pending_ws(5, 8);
    buf.commit_pending_ws();
    assert!(matches!(
        buf,
        FlowScalarBuf::Borrowed {
            end: 8,
            pending_ws_start: None,
            pending_ws_end: 8,
            ..
        }
    ));

    // Noting another run and discarding it leaves the committed state unchanged.
    buf.note_pending_ws(9, 11);
    buf.discard_pending_ws();
    assert!(matches!(
        buf,
        FlowScalarBuf::Borrowed {
            end: 8,
            pending_ws_start: None,
            pending_ws_end: 8,
            ..
        }
    ));
    // A borrowed buffer exposes no owned string.
    assert!(buf.as_owned_mut().is_none());

    // An owned buffer exposes its string for in-place appends.
    let mut owned_buf = FlowScalarBuf::new_owned();
    owned_buf.as_owned_mut().unwrap().push_str("owned");
    assert!(matches!(owned_buf, FlowScalarBuf::Owned(ref s) if s == "owned"));
}
4516
4517 fn first_scanner_error_info(input: &str) -> String {
4518 let mut scanner = Scanner::new(StrInput::new(input));
4519 loop {
4520 match scanner.next_token() {
4521 Ok(Some(_)) => {}
4522 Ok(None) => panic!("expected scanner error"),
4523 Err(error) => return error.info().to_owned(),
4524 }
4525 }
4526 }
4527
4528 fn first_scalar_value(input: &str) -> String {
4529 let mut scanner = Scanner::new(StrInput::new(input));
4530 loop {
4531 match scanner.next_token().expect("scanner should not error") {
4532 Some(Token(_, TokenType::Scalar(_, value))) => return value.into_owned(),
4533 Some(_) => {}
4534 None => panic!("expected scalar token"),
4535 }
4536 }
4537 }
4538
// Draining the scanner through `next()` on an unterminated quote should leave
// the error retrievable via `get_error()`, and further `next()` calls yield `None`.
#[test]
fn iterator_next_records_error_and_then_stays_empty() {
    let mut scanner = Scanner::new(StrInput::new("\"unterminated"));

    // Exhaust the scanner; the produced tokens themselves are irrelevant.
    loop {
        if scanner.next().is_none() {
            break;
        }
    }

    let recorded = scanner
        .get_error()
        .expect("scanner should retain the error");
    assert_eq!(recorded.info(), "unclosed quote");
    assert!(scanner.next().is_none());
}
4551
// After the `StreamEnd` token has been produced for an empty input, the
// scanner reports both stream flags and `next_token()` returns `Ok(None)`.
#[test]
fn next_token_returns_none_after_stream_end() {
    let mut scanner = Scanner::new(StrInput::new(""));

    // Stop at `StreamEnd`, or when the scanner has nothing more to give.
    loop {
        match scanner.next_token().unwrap() {
            Some(Token(_, TokenType::StreamEnd)) | None => break,
            Some(_) => {}
        }
    }

    assert!(scanner.stream_started());
    assert!(scanner.stream_ended());
    assert!(scanner.next_token().unwrap().is_none());
}
4566
// A lone `%` with no directive name must be reported as a missing directive name.
#[test]
fn directive_name_must_be_present() {
    let info = first_scanner_error_info("%\n");
    assert_eq!(
        info,
        "while scanning a directive, could not find expected directive name"
    );
}
4574
// `%YAML 1` has no `.` after the first version number and must be rejected.
#[test]
fn yaml_directive_requires_dot_between_version_numbers() {
    let info = first_scanner_error_info("%YAML 1\n");
    assert_eq!(
        info,
        "while scanning a YAML directive, did not find expected digit or '.' character"
    );
}
4582
// `%YAML .2` has no digits before the `.` and must be rejected.
#[test]
fn yaml_directive_requires_major_version_number() {
    let info = first_scanner_error_info("%YAML .2\n");
    assert_eq!(
        info,
        "while scanning a YAML directive, did not find expected version number"
    );
}
4590
// A ten-digit version component in a `%YAML` directive must be rejected as too long.
#[test]
fn yaml_directive_rejects_extremely_long_version_number() {
    let info = first_scanner_error_info("%YAML 1234567890.2\n");
    assert_eq!(
        info,
        "while scanning a YAML directive, found extremely long version number"
    );
}
4598
// The `%TAG` handle `!bad` lacks the closing `!` and must be rejected.
#[test]
fn tag_directive_handle_must_end_with_bang() {
    let info = first_scanner_error_info("%TAG !bad tag:example.com,2024:\n");
    assert_eq!(
        info,
        "while parsing a tag directive, did not find expected '!'"
    );
}
4606
// The `%TAG` handle `bad!` lacks the leading `!` and must be rejected.
#[test]
fn tag_directive_handle_must_start_with_bang() {
    let info = first_scanner_error_info("%TAG bad! tag:example.com,2024:\n");
    assert_eq!(info, "while scanning a tag, did not find expected '!'");
}
4614
// A `%TAG` prefix that begins with a backtick must be rejected.
#[test]
fn tag_directive_prefix_must_start_with_tag_character() {
    let info = first_scanner_error_info("%TAG !e! `bad\n");
    assert_eq!(info, "invalid global tag character");
}
4622
// A `^` in the middle of a `%TAG` prefix must be reported as missing
// whitespace or line break.
#[test]
fn tag_directive_prefix_must_end_before_invalid_content() {
    let info = first_scanner_error_info("%TAG !e! tag:example.com^suffix\n");
    assert_eq!(
        info,
        "while scanning TAG, did not find expected whitespace or line break"
    );
}
4630
// A `%TAG` prefix containing a `%20` escape is expected to be `Cow::Owned`
// and decoded, while the `!e!` handle stays borrowed.
#[test]
fn tag_directive_prefix_with_uri_escape_is_owned_and_decoded() {
    let mut scanner =
        Scanner::new(StrInput::new("%TAG !e! tag:example.com,2024:some%20app/\n"));

    // Skip forward to the tag directive; the `expect`s abort the test if none shows up.
    let (handle, prefix) = loop {
        let token = scanner
            .next_token()
            .expect("valid directive should scan")
            .expect("scanner must produce a directive token");
        if let TokenType::TagDirective(handle, prefix) = token.1 {
            break (handle, prefix);
        }
    };

    // Print the actual value on failure, as the other Cow-ownership tests in this module do.
    assert!(
        matches!(handle, Cow::Borrowed("!e!")),
        "tag directive handle should be borrowed, got: {handle:?}"
    );
    assert!(
        matches!(prefix, Cow::Owned(_)),
        "tag directive prefix with URI escape should be owned, got: {prefix:?}"
    );
    assert_eq!(&*prefix, "tag:example.com,2024:some app/");
}
4649
// A bare `!` followed by a space is expected to scan as a tag with an empty
// handle and the suffix `!`.
#[test]
fn bare_bang_tag_scans_as_non_specific_tag() {
    let mut scanner = Scanner::new(StrInput::new("! foo\n"));

    // Pull tokens until the tag appears, then check it outside the loop.
    let (handle, suffix) = loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid tag should scan")
            .expect("scanner must produce a tag token");
        if let TokenType::Tag(handle, suffix) = kind {
            break (handle, suffix);
        }
    };

    assert_eq!(&*handle, "");
    assert_eq!(&*suffix, "!");
}
4666
// A `,` directly after the tag suffix in `!foo,bar` must be rejected.
#[test]
fn tag_requires_separation_after_suffix() {
    let info = first_scanner_error_info("!foo,bar\n");
    assert_eq!(
        info,
        "while scanning a tag, did not find expected whitespace or line break"
    );
}
4674
// An empty verbatim tag `!<>` must be rejected for lacking a URI.
#[test]
fn verbatim_tag_requires_uri() {
    let info = first_scanner_error_info("!<> foo\n");
    assert_eq!(info, "while parsing a tag, did not find expected tag URI");
}
4682
// A verbatim tag opened with `!<` but never closed with `>` must be rejected.
#[test]
fn verbatim_tag_requires_closing_angle_bracket() {
    let info = first_scanner_error_info("!<tag:yaml.org,2002:str foo\n");
    assert_eq!(
        info,
        "while scanning a verbatim tag, did not find the expected '>'"
    );
}
4690
// `%zz` in a tag is not a hex escape and must be rejected.
#[test]
fn tag_uri_escape_requires_hex_digits() {
    let info = first_scanner_error_info("!!bad%zz foo\n");
    assert_eq!(
        info,
        "while parsing a tag, found an invalid escape sequence"
    );
}
4698
// The escape `%80` must be rejected as an incorrect leading UTF-8 byte.
#[test]
fn tag_uri_escape_rejects_bad_leading_utf8_byte() {
    let info = first_scanner_error_info("!!bad%80 foo\n");
    assert_eq!(
        info,
        "while parsing a tag, found an incorrect leading UTF-8 byte"
    );
}
4706
// The escape pair `%C2%41` must be rejected as an incorrect trailing UTF-8 byte.
#[test]
fn tag_uri_escape_rejects_bad_trailing_utf8_byte() {
    let info = first_scanner_error_info("!!bad%C2%41 foo\n");
    assert_eq!(
        info,
        "while parsing a tag, found an incorrect trailing UTF-8 byte"
    );
}
4714
// The escape sequence `%F4%90%80%80` must be rejected as an invalid UTF-8 codepoint.
#[test]
fn tag_uri_escape_rejects_invalid_utf8_codepoint() {
    let info = first_scanner_error_info("!!bad%F4%90%80%80 foo\n");
    assert_eq!(
        info,
        "while parsing a tag, found an invalid UTF-8 codepoint"
    );
}
4722
// Both `&` and `*` followed by a space instead of a name must produce the
// same anchor/alias error.
#[test]
fn anchors_and_aliases_require_names() {
    let expected =
        "while scanning an anchor or alias, did not find expected alphabetic or numeric character";

    for input in ["& \n", "* \n"] {
        assert_eq!(first_scanner_error_info(input), expected);
    }
}
4731
// Text on the same line after `...` must be rejected.
#[test]
fn document_end_marker_rejects_trailing_content() {
    let info = first_scanner_error_info("... trailing\n");
    assert_eq!(info, "invalid content after document end marker");
}
4739
// A `@` where a token is expected must be reported as an unexpected character.
#[test]
fn reserved_indicators_are_rejected_outside_directives() {
    let info = first_scanner_error_info(" @\n");
    assert_eq!(info, "unexpected character: `@'");
}
4747
// A `- ` block entry indicator inside a flow sequence must be rejected.
#[test]
fn flow_block_entry_indicator_is_rejected() {
    let info = first_scanner_error_info("[- ]\n");
    assert_eq!(info, r#""-" is only valid inside a block"#);
}
4755
// A second `-` reached after a tab separator must produce the dedicated
// whitespace error.
#[test]
fn block_entry_after_tabbed_separator_reports_specific_error() {
    let info = first_scanner_error_info("-\t- value\n");
    assert_eq!(info, "'-' must be followed by a valid YAML whitespace");
}
4763
// A `---` line reached while a `[` is still open must report the unclosed bracket.
#[test]
fn document_indicator_reports_unclosed_flow_collection() {
    let info = first_scanner_error_info("[\n---\n");
    assert_eq!(info, "unclosed bracket '['");
}
4768
// Non-comment text after the block scalar header `|+` must be rejected.
#[test]
fn block_scalar_header_rejects_trailing_content() {
    let info = first_scanner_error_info("|+ trailing\n");
    assert_eq!(
        info,
        "while scanning a block scalar, did not find expected comment or line break"
    );
}
4776
// An indentation indicator of `0` must be rejected, with or without a
// chomping indicator before it.
#[test]
fn block_scalar_rejects_zero_indent_indicator() {
    let expected = "while scanning a block scalar, found an indentation indicator equal to 0";

    for input in ["|0\n", "|+0\n"] {
        assert_eq!(first_scanner_error_info(input), expected);
    }
}
4784
// An empty block scalar at end of input: `|-` is expected to yield "" and
// `|+` is expected to yield "\n".
#[test]
fn empty_block_scalar_at_eof_honors_chomping() {
    let cases = [("|-\n", ""), ("|+\n", "\n")];

    for (input, expected) in cases {
        assert_eq!(first_scalar_value(input), expected);
    }
}
4790
// A `|1` block scalar immediately followed by `...` is expected to scan as
// an empty scalar.
#[test]
fn explicit_indent_block_scalar_can_end_at_document_marker() {
    let value = first_scalar_value("|1\n...\n");
    assert_eq!(value, "");
}
4795
// A `|2` block scalar whose content line starts at column 0 must be rejected.
#[test]
fn root_explicit_indent_block_scalar_rejects_underindented_content() {
    let info = first_scanner_error_info("|2\nx\n");
    assert_eq!(info, "wrongly indented line in block scalar");
}
4803
// A `---` line inside an open double-quoted scalar must be rejected.
#[test]
fn quoted_scalar_rejects_document_indicator_at_line_start() {
    let info = first_scanner_error_info("\"one\n---\ntwo\"\n");
    assert_eq!(
        info,
        "while scanning a quoted scalar, found unexpected document indicator"
    );
}
4811
// A continuation line of a quoted scalar that starts with a tab must be rejected.
#[test]
fn quoted_scalar_rejects_tab_indentation_after_line_break() {
    let info = first_scanner_error_info("a: \"one\n\tbad\"\n");
    assert_eq!(info, "tab cannot be used as indentation");
}
4819
// A continuation line of a quoted mapping value that starts at column 0 must
// be rejected.
#[test]
fn quoted_scalar_rejects_underindented_continuation() {
    let info = first_scanner_error_info("a: \"one\nbad\"\n");
    assert_eq!(info, "invalid indentation in multiline quoted scalar");
}
4827
// A scalar at column 0 inside a flow sequence nested under a block mapping
// must be rejected.
#[test]
fn indented_flow_scalar_reports_invalid_indentation() {
    let info = first_scanner_error_info("a:\n [\nfoo]\n");
    assert_eq!(info, "invalid indentation");
}
4835
// The input `a:\n&b\n- c\n` must be rejected with the simple-key error.
#[test]
fn required_simple_key_requires_value_at_stream_end() {
    let info = first_scanner_error_info("a:\n&b\n- c\n");
    assert_eq!(info, "simple key expect ':'");
}
4843
// A `-` immediately followed by `]` inside a flow sequence must be rejected.
#[test]
fn plain_scalar_rejects_dash_before_flow_indicator() {
    let info = first_scanner_error_info("[-]\n");
    assert_eq!(
        info,
        "plain scalar cannot start with '-' followed by ,[]{}"
    );
}
4851
// A tab in the separation after the `?` indicator must be rejected.
#[test]
fn explicit_key_rejects_tab_after_indicator() {
    let info = first_scanner_error_info("? \tfoo\n");
    assert_eq!(info, "tabs disallowed in this context");
}
4859
// In `[a:[]]` the `:` is directly followed by `[`, which must be rejected.
#[test]
fn flow_mapping_rejects_adjacent_collection_value_after_plain_key() {
    let info = first_scanner_error_info("[a:[]]\n");
    assert_eq!(info, "':' may not precede any of `[{` in flow mapping");
}
4867
// In a flow sequence, a `:` on the line after its key must be rejected.
#[test]
fn implicit_flow_mapping_colon_cannot_move_to_next_line() {
    let info = first_scanner_error_info("[foo\n: bar]\n");
    assert_eq!(info, "illegal placement of ':' indicator");
}
4875}