1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use alloc::{
13 borrow::{Cow, ToOwned},
14 collections::VecDeque,
15 string::String,
16 vec::Vec,
17};
18use core::{char, fmt};
19
20use crate::{
21 char_traits::{
22 as_hex, is_anchor_char, is_blank_or_breakz, is_bom, is_break, is_breakz, is_flow, is_hex,
23 is_tag_char, is_uri_char,
24 },
25 input::{BorrowedInput, SkipTabs},
26};
27
/// Maximum distance, in characters, between the start of a candidate simple key and the
/// current scanner position. Once exceeded, `stale_simple_keys` stops treating the candidate
/// as possible (or reports an error if the key was required).
const SIMPLE_KEY_MAX_LOOKAHEAD: usize = 1024;
30
/// The character encoding of a scanned stream, carried by [`TokenType::StreamStart`].
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8 encoding.
    Utf8,
}
37
/// The presentation style of a scalar, as attached to [`TokenType::Scalar`].
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum ScalarStyle {
    /// A plain (unquoted) scalar.
    Plain,
    /// A scalar written between single quotes.
    SingleQuoted,
    /// A scalar written between double quotes.
    DoubleQuoted,

    /// A literal block scalar.
    Literal,
    /// A folded block scalar.
    Folded,
}
62
/// Offsets of a [`Marker`] into the input: always in characters, optionally in bytes.
///
/// Two values compare equal when their character offsets match; the byte offset is
/// deliberately left out of the comparison (see the manual [`PartialEq`] implementation).
#[derive(Clone, Copy, Debug, Default)]
pub struct MarkerOffsets {
    /// Offset counted in characters.
    chars: usize,
    /// Offset counted in bytes, if one has been recorded.
    bytes: Option<usize>,
}

impl PartialEq for MarkerOffsets {
    /// Compares character offsets only; `bytes` never influences the result.
    fn eq(&self, rhs: &Self) -> bool {
        let (Self { chars: ours, .. }, Self { chars: theirs, .. }) = (self, rhs);
        ours == theirs
    }
}

impl Eq for MarkerOffsets {}

/// A position in the input: character index, line and column, plus an optional byte offset.
///
/// Because equality is derived over [`MarkerOffsets`], two markers that differ only in
/// their byte offset are considered equal.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub struct Marker {
    /// Character (and optional byte) offsets.
    offsets: MarkerOffsets,
    /// Line number (the scanner starts counting at 1).
    line: usize,
    /// Column number (the scanner starts counting at 0).
    col: usize,
}

impl Marker {
    /// Builds a marker at character `index`, `line` and `col`, with no byte offset.
    #[must_use]
    pub fn new(index: usize, line: usize, col: usize) -> Marker {
        let offsets = MarkerOffsets {
            chars: index,
            bytes: None,
        };
        Self { offsets, line, col }
    }

    /// Returns this marker with its byte offset replaced by `byte_offset`.
    #[must_use]
    pub fn with_byte_offset(self, byte_offset: Option<usize>) -> Marker {
        Self {
            offsets: MarkerOffsets {
                bytes: byte_offset,
                ..self.offsets
            },
            ..self
        }
    }

    /// Returns the offset in characters.
    #[must_use]
    pub fn index(&self) -> usize {
        let MarkerOffsets { chars, .. } = self.offsets;
        chars
    }

    /// Returns the offset in bytes, if one was recorded.
    #[must_use]
    pub fn byte_offset(&self) -> Option<usize> {
        let MarkerOffsets { bytes, .. } = self.offsets;
        bytes
    }

    /// Returns the line number.
    #[must_use]
    pub fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Returns the column number.
    #[must_use]
    pub fn col(&self) -> usize {
        let Self { col, .. } = *self;
        col
    }
}
144
145#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
147pub struct Span {
148 pub start: Marker,
150 pub end: Marker,
152
153 pub indent: Option<usize>,
158
159 pub tag_start: Option<Marker>,
168}
169
170impl Span {
171 #[must_use]
173 pub fn new(start: Marker, end: Marker) -> Span {
174 Span {
175 start,
176 end,
177 indent: None,
178 tag_start: None,
179 }
180 }
181
182 #[must_use]
189 pub fn empty(mark: Marker) -> Span {
190 Span {
191 start: mark,
192 end: mark,
193 indent: None,
194 tag_start: None,
195 }
196 }
197
198 #[must_use]
200 pub fn with_indent(mut self, indent: Option<usize>) -> Span {
201 self.indent = indent;
202 self
203 }
204
205 #[must_use]
207 pub fn with_tag_start(mut self, tag_start: Option<Marker>) -> Span {
208 self.tag_start = tag_start;
209 self
210 }
211
212 #[must_use]
218 pub fn tag_start(&self) -> Option<Marker> {
219 self.tag_start
220 }
221
222 #[must_use]
224 pub fn len(&self) -> usize {
225 self.end.index() - self.start.index()
226 }
227
228 #[must_use]
230 pub fn is_empty(&self) -> bool {
231 self.len() == 0
232 }
233
234 #[must_use]
236 pub fn byte_range(&self) -> Option<core::ops::Range<usize>> {
237 let start = self.start.byte_offset()?;
238 let end = self.end.byte_offset()?;
239 Some(start..end)
240 }
241
242 #[must_use]
245 pub fn slice<'source>(&self, source: &'source str) -> Option<&'source str> {
246 source.get(self.byte_range()?)
247 }
248}
249
/// Where a [`Comment`] sits relative to the content around it.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub enum Placement {
    /// NOTE(review): presumably a comment placed on the line(s) above the item it
    /// describes; the scanner code visible here never assigns this variant — confirm.
    Above,
    /// A comment that starts after non-blank content on the same line.
    Right,
    /// A comment preceded only by whitespace on its line. This is the default placement.
    #[default]
    Free,
    /// NOTE(review): presumably a trailing comment at the end of a collection or
    /// document; the scanner code visible here never assigns this variant — confirm.
    Last,
}
288
289#[derive(Clone, PartialEq, Debug, Eq)]
295pub struct Comment<'input> {
296 pub span: Span,
298 pub text: Cow<'input, str>,
302 pub placement: Placement,
304}
305
306impl<'input> Comment<'input> {
307 #[must_use]
312 pub fn new(span: Span, text: impl Into<Cow<'input, str>>) -> Self {
313 Self {
314 span,
315 text: text.into(),
316 placement: Placement::Free,
317 }
318 }
319
320 #[must_use]
322 pub fn with_placement(mut self, placement: Placement) -> Self {
323 self.placement = placement;
324 self
325 }
326
327 #[must_use]
331 pub fn trimmed_text(&self) -> &str {
332 self.text.trim()
333 }
334}
335
336impl AsRef<str> for Comment<'_> {
337 fn as_ref(&self) -> &str {
338 self.text.as_ref()
339 }
340}
341
342#[derive(Clone, PartialEq, Debug, Eq)]
344pub struct ScanError {
345 mark: Marker,
347 info: String,
349}
350
351impl ScanError {
352 #[must_use]
354 #[cold]
355 pub fn new(loc: Marker, info: String) -> ScanError {
356 ScanError { mark: loc, info }
357 }
358
359 #[must_use]
361 #[cold]
362 pub fn new_str(loc: Marker, info: &str) -> ScanError {
363 ScanError {
364 mark: loc,
365 info: info.to_owned(),
366 }
367 }
368
369 #[cold]
370 pub(crate) fn into_result<T>(self) -> Result<T, ScanError> {
371 Err(self)
372 }
373
374 #[must_use]
376 pub fn marker(&self) -> &Marker {
377 &self.mark
378 }
379
380 #[must_use]
382 pub fn info(&self) -> &str {
383 self.info.as_ref()
384 }
385}
386
387impl fmt::Display for ScanError {
388 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
389 write!(
390 f,
391 "{} at char {} line {} column {}",
392 self.info,
393 self.mark.index(),
394 self.mark.line(),
395 self.mark.col() + 1
396 )
397 }
398}
399
400impl core::error::Error for ScanError {}
401
/// The kind of a scanner [`Token`], with its payload where applicable.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType<'input> {
    /// The start of the stream, with its encoding.
    StreamStart(TEncoding),
    /// The end of the stream.
    StreamEnd,
    /// A version directive.
    VersionDirective(
        /// Major version number.
        u32,
        /// Minor version number.
        u32,
    ),
    /// A tag directive.
    TagDirective(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag prefix.
        Cow<'input, str>,
    ),
    /// A document start indicator.
    DocumentStart,
    /// A document end indicator.
    DocumentEnd,
    /// The start of a block sequence.
    BlockSequenceStart,
    /// The start of a block mapping.
    BlockMappingStart,
    /// The end of a block collection.
    BlockEnd,
    /// The start of a flow sequence (`[`).
    FlowSequenceStart,
    /// The end of a flow sequence (`]`).
    FlowSequenceEnd,
    /// The start of a flow mapping (`{`).
    FlowMappingStart,
    /// The end of a flow mapping (`}`).
    FlowMappingEnd,
    /// An entry in a block sequence.
    BlockEntry,
    /// An entry separator in a flow collection (`,`).
    FlowEntry,
    /// A mapping key.
    Key,
    /// A mapping value.
    Value,
    /// An alias, with its name.
    Alias(Cow<'input, str>),
    /// An anchor, with its name.
    Anchor(Cow<'input, str>),
    /// A tag.
    Tag(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag suffix.
        Cow<'input, str>,
    ),
    /// A scalar, with its style and value.
    Scalar(ScalarStyle, Cow<'input, str>),
    /// A comment.
    Comment(
        /// The comment, including its span, text and placement.
        Comment<'input>,
    ),
    /// A directive other than the version and tag directives.
    ReservedDirective(
        /// The directive name.
        String,
        /// The directive parameters.
        Vec<String>,
    ),
}
482
/// A scanner token: the region of input it covers and its kind.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token<'input>(
    /// The span of input covered by the token.
    pub Span,
    /// The kind of token and its payload.
    pub TokenType<'input>,
);
491
492#[derive(Clone, PartialEq, Debug, Eq)]
497pub(crate) struct QueuedComment<'input> {
498 pub(crate) text: Cow<'input, str>,
499 pub(crate) placement: Placement,
500}
501
502impl<'input> QueuedComment<'input> {
503 fn into_public(self, span: Span) -> Comment<'input> {
504 Comment::new(span, self.text).with_placement(self.placement)
505 }
506}
507
508impl<'input> From<Comment<'input>> for QueuedComment<'input> {
509 fn from(comment: Comment<'input>) -> Self {
510 Self {
511 text: comment.text,
512 placement: comment.placement,
513 }
514 }
515}
516
/// Queue-internal counterpart of [`TokenType`].
///
/// It has the same variants as [`TokenType`]; the only difference is that `Comment` holds a
/// [`QueuedComment`] (which carries no span) instead of a [`Comment`].
#[derive(Clone, PartialEq, Debug, Eq)]
pub(crate) enum QueuedTokenType<'input> {
    /// See [`TokenType::StreamStart`].
    StreamStart(TEncoding),
    /// See [`TokenType::StreamEnd`].
    StreamEnd,
    /// See [`TokenType::VersionDirective`] (major, minor).
    VersionDirective(u32, u32),
    /// See [`TokenType::TagDirective`] (handle, prefix).
    TagDirective(Cow<'input, str>, Cow<'input, str>),
    /// See [`TokenType::DocumentStart`].
    DocumentStart,
    /// See [`TokenType::DocumentEnd`].
    DocumentEnd,
    /// See [`TokenType::BlockSequenceStart`].
    BlockSequenceStart,
    /// See [`TokenType::BlockMappingStart`].
    BlockMappingStart,
    /// See [`TokenType::BlockEnd`].
    BlockEnd,
    /// See [`TokenType::FlowSequenceStart`].
    FlowSequenceStart,
    /// See [`TokenType::FlowSequenceEnd`].
    FlowSequenceEnd,
    /// See [`TokenType::FlowMappingStart`].
    FlowMappingStart,
    /// See [`TokenType::FlowMappingEnd`].
    FlowMappingEnd,
    /// See [`TokenType::BlockEntry`].
    BlockEntry,
    /// See [`TokenType::FlowEntry`].
    FlowEntry,
    /// See [`TokenType::Key`].
    Key,
    /// See [`TokenType::Value`].
    Value,
    /// See [`TokenType::Alias`] (name).
    Alias(Cow<'input, str>),
    /// See [`TokenType::Anchor`] (name).
    Anchor(Cow<'input, str>),
    /// See [`TokenType::Tag`] (handle, suffix).
    Tag(Cow<'input, str>, Cow<'input, str>),
    /// See [`TokenType::Scalar`] (style, value).
    Scalar(ScalarStyle, Cow<'input, str>),
    /// A comment without its span; see [`TokenType::Comment`].
    Comment(QueuedComment<'input>),
    /// See [`TokenType::ReservedDirective`] (name, parameters).
    ReservedDirective(String, Vec<String>),
}
547
548impl<'input> QueuedTokenType<'input> {
549 fn into_public(self, span: Span) -> TokenType<'input> {
550 match self {
551 Self::StreamStart(encoding) => TokenType::StreamStart(encoding),
552 Self::StreamEnd => TokenType::StreamEnd,
553 Self::VersionDirective(major, minor) => TokenType::VersionDirective(major, minor),
554 Self::TagDirective(handle, prefix) => TokenType::TagDirective(handle, prefix),
555 Self::DocumentStart => TokenType::DocumentStart,
556 Self::DocumentEnd => TokenType::DocumentEnd,
557 Self::BlockSequenceStart => TokenType::BlockSequenceStart,
558 Self::BlockMappingStart => TokenType::BlockMappingStart,
559 Self::BlockEnd => TokenType::BlockEnd,
560 Self::FlowSequenceStart => TokenType::FlowSequenceStart,
561 Self::FlowSequenceEnd => TokenType::FlowSequenceEnd,
562 Self::FlowMappingStart => TokenType::FlowMappingStart,
563 Self::FlowMappingEnd => TokenType::FlowMappingEnd,
564 Self::BlockEntry => TokenType::BlockEntry,
565 Self::FlowEntry => TokenType::FlowEntry,
566 Self::Key => TokenType::Key,
567 Self::Value => TokenType::Value,
568 Self::Alias(name) => TokenType::Alias(name),
569 Self::Anchor(name) => TokenType::Anchor(name),
570 Self::Tag(handle, suffix) => TokenType::Tag(handle, suffix),
571 Self::Scalar(style, value) => TokenType::Scalar(style, value),
572 Self::Comment(comment) => TokenType::Comment(comment.into_public(span)),
573 Self::ReservedDirective(name, params) => TokenType::ReservedDirective(name, params),
574 }
575 }
576}
577
578impl<'input> From<TokenType<'input>> for QueuedTokenType<'input> {
579 fn from(token: TokenType<'input>) -> Self {
580 match token {
581 TokenType::StreamStart(encoding) => Self::StreamStart(encoding),
582 TokenType::StreamEnd => Self::StreamEnd,
583 TokenType::VersionDirective(major, minor) => Self::VersionDirective(major, minor),
584 TokenType::TagDirective(handle, prefix) => Self::TagDirective(handle, prefix),
585 TokenType::DocumentStart => Self::DocumentStart,
586 TokenType::DocumentEnd => Self::DocumentEnd,
587 TokenType::BlockSequenceStart => Self::BlockSequenceStart,
588 TokenType::BlockMappingStart => Self::BlockMappingStart,
589 TokenType::BlockEnd => Self::BlockEnd,
590 TokenType::FlowSequenceStart => Self::FlowSequenceStart,
591 TokenType::FlowSequenceEnd => Self::FlowSequenceEnd,
592 TokenType::FlowMappingStart => Self::FlowMappingStart,
593 TokenType::FlowMappingEnd => Self::FlowMappingEnd,
594 TokenType::BlockEntry => Self::BlockEntry,
595 TokenType::FlowEntry => Self::FlowEntry,
596 TokenType::Key => Self::Key,
597 TokenType::Value => Self::Value,
598 TokenType::Alias(name) => Self::Alias(name),
599 TokenType::Anchor(name) => Self::Anchor(name),
600 TokenType::Tag(handle, suffix) => Self::Tag(handle, suffix),
601 TokenType::Scalar(style, value) => Self::Scalar(style, value),
602 TokenType::Comment(comment) => Self::Comment(comment.into()),
603 TokenType::ReservedDirective(name, params) => Self::ReservedDirective(name, params),
604 }
605 }
606}
607
608#[derive(Clone, PartialEq, Debug, Eq)]
610pub(crate) struct QueuedToken<'input>(pub(crate) Span, pub(crate) QueuedTokenType<'input>);
611
612impl<'input> QueuedToken<'input> {
613 fn into_public(self) -> Token<'input> {
614 Token(self.0, self.1.into_public(self.0))
615 }
616}
617
618impl<'input> From<Token<'input>> for QueuedToken<'input> {
619 fn from(token: Token<'input>) -> Self {
620 Self(token.0, token.1.into())
621 }
622}
623
624#[derive(Clone, PartialEq, Debug, Eq)]
659struct SimpleKey {
660 possible: bool,
673 required: bool,
682 token_number: usize,
688 mark: Marker,
690}
691
692impl SimpleKey {
693 fn new(mark: Marker) -> SimpleKey {
695 SimpleKey {
696 possible: false,
697 required: false,
698 token_number: 0,
699 mark,
700 }
701 }
702}
703
/// An entry of the scanner's indentation stack (`Scanner::indents`).
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The indentation level recorded by this entry.
    indent: isize,
    /// NOTE(review): presumably whether a `BlockEnd` token must be emitted when this
    /// level is unrolled — the code using it is outside this excerpt; confirm.
    needs_block_end: bool,
}
728
/// State kept per flow level in `Scanner::implicit_flow_mapping_states`.
///
/// NOTE(review): the transitions are implemented outside this excerpt; the descriptions
/// below are inferred from the names and should be confirmed.
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
    /// Presumably: an implicit mapping may still start here.
    Possible,
    /// Presumably: the scanner is inside an implicit mapping; the meaning of the `u8`
    /// payload is not visible here.
    Inside(u8),
}
762
/// The YAML scanner: reads characters from an input `T` and produces [`Token`]s.
///
/// It can be driven through its [`Iterator`] implementation or through `next_token`.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<'input, T> {
    /// The character source.
    input: T,
    /// Current position in the input; kept up to date by the `skip_*` helpers.
    mark: Marker,
    /// Tokens that have been scanned but not yet handed out.
    tokens: VecDeque<QueuedToken<'input>>,
    /// Error that stopped iteration; once set, `Iterator::next` returns `None`.
    error: Option<ScanError>,
    /// Error held back while a comment token at the front of the queue is delivered first.
    deferred_error: Option<ScanError>,
    /// Cached result of `input.may_contain_comments()` taken at construction.
    comments_possible: bool,

    /// Whether the stream-start token has been produced.
    stream_start_produced: bool,
    /// Whether the stream-end token has been handed out.
    stream_end_produced: bool,
    /// While `true`, a BOM at column 0 outside flow context is skipped as whitespace;
    /// cleared once `fetch_next_token` gets past directives and document markers.
    document_prefix_allowed: bool,
    /// Character index at which a `:` in flow context is accepted as a value indicator
    /// even if not followed by a blank or flow indicator.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Candidate simple keys.
    /// NOTE(review): presumably one entry per flow level — confirm where it is pushed/popped.
    simple_keys: smallvec::SmallVec<[SimpleKey; 8]>,
    /// Current indentation level; starts at -1.
    indent: isize,
    /// Stack of enclosing indentation levels.
    indents: smallvec::SmallVec<[Indent; 8]>,
    /// Flow nesting depth; 0 means block context.
    flow_level: u8,
    /// Number of tokens already handed out by `next_queued_token`.
    tokens_parsed: usize,
    /// Whether the queue is known to hold a token ready to be handed out.
    token_available: bool,
    /// `true` while nothing non-blank has been consumed on the current line (set by
    /// `skip_nl`, cleared by `skip_non_blank`).
    leading_whitespace: bool,
    /// NOTE(review): per-flow-level flags; presumably whether a flow mapping was started
    /// at that level — maintained outside this excerpt, confirm.
    flow_mapping_started: smallvec::SmallVec<[bool; 8]>,
    /// NOTE(review): per-flow-level implicit mapping state — maintained outside this
    /// excerpt, confirm.
    implicit_flow_mapping_states: smallvec::SmallVec<[ImplicitMappingState; 8]>,
    /// Position recorded when a comment interrupted a plain scalar; consumed by
    /// `skip_to_next_token` to report "comment intercepting the multiline text".
    interrupted_plain_by_comment: Option<Marker>,
    /// When set, `skip_to_next_token` rejects a tab met before any other content.
    /// NOTE(review): presumably armed after an explicit key indicator — confirm at the setter.
    explicit_key_tab_check_pending: bool,
    /// NOTE(review): presumably position and opening character of each open flow
    /// collection, used for unclosed-bracket errors — confirm.
    flow_markers: smallvec::SmallVec<[(Marker, char); 8]>,
    /// Scratch buffers, allocated once in `new`.
    /// NOTE(review): presumably reused while scanning scalars — confirm at the use sites.
    buf_leading_break: String,
    buf_trailing_breaks: String,
    buf_whitespaces: String,
}
865
866impl<'input, T: BorrowedInput<'input>> Iterator for Scanner<'input, T> {
867 type Item = Token<'input>;
868
869 fn next(&mut self) -> Option<Self::Item> {
870 if self.error.is_some() {
871 return None;
872 }
873 match self.next_token() {
874 Ok(Some(tok)) => {
875 debug_print!(
876 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
877 tok.1,
878 tok.0
879 );
880 Some(tok)
881 }
882 Ok(tok) => tok,
883 Err(e) => self.stop_after_error(e),
884 }
885 }
886}
887
/// Result of a scanning step that produces no value: `Ok(())` on success, a [`ScanError`]
/// on failure.
pub type ScanResult = Result<(), ScanError>;
890
/// Accumulator for a flow scalar: either a byte range into the input or an owned string.
#[derive(Debug)]
enum FlowScalarBuf {
    /// The scalar is a contiguous byte range of the input.
    Borrowed {
        /// Start of the committed range.
        start: usize,
        /// End of the committed range.
        end: usize,
        /// Start of a whitespace run seen after `end` but not committed yet, if any.
        pending_ws_start: Option<usize>,
        /// End of that uncommitted whitespace run.
        pending_ws_end: usize,
    },
    /// The scalar content has been copied into an owned buffer.
    Owned(String),
}

impl FlowScalarBuf {
    /// Starts an empty borrowed buffer at byte offset `start`, with no pending whitespace.
    #[inline]
    fn new_borrowed(start: usize) -> Self {
        let (end, pending_ws_end) = (start, start);
        Self::Borrowed {
            start,
            end,
            pending_ws_start: None,
            pending_ws_end,
        }
    }

    /// Starts an empty owned buffer.
    #[inline]
    fn new_owned() -> Self {
        Self::Owned(String::default())
    }

    /// Gives mutable access to the owned string, or `None` for a borrowed buffer.
    #[inline]
    fn as_owned_mut(&mut self) -> Option<&mut String> {
        if let Self::Owned(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Extends the committed range over the pending whitespace, if there is any.
    /// Does nothing for an owned buffer.
    #[inline]
    fn commit_pending_ws(&mut self) {
        let Self::Borrowed {
            end,
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };
        // `take` clears the marker; the range only grows when one was set.
        if pending_ws_start.take().is_some() {
            *end = *pending_ws_end;
        }
    }

    /// Records whitespace spanning `ws_start..ws_end` as pending.
    ///
    /// The start of an already pending run is kept; the pending end is always updated.
    /// Does nothing for an owned buffer.
    #[inline]
    fn note_pending_ws(&mut self, ws_start: usize, ws_end: usize) {
        let Self::Borrowed {
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };
        *pending_ws_start = pending_ws_start.or(Some(ws_start));
        *pending_ws_end = ws_end;
    }

    /// Forgets any pending whitespace, resetting the pending end to the committed end.
    /// Does nothing for an owned buffer.
    #[inline]
    fn discard_pending_ws(&mut self) {
        let Self::Borrowed {
            end,
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };
        *pending_ws_end = *end;
        *pending_ws_start = None;
    }
}
976
977impl<'input, T: BorrowedInput<'input>> Scanner<'input, T> {
978 #[inline]
979 fn promote_flow_scalar_buf_to_owned(
980 &self,
981 start_mark: &Marker,
982 buf: &mut FlowScalarBuf,
983 ) -> Result<(), ScanError> {
984 let FlowScalarBuf::Borrowed {
985 start,
986 end,
987 pending_ws_start: _,
988 pending_ws_end: _,
989 } = *buf
990 else {
991 return Ok(());
992 };
993
994 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
995 ScanError::new_str(
996 *start_mark,
997 "internal error: input advertised offsets but did not provide a slice",
998 )
999 })?;
1000 *buf = FlowScalarBuf::Owned(slice.to_owned());
1001 Ok(())
1002 }
    /// Asks the input for the byte range `start..end` as a slice living for `'input`.
    ///
    /// Returns whatever `slice_borrowed` returns; `None` means the text cannot be borrowed
    /// and the caller has to copy it instead.
    #[inline]
    fn try_borrow_slice(&self, start: usize, end: usize) -> Option<&'input str> {
        self.input.slice_borrowed(start, end)
    }
1012
1013 fn scan_tag_handle_directive_cow(
1018 &mut self,
1019 mark: &Marker,
1020 ) -> Result<Cow<'input, str>, ScanError> {
1021 let Some(start) = self.input.byte_offset() else {
1022 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
1023 };
1024
1025 if self.input.look_ch() != '!' {
1026 return Err(ScanError::new_str(
1027 *mark,
1028 "while scanning a tag, did not find expected '!'",
1029 ));
1030 }
1031
1032 self.skip_non_blank();
1034
1035 self.input.lookahead(1);
1038 while self.input.next_is_alpha() {
1039 self.skip_non_blank();
1040 self.input.lookahead(1);
1041 }
1042
1043 if self.input.peek() == '!' {
1045 self.skip_non_blank();
1046 }
1047
1048 let Some(end) = self.input.byte_offset() else {
1049 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
1051 };
1052
1053 let Some(slice) = self.try_borrow_slice(start, end) else {
1054 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1056 ScanError::new_str(
1057 *mark,
1058 "internal error: input advertised slicing but did not provide a slice",
1059 )
1060 })?;
1061 if !slice.ends_with('!') && slice != "!" {
1062 return Err(ScanError::new_str(
1063 *mark,
1064 "while parsing a tag directive, did not find expected '!'",
1065 ));
1066 }
1067 return Ok(Cow::Owned(slice.to_owned()));
1068 };
1069
1070 if !slice.ends_with('!') && slice != "!" {
1071 return Err(ScanError::new_str(
1072 *mark,
1073 "while parsing a tag directive, did not find expected '!'",
1074 ));
1075 }
1076
1077 Ok(Cow::Borrowed(slice))
1078 }
1079
1080 fn scan_tag_prefix_directive_cow(
1085 &mut self,
1086 start_mark: &Marker,
1087 ) -> Result<Cow<'input, str>, ScanError> {
1088 let Some(start) = self.input.byte_offset() else {
1089 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
1090 };
1091
1092 if self.input.look_ch() == '!' {
1094 self.skip_non_blank();
1095 } else if !is_tag_char(self.input.peek()) {
1096 return Err(ScanError::new_str(
1097 *start_mark,
1098 "invalid global tag character",
1099 ));
1100 } else if self.input.peek() == '%' {
1101 } else {
1103 self.skip_non_blank();
1104 }
1105
1106 while is_uri_char(self.input.look_ch()) {
1108 if self.input.peek() == '%' {
1109 break;
1110 }
1111 self.skip_non_blank();
1112 }
1113
1114 if self.input.peek() == '%' {
1116 let current = self
1117 .input
1118 .byte_offset()
1119 .expect("byte_offset() must remain available once enabled");
1120 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
1121 slice.to_owned()
1122 } else {
1123 String::new()
1124 };
1125
1126 while is_uri_char(self.input.look_ch()) {
1127 if self.input.peek() == '%' {
1128 out.push(self.scan_uri_escapes(start_mark)?);
1129 } else {
1130 out.push(self.input.peek());
1131 self.skip_non_blank();
1132 }
1133 }
1134 return Ok(Cow::Owned(out));
1135 }
1136
1137 let Some(end) = self.input.byte_offset() else {
1138 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
1139 };
1140
1141 let Some(slice) = self.try_borrow_slice(start, end) else {
1142 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1144 ScanError::new_str(
1145 *start_mark,
1146 "internal error: input advertised slicing but did not provide a slice",
1147 )
1148 })?;
1149 return Ok(Cow::Owned(slice.to_owned()));
1150 };
1151
1152 Ok(Cow::Borrowed(slice))
1153 }
1154 pub fn new(input: T) -> Self {
1156 let initial_byte_offset = input.byte_offset();
1157 let comments_possible = input.may_contain_comments();
1158 Scanner {
1159 input,
1160 mark: Marker::new(0, 1, 0).with_byte_offset(initial_byte_offset),
1161 tokens: VecDeque::with_capacity(64),
1162 error: None,
1163 deferred_error: None,
1164 comments_possible,
1165
1166 stream_start_produced: false,
1167 stream_end_produced: false,
1168 document_prefix_allowed: true,
1169 adjacent_value_allowed_at: 0,
1170 simple_key_allowed: true,
1171 simple_keys: smallvec::SmallVec::new(),
1172 indent: -1,
1173 indents: smallvec::SmallVec::new(),
1174 flow_level: 0,
1175 tokens_parsed: 0,
1176 token_available: false,
1177 leading_whitespace: true,
1178 flow_mapping_started: smallvec::SmallVec::new(),
1179 implicit_flow_mapping_states: smallvec::SmallVec::new(),
1180 flow_markers: smallvec::SmallVec::new(),
1181 interrupted_plain_by_comment: None,
1182 explicit_key_tab_check_pending: false,
1183
1184 buf_leading_break: String::with_capacity(128),
1185 buf_trailing_breaks: String::with_capacity(128),
1186 buf_whitespaces: String::with_capacity(128),
1187 }
1188 }
1189
1190 #[inline]
1195 pub fn get_error(&self) -> Option<ScanError> {
1196 self.error.clone().or_else(|| self.deferred_error.clone())
1197 }
1198
    /// Stores `error` as the scanner's error and returns `None`, so that
    /// `Iterator::next` ends the iteration and refuses to continue on later calls.
    #[cold]
    fn stop_after_error(&mut self, error: ScanError) -> Option<Token<'input>> {
        self.error = Some(error);
        None
    }
1204
    /// Builds the "simple key expected" error at the current position.
    #[cold]
    fn simple_key_expected(&self) -> ScanError {
        ScanError::new_str(self.mark, "simple key expected")
    }
1209
    /// Builds the "unclosed bracket" error for `bracket`, located at `mark`.
    #[cold]
    fn unclosed_bracket(mark: Marker, bracket: char) -> ScanError {
        ScanError::new(mark, format!("unclosed bracket '{bracket}'"))
    }
1214
    /// Consumes one character and advances the mark by one character and one column.
    ///
    /// Unlike `skip_non_blank`, this leaves `leading_whitespace` untouched.
    #[inline]
    fn skip_blank(&mut self) {
        self.input.skip();

        self.mark.offsets.chars += 1;
        self.mark.col += 1;
        // Read the byte offset only after the input has advanced.
        self.mark.offsets.bytes = self.input.byte_offset();
    }
1224
    /// Consumes one character, advances the mark by one character and one column, and
    /// records that the current line no longer consists of leading whitespace only.
    #[inline]
    fn skip_non_blank(&mut self) {
        self.input.skip();

        self.mark.offsets.chars += 1;
        self.mark.col += 1;
        // Read the byte offset only after the input has advanced.
        self.mark.offsets.bytes = self.input.byte_offset();
        self.leading_whitespace = false;
    }
1235
    /// Consumes one character (a byte order mark, per the caller) without moving the column.
    ///
    /// The character and byte offsets advance; `col` and `leading_whitespace` are left as
    /// they are.
    #[inline]
    fn skip_bom(&mut self) {
        self.input.skip();

        self.mark.offsets.chars += 1;
        self.mark.offsets.bytes = self.input.byte_offset();
    }
1247
    /// Consumes one character of a comment, advancing the mark by one character and one
    /// column.
    ///
    /// `leading_whitespace` is left untouched, so consuming a comment does not change
    /// whether the line counts as "whitespace only so far".
    #[inline]
    fn skip_comment_char(&mut self) {
        self.input.skip();

        self.mark.offsets.chars += 1;
        self.mark.col += 1;
        // Read the byte offset only after the input has advanced.
        self.mark.offsets.bytes = self.input.byte_offset();
    }
1261
1262 #[inline]
1264 fn skip_n_non_blank(&mut self, count: usize) {
1265 for _ in 0..count {
1266 self.input.skip();
1267 self.mark.offsets.chars += 1;
1268 self.mark.col += 1;
1269 }
1270 self.mark.offsets.bytes = self.input.byte_offset();
1271 self.leading_whitespace = false;
1272 }
1273
    /// Consumes one character as a line terminator: the mark moves to column 0 of the next
    /// line and `leading_whitespace` is set again.
    #[inline]
    fn skip_nl(&mut self) {
        self.input.skip();

        self.mark.offsets.chars += 1;
        self.mark.col = 0;
        self.mark.line += 1;
        // Read the byte offset only after the input has advanced.
        self.mark.offsets.bytes = self.input.byte_offset();
        self.leading_whitespace = true;
    }
1285
1286 #[inline]
1288 fn skip_linebreak(&mut self) {
1289 if self.input.next_2_are('\r', '\n') {
1290 self.skip_blank();
1293 self.skip_nl();
1294 } else if self.input.next_is_break() {
1295 self.skip_nl();
1296 }
1297 }
1298
    /// Test-only helper: scans a comment at the current position and returns it as a
    /// public [`Token`].
    #[cfg(test)]
    fn scan_comment_token(&mut self) -> Result<Token<'input>, ScanError> {
        Ok(self.scan_comment_queued_token()?.into_public())
    }
1303
1304 fn scan_comment_queued_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
1305 let start_mark = self.mark;
1306 debug_assert_eq!(self.input.peek(), '#');
1307 let placement = if self.leading_whitespace {
1308 Placement::Free
1309 } else {
1310 Placement::Right
1311 };
1312
1313 self.skip_comment_char();
1314
1315 let text = if let Some(start) = self.input.byte_offset() {
1316 let n = self.input.skip_while_non_breakz();
1318 self.mark.offsets.chars += n;
1319 self.mark.col += n;
1320 let byte_offset = self.input.byte_offset();
1321 self.mark.offsets.bytes = byte_offset;
1322 let end = byte_offset.expect("byte_offset must remain available once enabled");
1323
1324 if let Some(slice) = self.try_borrow_slice(start, end) {
1325 Cow::Borrowed(slice)
1326 } else if let Some(slice) = self.input.slice_bytes(start, end) {
1327 Cow::Owned(slice.to_owned())
1329 } else {
1330 return Err(ScanError::new_str(
1331 start_mark,
1332 "internal error: input advertised offsets but did not provide a slice",
1333 ));
1334 }
1335 } else {
1336 let mut owned = String::new();
1338 while !is_breakz(self.input.look_ch()) {
1339 owned.push(self.input.peek());
1340 self.skip_comment_char();
1341 }
1342 Cow::Owned(owned)
1343 };
1344
1345 let end_mark = self.mark;
1346 let span = Span::new(start_mark, end_mark);
1347 Ok(QueuedToken(
1348 span,
1349 QueuedTokenType::Comment(QueuedComment { text, placement }),
1350 ))
1351 }
1352
1353 fn push_comment_token(&mut self) -> ScanResult {
1354 let token = self.scan_comment_queued_token()?;
1355 self.tokens.push_back(token);
1356 Ok(())
1357 }
1358
    /// Consumes the comment starting at the current `#` without producing a token.
    ///
    /// Everything up to (not including) the end of the comment, as determined by
    /// `skip_while_non_breakz`, is skipped and the mark is advanced accordingly.
    fn skip_comment(&mut self) {
        debug_assert_eq!(self.input.peek(), '#');

        // Consume the '#', then the rest of the comment in one go.
        self.skip_comment_char();
        let n = self.input.skip_while_non_breakz();
        self.mark.offsets.chars += n;
        self.mark.col += n;
        self.mark.offsets.bytes = self.input.byte_offset();
    }
1368
    /// Returns whether the stream-start token has been produced.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
1374
    /// Returns whether the stream-end token has been handed out.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
1380
    /// Returns the scanner's current position in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
1386
    /// Returns the value `input.may_contain_comments()` reported when the scanner was
    /// created.
    #[inline]
    pub(crate) fn comments_possible(&self) -> bool {
        self.comments_possible
    }
1392
    /// Consumes the line break at the current position and appends a single `'\n'` to `s`,
    /// whichever form the break had in the input.
    #[inline]
    fn read_break(&mut self, s: &mut String) {
        self.skip_break();
        s.push('\n');
    }
1404
1405 #[inline]
1410 fn skip_break(&mut self) {
1411 let c = self.input.peek();
1412 let nc = self.input.peek_nth(1);
1413 debug_assert!(is_break(c));
1414 if c == '\r' && nc == '\n' {
1415 self.skip_blank();
1416 }
1417 self.skip_nl();
1418 }
1419
1420 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
1422 let old_len = self.tokens.len();
1423 assert!(pos <= old_len);
1424 self.tokens.insert(pos, tok.into());
1425 }
1426
1427 fn simple_key_token_index(&self, sk: &SimpleKey, mark: Marker) -> Result<usize, ScanError> {
1428 let Some(index) = sk.token_number.checked_sub(self.tokens_parsed) else {
1429 return Err(ScanError::new_str(mark, "simple key is no longer valid"));
1430 };
1431 if index > self.tokens.len() {
1432 return Err(ScanError::new_str(mark, "simple key is no longer valid"));
1433 }
1434 Ok(index)
1435 }
1436
    /// Marks the current position as one where a simple key may start.
    #[inline]
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
1441
    /// Marks the current position as one where a simple key may not start.
    #[inline]
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
1446
    /// Scans the next token(s) from the input and appends them to the token queue.
    ///
    /// The first call only emits the stream start. Later calls skip whitespace and
    /// comments (returning early if a comment token was queued), expire stale simple
    /// keys, unroll indentation to the current column, and then dispatch on the next
    /// character(s) to the matching `fetch_*` routine.
    ///
    /// # Errors
    /// Returns a [`ScanError`] for invalid indentation, content after a document end
    /// marker, a BOM inside a document, a character that cannot start a token
    /// (`%`, `@`, `` ` ``), or any error raised by the routine dispatched to.
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.input.lookahead(1);

        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        // `true` means a comment token was queued; hand it out before scanning further.
        if self.skip_to_next_token(true)? {
            return Ok(());
        }

        debug_print!(
            " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
            self.mark,
            self.input.peek()
        );

        self.stale_simple_keys()?;

        let mark = self.mark;
        self.unroll_indent(mark.col as isize);

        self.input.lookahead(4);

        if self.input.next_is_z() {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Directives and document markers are only recognised at the start of a line.
        if self.mark.col == 0 {
            if self.input.next_char_is('%') {
                return self.fetch_directive();
            } else if self.input.next_is_document_start() {
                return self.fetch_document_indicator(TokenType::DocumentStart);
            } else if self.input.next_is_document_end() {
                self.fetch_document_indicator(TokenType::DocumentEnd)?;
                // Only whitespace (and whatever `skip_ws_to_eol` accepts) may follow the marker.
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid content after document end marker",
                    ));
                }
                return Ok(());
            }
        }

        // Actual content follows: a BOM is no longer treated as a document prefix.
        if self.document_prefix_allowed {
            self.document_prefix_allowed = false;
        }

        // Content to the left of the current indentation is only tolerated for closing
        // or separating flow indicators inside a flow collection.
        if (self.mark.col as isize) < self.indent {
            self.input.lookahead(1);
            let c = self.input.peek();
            if self.flow_level == 0 || !matches!(c, ']' | '}' | ',') {
                return Err(ScanError::new_str(self.mark, "invalid indentation"));
            }
        }

        let c = self.input.peek();
        let nc = self.input.peek_nth(1);
        // Arm order matters: guarded indicator arms must come before the plain-scalar fallbacks.
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            // '-', '?' and ':' are indicators only when followed by a blank, a break or the end.
            '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
            '?' if is_blank_or_breakz(nc) => self.fetch_key(),
            ':' if is_blank_or_breakz(nc) => self.fetch_value(),
            // In flow context ':' is also a value indicator before a flow indicator, or at
            // the position recorded in `adjacent_value_allowed_at`.
            ':' if self.flow_level > 0
                && (is_flow(nc) || self.mark.index() == self.adjacent_value_allowed_at) =>
            {
                self.fetch_flow_value()
            }
            // NOTE(review): the bool presumably selects alias (`true`) vs. anchor (`false`) —
            // confirm in `fetch_anchor`.
            '*' => self.fetch_anchor(true),
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Block scalars exist in block context only.
            // NOTE(review): the bool presumably selects literal (`true`) vs. folded (`false`).
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            // NOTE(review): the bool presumably selects single (`true`) vs. double (`false`) quotes.
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            // Indicator characters not followed by a blank start a plain scalar.
            '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
            ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
                self.fetch_plain_scalar()
            }
            c if is_bom(c) => Err(ScanError::new_str(
                self.mark,
                "a BOM must not appear inside a document",
            )),
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                format!("unexpected character: `{c}'"),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
1553
    /// Returns the next token in its queued form, scanning more input when needed.
    ///
    /// Returns `Ok(None)` once the stream-end token has been handed out.
    ///
    /// If scanning fails while a comment token sits at the front of the queue, the error
    /// is deferred: the comment is delivered first and the error is returned by a later
    /// call, as soon as the front of the queue is no longer a comment.
    ///
    /// # Errors
    /// Returns the scanning error (immediately or deferred as described above), or
    /// "did not find expected next token" if the queue is empty after fetching.
    pub(crate) fn next_queued_token(&mut self) -> Result<Option<QueuedToken<'input>>, ScanError> {
        if self.deferred_error.is_some() {
            // Report the deferred error unless a comment still has to be delivered first.
            if !matches!(
                self.tokens.front().map(|token| &token.1),
                Some(QueuedTokenType::Comment(_))
            ) {
                if let Some(error) = self.deferred_error.take() {
                    return error.into_result();
                }
            }
            // A comment is waiting: deliver it from the queue without scanning further.
            self.token_available = true;
        }

        if self.stream_end_produced {
            return Ok(None);
        }

        if !self.token_available {
            if let Err(error) = self.fetch_more_tokens() {
                // Keep the error for later if a comment was queued before the failure.
                if matches!(
                    self.tokens.front().map(|token| &token.1),
                    Some(QueuedTokenType::Comment(_))
                ) {
                    self.deferred_error = Some(error);
                } else {
                    return Err(error);
                }
            }
        }
        let Some(t) = self.tokens.pop_front() else {
            return Err(ScanError::new_str(
                self.mark,
                "did not find expected next token",
            ));
        };
        self.token_available = false;
        self.tokens_parsed += 1;

        let is_stream_end = matches!(t.1, QueuedTokenType::StreamEnd);
        if is_stream_end {
            self.stream_end_produced = true;
        }
        Ok(Some(t))
    }
1602
1603 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
1608 Ok(self.next_queued_token()?.map(QueuedToken::into_public))
1609 }
1610
1611 pub fn fetch_more_tokens(&mut self) -> ScanResult {
1616 let mut need_more;
1617 loop {
1618 if self.tokens.is_empty() {
1619 need_more = true;
1620 } else {
1621 need_more = false;
1622 self.stale_simple_keys()?;
1624 if !matches!(
1625 self.tokens.front().map(|token| &token.1),
1626 Some(QueuedTokenType::Comment(_))
1627 ) {
1628 for sk in &self.simple_keys {
1630 if sk.possible && sk.token_number == self.tokens_parsed {
1631 need_more = true;
1632 break;
1633 }
1634 }
1635 }
1636 }
1637
1638 if let Some(token) = self.tokens.back() {
1641 if matches!(
1642 token.1,
1643 QueuedTokenType::DocumentEnd | QueuedTokenType::DocumentStart
1644 ) {
1645 break;
1646 }
1647 }
1648
1649 if !need_more {
1650 break;
1651 }
1652 self.fetch_next_token()?;
1653 }
1654 self.token_available = true;
1655
1656 Ok(())
1657 }
1658
1659 fn stale_simple_keys(&mut self) -> ScanResult {
1668 for sk in &mut self.simple_keys {
1669 let is_line_stale = self.flow_level == 0 && sk.mark.line < self.mark.line;
1670 let is_length_stale =
1673 self.mark.index().saturating_sub(sk.mark.index()) > SIMPLE_KEY_MAX_LOOKAHEAD;
1674
1675 if sk.possible && (is_line_stale || is_length_stale) {
1676 if sk.required {
1677 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
1678 }
1679 sk.possible = false;
1680 }
1681 }
1682 Ok(())
1683 }
1684
    /// Skips whitespace, line breaks, an allowed BOM and comments up to the next token.
    ///
    /// Comments are not discarded: each one is queued as a comment token. When
    /// `stop_after_comment` is `true`, the function returns `Ok(true)` right after queuing
    /// a comment (and consuming the line break that follows it, if any). Otherwise it
    /// returns `Ok(false)` once the next character is not skippable.
    ///
    /// # Errors
    /// Returns a [`ScanError`] for a tab met while the explicit-key tab check is pending,
    /// for a tab used as block indentation in front of content, for a comment that
    /// interrupts a multi-line plain scalar, or for any error from the helpers called.
    fn skip_to_next_token(&mut self, stop_after_comment: bool) -> Result<bool, ScanError> {
        // Consumes a line break; in block context a new line allows a simple key again.
        let consume_linebreak = |this: &mut Self| {
            this.input.lookahead(2);
            this.skip_linebreak();
            if this.flow_level == 0 {
                this.allow_simple_key();
            }
        };

        loop {
            let ch = self.input.look_ch();
            if self.explicit_key_tab_check_pending {
                match ch {
                    '\t' => {
                        return Err(ScanError::new_str(
                            self.mark(),
                            "tabs disallowed in this context",
                        ));
                    }
                    // Spaces, breaks and comments keep the check armed.
                    ' ' | '\n' | '\r' | '#' => {}
                    // Any other character ends the check.
                    _ => self.explicit_key_tab_check_pending = false,
                }
            }

            match ch {
                '\t' => {
                    // A tab in the indentation area of a block is only acceptable when the
                    // rest of the line holds no content.
                    if self.is_within_block()
                        && self.leading_whitespace
                        && (self.mark.col as isize) < self.indent
                    {
                        self.skip_ws_to_eol(SkipTabs::Yes)?;

                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tabs disallowed within this context (block indentation)",
                            ));
                        }

                        if matches!(self.input.look_ch(), '\n' | '\r') {
                            consume_linebreak(self);
                        }
                    } else {
                        self.skip_blank();
                    }
                }

                ' ' => self.skip_blank(),

                '\n' | '\r' => consume_linebreak(self),

                // A BOM is skipped only as a document prefix: at column 0, outside flow
                // context, and before any document content.
                c if is_bom(c)
                    && self.document_prefix_allowed
                    && self.flow_level == 0
                    && self.mark.col == 0 =>
                {
                    self.skip_bom();
                }

                '#' => {
                    self.push_comment_token()?;

                    if matches!(self.input.look_ch(), '\n' | '\r') {
                        consume_linebreak(self);
                    }
                    if stop_after_comment {
                        return Ok(true);
                    }
                }

                _ => break,
            }
        }

        // A plain scalar was cut short by a comment. The check is one-shot (`take`) and
        // fails if what follows would have continued that scalar.
        if let Some(err_mark) = self.interrupted_plain_by_comment.take() {
            let is_immediate_next_line = self.mark.line == err_mark.line + 1;

            if self.flow_level == 0
                && is_immediate_next_line
                && (self.mark.col as isize) > self.indent
            {
                self.input.lookahead(4);

                if !self.input.next_is_z()
                    && !self.input.next_is_document_indicator()
                    && self.input.next_can_be_plain_scalar(false)
                {
                    return Err(ScanError::new_str(
                        err_mark,
                        "comment intercepting the multiline text",
                    ));
                }
            }
        }

        Ok(false)
    }
1807
1808 fn skip_yaml_whitespace(&mut self, stop_after_comment: bool) -> Result<bool, ScanError> {
1816 let mut need_whitespace = true;
1817 loop {
1818 match self.input.look_ch() {
1819 ' ' => {
1820 self.skip_blank();
1821
1822 need_whitespace = false;
1823 }
1824 '\n' | '\r' => {
1825 self.input.lookahead(2);
1826 self.skip_linebreak();
1827 if self.flow_level == 0 {
1828 self.allow_simple_key();
1829 }
1830 need_whitespace = false;
1831 }
1832 '#' => {
1833 if need_whitespace {
1834 self.skip_comment();
1835 } else {
1836 self.push_comment_token()?;
1837 if stop_after_comment {
1838 return Ok(true);
1839 }
1840 }
1841 }
1842 _ => break,
1843 }
1844 }
1845
1846 if need_whitespace {
1847 Err(ScanError::new_str(self.mark(), "expected whitespace"))
1848 } else {
1849 Ok(false)
1850 }
1851 }
1852
1853 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
1854 debug_assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
1855
1856 if !self.comments_possible {
1857 let (chars_consumed, result) = self.input.skip_ws_to_eol(skip_tabs);
1858 self.mark.col += chars_consumed;
1859 self.mark.offsets.chars += chars_consumed;
1860 self.mark.offsets.bytes = self.input.byte_offset();
1861 return result.map_err(|msg| ScanError::new_str(self.mark, msg));
1862 }
1863
1864 let (chars_consumed, whitespace) = self.input.skip_ws_to_eol_blanks(skip_tabs);
1865 self.mark.col += chars_consumed;
1866 self.mark.offsets.chars += chars_consumed;
1867 self.mark.offsets.bytes = self.input.byte_offset();
1868
1869 if self.input.look_ch() != '#' {
1870 return Ok(whitespace);
1871 }
1872
1873 if !whitespace.found_tabs() && !whitespace.has_valid_yaml_ws() {
1874 return Err(ScanError::new_str(
1875 self.mark,
1876 "comments must be separated from other tokens by whitespace",
1877 ));
1878 }
1879
1880 self.push_comment_token()?;
1881 Ok(whitespace)
1882 }
1883
1884 fn fetch_stream_start(&mut self) {
1885 let mark = self.mark;
1886 self.indent = -1;
1887 self.stream_start_produced = true;
1888 self.allow_simple_key();
1889 self.tokens
1890 .push_back(Token(Span::empty(mark), TokenType::StreamStart(TEncoding::Utf8)).into());
1891 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1892 }
1893
1894 fn fetch_stream_end(&mut self) -> ScanResult {
1895 if self.mark.col != 0 {
1897 self.mark.col = 0;
1898 self.mark.line += 1;
1899 }
1900
1901 if let Some((mark, bracket)) = self.flow_markers.pop() {
1902 return Err(Self::unclosed_bracket(mark, bracket));
1903 }
1904
1905 for sk in &mut self.simple_keys {
1908 if sk.required && sk.possible {
1909 return Err(self.simple_key_expected());
1910 }
1911 sk.possible = false;
1912 }
1913
1914 self.unroll_indent(-1);
1915 self.remove_simple_key()?;
1916 self.disallow_simple_key();
1917
1918 self.tokens
1919 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd).into());
1920 Ok(())
1921 }
1922
1923 fn fetch_directive(&mut self) -> ScanResult {
1924 self.unroll_indent(-1);
1925 self.remove_simple_key()?;
1926
1927 self.disallow_simple_key();
1928
1929 let token_index = self.tokens.len();
1930 let tok = self.scan_directive()?;
1931 self.insert_token(token_index, tok);
1932
1933 Ok(())
1934 }
1935
1936 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
1937 let start_mark = self.mark;
1938 self.skip_non_blank();
1939
1940 let name = self.scan_directive_name()?;
1941 let tok = match name.as_ref() {
1942 "YAML" => self.scan_version_directive_value(&start_mark)?,
1943 "TAG" => self.scan_tag_directive_value(&start_mark)?,
1944 _ => {
1945 let mut params = Vec::new();
1946 while self.input.next_is_blank() {
1947 let n_blanks = self.input.skip_while_blank();
1948 self.mark.offsets.chars += n_blanks;
1949 self.mark.col += n_blanks;
1950 self.mark.offsets.bytes = self.input.byte_offset();
1951
1952 if !is_blank_or_breakz(self.input.peek()) {
1953 let mut param = String::new();
1954 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut param);
1955 self.mark.offsets.chars += n_chars;
1956 self.mark.col += n_chars;
1957 self.mark.offsets.bytes = self.input.byte_offset();
1958 params.push(param);
1959 }
1960 }
1961
1962 Token(
1963 Span::new(start_mark, self.mark),
1964 TokenType::ReservedDirective(name, params),
1965 )
1966 }
1967 };
1968
1969 self.skip_ws_to_eol(SkipTabs::Yes)?;
1970
1971 if self.input.next_is_breakz() {
1972 self.input.lookahead(2);
1973 self.skip_linebreak();
1974 Ok(tok)
1975 } else {
1976 Err(ScanError::new_str(
1977 start_mark,
1978 "while scanning a directive, did not find expected comment or line break",
1979 ))
1980 }
1981 }
1982
1983 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1984 let n_blanks = self.input.skip_while_blank();
1985 self.mark.offsets.chars += n_blanks;
1986 self.mark.col += n_blanks;
1987 self.mark.offsets.bytes = self.input.byte_offset();
1988
1989 let major = self.scan_version_directive_number(mark)?;
1990
1991 if self.input.peek() != '.' {
1992 return Err(ScanError::new_str(
1993 *mark,
1994 "while scanning a YAML directive, did not find expected digit or '.' character",
1995 ));
1996 }
1997 self.skip_non_blank();
1998
1999 let minor = self.scan_version_directive_number(mark)?;
2000
2001 Ok(Token(
2002 Span::new(*mark, self.mark),
2003 TokenType::VersionDirective(major, minor),
2004 ))
2005 }
2006
2007 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
2008 let start_mark = self.mark;
2009 let mut string = String::new();
2010
2011 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut string);
2012 self.mark.offsets.chars += n_chars;
2013 self.mark.col += n_chars;
2014 self.mark.offsets.bytes = self.input.byte_offset();
2015
2016 if string.is_empty() {
2017 return Err(ScanError::new_str(
2018 start_mark,
2019 "while scanning a directive, could not find expected directive name",
2020 ));
2021 }
2022
2023 if !is_blank_or_breakz(self.input.peek()) {
2024 return Err(ScanError::new_str(
2025 start_mark,
2026 "while scanning a directive, found unexpected non-alphabetical character",
2027 ));
2028 }
2029
2030 Ok(string)
2031 }
2032
2033 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
2034 let mut val = 0u32;
2035 let mut length = 0usize;
2036 while let Some(digit) = self.input.look_ch().to_digit(10) {
2037 if length + 1 > 9 {
2038 return Err(ScanError::new_str(
2039 *mark,
2040 "while scanning a YAML directive, found extremely long version number",
2041 ));
2042 }
2043 length += 1;
2044 val = val * 10 + digit;
2045 self.skip_non_blank();
2046 }
2047
2048 if length == 0 {
2049 return Err(ScanError::new_str(
2050 *mark,
2051 "while scanning a YAML directive, did not find expected version number",
2052 ));
2053 }
2054
2055 Ok(val)
2056 }
2057
2058 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
2059 let n_blanks = self.input.skip_while_blank();
2060 self.mark.offsets.chars += n_blanks;
2061 self.mark.col += n_blanks;
2062 self.mark.offsets.bytes = self.input.byte_offset();
2063
2064 let handle = self.scan_tag_handle_directive_cow(mark)?;
2065
2066 let n_blanks = self.input.skip_while_blank();
2067 self.mark.offsets.chars += n_blanks;
2068 self.mark.col += n_blanks;
2069 self.mark.offsets.bytes = self.input.byte_offset();
2070
2071 let prefix = self.scan_tag_prefix_directive_cow(mark)?;
2072
2073 self.input.lookahead(1);
2074
2075 if self.input.next_is_blank_or_breakz() {
2076 Ok(Token(
2077 Span::new(*mark, self.mark),
2078 TokenType::TagDirective(handle, prefix),
2079 ))
2080 } else {
2081 Err(ScanError::new_str(
2082 *mark,
2083 "while scanning TAG, did not find expected whitespace or line break",
2084 ))
2085 }
2086 }
2087
2088 fn fetch_tag(&mut self) -> ScanResult {
2089 self.save_simple_key();
2090 self.disallow_simple_key();
2091
2092 let tok = self.scan_tag()?;
2093 self.tokens.push_back(tok.into());
2094 Ok(())
2095 }
2096
/// Scan a tag at the current position and return a `Tag(handle, suffix)` token.
///
/// When the input exposes byte offsets, handle and suffix are produced as
/// `Cow`s that may borrow from the input; otherwise the work is delegated to
/// `scan_tag_owned`.
///
/// Resulting `(handle, suffix)` shapes:
/// - verbatim form (second character is `<`): `("", uri)`;
/// - a handle of at least two characters that both starts and ends with `!`:
///   `(handle, suffix)` with a non-empty suffix required;
/// - otherwise: `("!", suffix)`, where the suffix is whatever followed the
///   leading `!`, or `("", "!")` when that suffix is empty.
///
/// # Errors
/// Propagates errors from the sub-scanners, and fails if the tag is not
/// followed by a blank / line break / end of input or, inside a flow
/// collection, by one of `,`, `]`, `}`.
fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    // Two characters are needed to distinguish the verbatim form (second character `<`).
    self.input.lookahead(2);

    if self.input.byte_offset().is_none() {
        // No byte offsets available: use the path that always builds owned strings.
        return self.scan_tag_owned(&start_mark);
    }

    let (handle, suffix): (Cow<'input, str>, Cow<'input, str>) =
        if self.input.nth_char_is(1, '<') {
            let suffix = self.scan_verbatim_tag(&start_mark)?;
            // Verbatim tags have no handle.
            (Cow::Owned(String::new()), Cow::Owned(suffix))
        } else {
            let handle = self.scan_tag_handle_cow(&start_mark)?;
            // `!!` or `!name!`: a complete handle, which must be followed by a suffix.
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                let suffix = self.scan_tag_shorthand_suffix_cow(&start_mark, true)?;
                (handle, suffix)
            } else {
                // What was scanned as a "handle" is really `!` plus the
                // beginning of the suffix; read whatever remains of the suffix.
                let remaining_suffix =
                    self.scan_tag_shorthand_suffix_cow(&start_mark, false)?;

                let suffix = if handle.len() > 1 {
                    if remaining_suffix.is_empty() {
                        // Suffix is just the handle minus its leading `!`;
                        // a borrowed handle stays borrowed.
                        match handle {
                            Cow::Borrowed(s) => Cow::Borrowed(&s[1..]),
                            Cow::Owned(s) => Cow::Owned(s[1..].to_owned()),
                        }
                    } else {
                        // Two separate pieces must be joined, so allocate.
                        let mut combined = handle[1..].to_owned();
                        combined.push_str(&remaining_suffix);
                        Cow::Owned(combined)
                    }
                } else {
                    // Handle was a lone `!`.
                    remaining_suffix
                };

                if suffix.is_empty() {
                    // A bare `!`: reported as empty handle with suffix `!`.
                    (Cow::Borrowed(""), Cow::Borrowed("!"))
                } else {
                    (Cow::Borrowed("!"), suffix)
                }
            }
        };

    // The tag must be terminated by whitespace / end of input, or by a flow
    // indicator when inside a flow collection.
    if is_blank_or_breakz(self.input.look_ch())
        || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
    {
        Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Tag(handle, suffix),
        ))
    } else {
        Err(ScanError::new_str(
            start_mark,
            "while scanning a tag, did not find expected whitespace or line break",
        ))
    }
}
2174
2175 fn scan_tag_owned(&mut self, start_mark: &Marker) -> Result<Token<'input>, ScanError> {
2177 let mut handle = String::new();
2178 let mut suffix;
2179
2180 if self.input.nth_char_is(1, '<') {
2181 suffix = self.scan_verbatim_tag(start_mark)?;
2182 } else {
2183 handle = self.scan_tag_handle(false, start_mark)?;
2185 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2187 let is_secondary_handle = handle == "!!";
2189 suffix =
2190 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", start_mark)?;
2191 } else {
2192 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, start_mark)?;
2193 "!".clone_into(&mut handle);
2194 if suffix.is_empty() {
2197 handle.clear();
2198 "!".clone_into(&mut suffix);
2199 }
2200 }
2201 }
2202
2203 if is_blank_or_breakz(self.input.look_ch())
2204 || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
2205 {
2206 Ok(Token(
2209 Span::new(*start_mark, self.mark),
2210 TokenType::Tag(handle.into(), suffix.into()),
2211 ))
2212 } else {
2213 Err(ScanError::new_str(
2214 *start_mark,
2215 "while scanning a tag, did not find expected whitespace or line break",
2216 ))
2217 }
2218 }
2219
2220 fn scan_tag_handle_cow(&mut self, mark: &Marker) -> Result<Cow<'input, str>, ScanError> {
2225 let Some(start) = self.input.byte_offset() else {
2226 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
2227 };
2228
2229 if self.input.look_ch() != '!' {
2230 return Err(ScanError::new_str(
2231 *mark,
2232 "while scanning a tag, did not find expected '!'",
2233 ));
2234 }
2235
2236 self.skip_non_blank();
2238
2239 self.input.lookahead(1);
2241 while self.input.next_is_alpha() {
2242 self.skip_non_blank();
2243 self.input.lookahead(1);
2244 }
2245
2246 if self.input.peek() == '!' {
2248 self.skip_non_blank();
2249 }
2250
2251 let Some(end) = self.input.byte_offset() else {
2252 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
2253 };
2254
2255 if let Some(slice) = self.try_borrow_slice(start, end) {
2256 Ok(Cow::Borrowed(slice))
2257 } else {
2258 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
2259 ScanError::new_str(
2260 *mark,
2261 "internal error: input advertised slicing but did not provide a slice",
2262 )
2263 })?;
2264 Ok(Cow::Owned(slice.to_owned()))
2265 }
2266 }
2267
2268 fn scan_tag_shorthand_suffix_cow(
2272 &mut self,
2273 mark: &Marker,
2274 require_non_empty: bool,
2275 ) -> Result<Cow<'input, str>, ScanError> {
2276 let Some(start) = self.input.byte_offset() else {
2277 return Ok(Cow::Owned(
2278 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
2279 ));
2280 };
2281
2282 while is_tag_char(self.input.look_ch()) {
2284 if self.input.peek() == '%' {
2285 let current = self
2287 .input
2288 .byte_offset()
2289 .expect("byte_offset() must remain available once enabled");
2290 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
2291 slice.to_owned()
2292 } else {
2293 String::new()
2294 };
2295
2296 while is_tag_char(self.input.look_ch()) {
2298 if self.input.peek() == '%' {
2299 out.push(self.scan_uri_escapes(mark)?);
2300 } else {
2301 out.push(self.input.peek());
2302 self.skip_non_blank();
2303 }
2304 }
2305 return Ok(Cow::Owned(out));
2306 }
2307 self.skip_non_blank();
2308 }
2309
2310 let Some(end) = self.input.byte_offset() else {
2311 return Ok(Cow::Owned(
2312 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
2313 ));
2314 };
2315
2316 if require_non_empty && start == end {
2317 return Err(ScanError::new_str(
2318 *mark,
2319 "while parsing a tag, did not find expected tag URI",
2320 ));
2321 }
2322
2323 if let Some(slice) = self.try_borrow_slice(start, end) {
2324 Ok(Cow::Borrowed(slice))
2325 } else {
2326 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
2327 ScanError::new_str(
2328 *mark,
2329 "internal error: input advertised slicing but did not provide a slice",
2330 )
2331 })?;
2332 Ok(Cow::Owned(slice.to_owned()))
2333 }
2334 }
2335
2336 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
2337 let mut string = String::new();
2338 if self.input.look_ch() != '!' {
2339 return Err(ScanError::new_str(
2340 *mark,
2341 "while scanning a tag, did not find expected '!'",
2342 ));
2343 }
2344
2345 string.push(self.input.peek());
2346 self.skip_non_blank();
2347
2348 let n_chars = self.input.fetch_while_is_alpha(&mut string);
2349 self.mark.offsets.chars += n_chars;
2350 self.mark.col += n_chars;
2351 self.mark.offsets.bytes = self.input.byte_offset();
2352
2353 if self.input.peek() == '!' {
2355 string.push(self.input.peek());
2356 self.skip_non_blank();
2357 } else if directive && string != "!" {
2358 return Err(ScanError::new_str(
2362 *mark,
2363 "while parsing a tag directive, did not find expected '!'",
2364 ));
2365 }
2366 Ok(string)
2367 }
2368
2369 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
2375 let mut string = String::new();
2376
2377 if self.input.look_ch() == '!' {
2378 string.push(self.input.peek());
2380 self.skip_non_blank();
2381 } else if !is_tag_char(self.input.peek()) {
2382 return Err(ScanError::new_str(
2384 *start_mark,
2385 "invalid global tag character",
2386 ));
2387 } else if self.input.peek() == '%' {
2388 string.push(self.scan_uri_escapes(start_mark)?);
2390 } else {
2391 string.push(self.input.peek());
2393 self.skip_non_blank();
2394 }
2395
2396 while is_uri_char(self.input.look_ch()) {
2397 if self.input.peek() == '%' {
2398 string.push(self.scan_uri_escapes(start_mark)?);
2399 } else {
2400 string.push(self.input.peek());
2401 self.skip_non_blank();
2402 }
2403 }
2404
2405 Ok(string)
2406 }
2407
2408 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
2412 self.skip_non_blank();
2414 self.skip_non_blank();
2415
2416 let mut string = String::new();
2417 while is_uri_char(self.input.look_ch()) {
2418 if self.input.peek() == '%' {
2419 string.push(self.scan_uri_escapes(start_mark)?);
2420 } else {
2421 string.push(self.input.peek());
2422 self.skip_non_blank();
2423 }
2424 }
2425
2426 if string.is_empty() {
2427 return Err(ScanError::new_str(
2428 *start_mark,
2429 "while parsing a tag, did not find expected tag URI",
2430 ));
2431 }
2432
2433 if self.input.peek() != '>' {
2434 return Err(ScanError::new_str(
2435 *start_mark,
2436 "while scanning a verbatim tag, did not find the expected '>'",
2437 ));
2438 }
2439 self.skip_non_blank();
2440
2441 Ok(string)
2442 }
2443
2444 fn scan_tag_shorthand_suffix(
2445 &mut self,
2446 _directive: bool,
2447 _is_secondary: bool,
2448 head: &str,
2449 mark: &Marker,
2450 ) -> Result<String, ScanError> {
2451 let mut length = head.len();
2452 let mut string = String::new();
2453
2454 if length > 1 {
2457 string.extend(head.chars().skip(1));
2458 }
2459
2460 while is_tag_char(self.input.look_ch()) {
2461 if self.input.peek() == '%' {
2463 string.push(self.scan_uri_escapes(mark)?);
2464 } else {
2465 string.push(self.input.peek());
2466 self.skip_non_blank();
2467 }
2468
2469 length += 1;
2470 }
2471
2472 if length == 0 {
2473 return Err(ScanError::new_str(
2474 *mark,
2475 "while parsing a tag, did not find expected tag URI",
2476 ));
2477 }
2478
2479 Ok(string)
2480 }
2481
2482 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
2483 let mut width = 0usize;
2484 let mut bytes = [0u8; 4];
2485 let mut bytes_len = 0usize;
2486 loop {
2487 self.input.lookahead(3);
2488
2489 let c = self.input.peek_nth(1);
2490 let nc = self.input.peek_nth(2);
2491
2492 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
2493 return Err(ScanError::new_str(
2494 *mark,
2495 "while parsing a tag, found an invalid escape sequence",
2496 ));
2497 }
2498
2499 let byte = u8::try_from((as_hex(c) << 4) + as_hex(nc))
2500 .expect("two hex nibbles always fit in a byte");
2501 if width == 0 {
2502 width = match byte {
2503 _ if byte & 0x80 == 0x00 => 1,
2504 _ if byte & 0xE0 == 0xC0 => 2,
2505 _ if byte & 0xF0 == 0xE0 => 3,
2506 _ if byte & 0xF8 == 0xF0 => 4,
2507 _ => {
2508 return Err(ScanError::new_str(
2509 *mark,
2510 "while parsing a tag, found an incorrect leading UTF-8 byte",
2511 ));
2512 }
2513 };
2514 } else if byte & 0xc0 != 0x80 {
2515 return Err(ScanError::new_str(
2516 *mark,
2517 "while parsing a tag, found an incorrect trailing UTF-8 byte",
2518 ));
2519 }
2520
2521 bytes[bytes_len] = byte;
2522 bytes_len += 1;
2523
2524 self.skip_n_non_blank(3);
2525
2526 width -= 1;
2527 if width == 0 {
2528 break;
2529 }
2530 }
2531
2532 let s = core::str::from_utf8(&bytes[..bytes_len]).map_err(|_| {
2533 ScanError::new_str(
2534 *mark,
2535 "while parsing a tag, found an invalid UTF-8 codepoint",
2536 )
2537 })?;
2538
2539 let mut chars = s.chars();
2540 match (chars.next(), chars.next()) {
2541 (Some(ch), None) => Ok(ch),
2542 _ => Err(ScanError::new_str(
2543 *mark,
2544 "while parsing a tag, found an invalid UTF-8 codepoint",
2545 )),
2546 }
2547 }
2548
2549 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
2550 self.save_simple_key();
2551 self.disallow_simple_key();
2552
2553 let tok = self.scan_anchor(alias)?;
2554
2555 self.tokens.push_back(tok.into());
2556
2557 Ok(())
2558 }
2559
2560 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
2561 let start_mark = self.mark;
2562
2563 self.skip_non_blank();
2565
2566 if let Some(start) = self.input.byte_offset() {
2568 while is_anchor_char(self.input.look_ch()) {
2569 self.skip_non_blank();
2570 }
2571
2572 let end = self
2573 .input
2574 .byte_offset()
2575 .expect("byte_offset() must remain available once enabled");
2576
2577 if start == end {
2578 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2579 }
2580
2581 let cow = if let Some(slice) = self.try_borrow_slice(start, end) {
2582 Cow::Borrowed(slice)
2583 } else if let Some(slice) = self.input.slice_bytes(start, end) {
2584 Cow::Owned(slice.to_owned())
2585 } else {
2586 return Err(ScanError::new_str(
2587 start_mark,
2588 "internal error: input advertised slicing but did not provide a slice",
2589 ));
2590 };
2591
2592 let tok = if alias {
2593 TokenType::Alias(cow)
2594 } else {
2595 TokenType::Anchor(cow)
2596 };
2597 return Ok(Token(Span::new(start_mark, self.mark), tok));
2598 }
2599
2600 let mut string = String::new();
2601 while is_anchor_char(self.input.look_ch()) {
2602 string.push(self.input.peek());
2603 self.skip_non_blank();
2604 }
2605
2606 if string.is_empty() {
2607 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2608 }
2609
2610 let tok = if alias {
2611 TokenType::Alias(string.into())
2612 } else {
2613 TokenType::Anchor(string.into())
2614 };
2615 Ok(Token(Span::new(start_mark, self.mark), tok))
2616 }
2617
2618 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
2619 self.save_simple_key();
2621
2622 let start_mark = self.mark;
2623 let indicator = self.input.peek();
2624 self.flow_markers.push((start_mark, indicator));
2625
2626 self.roll_one_col_indent();
2627 self.increase_flow_level()?;
2628
2629 self.allow_simple_key();
2630
2631 self.skip_non_blank();
2632
2633 if tok == TokenType::FlowMappingStart {
2634 self.flow_mapping_started.push(true);
2635 } else {
2636 self.flow_mapping_started.push(false);
2637 self.implicit_flow_mapping_states
2638 .push(ImplicitMappingState::Possible);
2639 }
2640
2641 let token_index = self.tokens.len();
2642 self.skip_ws_to_eol(SkipTabs::Yes)?;
2643
2644 self.insert_token(token_index, Token(Span::new(start_mark, self.mark), tok));
2645 Ok(())
2646 }
2647
2648 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
2649 if self.flow_level == 0 {
2651 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2652 }
2653
2654 let Some((open_mark, open_ch)) = self.flow_markers.pop() else {
2655 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2656 };
2657
2658 let (expected_open, actual_close) = match tok {
2659 TokenType::FlowSequenceEnd => ('[', ']'),
2660 TokenType::FlowMappingEnd => ('{', '}'),
2661 _ => unreachable!("flow collection end called with non-closing token"),
2662 };
2663
2664 if open_ch != expected_open {
2665 return Err(ScanError::new(
2666 open_mark,
2667 format!("mismatched bracket '{open_ch}' closed by '{actual_close}'"),
2668 ));
2669 }
2670
2671 let flow_level = self.flow_level;
2672
2673 self.remove_simple_key()?;
2674
2675 if matches!(tok, TokenType::FlowSequenceEnd) {
2676 self.end_implicit_mapping(self.mark, flow_level);
2677 self.implicit_flow_mapping_states.pop();
2679 }
2680 self.flow_mapping_started.pop();
2681
2682 self.decrease_flow_level();
2683
2684 self.disallow_simple_key();
2685
2686 let start_mark = self.mark;
2687 self.skip_non_blank();
2688 let token_index = self.tokens.len();
2689 self.skip_ws_to_eol(SkipTabs::Yes)?;
2690
2691 if self.flow_level > 0 {
2697 self.adjacent_value_allowed_at = self.mark.index();
2698 }
2699
2700 self.insert_token(token_index, Token(Span::new(start_mark, self.mark), tok));
2701 Ok(())
2702 }
2703
2704 fn fetch_flow_entry(&mut self) -> ScanResult {
2706 self.remove_simple_key()?;
2707 self.allow_simple_key();
2708
2709 self.end_implicit_mapping(self.mark, self.flow_level);
2710 if self.current_flow_collection_is_sequence() {
2711 self.set_current_flow_mapping_started(false);
2712 }
2713
2714 let start_mark = self.mark;
2715 self.skip_non_blank();
2716 let token_index = self.tokens.len();
2717 self.skip_ws_to_eol(SkipTabs::Yes)?;
2718
2719 self.insert_token(
2720 token_index,
2721 Token(Span::new(start_mark, self.mark), TokenType::FlowEntry),
2722 );
2723 Ok(())
2724 }
2725
2726 fn increase_flow_level(&mut self) -> ScanResult {
2727 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
2728 self.flow_level = self
2729 .flow_level
2730 .checked_add(1)
2731 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
2732 Ok(())
2733 }
2734
2735 fn decrease_flow_level(&mut self) {
2736 if self.flow_level > 0 {
2737 self.flow_level -= 1;
2738 self.simple_keys.pop().unwrap();
2739 }
2740 }
2741
2742 fn fetch_block_entry(&mut self) -> ScanResult {
2748 if self.flow_level > 0 {
2749 return Err(ScanError::new_str(
2751 self.mark,
2752 r#""-" is only valid inside a block"#,
2753 ));
2754 }
2755 if !self.simple_key_allowed {
2757 return Err(ScanError::new_str(
2758 self.mark,
2759 "block sequence entries are not allowed in this context",
2760 ));
2761 }
2762
2763 if let Some(QueuedToken(span, QueuedTokenType::Anchor(..) | QueuedTokenType::Tag(..))) =
2765 self.tokens.back()
2766 {
2767 if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
2768 return Err(ScanError::new_str(
2769 span.start,
2770 "invalid indentation for anchor",
2771 ));
2772 }
2773 }
2774
2775 let mark = self.mark;
2777 self.skip_non_blank();
2778
2779 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
2781 let token_index = self.tokens.len();
2782 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
2783 self.input.lookahead(2);
2784 if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
2785 {
2786 return Err(ScanError::new_str(
2787 self.mark,
2788 "'-' must be followed by a valid YAML whitespace",
2789 ));
2790 }
2791
2792 self.skip_ws_to_eol(SkipTabs::No)?;
2793 self.input.lookahead(1);
2794 if self.input.next_is_break() || self.input.next_is_flow() {
2795 self.roll_one_col_indent();
2796 }
2797
2798 self.remove_simple_key()?;
2799 self.allow_simple_key();
2800
2801 self.insert_token(
2802 token_index,
2803 Token(Span::empty(self.mark), TokenType::BlockEntry),
2804 );
2805
2806 Ok(())
2807 }
2808
2809 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
2810 if let Some((mark, bracket)) = self.flow_markers.pop() {
2811 return Err(ScanError::new(
2812 mark,
2813 format!("unclosed bracket '{bracket}'"),
2814 ));
2815 }
2816
2817 self.unroll_indent(-1);
2818 self.remove_simple_key()?;
2819 self.disallow_simple_key();
2820
2821 let mark = self.mark;
2822
2823 self.skip_n_non_blank(3);
2824
2825 self.document_prefix_allowed = matches!(t, TokenType::DocumentEnd);
2826 self.tokens
2827 .push_back(Token(Span::new(mark, self.mark), t).into());
2828 Ok(())
2829 }
2830
2831 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
2832 self.save_simple_key();
2833 self.allow_simple_key();
2834 let tok = self.scan_block_scalar(literal)?;
2835
2836 self.tokens.push_back(tok.into());
2837 Ok(())
2838 }
2839
/// Scan a block scalar and return a `Scalar` token with style `Literal`
/// (when `literal` is true) or `Folded`.
///
/// The header may carry a chomping indicator (`+` keep, `-` strip; default
/// clip) and a one-digit indentation indicator (1-9), in either order. The
/// header line may only be followed by whitespace, an optional comment and a
/// line break.
///
/// The returned span carries the content indent (`with_indent`) only when
/// the scalar text is not empty after trimming.
///
/// # Errors
/// - the indentation indicator is `0`;
/// - the header is followed by something other than a comment / line break;
/// - the content starts with a tab;
/// - the first non-empty line is indented less than the content indent but
///   more than the enclosing indent (with an exception at column 0 of a
///   top-level scalar for document markers and comments);
/// - `skip_ws_to_eol` fails.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    // Explicit indentation indicator from the header (0 = none given).
    let mut increment: usize = 0;
    // Content indentation column (0 = not yet determined).
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        ScalarStyle::Literal
    } else {
        ScalarStyle::Folded
    };

    let mut string = String::new();
    // Line break that ended the previous content line.
    let mut leading_break = String::new();
    // Line breaks of empty lines following `leading_break`.
    let mut trailing_breaks = String::new();
    // Line break that ended the header line.
    let mut chomping_break = String::new();

    // Consume the first character (presumably the `|` / `>` indicator).
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header form 1: chomping indicator, optionally followed by a digit.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            // The character is a digit, so this subtraction yields its numeric value.
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    // Header form 2: digit, optionally followed by a chomping indicator.
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    // Rest of the header line: whitespace and possibly a comment.
    self.skip_ws_to_eol(SkipTabs::Yes)?;

    self.input.lookahead(1);
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    // Keep the header's line break; it is what an empty clipped/kept scalar yields.
    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indicator is relative to the enclosing indent
    // (or absolute when there is no enclosing indent).
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    if indent == 0 {
        // No explicit indicator: let the first line determine the indent.
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // End of input right after the header / leading empty lines: the
    // scalar has no content lines, only (possibly) line breaks.
    if self.input.next_is_z() {
        let contents = match chomping {
            Chomping::Strip => String::new(),
            // Input ended on the header line itself: nothing to keep.
            _ if self.mark.line == start_mark.line() => String::new(),
            Chomping::Clip => chomping_break,
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            Chomping::Keep => trailing_breaks,
        };

        let span = if contents.trim().is_empty() {
            Span::new(start_mark, self.mark)
        } else {
            Span::new(start_mark, self.mark).with_indent(Some(indent))
        };

        return Ok(Token(span, TokenType::Scalar(style, contents.into())));
    }

    // The line is indented less than the content but more than the
    // enclosing block: that is an indentation error ...
    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        if self.indent < 0 && self.mark.col == 0 {
            self.input.lookahead(4);
            // ... except at column 0 of a top-level scalar, where a
            // document marker or a comment is tolerated.
            if self.input.next_is_document_start()
                || self.input.next_is_document_end()
                || self.input.peek() == '#'
            {
                // Nothing to do: the scalar simply ends here.
            } else {
                return Err(ScanError::new_str(
                    self.mark,
                    "wrongly indented line in block scalar",
                ));
            }
        } else {
            return Err(ScanError::new_str(
                self.mark,
                "wrongly indented line in block scalar",
            ));
        }
    }

    let mut line_buffer = String::with_capacity(100);
    // NOTE: shadows the header mark. From here on, spans start at the
    // position reached after the header and leading indentation.
    let start_mark = self.mark;
    // One iteration per content line at exactly the content indent.
    while self.mark.col == indent && !self.input.next_is_z() {
        if indent == 0 {
            // With zero indent, a document end marker terminates the scalar.
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        // Whether this line starts with a blank.
        trailing_blank = self.input.next_is_blank();
        // Folding: in a folded scalar, a single break between two lines
        // that do not start with a blank becomes a space (or the empty
        // lines in between, if any). Otherwise breaks are kept verbatim.
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            string.push_str(&trailing_breaks);
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        // Remembered for the folding decision of the next line.
        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        self.input.lookahead(2);
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Skip the indentation (and empty lines) before the next content line.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Clip and keep retain the final line break.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        // Input ended while still at or beyond the content indent (and not at
        // column 0): a `\n` is appended here.
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    // Keep additionally retains the trailing empty lines.
    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    let span = if string.trim().is_empty() {
        Span::new(start_mark, self.mark)
    } else {
        Span::new(start_mark, self.mark).with_indent(Some(indent))
    };

    Ok(Token(span, TokenType::Scalar(style, string.into())))
}
3057
3058 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
3068 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
3070 string.push(self.input.peek());
3071 self.skip_blank();
3077 }
3078
3079 if self.input.buf_is_empty() {
3082 let mut n_chars = 0;
3090 debug_assert!(line_buffer.is_empty());
3091 while let Some(c) = self.input.raw_read_non_breakz_ch() {
3092 line_buffer.push(c);
3093 n_chars += 1;
3094 }
3095
3096 self.mark.col += n_chars;
3098 self.mark.offsets.chars += n_chars;
3099 self.mark.offsets.bytes = self.input.byte_offset();
3100
3101 string.reserve(line_buffer.len());
3103 string.push_str(line_buffer);
3104 line_buffer.clear();
3106 }
3107 }
3108
3109 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
3111 loop {
3112 if indent < self.input.bufmaxlen() - 2 {
3114 self.input.lookahead(self.input.bufmaxlen());
3115 while self.mark.col < indent && self.input.peek() == ' ' {
3116 self.skip_blank();
3117 }
3118 } else {
3119 loop {
3120 self.input.lookahead(self.input.bufmaxlen());
3121 while !self.input.buf_is_empty()
3122 && self.mark.col < indent
3123 && self.input.peek() == ' '
3124 {
3125 self.skip_blank();
3126 }
3127 if self.mark.col == indent
3131 || (!self.input.buf_is_empty() && self.input.peek() != ' ')
3132 {
3133 break;
3134 }
3135 }
3136 self.input.lookahead(2);
3137 }
3138
3139 if self.input.next_is_break() {
3141 self.read_break(breaks);
3142 } else {
3143 break;
3145 }
3146 }
3147 }
3148
3149 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
3154 let mut max_indent = 0;
3155 loop {
3156 while self.input.look_ch() == ' ' {
3158 self.skip_blank();
3159 }
3160
3161 if self.mark.col > max_indent {
3162 max_indent = self.mark.col;
3163 }
3164
3165 if self.input.next_is_break() {
3166 self.input.lookahead(2);
3168 self.read_break(breaks);
3169 } else {
3170 break;
3172 }
3173 }
3174
3175 *indent = max_indent.max((self.indent + 1) as usize);
3184 if self.indent > 0 {
3185 *indent = (*indent).max(1);
3186 }
3187 }
3188
3189 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
3190 self.save_simple_key();
3191 self.disallow_simple_key();
3192
3193 let token_index = self.tokens.len();
3194 let tok = self.scan_flow_scalar(single)?;
3195
3196 if self.skip_to_next_token(true)? {
3199 self.adjacent_value_allowed_at = usize::MAX;
3200 } else {
3201 self.adjacent_value_allowed_at = self.mark.index();
3202 }
3203
3204 self.insert_token(token_index, tok);
3205 Ok(())
3206 }
3207
    /// Scan a quoted flow scalar and return it as a `Scalar` token.
    ///
    /// `single` selects the quoting style: `true` for single quotes, `false` for double quotes.
    /// The scalar's text is accumulated in a `FlowScalarBuf`, which starts out as a borrowed
    /// byte range when the input reports byte offsets and is promoted to an owned `String` as
    /// soon as the content can no longer be represented as a verbatim slice (line folding,
    /// discarded trailing whitespace, escapes handled by the helper).
    ///
    /// # Errors
    /// Returns a `ScanError` when:
    /// - a document indicator is found at column 0 inside the scalar,
    /// - the input ends before the closing quote,
    /// - a tab is used as indentation on a continuation line,
    /// - a continuation line in block context is not indented deeper than `self.indent`,
    /// - the closing quote is followed by content that is not allowed there,
    /// - the input advertised byte offsets but cannot provide the corresponding slice.
    #[allow(clippy::too_many_lines)]
    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;

        // When byte offsets are available, start a borrowed range just past the character
        // under the cursor (its UTF-8 length is added to the current offset).
        let mut buf = match self.input.byte_offset() {
            Some(off) => FlowScalarBuf::new_borrowed(off + self.input.peek().len_utf8()),
            None => FlowScalarBuf::new_owned(),
        };

        // Scratch buffer reused for the first line break of each folded run; its content is
        // never copied into the scalar.
        let mut break_scratch = String::new();

        // Step over the character under the cursor (presumably the opening quote).
        self.skip_non_blank();

        loop {
            self.input.lookahead(4);

            // `---` / `...` at column 0 cannot appear inside a quoted scalar.
            if self.mark.col == 0 && self.input.next_is_document_indicator() {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected document indicator",
                ));
            }

            if self.input.next_is_z() {
                return Err(ScanError::new_str(start_mark, "unclosed quote"));
            }

            // Set by the helper when an escaped line break (`\` + break) was consumed.
            let mut leading_blanks = false;
            self.consume_flow_scalar_non_whitespace_chars(
                single,
                &mut buf,
                &mut leading_blanks,
                &start_mark,
            )?;

            // The helper stops on the closing quote without consuming it.
            match self.input.look_ch() {
                '\'' if single => break,
                '"' if !single => break,
                _ => {}
            }

            // Owned mode: length of the string before the current run of blanks, so the run
            // can be truncated away if a line break follows.
            let mut trailing_ws_start: Option<usize> = None;
            let mut has_leading_break = false;
            let mut has_trailing_breaks = false;

            // Borrowed mode: byte offset where the current run of blanks started.
            let mut pending_ws_start: Option<usize> = None;

            while self.input.next_is_blank() || self.input.next_is_break() {
                if self.input.next_is_blank() {
                    if leading_blanks {
                        // Blanks after a line break are indentation and are dropped; a tab
                        // left of the current indent is rejected.
                        if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tab cannot be used as indentation",
                            ));
                        }
                        self.skip_blank();
                    } else {
                        // Blanks inside a line: keep them tentatively.
                        match buf {
                            FlowScalarBuf::Owned(ref mut string) => {
                                if trailing_ws_start.is_none() {
                                    trailing_ws_start = Some(string.len());
                                }
                                string.push(self.input.peek());
                            }
                            FlowScalarBuf::Borrowed { .. } => {
                                if pending_ws_start.is_none() {
                                    pending_ws_start = self.input.byte_offset();
                                }
                            }
                        }
                        self.skip_blank();

                        // Borrowed mode: record the byte range of the blank run seen so far.
                        if let (FlowScalarBuf::Borrowed { .. }, Some(ws_start), Some(ws_end)) =
                            (&mut buf, pending_ws_start, self.input.byte_offset())
                        {
                            buf.note_pending_ws(ws_start, ws_end);
                        }
                    }
                } else {
                    self.input.lookahead(2);

                    if leading_blanks {
                        // Second and later breaks of a run are kept in the scalar, which
                        // requires an owned buffer.
                        match buf {
                            FlowScalarBuf::Owned(ref mut string) => self.read_break(string),
                            FlowScalarBuf::Borrowed { .. } => {
                                self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                                let Some(string) = buf.as_owned_mut() else {
                                    unreachable!()
                                };
                                self.read_break(string);
                            }
                        }
                        has_trailing_breaks = true;
                    } else {
                        // First break of a run: blanks that preceded it are not part of the
                        // scalar.
                        if let Some(pos) = trailing_ws_start.take() {
                            if let FlowScalarBuf::Owned(ref mut string) = buf {
                                string.truncate(pos);
                            }
                        }

                        if pending_ws_start.take().is_some() {
                            // Dropping whitespace breaks the verbatim slice: go owned first.
                            if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                                self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                            }
                            buf.discard_pending_ws();
                        } else {
                            buf.commit_pending_ws();
                        }

                        // The first break itself is read into the scratch buffer and dropped.
                        break_scratch.clear();
                        self.read_break(&mut break_scratch);
                        has_leading_break = true;
                        leading_blanks = true;
                    }
                }

                self.input.lookahead(1);
            }

            // In block context, a continuation line must be indented deeper than the current
            // indent unless it only holds the closing quote.
            if leading_blanks && has_leading_break && self.flow_level == 0 {
                let next_ch = self.input.peek();
                let is_closing_quote = (single && next_ch == '\'') || (!single && next_ch == '"');
                if !is_closing_quote && (self.mark.col as isize) <= self.indent {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid indentation in multiline quoted scalar",
                    ));
                }
            }

            if leading_blanks {
                // A single line break with no further breaks folds into one space.
                if has_leading_break && !has_trailing_breaks {
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => string.push(' '),
                        FlowScalarBuf::Borrowed { .. } => {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                            let Some(string) = buf.as_owned_mut() else {
                                unreachable!()
                            };
                            string.push(' ');
                        }
                    }
                }
            }
        }

        // Step over the closing quote; the token's span ends right after it.
        self.skip_non_blank();
        let end_mark = self.mark;

        // Only a restricted set of characters may follow the closing quote on the same line.
        self.skip_ws_to_eol(SkipTabs::Yes)?;
        match self.input.peek() {
            // Flow-collection punctuation, inside a flow collection only.
            ',' | '}' | ']' if self.flow_level > 0 => {}
            // Line break or end of input.
            c if is_breakz(c) => {}
            // In block context a `:` is accepted only if the scalar started on this line.
            ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
            // In flow context a `:` is always accepted.
            ':' if self.flow_level > 0 => {}
            _ => {
                // NOTE(review): this message is also produced for single-quoted scalars.
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid trailing content after double-quoted scalar",
                ));
            }
        }

        let style = if single {
            ScalarStyle::SingleQuoted
        } else {
            ScalarStyle::DoubleQuoted
        };

        let contents = match buf {
            FlowScalarBuf::Owned(string) => Cow::Owned(string),
            FlowScalarBuf::Borrowed {
                start,
                mut end,
                pending_ws_start,
                pending_ws_end,
            } => {
                // Whitespace still pending right before the closing quote belongs to the
                // scalar: extend the range over it.
                if pending_ws_start.is_some() {
                    end = pending_ws_end;
                }
                if let Some(slice) = self.try_borrow_slice(start, end) {
                    Cow::Borrowed(slice)
                } else {
                    // The input cannot lend a slice with the `'input` lifetime: copy instead.
                    let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
                        ScanError::new_str(
                            start_mark,
                            "internal error: input advertised offsets but did not provide a slice",
                        )
                    })?;
                    Cow::Owned(slice.to_owned())
                }
            }
        };

        Ok(Token(
            Span::new(start_mark, end_mark),
            TokenType::Scalar(style, contents),
        ))
    }
3456
    /// Consume a run of non-whitespace characters inside a quoted scalar.
    ///
    /// Characters are consumed until a blank, a line break or end of input is under the
    /// cursor, or until the closing quote is reached (the quote itself is left unconsumed).
    ///
    /// - `single`: `true` for a single-quoted scalar, `false` for a double-quoted one.
    /// - `buf`: the scalar's accumulator. It is promoted from borrowed to owned whenever the
    ///   decoded text differs from the raw input (`''`, escape sequences, escaped breaks).
    /// - `leading_blanks`: set to `true` when an escaped line break was consumed.
    /// - `start_mark`: start of the scalar, used for error reporting and buffer promotion.
    ///
    /// # Errors
    /// Propagates errors from promoting the buffer and from resolving escape sequences.
    fn consume_flow_scalar_non_whitespace_chars(
        &mut self,
        single: bool,
        buf: &mut FlowScalarBuf,
        leading_blanks: &mut bool,
        start_mark: &Marker,
    ) -> Result<(), ScanError> {
        self.input.lookahead(2);
        while !is_blank_or_breakz(self.input.peek()) {
            match self.input.peek() {
                // `''` inside a single-quoted scalar is an escaped single quote.
                '\'' if self.input.peek_nth(1) == '\'' && single => {
                    if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                        buf.commit_pending_ws();
                        self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                    }
                    let Some(string) = buf.as_owned_mut() else {
                        unreachable!()
                    };
                    string.push('\'');
                    self.skip_n_non_blank(2);
                }
                // Closing quote of a single-quoted scalar.
                '\'' if single => break,
                // Closing quote of a double-quoted scalar.
                '"' if !single => break,
                // Backslash followed by a line break: escaped break, nothing is emitted.
                '\\' if !single && is_break(self.input.peek_nth(1)) => {
                    self.input.lookahead(3);
                    if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                        buf.commit_pending_ws();
                        self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                    }
                    self.skip_non_blank();
                    self.skip_linebreak();
                    *leading_blanks = true;
                    break;
                }
                // Any other escape sequence in a double-quoted scalar.
                '\\' if !single => {
                    if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                        buf.commit_pending_ws();
                        self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                    }
                    let Some(string) = buf.as_owned_mut() else {
                        unreachable!()
                    };
                    string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
                }
                // Ordinary character: copied in owned mode, covered by the range in borrowed
                // mode.
                c => {
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => {
                            string.push(c);
                        }
                        FlowScalarBuf::Borrowed { .. } => {
                            buf.commit_pending_ws();
                        }
                    }
                    self.skip_non_blank();

                    // Borrowed mode: extend the range's end to just past this character.
                    if let Some(new_end) = self.input.byte_offset() {
                        if let FlowScalarBuf::Borrowed { end, .. } = buf {
                            *end = new_end;
                        }
                    }
                }
            }
            self.input.lookahead(2);
        }
        Ok(())
    }
3535
3536 fn resolve_flow_scalar_escape_sequence(
3543 &mut self,
3544 start_mark: &Marker,
3545 ) -> Result<char, ScanError> {
3546 let mut code_length = 0usize;
3547 let mut ret = '\0';
3548
3549 match self.input.peek_nth(1) {
3550 '0' => ret = '\0',
3551 'a' => ret = '\x07',
3552 'b' => ret = '\x08',
3553 't' | '\t' => ret = '\t',
3554 'n' => ret = '\n',
3555 'v' => ret = '\x0b',
3556 'f' => ret = '\x0c',
3557 'r' => ret = '\x0d',
3558 'e' => ret = '\x1b',
3559 ' ' => ret = '\x20',
3560 '"' => ret = '"',
3561 '/' => ret = '/',
3562 '\\' => ret = '\\',
3563 'N' => ret = char::from_u32(0x85).unwrap(),
3565 '_' => ret = char::from_u32(0xA0).unwrap(),
3567 'L' => ret = char::from_u32(0x2028).unwrap(),
3569 'P' => ret = char::from_u32(0x2029).unwrap(),
3571 'x' => code_length = 2,
3572 'u' => code_length = 4,
3573 'U' => code_length = 8,
3574 _ => {
3575 return Err(ScanError::new_str(
3576 *start_mark,
3577 "while parsing a quoted scalar, found unknown escape character",
3578 ))
3579 }
3580 }
3581 self.skip_n_non_blank(2);
3582
3583 if code_length > 0 {
3585 self.input.lookahead(code_length);
3586 let mut value = 0u32;
3587 for i in 0..code_length {
3588 let c = self.input.peek_nth(i);
3589 if !is_hex(c) {
3590 return Err(ScanError::new_str(
3591 *start_mark,
3592 "while parsing a quoted scalar, did not find expected hexadecimal number",
3593 ));
3594 }
3595 value = (value << 4) + as_hex(c);
3596 }
3597
3598 self.skip_n_non_blank(code_length);
3599
3600 if code_length == 4 && (0xD800..=0xDBFF).contains(&value) {
3602 self.input.lookahead(2);
3603 if self.input.peek() == '\\' && self.input.peek_nth(1) == 'u' {
3604 self.skip_n_non_blank(2);
3605 self.input.lookahead(4);
3606 let mut low_value = 0u32;
3607 for i in 0..4 {
3608 let c = self.input.peek_nth(i);
3609 if !is_hex(c) {
3610 return Err(ScanError::new_str(
3611 *start_mark,
3612 "while parsing a quoted scalar, did not find expected hexadecimal number for low surrogate",
3613 ));
3614 }
3615 low_value = (low_value << 4) + as_hex(c);
3616 }
3617 if (0xDC00..=0xDFFF).contains(&low_value) {
3618 value = 0x10000 + (((value - 0xD800) << 10) | (low_value - 0xDC00));
3619 self.skip_n_non_blank(4);
3620 } else {
3621 return Err(ScanError::new_str(
3622 *start_mark,
3623 "while parsing a quoted scalar, found invalid low surrogate",
3624 ));
3625 }
3626 } else {
3627 return Err(ScanError::new_str(
3628 *start_mark,
3629 "while parsing a quoted scalar, found high surrogate without following low surrogate",
3630 ));
3631 }
3632 } else if code_length == 4 && (0xDC00..=0xDFFF).contains(&value) {
3633 return Err(ScanError::new_str(
3634 *start_mark,
3635 "while parsing a quoted scalar, found unpaired low surrogate",
3636 ));
3637 }
3638
3639 let Some(ch) = char::from_u32(value) else {
3640 return Err(ScanError::new_str(
3641 *start_mark,
3642 "while parsing a quoted scalar, found invalid Unicode character escape code",
3643 ));
3644 };
3645 ret = ch;
3646 }
3647 Ok(ret)
3648 }
3649
3650 fn fetch_plain_scalar(&mut self) -> ScanResult {
3651 self.save_simple_key();
3652 self.disallow_simple_key();
3653
3654 let token_index = self.tokens.len();
3655 let tok = self.scan_plain_scalar()?;
3656
3657 self.insert_token(token_index, tok);
3658 Ok(())
3659 }
3660
    /// Scan a plain (unquoted) scalar and return it as a `Scalar` token.
    ///
    /// Content is accumulated in an owned `String`. Whitespace between words and line breaks
    /// between lines are staged in the scanner's `buf_whitespaces`, `buf_leading_break` and
    /// `buf_trailing_breaks` fields and only folded into the content once more content
    /// follows. When the input exposes byte offsets and the raw source slice is identical to
    /// the accumulated text, the token borrows from the input instead of owning the string.
    ///
    /// # Errors
    /// Returns a `ScanError` when:
    /// - the scalar starts left of the required indent inside a flow construct,
    /// - inside a flow construct, a `-` is directly followed by a flow indicator,
    /// - a tab is used in the indentation of a non-empty continuation line,
    /// - no content at all was scanned.
    #[allow(clippy::too_many_lines)]
    fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
        self.unroll_non_block_indents();
        // Continuation lines must reach at least this column in block context.
        let indent = self.indent + 1;
        let start_mark = self.mark;

        if self.flow_level > 0 && (start_mark.col as isize) < indent {
            return Err(ScanError::new_str(
                start_mark,
                "invalid indentation in flow construct",
            ));
        }

        let mut string = String::with_capacity(32);
        self.buf_whitespaces.clear();
        self.buf_leading_break.clear();
        self.buf_trailing_breaks.clear();
        // End of the last content character; trailing whitespace is not part of the span.
        let mut end_mark = self.mark;

        loop {
            self.input.lookahead(4);
            // A document indicator at column 0 or a `#` ends the scalar.
            if (self.mark.col == 0 && self.input.next_is_document_indicator())
                || self.input.peek() == '#'
            {
                // Remember that a comment cut a block-context scalar short after some
                // content and whitespace.
                if self.input.peek() == '#'
                    && !string.is_empty()
                    && !self.buf_whitespaces.is_empty()
                    && self.flow_level == 0
                {
                    self.interrupted_plain_by_comment = Some(self.mark);
                }
                break;
            }

            if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
                return Err(ScanError::new_str(
                    self.mark,
                    "plain scalar cannot start with '-' followed by ,[]{}",
                ));
            }

            if !self.input.next_is_blank_or_breakz()
                && self.input.next_can_be_plain_scalar(self.flow_level > 0)
            {
                // Flush the staged whitespace / breaks before appending new content.
                if self.leading_whitespace {
                    if self.buf_leading_break.is_empty() {
                        // NOTE(review): `buf_leading_break` is empty in this branch, so the
                        // first push is a no-op; only the trailing breaks contribute.
                        string.push_str(&self.buf_leading_break);
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                        self.buf_leading_break.clear();
                    } else {
                        // One break folds into a space; extra breaks are kept verbatim.
                        if self.buf_trailing_breaks.is_empty() {
                            string.push(' ');
                        } else {
                            string.push_str(&self.buf_trailing_breaks);
                            self.buf_trailing_breaks.clear();
                        }
                        self.buf_leading_break.clear();
                    }
                    self.leading_whitespace = false;
                } else if !self.buf_whitespaces.is_empty() {
                    string.push_str(&self.buf_whitespaces);
                    self.buf_whitespaces.clear();
                }

                // Copy the first character, then let the input copy the rest in chunks.
                string.push(self.input.peek());
                self.skip_non_blank();
                string.reserve(self.input.bufmaxlen());

                let mut end = false;
                while !end {
                    self.input.lookahead(self.input.bufmaxlen());
                    let (stop, chars_consumed) = self.input.fetch_plain_scalar_chunk(
                        &mut string,
                        self.input.bufmaxlen() - 1,
                        self.flow_level > 0,
                    );
                    end = stop;
                    // The chunked read bypasses the skip helpers: update the marker by hand.
                    self.mark.offsets.chars += chars_consumed;
                    self.mark.col += chars_consumed;
                    self.mark.offsets.bytes = self.input.byte_offset();
                }
                end_mark = self.mark;
            }

            // Anything other than a blank or a break ends the scalar.
            if !(self.input.next_is_blank() || self.input.next_is_break()) {
                break;
            }

            self.input.lookahead(2);
            // Stage the whitespace and line breaks that follow the content.
            while self.input.next_is_blank_or_break() {
                if self.input.next_is_blank() {
                    if !self.leading_whitespace {
                        self.buf_whitespaces.push(self.input.peek());
                        self.skip_blank();
                    } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                        // A tab inside the indentation is tolerated only if the rest of the
                        // line is empty.
                        self.skip_ws_to_eol(SkipTabs::Yes)?;
                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                start_mark,
                                "while scanning a plain scalar, found a tab",
                            ));
                        }
                    } else {
                        self.skip_blank();
                    }
                } else {
                    if self.leading_whitespace {
                        // Second or later break of the run.
                        self.skip_break();
                        self.buf_trailing_breaks.push('\n');
                    } else {
                        // First break: whitespace before it is dropped.
                        self.buf_whitespaces.clear();
                        self.skip_break();
                        self.buf_leading_break.push('\n');
                        self.leading_whitespace = true;
                    }
                }
                self.input.lookahead(2);
            }

            // In block context, a line indented less than required ends the scalar.
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        // The scalar ended after a line break: a simple key may start on the new line.
        if self.leading_whitespace {
            self.allow_simple_key();
        }

        if string.is_empty() {
            Err(ScanError::new_str(
                start_mark,
                "unexpected end of plain scalar",
            ))
        } else {
            // Borrow from the input only when the raw slice equals the accumulated text
            // (i.e. no folding changed the content).
            let contents = if let (Some(start), Some(end)) =
                (start_mark.byte_offset(), end_mark.byte_offset())
            {
                match self.try_borrow_slice(start, end) {
                    Some(slice) if slice == string => Cow::Borrowed(slice),
                    _ => Cow::Owned(string),
                }
            } else {
                Cow::Owned(string)
            };

            Ok(Token(
                Span::new(start_mark, end_mark),
                TokenType::Scalar(ScalarStyle::Plain, contents),
            ))
        }
    }
3838
3839 fn fetch_key(&mut self) -> ScanResult {
3840 let start_mark = self.mark;
3841 if self.flow_level == 0 {
3842 if !self.simple_key_allowed {
3844 return Err(ScanError::new_str(
3845 self.mark,
3846 "mapping keys are not allowed in this context",
3847 ));
3848 }
3849 self.roll_indent(
3850 start_mark.col,
3851 None,
3852 TokenType::BlockMappingStart,
3853 start_mark,
3854 );
3855 } else {
3856 self.set_current_flow_mapping_started(true);
3858 }
3859
3860 self.remove_simple_key()?;
3861
3862 if self.flow_level == 0 {
3863 self.allow_simple_key();
3864 } else {
3865 self.disallow_simple_key();
3866 }
3867
3868 self.skip_non_blank();
3869 let token_index = self.tokens.len();
3870 self.explicit_key_tab_check_pending = false;
3871 let stopped_after_comment = self.skip_yaml_whitespace(true)?;
3872 if self.input.peek() == '\t' {
3873 return Err(ScanError::new_str(
3874 self.mark(),
3875 "tabs disallowed in this context",
3876 ));
3877 }
3878 self.explicit_key_tab_check_pending = stopped_after_comment;
3879 self.insert_token(
3880 token_index,
3881 Token(Span::new(start_mark, self.mark), TokenType::Key),
3882 );
3883 Ok(())
3884 }
3885
3886 fn fetch_flow_value(&mut self) -> ScanResult {
3894 let nc = self.input.peek_nth(1);
3895
3896 if self.mark.index() != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
3908 return Err(ScanError::new_str(
3909 self.mark,
3910 "':' may not precede any of `[{` in flow mapping",
3911 ));
3912 }
3913
3914 self.fetch_value()
3915 }
3916
    /// Fetch a `:` value indicator and queue the `Value` token.
    ///
    /// If a simple key candidate is pending, the corresponding `Key` token (and, when needed,
    /// the `BlockMappingStart` / `FlowMappingStart` tokens) is inserted retroactively at the
    /// candidate's position. Otherwise the `:` stands on its own and may open a block mapping
    /// at its own column.
    ///
    /// # Errors
    /// Returns a `ScanError` when the `:` is followed by an invalid separator, when the
    /// implicit flow-mapping key started on an earlier line than the `:`, or when a mapping
    /// value is not allowed here. Errors from helpers are propagated.
    ///
    /// # Panics
    /// Panics if `self.simple_keys` is empty.
    fn fetch_value(&mut self) -> ScanResult {
        // Snapshot of the innermost simple-key candidate.
        let sk = self.simple_keys.last().unwrap().clone();
        let start_mark = self.mark;
        // A `key: value` pair directly inside a flow sequence forms a single-pair mapping.
        let is_implicit_flow_mapping = self.current_flow_collection_is_sequence()
            && !self.current_flow_mapping_started()
            && !self.implicit_flow_mapping_states.is_empty();
        if is_implicit_flow_mapping {
            *self.implicit_flow_mapping_states.last_mut().unwrap() =
                ImplicitMappingState::Inside(self.flow_level);
        }

        // Consume the `:`.
        self.skip_non_blank();
        // Tokens queued while skipping whitespace after the `:`; they are re-appended after
        // the `Value` token at the end.
        let mut trailing_tokens = VecDeque::new();
        if self.input.look_ch() == '\t' {
            let trailing_token_index = self.tokens.len();
            let whitespace = self.skip_ws_to_eol(SkipTabs::Yes)?;
            trailing_tokens = self.tokens.split_off(trailing_token_index);

            if !whitespace.has_valid_yaml_ws()
                && (self.input.peek() == '-' || self.input.next_is_alpha())
            {
                return Err(ScanError::new_str(
                    self.mark,
                    "':' must be followed by a valid YAML whitespace",
                ));
            }
        }

        if sk.possible {
            // Insert the `Key` token where the simple key started.
            let token_index = self.simple_key_token_index(&sk, start_mark)?;
            let tok = Token(Span::empty(sk.mark), TokenType::Key);
            self.insert_token(token_index, tok);
            if is_implicit_flow_mapping {
                if sk.mark.line < start_mark.line {
                    return Err(ScanError::new_str(
                        start_mark,
                        "illegal placement of ':' indicator",
                    ));
                }
                // Inserted at the same index, so it ends up before the `Key` token.
                self.insert_token(
                    token_index,
                    Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
                );
            }

            self.roll_indent(
                sk.mark.col,
                Some(sk.token_number),
                TokenType::BlockMappingStart,
                sk.mark,
            );
            self.roll_one_col_indent();

            // The candidate has been used up.
            self.simple_keys.last_mut().unwrap().possible = false;
            self.disallow_simple_key();
        } else {
            if is_implicit_flow_mapping {
                self.tokens
                    .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart).into());
            }
            if self.flow_level == 0 {
                if !self.simple_key_allowed {
                    return Err(ScanError::new_str(
                        start_mark,
                        "mapping values are not allowed in this context",
                    ));
                }

                self.roll_indent(
                    start_mark.col,
                    None,
                    TokenType::BlockMappingStart,
                    start_mark,
                );
            }
            self.roll_one_col_indent();

            if self.flow_level == 0 {
                self.allow_simple_key();
            } else {
                self.disallow_simple_key();
            }
        }
        self.tokens
            .push_back(Token(Span::empty(start_mark), TokenType::Value).into());
        self.tokens.append(&mut trailing_tokens);

        Ok(())
    }
4017
4018 fn roll_indent(
4024 &mut self,
4025 col: usize,
4026 number: Option<usize>,
4027 tok: TokenType<'input>,
4028 mark: Marker,
4029 ) {
4030 if self.flow_level > 0 {
4031 return;
4032 }
4033
4034 if self.indent <= col as isize {
4038 if let Some(indent) = self.indents.last() {
4039 if !indent.needs_block_end {
4040 self.indent = indent.indent;
4041 self.indents.pop();
4042 }
4043 }
4044 }
4045
4046 if self.indent < col as isize {
4047 self.indents.push(Indent {
4048 indent: self.indent,
4049 needs_block_end: true,
4050 });
4051 self.indent = col as isize;
4052 let tokens_parsed = self.tokens_parsed;
4053 match number {
4054 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
4055 None => self.tokens.push_back(Token(Span::empty(mark), tok).into()),
4056 }
4057 }
4058 }
4059
4060 fn unroll_indent(&mut self, col: isize) {
4066 if self.flow_level > 0 {
4067 return;
4068 }
4069 while self.indent > col {
4070 let indent = self.indents.pop().unwrap();
4071 self.indent = indent.indent;
4072 if indent.needs_block_end {
4073 self.tokens
4074 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd).into());
4075 }
4076 }
4077 }
4078
4079 fn roll_one_col_indent(&mut self) {
4085 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
4086 self.indents.push(Indent {
4087 indent: self.indent,
4088 needs_block_end: false,
4089 });
4090 self.indent += 1;
4091 }
4092 }
4093
4094 fn unroll_non_block_indents(&mut self) {
4096 while let Some(indent) = self.indents.last() {
4097 if indent.needs_block_end {
4098 break;
4099 }
4100 self.indent = indent.indent;
4101 self.indents.pop();
4102 }
4103 }
4104
4105 fn save_simple_key(&mut self) {
4107 if self.simple_key_allowed {
4108 let required = self.flow_level == 0
4109 && self.indent == (self.mark.col as isize)
4110 && self.indents.last().unwrap().needs_block_end;
4111
4112 if let Some(last) = self.simple_keys.last_mut() {
4113 *last = SimpleKey {
4114 mark: self.mark,
4115 possible: true,
4116 required,
4117 token_number: self.tokens_parsed + self.tokens.len(),
4118 };
4119 }
4120 }
4121 }
4122
4123 fn remove_simple_key(&mut self) -> ScanResult {
4124 let last = self.simple_keys.last_mut().unwrap();
4125 if last.possible && last.required {
4126 return Err(self.simple_key_expected());
4127 }
4128
4129 last.possible = false;
4130 Ok(())
4131 }
4132
4133 fn is_within_block(&self) -> bool {
4135 !self.indents.is_empty()
4136 }
4137
4138 fn end_implicit_mapping(&mut self, mark: Marker, flow_level: u8) {
4144 if self
4145 .implicit_flow_mapping_states
4146 .last()
4147 .is_some_and(|state| *state == ImplicitMappingState::Inside(flow_level))
4148 {
4149 *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Possible;
4150 self.set_current_flow_mapping_started(false);
4151 self.tokens
4152 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd).into());
4153 }
4154 }
4155
4156 fn current_flow_collection_is_sequence(&self) -> bool {
4157 self.flow_markers
4158 .last()
4159 .is_some_and(|(_, bracket)| *bracket == '[')
4160 }
4161
4162 fn current_flow_mapping_started(&self) -> bool {
4163 self.flow_mapping_started.last().copied().unwrap_or(false)
4164 }
4165
4166 fn set_current_flow_mapping_started(&mut self, started: bool) {
4167 if let Some(current) = self.flow_mapping_started.last_mut() {
4168 *current = started;
4169 }
4170 }
4171}
4172
/// Chomping mode of a block scalar: how the final line break and the trailing empty lines
/// are treated.
///
/// The type is a plain field-less enum, so it derives `Clone`, `Copy` and `Debug` in
/// addition to the comparison traits; this lets callers copy it freely and print it in
/// diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Chomping {
    /// Drop both the final line break and any trailing empty lines.
    Strip,
    /// Keep the final line break but drop trailing empty lines.
    Clip,
    /// Keep the final line break and all trailing empty lines.
    Keep,
}
4185
4186#[cfg(test)]
4187mod test {
4188 use alloc::{
4189 borrow::{Cow, ToOwned},
4190 rc::Rc,
4191 string::String,
4192 vec::Vec,
4193 };
4194 use core::cell::Cell;
4195
4196 use crate::{
4197 input::{str::StrInput, BorrowedInput, BufferedInput, Input},
4198 scanner::{
4199 Comment, Marker, Placement, QueuedToken, QueuedTokenType, ScalarStyle, Scanner, Span,
4200 TEncoding, Token, TokenType,
4201 },
4202 };
4203
4204 struct CountingChars {
4205 chars: alloc::vec::IntoIter<char>,
4206 read: Rc<Cell<usize>>,
4207 }
4208
4209 impl Iterator for CountingChars {
4210 type Item = char;
4211
4212 fn next(&mut self) -> Option<Self::Item> {
4213 let next = self.chars.next();
4214 if next.is_some() {
4215 self.read.set(self.read.get() + 1);
4216 }
4217 next
4218 }
4219 }
4220
4221 struct SlicingOnlyInput<'input> {
4222 inner: StrInput<'input>,
4223 expose_slice: bool,
4224 }
4225
4226 impl<'input> SlicingOnlyInput<'input> {
4227 fn new(source: &'input str, expose_slice: bool) -> Self {
4228 Self {
4229 inner: StrInput::new(source),
4230 expose_slice,
4231 }
4232 }
4233 }
4234
4235 impl Input for SlicingOnlyInput<'_> {
4236 fn lookahead(&mut self, count: usize) {
4237 self.inner.lookahead(count);
4238 }
4239
4240 fn buflen(&self) -> usize {
4241 self.inner.buflen()
4242 }
4243
4244 fn bufmaxlen(&self) -> usize {
4245 self.inner.bufmaxlen()
4246 }
4247
4248 fn raw_read_ch(&mut self) -> char {
4249 self.inner.raw_read_ch()
4250 }
4251
4252 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
4253 self.inner.raw_read_non_breakz_ch()
4254 }
4255
4256 fn skip(&mut self) {
4257 self.inner.skip();
4258 }
4259
4260 fn skip_n(&mut self, count: usize) {
4261 self.inner.skip_n(count);
4262 }
4263
4264 fn peek(&self) -> char {
4265 self.inner.peek()
4266 }
4267
4268 fn peek_nth(&self, n: usize) -> char {
4269 self.inner.peek_nth(n)
4270 }
4271
4272 fn byte_offset(&self) -> Option<usize> {
4273 self.inner.byte_offset()
4274 }
4275
4276 fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
4277 if self.expose_slice {
4278 self.inner.slice_bytes(start, end)
4279 } else {
4280 None
4281 }
4282 }
4283 }
4284
    /// `BorrowedInput` implementation that never lends a slice with the `'input` lifetime.
    impl<'input> BorrowedInput<'input> for SlicingOnlyInput<'input> {
        /// Always returns `None`, regardless of the requested range.
        fn slice_borrowed(&self, _start: usize, _end: usize) -> Option<&'input str> {
            None
        }
    }
4290
4291 #[test]
4292 fn anchor_character_set_allows_colon_and_rejects_flow_indicators() {
4293 use super::is_anchor_char;
4294
4295 assert!(is_anchor_char('x'));
4296 assert!(is_anchor_char('-'));
4297 assert!(is_anchor_char('_'));
4298 assert!(is_anchor_char(':'));
4299 assert!(is_anchor_char('#'));
4300 assert!(is_anchor_char('/'));
4301 assert!(is_anchor_char('?'));
4302
4303 for c in [',', '[', ']', '{', '}', ' ', '\t', '\n', '\r', '\0'] {
4304 assert!(
4305 !is_anchor_char(c),
4306 "character {c:?} must not be accepted in anchor/alias names"
4307 );
4308 }
4309 }
4310
4311 #[test]
4312 fn flow_simple_key_length_limit_bounds_buffering() {
4313 let mut yaml = String::from("[\n\"start\"\n");
4314 for _ in 0..600 {
4315 yaml.push_str("\"x\"\n");
4316 }
4317 let total_chars = yaml.chars().count();
4318 let read = Rc::new(Cell::new(0));
4319 let chars = yaml.chars().collect::<Vec<_>>().into_iter();
4320 let mut scanner = Scanner::new(BufferedInput::new(CountingChars {
4321 chars,
4322 read: Rc::clone(&read),
4323 }));
4324
4325 assert!(matches!(
4326 scanner.next_token().unwrap().unwrap().1,
4327 TokenType::StreamStart(_)
4328 ));
4329
4330 let token = scanner.next_token().unwrap().unwrap();
4331 assert!(matches!(token.1, TokenType::FlowSequenceStart));
4332
4333 let token = scanner.next_token().unwrap().unwrap();
4334 assert!(matches!(
4335 token.1,
4336 TokenType::Scalar(_, ref value) if value == "start"
4337 ));
4338 assert!(
4339 read.get() < total_chars,
4340 "scanner consumed all {total_chars} chars before yielding the first flow scalar"
4341 );
4342 assert!(
4343 read.get() <= super::SIMPLE_KEY_MAX_LOOKAHEAD + 128,
4344 "scanner read {} chars before yielding the first flow scalar",
4345 read.get()
4346 );
4347 }
4348
4349 #[test]
4350 fn comment_capture_does_not_change_leading_whitespace() {
4351 let mut scanner = Scanner::new(StrInput::new("# comment\n"));
4352
4353 let token = scanner.scan_comment_token().unwrap();
4354
4355 assert!(scanner.leading_whitespace);
4356 assert!(matches!(token.1, TokenType::Comment(ref comment) if comment.text == " comment"));
4357
4358 let mut scanner = Scanner::new(BufferedInput::new("# streaming\n".chars()));
4359 scanner.input.lookahead(1);
4360
4361 let token = scanner.scan_comment_token().unwrap();
4362
4363 assert!(scanner.leading_whitespace);
4364 assert!(matches!(token.1, TokenType::Comment(ref comment) if comment.text == " streaming"));
4365 }
4366
4367 #[test]
4368 fn comment_capture_falls_back_to_owned_slice_when_borrow_unavailable() {
4369 let mut scanner = Scanner::new(SlicingOnlyInput::new("# sliced\n", true));
4370 scanner.input.lookahead(2);
4371 assert_eq!(scanner.input.peek_nth(1), ' ');
4372
4373 let token = scanner.scan_comment_token().unwrap();
4374
4375 assert!(matches!(token.1, TokenType::Comment(ref comment)
4376 if matches!(comment.text, Cow::Owned(ref text) if text == " sliced")));
4377 }
4378
4379 #[test]
4380 fn comment_capture_errors_when_offsets_have_no_slice() {
4381 let mut scanner = Scanner::new(SlicingOnlyInput::new("# broken\n", false));
4382
4383 let error = scanner.scan_comment_token().unwrap_err();
4384
4385 assert_eq!(
4386 error.info(),
4387 "internal error: input advertised offsets but did not provide a slice"
4388 );
4389 }
4390
    /// Converting a public `Token` into a `QueuedToken` and back with `into_public` yields
    /// the original token, for one sample of each token variant listed below.
    #[test]
    fn queued_token_roundtrips_public_token_variants() {
        // Arbitrary span shared by all sample tokens.
        let span = Span::new(Marker::new(0, 1, 0), Marker::new(7, 1, 7));
        let tokens = [
            Token(span, TokenType::StreamStart(TEncoding::Utf8)),
            Token(span, TokenType::StreamEnd),
            Token(span, TokenType::VersionDirective(1, 2)),
            Token(
                span,
                TokenType::TagDirective(Cow::Borrowed("!app!"), Cow::Borrowed("tag:app.example,")),
            ),
            Token(span, TokenType::DocumentStart),
            Token(span, TokenType::DocumentEnd),
            Token(span, TokenType::BlockSequenceStart),
            Token(span, TokenType::BlockMappingStart),
            Token(span, TokenType::BlockEnd),
            Token(span, TokenType::FlowSequenceStart),
            Token(span, TokenType::FlowSequenceEnd),
            Token(span, TokenType::FlowMappingStart),
            Token(span, TokenType::FlowMappingEnd),
            Token(span, TokenType::BlockEntry),
            Token(span, TokenType::FlowEntry),
            Token(span, TokenType::Key),
            Token(span, TokenType::Value),
            Token(span, TokenType::Alias(Cow::Borrowed("alias"))),
            Token(span, TokenType::Anchor(Cow::Borrowed("anchor"))),
            Token(
                span,
                TokenType::Tag(Cow::Borrowed("!"), Cow::Borrowed("tag")),
            ),
            Token(
                span,
                TokenType::Scalar(ScalarStyle::Literal, Cow::Borrowed("scalar")),
            ),
            Token(
                span,
                TokenType::Comment(
                    Comment::new(span, Cow::Borrowed(" comment")).with_placement(Placement::Right),
                ),
            ),
            Token(
                span,
                TokenType::ReservedDirective(
                    "reserved".to_owned(),
                    vec!["one".to_owned(), "two".to_owned()],
                ),
            ),
        ];

        for token in tokens {
            // Convert a clone so the original stays available for the comparison.
            let queued: QueuedToken = token.clone().into();

            assert_eq!(queued.into_public(), token);
        }
    }
4446
/// `skip_yaml_whitespace(false)` must move past a leading comment line without
/// queuing any token, leaving the mark at line 2, column 0.
#[test]
fn comment_skipping_path_consumes_comment_without_tokenizing_it() {
    let source = StrInput::new("# skipped\nnext: value\n");
    let mut scanner = Scanner::new(source);

    scanner.skip_yaml_whitespace(false).unwrap();

    assert!(scanner.tokens.is_empty());
    let position = (scanner.mark.line(), scanner.mark.col());
    assert_eq!(position, (2, 0));
}
4457
/// `skip_yaml_whitespace(true)` must return `true` after queuing exactly the first
/// comment, with the mark still on line 1 at column 9.
#[test]
fn yaml_whitespace_can_stop_after_queued_comment() {
    let mut scanner = Scanner::new(StrInput::new(" # queued\n# later\n"));

    let stopped_after_comment = scanner.skip_yaml_whitespace(true).unwrap();
    assert!(stopped_after_comment);

    // Only the first comment has been queued; the second line is untouched.
    assert_eq!(scanner.tokens.len(), 1);
    let queued = scanner.tokens.front().unwrap();
    let is_queued_comment =
        matches!(&queued.1, QueuedTokenType::Comment(comment) if comment.text == " queued");
    assert!(is_queued_comment);

    let position = (scanner.mark.line(), scanner.mark.col());
    assert_eq!(position, (1, 9));
}
4472
/// `skip_to_next_token(true)` must return `true` after queuing the first comment,
/// leaving the mark at the start of line 2.
#[test]
fn token_skip_can_stop_after_queued_comment() {
    let mut scanner = Scanner::new(StrInput::new("# first\n# second\n"));

    let stopped_after_comment = scanner.skip_to_next_token(true).unwrap();
    assert!(stopped_after_comment);

    assert_eq!(scanner.tokens.len(), 1);
    let queued = scanner.tokens.front().unwrap();
    let is_first_comment =
        matches!(&queued.1, QueuedTokenType::Comment(comment) if comment.text == " first");
    assert!(is_first_comment);

    let position = (scanner.mark.line(), scanner.mark.col());
    assert_eq!(position, (2, 0));
}
4487
/// The first leading comment must be handed out while the token queue is empty,
/// i.e. before the second comment has been queued.
#[test]
fn scanner_emits_first_leading_comment_before_scanning_next_comment() {
    let mut scanner = Scanner::new(StrInput::new("# first\n# second\nkey: value\n"));

    let stream_start = scanner.next_token().unwrap().unwrap().1;
    assert!(matches!(stream_start, TokenType::StreamStart(_)));

    let first = scanner.next_token().unwrap().unwrap().1;
    assert!(matches!(first, TokenType::Comment(ref comment) if comment.text == " first"));

    // Nothing may be buffered between the two comments.
    assert!(scanner.tokens.is_empty());

    let second = scanner.next_token().unwrap().unwrap().1;
    assert!(matches!(second, TokenType::Comment(ref comment) if comment.text == " second"));
}
4506
/// For `"key" # quoted` followed by `: value`, the token order must be
/// stream start, the double-quoted scalar `key`, then the comment ` quoted`.
#[test]
fn scanner_emits_quoted_scalar_comment_before_scanning_following_value() {
    let mut scanner = Scanner::new(StrInput::new("\"key\" # quoted\n: value\n"));

    match scanner.next_token().unwrap().unwrap().1 {
        TokenType::StreamStart(_) => {}
        _ => panic!("expected a stream start token"),
    }

    match scanner.next_token().unwrap().unwrap().1 {
        TokenType::Scalar(ScalarStyle::DoubleQuoted, ref value) if value == "key" => {}
        _ => panic!("expected the double-quoted scalar `key`"),
    }

    match scanner.next_token().unwrap().unwrap().1 {
        TokenType::Comment(ref comment) if comment.text == " quoted" => {}
        _ => panic!("expected the comment ` quoted`"),
    }
}
4524
/// After `fetch_flow_scalar(false)` on a quoted scalar followed by a comment line,
/// `adjacent_value_allowed_at` must be `usize::MAX`, the scalar must be at the front
/// of the queue, and the comment must be somewhere in the queue.
#[test]
fn flow_scalar_comment_disables_adjacent_value_lookahead() {
    let mut scanner = Scanner::new(StrInput::new("\"key\"\n# quoted\n: value\n"));

    scanner.fetch_flow_scalar(false).unwrap();

    assert_eq!(scanner.adjacent_value_allowed_at, usize::MAX);

    let front = scanner.tokens.front().unwrap();
    let front_is_key_scalar = matches!(
        front.1,
        QueuedTokenType::Scalar(ScalarStyle::DoubleQuoted, ref value) if value == "key"
    );
    assert!(front_is_key_scalar);

    let comment_was_queued = scanner.tokens.iter().any(|QueuedToken(_, kind)| {
        matches!(kind, QueuedTokenType::Comment(comment) if comment.text == " quoted")
    });
    assert!(comment_was_queued);
}
4541
/// Both leading comments must be returned before the error caused by the `@`
/// on the third line surfaces.
#[test]
fn deferred_error_waits_for_all_comment_tokens() {
    let mut scanner = Scanner::new(StrInput::new("# first\n# second\n@\n"));

    let stream_start = scanner.next_token().unwrap().unwrap().1;
    assert!(matches!(stream_start, TokenType::StreamStart(_)));

    // The comments come out in source order.
    for expected_text in [" first", " second"] {
        let kind = scanner.next_token().unwrap().unwrap().1;
        assert!(matches!(kind, TokenType::Comment(ref comment) if comment.text == expected_text));
    }

    // Only now is the scan error reported.
    let failure = scanner.next_token().unwrap_err();
    assert!(failure.info().contains("unexpected character"));
}
4563
/// An anchor name scanned from a `StrInput` must come back as `Cow::Borrowed`.
#[test]
fn anchor_name_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("&anch\n"));

    // Pull tokens until the anchor shows up; the `expect`s abort the test if the
    // scanner errors or yields `None` first.
    let anchor_name = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Anchor(name) = token.1 {
            break name;
        }
    };

    assert!(matches!(anchor_name, Cow::Borrowed("anch")));
}
4580
/// The anchor name in `&foo\u{0001}` must stop before the control character.
///
/// If the token after the anchor is a scalar, it must start with that control
/// character; any other following token is accepted without further checks.
#[test]
fn anchor_name_rejects_non_printable_control_chars() {
    let mut scanner = Scanner::new(StrInput::new("&foo\u{0001}\n"));

    let anchor_name = loop {
        let token = scanner
            .next_token()
            .expect("scanning should not fail")
            .expect("scanner must eventually produce a token");
        match token.1 {
            TokenType::Anchor(name) => break name,
            _ => continue,
        }
    };
    assert!(matches!(anchor_name, Cow::Borrowed("foo")));

    let following = scanner.next_token().expect("scanning should not fail");
    if let Some(Token(_, TokenType::Scalar(_, rest))) = following {
        assert!(rest.starts_with('\u{0001}'));
    }
}
4601
/// The alias name in `*foo\u{0001}` must stop before the control character.
///
/// If the token after the alias is a scalar, it must start with that control
/// character; any other following token is accepted without further checks.
#[test]
fn alias_name_rejects_non_printable_control_chars() {
    let mut scanner = Scanner::new(StrInput::new("*foo\u{0001}\n"));

    let alias_name = loop {
        let token = scanner
            .next_token()
            .expect("scanning should not fail")
            .expect("scanner must eventually produce a token");
        match token.1 {
            TokenType::Alias(name) => break name,
            _ => continue,
        }
    };
    assert!(matches!(alias_name, Cow::Borrowed("foo")));

    let following = scanner.next_token().expect("scanning should not fail");
    if let Some(Token(_, TokenType::Scalar(_, rest))) = following {
        assert!(rest.starts_with('\u{0001}'));
    }
}
4621
/// An alias name scanned from a `StrInput` must come back as `Cow::Borrowed`.
#[test]
fn alias_name_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("*anch\n"));

    // Skip everything up to the alias token.
    let alias_name = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Alias(name) = token.1 {
            break name;
        }
    };

    assert!(matches!(alias_name, Cow::Borrowed("anch")));
}
4637
/// In `*foo: bar`, the alias token must carry the name `foo:` (colon included).
#[test]
fn alias_name_scans_colon_as_part_of_name() {
    let mut scanner = Scanner::new(StrInput::new("*foo: bar\n"));

    let alias_name = loop {
        let token = scanner
            .next_token()
            .expect("scanner must not fail before alias token")
            .expect("scanner must eventually emit an alias token");
        match token.1 {
            TokenType::Alias(name) => break name,
            _ => continue,
        }
    };

    assert_eq!(alias_name.as_ref(), "foo:");
}
4654
/// In `&foo: bar`, the anchor token must carry the name `foo:` (colon included).
#[test]
fn anchor_name_scans_colon_as_part_of_name() {
    let mut scanner = Scanner::new(StrInput::new("&foo: bar\n"));

    let anchor_name = loop {
        let token = scanner
            .next_token()
            .expect("scanner must not fail before anchor token")
            .expect("scanner must eventually emit an anchor token");
        match token.1 {
            TokenType::Anchor(name) => break name,
            _ => continue,
        }
    };

    assert_eq!(anchor_name.as_ref(), "foo:");
}
4671
/// Both the handle and the prefix of a `%TAG` directive scanned from a `StrInput`
/// must be `Cow::Borrowed`.
#[test]
fn tag_directive_parts_are_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("%TAG !e! tag:example.com,2000:app/\n"));

    let (handle, prefix) = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::TagDirective(handle, prefix) = token.1 {
            break (handle, prefix);
        }
    };

    assert!(matches!(handle, Cow::Borrowed("!e!")));
    assert!(matches!(prefix, Cow::Borrowed("tag:example.com,2000:app/")));
}
4689
/// The plain scalar `foo` scanned from a `StrInput` must be `Cow::Borrowed`.
#[test]
fn plain_scalar_is_borrowed_when_whitespace_free_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("foo\n"));

    // First scalar token in the stream.
    let scalar = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = token.1 {
            break value;
        }
    };

    assert!(matches!(scalar, Cow::Borrowed("foo")));
}
4705
/// The plain scalar `foo bar` (with an inner space) scanned from a `StrInput`
/// must still be `Cow::Borrowed`.
#[test]
fn plain_scalar_is_borrowed_when_whitespace_present_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("foo bar\n"));

    // First scalar token in the stream.
    let scalar = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Scalar(_, value) = token.1 {
            break value;
        }
    };

    assert!(matches!(scalar, Cow::Borrowed("foo bar")));
}
4721
/// A single-quoted scalar without escaped quotes must be `Cow::Borrowed` when
/// scanned from a `StrInput`.
#[test]
fn single_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("'foo bar'\n"));

    let scalar = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        match token.1 {
            TokenType::Scalar(_, value) => break value,
            _ => continue,
        }
    };

    assert!(matches!(scalar, Cow::Borrowed("foo bar")));
}
4737
/// A single-quoted scalar containing the `''` escape must be `Cow::Owned` and
/// decode to a single quote character.
#[test]
fn single_quoted_scalar_is_owned_when_quote_is_escaped_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("'foo''bar'\n"));

    let scalar = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        match token.1 {
            TokenType::Scalar(_, value) => break value,
            _ => continue,
        }
    };

    assert!(matches!(scalar, Cow::Owned(_)));
    assert_eq!(&*scalar, "foo'bar");
}
4754
/// A double-quoted scalar without escape sequences must be `Cow::Borrowed` when
/// scanned from a `StrInput`.
#[test]
fn double_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"foo bar\"\n"));

    let scalar = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        match token.1 {
            TokenType::Scalar(_, value) => break value,
            _ => continue,
        }
    };

    assert!(matches!(scalar, Cow::Borrowed("foo bar")));
}
4770
/// A double-quoted scalar containing `\n` must be `Cow::Owned` and decode the
/// escape into a real line feed.
#[test]
fn double_quoted_scalar_is_owned_when_escape_sequence_present_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"foo\\nbar\"\n"));

    let scalar = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        match token.1 {
            TokenType::Scalar(_, value) => break value,
            _ => continue,
        }
    };

    assert!(matches!(scalar, Cow::Owned(_)));
    assert_eq!(&*scalar, "foo\nbar");
}
4787
/// The first scalar that follows a `Key` token in `mykey: value` must be
/// `Cow::Borrowed("mykey")`.
#[test]
fn plain_key_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("mykey: value\n"));

    let mut found_key = false;
    // Walk the stream until the first scalar after a `Key` token, or until the
    // scanner returns `None`.
    let key_value: Option<Cow<'_, str>> = loop {
        let next = scanner
            .next_token()
            .expect("valid YAML must scan without errors");
        let Some(token) = next else { break None };

        match token.1 {
            TokenType::Key => found_key = true,
            TokenType::Scalar(_, value) if found_key => break Some(value),
            _ => {}
        }
    };

    assert!(found_key, "expected to find a Key token");
    let key_value = key_value.expect("expected to find a scalar after Key token");
    assert!(
        matches!(key_value, Cow::Borrowed("mykey")),
        "key should be borrowed, got: {key_value:?}"
    );
}
4819
/// The first scalar that follows a `Key` token in `"mykey": value` must be
/// `Cow::Borrowed("mykey")`.
#[test]
fn quoted_key_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"mykey\": value\n"));

    let mut found_key = false;
    // Walk the stream until the first scalar after a `Key` token, or until the
    // scanner returns `None`.
    let key_value: Option<Cow<'_, str>> = loop {
        let next = scanner
            .next_token()
            .expect("valid YAML must scan without errors");
        let Some(token) = next else { break None };

        match token.1 {
            TokenType::Key => found_key = true,
            TokenType::Scalar(_, value) if found_key => break Some(value),
            _ => {}
        }
    };

    assert!(found_key, "expected to find a Key token");
    let key_value = key_value.expect("expected to find a scalar after Key token");
    assert!(
        matches!(key_value, Cow::Borrowed("mykey")),
        "quoted key should be borrowed when verbatim, got: {key_value:?}"
    );
}
4850
/// For `!!str`, the tag token must carry a borrowed `!!` handle and a borrowed
/// `str` suffix.
#[test]
fn tag_handle_and_suffix_are_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!!str foo\n"));

    let (handle, suffix) = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Tag(handle, suffix) = token.1 {
            break (handle, suffix);
        }
    };

    assert!(
        matches!(handle, Cow::Borrowed("!!")),
        "tag handle should be borrowed, got: {handle:?}"
    );
    assert!(
        matches!(suffix, Cow::Borrowed("str")),
        "tag suffix should be borrowed, got: {suffix:?}"
    );
}
4874
/// For the local tag `!mytag`, the tag token must carry a borrowed `!` handle and
/// a borrowed `mytag` suffix.
#[test]
fn local_tag_suffix_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!mytag foo\n"));

    let (handle, suffix) = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Tag(handle, suffix) = token.1 {
            break (handle, suffix);
        }
    };

    assert!(
        matches!(handle, Cow::Borrowed("!")),
        "local tag handle should be '!', got: {handle:?}"
    );
    assert!(
        matches!(suffix, Cow::Borrowed("mytag")),
        "local tag suffix should be borrowed, got: {suffix:?}"
    );
}
4898
/// For `!!my%20tag`, the handle stays borrowed while the suffix must be owned and
/// decoded to `my tag`.
#[test]
fn tag_with_uri_escape_is_owned_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!!my%20tag foo\n"));

    let (handle, suffix) = loop {
        let token = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        if let TokenType::Tag(handle, suffix) = token.1 {
            break (handle, suffix);
        }
    };

    assert!(
        matches!(handle, Cow::Borrowed("!!")),
        "tag handle should still be borrowed, got: {handle:?}"
    );
    assert!(
        matches!(suffix, Cow::Owned(_)),
        "tag suffix with URI escape should be owned, got: {suffix:?}"
    );
    assert_eq!(&*suffix, "my tag");
}
4923
// Exercises the pending-whitespace bookkeeping of `FlowScalarBuf` directly:
// committing a noted range, discarding a noted range, and the owned accessor.
#[test]
fn flow_scalar_buffer_tracks_pending_whitespace() {
    let mut borrowed = super::FlowScalarBuf::new_borrowed(2);

    // Note the range 5..8 and commit it: `end` and `pending_ws_end` are expected
    // to be 8 and no pending start may remain.
    borrowed.note_pending_ws(5, 8);
    borrowed.commit_pending_ws();
    assert!(matches!(
        borrowed,
        super::FlowScalarBuf::Borrowed {
            end: 8,
            pending_ws_start: None,
            pending_ws_end: 8,
            ..
        }
    ));

    // Note the range 9..11 and discard it: the checked fields must be exactly as
    // they were after the commit above.
    borrowed.note_pending_ws(9, 11);
    borrowed.discard_pending_ws();
    assert!(matches!(
        borrowed,
        super::FlowScalarBuf::Borrowed {
            end: 8,
            pending_ws_start: None,
            pending_ws_end: 8,
            ..
        }
    ));
    // A borrowed buffer exposes no owned string.
    assert!(borrowed.as_owned_mut().is_none());

    // An owned buffer hands out its string for in-place appends.
    let mut owned = super::FlowScalarBuf::new_owned();
    owned.as_owned_mut().unwrap().push_str("owned");
    assert!(matches!(owned, super::FlowScalarBuf::Owned(ref s) if s == "owned"));
}
4957
4958 fn first_scanner_error_info(input: &str) -> String {
4959 let mut scanner = Scanner::new(StrInput::new(input));
4960 loop {
4961 match scanner.next_token() {
4962 Ok(Some(_)) => {}
4963 Ok(None) => panic!("expected scanner error"),
4964 Err(error) => return error.info().to_owned(),
4965 }
4966 }
4967 }
4968
4969 fn first_scalar_value(input: &str) -> String {
4970 let mut scanner = Scanner::new(StrInput::new(input));
4971 loop {
4972 match scanner.next_token().expect("scanner should not error") {
4973 Some(Token(_, TokenType::Scalar(_, value))) => return value.into_owned(),
4974 Some(_) => {}
4975 None => panic!("expected scalar token"),
4976 }
4977 }
4978 }
4979
/// Draining the scanner through `next()` on an unterminated quote must leave the
/// error retrievable via `get_error()`, and further `next()` calls must yield `None`.
#[test]
fn iterator_next_records_error_and_then_stays_empty() {
    let mut scanner = Scanner::new(StrInput::new("\"unterminated"));

    // Drain until the first `None`.
    loop {
        if scanner.next().is_none() {
            break;
        }
    }

    let recorded = scanner
        .get_error()
        .expect("scanner should retain the error");
    assert_eq!(recorded.info(), "unclosed quote");

    let after_error = scanner.next();
    assert!(after_error.is_none());
}
4992
/// Once `StreamEnd` has been returned for an empty input, the scanner must report
/// both stream flags as set and `next_token` must yield `Ok(None)`.
#[test]
fn next_token_returns_none_after_stream_end() {
    let mut scanner = Scanner::new(StrInput::new(""));

    // Consume tokens up to and including `StreamEnd` (or until `None`).
    loop {
        match scanner.next_token().unwrap() {
            Some(token) if matches!(token.1, TokenType::StreamEnd) => break,
            Some(_) => {}
            None => break,
        }
    }

    assert!(scanner.stream_started());
    assert!(scanner.stream_ended());

    let after_end = scanner.next_token().unwrap();
    assert!(after_end.is_none());
}
5007
/// A lone `%` must be reported as a directive with no name.
#[test]
fn directive_name_must_be_present() {
    let reported = first_scanner_error_info("%\n");

    assert_eq!(
        reported,
        "while scanning a directive, could not find expected directive name"
    );
}
5015
/// `%YAML 1` (no `.` after the first number) must be rejected.
#[test]
fn yaml_directive_requires_dot_between_version_numbers() {
    let reported = first_scanner_error_info("%YAML 1\n");

    assert_eq!(
        reported,
        "while scanning a YAML directive, did not find expected digit or '.' character"
    );
}
5023
/// `%YAML .2` (nothing before the `.`) must be rejected.
#[test]
fn yaml_directive_requires_major_version_number() {
    let reported = first_scanner_error_info("%YAML .2\n");

    assert_eq!(
        reported,
        "while scanning a YAML directive, did not find expected version number"
    );
}
5031
/// A ten-digit version component in `%YAML` must be rejected as too long.
#[test]
fn yaml_directive_rejects_extremely_long_version_number() {
    let reported = first_scanner_error_info("%YAML 1234567890.2\n");

    assert_eq!(
        reported,
        "while scanning a YAML directive, found extremely long version number"
    );
}
5039
/// A `%TAG` handle such as `!bad` (no closing `!`) must be rejected.
#[test]
fn tag_directive_handle_must_end_with_bang() {
    let reported = first_scanner_error_info("%TAG !bad tag:example.com,2024:\n");

    assert_eq!(
        reported,
        "while parsing a tag directive, did not find expected '!'"
    );
}
5047
/// A `%TAG` handle such as `bad!` (no leading `!`) must be rejected.
#[test]
fn tag_directive_handle_must_start_with_bang() {
    let reported = first_scanner_error_info("%TAG bad! tag:example.com,2024:\n");

    assert_eq!(reported, "while scanning a tag, did not find expected '!'");
}
5055
/// A `%TAG` prefix that begins with a backtick must be rejected.
#[test]
fn tag_directive_prefix_must_start_with_tag_character() {
    let reported = first_scanner_error_info("%TAG !e! `bad\n");

    assert_eq!(reported, "invalid global tag character");
}
5063
/// A `%TAG` prefix running straight into `^suffix` must be rejected.
#[test]
fn tag_directive_prefix_must_end_before_invalid_content() {
    let reported = first_scanner_error_info("%TAG !e! tag:example.com^suffix\n");

    assert_eq!(
        reported,
        "while scanning TAG, did not find expected whitespace or line break"
    );
}
5071
/// A `%TAG` prefix containing `%20` must come back owned and decoded to a space,
/// while the handle stays borrowed.
#[test]
fn tag_directive_prefix_with_uri_escape_is_owned_and_decoded() {
    let source = StrInput::new("%TAG !e! tag:example.com,2024:some%20app/\n");
    let mut scanner = Scanner::new(source);

    let (handle, prefix) = loop {
        let token = scanner
            .next_token()
            .expect("valid directive should scan")
            .expect("scanner must produce a directive token");
        match token.1 {
            TokenType::TagDirective(handle, prefix) => break (handle, prefix),
            _ => continue,
        }
    };

    assert!(matches!(handle, Cow::Borrowed("!e!")));
    assert!(matches!(prefix, Cow::Owned(_)));
    assert_eq!(&*prefix, "tag:example.com,2024:some app/");
}
5090
/// A bare `!` must scan as a tag with an empty handle and `!` as the suffix.
#[test]
fn bare_bang_tag_scans_as_non_specific_tag() {
    let mut scanner = Scanner::new(StrInput::new("! foo\n"));

    let (handle, suffix) = loop {
        let token = scanner
            .next_token()
            .expect("valid tag should scan")
            .expect("scanner must produce a tag token");
        match token.1 {
            TokenType::Tag(handle, suffix) => break (handle, suffix),
            _ => continue,
        }
    };

    assert_eq!(&*handle, "");
    assert_eq!(&*suffix, "!");
}
5107
/// `!foo,bar` (a `,` directly after the tag suffix) must be rejected.
#[test]
fn tag_requires_separation_after_suffix() {
    let reported = first_scanner_error_info("!foo,bar\n");

    assert_eq!(
        reported,
        "while scanning a tag, did not find expected whitespace or line break"
    );
}
5115
/// The empty verbatim tag `!<>` must be rejected for lacking a URI.
#[test]
fn verbatim_tag_requires_uri() {
    let reported = first_scanner_error_info("!<> foo\n");

    assert_eq!(
        reported,
        "while parsing a tag, did not find expected tag URI"
    );
}
5123
/// A verbatim tag opened with `!<` but never closed with `>` must be rejected.
#[test]
fn verbatim_tag_requires_closing_angle_bracket() {
    let reported = first_scanner_error_info("!<tag:yaml.org,2002:str foo\n");

    assert_eq!(
        reported,
        "while scanning a verbatim tag, did not find the expected '>'"
    );
}
5131
/// The escape `%zz` inside a tag must be rejected as an invalid escape sequence.
#[test]
fn tag_uri_escape_requires_hex_digits() {
    let reported = first_scanner_error_info("!!bad%zz foo\n");

    assert_eq!(
        reported,
        "while parsing a tag, found an invalid escape sequence"
    );
}
5139
/// The escape `%80` inside a tag must be rejected as an incorrect leading UTF-8 byte.
#[test]
fn tag_uri_escape_rejects_bad_leading_utf8_byte() {
    let reported = first_scanner_error_info("!!bad%80 foo\n");

    assert_eq!(
        reported,
        "while parsing a tag, found an incorrect leading UTF-8 byte"
    );
}
5147
/// The escape pair `%C2%41` inside a tag must be rejected as an incorrect
/// trailing UTF-8 byte.
#[test]
fn tag_uri_escape_rejects_bad_trailing_utf8_byte() {
    let reported = first_scanner_error_info("!!bad%C2%41 foo\n");

    assert_eq!(
        reported,
        "while parsing a tag, found an incorrect trailing UTF-8 byte"
    );
}
5155
/// The escape sequence `%F4%90%80%80` inside a tag must be rejected as an
/// invalid UTF-8 codepoint.
#[test]
fn tag_uri_escape_rejects_invalid_utf8_codepoint() {
    let reported = first_scanner_error_info("!!bad%F4%90%80%80 foo\n");

    assert_eq!(
        reported,
        "while parsing a tag, found an invalid UTF-8 codepoint"
    );
}
5163
/// A `&` or `*` followed directly by a space must produce the same
/// "missing name" error.
#[test]
fn anchors_and_aliases_require_names() {
    let expected =
        "while scanning an anchor or alias, did not find expected alphabetic or numeric character";

    // Anchor indicator first, then alias indicator.
    for nameless in ["& \n", "* \n"] {
        assert_eq!(first_scanner_error_info(nameless), expected);
    }
}
5172
/// Content on the same line after `...` must be rejected.
#[test]
fn document_end_marker_rejects_trailing_content() {
    let reported = first_scanner_error_info("... trailing\n");

    assert_eq!(reported, "invalid content after document end marker");
}
5180
/// An indented `@` must be reported as an unexpected character.
#[test]
fn reserved_indicators_are_rejected_outside_directives() {
    let reported = first_scanner_error_info(" @\n");

    assert_eq!(reported, "unexpected character: `@'");
}
5188
/// A `- ` block-entry indicator inside a flow sequence must be rejected.
#[test]
fn flow_block_entry_indicator_is_rejected() {
    let reported = first_scanner_error_info("[- ]\n");

    assert_eq!(reported, r#""-" is only valid inside a block"#);
}
5196
/// `-<TAB>- value` must be rejected with the dedicated whitespace error for `-`.
#[test]
fn block_entry_after_tabbed_separator_reports_specific_error() {
    let reported = first_scanner_error_info("-\t- value\n");

    assert_eq!(reported, "'-' must be followed by a valid YAML whitespace");
}
5204
/// A `---` line while a `[` is still open must be reported as an unclosed bracket.
#[test]
fn document_indicator_reports_unclosed_flow_collection() {
    let reported = first_scanner_error_info("[\n---\n");

    assert_eq!(reported, "unclosed bracket '['");
}
5209
/// Text after a block scalar header (`|+ trailing`) must be rejected.
#[test]
fn block_scalar_header_rejects_trailing_content() {
    let reported = first_scanner_error_info("|+ trailing\n");

    assert_eq!(
        reported,
        "while scanning a block scalar, did not find expected comment or line break"
    );
}
5217
/// An indentation indicator of `0` must be rejected, with or without a chomping
/// indicator in front of it.
#[test]
fn block_scalar_rejects_zero_indent_indicator() {
    let expected = "while scanning a block scalar, found an indentation indicator equal to 0";

    for header in ["|0\n", "|+0\n"] {
        assert_eq!(first_scanner_error_info(header), expected);
    }
}
5225
/// An empty block scalar at end of input yields `""` with `|-` and `"\n"` with `|+`.
#[test]
fn empty_block_scalar_at_eof_honors_chomping() {
    let cases = [("|-\n", ""), ("|+\n", "\n")];

    for (input, expected) in cases {
        assert_eq!(first_scalar_value(input), expected);
    }
}
5231
/// A `|1` block scalar immediately followed by `...` must scan as an empty scalar.
#[test]
fn explicit_indent_block_scalar_can_end_at_document_marker() {
    let scalar = first_scalar_value("|1\n...\n");

    assert_eq!(scalar, "");
}
5236
/// Unindented content after a `|2` header must be rejected as wrongly indented.
#[test]
fn root_explicit_indent_block_scalar_rejects_underindented_content() {
    let reported = first_scanner_error_info("|2\nx\n");

    assert_eq!(reported, "wrongly indented line in block scalar");
}
5244
/// A `---` at the start of a line inside a double-quoted scalar must be rejected.
#[test]
fn quoted_scalar_rejects_document_indicator_at_line_start() {
    let reported = first_scanner_error_info("\"one\n---\ntwo\"\n");

    assert_eq!(
        reported,
        "while scanning a quoted scalar, found unexpected document indicator"
    );
}
5252
/// A continuation line of a quoted mapping value that starts with a tab must be
/// rejected.
#[test]
fn quoted_scalar_rejects_tab_indentation_after_line_break() {
    let reported = first_scanner_error_info("a: \"one\n\tbad\"\n");

    assert_eq!(reported, "tab cannot be used as indentation");
}
5260
/// A continuation line of a quoted mapping value at column 0 must be rejected.
#[test]
fn quoted_scalar_rejects_underindented_continuation() {
    let reported = first_scanner_error_info("a: \"one\nbad\"\n");

    assert_eq!(reported, "invalid indentation in multiline quoted scalar");
}
5268
/// A flow-sequence entry at column 0 under an indented `[` must be rejected.
#[test]
fn indented_flow_scalar_reports_invalid_indentation() {
    let reported = first_scanner_error_info("a:\n [\nfoo]\n");

    assert_eq!(reported, "invalid indentation");
}
5276
/// The input `a:` / `&b` / `- c` must fail with the "simple key expect ':'" error.
#[test]
fn required_simple_key_requires_value_at_stream_end() {
    let reported = first_scanner_error_info("a:\n&b\n- c\n");

    assert_eq!(reported, "simple key expect ':'");
}
5284
/// `[-]` must be rejected: a plain scalar may not be a `-` followed by a flow
/// indicator.
#[test]
fn plain_scalar_rejects_dash_before_flow_indicator() {
    let reported = first_scanner_error_info("[-]\n");

    assert_eq!(
        reported,
        "plain scalar cannot start with '-' followed by ,[]{}"
    );
}
5292
/// A tab in the separation after the `?` indicator must be rejected.
#[test]
fn explicit_key_rejects_tab_after_indicator() {
    let reported = first_scanner_error_info("? \tfoo\n");

    assert_eq!(reported, "tabs disallowed in this context");
}
5300
/// `[a:[]]` must be rejected: `:` directly followed by `[` after a plain key.
#[test]
fn flow_mapping_rejects_adjacent_collection_value_after_plain_key() {
    let reported = first_scanner_error_info("[a:[]]\n");

    assert_eq!(reported, "':' may not precede any of `[{` in flow mapping");
}
5308
/// In a flow sequence, a `:` on the line after its key must be rejected.
#[test]
fn implicit_flow_mapping_colon_cannot_move_to_next_line() {
    let reported = first_scanner_error_info("[foo\n: bar]\n");

    assert_eq!(reported, "illegal placement of ':' indicator");
}
5316
/// Forces a simple key whose `token_number` (0) is behind `tokens_parsed` (1) and
/// checks that `fetch_value` reports it as an error.
#[test]
fn stale_simple_key_token_position_is_a_scan_error() {
    let mut scanner = Scanner::new(StrInput::new(": value\n"));

    // Start the stream, then pretend its token was already handed out.
    scanner.fetch_stream_start();
    scanner.tokens.clear();
    scanner.tokens_parsed = 1;

    // Mark the current simple-key slot as possible but pointing at token 0.
    {
        let slot = scanner
            .simple_keys
            .last_mut()
            .expect("stream start should create a simple key slot");
        slot.possible = true;
        slot.token_number = 0;
    }

    let failure = scanner
        .fetch_value()
        .expect_err("stale simple key should be reported as a scan error");

    assert_eq!(failure.info(), "simple key is no longer valid");
}
5336
/// For `*foo: bar`, the token right after the stream start must be
/// `Alias("foo:")`.
#[test]
fn issue14_alias_scanner_consumes_colon_as_name_character() {
    let mut scanner = Scanner::new(StrInput::new("*foo: bar\n"));

    let stream_start = scanner.next_token().unwrap().unwrap().1;
    assert!(matches!(stream_start, TokenType::StreamStart(_)));

    let token = scanner.next_token().unwrap().unwrap();
    let is_alias_with_colon =
        matches!(&token.1, TokenType::Alias(name) if name.as_ref() == "foo:");

    assert!(
        is_alias_with_colon,
        "expected `*foo: bar` to start with Alias(\"foo:\"), got {token:?}"
    );
}
5353
/// For `&foo: bar`, the token right after the stream start must be
/// `Anchor("foo:")`.
#[test]
fn issue14_anchor_scanner_consumes_colon_as_name_character() {
    let mut scanner = Scanner::new(StrInput::new("&foo: bar\n"));

    let stream_start = scanner.next_token().unwrap().unwrap().1;
    assert!(matches!(stream_start, TokenType::StreamStart(_)));

    let token = scanner.next_token().unwrap().unwrap();
    let is_anchor_with_colon =
        matches!(&token.1, TokenType::Anchor(name) if name.as_ref() == "foo:");

    assert!(
        is_anchor_with_colon,
        "expected `&foo: bar` to start with Anchor(\"foo:\"), got {token:?}"
    );
}
5370}