1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use alloc::{
13 borrow::{Cow, ToOwned},
14 collections::VecDeque,
15 string::String,
16 vec::Vec,
17};
18use core::{char, fmt};
19
20use crate::{
21 char_traits::{
22 as_hex, is_anchor_char, is_blank_or_breakz, is_bom, is_break, is_breakz, is_flow, is_hex,
23 is_tag_char, is_uri_char,
24 },
25 input::{BorrowedInput, SkipTabs},
26};
27
/// Maximum distance, in characters, between the start of a candidate simple key and the
/// scanner's current position. Beyond this distance the candidate is treated as stale.
const SIMPLE_KEY_MAX_LOOKAHEAD: usize = 1024;
30
/// Encoding of the input stream, carried by the stream-start token.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8 encoding.
    Utf8,
}
37
/// The presentation style of a scalar token.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum ScalarStyle {
    /// An unquoted scalar.
    Plain,
    /// A scalar enclosed in single quotes (`'`).
    SingleQuoted,
    /// A scalar enclosed in double quotes (`"`).
    DoubleQuoted,

    /// A literal block scalar (introduced by `|`).
    Literal,
    /// A folded block scalar (introduced by `>`).
    Folded,
}
62
/// Offsets of a source position, counted in characters and, when known, in bytes.
#[derive(Clone, Copy, Debug, Default)]
pub struct MarkerOffsets {
    /// Offset counted in characters.
    chars: usize,
    /// Offset counted in bytes; `None` when no byte offset is known.
    bytes: Option<usize>,
}

impl PartialEq for MarkerOffsets {
    /// Compares the character offsets only; the byte offset never takes part in equality.
    fn eq(&self, rhs: &Self) -> bool {
        let Self {
            chars: lhs_chars,
            bytes: _,
        } = *self;
        lhs_chars == rhs.chars
    }
}

impl Eq for MarkerOffsets {}
87
88#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
90pub struct Marker {
91 offsets: MarkerOffsets,
93 line: usize,
95 col: usize,
97}
98
99impl Marker {
100 #[must_use]
102 pub fn new(index: usize, line: usize, col: usize) -> Marker {
103 Marker {
104 offsets: MarkerOffsets {
105 chars: index,
106 bytes: None,
107 },
108 line,
109 col,
110 }
111 }
112
113 #[must_use]
115 pub fn with_byte_offset(mut self, byte_offset: Option<usize>) -> Marker {
116 self.offsets.bytes = byte_offset;
117 self
118 }
119
120 #[must_use]
122 pub fn index(&self) -> usize {
123 self.offsets.chars
124 }
125
126 #[must_use]
128 pub fn byte_offset(&self) -> Option<usize> {
129 self.offsets.bytes
130 }
131
132 #[must_use]
134 pub fn line(&self) -> usize {
135 self.line
136 }
137
138 #[must_use]
140 pub fn col(&self) -> usize {
141 self.col
142 }
143}
144
145#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
147pub struct Span {
148 pub start: Marker,
150 pub end: Marker,
152
153 pub indent: Option<usize>,
158}
159
160impl Span {
161 #[must_use]
163 pub fn new(start: Marker, end: Marker) -> Span {
164 Span {
165 start,
166 end,
167 indent: None,
168 }
169 }
170
171 #[must_use]
178 pub fn empty(mark: Marker) -> Span {
179 Span {
180 start: mark,
181 end: mark,
182 indent: None,
183 }
184 }
185
186 #[must_use]
188 pub fn with_indent(mut self, indent: Option<usize>) -> Span {
189 self.indent = indent;
190 self
191 }
192
193 #[must_use]
195 pub fn len(&self) -> usize {
196 self.end.index() - self.start.index()
197 }
198
199 #[must_use]
201 pub fn is_empty(&self) -> bool {
202 self.len() == 0
203 }
204
205 #[must_use]
207 pub fn byte_range(&self) -> Option<core::ops::Range<usize>> {
208 let start = self.start.byte_offset()?;
209 let end = self.end.byte_offset()?;
210 Some(start..end)
211 }
212
213 #[must_use]
216 pub fn slice<'source>(&self, source: &'source str) -> Option<&'source str> {
217 source.get(self.byte_range()?)
218 }
219}
220
/// Where a comment sits relative to the surrounding content.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub enum Placement {
    /// NOTE(review): presumably a comment on the line(s) directly above a node; this chunk
    /// of the scanner never assigns it — confirm against the consumer.
    Above,
    /// The comment follows non-blank content on the same line.
    Right,
    /// The comment is preceded only by whitespace on its line. This is the default.
    #[default]
    Free,
    /// NOTE(review): presumably a trailing comment at the end of a collection or document;
    /// this chunk of the scanner never assigns it — confirm against the consumer.
    Last,
}
259
260#[derive(Clone, PartialEq, Debug, Eq)]
266pub struct Comment<'input> {
267 pub span: Span,
269 pub text: Cow<'input, str>,
273 pub placement: Placement,
275}
276
277impl<'input> Comment<'input> {
278 #[must_use]
283 pub fn new(span: Span, text: impl Into<Cow<'input, str>>) -> Self {
284 Self {
285 span,
286 text: text.into(),
287 placement: Placement::Free,
288 }
289 }
290
291 #[must_use]
293 pub fn with_placement(mut self, placement: Placement) -> Self {
294 self.placement = placement;
295 self
296 }
297
298 #[must_use]
302 pub fn trimmed_text(&self) -> &str {
303 self.text.trim()
304 }
305}
306
307impl AsRef<str> for Comment<'_> {
308 fn as_ref(&self) -> &str {
309 self.text.as_ref()
310 }
311}
312
313#[derive(Clone, PartialEq, Debug, Eq)]
315pub struct ScanError {
316 mark: Marker,
318 info: String,
320}
321
322impl ScanError {
323 #[must_use]
325 #[cold]
326 pub fn new(loc: Marker, info: String) -> ScanError {
327 ScanError { mark: loc, info }
328 }
329
330 #[must_use]
332 #[cold]
333 pub fn new_str(loc: Marker, info: &str) -> ScanError {
334 ScanError {
335 mark: loc,
336 info: info.to_owned(),
337 }
338 }
339
340 #[cold]
341 pub(crate) fn into_result<T>(self) -> Result<T, ScanError> {
342 Err(self)
343 }
344
345 #[must_use]
347 pub fn marker(&self) -> &Marker {
348 &self.mark
349 }
350
351 #[must_use]
353 pub fn info(&self) -> &str {
354 self.info.as_ref()
355 }
356}
357
358impl fmt::Display for ScanError {
359 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
360 write!(
361 f,
362 "{} at char {} line {} column {}",
363 self.info,
364 self.mark.index(),
365 self.mark.line(),
366 self.mark.col() + 1
367 )
368 }
369}
370
371impl core::error::Error for ScanError {}
372
/// The kind of a token produced by the scanner, together with its payload.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType<'input> {
    /// Start of the stream, carrying the stream encoding.
    StreamStart(TEncoding),
    /// End of the stream.
    StreamEnd,
    /// A version directive.
    VersionDirective(
        // Major version number.
        u32,
        // Minor version number.
        u32,
    ),
    /// A tag directive.
    TagDirective(
        // Tag handle.
        Cow<'input, str>,
        // Tag prefix.
        Cow<'input, str>,
    ),
    /// A document start marker.
    DocumentStart,
    /// A document end marker.
    DocumentEnd,
    /// Start of a block sequence.
    BlockSequenceStart,
    /// Start of a block mapping.
    BlockMappingStart,
    /// End of a block collection.
    BlockEnd,
    /// Start of a flow sequence (`[`).
    FlowSequenceStart,
    /// End of a flow sequence (`]`).
    FlowSequenceEnd,
    /// Start of a flow mapping (`{`).
    FlowMappingStart,
    /// End of a flow mapping (`}`).
    FlowMappingEnd,
    /// An entry indicator in a block sequence (`-`).
    BlockEntry,
    /// An entry separator in a flow collection (`,`).
    FlowEntry,
    /// A mapping key indicator.
    Key,
    /// A mapping value indicator.
    Value,
    /// An alias (`*name`), carrying its name.
    Alias(Cow<'input, str>),
    /// An anchor (`&name`), carrying its name.
    Anchor(Cow<'input, str>),
    /// A tag.
    Tag(
        // Tag handle.
        Cow<'input, str>,
        // Tag suffix.
        Cow<'input, str>,
    ),
    /// A scalar, with its style and value.
    Scalar(ScalarStyle, Cow<'input, str>),
    /// A comment.
    Comment(
        // The comment, including its span and placement.
        Comment<'input>,
    ),
    /// A directive that is neither a version nor a tag directive.
    ReservedDirective(
        // Directive name.
        String,
        // Directive parameters.
        Vec<String>,
    ),
}
453
/// A scanner token: the source span it covers and its kind.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token<'input>(
    // Source region covered by the token.
    pub Span,
    // Kind of the token and its payload.
    pub TokenType<'input>,
);
462
463#[derive(Clone, PartialEq, Debug, Eq)]
468pub(crate) struct QueuedComment<'input> {
469 pub(crate) text: Cow<'input, str>,
470 pub(crate) placement: Placement,
471}
472
473impl<'input> QueuedComment<'input> {
474 fn into_public(self, span: Span) -> Comment<'input> {
475 Comment::new(span, self.text).with_placement(self.placement)
476 }
477}
478
479impl<'input> From<Comment<'input>> for QueuedComment<'input> {
480 fn from(comment: Comment<'input>) -> Self {
481 Self {
482 text: comment.text,
483 placement: comment.placement,
484 }
485 }
486}
487
488#[derive(Clone, PartialEq, Debug, Eq)]
493pub(crate) enum QueuedTokenType<'input> {
494 StreamStart(TEncoding),
495 StreamEnd,
496 VersionDirective(u32, u32),
497 TagDirective(Cow<'input, str>, Cow<'input, str>),
498 DocumentStart,
499 DocumentEnd,
500 BlockSequenceStart,
501 BlockMappingStart,
502 BlockEnd,
503 FlowSequenceStart,
504 FlowSequenceEnd,
505 FlowMappingStart,
506 FlowMappingEnd,
507 BlockEntry,
508 FlowEntry,
509 Key,
510 Value,
511 Alias(Cow<'input, str>),
512 Anchor(Cow<'input, str>),
513 Tag(Cow<'input, str>, Cow<'input, str>),
514 Scalar(ScalarStyle, Cow<'input, str>),
515 Comment(QueuedComment<'input>),
516 ReservedDirective(String, Vec<String>),
517}
518
519impl<'input> QueuedTokenType<'input> {
520 fn into_public(self, span: Span) -> TokenType<'input> {
521 match self {
522 Self::StreamStart(encoding) => TokenType::StreamStart(encoding),
523 Self::StreamEnd => TokenType::StreamEnd,
524 Self::VersionDirective(major, minor) => TokenType::VersionDirective(major, minor),
525 Self::TagDirective(handle, prefix) => TokenType::TagDirective(handle, prefix),
526 Self::DocumentStart => TokenType::DocumentStart,
527 Self::DocumentEnd => TokenType::DocumentEnd,
528 Self::BlockSequenceStart => TokenType::BlockSequenceStart,
529 Self::BlockMappingStart => TokenType::BlockMappingStart,
530 Self::BlockEnd => TokenType::BlockEnd,
531 Self::FlowSequenceStart => TokenType::FlowSequenceStart,
532 Self::FlowSequenceEnd => TokenType::FlowSequenceEnd,
533 Self::FlowMappingStart => TokenType::FlowMappingStart,
534 Self::FlowMappingEnd => TokenType::FlowMappingEnd,
535 Self::BlockEntry => TokenType::BlockEntry,
536 Self::FlowEntry => TokenType::FlowEntry,
537 Self::Key => TokenType::Key,
538 Self::Value => TokenType::Value,
539 Self::Alias(name) => TokenType::Alias(name),
540 Self::Anchor(name) => TokenType::Anchor(name),
541 Self::Tag(handle, suffix) => TokenType::Tag(handle, suffix),
542 Self::Scalar(style, value) => TokenType::Scalar(style, value),
543 Self::Comment(comment) => TokenType::Comment(comment.into_public(span)),
544 Self::ReservedDirective(name, params) => TokenType::ReservedDirective(name, params),
545 }
546 }
547}
548
549impl<'input> From<TokenType<'input>> for QueuedTokenType<'input> {
550 fn from(token: TokenType<'input>) -> Self {
551 match token {
552 TokenType::StreamStart(encoding) => Self::StreamStart(encoding),
553 TokenType::StreamEnd => Self::StreamEnd,
554 TokenType::VersionDirective(major, minor) => Self::VersionDirective(major, minor),
555 TokenType::TagDirective(handle, prefix) => Self::TagDirective(handle, prefix),
556 TokenType::DocumentStart => Self::DocumentStart,
557 TokenType::DocumentEnd => Self::DocumentEnd,
558 TokenType::BlockSequenceStart => Self::BlockSequenceStart,
559 TokenType::BlockMappingStart => Self::BlockMappingStart,
560 TokenType::BlockEnd => Self::BlockEnd,
561 TokenType::FlowSequenceStart => Self::FlowSequenceStart,
562 TokenType::FlowSequenceEnd => Self::FlowSequenceEnd,
563 TokenType::FlowMappingStart => Self::FlowMappingStart,
564 TokenType::FlowMappingEnd => Self::FlowMappingEnd,
565 TokenType::BlockEntry => Self::BlockEntry,
566 TokenType::FlowEntry => Self::FlowEntry,
567 TokenType::Key => Self::Key,
568 TokenType::Value => Self::Value,
569 TokenType::Alias(name) => Self::Alias(name),
570 TokenType::Anchor(name) => Self::Anchor(name),
571 TokenType::Tag(handle, suffix) => Self::Tag(handle, suffix),
572 TokenType::Scalar(style, value) => Self::Scalar(style, value),
573 TokenType::Comment(comment) => Self::Comment(comment.into()),
574 TokenType::ReservedDirective(name, params) => Self::ReservedDirective(name, params),
575 }
576 }
577}
578
579#[derive(Clone, PartialEq, Debug, Eq)]
581pub(crate) struct QueuedToken<'input>(pub(crate) Span, pub(crate) QueuedTokenType<'input>);
582
583impl<'input> QueuedToken<'input> {
584 fn into_public(self) -> Token<'input> {
585 Token(self.0, self.1.into_public(self.0))
586 }
587}
588
589impl<'input> From<Token<'input>> for QueuedToken<'input> {
590 fn from(token: Token<'input>) -> Self {
591 Self(token.0, token.1.into())
592 }
593}
594
595#[derive(Clone, PartialEq, Debug, Eq)]
630struct SimpleKey {
631 possible: bool,
644 required: bool,
653 token_number: usize,
659 mark: Marker,
661}
662
663impl SimpleKey {
664 fn new(mark: Marker) -> SimpleKey {
666 SimpleKey {
667 possible: false,
668 required: false,
669 token_number: 0,
670 mark,
671 }
672 }
673}
674
/// An entry of the scanner's indentation stack.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The indentation level of this entry.
    indent: isize,
    /// NOTE(review): presumably whether a `BlockEnd` token must be emitted when this entry is
    /// popped — the code using it is outside this view; confirm.
    needs_block_end: bool,
}
699
/// State tracked per flow level for implicit mappings.
///
/// NOTE(review): the code driving this state is outside this view; the variant notes below
/// are inferred from names and should be confirmed.
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
    /// Presumably: an implicit mapping may still start here.
    Possible,
    /// Presumably: currently inside an implicit mapping; the meaning of the `u8` payload is
    /// not visible here.
    Inside(u8),
}
733
/// The scanner: reads characters from an input and produces tokens.
///
/// Tokens are accumulated in an internal queue and handed out one at a time, either through
/// the `Iterator` implementation or through `next_token`.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<'input, T> {
    /// The character source.
    input: T,
    /// Current position in the source.
    mark: Marker,
    /// Tokens scanned but not yet handed out.
    tokens: VecDeque<QueuedToken<'input>>,
    /// Error recorded by the iterator; once set, iteration yields nothing more.
    error: Option<ScanError>,
    /// Error held back while a comment token sits at the front of the queue, so that the
    /// comment is delivered first.
    deferred_error: Option<ScanError>,
    /// Whether the input reported that it may contain comments.
    comments_possible: bool,

    /// Whether the stream-start token has been produced.
    stream_start_produced: bool,
    /// Whether the stream-end token has been handed out.
    stream_end_produced: bool,
    /// While set, a BOM at column 0 outside flow context is skipped as whitespace; cleared
    /// once other content is reached.
    document_prefix_allowed: bool,
    /// Character index at which a `:` in flow context is accepted as a value indicator even
    /// when not followed by a flow indicator.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Candidate simple keys. NOTE(review): presumably one per flow level — confirm.
    simple_keys: smallvec::SmallVec<[SimpleKey; 8]>,
    /// Current indentation level; starts at `-1`.
    indent: isize,
    /// Stack of indentation entries.
    indents: smallvec::SmallVec<[Indent; 8]>,
    /// Flow nesting counter; `0` is treated as block context.
    flow_level: u8,
    /// Number of tokens handed out so far.
    tokens_parsed: usize,
    /// Whether the queue is known to hold a token ready to be handed out.
    token_available: bool,
    /// True after a line break, until a non-blank character is consumed on the new line.
    leading_whitespace: bool,
    /// NOTE(review): presumably, per flow level, whether a flow mapping was started — confirm.
    flow_mapping_started: smallvec::SmallVec<[bool; 8]>,
    /// Implicit-mapping state. NOTE(review): presumably tracked per flow level — confirm.
    implicit_flow_mapping_states: smallvec::SmallVec<[ImplicitMappingState; 8]>,
    /// Set when a comment may have interrupted a plain scalar; holds the position reported
    /// if the following line turns out to continue that scalar.
    interrupted_plain_by_comment: Option<Marker>,
    /// While set, a tab met when skipping to the next token is rejected; cleared at the first
    /// character that is not a space, line break or `#`.
    explicit_key_tab_check_pending: bool,
    /// Markers paired with a character. NOTE(review): presumably the opening brackets of the
    /// currently open flow collections, kept for "unclosed bracket" errors — confirm.
    flow_markers: smallvec::SmallVec<[(Marker, char); 8]>,
    // Reusable string buffers. NOTE(review): presumably scratch space for scalar scanning —
    // their users are outside this view.
    buf_leading_break: String,
    buf_trailing_breaks: String,
    buf_whitespaces: String,
}
836
837impl<'input, T: BorrowedInput<'input>> Iterator for Scanner<'input, T> {
838 type Item = Token<'input>;
839
840 fn next(&mut self) -> Option<Self::Item> {
841 if self.error.is_some() {
842 return None;
843 }
844 match self.next_token() {
845 Ok(Some(tok)) => {
846 debug_print!(
847 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
848 tok.1,
849 tok.0
850 );
851 Some(tok)
852 }
853 Ok(tok) => tok,
854 Err(e) => self.stop_after_error(e),
855 }
856 }
857}
858
/// Result of a scanning step that yields no value: `Ok(())` on success, a [`ScanError`] on
/// failure.
pub type ScanResult = Result<(), ScanError>;
861
/// Accumulator for a scalar's contents: either a byte range into the input (nothing copied
/// yet) or an owned string.
#[derive(Debug)]
enum FlowScalarBuf {
    /// The contents are the input bytes `start..end`.
    Borrowed {
        /// First byte of the committed contents.
        start: usize,
        /// One past the last byte of the committed contents.
        end: usize,
        /// Start of a whitespace run that is not yet committed, if any.
        pending_ws_start: Option<usize>,
        /// End of the uncommitted whitespace run.
        pending_ws_end: usize,
    },
    /// The contents have been copied into an owned string.
    Owned(String),
}

impl FlowScalarBuf {
    /// Create an empty borrowed buffer anchored at byte offset `start`.
    #[inline]
    fn new_borrowed(start: usize) -> Self {
        FlowScalarBuf::Borrowed {
            start,
            end: start,
            pending_ws_end: start,
            pending_ws_start: None,
        }
    }

    /// Create an empty owned buffer.
    #[inline]
    fn new_owned() -> Self {
        FlowScalarBuf::Owned(String::new())
    }

    /// Mutable access to the owned string; `None` while the buffer is borrowed.
    #[inline]
    fn as_owned_mut(&mut self) -> Option<&mut String> {
        if let FlowScalarBuf::Owned(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Fold any pending whitespace into the committed range. No-op on an owned buffer or when
    /// nothing is pending.
    #[inline]
    fn commit_pending_ws(&mut self) {
        match self {
            FlowScalarBuf::Borrowed {
                end,
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                // `take` clears the pending marker and reports whether one was set.
                if pending_ws_start.take().is_some() {
                    *end = *pending_ws_end;
                }
            }
            FlowScalarBuf::Owned(_) => {}
        }
    }

    /// Record a whitespace run `ws_start..ws_end` as pending. The start of an already pending
    /// run is kept; the end always moves to `ws_end`. No-op on an owned buffer.
    #[inline]
    fn note_pending_ws(&mut self, ws_start: usize, ws_end: usize) {
        match self {
            FlowScalarBuf::Borrowed {
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                *pending_ws_start = pending_ws_start.or(Some(ws_start));
                *pending_ws_end = ws_end;
            }
            FlowScalarBuf::Owned(_) => {}
        }
    }

    /// Forget any pending whitespace, resetting the pending end to the committed end. No-op on
    /// an owned buffer.
    #[inline]
    fn discard_pending_ws(&mut self) {
        match self {
            FlowScalarBuf::Borrowed {
                end,
                pending_ws_start,
                pending_ws_end,
                ..
            } => {
                *pending_ws_end = *end;
                *pending_ws_start = None;
            }
            FlowScalarBuf::Owned(_) => {}
        }
    }
}
947
948impl<'input, T: BorrowedInput<'input>> Scanner<'input, T> {
949 #[inline]
950 fn promote_flow_scalar_buf_to_owned(
951 &self,
952 start_mark: &Marker,
953 buf: &mut FlowScalarBuf,
954 ) -> Result<(), ScanError> {
955 let FlowScalarBuf::Borrowed {
956 start,
957 end,
958 pending_ws_start: _,
959 pending_ws_end: _,
960 } = *buf
961 else {
962 return Ok(());
963 };
964
965 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
966 ScanError::new_str(
967 *start_mark,
968 "internal error: input advertised offsets but did not provide a slice",
969 )
970 })?;
971 *buf = FlowScalarBuf::Owned(slice.to_owned());
972 Ok(())
973 }
/// Ask the input for the byte range `start..end` as a slice that lives for `'input`.
///
/// Returns `None` when the input cannot lend such a slice.
#[inline]
fn try_borrow_slice(&self, start: usize, end: usize) -> Option<&'input str> {
    self.input.slice_borrowed(start, end)
}
983
984 fn scan_tag_handle_directive_cow(
989 &mut self,
990 mark: &Marker,
991 ) -> Result<Cow<'input, str>, ScanError> {
992 let Some(start) = self.input.byte_offset() else {
993 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
994 };
995
996 if self.input.look_ch() != '!' {
997 return Err(ScanError::new_str(
998 *mark,
999 "while scanning a tag, did not find expected '!'",
1000 ));
1001 }
1002
1003 self.skip_non_blank();
1005
1006 self.input.lookahead(1);
1009 while self.input.next_is_alpha() {
1010 self.skip_non_blank();
1011 self.input.lookahead(1);
1012 }
1013
1014 if self.input.peek() == '!' {
1016 self.skip_non_blank();
1017 }
1018
1019 let Some(end) = self.input.byte_offset() else {
1020 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
1022 };
1023
1024 let Some(slice) = self.try_borrow_slice(start, end) else {
1025 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1027 ScanError::new_str(
1028 *mark,
1029 "internal error: input advertised slicing but did not provide a slice",
1030 )
1031 })?;
1032 if !slice.ends_with('!') && slice != "!" {
1033 return Err(ScanError::new_str(
1034 *mark,
1035 "while parsing a tag directive, did not find expected '!'",
1036 ));
1037 }
1038 return Ok(Cow::Owned(slice.to_owned()));
1039 };
1040
1041 if !slice.ends_with('!') && slice != "!" {
1042 return Err(ScanError::new_str(
1043 *mark,
1044 "while parsing a tag directive, did not find expected '!'",
1045 ));
1046 }
1047
1048 Ok(Cow::Borrowed(slice))
1049 }
1050
1051 fn scan_tag_prefix_directive_cow(
1056 &mut self,
1057 start_mark: &Marker,
1058 ) -> Result<Cow<'input, str>, ScanError> {
1059 let Some(start) = self.input.byte_offset() else {
1060 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
1061 };
1062
1063 if self.input.look_ch() == '!' {
1065 self.skip_non_blank();
1066 } else if !is_tag_char(self.input.peek()) {
1067 return Err(ScanError::new_str(
1068 *start_mark,
1069 "invalid global tag character",
1070 ));
1071 } else if self.input.peek() == '%' {
1072 } else {
1074 self.skip_non_blank();
1075 }
1076
1077 while is_uri_char(self.input.look_ch()) {
1079 if self.input.peek() == '%' {
1080 break;
1081 }
1082 self.skip_non_blank();
1083 }
1084
1085 if self.input.peek() == '%' {
1087 let current = self
1088 .input
1089 .byte_offset()
1090 .expect("byte_offset() must remain available once enabled");
1091 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
1092 slice.to_owned()
1093 } else {
1094 String::new()
1095 };
1096
1097 while is_uri_char(self.input.look_ch()) {
1098 if self.input.peek() == '%' {
1099 out.push(self.scan_uri_escapes(start_mark)?);
1100 } else {
1101 out.push(self.input.peek());
1102 self.skip_non_blank();
1103 }
1104 }
1105 return Ok(Cow::Owned(out));
1106 }
1107
1108 let Some(end) = self.input.byte_offset() else {
1109 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
1110 };
1111
1112 let Some(slice) = self.try_borrow_slice(start, end) else {
1113 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1115 ScanError::new_str(
1116 *start_mark,
1117 "internal error: input advertised slicing but did not provide a slice",
1118 )
1119 })?;
1120 return Ok(Cow::Owned(slice.to_owned()));
1121 };
1122
1123 Ok(Cow::Borrowed(slice))
1124 }
1125 pub fn new(input: T) -> Self {
1127 let initial_byte_offset = input.byte_offset();
1128 let comments_possible = input.may_contain_comments();
1129 Scanner {
1130 input,
1131 mark: Marker::new(0, 1, 0).with_byte_offset(initial_byte_offset),
1132 tokens: VecDeque::with_capacity(64),
1133 error: None,
1134 deferred_error: None,
1135 comments_possible,
1136
1137 stream_start_produced: false,
1138 stream_end_produced: false,
1139 document_prefix_allowed: true,
1140 adjacent_value_allowed_at: 0,
1141 simple_key_allowed: true,
1142 simple_keys: smallvec::SmallVec::new(),
1143 indent: -1,
1144 indents: smallvec::SmallVec::new(),
1145 flow_level: 0,
1146 tokens_parsed: 0,
1147 token_available: false,
1148 leading_whitespace: true,
1149 flow_mapping_started: smallvec::SmallVec::new(),
1150 implicit_flow_mapping_states: smallvec::SmallVec::new(),
1151 flow_markers: smallvec::SmallVec::new(),
1152 interrupted_plain_by_comment: None,
1153 explicit_key_tab_check_pending: false,
1154
1155 buf_leading_break: String::with_capacity(128),
1156 buf_trailing_breaks: String::with_capacity(128),
1157 buf_whitespaces: String::with_capacity(128),
1158 }
1159 }
1160
1161 #[inline]
1166 pub fn get_error(&self) -> Option<ScanError> {
1167 self.error.clone().or_else(|| self.deferred_error.clone())
1168 }
1169
/// Record `error` as the scanner's error and return `None`, so the iterator stops.
#[cold]
fn stop_after_error(&mut self, error: ScanError) -> Option<Token<'input>> {
    self.error = Some(error);
    None
}
1175
/// Build the "simple key expected" error at the current position.
#[cold]
fn simple_key_expected(&self) -> ScanError {
    ScanError::new_str(self.mark, "simple key expected")
}
1180
/// Build an "unclosed bracket" error located at `mark`, naming the offending `bracket`.
#[cold]
fn unclosed_bracket(mark: Marker, bracket: char) -> ScanError {
    ScanError::new(mark, format!("unclosed bracket '{bracket}'"))
}
1185
1186 #[inline]
1188 fn skip_blank(&mut self) {
1189 self.input.skip();
1190
1191 self.mark.offsets.chars += 1;
1192 self.mark.col += 1;
1193 self.mark.offsets.bytes = self.input.byte_offset();
1194 }
1195
1196 #[inline]
1198 fn skip_non_blank(&mut self) {
1199 self.input.skip();
1200
1201 self.mark.offsets.chars += 1;
1202 self.mark.col += 1;
1203 self.mark.offsets.bytes = self.input.byte_offset();
1204 self.leading_whitespace = false;
1205 }
1206
1207 #[inline]
1212 fn skip_bom(&mut self) {
1213 self.input.skip();
1214
1215 self.mark.offsets.chars += 1;
1216 self.mark.offsets.bytes = self.input.byte_offset();
1217 }
1218
1219 #[inline]
1225 fn skip_comment_char(&mut self) {
1226 self.input.skip();
1227
1228 self.mark.offsets.chars += 1;
1229 self.mark.col += 1;
1230 self.mark.offsets.bytes = self.input.byte_offset();
1231 }
1232
1233 #[inline]
1235 fn skip_n_non_blank(&mut self, count: usize) {
1236 for _ in 0..count {
1237 self.input.skip();
1238 self.mark.offsets.chars += 1;
1239 self.mark.col += 1;
1240 }
1241 self.mark.offsets.bytes = self.input.byte_offset();
1242 self.leading_whitespace = false;
1243 }
1244
1245 #[inline]
1247 fn skip_nl(&mut self) {
1248 self.input.skip();
1249
1250 self.mark.offsets.chars += 1;
1251 self.mark.col = 0;
1252 self.mark.line += 1;
1253 self.mark.offsets.bytes = self.input.byte_offset();
1254 self.leading_whitespace = true;
1255 }
1256
1257 #[inline]
1259 fn skip_linebreak(&mut self) {
1260 if self.input.next_2_are('\r', '\n') {
1261 self.skip_blank();
1264 self.skip_nl();
1265 } else if self.input.next_is_break() {
1266 self.skip_nl();
1267 }
1268 }
1269
/// Test-only helper: scan a comment at the current position and return it as a public
/// [`Token`].
///
/// # Errors
/// Propagates any error from `scan_comment_queued_token`.
#[cfg(test)]
fn scan_comment_token(&mut self) -> Result<Token<'input>, ScanError> {
    Ok(self.scan_comment_queued_token()?.into_public())
}
1274
1275 fn scan_comment_queued_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
1276 let start_mark = self.mark;
1277 debug_assert_eq!(self.input.peek(), '#');
1278 let placement = if self.leading_whitespace {
1279 Placement::Free
1280 } else {
1281 Placement::Right
1282 };
1283
1284 self.skip_comment_char();
1285
1286 let text = if let Some(start) = self.input.byte_offset() {
1287 let n = self.input.skip_while_non_breakz();
1289 self.mark.offsets.chars += n;
1290 self.mark.col += n;
1291 let byte_offset = self.input.byte_offset();
1292 self.mark.offsets.bytes = byte_offset;
1293 let end = byte_offset.expect("byte_offset must remain available once enabled");
1294
1295 if let Some(slice) = self.try_borrow_slice(start, end) {
1296 Cow::Borrowed(slice)
1297 } else if let Some(slice) = self.input.slice_bytes(start, end) {
1298 Cow::Owned(slice.to_owned())
1300 } else {
1301 return Err(ScanError::new_str(
1302 start_mark,
1303 "internal error: input advertised offsets but did not provide a slice",
1304 ));
1305 }
1306 } else {
1307 let mut owned = String::new();
1309 while !is_breakz(self.input.look_ch()) {
1310 owned.push(self.input.peek());
1311 self.skip_comment_char();
1312 }
1313 Cow::Owned(owned)
1314 };
1315
1316 let end_mark = self.mark;
1317 let span = Span::new(start_mark, end_mark);
1318 Ok(QueuedToken(
1319 span,
1320 QueuedTokenType::Comment(QueuedComment { text, placement }),
1321 ))
1322 }
1323
1324 fn push_comment_token(&mut self) -> ScanResult {
1325 let token = self.scan_comment_queued_token()?;
1326 self.tokens.push_back(token);
1327 Ok(())
1328 }
1329
1330 fn skip_comment(&mut self) {
1331 debug_assert_eq!(self.input.peek(), '#');
1332
1333 self.skip_comment_char();
1334 let n = self.input.skip_while_non_breakz();
1335 self.mark.offsets.chars += n;
1336 self.mark.col += n;
1337 self.mark.offsets.bytes = self.input.byte_offset();
1338 }
1339
/// Whether the stream-start token has been produced.
#[inline]
pub fn stream_started(&self) -> bool {
    self.stream_start_produced
}
1345
/// Whether the stream-end token has been handed out.
#[inline]
pub fn stream_ended(&self) -> bool {
    self.stream_end_produced
}
1351
/// The scanner's current position in the source.
#[inline]
pub fn mark(&self) -> Marker {
    self.mark
}
1357
/// Whether the input reported, when the scanner was created, that it may contain comments.
#[inline]
pub(crate) fn comments_possible(&self) -> bool {
    self.comments_possible
}
1363
/// Consume the line break under the cursor and append a single `'\n'` to `s`, whatever form
/// the break took in the input.
#[inline]
fn read_break(&mut self, s: &mut String) {
    self.skip_break();
    s.push('\n');
}
1375
1376 #[inline]
1381 fn skip_break(&mut self) {
1382 let c = self.input.peek();
1383 let nc = self.input.peek_nth(1);
1384 debug_assert!(is_break(c));
1385 if c == '\r' && nc == '\n' {
1386 self.skip_blank();
1387 }
1388 self.skip_nl();
1389 }
1390
1391 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
1393 let old_len = self.tokens.len();
1394 assert!(pos <= old_len);
1395 self.tokens.insert(pos, tok.into());
1396 }
1397
/// Mark that a simple key may start at the current position.
#[inline]
fn allow_simple_key(&mut self) {
    self.simple_key_allowed = true;
}
1402
/// Mark that a simple key may not start at the current position.
#[inline]
fn disallow_simple_key(&mut self) {
    self.simple_key_allowed = false;
}
1407
/// Scan forward and queue the next token (or tokens) found in the input.
///
/// The first call only queues the stream-start token. Later calls skip whitespace and
/// comments, expire stale simple keys, unroll indentation, and then dispatch on the next
/// character to the matching `fetch_*` routine.
///
/// # Errors
/// Returns a [`ScanError`] on invalid indentation, content after a document end marker, a
/// BOM inside a document, a reserved character (`%`, `@` or a backtick) starting a token, or
/// any error raised by the routine that was dispatched to.
pub fn fetch_next_token(&mut self) -> ScanResult {
    self.input.lookahead(1);

    if !self.stream_start_produced {
        self.fetch_stream_start();
        return Ok(());
    }
    // `true` means a comment token was queued; return so it can be handed out first.
    if self.skip_to_next_token(true)? {
        return Ok(());
    }

    debug_print!(
        " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
        self.mark,
        self.input.peek()
    );

    self.stale_simple_keys()?;

    let mark = self.mark;
    self.unroll_indent(mark.col as isize);

    self.input.lookahead(4);

    if self.input.next_is_z() {
        self.fetch_stream_end()?;
        return Ok(());
    }

    // Directives and document indicators are only recognized at column 0.
    if self.mark.col == 0 {
        if self.input.next_char_is('%') {
            return self.fetch_directive();
        } else if self.input.next_is_document_start() {
            return self.fetch_document_indicator(TokenType::DocumentStart);
        } else if self.input.next_is_document_end() {
            self.fetch_document_indicator(TokenType::DocumentEnd)?;
            // Only whitespace may follow the document end marker on its line.
            self.skip_ws_to_eol(SkipTabs::Yes)?;
            if !self.input.next_is_breakz() {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid content after document end marker",
                ));
            }
            return Ok(());
        }
    }

    // Other content was reached: a BOM is no longer accepted as part of a document prefix.
    if self.document_prefix_allowed {
        self.document_prefix_allowed = false;
    }

    if (self.mark.col as isize) < self.indent {
        self.input.lookahead(1);
        let c = self.input.peek();
        // Inside a flow collection, closing brackets and `,` are tolerated at a lower column.
        if self.flow_level == 0 || !matches!(c, ']' | '}' | ',') {
            return Err(ScanError::new_str(self.mark, "invalid indentation"));
        }
    }

    let c = self.input.peek();
    let nc = self.input.peek_nth(1);
    // Arm order matters: guarded arms fall through to later arms when the guard fails.
    match c {
        '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
        '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
        ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
        '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
        ',' => self.fetch_flow_entry(),
        '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
        '?' if is_blank_or_breakz(nc) => self.fetch_key(),
        ':' if is_blank_or_breakz(nc) => self.fetch_value(),
        ':' if self.flow_level > 0
            && (is_flow(nc) || self.mark.index() == self.adjacent_value_allowed_at) =>
        {
            self.fetch_flow_value()
        }
        // `*` and `&` share one routine; the flag distinguishes them.
        '*' => self.fetch_anchor(true),
        '&' => self.fetch_anchor(false),
        '!' => self.fetch_tag(),
        // Block scalars are only recognized outside flow context; the flag distinguishes
        // `|` from `>`.
        '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
        '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
        // Quoted scalars share one routine; the flag distinguishes `'` from `"`.
        '\'' => self.fetch_flow_scalar(true),
        '"' => self.fetch_flow_scalar(false),
        // Indicator characters not followed by a blank start a plain scalar.
        '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
        ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
            self.fetch_plain_scalar()
        }
        c if is_bom(c) => Err(ScanError::new_str(
            self.mark,
            "a BOM must not appear inside a document",
        )),
        '%' | '@' | '`' => Err(ScanError::new(
            self.mark,
            format!("unexpected character: `{c}'"),
        )),
        _ => self.fetch_plain_scalar(),
    }
}
1514
/// Hand out the next token in its queued (internal) form.
///
/// Returns `Ok(None)` once the stream-end token has been handed out.
///
/// An error raised while a comment token is at the front of the queue is not reported
/// immediately: it is stored as a deferred error so the comment is delivered first, and it
/// is reported by a later call once the front of the queue is no longer a comment.
///
/// # Errors
/// Returns a previously deferred error, an error from `fetch_more_tokens`, or
/// "did not find expected next token" when the queue is unexpectedly empty.
pub(crate) fn next_queued_token(&mut self) -> Result<Option<QueuedToken<'input>>, ScanError> {
    if self.deferred_error.is_some() {
        // Report the deferred error unless a comment is still waiting at the front.
        if !matches!(
            self.tokens.front().map(|token| &token.1),
            Some(QueuedTokenType::Comment(_))
        ) {
            if let Some(error) = self.deferred_error.take() {
                return error.into_result();
            }
        }
        // A comment is still queued: hand it out without scanning further.
        self.token_available = true;
    }

    if self.stream_end_produced {
        return Ok(None);
    }

    if !self.token_available {
        if let Err(error) = self.fetch_more_tokens() {
            // Hold the error back if a comment can be delivered first.
            if matches!(
                self.tokens.front().map(|token| &token.1),
                Some(QueuedTokenType::Comment(_))
            ) {
                self.deferred_error = Some(error);
            } else {
                return Err(error);
            }
        }
    }
    let Some(t) = self.tokens.pop_front() else {
        return Err(ScanError::new_str(
            self.mark,
            "did not find expected next token",
        ));
    };
    self.token_available = false;
    self.tokens_parsed += 1;

    let is_stream_end = matches!(t.1, QueuedTokenType::StreamEnd);
    if is_stream_end {
        self.stream_end_produced = true;
    }
    Ok(Some(t))
}
1563
/// Hand out the next token in its public form.
///
/// Returns `Ok(None)` once the stream-end token has been handed out.
///
/// # Errors
/// Propagates any error from `next_queued_token`.
pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
    Ok(self.next_queued_token()?.map(QueuedToken::into_public))
}
1571
1572 pub fn fetch_more_tokens(&mut self) -> ScanResult {
1577 let mut need_more;
1578 loop {
1579 if self.tokens.is_empty() {
1580 need_more = true;
1581 } else {
1582 need_more = false;
1583 self.stale_simple_keys()?;
1585 if !matches!(
1586 self.tokens.front().map(|token| &token.1),
1587 Some(QueuedTokenType::Comment(_))
1588 ) {
1589 for sk in &self.simple_keys {
1591 if sk.possible && sk.token_number == self.tokens_parsed {
1592 need_more = true;
1593 break;
1594 }
1595 }
1596 }
1597 }
1598
1599 if let Some(token) = self.tokens.back() {
1602 if matches!(
1603 token.1,
1604 QueuedTokenType::DocumentEnd | QueuedTokenType::DocumentStart
1605 ) {
1606 break;
1607 }
1608 }
1609
1610 if !need_more {
1611 break;
1612 }
1613 self.fetch_next_token()?;
1614 }
1615 self.token_available = true;
1616
1617 Ok(())
1618 }
1619
1620 fn stale_simple_keys(&mut self) -> ScanResult {
1629 for sk in &mut self.simple_keys {
1630 let is_line_stale = self.flow_level == 0 && sk.mark.line < self.mark.line;
1631 let is_length_stale =
1634 self.mark.index().saturating_sub(sk.mark.index()) > SIMPLE_KEY_MAX_LOOKAHEAD;
1635
1636 if sk.possible && (is_line_stale || is_length_stale) {
1637 if sk.required {
1638 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
1639 }
1640 sk.possible = false;
1641 }
1642 }
1643 Ok(())
1644 }
1645
/// Skip whitespace, line breaks, an allowed BOM and comments up to the next token.
///
/// Every comment met on the way is queued as a comment token. When `stop_after_comment` is
/// true, the function returns `Ok(true)` right after queuing one (and consuming the line
/// break that follows it, if any). Otherwise it returns `Ok(false)` once positioned at the
/// first character that is none of the above.
///
/// # Errors
/// Fails on a tab where tabs are not allowed (after an explicit key indicator, or in block
/// indentation), on a comment that interrupts a multi-line plain scalar, and on any error
/// raised while scanning a comment.
fn skip_to_next_token(&mut self, stop_after_comment: bool) -> Result<bool, ScanError> {
    // Consume a line break; outside flow context a new line allows a simple key again.
    let consume_linebreak = |this: &mut Self| {
        this.input.lookahead(2);
        this.skip_linebreak();
        if this.flow_level == 0 {
            this.allow_simple_key();
        }
    };

    loop {
        let ch = self.input.look_ch();
        // While the check is pending, a tab is an error; spaces, breaks and comments keep
        // the check pending, anything else ends it.
        if self.explicit_key_tab_check_pending {
            match ch {
                '\t' => {
                    return Err(ScanError::new_str(
                        self.mark(),
                        "tabs disallowed in this context",
                    ));
                }
                ' ' | '\n' | '\r' | '#' => {}
                _ => self.explicit_key_tab_check_pending = false,
            }
        }

        match ch {
            '\t' => {
                // A tab in the leading whitespace of a block line, left of the current
                // indent, is only accepted when the rest of the line is blank.
                if self.is_within_block()
                    && self.leading_whitespace
                    && (self.mark.col as isize) < self.indent
                {
                    self.skip_ws_to_eol(SkipTabs::Yes)?;

                    if !self.input.next_is_breakz() {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tabs disallowed within this context (block indentation)",
                        ));
                    }

                    if matches!(self.input.look_ch(), '\n' | '\r') {
                        consume_linebreak(self);
                    }
                } else {
                    self.skip_blank();
                }
            }

            ' ' => self.skip_blank(),

            '\n' | '\r' => consume_linebreak(self),

            // A BOM is skipped only at column 0, outside flow context, while the document
            // prefix is still allowed.
            c if is_bom(c)
                && self.document_prefix_allowed
                && self.flow_level == 0
                && self.mark.col == 0 =>
            {
                self.skip_bom();
            }

            '#' => {
                self.push_comment_token()?;

                if matches!(self.input.look_ch(), '\n' | '\r') {
                    consume_linebreak(self);
                }
                if stop_after_comment {
                    return Ok(true);
                }
            }

            _ => break,
        }
    }

    // The marker is consumed here whether or not an error results. Note that the early
    // `Ok(true)` return above leaves it in place for a later call.
    if let Some(err_mark) = self.interrupted_plain_by_comment.take() {
        let is_immediate_next_line = self.mark.line == err_mark.line + 1;

        // Only a more-indented continuation on the very next line, in block context, can be
        // the rest of the interrupted scalar.
        if self.flow_level == 0
            && is_immediate_next_line
            && (self.mark.col as isize) > self.indent
        {
            self.input.lookahead(4);

            if !self.input.next_is_z()
                && !self.input.next_is_document_indicator()
                && self.input.next_can_be_plain_scalar(false)
            {
                return Err(ScanError::new_str(
                    err_mark,
                    "comment intercepting the multiline text",
                ));
            }
        }
    }

    Ok(false)
}
1768
1769 fn skip_yaml_whitespace(&mut self, stop_after_comment: bool) -> Result<bool, ScanError> {
1777 let mut need_whitespace = true;
1778 loop {
1779 match self.input.look_ch() {
1780 ' ' => {
1781 self.skip_blank();
1782
1783 need_whitespace = false;
1784 }
1785 '\n' | '\r' => {
1786 self.input.lookahead(2);
1787 self.skip_linebreak();
1788 if self.flow_level == 0 {
1789 self.allow_simple_key();
1790 }
1791 need_whitespace = false;
1792 }
1793 '#' => {
1794 if need_whitespace {
1795 self.skip_comment();
1796 } else {
1797 self.push_comment_token()?;
1798 if stop_after_comment {
1799 return Ok(true);
1800 }
1801 }
1802 }
1803 _ => break,
1804 }
1805 }
1806
1807 if need_whitespace {
1808 Err(ScanError::new_str(self.mark(), "expected whitespace"))
1809 } else {
1810 Ok(false)
1811 }
1812 }
1813
1814 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
1815 debug_assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
1816
1817 if !self.comments_possible {
1818 let (chars_consumed, result) = self.input.skip_ws_to_eol(skip_tabs);
1819 self.mark.col += chars_consumed;
1820 self.mark.offsets.chars += chars_consumed;
1821 self.mark.offsets.bytes = self.input.byte_offset();
1822 return result.map_err(|msg| ScanError::new_str(self.mark, msg));
1823 }
1824
1825 let (chars_consumed, whitespace) = self.input.skip_ws_to_eol_blanks(skip_tabs);
1826 self.mark.col += chars_consumed;
1827 self.mark.offsets.chars += chars_consumed;
1828 self.mark.offsets.bytes = self.input.byte_offset();
1829
1830 if self.input.look_ch() != '#' {
1831 return Ok(whitespace);
1832 }
1833
1834 if !whitespace.found_tabs() && !whitespace.has_valid_yaml_ws() {
1835 return Err(ScanError::new_str(
1836 self.mark,
1837 "comments must be separated from other tokens by whitespace",
1838 ));
1839 }
1840
1841 self.push_comment_token()?;
1842 Ok(whitespace)
1843 }
1844
1845 fn fetch_stream_start(&mut self) {
1846 let mark = self.mark;
1847 self.indent = -1;
1848 self.stream_start_produced = true;
1849 self.allow_simple_key();
1850 self.tokens
1851 .push_back(Token(Span::empty(mark), TokenType::StreamStart(TEncoding::Utf8)).into());
1852 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1853 }
1854
1855 fn fetch_stream_end(&mut self) -> ScanResult {
1856 if self.mark.col != 0 {
1858 self.mark.col = 0;
1859 self.mark.line += 1;
1860 }
1861
1862 if let Some((mark, bracket)) = self.flow_markers.pop() {
1863 return Err(Self::unclosed_bracket(mark, bracket));
1864 }
1865
1866 for sk in &mut self.simple_keys {
1869 if sk.required && sk.possible {
1870 return Err(self.simple_key_expected());
1871 }
1872 sk.possible = false;
1873 }
1874
1875 self.unroll_indent(-1);
1876 self.remove_simple_key()?;
1877 self.disallow_simple_key();
1878
1879 self.tokens
1880 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd).into());
1881 Ok(())
1882 }
1883
1884 fn fetch_directive(&mut self) -> ScanResult {
1885 self.unroll_indent(-1);
1886 self.remove_simple_key()?;
1887
1888 self.disallow_simple_key();
1889
1890 let token_index = self.tokens.len();
1891 let tok = self.scan_directive()?;
1892 self.insert_token(token_index, tok);
1893
1894 Ok(())
1895 }
1896
1897 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
1898 let start_mark = self.mark;
1899 self.skip_non_blank();
1900
1901 let name = self.scan_directive_name()?;
1902 let tok = match name.as_ref() {
1903 "YAML" => self.scan_version_directive_value(&start_mark)?,
1904 "TAG" => self.scan_tag_directive_value(&start_mark)?,
1905 _ => {
1906 let mut params = Vec::new();
1907 while self.input.next_is_blank() {
1908 let n_blanks = self.input.skip_while_blank();
1909 self.mark.offsets.chars += n_blanks;
1910 self.mark.col += n_blanks;
1911 self.mark.offsets.bytes = self.input.byte_offset();
1912
1913 if !is_blank_or_breakz(self.input.peek()) {
1914 let mut param = String::new();
1915 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut param);
1916 self.mark.offsets.chars += n_chars;
1917 self.mark.col += n_chars;
1918 self.mark.offsets.bytes = self.input.byte_offset();
1919 params.push(param);
1920 }
1921 }
1922
1923 Token(
1924 Span::new(start_mark, self.mark),
1925 TokenType::ReservedDirective(name, params),
1926 )
1927 }
1928 };
1929
1930 self.skip_ws_to_eol(SkipTabs::Yes)?;
1931
1932 if self.input.next_is_breakz() {
1933 self.input.lookahead(2);
1934 self.skip_linebreak();
1935 Ok(tok)
1936 } else {
1937 Err(ScanError::new_str(
1938 start_mark,
1939 "while scanning a directive, did not find expected comment or line break",
1940 ))
1941 }
1942 }
1943
1944 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1945 let n_blanks = self.input.skip_while_blank();
1946 self.mark.offsets.chars += n_blanks;
1947 self.mark.col += n_blanks;
1948 self.mark.offsets.bytes = self.input.byte_offset();
1949
1950 let major = self.scan_version_directive_number(mark)?;
1951
1952 if self.input.peek() != '.' {
1953 return Err(ScanError::new_str(
1954 *mark,
1955 "while scanning a YAML directive, did not find expected digit or '.' character",
1956 ));
1957 }
1958 self.skip_non_blank();
1959
1960 let minor = self.scan_version_directive_number(mark)?;
1961
1962 Ok(Token(
1963 Span::new(*mark, self.mark),
1964 TokenType::VersionDirective(major, minor),
1965 ))
1966 }
1967
1968 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1969 let start_mark = self.mark;
1970 let mut string = String::new();
1971
1972 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut string);
1973 self.mark.offsets.chars += n_chars;
1974 self.mark.col += n_chars;
1975 self.mark.offsets.bytes = self.input.byte_offset();
1976
1977 if string.is_empty() {
1978 return Err(ScanError::new_str(
1979 start_mark,
1980 "while scanning a directive, could not find expected directive name",
1981 ));
1982 }
1983
1984 if !is_blank_or_breakz(self.input.peek()) {
1985 return Err(ScanError::new_str(
1986 start_mark,
1987 "while scanning a directive, found unexpected non-alphabetical character",
1988 ));
1989 }
1990
1991 Ok(string)
1992 }
1993
1994 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1995 let mut val = 0u32;
1996 let mut length = 0usize;
1997 while let Some(digit) = self.input.look_ch().to_digit(10) {
1998 if length + 1 > 9 {
1999 return Err(ScanError::new_str(
2000 *mark,
2001 "while scanning a YAML directive, found extremely long version number",
2002 ));
2003 }
2004 length += 1;
2005 val = val * 10 + digit;
2006 self.skip_non_blank();
2007 }
2008
2009 if length == 0 {
2010 return Err(ScanError::new_str(
2011 *mark,
2012 "while scanning a YAML directive, did not find expected version number",
2013 ));
2014 }
2015
2016 Ok(val)
2017 }
2018
2019 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
2020 let n_blanks = self.input.skip_while_blank();
2021 self.mark.offsets.chars += n_blanks;
2022 self.mark.col += n_blanks;
2023 self.mark.offsets.bytes = self.input.byte_offset();
2024
2025 let handle = self.scan_tag_handle_directive_cow(mark)?;
2026
2027 let n_blanks = self.input.skip_while_blank();
2028 self.mark.offsets.chars += n_blanks;
2029 self.mark.col += n_blanks;
2030 self.mark.offsets.bytes = self.input.byte_offset();
2031
2032 let prefix = self.scan_tag_prefix_directive_cow(mark)?;
2033
2034 self.input.lookahead(1);
2035
2036 if self.input.next_is_blank_or_breakz() {
2037 Ok(Token(
2038 Span::new(*mark, self.mark),
2039 TokenType::TagDirective(handle, prefix),
2040 ))
2041 } else {
2042 Err(ScanError::new_str(
2043 *mark,
2044 "while scanning TAG, did not find expected whitespace or line break",
2045 ))
2046 }
2047 }
2048
2049 fn fetch_tag(&mut self) -> ScanResult {
2050 self.save_simple_key();
2051 self.disallow_simple_key();
2052
2053 let tok = self.scan_tag()?;
2054 self.tokens.push_back(tok.into());
2055 Ok(())
2056 }
2057
/// Scans a tag at the current position into a `TokenType::Tag(handle, suffix)` token.
///
/// The verbatim `!<...>` form and the shorthand forms are handled here. When the input
/// exposes no byte offsets, the work is delegated to `scan_tag_owned`.
///
/// # Errors
/// Returns a [`ScanError`] if the handle or suffix is malformed, or if the tag is not
/// followed by a blank, a line break, the end of input, or (inside a flow collection) one
/// of `,`, `]`, `}`.
fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    self.input.lookahead(2);

    // Without byte offsets no slices can be taken from the input.
    if self.input.byte_offset().is_none() {
        return self.scan_tag_owned(&start_mark);
    }

    let (handle, suffix): (Cow<'input, str>, Cow<'input, str>) =
        if self.input.nth_char_is(1, '<') {
            // Verbatim tag: the handle is empty and the suffix is the scanned URI.
            let suffix = self.scan_verbatim_tag(&start_mark)?;
            (Cow::Owned(String::new()), Cow::Owned(suffix))
        } else {
            let handle = self.scan_tag_handle_cow(&start_mark)?;
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                // `!!` or `!name!`: a real handle, which must be followed by a suffix.
                let suffix = self.scan_tag_shorthand_suffix_cow(&start_mark, true)?;
                (handle, suffix)
            } else {
                // `!` or `!text`: whatever was read after the `!` is part of the suffix.
                let remaining_suffix =
                    self.scan_tag_shorthand_suffix_cow(&start_mark, false)?;

                let suffix = if handle.len() > 1 {
                    if remaining_suffix.is_empty() {
                        // Drop the leading `!`, keeping the borrow when there is one.
                        match handle {
                            Cow::Borrowed(s) => Cow::Borrowed(&s[1..]),
                            Cow::Owned(s) => Cow::Owned(s[1..].to_owned()),
                        }
                    } else {
                        let mut combined = handle[1..].to_owned();
                        combined.push_str(&remaining_suffix);
                        Cow::Owned(combined)
                    }
                } else {
                    remaining_suffix
                };

                if suffix.is_empty() {
                    // A lone `!` is reported with an empty handle and `!` as the suffix.
                    (Cow::Borrowed(""), Cow::Borrowed("!"))
                } else {
                    (Cow::Borrowed("!"), suffix)
                }
            }
        };

    if is_blank_or_breakz(self.input.look_ch())
        || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
    {
        Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Tag(handle, suffix),
        ))
    } else {
        Err(ScanError::new_str(
            start_mark,
            "while scanning a tag, did not find expected whitespace or line break",
        ))
    }
}
2135
2136 fn scan_tag_owned(&mut self, start_mark: &Marker) -> Result<Token<'input>, ScanError> {
2138 let mut handle = String::new();
2139 let mut suffix;
2140
2141 if self.input.nth_char_is(1, '<') {
2142 suffix = self.scan_verbatim_tag(start_mark)?;
2143 } else {
2144 handle = self.scan_tag_handle(false, start_mark)?;
2146 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
2148 let is_secondary_handle = handle == "!!";
2150 suffix =
2151 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", start_mark)?;
2152 } else {
2153 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, start_mark)?;
2154 "!".clone_into(&mut handle);
2155 if suffix.is_empty() {
2158 handle.clear();
2159 "!".clone_into(&mut suffix);
2160 }
2161 }
2162 }
2163
2164 if is_blank_or_breakz(self.input.look_ch())
2165 || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
2166 {
2167 Ok(Token(
2170 Span::new(*start_mark, self.mark),
2171 TokenType::Tag(handle.into(), suffix.into()),
2172 ))
2173 } else {
2174 Err(ScanError::new_str(
2175 *start_mark,
2176 "while scanning a tag, did not find expected whitespace or line break",
2177 ))
2178 }
2179 }
2180
2181 fn scan_tag_handle_cow(&mut self, mark: &Marker) -> Result<Cow<'input, str>, ScanError> {
2186 let Some(start) = self.input.byte_offset() else {
2187 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
2188 };
2189
2190 if self.input.look_ch() != '!' {
2191 return Err(ScanError::new_str(
2192 *mark,
2193 "while scanning a tag, did not find expected '!'",
2194 ));
2195 }
2196
2197 self.skip_non_blank();
2199
2200 self.input.lookahead(1);
2202 while self.input.next_is_alpha() {
2203 self.skip_non_blank();
2204 self.input.lookahead(1);
2205 }
2206
2207 if self.input.peek() == '!' {
2209 self.skip_non_blank();
2210 }
2211
2212 let Some(end) = self.input.byte_offset() else {
2213 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
2214 };
2215
2216 if let Some(slice) = self.try_borrow_slice(start, end) {
2217 Ok(Cow::Borrowed(slice))
2218 } else {
2219 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
2220 ScanError::new_str(
2221 *mark,
2222 "internal error: input advertised slicing but did not provide a slice",
2223 )
2224 })?;
2225 Ok(Cow::Owned(slice.to_owned()))
2226 }
2227 }
2228
2229 fn scan_tag_shorthand_suffix_cow(
2233 &mut self,
2234 mark: &Marker,
2235 require_non_empty: bool,
2236 ) -> Result<Cow<'input, str>, ScanError> {
2237 let Some(start) = self.input.byte_offset() else {
2238 return Ok(Cow::Owned(
2239 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
2240 ));
2241 };
2242
2243 while is_tag_char(self.input.look_ch()) {
2245 if self.input.peek() == '%' {
2246 let current = self
2248 .input
2249 .byte_offset()
2250 .expect("byte_offset() must remain available once enabled");
2251 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
2252 slice.to_owned()
2253 } else {
2254 String::new()
2255 };
2256
2257 while is_tag_char(self.input.look_ch()) {
2259 if self.input.peek() == '%' {
2260 out.push(self.scan_uri_escapes(mark)?);
2261 } else {
2262 out.push(self.input.peek());
2263 self.skip_non_blank();
2264 }
2265 }
2266 return Ok(Cow::Owned(out));
2267 }
2268 self.skip_non_blank();
2269 }
2270
2271 let Some(end) = self.input.byte_offset() else {
2272 return Ok(Cow::Owned(
2273 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
2274 ));
2275 };
2276
2277 if require_non_empty && start == end {
2278 return Err(ScanError::new_str(
2279 *mark,
2280 "while parsing a tag, did not find expected tag URI",
2281 ));
2282 }
2283
2284 if let Some(slice) = self.try_borrow_slice(start, end) {
2285 Ok(Cow::Borrowed(slice))
2286 } else {
2287 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
2288 ScanError::new_str(
2289 *mark,
2290 "internal error: input advertised slicing but did not provide a slice",
2291 )
2292 })?;
2293 Ok(Cow::Owned(slice.to_owned()))
2294 }
2295 }
2296
2297 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
2298 let mut string = String::new();
2299 if self.input.look_ch() != '!' {
2300 return Err(ScanError::new_str(
2301 *mark,
2302 "while scanning a tag, did not find expected '!'",
2303 ));
2304 }
2305
2306 string.push(self.input.peek());
2307 self.skip_non_blank();
2308
2309 let n_chars = self.input.fetch_while_is_alpha(&mut string);
2310 self.mark.offsets.chars += n_chars;
2311 self.mark.col += n_chars;
2312 self.mark.offsets.bytes = self.input.byte_offset();
2313
2314 if self.input.peek() == '!' {
2316 string.push(self.input.peek());
2317 self.skip_non_blank();
2318 } else if directive && string != "!" {
2319 return Err(ScanError::new_str(
2323 *mark,
2324 "while parsing a tag directive, did not find expected '!'",
2325 ));
2326 }
2327 Ok(string)
2328 }
2329
2330 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
2336 let mut string = String::new();
2337
2338 if self.input.look_ch() == '!' {
2339 string.push(self.input.peek());
2341 self.skip_non_blank();
2342 } else if !is_tag_char(self.input.peek()) {
2343 return Err(ScanError::new_str(
2345 *start_mark,
2346 "invalid global tag character",
2347 ));
2348 } else if self.input.peek() == '%' {
2349 string.push(self.scan_uri_escapes(start_mark)?);
2351 } else {
2352 string.push(self.input.peek());
2354 self.skip_non_blank();
2355 }
2356
2357 while is_uri_char(self.input.look_ch()) {
2358 if self.input.peek() == '%' {
2359 string.push(self.scan_uri_escapes(start_mark)?);
2360 } else {
2361 string.push(self.input.peek());
2362 self.skip_non_blank();
2363 }
2364 }
2365
2366 Ok(string)
2367 }
2368
2369 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
2373 self.skip_non_blank();
2375 self.skip_non_blank();
2376
2377 let mut string = String::new();
2378 while is_uri_char(self.input.look_ch()) {
2379 if self.input.peek() == '%' {
2380 string.push(self.scan_uri_escapes(start_mark)?);
2381 } else {
2382 string.push(self.input.peek());
2383 self.skip_non_blank();
2384 }
2385 }
2386
2387 if string.is_empty() {
2388 return Err(ScanError::new_str(
2389 *start_mark,
2390 "while parsing a tag, did not find expected tag URI",
2391 ));
2392 }
2393
2394 if self.input.peek() != '>' {
2395 return Err(ScanError::new_str(
2396 *start_mark,
2397 "while scanning a verbatim tag, did not find the expected '>'",
2398 ));
2399 }
2400 self.skip_non_blank();
2401
2402 Ok(string)
2403 }
2404
2405 fn scan_tag_shorthand_suffix(
2406 &mut self,
2407 _directive: bool,
2408 _is_secondary: bool,
2409 head: &str,
2410 mark: &Marker,
2411 ) -> Result<String, ScanError> {
2412 let mut length = head.len();
2413 let mut string = String::new();
2414
2415 if length > 1 {
2418 string.extend(head.chars().skip(1));
2419 }
2420
2421 while is_tag_char(self.input.look_ch()) {
2422 if self.input.peek() == '%' {
2424 string.push(self.scan_uri_escapes(mark)?);
2425 } else {
2426 string.push(self.input.peek());
2427 self.skip_non_blank();
2428 }
2429
2430 length += 1;
2431 }
2432
2433 if length == 0 {
2434 return Err(ScanError::new_str(
2435 *mark,
2436 "while parsing a tag, did not find expected tag URI",
2437 ));
2438 }
2439
2440 Ok(string)
2441 }
2442
2443 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
2444 let mut width = 0usize;
2445 let mut bytes = [0u8; 4];
2446 let mut bytes_len = 0usize;
2447 loop {
2448 self.input.lookahead(3);
2449
2450 let c = self.input.peek_nth(1);
2451 let nc = self.input.peek_nth(2);
2452
2453 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
2454 return Err(ScanError::new_str(
2455 *mark,
2456 "while parsing a tag, found an invalid escape sequence",
2457 ));
2458 }
2459
2460 let byte = u8::try_from((as_hex(c) << 4) + as_hex(nc))
2461 .expect("two hex nibbles always fit in a byte");
2462 if width == 0 {
2463 width = match byte {
2464 _ if byte & 0x80 == 0x00 => 1,
2465 _ if byte & 0xE0 == 0xC0 => 2,
2466 _ if byte & 0xF0 == 0xE0 => 3,
2467 _ if byte & 0xF8 == 0xF0 => 4,
2468 _ => {
2469 return Err(ScanError::new_str(
2470 *mark,
2471 "while parsing a tag, found an incorrect leading UTF-8 byte",
2472 ));
2473 }
2474 };
2475 } else if byte & 0xc0 != 0x80 {
2476 return Err(ScanError::new_str(
2477 *mark,
2478 "while parsing a tag, found an incorrect trailing UTF-8 byte",
2479 ));
2480 }
2481
2482 bytes[bytes_len] = byte;
2483 bytes_len += 1;
2484
2485 self.skip_n_non_blank(3);
2486
2487 width -= 1;
2488 if width == 0 {
2489 break;
2490 }
2491 }
2492
2493 let s = core::str::from_utf8(&bytes[..bytes_len]).map_err(|_| {
2494 ScanError::new_str(
2495 *mark,
2496 "while parsing a tag, found an invalid UTF-8 codepoint",
2497 )
2498 })?;
2499
2500 let mut chars = s.chars();
2501 match (chars.next(), chars.next()) {
2502 (Some(ch), None) => Ok(ch),
2503 _ => Err(ScanError::new_str(
2504 *mark,
2505 "while parsing a tag, found an invalid UTF-8 codepoint",
2506 )),
2507 }
2508 }
2509
2510 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
2511 self.save_simple_key();
2512 self.disallow_simple_key();
2513
2514 let tok = self.scan_anchor(alias)?;
2515
2516 self.tokens.push_back(tok.into());
2517
2518 Ok(())
2519 }
2520
2521 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
2522 let start_mark = self.mark;
2523
2524 self.skip_non_blank();
2526
2527 if let Some(start) = self.input.byte_offset() {
2529 while is_anchor_char(self.input.look_ch()) {
2530 self.skip_non_blank();
2531 }
2532
2533 let end = self
2534 .input
2535 .byte_offset()
2536 .expect("byte_offset() must remain available once enabled");
2537
2538 if start == end {
2539 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2540 }
2541
2542 let cow = if let Some(slice) = self.try_borrow_slice(start, end) {
2543 Cow::Borrowed(slice)
2544 } else if let Some(slice) = self.input.slice_bytes(start, end) {
2545 Cow::Owned(slice.to_owned())
2546 } else {
2547 return Err(ScanError::new_str(
2548 start_mark,
2549 "internal error: input advertised slicing but did not provide a slice",
2550 ));
2551 };
2552
2553 let tok = if alias {
2554 TokenType::Alias(cow)
2555 } else {
2556 TokenType::Anchor(cow)
2557 };
2558 return Ok(Token(Span::new(start_mark, self.mark), tok));
2559 }
2560
2561 let mut string = String::new();
2562 while is_anchor_char(self.input.look_ch()) {
2563 string.push(self.input.peek());
2564 self.skip_non_blank();
2565 }
2566
2567 if string.is_empty() {
2568 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2569 }
2570
2571 let tok = if alias {
2572 TokenType::Alias(string.into())
2573 } else {
2574 TokenType::Anchor(string.into())
2575 };
2576 Ok(Token(Span::new(start_mark, self.mark), tok))
2577 }
2578
2579 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
2580 self.save_simple_key();
2582
2583 let start_mark = self.mark;
2584 let indicator = self.input.peek();
2585 self.flow_markers.push((start_mark, indicator));
2586
2587 self.roll_one_col_indent();
2588 self.increase_flow_level()?;
2589
2590 self.allow_simple_key();
2591
2592 self.skip_non_blank();
2593
2594 if tok == TokenType::FlowMappingStart {
2595 self.flow_mapping_started.push(true);
2596 } else {
2597 self.flow_mapping_started.push(false);
2598 self.implicit_flow_mapping_states
2599 .push(ImplicitMappingState::Possible);
2600 }
2601
2602 let token_index = self.tokens.len();
2603 self.skip_ws_to_eol(SkipTabs::Yes)?;
2604
2605 self.insert_token(token_index, Token(Span::new(start_mark, self.mark), tok));
2606 Ok(())
2607 }
2608
2609 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
2610 if self.flow_level == 0 {
2612 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2613 }
2614
2615 let Some((open_mark, open_ch)) = self.flow_markers.pop() else {
2616 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2617 };
2618
2619 let (expected_open, actual_close) = match tok {
2620 TokenType::FlowSequenceEnd => ('[', ']'),
2621 TokenType::FlowMappingEnd => ('{', '}'),
2622 _ => unreachable!("flow collection end called with non-closing token"),
2623 };
2624
2625 if open_ch != expected_open {
2626 return Err(ScanError::new(
2627 open_mark,
2628 format!("mismatched bracket '{open_ch}' closed by '{actual_close}'"),
2629 ));
2630 }
2631
2632 let flow_level = self.flow_level;
2633
2634 self.remove_simple_key()?;
2635
2636 if matches!(tok, TokenType::FlowSequenceEnd) {
2637 self.end_implicit_mapping(self.mark, flow_level);
2638 self.implicit_flow_mapping_states.pop();
2640 }
2641 self.flow_mapping_started.pop();
2642
2643 self.decrease_flow_level();
2644
2645 self.disallow_simple_key();
2646
2647 let start_mark = self.mark;
2648 self.skip_non_blank();
2649 let token_index = self.tokens.len();
2650 self.skip_ws_to_eol(SkipTabs::Yes)?;
2651
2652 if self.flow_level > 0 {
2658 self.adjacent_value_allowed_at = self.mark.index();
2659 }
2660
2661 self.insert_token(token_index, Token(Span::new(start_mark, self.mark), tok));
2662 Ok(())
2663 }
2664
2665 fn fetch_flow_entry(&mut self) -> ScanResult {
2667 self.remove_simple_key()?;
2668 self.allow_simple_key();
2669
2670 self.end_implicit_mapping(self.mark, self.flow_level);
2671 if self.current_flow_collection_is_sequence() {
2672 self.set_current_flow_mapping_started(false);
2673 }
2674
2675 let start_mark = self.mark;
2676 self.skip_non_blank();
2677 let token_index = self.tokens.len();
2678 self.skip_ws_to_eol(SkipTabs::Yes)?;
2679
2680 self.insert_token(
2681 token_index,
2682 Token(Span::new(start_mark, self.mark), TokenType::FlowEntry),
2683 );
2684 Ok(())
2685 }
2686
2687 fn increase_flow_level(&mut self) -> ScanResult {
2688 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
2689 self.flow_level = self
2690 .flow_level
2691 .checked_add(1)
2692 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
2693 Ok(())
2694 }
2695
2696 fn decrease_flow_level(&mut self) {
2697 if self.flow_level > 0 {
2698 self.flow_level -= 1;
2699 self.simple_keys.pop().unwrap();
2700 }
2701 }
2702
/// Handles a block sequence entry indicator (`-`) and queues a `BlockEntry` token.
///
/// # Errors
/// Returns a [`ScanError`] if the indicator appears inside a flow collection, where a
/// simple key is not allowed, directly after an anchor or tag that sits at column 0 like
/// the indicator itself, or if tabs separate it from another `-` entry. Failures from
/// skipping whitespace and from removing the simple key are propagated.
fn fetch_block_entry(&mut self) -> ScanResult {
    if self.flow_level > 0 {
        return Err(ScanError::new_str(
            self.mark,
            r#""-" is only valid inside a block"#,
        ));
    }
    if !self.simple_key_allowed {
        return Err(ScanError::new_str(
            self.mark,
            "block sequence entries are not allowed in this context",
        ));
    }

    // An anchor or tag queued last, at column 0 like this entry, inside an open block
    // (`indent > -1`) is rejected.
    if let Some(QueuedToken(span, QueuedTokenType::Anchor(..) | QueuedTokenType::Tag(..))) =
        self.tokens.back()
    {
        if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
            return Err(ScanError::new_str(
                span.start,
                "invalid indentation for anchor",
            ));
        }
    }

    let mark = self.mark;
    // Step over the `-` indicator.
    self.skip_non_blank();

    self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
    // Remember the queue position so the entry token precedes anything queued while
    // skipping whitespace below.
    let token_index = self.tokens.len();
    let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
    self.input.lookahead(2);
    // Tabs may not be what separates this indicator from a nested `-` entry.
    if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
    {
        return Err(ScanError::new_str(
            self.mark,
            "'-' must be followed by a valid YAML whitespace",
        ));
    }

    self.skip_ws_to_eol(SkipTabs::No)?;
    self.input.lookahead(1);
    if self.input.next_is_break() || self.input.next_is_flow() {
        self.roll_one_col_indent();
    }

    self.remove_simple_key()?;
    self.allow_simple_key();

    self.insert_token(
        token_index,
        Token(Span::empty(self.mark), TokenType::BlockEntry),
    );

    Ok(())
}
2769
2770 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
2771 if let Some((mark, bracket)) = self.flow_markers.pop() {
2772 return Err(ScanError::new(
2773 mark,
2774 format!("unclosed bracket '{bracket}'"),
2775 ));
2776 }
2777
2778 self.unroll_indent(-1);
2779 self.remove_simple_key()?;
2780 self.disallow_simple_key();
2781
2782 let mark = self.mark;
2783
2784 self.skip_n_non_blank(3);
2785
2786 self.document_prefix_allowed = matches!(t, TokenType::DocumentEnd);
2787 self.tokens
2788 .push_back(Token(Span::new(mark, self.mark), t).into());
2789 Ok(())
2790 }
2791
2792 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
2793 self.save_simple_key();
2794 self.allow_simple_key();
2795 let tok = self.scan_block_scalar(literal)?;
2796
2797 self.tokens.push_back(tok.into());
2798 Ok(())
2799 }
2800
/// Scans a block scalar into a `TokenType::Scalar` token with `ScalarStyle::Literal` when
/// `literal` is true and `ScalarStyle::Folded` otherwise.
///
/// The header may carry a chomping indicator (`+` or `-`) and a non-zero indentation
/// digit, in either order. When no indentation digit is given, the content indentation is
/// detected by `skip_block_scalar_first_line_indent`.
///
/// # Errors
/// Returns a [`ScanError`] if the indentation indicator is `0`, if the header is followed
/// by something other than whitespace, a comment, or a line break, if the content starts
/// with a tab, or if a content line is wrongly indented.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    let mut increment: usize = 0;
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        ScalarStyle::Literal
    } else {
        ScalarStyle::Folded
    };

    // `string` accumulates the scalar. `leading_break` is the break ending the last content
    // line, `trailing_breaks` the breaks of the lines skipped after it, and `chomping_break`
    // the break that ends the header line.
    let mut string = String::new();
    let mut leading_break = String::new();
    let mut trailing_breaks = String::new();
    let mut chomping_break = String::new();

    // Step over the block scalar indicator.
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header, form 1: chomping indicator first, optionally followed by an indentation digit.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    // Header, form 2: indentation digit first, optionally followed by a chomping indicator.
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    // Rest of the header line: whitespace and possibly a comment.
    self.skip_ws_to_eol(SkipTabs::Yes)?;

    self.input.lookahead(1);
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indentation digit is added to the enclosing indentation when that one
    // is non-negative; otherwise the digit is used as is.
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    // `indent == 0` means no explicit indentation was given: detect it from the content.
    if indent == 0 {
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // `next_is_z()` is true (presumably at end of input): no content line follows, so the
    // result is made of line breaks only, selected by the chomping mode.
    if self.input.next_is_z() {
        let contents = match chomping {
            Chomping::Strip => String::new(),
            // Still on the header line: nothing was read after the header.
            _ if self.mark.line == start_mark.line() => String::new(),
            Chomping::Clip => chomping_break,
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            Chomping::Keep => trailing_breaks,
        };
        return Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, contents.into()),
        ));
    }

    // A line indented less than the content but more than the enclosing block is an error.
    // At top level (`self.indent < 0`) and column 0, a document start, a document end or a
    // `#` is tolerated.
    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        if self.indent < 0 && self.mark.col == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_start()
                || self.input.next_is_document_end()
                || self.input.peek() == '#'
            {
            } else {
                return Err(ScanError::new_str(
                    self.mark,
                    "wrongly indented line in block scalar",
                ));
            }
        } else {
            return Err(ScanError::new_str(
                self.mark,
                "wrongly indented line in block scalar",
            ));
        }
    }

    let mut line_buffer = String::with_capacity(100);
    // From here on `start_mark` is the start of the content, which is what the returned
    // span begins at.
    let start_mark = self.mark;
    while self.mark.col == indent && !self.input.next_is_z() {
        // With zero indentation, a document end marker terminates the scalar.
        if indent == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        trailing_blank = self.input.next_is_blank();
        // Folding: in a folded scalar, a break between two lines that both start with a
        // non-blank character becomes a single space, unless further breaks were skipped
        // in between, in which case those breaks are used instead.
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            string.push_str(&trailing_breaks);
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        self.input.lookahead(2);
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Skip the indentation of the next line, collecting the breaks of skipped lines.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Final line break: kept unless stripping. When the loop stopped because `next_is_z()`
    // became true in the middle of a content line, a `\n` is appended.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    // Trailing breaks are only preserved in keep mode.
    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string.into()),
    ))
}
3011
3012 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
3022 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
3024 string.push(self.input.peek());
3025 self.skip_blank();
3031 }
3032
3033 if self.input.buf_is_empty() {
3036 let mut n_chars = 0;
3044 debug_assert!(line_buffer.is_empty());
3045 while let Some(c) = self.input.raw_read_non_breakz_ch() {
3046 line_buffer.push(c);
3047 n_chars += 1;
3048 }
3049
3050 self.mark.col += n_chars;
3052 self.mark.offsets.chars += n_chars;
3053 self.mark.offsets.bytes = self.input.byte_offset();
3054
3055 string.reserve(line_buffer.len());
3057 string.push_str(line_buffer);
3058 line_buffer.clear();
3060 }
3061 }
3062
3063 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
3065 loop {
3066 if indent < self.input.bufmaxlen() - 2 {
3068 self.input.lookahead(self.input.bufmaxlen());
3069 while self.mark.col < indent && self.input.peek() == ' ' {
3070 self.skip_blank();
3071 }
3072 } else {
3073 loop {
3074 self.input.lookahead(self.input.bufmaxlen());
3075 while !self.input.buf_is_empty()
3076 && self.mark.col < indent
3077 && self.input.peek() == ' '
3078 {
3079 self.skip_blank();
3080 }
3081 if self.mark.col == indent
3085 || (!self.input.buf_is_empty() && self.input.peek() != ' ')
3086 {
3087 break;
3088 }
3089 }
3090 self.input.lookahead(2);
3091 }
3092
3093 if self.input.next_is_break() {
3095 self.read_break(breaks);
3096 } else {
3097 break;
3099 }
3100 }
3101 }
3102
3103 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
3108 let mut max_indent = 0;
3109 loop {
3110 while self.input.look_ch() == ' ' {
3112 self.skip_blank();
3113 }
3114
3115 if self.mark.col > max_indent {
3116 max_indent = self.mark.col;
3117 }
3118
3119 if self.input.next_is_break() {
3120 self.input.lookahead(2);
3122 self.read_break(breaks);
3123 } else {
3124 break;
3126 }
3127 }
3128
3129 *indent = max_indent.max((self.indent + 1) as usize);
3138 if self.indent > 0 {
3139 *indent = (*indent).max(1);
3140 }
3141 }
3142
3143 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
3144 self.save_simple_key();
3145 self.disallow_simple_key();
3146
3147 let token_index = self.tokens.len();
3148 let tok = self.scan_flow_scalar(single)?;
3149
3150 if self.skip_to_next_token(true)? {
3153 self.adjacent_value_allowed_at = usize::MAX;
3154 } else {
3155 self.adjacent_value_allowed_at = self.mark.index();
3156 }
3157
3158 self.insert_token(token_index, tok);
3159 Ok(())
3160 }
3161
3162 #[allow(clippy::too_many_lines)]
3163 fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
3164 let start_mark = self.mark;
3165
3166 let mut buf = match self.input.byte_offset() {
3168 Some(off) => FlowScalarBuf::new_borrowed(off + self.input.peek().len_utf8()),
3169 None => FlowScalarBuf::new_owned(),
3170 };
3171
3172 let mut break_scratch = String::new();
3175
3176 self.skip_non_blank();
3178
3179 loop {
3180 self.input.lookahead(4);
3182
3183 if self.mark.col == 0 && self.input.next_is_document_indicator() {
3184 return Err(ScanError::new_str(
3185 start_mark,
3186 "while scanning a quoted scalar, found unexpected document indicator",
3187 ));
3188 }
3189
3190 if self.input.next_is_z() {
3191 return Err(ScanError::new_str(start_mark, "unclosed quote"));
3192 }
3193
3194 let mut leading_blanks = false;
3197 self.consume_flow_scalar_non_whitespace_chars(
3198 single,
3199 &mut buf,
3200 &mut leading_blanks,
3201 &start_mark,
3202 )?;
3203
3204 match self.input.look_ch() {
3205 '\'' if single => break,
3206 '"' if !single => break,
3207 _ => {}
3208 }
3209
3210 let mut trailing_ws_start: Option<usize> = None;
3226 let mut has_leading_break = false;
3227 let mut has_trailing_breaks = false;
3228
3229 let mut pending_ws_start: Option<usize> = None;
3231
3232 while self.input.next_is_blank() || self.input.next_is_break() {
3234 if self.input.next_is_blank() {
3235 if leading_blanks {
3237 if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
3238 return Err(ScanError::new_str(
3239 self.mark,
3240 "tab cannot be used as indentation",
3241 ));
3242 }
3243 self.skip_blank();
3244 } else {
3245 match buf {
3247 FlowScalarBuf::Owned(ref mut string) => {
3248 if trailing_ws_start.is_none() {
3249 trailing_ws_start = Some(string.len());
3250 }
3251 string.push(self.input.peek());
3252 }
3253 FlowScalarBuf::Borrowed { .. } => {
3254 if pending_ws_start.is_none() {
3255 pending_ws_start = self.input.byte_offset();
3256 }
3257 }
3258 }
3259 self.skip_blank();
3260
3261 if let (FlowScalarBuf::Borrowed { .. }, Some(ws_start), Some(ws_end)) =
3262 (&mut buf, pending_ws_start, self.input.byte_offset())
3263 {
3264 buf.note_pending_ws(ws_start, ws_end);
3265 }
3266 }
3267 } else {
3268 self.input.lookahead(2);
3269
3270 if leading_blanks {
3272 match buf {
3274 FlowScalarBuf::Owned(ref mut string) => self.read_break(string),
3275 FlowScalarBuf::Borrowed { .. } => {
3276 self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
3277 let Some(string) = buf.as_owned_mut() else {
3278 unreachable!()
3279 };
3280 self.read_break(string);
3281 }
3282 }
3283 has_trailing_breaks = true;
3284 } else {
3285 if let Some(pos) = trailing_ws_start.take() {
3287 if let FlowScalarBuf::Owned(ref mut string) = buf {
3288 string.truncate(pos);
3289 }
3290 }
3291
3292 if pending_ws_start.take().is_some() {
3293 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3295 self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
3296 }
3297 buf.discard_pending_ws();
3298 } else {
3299 buf.commit_pending_ws();
3300 }
3301
3302 break_scratch.clear();
3303 self.read_break(&mut break_scratch);
3304 has_leading_break = true;
3307 leading_blanks = true;
3308 }
3309 }
3310
3311 self.input.lookahead(1);
3312 }
3313
3314 if leading_blanks && has_leading_break && self.flow_level == 0 {
3317 let next_ch = self.input.peek();
3318 let is_closing_quote = (single && next_ch == '\'') || (!single && next_ch == '"');
3319 if !is_closing_quote && (self.mark.col as isize) <= self.indent {
3320 return Err(ScanError::new_str(
3321 self.mark,
3322 "invalid indentation in multiline quoted scalar",
3323 ));
3324 }
3325 }
3326
3327 if leading_blanks {
3329 if has_leading_break && !has_trailing_breaks {
3334 match buf {
3335 FlowScalarBuf::Owned(ref mut string) => string.push(' '),
3336 FlowScalarBuf::Borrowed { .. } => {
3337 self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
3338 let Some(string) = buf.as_owned_mut() else {
3339 unreachable!()
3340 };
3341 string.push(' ');
3342 }
3343 }
3344 }
3345 }
3346 } self.skip_non_blank();
3351 let end_mark = self.mark;
3352
3353 self.skip_ws_to_eol(SkipTabs::Yes)?;
3355 match self.input.peek() {
3356 ',' | '}' | ']' if self.flow_level > 0 => {}
3358 c if is_breakz(c) => {}
3360 ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
3363 ':' if self.flow_level > 0 => {}
3365 _ => {
3366 return Err(ScanError::new_str(
3367 self.mark,
3368 "invalid trailing content after double-quoted scalar",
3369 ));
3370 }
3371 }
3372
3373 let style = if single {
3374 ScalarStyle::SingleQuoted
3375 } else {
3376 ScalarStyle::DoubleQuoted
3377 };
3378
3379 let contents = match buf {
3380 FlowScalarBuf::Owned(string) => Cow::Owned(string),
3381 FlowScalarBuf::Borrowed {
3382 start,
3383 mut end,
3384 pending_ws_start,
3385 pending_ws_end,
3386 } => {
3387 if pending_ws_start.is_some() {
3389 end = pending_ws_end;
3390 }
3391 if let Some(slice) = self.try_borrow_slice(start, end) {
3392 Cow::Borrowed(slice)
3393 } else {
3394 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
3395 ScanError::new_str(
3396 start_mark,
3397 "internal error: input advertised offsets but did not provide a slice",
3398 )
3399 })?;
3400 Cow::Owned(slice.to_owned())
3401 }
3402 }
3403 };
3404
3405 Ok(Token(
3406 Span::new(start_mark, end_mark),
3407 TokenType::Scalar(style, contents),
3408 ))
3409 }
3410
3411 fn consume_flow_scalar_non_whitespace_chars(
3420 &mut self,
3421 single: bool,
3422 buf: &mut FlowScalarBuf,
3423 leading_blanks: &mut bool,
3424 start_mark: &Marker,
3425 ) -> Result<(), ScanError> {
3426 self.input.lookahead(2);
3427 while !is_blank_or_breakz(self.input.peek()) {
3428 match self.input.peek() {
3429 '\'' if self.input.peek_nth(1) == '\'' && single => {
3431 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3432 buf.commit_pending_ws();
3433 self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
3434 }
3435 let Some(string) = buf.as_owned_mut() else {
3436 unreachable!()
3437 };
3438 string.push('\'');
3439 self.skip_n_non_blank(2);
3440 }
3441 '\'' if single => break,
3443 '"' if !single => break,
3444 '\\' if !single && is_break(self.input.peek_nth(1)) => {
3446 self.input.lookahead(3);
3447 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3448 buf.commit_pending_ws();
3449 self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
3450 }
3451 self.skip_non_blank();
3452 self.skip_linebreak();
3453 *leading_blanks = true;
3454 break;
3455 }
3456 '\\' if !single => {
3458 if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
3459 buf.commit_pending_ws();
3460 self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
3461 }
3462 let Some(string) = buf.as_owned_mut() else {
3463 unreachable!()
3464 };
3465 string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
3466 }
3467 c => {
3468 match buf {
3469 FlowScalarBuf::Owned(ref mut string) => {
3470 string.push(c);
3471 }
3472 FlowScalarBuf::Borrowed { .. } => {
3473 buf.commit_pending_ws();
3474 }
3475 }
3476 self.skip_non_blank();
3477
3478 if let Some(new_end) = self.input.byte_offset() {
3479 if let FlowScalarBuf::Borrowed { end, .. } = buf {
3480 *end = new_end;
3481 }
3482 }
3483 }
3484 }
3485 self.input.lookahead(2);
3486 }
3487 Ok(())
3488 }
3489
3490 fn resolve_flow_scalar_escape_sequence(
3497 &mut self,
3498 start_mark: &Marker,
3499 ) -> Result<char, ScanError> {
3500 let mut code_length = 0usize;
3501 let mut ret = '\0';
3502
3503 match self.input.peek_nth(1) {
3504 '0' => ret = '\0',
3505 'a' => ret = '\x07',
3506 'b' => ret = '\x08',
3507 't' | '\t' => ret = '\t',
3508 'n' => ret = '\n',
3509 'v' => ret = '\x0b',
3510 'f' => ret = '\x0c',
3511 'r' => ret = '\x0d',
3512 'e' => ret = '\x1b',
3513 ' ' => ret = '\x20',
3514 '"' => ret = '"',
3515 '/' => ret = '/',
3516 '\\' => ret = '\\',
3517 'N' => ret = char::from_u32(0x85).unwrap(),
3519 '_' => ret = char::from_u32(0xA0).unwrap(),
3521 'L' => ret = char::from_u32(0x2028).unwrap(),
3523 'P' => ret = char::from_u32(0x2029).unwrap(),
3525 'x' => code_length = 2,
3526 'u' => code_length = 4,
3527 'U' => code_length = 8,
3528 _ => {
3529 return Err(ScanError::new_str(
3530 *start_mark,
3531 "while parsing a quoted scalar, found unknown escape character",
3532 ))
3533 }
3534 }
3535 self.skip_n_non_blank(2);
3536
3537 if code_length > 0 {
3539 self.input.lookahead(code_length);
3540 let mut value = 0u32;
3541 for i in 0..code_length {
3542 let c = self.input.peek_nth(i);
3543 if !is_hex(c) {
3544 return Err(ScanError::new_str(
3545 *start_mark,
3546 "while parsing a quoted scalar, did not find expected hexadecimal number",
3547 ));
3548 }
3549 value = (value << 4) + as_hex(c);
3550 }
3551
3552 self.skip_n_non_blank(code_length);
3553
3554 if code_length == 4 && (0xD800..=0xDBFF).contains(&value) {
3556 self.input.lookahead(2);
3557 if self.input.peek() == '\\' && self.input.peek_nth(1) == 'u' {
3558 self.skip_n_non_blank(2);
3559 self.input.lookahead(4);
3560 let mut low_value = 0u32;
3561 for i in 0..4 {
3562 let c = self.input.peek_nth(i);
3563 if !is_hex(c) {
3564 return Err(ScanError::new_str(
3565 *start_mark,
3566 "while parsing a quoted scalar, did not find expected hexadecimal number for low surrogate",
3567 ));
3568 }
3569 low_value = (low_value << 4) + as_hex(c);
3570 }
3571 if (0xDC00..=0xDFFF).contains(&low_value) {
3572 value = 0x10000 + (((value - 0xD800) << 10) | (low_value - 0xDC00));
3573 self.skip_n_non_blank(4);
3574 } else {
3575 return Err(ScanError::new_str(
3576 *start_mark,
3577 "while parsing a quoted scalar, found invalid low surrogate",
3578 ));
3579 }
3580 } else {
3581 return Err(ScanError::new_str(
3582 *start_mark,
3583 "while parsing a quoted scalar, found high surrogate without following low surrogate",
3584 ));
3585 }
3586 } else if code_length == 4 && (0xDC00..=0xDFFF).contains(&value) {
3587 return Err(ScanError::new_str(
3588 *start_mark,
3589 "while parsing a quoted scalar, found unpaired low surrogate",
3590 ));
3591 }
3592
3593 let Some(ch) = char::from_u32(value) else {
3594 return Err(ScanError::new_str(
3595 *start_mark,
3596 "while parsing a quoted scalar, found invalid Unicode character escape code",
3597 ));
3598 };
3599 ret = ch;
3600 }
3601 Ok(ret)
3602 }
3603
3604 fn fetch_plain_scalar(&mut self) -> ScanResult {
3605 self.save_simple_key();
3606 self.disallow_simple_key();
3607
3608 let token_index = self.tokens.len();
3609 let tok = self.scan_plain_scalar()?;
3610
3611 self.insert_token(token_index, tok);
3612 Ok(())
3613 }
3614
3615 #[allow(clippy::too_many_lines)]
3620 fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
3621 self.unroll_non_block_indents();
3622 let indent = self.indent + 1;
3623 let start_mark = self.mark;
3624
3625 if self.flow_level > 0 && (start_mark.col as isize) < indent {
3626 return Err(ScanError::new_str(
3627 start_mark,
3628 "invalid indentation in flow construct",
3629 ));
3630 }
3631
3632 let mut string = String::with_capacity(32);
3633 self.buf_whitespaces.clear();
3634 self.buf_leading_break.clear();
3635 self.buf_trailing_breaks.clear();
3636 let mut end_mark = self.mark;
3637
3638 loop {
3639 self.input.lookahead(4);
3640 if (self.mark.col == 0 && self.input.next_is_document_indicator())
3641 || self.input.peek() == '#'
3642 {
3643 if self.input.peek() == '#'
3648 && !string.is_empty()
3649 && !self.buf_whitespaces.is_empty()
3650 && self.flow_level == 0
3651 {
3652 self.interrupted_plain_by_comment = Some(self.mark);
3653 }
3654 break;
3655 }
3656
3657 if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
3658 return Err(ScanError::new_str(
3659 self.mark,
3660 "plain scalar cannot start with '-' followed by ,[]{}",
3661 ));
3662 }
3663
3664 if !self.input.next_is_blank_or_breakz()
3665 && self.input.next_can_be_plain_scalar(self.flow_level > 0)
3666 {
3667 if self.leading_whitespace {
3668 if self.buf_leading_break.is_empty() {
3669 string.push_str(&self.buf_leading_break);
3670 string.push_str(&self.buf_trailing_breaks);
3671 self.buf_trailing_breaks.clear();
3672 self.buf_leading_break.clear();
3673 } else {
3674 if self.buf_trailing_breaks.is_empty() {
3675 string.push(' ');
3676 } else {
3677 string.push_str(&self.buf_trailing_breaks);
3678 self.buf_trailing_breaks.clear();
3679 }
3680 self.buf_leading_break.clear();
3681 }
3682 self.leading_whitespace = false;
3683 } else if !self.buf_whitespaces.is_empty() {
3684 string.push_str(&self.buf_whitespaces);
3685 self.buf_whitespaces.clear();
3686 }
3687
3688 string.push(self.input.peek());
3690 self.skip_non_blank();
3691 string.reserve(self.input.bufmaxlen());
3692
3693 let mut end = false;
3695 while !end {
3696 self.input.lookahead(self.input.bufmaxlen());
3700 let (stop, chars_consumed) = self.input.fetch_plain_scalar_chunk(
3701 &mut string,
3702 self.input.bufmaxlen() - 1,
3703 self.flow_level > 0,
3704 );
3705 end = stop;
3706 self.mark.offsets.chars += chars_consumed;
3707 self.mark.col += chars_consumed;
3708 self.mark.offsets.bytes = self.input.byte_offset();
3709 }
3710 end_mark = self.mark;
3711 }
3712
3713 if !(self.input.next_is_blank() || self.input.next_is_break()) {
3718 break;
3719 }
3720
3721 self.input.lookahead(2);
3723 while self.input.next_is_blank_or_break() {
3724 if self.input.next_is_blank() {
3725 if !self.leading_whitespace {
3726 self.buf_whitespaces.push(self.input.peek());
3727 self.skip_blank();
3728 } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
3729 self.skip_ws_to_eol(SkipTabs::Yes)?;
3732 if !self.input.next_is_breakz() {
3733 return Err(ScanError::new_str(
3734 start_mark,
3735 "while scanning a plain scalar, found a tab",
3736 ));
3737 }
3738 } else {
3739 self.skip_blank();
3740 }
3741 } else {
3742 if self.leading_whitespace {
3744 self.skip_break();
3745 self.buf_trailing_breaks.push('\n');
3746 } else {
3747 self.buf_whitespaces.clear();
3748 self.skip_break();
3749 self.buf_leading_break.push('\n');
3750 self.leading_whitespace = true;
3751 }
3752 }
3753 self.input.lookahead(2);
3754 }
3755
3756 if self.flow_level == 0 && (self.mark.col as isize) < indent {
3758 break;
3759 }
3760 }
3761
3762 if self.leading_whitespace {
3763 self.allow_simple_key();
3764 }
3765
3766 if string.is_empty() {
3767 Err(ScanError::new_str(
3771 start_mark,
3772 "unexpected end of plain scalar",
3773 ))
3774 } else {
3775 let contents = if let (Some(start), Some(end)) =
3776 (start_mark.byte_offset(), end_mark.byte_offset())
3777 {
3778 match self.try_borrow_slice(start, end) {
3779 Some(slice) if slice == string => Cow::Borrowed(slice),
3780 _ => Cow::Owned(string),
3781 }
3782 } else {
3783 Cow::Owned(string)
3784 };
3785
3786 Ok(Token(
3787 Span::new(start_mark, end_mark),
3788 TokenType::Scalar(ScalarStyle::Plain, contents),
3789 ))
3790 }
3791 }
3792
3793 fn fetch_key(&mut self) -> ScanResult {
3794 let start_mark = self.mark;
3795 if self.flow_level == 0 {
3796 if !self.simple_key_allowed {
3798 return Err(ScanError::new_str(
3799 self.mark,
3800 "mapping keys are not allowed in this context",
3801 ));
3802 }
3803 self.roll_indent(
3804 start_mark.col,
3805 None,
3806 TokenType::BlockMappingStart,
3807 start_mark,
3808 );
3809 } else {
3810 self.set_current_flow_mapping_started(true);
3812 }
3813
3814 self.remove_simple_key()?;
3815
3816 if self.flow_level == 0 {
3817 self.allow_simple_key();
3818 } else {
3819 self.disallow_simple_key();
3820 }
3821
3822 self.skip_non_blank();
3823 let token_index = self.tokens.len();
3824 self.explicit_key_tab_check_pending = false;
3825 let stopped_after_comment = self.skip_yaml_whitespace(true)?;
3826 if self.input.peek() == '\t' {
3827 return Err(ScanError::new_str(
3828 self.mark(),
3829 "tabs disallowed in this context",
3830 ));
3831 }
3832 self.explicit_key_tab_check_pending = stopped_after_comment;
3833 self.insert_token(
3834 token_index,
3835 Token(Span::new(start_mark, self.mark), TokenType::Key),
3836 );
3837 Ok(())
3838 }
3839
3840 fn fetch_flow_value(&mut self) -> ScanResult {
3848 let nc = self.input.peek_nth(1);
3849
3850 if self.mark.index() != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
3862 return Err(ScanError::new_str(
3863 self.mark,
3864 "':' may not precede any of `[{` in flow mapping",
3865 ));
3866 }
3867
3868 self.fetch_value()
3869 }
3870
3871 fn fetch_value(&mut self) -> ScanResult {
3873 let sk = self.simple_keys.last().unwrap().clone();
3874 let start_mark = self.mark;
3875 let is_implicit_flow_mapping = self.current_flow_collection_is_sequence()
3876 && !self.current_flow_mapping_started()
3877 && !self.implicit_flow_mapping_states.is_empty();
3878 if is_implicit_flow_mapping {
3879 *self.implicit_flow_mapping_states.last_mut().unwrap() =
3880 ImplicitMappingState::Inside(self.flow_level);
3881 }
3882
3883 self.skip_non_blank();
3885 let mut trailing_tokens = VecDeque::new();
3892 if self.input.look_ch() == '\t' {
3893 let trailing_token_index = self.tokens.len();
3894 let whitespace = self.skip_ws_to_eol(SkipTabs::Yes)?;
3895 trailing_tokens = self.tokens.split_off(trailing_token_index);
3896
3897 if !whitespace.has_valid_yaml_ws()
3898 && (self.input.peek() == '-' || self.input.next_is_alpha())
3899 {
3900 return Err(ScanError::new_str(
3901 self.mark,
3902 "':' must be followed by a valid YAML whitespace",
3903 ));
3904 }
3905 }
3906
3907 if sk.possible {
3908 let tok = Token(Span::empty(sk.mark), TokenType::Key);
3910 self.insert_token(sk.token_number - self.tokens_parsed, tok);
3911 if is_implicit_flow_mapping {
3912 if sk.mark.line < start_mark.line {
3913 return Err(ScanError::new_str(
3914 start_mark,
3915 "illegal placement of ':' indicator",
3916 ));
3917 }
3918 self.insert_token(
3919 sk.token_number - self.tokens_parsed,
3920 Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
3921 );
3922 }
3923
3924 self.roll_indent(
3926 sk.mark.col,
3927 Some(sk.token_number),
3928 TokenType::BlockMappingStart,
3929 sk.mark,
3930 );
3931 self.roll_one_col_indent();
3932
3933 self.simple_keys.last_mut().unwrap().possible = false;
3934 self.disallow_simple_key();
3935 } else {
3936 if is_implicit_flow_mapping {
3937 self.tokens
3938 .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart).into());
3939 }
3940 if self.flow_level == 0 {
3942 if !self.simple_key_allowed {
3943 return Err(ScanError::new_str(
3944 start_mark,
3945 "mapping values are not allowed in this context",
3946 ));
3947 }
3948
3949 self.roll_indent(
3950 start_mark.col,
3951 None,
3952 TokenType::BlockMappingStart,
3953 start_mark,
3954 );
3955 }
3956 self.roll_one_col_indent();
3957
3958 if self.flow_level == 0 {
3959 self.allow_simple_key();
3960 } else {
3961 self.disallow_simple_key();
3962 }
3963 }
3964 self.tokens
3965 .push_back(Token(Span::empty(start_mark), TokenType::Value).into());
3966 self.tokens.append(&mut trailing_tokens);
3967
3968 Ok(())
3969 }
3970
3971 fn roll_indent(
3977 &mut self,
3978 col: usize,
3979 number: Option<usize>,
3980 tok: TokenType<'input>,
3981 mark: Marker,
3982 ) {
3983 if self.flow_level > 0 {
3984 return;
3985 }
3986
3987 if self.indent <= col as isize {
3991 if let Some(indent) = self.indents.last() {
3992 if !indent.needs_block_end {
3993 self.indent = indent.indent;
3994 self.indents.pop();
3995 }
3996 }
3997 }
3998
3999 if self.indent < col as isize {
4000 self.indents.push(Indent {
4001 indent: self.indent,
4002 needs_block_end: true,
4003 });
4004 self.indent = col as isize;
4005 let tokens_parsed = self.tokens_parsed;
4006 match number {
4007 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
4008 None => self.tokens.push_back(Token(Span::empty(mark), tok).into()),
4009 }
4010 }
4011 }
4012
4013 fn unroll_indent(&mut self, col: isize) {
4019 if self.flow_level > 0 {
4020 return;
4021 }
4022 while self.indent > col {
4023 let indent = self.indents.pop().unwrap();
4024 self.indent = indent.indent;
4025 if indent.needs_block_end {
4026 self.tokens
4027 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd).into());
4028 }
4029 }
4030 }
4031
4032 fn roll_one_col_indent(&mut self) {
4038 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
4039 self.indents.push(Indent {
4040 indent: self.indent,
4041 needs_block_end: false,
4042 });
4043 self.indent += 1;
4044 }
4045 }
4046
4047 fn unroll_non_block_indents(&mut self) {
4049 while let Some(indent) = self.indents.last() {
4050 if indent.needs_block_end {
4051 break;
4052 }
4053 self.indent = indent.indent;
4054 self.indents.pop();
4055 }
4056 }
4057
4058 fn save_simple_key(&mut self) {
4060 if self.simple_key_allowed {
4061 let required = self.flow_level == 0
4062 && self.indent == (self.mark.col as isize)
4063 && self.indents.last().unwrap().needs_block_end;
4064
4065 if let Some(last) = self.simple_keys.last_mut() {
4066 *last = SimpleKey {
4067 mark: self.mark,
4068 possible: true,
4069 required,
4070 token_number: self.tokens_parsed + self.tokens.len(),
4071 };
4072 }
4073 }
4074 }
4075
4076 fn remove_simple_key(&mut self) -> ScanResult {
4077 let last = self.simple_keys.last_mut().unwrap();
4078 if last.possible && last.required {
4079 return Err(self.simple_key_expected());
4080 }
4081
4082 last.possible = false;
4083 Ok(())
4084 }
4085
4086 fn is_within_block(&self) -> bool {
4088 !self.indents.is_empty()
4089 }
4090
4091 fn end_implicit_mapping(&mut self, mark: Marker, flow_level: u8) {
4097 if self
4098 .implicit_flow_mapping_states
4099 .last()
4100 .is_some_and(|state| *state == ImplicitMappingState::Inside(flow_level))
4101 {
4102 *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Possible;
4103 self.set_current_flow_mapping_started(false);
4104 self.tokens
4105 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd).into());
4106 }
4107 }
4108
4109 fn current_flow_collection_is_sequence(&self) -> bool {
4110 self.flow_markers
4111 .last()
4112 .is_some_and(|(_, bracket)| *bracket == '[')
4113 }
4114
4115 fn current_flow_mapping_started(&self) -> bool {
4116 self.flow_mapping_started.last().copied().unwrap_or(false)
4117 }
4118
4119 fn set_current_flow_mapping_started(&mut self, started: bool) {
4120 if let Some(current) = self.flow_mapping_started.last_mut() {
4121 *current = started;
4122 }
4123 }
4124}
4125
/// Chomping mode of a block scalar: how the final line break and any trailing empty lines
/// are treated.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Chomping {
    /// Neither the final line break nor trailing empty lines are kept.
    Strip,
    /// The final line break is kept; trailing empty lines are dropped.
    Clip,
    /// The final line break and all trailing empty lines are kept.
    Keep,
}
4138
4139#[cfg(test)]
4140mod test {
4141 use alloc::{
4142 borrow::{Cow, ToOwned},
4143 rc::Rc,
4144 string::String,
4145 vec::Vec,
4146 };
4147 use core::cell::Cell;
4148
4149 use crate::{
4150 input::{str::StrInput, BorrowedInput, BufferedInput, Input},
4151 scanner::{
4152 Comment, Marker, Placement, QueuedToken, QueuedTokenType, ScalarStyle, Scanner, Span,
4153 TEncoding, Token, TokenType,
4154 },
4155 };
4156
/// Test iterator over a fixed list of characters that counts how many it has yielded.
struct CountingChars {
    /// Characters still to be yielded.
    chars: alloc::vec::IntoIter<char>,
    /// Shared counter, incremented once per yielded character.
    read: Rc<Cell<usize>>,
}

impl Iterator for CountingChars {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        // Exhaustion is passed through without touching the counter.
        let ch = self.chars.next()?;
        self.read.set(self.read.get() + 1);
        Some(ch)
    }
}
4173
4174 struct SlicingOnlyInput<'input> {
4175 inner: StrInput<'input>,
4176 expose_slice: bool,
4177 }
4178
4179 impl<'input> SlicingOnlyInput<'input> {
4180 fn new(source: &'input str, expose_slice: bool) -> Self {
4181 Self {
4182 inner: StrInput::new(source),
4183 expose_slice,
4184 }
4185 }
4186 }
4187
4188 impl Input for SlicingOnlyInput<'_> {
4189 fn lookahead(&mut self, count: usize) {
4190 self.inner.lookahead(count);
4191 }
4192
4193 fn buflen(&self) -> usize {
4194 self.inner.buflen()
4195 }
4196
4197 fn bufmaxlen(&self) -> usize {
4198 self.inner.bufmaxlen()
4199 }
4200
4201 fn raw_read_ch(&mut self) -> char {
4202 self.inner.raw_read_ch()
4203 }
4204
4205 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
4206 self.inner.raw_read_non_breakz_ch()
4207 }
4208
4209 fn skip(&mut self) {
4210 self.inner.skip();
4211 }
4212
4213 fn skip_n(&mut self, count: usize) {
4214 self.inner.skip_n(count);
4215 }
4216
4217 fn peek(&self) -> char {
4218 self.inner.peek()
4219 }
4220
4221 fn peek_nth(&self, n: usize) -> char {
4222 self.inner.peek_nth(n)
4223 }
4224
4225 fn byte_offset(&self) -> Option<usize> {
4226 self.inner.byte_offset()
4227 }
4228
4229 fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
4230 if self.expose_slice {
4231 self.inner.slice_bytes(start, end)
4232 } else {
4233 None
4234 }
4235 }
4236 }
4237
4238 impl<'input> BorrowedInput<'input> for SlicingOnlyInput<'input> {
4239 fn slice_borrowed(&self, _start: usize, _end: usize) -> Option<&'input str> {
4240 None
4241 }
4242 }
4243
/// `x` is accepted as an anchor character.
#[test]
fn test_is_anchor_char() {
    assert!(super::is_anchor_char('x'));
}
4249
/// The first scalar of a long flow sequence is yielded without reading the whole input.
#[test]
fn flow_simple_key_length_limit_bounds_buffering() {
    // `[`, a "start" scalar, then 600 more quoted scalars, one per line.
    let mut yaml = String::from("[\n\"start\"\n");
    yaml.push_str(&"\"x\"\n".repeat(600));
    let total_chars = yaml.chars().count();

    let read = Rc::new(Cell::new(0));
    let source: Vec<char> = yaml.chars().collect();
    let counting = CountingChars {
        chars: source.into_iter(),
        read: Rc::clone(&read),
    };
    let mut scanner = Scanner::new(BufferedInput::new(counting));

    let stream_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(stream_start.1, TokenType::StreamStart(_)));

    let sequence_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(sequence_start.1, TokenType::FlowSequenceStart));

    let first_scalar = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        first_scalar.1,
        TokenType::Scalar(_, ref value) if value == "start"
    ));

    let chars_read = read.get();
    assert!(
        chars_read < total_chars,
        "scanner consumed all {total_chars} chars before yielding the first flow scalar"
    );
    assert!(
        chars_read <= super::SIMPLE_KEY_MAX_LOOKAHEAD + 128,
        "scanner read {} chars before yielding the first flow scalar",
        chars_read
    );
}
4287
/// Scanning a comment token leaves `leading_whitespace` set, for both a string-backed and a
/// streaming input.
#[test]
fn comment_capture_does_not_change_leading_whitespace() {
    // String-backed input.
    {
        let mut scanner = Scanner::new(StrInput::new("# comment\n"));

        let token = scanner.scan_comment_token().unwrap();

        assert!(scanner.leading_whitespace);
        assert!(matches!(token.1, TokenType::Comment(ref comment) if comment.text == " comment"));
    }

    // Streaming input, with one character buffered beforehand.
    {
        let mut scanner = Scanner::new(BufferedInput::new("# streaming\n".chars()));
        scanner.input.lookahead(1);

        let token = scanner.scan_comment_token().unwrap();

        assert!(scanner.leading_whitespace);
        assert!(matches!(token.1, TokenType::Comment(ref comment) if comment.text == " streaming"));
    }
}
4305
/// When the input can slice but not lend `'input` data, the comment text is an owned copy.
#[test]
fn comment_capture_falls_back_to_owned_slice_when_borrow_unavailable() {
    let input = SlicingOnlyInput::new("# sliced\n", true);
    let mut scanner = Scanner::new(input);
    scanner.input.lookahead(2);
    assert_eq!(scanner.input.peek_nth(1), ' ');

    let token = scanner.scan_comment_token().unwrap();

    let text_is_owned_copy = matches!(token.1, TokenType::Comment(ref comment)
        if matches!(comment.text, Cow::Owned(ref text) if text == " sliced"));
    assert!(text_is_owned_copy);
}
4317
/// An input that reports byte offsets but refuses to slice makes comment scanning fail with
/// the internal-error message.
#[test]
fn comment_capture_errors_when_offsets_have_no_slice() {
    let input = SlicingOnlyInput::new("# broken\n", false);
    let mut scanner = Scanner::new(input);

    let error = scanner.scan_comment_token().unwrap_err();

    assert_eq!(
        error.info(),
        "internal error: input advertised offsets but did not provide a slice"
    );
}
4329
/// Every listed public token variant survives a conversion to `QueuedToken` and back.
#[test]
fn queued_token_roundtrips_public_token_variants() {
    let span = Span::new(Marker::new(0, 1, 0), Marker::new(7, 1, 7));

    // One sample per token variant under test.
    let samples = [
        Token(span, TokenType::StreamStart(TEncoding::Utf8)),
        Token(span, TokenType::StreamEnd),
        Token(span, TokenType::VersionDirective(1, 2)),
        Token(
            span,
            TokenType::TagDirective(Cow::Borrowed("!app!"), Cow::Borrowed("tag:app.example,")),
        ),
        Token(span, TokenType::DocumentStart),
        Token(span, TokenType::DocumentEnd),
        Token(span, TokenType::BlockSequenceStart),
        Token(span, TokenType::BlockMappingStart),
        Token(span, TokenType::BlockEnd),
        Token(span, TokenType::FlowSequenceStart),
        Token(span, TokenType::FlowSequenceEnd),
        Token(span, TokenType::FlowMappingStart),
        Token(span, TokenType::FlowMappingEnd),
        Token(span, TokenType::BlockEntry),
        Token(span, TokenType::FlowEntry),
        Token(span, TokenType::Key),
        Token(span, TokenType::Value),
        Token(span, TokenType::Alias(Cow::Borrowed("alias"))),
        Token(span, TokenType::Anchor(Cow::Borrowed("anchor"))),
        Token(
            span,
            TokenType::Tag(Cow::Borrowed("!"), Cow::Borrowed("tag")),
        ),
        Token(
            span,
            TokenType::Scalar(ScalarStyle::Literal, Cow::Borrowed("scalar")),
        ),
        Token(
            span,
            TokenType::Comment(
                Comment::new(span, Cow::Borrowed(" comment")).with_placement(Placement::Right),
            ),
        ),
        Token(
            span,
            TokenType::ReservedDirective(
                "reserved".to_owned(),
                vec!["one".to_owned(), "two".to_owned()],
            ),
        ),
    ];

    for original in samples {
        let queued: QueuedToken = original.clone().into();
        let round_tripped = queued.into_public();

        assert_eq!(round_tripped, original);
    }
}
4385
/// With comment capture off, skipping whitespace consumes the comment line and queues
/// nothing.
#[test]
fn comment_skipping_path_consumes_comment_without_tokenizing_it() {
    let source = StrInput::new("# skipped\nnext: value\n");
    let mut scanner = Scanner::new(source);

    scanner.skip_yaml_whitespace(false).unwrap();

    assert!(scanner.tokens.is_empty());
    // The scanner now sits at the start of the second line.
    let position = (scanner.mark.line(), scanner.mark.col());
    assert_eq!(position.0, 2);
    assert_eq!(position.1, 0);
}
4396
/// Calling `skip_yaml_whitespace(true)` must return `true`, queue exactly the
/// first comment, and leave the mark at line 1, column 9.
#[test]
fn yaml_whitespace_can_stop_after_queued_comment() {
    let input = StrInput::new(" # queued\n# later\n");
    let mut scanner = Scanner::new(input);

    let stopped = scanner.skip_yaml_whitespace(true).unwrap();
    assert!(stopped);

    assert_eq!(scanner.tokens.len(), 1);
    let queued = scanner.tokens.front().unwrap();
    assert!(matches!(
        queued.1,
        QueuedTokenType::Comment(ref comment) if comment.text == " queued"
    ));

    let mark = &scanner.mark;
    assert_eq!((mark.line(), mark.col()), (1, 9));
}
4411
/// Calling `skip_to_next_token(true)` must return `true`, queue only the first
/// comment, and leave the mark at line 2, column 0.
#[test]
fn token_skip_can_stop_after_queued_comment() {
    let input = StrInput::new("# first\n# second\n");
    let mut scanner = Scanner::new(input);

    let stopped = scanner.skip_to_next_token(true).unwrap();
    assert!(stopped);

    assert_eq!(scanner.tokens.len(), 1);
    let queued = scanner.tokens.front().unwrap();
    assert!(matches!(
        queued.1,
        QueuedTokenType::Comment(ref comment) if comment.text == " first"
    ));

    let mark = &scanner.mark;
    assert_eq!((mark.line(), mark.col()), (2, 0));
}
4426
/// The first leading comment must be returned while the token queue is still
/// empty; the second comment is only produced by the following call.
#[test]
fn scanner_emits_first_leading_comment_before_scanning_next_comment() {
    let mut scanner = Scanner::new(StrInput::new("# first\n# second\nkey: value\n"));

    let stream_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(stream_start.1, TokenType::StreamStart(_)));

    let first = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        first.1,
        TokenType::Comment(ref comment) if comment.text == " first"
    ));

    // Nothing may be left in the queue after the first comment was handed out.
    assert!(scanner.tokens.is_empty());

    let second = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        second.1,
        TokenType::Comment(ref comment) if comment.text == " second"
    ));
}
4445
/// After a double-quoted scalar, the trailing comment on the same line must be
/// the very next token returned by the scanner.
#[test]
fn scanner_emits_quoted_scalar_comment_before_scanning_following_value() {
    let mut scanner = Scanner::new(StrInput::new("\"key\" # quoted\n: value\n"));

    let stream_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(stream_start.1, TokenType::StreamStart(_)));

    let scalar = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        scalar.1,
        TokenType::Scalar(ScalarStyle::DoubleQuoted, ref value) if value == "key"
    ));

    let comment_token = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        comment_token.1,
        TokenType::Comment(ref comment) if comment.text == " quoted"
    ));
}
4463
/// Fetching a quoted scalar that is followed by a comment line must set
/// `adjacent_value_allowed_at` to `usize::MAX` and queue both the scalar and
/// the comment.
#[test]
fn flow_scalar_comment_disables_adjacent_value_lookahead() {
    let input = StrInput::new("\"key\"\n# quoted\n: value\n");
    let mut scanner = Scanner::new(input);

    scanner.fetch_flow_scalar(false).unwrap();

    assert_eq!(scanner.adjacent_value_allowed_at, usize::MAX);

    // The scalar sits at the front of the queue.
    let front = scanner.tokens.front().unwrap();
    assert!(matches!(
        front.1,
        QueuedTokenType::Scalar(ScalarStyle::DoubleQuoted, ref value) if value == "key"
    ));

    // The comment is queued somewhere as well.
    let has_quoted_comment = scanner.tokens.iter().any(|queued| {
        matches!(
            &queued.1,
            QueuedTokenType::Comment(comment) if comment.text == " quoted"
        )
    });
    assert!(has_quoted_comment);
}
4480
/// Both leading comments must be returned before the error caused by the
/// `@` line is reported.
#[test]
fn deferred_error_waits_for_all_comment_tokens() {
    let mut scanner = Scanner::new(StrInput::new("# first\n# second\n@\n"));

    let stream_start = scanner.next_token().unwrap().unwrap();
    assert!(matches!(stream_start.1, TokenType::StreamStart(_)));

    let first = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        first.1,
        TokenType::Comment(ref comment) if comment.text == " first"
    ));

    let second = scanner.next_token().unwrap().unwrap();
    assert!(matches!(
        second.1,
        TokenType::Comment(ref comment) if comment.text == " second"
    ));

    // Only now does the scanner surface the failure.
    let error = scanner.next_token().unwrap_err();
    let info = error.info();

    assert!(info.contains("unexpected character"));
}
4502
/// The anchor name scanned from `&anch` must be returned as
/// `Cow::Borrowed("anch")`.
#[test]
fn anchor_name_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("&anch\n"));

    // Skip every token until the anchor shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Anchor(name) = kind else {
            continue;
        };

        assert!(matches!(name, Cow::Borrowed("anch")));
        break;
    }
}
4519
/// The anchor name scanned from `&foo` followed by U+0001 must be exactly
/// `foo`; the control character must not become part of the name.
#[test]
fn anchor_name_rejects_non_printable_control_chars() {
    let mut scanner = Scanner::new(StrInput::new("&foo\u{0001}\n"));

    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("scanning should not fail")
            .expect("scanner must eventually produce a token");
        let TokenType::Anchor(name) = kind else {
            continue;
        };

        assert!(matches!(name, Cow::Borrowed("foo")));

        // NOTE(review): the follow-up check only runs when a scalar comes
        // next; any other token (or none) is silently accepted.
        let following = scanner.next_token().expect("scanning should not fail");
        if let Some(Token(_, TokenType::Scalar(_, rest))) = following {
            assert!(rest.starts_with('\u{0001}'));
        }
        break;
    }
}
4540
/// The alias name scanned from `*foo` followed by U+0001 must be exactly
/// `foo`; the control character must not become part of the name.
#[test]
fn alias_name_rejects_non_printable_control_chars() {
    let mut scanner = Scanner::new(StrInput::new("*foo\u{0001}\n"));

    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("scanning should not fail")
            .expect("scanner must eventually produce a token");
        let TokenType::Alias(name) = kind else {
            continue;
        };

        assert!(matches!(name, Cow::Borrowed("foo")));

        // NOTE(review): the follow-up check only runs when a scalar comes
        // next; any other token (or none) is silently accepted.
        let following = scanner.next_token().expect("scanning should not fail");
        if let Some(Token(_, TokenType::Scalar(_, rest))) = following {
            assert!(rest.starts_with('\u{0001}'));
        }
        break;
    }
}
4560
/// The alias name scanned from `*anch` must be returned as
/// `Cow::Borrowed("anch")`.
#[test]
fn alias_name_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("*anch\n"));

    // Skip every token until the alias shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Alias(name) = kind else {
            continue;
        };

        assert!(matches!(name, Cow::Borrowed("anch")));
        break;
    }
}
4576
/// Both the handle and the prefix of a `%TAG` directive without escapes must
/// be returned as borrowed `Cow` values.
#[test]
fn tag_directive_parts_are_borrowed_for_str_input() {
    let input = StrInput::new("%TAG !e! tag:example.com,2000:app/\n");
    let mut scanner = Scanner::new(input);

    // Skip every token until the directive shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::TagDirective(handle, prefix) = kind else {
            continue;
        };

        assert!(matches!(handle, Cow::Borrowed("!e!")));
        assert!(matches!(prefix, Cow::Borrowed("tag:example.com,2000:app/")));
        break;
    }
}
4594
/// The plain scalar `foo` must be returned as `Cow::Borrowed("foo")`.
#[test]
fn plain_scalar_is_borrowed_when_whitespace_free_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("foo\n"));

    // Skip every token until the first scalar shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Scalar(_, value) = kind else {
            continue;
        };

        assert!(matches!(value, Cow::Borrowed("foo")));
        break;
    }
}
4610
/// The plain scalar `foo bar`, which contains an inner space, must still be
/// returned as `Cow::Borrowed("foo bar")`.
#[test]
fn plain_scalar_is_borrowed_when_whitespace_present_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("foo bar\n"));

    // Skip every token until the first scalar shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Scalar(_, value) = kind else {
            continue;
        };

        assert!(matches!(value, Cow::Borrowed("foo bar")));
        break;
    }
}
4626
/// A single-quoted scalar without escaped quotes must be returned as
/// `Cow::Borrowed("foo bar")`.
#[test]
fn single_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("'foo bar'\n"));

    // Skip every token until the first scalar shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Scalar(_, value) = kind else {
            continue;
        };

        assert!(matches!(value, Cow::Borrowed("foo bar")));
        break;
    }
}
4642
/// A single-quoted scalar containing the `''` escape must be returned as an
/// owned `Cow` whose content is `foo'bar`.
#[test]
fn single_quoted_scalar_is_owned_when_quote_is_escaped_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("'foo''bar'\n"));

    // Skip every token until the first scalar shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Scalar(_, value) = kind else {
            continue;
        };

        assert!(matches!(value, Cow::Owned(_)));
        assert_eq!(&*value, "foo'bar");
        break;
    }
}
4659
/// A double-quoted scalar without escape sequences must be returned as
/// `Cow::Borrowed("foo bar")`.
#[test]
fn double_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"foo bar\"\n"));

    // Skip every token until the first scalar shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Scalar(_, value) = kind else {
            continue;
        };

        assert!(matches!(value, Cow::Borrowed("foo bar")));
        break;
    }
}
4675
/// A double-quoted scalar containing a `\n` escape must be returned as an
/// owned `Cow` holding the decoded text.
#[test]
fn double_quoted_scalar_is_owned_when_escape_sequence_present_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"foo\\nbar\"\n"));

    // Skip every token until the first scalar shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Scalar(_, value) = kind else {
            continue;
        };

        assert!(matches!(value, Cow::Owned(_)));
        assert_eq!(&*value, "foo\nbar");
        break;
    }
}
4692
/// The first scalar following a `Key` token for `mykey: value` must be
/// `Cow::Borrowed("mykey")`.
#[test]
fn plain_key_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("mykey: value\n"));

    let mut found_key = false;
    let mut scalar_after_key: Option<Cow<'_, str>> = None;

    // Walk the stream until it ends or the scalar after the `Key` is found.
    while let Some(Token(_, kind)) = scanner
        .next_token()
        .expect("valid YAML must scan without errors")
    {
        match kind {
            TokenType::Key => found_key = true,
            TokenType::Scalar(_, value) if found_key => {
                scalar_after_key = Some(value);
                break;
            }
            _ => {}
        }
    }

    assert!(found_key, "expected to find a Key token");
    let key_value = scalar_after_key.expect("expected to find a scalar after Key token");
    assert!(
        matches!(key_value, Cow::Borrowed("mykey")),
        "key should be borrowed, got: {key_value:?}"
    );
}
4724
/// The first scalar following a `Key` token for `"mykey": value` must be
/// `Cow::Borrowed("mykey")`.
#[test]
fn quoted_key_is_borrowed_when_verbatim_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("\"mykey\": value\n"));

    let mut found_key = false;
    let mut scalar_after_key: Option<Cow<'_, str>> = None;

    // Walk the stream until it ends or the scalar after the `Key` is found.
    while let Some(Token(_, kind)) = scanner
        .next_token()
        .expect("valid YAML must scan without errors")
    {
        match kind {
            TokenType::Key => found_key = true,
            TokenType::Scalar(_, value) if found_key => {
                scalar_after_key = Some(value);
                break;
            }
            _ => {}
        }
    }

    assert!(found_key, "expected to find a Key token");
    let key_value = scalar_after_key.expect("expected to find a scalar after Key token");
    assert!(
        matches!(key_value, Cow::Borrowed("mykey")),
        "quoted key should be borrowed when verbatim, got: {key_value:?}"
    );
}
4755
/// For `!!str`, the tag handle `!!` and the suffix `str` must both be
/// returned as borrowed `Cow` values.
#[test]
fn tag_handle_and_suffix_are_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!!str foo\n"));

    // Skip every token until the tag shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Tag(handle, suffix) = kind else {
            continue;
        };

        assert!(
            matches!(handle, Cow::Borrowed("!!")),
            "tag handle should be borrowed, got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Borrowed("str")),
            "tag suffix should be borrowed, got: {suffix:?}"
        );
        break;
    }
}
4779
/// For `!mytag`, the handle must be the borrowed `!` and the suffix the
/// borrowed `mytag`.
#[test]
fn local_tag_suffix_is_borrowed_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!mytag foo\n"));

    // Skip every token until the tag shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Tag(handle, suffix) = kind else {
            continue;
        };

        assert!(
            matches!(handle, Cow::Borrowed("!")),
            "local tag handle should be '!', got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Borrowed("mytag")),
            "local tag suffix should be borrowed, got: {suffix:?}"
        );
        break;
    }
}
4803
/// For `!!my%20tag`, the handle stays borrowed while the suffix must be an
/// owned `Cow` holding the decoded text `my tag`.
#[test]
fn tag_with_uri_escape_is_owned_for_str_input() {
    let mut scanner = Scanner::new(StrInput::new("!!my%20tag foo\n"));

    // Skip every token until the tag shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
            .expect("scanner must eventually produce a token");
        let TokenType::Tag(handle, suffix) = kind else {
            continue;
        };

        assert!(
            matches!(handle, Cow::Borrowed("!!")),
            "tag handle should still be borrowed, got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Owned(_)),
            "tag suffix with URI escape should be owned, got: {suffix:?}"
        );
        assert_eq!(&*suffix, "my tag");
        break;
    }
}
4828
/// Checks the state of a borrowed `FlowScalarBuf` after committing and after
/// discarding noted whitespace, and that only the owned variant exposes a
/// mutable string.
#[test]
fn flow_scalar_buffer_tracks_pending_whitespace() {
    use super::FlowScalarBuf;

    let mut buf = FlowScalarBuf::new_borrowed(2);

    // After noting (5, 8) and committing, `end` and `pending_ws_end` are 8.
    buf.note_pending_ws(5, 8);
    buf.commit_pending_ws();
    assert!(matches!(
        buf,
        FlowScalarBuf::Borrowed {
            end: 8,
            pending_ws_start: None,
            pending_ws_end: 8,
            ..
        }
    ));

    // After noting (9, 11) and discarding, the same state is still observed.
    buf.note_pending_ws(9, 11);
    buf.discard_pending_ws();
    assert!(matches!(
        buf,
        FlowScalarBuf::Borrowed {
            end: 8,
            pending_ws_start: None,
            pending_ws_end: 8,
            ..
        }
    ));
    assert!(buf.as_owned_mut().is_none());

    let mut owned_buf = FlowScalarBuf::new_owned();
    owned_buf.as_owned_mut().unwrap().push_str("owned");
    assert!(matches!(owned_buf, FlowScalarBuf::Owned(ref text) if text == "owned"));
}
4862
4863 fn first_scanner_error_info(input: &str) -> String {
4864 let mut scanner = Scanner::new(StrInput::new(input));
4865 loop {
4866 match scanner.next_token() {
4867 Ok(Some(_)) => {}
4868 Ok(None) => panic!("expected scanner error"),
4869 Err(error) => return error.info().to_owned(),
4870 }
4871 }
4872 }
4873
4874 fn first_scalar_value(input: &str) -> String {
4875 let mut scanner = Scanner::new(StrInput::new(input));
4876 loop {
4877 match scanner.next_token().expect("scanner should not error") {
4878 Some(Token(_, TokenType::Scalar(_, value))) => return value.into_owned(),
4879 Some(_) => {}
4880 None => panic!("expected scalar token"),
4881 }
4882 }
4883 }
4884
/// Once `next()` has returned `None` on an unterminated quote, the error must
/// be retrievable through `get_error` and `next()` must keep returning `None`.
#[test]
fn iterator_next_records_error_and_then_stays_empty() {
    let mut scanner = Scanner::new(StrInput::new("\"unterminated"));

    // Drain the scanner until it stops yielding items.
    loop {
        if scanner.next().is_none() {
            break;
        }
    }

    let retained = scanner
        .get_error()
        .expect("scanner should retain the error");
    assert_eq!(retained.info(), "unclosed quote");
    assert!(scanner.next().is_none());
}
4897
/// After the `StreamEnd` token of an empty input, the scanner must report
/// both stream flags as set and return `None` from `next_token`.
#[test]
fn next_token_returns_none_after_stream_end() {
    let mut scanner = Scanner::new(StrInput::new(""));

    // Consume tokens up to and including `StreamEnd` (or until exhaustion).
    loop {
        match scanner.next_token().unwrap() {
            Some(Token(_, TokenType::StreamEnd)) | None => break,
            Some(_) => {}
        }
    }

    assert!(scanner.stream_started());
    assert!(scanner.stream_ended());

    let after_end = scanner.next_token().unwrap();
    assert!(after_end.is_none());
}
4912
/// A lone `%` without a directive name must produce the
/// missing-directive-name error.
#[test]
fn directive_name_must_be_present() {
    let input = "%\n";
    let expected = "while scanning a directive, could not find expected directive name";

    assert_eq!(first_scanner_error_info(input), expected);
}
4920
/// `%YAML 1` with no `.` after the major number must be rejected.
#[test]
fn yaml_directive_requires_dot_between_version_numbers() {
    let input = "%YAML 1\n";
    let expected =
        "while scanning a YAML directive, did not find expected digit or '.' character";

    assert_eq!(first_scanner_error_info(input), expected);
}
4928
/// `%YAML .2` with no digits before the `.` must be rejected.
#[test]
fn yaml_directive_requires_major_version_number() {
    let input = "%YAML .2\n";
    let expected = "while scanning a YAML directive, did not find expected version number";

    assert_eq!(first_scanner_error_info(input), expected);
}
4936
/// A ten-digit major version in a `%YAML` directive must be rejected as
/// extremely long.
#[test]
fn yaml_directive_rejects_extremely_long_version_number() {
    let input = "%YAML 1234567890.2\n";
    let expected = "while scanning a YAML directive, found extremely long version number";

    assert_eq!(first_scanner_error_info(input), expected);
}
4944
/// A `%TAG` handle such as `!bad`, which lacks the closing `!`, must be
/// rejected.
#[test]
fn tag_directive_handle_must_end_with_bang() {
    let input = "%TAG !bad tag:example.com,2024:\n";
    let expected = "while parsing a tag directive, did not find expected '!'";

    assert_eq!(first_scanner_error_info(input), expected);
}
4952
/// A `%TAG` handle such as `bad!`, which lacks the leading `!`, must be
/// rejected.
#[test]
fn tag_directive_handle_must_start_with_bang() {
    let input = "%TAG bad! tag:example.com,2024:\n";
    let expected = "while scanning a tag, did not find expected '!'";

    assert_eq!(first_scanner_error_info(input), expected);
}
4960
/// A `%TAG` prefix that begins with a backtick must be rejected as an invalid
/// global tag character.
#[test]
fn tag_directive_prefix_must_start_with_tag_character() {
    let input = "%TAG !e! `bad\n";
    let expected = "invalid global tag character";

    assert_eq!(first_scanner_error_info(input), expected);
}
4968
/// A `%TAG` prefix that runs into a `^` character must be rejected because no
/// whitespace or line break follows the prefix.
#[test]
fn tag_directive_prefix_must_end_before_invalid_content() {
    let input = "%TAG !e! tag:example.com^suffix\n";
    let expected = "while scanning TAG, did not find expected whitespace or line break";

    assert_eq!(first_scanner_error_info(input), expected);
}
4976
/// A `%TAG` prefix containing `%20` must be returned as an owned `Cow` with
/// the escape decoded, while the handle stays borrowed.
#[test]
fn tag_directive_prefix_with_uri_escape_is_owned_and_decoded() {
    let input = StrInput::new("%TAG !e! tag:example.com,2024:some%20app/\n");
    let mut scanner = Scanner::new(input);

    // Skip every token until the directive shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid directive should scan")
            .expect("scanner must produce a directive token");
        let TokenType::TagDirective(handle, prefix) = kind else {
            continue;
        };

        assert!(matches!(handle, Cow::Borrowed("!e!")));
        assert!(matches!(prefix, Cow::Owned(_)));
        assert_eq!(&*prefix, "tag:example.com,2024:some app/");
        break;
    }
}
4995
/// A lone `!` must scan as a tag with an empty handle and `!` as its suffix.
#[test]
fn bare_bang_tag_scans_as_non_specific_tag() {
    let mut scanner = Scanner::new(StrInput::new("! foo\n"));

    // Skip every token until the tag shows up.
    loop {
        let Token(_, kind) = scanner
            .next_token()
            .expect("valid tag should scan")
            .expect("scanner must produce a tag token");
        let TokenType::Tag(handle, suffix) = kind else {
            continue;
        };

        assert_eq!(&*handle, "");
        assert_eq!(&*suffix, "!");
        break;
    }
}
5012
/// The tag `!foo` followed directly by `,bar` must be rejected because no
/// whitespace or line break follows the tag.
#[test]
fn tag_requires_separation_after_suffix() {
    let input = "!foo,bar\n";
    let expected = "while scanning a tag, did not find expected whitespace or line break";

    assert_eq!(first_scanner_error_info(input), expected);
}
5020
/// The verbatim tag `!<>` with nothing between the brackets must be rejected.
#[test]
fn verbatim_tag_requires_uri() {
    let input = "!<> foo\n";
    let expected = "while parsing a tag, did not find expected tag URI";

    assert_eq!(first_scanner_error_info(input), expected);
}
5028
/// A verbatim tag that is opened with `!<` but never closed with `>` must be
/// rejected.
#[test]
fn verbatim_tag_requires_closing_angle_bracket() {
    let input = "!<tag:yaml.org,2002:str foo\n";
    let expected = "while scanning a verbatim tag, did not find the expected '>'";

    assert_eq!(first_scanner_error_info(input), expected);
}
5036
/// A `%zz` escape inside a tag must be rejected as an invalid escape sequence.
#[test]
fn tag_uri_escape_requires_hex_digits() {
    let input = "!!bad%zz foo\n";
    let expected = "while parsing a tag, found an invalid escape sequence";

    assert_eq!(first_scanner_error_info(input), expected);
}
5044
/// A tag escape starting with `%80` must be rejected as an incorrect leading
/// UTF-8 byte.
#[test]
fn tag_uri_escape_rejects_bad_leading_utf8_byte() {
    let input = "!!bad%80 foo\n";
    let expected = "while parsing a tag, found an incorrect leading UTF-8 byte";

    assert_eq!(first_scanner_error_info(input), expected);
}
5052
/// The escape pair `%C2%41` inside a tag must be rejected as an incorrect
/// trailing UTF-8 byte.
#[test]
fn tag_uri_escape_rejects_bad_trailing_utf8_byte() {
    let input = "!!bad%C2%41 foo\n";
    let expected = "while parsing a tag, found an incorrect trailing UTF-8 byte";

    assert_eq!(first_scanner_error_info(input), expected);
}
5060
/// The escape sequence `%F4%90%80%80` inside a tag must be rejected as an
/// invalid UTF-8 codepoint.
#[test]
fn tag_uri_escape_rejects_invalid_utf8_codepoint() {
    let input = "!!bad%F4%90%80%80 foo\n";
    let expected = "while parsing a tag, found an invalid UTF-8 codepoint";

    assert_eq!(first_scanner_error_info(input), expected);
}
5068
/// A bare `&` or `*` followed by a space must produce the same
/// missing-name error.
#[test]
fn anchors_and_aliases_require_names() {
    let expected =
        "while scanning an anchor or alias, did not find expected alphabetic or numeric character";

    // Anchor indicator first, then alias indicator.
    for input in ["& \n", "* \n"] {
        assert_eq!(first_scanner_error_info(input), expected);
    }
}
5077
/// Text placed on the same line after the `...` marker must be rejected.
#[test]
fn document_end_marker_rejects_trailing_content() {
    let input = "... trailing\n";
    let expected = "invalid content after document end marker";

    assert_eq!(first_scanner_error_info(input), expected);
}
5085
/// An `@` character after a leading space must be reported as an unexpected
/// character.
#[test]
fn reserved_indicators_are_rejected_outside_directives() {
    let input = " @\n";
    let expected = "unexpected character: `@'";

    assert_eq!(first_scanner_error_info(input), expected);
}
5093
/// A `-` block entry indicator inside a flow sequence must be rejected.
#[test]
fn flow_block_entry_indicator_is_rejected() {
    let input = "[- ]\n";
    let expected = r#""-" is only valid inside a block"#;

    assert_eq!(first_scanner_error_info(input), expected);
}
5101
/// A `-` followed by a tab and then another `-` entry must produce the
/// dedicated whitespace error.
#[test]
fn block_entry_after_tabbed_separator_reports_specific_error() {
    let input = "-\t- value\n";
    let expected = "'-' must be followed by a valid YAML whitespace";

    assert_eq!(first_scanner_error_info(input), expected);
}
5109
/// A `---` line reached while a `[` is still open must be reported as an
/// unclosed bracket.
#[test]
fn document_indicator_reports_unclosed_flow_collection() {
    let input = "[\n---\n";
    let expected = "unclosed bracket '['";

    assert_eq!(first_scanner_error_info(input), expected);
}
5114
/// Text following the `|+` block scalar header on the same line must be
/// rejected.
#[test]
fn block_scalar_header_rejects_trailing_content() {
    let input = "|+ trailing\n";
    let expected = "while scanning a block scalar, did not find expected comment or line break";

    assert_eq!(first_scanner_error_info(input), expected);
}
5122
/// An indentation indicator of `0` must be rejected both on its own and after
/// a `+` indicator.
#[test]
fn block_scalar_rejects_zero_indent_indicator() {
    let expected = "while scanning a block scalar, found an indentation indicator equal to 0";

    for input in ["|0\n", "|+0\n"] {
        assert_eq!(first_scanner_error_info(input), expected);
    }
}
5130
/// An empty block scalar at end of input must yield `""` for `|-` and `"\n"`
/// for `|+`.
#[test]
fn empty_block_scalar_at_eof_honors_chomping() {
    let cases = [("|-\n", ""), ("|+\n", "\n")];

    for (input, expected) in cases {
        assert_eq!(first_scalar_value(input), expected);
    }
}
5136
/// A `|1` block scalar immediately followed by a `...` line must yield an
/// empty scalar.
#[test]
fn explicit_indent_block_scalar_can_end_at_document_marker() {
    let value = first_scalar_value("|1\n...\n");

    assert_eq!(value, "");
}
5141
/// Unindented content after a `|2` header must be rejected as wrongly
/// indented.
#[test]
fn root_explicit_indent_block_scalar_rejects_underindented_content() {
    let input = "|2\nx\n";
    let expected = "wrongly indented line in block scalar";

    assert_eq!(first_scanner_error_info(input), expected);
}
5149
/// A `---` line inside an open double-quoted scalar must be rejected.
#[test]
fn quoted_scalar_rejects_document_indicator_at_line_start() {
    let input = "\"one\n---\ntwo\"\n";
    let expected = "while scanning a quoted scalar, found unexpected document indicator";

    assert_eq!(first_scanner_error_info(input), expected);
}
5157
/// A continuation line of a quoted mapping value that starts with a tab must
/// be rejected.
#[test]
fn quoted_scalar_rejects_tab_indentation_after_line_break() {
    let input = "a: \"one\n\tbad\"\n";
    let expected = "tab cannot be used as indentation";

    assert_eq!(first_scanner_error_info(input), expected);
}
5165
/// A continuation line of a quoted mapping value that starts at column 0 must
/// be rejected.
#[test]
fn quoted_scalar_rejects_underindented_continuation() {
    let input = "a: \"one\nbad\"\n";
    let expected = "invalid indentation in multiline quoted scalar";

    assert_eq!(first_scanner_error_info(input), expected);
}
5173
/// Content at column 0 inside a flow sequence that was opened on an indented
/// line must be rejected.
#[test]
fn indented_flow_scalar_reports_invalid_indentation() {
    let input = "a:\n [\nfoo]\n";
    let expected = "invalid indentation";

    assert_eq!(first_scanner_error_info(input), expected);
}
5181
/// The input `a:`, `&b`, `- c` on successive lines must fail with the
/// simple-key error.
#[test]
fn required_simple_key_requires_value_at_stream_end() {
    let input = "a:\n&b\n- c\n";
    let expected = "simple key expect ':'";

    assert_eq!(first_scanner_error_info(input), expected);
}
5189
/// A `-` directly followed by `]` inside a flow sequence must be rejected.
#[test]
fn plain_scalar_rejects_dash_before_flow_indicator() {
    let input = "[-]\n";
    let expected = "plain scalar cannot start with '-' followed by ,[]{}";

    assert_eq!(first_scanner_error_info(input), expected);
}
5197
/// A tab after the `? ` explicit key indicator must be rejected.
#[test]
fn explicit_key_rejects_tab_after_indicator() {
    let input = "? \tfoo\n";
    let expected = "tabs disallowed in this context";

    assert_eq!(first_scanner_error_info(input), expected);
}
5205
/// Inside a flow sequence, `a:` directly followed by `[` must be rejected.
#[test]
fn flow_mapping_rejects_adjacent_collection_value_after_plain_key() {
    let input = "[a:[]]\n";
    let expected = "':' may not precede any of `[{` in flow mapping";

    assert_eq!(first_scanner_error_info(input), expected);
}
5213
/// Inside a flow sequence, a `:` placed on the line after its key must be
/// rejected.
#[test]
fn implicit_flow_mapping_colon_cannot_move_to_next_line() {
    let input = "[foo\n: bar]\n";
    let expected = "illegal placement of ':' indicator";

    assert_eq!(first_scanner_error_info(input), expected);
}
5221}