1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use alloc::{
13 borrow::{Cow, ToOwned},
14 collections::VecDeque,
15 string::String,
16 vec::Vec,
17};
18use core::{char, fmt};
19
20use crate::{
21 char_traits::{
22 as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
23 is_tag_char, is_uri_char,
24 },
25 input::{BorrowedInput, SkipTabs},
26};
27
/// Maximum distance, in characters, between a simple-key candidate and the current
/// scanner position before `stale_simple_keys` stops treating the candidate as possible.
const SIMPLE_KEY_MAX_LOOKAHEAD: usize = 1 << 10;
30
/// Character encoding reported in the stream-start token.
///
/// `Utf8` is the only variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8.
    Utf8,
}
37
/// Presentation style a scalar was written in.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ScalarStyle {
    /// Plain (unquoted) style.
    Plain,
    /// Single-quoted style.
    SingleQuoted,
    /// Double-quoted style.
    DoubleQuoted,

    /// Literal block style.
    Literal,
    /// Folded block style.
    Folded,
}
62
/// Offsets of a position in the input, expressed in two units.
#[derive(Debug, Clone, Copy, Default)]
pub struct MarkerOffsets {
    /// Offset counted in characters.
    chars: usize,
    /// Offset counted in bytes, when known.
    bytes: Option<usize>,
}

impl PartialEq for MarkerOffsets {
    /// Two offsets compare equal when their character offsets match.
    ///
    /// The byte offset does not take part in the comparison.
    fn eq(&self, rhs: &Self) -> bool {
        rhs.chars == self.chars
    }
}

impl Eq for MarkerOffsets {}
87
88#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
90pub struct Marker {
91 offsets: MarkerOffsets,
93 line: usize,
95 col: usize,
97}
98
99impl Marker {
100 #[must_use]
102 pub fn new(index: usize, line: usize, col: usize) -> Marker {
103 Marker {
104 offsets: MarkerOffsets {
105 chars: index,
106 bytes: None,
107 },
108 line,
109 col,
110 }
111 }
112
113 #[must_use]
115 pub fn with_byte_offset(mut self, byte_offset: Option<usize>) -> Marker {
116 self.offsets.bytes = byte_offset;
117 self
118 }
119
120 #[must_use]
122 pub fn index(&self) -> usize {
123 self.offsets.chars
124 }
125
126 #[must_use]
128 pub fn byte_offset(&self) -> Option<usize> {
129 self.offsets.bytes
130 }
131
132 #[must_use]
134 pub fn line(&self) -> usize {
135 self.line
136 }
137
138 #[must_use]
140 pub fn col(&self) -> usize {
141 self.col
142 }
143}
144
145#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
147pub struct Span {
148 pub start: Marker,
150 pub end: Marker,
152
153 pub indent: Option<usize>,
158}
159
160impl Span {
161 #[must_use]
163 pub fn new(start: Marker, end: Marker) -> Span {
164 Span {
165 start,
166 end,
167 indent: None,
168 }
169 }
170
171 #[must_use]
178 pub fn empty(mark: Marker) -> Span {
179 Span {
180 start: mark,
181 end: mark,
182 indent: None,
183 }
184 }
185
186 #[must_use]
188 pub fn with_indent(mut self, indent: Option<usize>) -> Span {
189 self.indent = indent;
190 self
191 }
192
193 #[must_use]
195 pub fn len(&self) -> usize {
196 self.end.index() - self.start.index()
197 }
198
199 #[must_use]
201 pub fn is_empty(&self) -> bool {
202 self.len() == 0
203 }
204
205 #[must_use]
207 pub fn byte_range(&self) -> Option<core::ops::Range<usize>> {
208 let start = self.start.byte_offset()?;
209 let end = self.end.byte_offset()?;
210 Some(start..end)
211 }
212}
213
214#[derive(Clone, PartialEq, Debug, Eq)]
216pub struct ScanError {
217 mark: Marker,
219 info: String,
221}
222
223impl ScanError {
224 #[must_use]
226 #[cold]
227 pub fn new(loc: Marker, info: String) -> ScanError {
228 ScanError { mark: loc, info }
229 }
230
231 #[must_use]
233 #[cold]
234 pub fn new_str(loc: Marker, info: &str) -> ScanError {
235 ScanError {
236 mark: loc,
237 info: info.to_owned(),
238 }
239 }
240
241 #[must_use]
243 pub fn marker(&self) -> &Marker {
244 &self.mark
245 }
246
247 #[must_use]
249 pub fn info(&self) -> &str {
250 self.info.as_ref()
251 }
252}
253
254impl fmt::Display for ScanError {
255 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256 write!(
257 f,
258 "{} at char {} line {} column {}",
259 self.info,
260 self.mark.index(),
261 self.mark.line(),
262 self.mark.col() + 1
263 )
264 }
265}
266
267impl core::error::Error for ScanError {}
268
269#[derive(Clone, PartialEq, Debug, Eq)]
271pub enum TokenType<'input> {
272 StreamStart(TEncoding),
274 StreamEnd,
276 VersionDirective(
278 u32,
280 u32,
282 ),
283 TagDirective(
285 Cow<'input, str>,
287 Cow<'input, str>,
289 ),
290 DocumentStart,
292 DocumentEnd,
294 BlockSequenceStart,
298 BlockMappingStart,
302 BlockEnd,
304 FlowSequenceStart,
306 FlowSequenceEnd,
308 FlowMappingStart,
310 FlowMappingEnd,
312 BlockEntry,
314 FlowEntry,
316 Key,
318 Value,
320 Alias(Cow<'input, str>),
322 Anchor(Cow<'input, str>),
324 Tag(
326 Cow<'input, str>,
328 Cow<'input, str>,
330 ),
331 Scalar(ScalarStyle, Cow<'input, str>),
333 ReservedDirective(
335 String,
337 Vec<String>,
339 ),
340}
341
342#[derive(Clone, PartialEq, Debug, Eq)]
344pub struct Token<'input>(pub Span, pub TokenType<'input>);
345
346#[derive(Clone, PartialEq, Debug, Eq)]
381struct SimpleKey {
382 possible: bool,
395 required: bool,
403 token_number: usize,
409 mark: Marker,
411}
412
413impl SimpleKey {
414 fn new(mark: Marker) -> SimpleKey {
416 SimpleKey {
417 possible: false,
418 required: false,
419 token_number: 0,
420 mark,
421 }
422 }
423}
424
/// An entry of the scanner's indentation stack.
#[derive(Debug, Clone, Default)]
struct Indent {
    /// The indentation level recorded by this entry.
    indent: isize,
    /// NOTE(review): presumably whether leaving this level must emit a `BlockEnd`
    /// token — the code consuming this flag is not in view; confirm.
    needs_block_end: bool,
}
449
/// State tracked per flow level for implicit mappings.
#[derive(PartialEq, Debug)]
enum ImplicitMappingState {
    /// An implicit mapping may still start.
    Possible,
    /// Currently inside an implicit mapping.
    ///
    /// NOTE(review): the meaning of the `u8` payload is not visible in this part of
    /// the file — confirm against the code that constructs it.
    Inside(u8),
}
483
484#[derive(Debug)]
494#[allow(clippy::struct_excessive_bools)]
495pub struct Scanner<'input, T> {
496 input: T,
500 mark: Marker,
502 tokens: VecDeque<Token<'input>>,
509 error: Option<ScanError>,
511
512 stream_start_produced: bool,
514 stream_end_produced: bool,
516 adjacent_value_allowed_at: usize,
519 simple_key_allowed: bool,
523 simple_keys: smallvec::SmallVec<[SimpleKey; 8]>,
528 indent: isize,
530 indents: smallvec::SmallVec<[Indent; 8]>,
532 flow_level: u8,
534 tokens_parsed: usize,
538 token_available: bool,
540 leading_whitespace: bool,
542 flow_mapping_started: smallvec::SmallVec<[bool; 8]>,
549 implicit_flow_mapping_states: smallvec::SmallVec<[ImplicitMappingState; 8]>,
562 interrupted_plain_by_comment: Option<Marker>,
565 flow_markers: smallvec::SmallVec<[(Marker, char); 8]>,
567 buf_leading_break: String,
568 buf_trailing_breaks: String,
569 buf_whitespaces: String,
570}
571
572impl<'input, T: BorrowedInput<'input>> Iterator for Scanner<'input, T> {
573 type Item = Token<'input>;
574
575 fn next(&mut self) -> Option<Self::Item> {
576 if self.error.is_some() {
577 return None;
578 }
579 match self.next_token() {
580 Ok(Some(tok)) => {
581 debug_print!(
582 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
583 tok.1,
584 tok.0
585 );
586 Some(tok)
587 }
588 Ok(tok) => tok,
589 Err(e) => {
590 self.error = Some(e);
591 None
592 }
593 }
594 }
595}
596
597pub type ScanResult = Result<(), ScanError>;
599
/// Accumulator for the text of a flow scalar.
///
/// The text is either tracked as a range of offsets into the input (`Borrowed`) or
/// collected into an owned `String` (`Owned`).
#[derive(Debug)]
enum FlowScalarBuf {
    /// The text is the input range `start..end`.
    Borrowed {
        /// Offset at which the text begins.
        start: usize,
        /// Offset at which the committed text ends.
        end: usize,
        /// Start of whitespace that has been seen but not yet committed, if any.
        pending_ws_start: Option<usize>,
        /// End of the pending whitespace.
        pending_ws_end: usize,
    },
    /// The text has been copied into an owned string.
    Owned(String),
}

impl FlowScalarBuf {
    /// Creates an empty borrowed buffer positioned at `start`.
    #[inline]
    fn new_borrowed(start: usize) -> Self {
        FlowScalarBuf::Borrowed {
            start,
            end: start,
            pending_ws_start: None,
            pending_ws_end: start,
        }
    }

    /// Creates an empty owned buffer.
    #[inline]
    fn new_owned() -> Self {
        FlowScalarBuf::Owned(String::new())
    }

    /// Returns the owned string, or `None` when the buffer is borrowed.
    #[inline]
    fn as_owned_mut(&mut self) -> Option<&mut String> {
        if let Self::Owned(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Extends the committed text over the pending whitespace, if there is any.
    ///
    /// Does nothing for an owned buffer.
    #[inline]
    fn commit_pending_ws(&mut self) {
        let Self::Borrowed {
            end,
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };
        // `take` clears the pending marker; the end only moves if one was set.
        if pending_ws_start.take().is_some() {
            *end = *pending_ws_end;
        }
    }

    /// Records whitespace spanning `ws_start..ws_end` as pending.
    ///
    /// The start of an already pending run is kept; only its end is moved.
    /// Does nothing for an owned buffer.
    #[inline]
    fn note_pending_ws(&mut self, ws_start: usize, ws_end: usize) {
        let Self::Borrowed {
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };
        pending_ws_start.get_or_insert(ws_start);
        *pending_ws_end = ws_end;
    }

    /// Forgets any pending whitespace, resetting its end to the committed end.
    ///
    /// Does nothing for an owned buffer.
    #[inline]
    fn discard_pending_ws(&mut self) {
        let Self::Borrowed {
            end,
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };
        *pending_ws_end = *end;
        *pending_ws_start = None;
    }
}
685
686impl<'input, T: BorrowedInput<'input>> Scanner<'input, T> {
687 #[inline]
688 fn promote_flow_scalar_buf_to_owned(
689 &self,
690 start_mark: &Marker,
691 buf: &mut FlowScalarBuf,
692 ) -> Result<(), ScanError> {
693 let FlowScalarBuf::Borrowed {
694 start,
695 end,
696 pending_ws_start: _,
697 pending_ws_end: _,
698 } = *buf
699 else {
700 return Ok(());
701 };
702
703 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
704 ScanError::new_str(
705 *start_mark,
706 "internal error: input advertised offsets but did not provide a slice",
707 )
708 })?;
709 *buf = FlowScalarBuf::Owned(slice.to_owned());
710 Ok(())
711 }
712 #[inline]
718 fn try_borrow_slice(&self, start: usize, end: usize) -> Option<&'input str> {
719 self.input.slice_borrowed(start, end)
720 }
721
722 fn scan_tag_handle_directive_cow(
727 &mut self,
728 mark: &Marker,
729 ) -> Result<Cow<'input, str>, ScanError> {
730 let Some(start) = self.input.byte_offset() else {
731 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
732 };
733
734 if self.input.look_ch() != '!' {
735 return Err(ScanError::new_str(
736 *mark,
737 "while scanning a tag, did not find expected '!'",
738 ));
739 }
740
741 self.skip_non_blank();
743
744 self.input.lookahead(1);
747 while self.input.next_is_alpha() {
748 self.skip_non_blank();
749 self.input.lookahead(1);
750 }
751
752 if self.input.peek() == '!' {
754 self.skip_non_blank();
755 }
756
757 let Some(end) = self.input.byte_offset() else {
758 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
760 };
761
762 let Some(slice) = self.try_borrow_slice(start, end) else {
763 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
765 ScanError::new_str(
766 *mark,
767 "internal error: input advertised slicing but did not provide a slice",
768 )
769 })?;
770 if !slice.ends_with('!') && slice != "!" {
771 return Err(ScanError::new_str(
772 *mark,
773 "while parsing a tag directive, did not find expected '!'",
774 ));
775 }
776 return Ok(Cow::Owned(slice.to_owned()));
777 };
778
779 if !slice.ends_with('!') && slice != "!" {
780 return Err(ScanError::new_str(
781 *mark,
782 "while parsing a tag directive, did not find expected '!'",
783 ));
784 }
785
786 Ok(Cow::Borrowed(slice))
787 }
788
789 fn scan_tag_prefix_directive_cow(
794 &mut self,
795 start_mark: &Marker,
796 ) -> Result<Cow<'input, str>, ScanError> {
797 let Some(start) = self.input.byte_offset() else {
798 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
799 };
800
801 if self.input.look_ch() == '!' {
803 self.skip_non_blank();
804 } else if !is_tag_char(self.input.peek()) {
805 return Err(ScanError::new_str(
806 *start_mark,
807 "invalid global tag character",
808 ));
809 } else if self.input.peek() == '%' {
810 } else {
812 self.skip_non_blank();
813 }
814
815 while is_uri_char(self.input.look_ch()) {
817 if self.input.peek() == '%' {
818 break;
819 }
820 self.skip_non_blank();
821 }
822
823 if self.input.peek() == '%' {
825 let current = self
826 .input
827 .byte_offset()
828 .expect("byte_offset() must remain available once enabled");
829 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
830 slice.to_owned()
831 } else {
832 String::new()
833 };
834
835 while is_uri_char(self.input.look_ch()) {
836 if self.input.peek() == '%' {
837 out.push(self.scan_uri_escapes(start_mark)?);
838 } else {
839 out.push(self.input.peek());
840 self.skip_non_blank();
841 }
842 }
843 return Ok(Cow::Owned(out));
844 }
845
846 let Some(end) = self.input.byte_offset() else {
847 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
848 };
849
850 let Some(slice) = self.try_borrow_slice(start, end) else {
851 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
853 ScanError::new_str(
854 *start_mark,
855 "internal error: input advertised slicing but did not provide a slice",
856 )
857 })?;
858 return Ok(Cow::Owned(slice.to_owned()));
859 };
860
861 Ok(Cow::Borrowed(slice))
862 }
863 pub fn new(input: T) -> Self {
865 let initial_byte_offset = input.byte_offset();
866 Scanner {
867 input,
868 mark: Marker::new(0, 1, 0).with_byte_offset(initial_byte_offset),
869 tokens: VecDeque::with_capacity(64),
870 error: None,
871
872 stream_start_produced: false,
873 stream_end_produced: false,
874 adjacent_value_allowed_at: 0,
875 simple_key_allowed: true,
876 simple_keys: smallvec::SmallVec::new(),
877 indent: -1,
878 indents: smallvec::SmallVec::new(),
879 flow_level: 0,
880 tokens_parsed: 0,
881 token_available: false,
882 leading_whitespace: true,
883 flow_mapping_started: smallvec::SmallVec::new(),
884 implicit_flow_mapping_states: smallvec::SmallVec::new(),
885 flow_markers: smallvec::SmallVec::new(),
886 interrupted_plain_by_comment: None,
887
888 buf_leading_break: String::with_capacity(128),
889 buf_trailing_breaks: String::with_capacity(128),
890 buf_whitespaces: String::with_capacity(128),
891 }
892 }
893
894 #[inline]
899 pub fn get_error(&self) -> Option<ScanError> {
900 self.error.clone()
901 }
902
903 #[cold]
904 fn simple_key_expected(&self) -> ScanError {
905 ScanError::new_str(self.mark, "simple key expected")
906 }
907
908 #[cold]
909 fn unclosed_bracket(mark: Marker, bracket: char) -> ScanError {
910 ScanError::new(mark, format!("unclosed bracket '{bracket}'"))
911 }
912
913 #[inline]
915 fn skip_blank(&mut self) {
916 self.input.skip();
917
918 self.mark.offsets.chars += 1;
919 self.mark.col += 1;
920 self.mark.offsets.bytes = self.input.byte_offset();
921 }
922
923 #[inline]
925 fn skip_non_blank(&mut self) {
926 self.input.skip();
927
928 self.mark.offsets.chars += 1;
929 self.mark.col += 1;
930 self.mark.offsets.bytes = self.input.byte_offset();
931 self.leading_whitespace = false;
932 }
933
934 #[inline]
936 fn skip_n_non_blank(&mut self, count: usize) {
937 for _ in 0..count {
938 self.input.skip();
939 self.mark.offsets.chars += 1;
940 self.mark.col += 1;
941 }
942 self.mark.offsets.bytes = self.input.byte_offset();
943 self.leading_whitespace = false;
944 }
945
946 #[inline]
948 fn skip_nl(&mut self) {
949 self.input.skip();
950
951 self.mark.offsets.chars += 1;
952 self.mark.col = 0;
953 self.mark.line += 1;
954 self.mark.offsets.bytes = self.input.byte_offset();
955 self.leading_whitespace = true;
956 }
957
958 #[inline]
960 fn skip_linebreak(&mut self) {
961 if self.input.next_2_are('\r', '\n') {
962 self.skip_blank();
965 self.skip_nl();
966 } else if self.input.next_is_break() {
967 self.skip_nl();
968 }
969 }
970
971 #[inline]
973 pub fn stream_started(&self) -> bool {
974 self.stream_start_produced
975 }
976
977 #[inline]
979 pub fn stream_ended(&self) -> bool {
980 self.stream_end_produced
981 }
982
983 #[inline]
985 pub fn mark(&self) -> Marker {
986 self.mark
987 }
988
989 #[inline]
996 fn read_break(&mut self, s: &mut String) {
997 self.skip_break();
998 s.push('\n');
999 }
1000
1001 #[inline]
1006 fn skip_break(&mut self) {
1007 let c = self.input.peek();
1008 let nc = self.input.peek_nth(1);
1009 debug_assert!(is_break(c));
1010 if c == '\r' && nc == '\n' {
1011 self.skip_blank();
1012 }
1013 self.skip_nl();
1014 }
1015
1016 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
1018 let old_len = self.tokens.len();
1019 assert!(pos <= old_len);
1020 self.tokens.insert(pos, tok);
1021 }
1022
1023 #[inline]
1024 fn allow_simple_key(&mut self) {
1025 self.simple_key_allowed = true;
1026 }
1027
1028 #[inline]
1029 fn disallow_simple_key(&mut self) {
1030 self.simple_key_allowed = false;
1031 }
1032
    /// Scans the input at the current position and queues the token(s) found there.
    ///
    /// The very first call only queues the stream-start token. Later calls skip
    /// whitespace and comments, expire stale simple keys, unroll the indentation to the
    /// current column, and then dispatch on the next character(s).
    ///
    /// # Errors
    ///
    /// Returns an error for invalid indentation, for content after a document end
    /// marker, for the characters `%`, `@` and `` ` `` where a token must start, and
    /// for any error raised by the called fetch routine.
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.input.lookahead(1);

        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        self.skip_to_next_token()?;

        debug_print!(
            " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
            self.mark,
            self.input.peek()
        );

        self.stale_simple_keys()?;

        let mark = self.mark;
        self.unroll_indent(mark.col as isize);

        self.input.lookahead(4);

        if self.input.next_is_z() {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Directives and document indicators are only looked for at column 0.
        if self.mark.col == 0 {
            if self.input.next_char_is('%') {
                return self.fetch_directive();
            } else if self.input.next_is_document_start() {
                return self.fetch_document_indicator(TokenType::DocumentStart);
            } else if self.input.next_is_document_end() {
                self.fetch_document_indicator(TokenType::DocumentEnd)?;
                // Only blanks (and whatever `skip_ws_to_eol` skips) may follow the marker
                // on its line.
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid content after document end marker",
                    ));
                }
                return Ok(());
            }
        }

        // Content left of the current indent is an error, except for `]`, `}` and `,`
        // inside a flow collection.
        if (self.mark.col as isize) < self.indent {
            self.input.lookahead(1);
            let c = self.input.peek();
            if self.flow_level == 0 || !matches!(c, ']' | '}' | ',') {
                return Err(ScanError::new_str(self.mark, "invalid indentation"));
            }
        }

        let c = self.input.peek();
        let nc = self.input.peek_nth(1);
        // The arms are tried in order: indicator arms with guards come first, and
        // anything that matches none of them is scanned as a plain scalar.
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
            '?' if is_blank_or_breakz(nc) => self.fetch_key(),
            ':' if is_blank_or_breakz(nc) => self.fetch_value(),
            // In flow context a ':' is also a value indicator when followed by a flow
            // indicator or when it sits exactly at `adjacent_value_allowed_at`.
            ':' if self.flow_level > 0
                && (is_flow(nc) || self.mark.index() == self.adjacent_value_allowed_at) =>
            {
                self.fetch_flow_value()
            }
            // `*` and `&` share one routine, told apart by the boolean argument.
            '*' => self.fetch_anchor(true),
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Block scalars are only recognised outside flow collections.
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
            ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
                self.fetch_plain_scalar()
            }
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                format!("unexpected character: `{c}'"),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
1129
1130 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
1134 if self.stream_end_produced {
1135 return Ok(None);
1136 }
1137
1138 if !self.token_available {
1139 self.fetch_more_tokens()?;
1140 }
1141 let Some(t) = self.tokens.pop_front() else {
1142 return Err(ScanError::new_str(
1143 self.mark,
1144 "did not find expected next token",
1145 ));
1146 };
1147 self.token_available = false;
1148 self.tokens_parsed += 1;
1149
1150 if let TokenType::StreamEnd = t.1 {
1151 self.stream_end_produced = true;
1152 }
1153 Ok(Some(t))
1154 }
1155
1156 pub fn fetch_more_tokens(&mut self) -> ScanResult {
1160 let mut need_more;
1161 loop {
1162 if self.tokens.is_empty() {
1163 need_more = true;
1164 } else {
1165 need_more = false;
1166 self.stale_simple_keys()?;
1168 for sk in &self.simple_keys {
1170 if sk.possible && sk.token_number == self.tokens_parsed {
1171 need_more = true;
1172 break;
1173 }
1174 }
1175 }
1176
1177 if let Some(token) = self.tokens.back() {
1180 if matches!(token.1, TokenType::DocumentEnd | TokenType::DocumentStart) {
1181 break;
1182 }
1183 }
1184
1185 if !need_more {
1186 break;
1187 }
1188 self.fetch_next_token()?;
1189 }
1190 self.token_available = true;
1191
1192 Ok(())
1193 }
1194
1195 fn stale_simple_keys(&mut self) -> ScanResult {
1203 for sk in &mut self.simple_keys {
1204 let is_line_stale = self.flow_level == 0 && sk.mark.line < self.mark.line;
1205 let is_length_stale =
1208 self.mark.index().saturating_sub(sk.mark.index()) > SIMPLE_KEY_MAX_LOOKAHEAD;
1209
1210 if sk.possible && (is_line_stale || is_length_stale) {
1211 if sk.required {
1212 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
1213 }
1214 sk.possible = false;
1215 }
1216 }
1217 Ok(())
1218 }
1219
    /// Skips blanks, line breaks and comments up to the start of the next token.
    ///
    /// # Errors
    ///
    /// Fails when a tab is used where block indentation is expected, and when a plain
    /// scalar that was interrupted by a comment appears to continue on the next line.
    fn skip_to_next_token(&mut self) -> ScanResult {
        // Consumes one line break; outside flow collections a new line allows a
        // simple key again.
        let consume_linebreak = |this: &mut Self| {
            this.input.lookahead(2);
            this.skip_linebreak();
            if this.flow_level == 0 {
                this.allow_simple_key();
            }
        };

        loop {
            match self.input.look_ch() {
                '\t' => {
                    // A tab in the leading whitespace of a block line, left of the
                    // current indent, is only tolerated on an otherwise blank line.
                    if self.is_within_block()
                        && self.leading_whitespace
                        && (self.mark.col as isize) < self.indent
                    {
                        self.skip_ws_to_eol(SkipTabs::Yes)?;

                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tabs disallowed within this context (block indentation)",
                            ));
                        }

                        if matches!(self.input.look_ch(), '\n' | '\r') {
                            consume_linebreak(self);
                        }
                    } else {
                        self.skip_blank();
                    }
                }

                ' ' => self.skip_blank(),

                '\n' | '\r' => consume_linebreak(self),

                '#' => {
                    // Skip the comment up to, but not including, the line break.
                    let n = self.input.skip_while_non_breakz();
                    self.mark.offsets.chars += n;
                    self.mark.col += n;
                    self.mark.offsets.bytes = self.input.byte_offset();

                    if matches!(self.input.look_ch(), '\n' | '\r') {
                        consume_linebreak(self);
                    }
                }

                _ => break,
            }
        }

        // `take` clears the recorded interruption whether or not it leads to an error.
        if let Some(err_mark) = self.interrupted_plain_by_comment.take() {
            let is_immediate_next_line = self.mark.line == err_mark.line + 1;

            if self.flow_level == 0
                && is_immediate_next_line
                && (self.mark.col as isize) > self.indent
            {
                self.input.lookahead(4);

                // Only content that could continue a plain scalar is rejected; end of
                // input and document indicators are fine.
                if !self.input.next_is_z()
                    && !self.input.next_is_document_indicator()
                    && self.input.next_can_be_plain_scalar(false)
                {
                    return Err(ScanError::new_str(
                        err_mark,
                        "comment intercepting the multiline text",
                    ));
                }
            }
        }

        Ok(())
    }
1317
1318 fn skip_yaml_whitespace(&mut self) -> ScanResult {
1323 let mut need_whitespace = true;
1324 loop {
1325 match self.input.look_ch() {
1326 ' ' => {
1327 self.skip_blank();
1328
1329 need_whitespace = false;
1330 }
1331 '\n' | '\r' => {
1332 self.input.lookahead(2);
1333 self.skip_linebreak();
1334 if self.flow_level == 0 {
1335 self.allow_simple_key();
1336 }
1337 need_whitespace = false;
1338 }
1339 '#' => {
1340 let comment_length = self.input.skip_while_non_breakz();
1341 self.mark.offsets.chars += comment_length;
1342 self.mark.col += comment_length;
1343 self.mark.offsets.bytes = self.input.byte_offset();
1344 }
1345 _ => break,
1346 }
1347 }
1348
1349 if need_whitespace {
1350 Err(ScanError::new_str(self.mark(), "expected whitespace"))
1351 } else {
1352 Ok(())
1353 }
1354 }
1355
1356 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
1357 let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
1358 self.mark.col += n_bytes;
1359 self.mark.offsets.chars += n_bytes;
1360 self.mark.offsets.bytes = self.input.byte_offset();
1361 result.map_err(|msg| ScanError::new_str(self.mark, msg))
1362 }
1363
1364 fn fetch_stream_start(&mut self) {
1365 let mark = self.mark;
1366 self.indent = -1;
1367 self.stream_start_produced = true;
1368 self.allow_simple_key();
1369 self.tokens.push_back(Token(
1370 Span::empty(mark),
1371 TokenType::StreamStart(TEncoding::Utf8),
1372 ));
1373 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1374 }
1375
1376 fn fetch_stream_end(&mut self) -> ScanResult {
1377 if self.mark.col != 0 {
1379 self.mark.col = 0;
1380 self.mark.line += 1;
1381 }
1382
1383 if let Some((mark, bracket)) = self.flow_markers.pop() {
1384 return Err(Self::unclosed_bracket(mark, bracket));
1385 }
1386
1387 for sk in &mut self.simple_keys {
1390 if sk.required && sk.possible {
1391 return Err(self.simple_key_expected());
1392 }
1393 sk.possible = false;
1394 }
1395
1396 self.unroll_indent(-1);
1397 self.remove_simple_key()?;
1398 self.disallow_simple_key();
1399
1400 self.tokens
1401 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
1402 Ok(())
1403 }
1404
1405 fn fetch_directive(&mut self) -> ScanResult {
1406 self.unroll_indent(-1);
1407 self.remove_simple_key()?;
1408
1409 self.disallow_simple_key();
1410
1411 let tok = self.scan_directive()?;
1412 self.tokens.push_back(tok);
1413
1414 Ok(())
1415 }
1416
1417 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
1418 let start_mark = self.mark;
1419 self.skip_non_blank();
1420
1421 let name = self.scan_directive_name()?;
1422 let tok = match name.as_ref() {
1423 "YAML" => self.scan_version_directive_value(&start_mark)?,
1424 "TAG" => self.scan_tag_directive_value(&start_mark)?,
1425 _ => {
1426 let mut params = Vec::new();
1427 while self.input.next_is_blank() {
1428 let n_blanks = self.input.skip_while_blank();
1429 self.mark.offsets.chars += n_blanks;
1430 self.mark.col += n_blanks;
1431 self.mark.offsets.bytes = self.input.byte_offset();
1432
1433 if !is_blank_or_breakz(self.input.peek()) {
1434 let mut param = String::new();
1435 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut param);
1436 self.mark.offsets.chars += n_chars;
1437 self.mark.col += n_chars;
1438 self.mark.offsets.bytes = self.input.byte_offset();
1439 params.push(param);
1440 }
1441 }
1442
1443 Token(
1444 Span::new(start_mark, self.mark),
1445 TokenType::ReservedDirective(name, params),
1446 )
1447 }
1448 };
1449
1450 self.skip_ws_to_eol(SkipTabs::Yes)?;
1451
1452 if self.input.next_is_breakz() {
1453 self.input.lookahead(2);
1454 self.skip_linebreak();
1455 Ok(tok)
1456 } else {
1457 Err(ScanError::new_str(
1458 start_mark,
1459 "while scanning a directive, did not find expected comment or line break",
1460 ))
1461 }
1462 }
1463
1464 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1465 let n_blanks = self.input.skip_while_blank();
1466 self.mark.offsets.chars += n_blanks;
1467 self.mark.col += n_blanks;
1468 self.mark.offsets.bytes = self.input.byte_offset();
1469
1470 let major = self.scan_version_directive_number(mark)?;
1471
1472 if self.input.peek() != '.' {
1473 return Err(ScanError::new_str(
1474 *mark,
1475 "while scanning a YAML directive, did not find expected digit or '.' character",
1476 ));
1477 }
1478 self.skip_non_blank();
1479
1480 let minor = self.scan_version_directive_number(mark)?;
1481
1482 Ok(Token(
1483 Span::new(*mark, self.mark),
1484 TokenType::VersionDirective(major, minor),
1485 ))
1486 }
1487
1488 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1489 let start_mark = self.mark;
1490 let mut string = String::new();
1491
1492 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut string);
1493 self.mark.offsets.chars += n_chars;
1494 self.mark.col += n_chars;
1495 self.mark.offsets.bytes = self.input.byte_offset();
1496
1497 if string.is_empty() {
1498 return Err(ScanError::new_str(
1499 start_mark,
1500 "while scanning a directive, could not find expected directive name",
1501 ));
1502 }
1503
1504 if !is_blank_or_breakz(self.input.peek()) {
1505 return Err(ScanError::new_str(
1506 start_mark,
1507 "while scanning a directive, found unexpected non-alphabetical character",
1508 ));
1509 }
1510
1511 Ok(string)
1512 }
1513
1514 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1515 let mut val = 0u32;
1516 let mut length = 0usize;
1517 while let Some(digit) = self.input.look_ch().to_digit(10) {
1518 if length + 1 > 9 {
1519 return Err(ScanError::new_str(
1520 *mark,
1521 "while scanning a YAML directive, found extremely long version number",
1522 ));
1523 }
1524 length += 1;
1525 val = val * 10 + digit;
1526 self.skip_non_blank();
1527 }
1528
1529 if length == 0 {
1530 return Err(ScanError::new_str(
1531 *mark,
1532 "while scanning a YAML directive, did not find expected version number",
1533 ));
1534 }
1535
1536 Ok(val)
1537 }
1538
1539 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1540 let n_blanks = self.input.skip_while_blank();
1541 self.mark.offsets.chars += n_blanks;
1542 self.mark.col += n_blanks;
1543 self.mark.offsets.bytes = self.input.byte_offset();
1544
1545 let handle = self.scan_tag_handle_directive_cow(mark)?;
1546
1547 let n_blanks = self.input.skip_while_blank();
1548 self.mark.offsets.chars += n_blanks;
1549 self.mark.col += n_blanks;
1550 self.mark.offsets.bytes = self.input.byte_offset();
1551
1552 let prefix = self.scan_tag_prefix_directive_cow(mark)?;
1553
1554 self.input.lookahead(1);
1555
1556 if self.input.next_is_blank_or_breakz() {
1557 Ok(Token(
1558 Span::new(*mark, self.mark),
1559 TokenType::TagDirective(handle, prefix),
1560 ))
1561 } else {
1562 Err(ScanError::new_str(
1563 *mark,
1564 "while scanning TAG, did not find expected whitespace or line break",
1565 ))
1566 }
1567 }
1568
1569 fn fetch_tag(&mut self) -> ScanResult {
1570 self.save_simple_key();
1571 self.disallow_simple_key();
1572
1573 let tok = self.scan_tag()?;
1574 self.tokens.push_back(tok);
1575 Ok(())
1576 }
1577
    /// Scans a tag and returns a `Tag(handle, suffix)` token, borrowing from the input
    /// where possible.
    ///
    /// Inputs that report no byte offset are handled by `scan_tag_owned`.
    ///
    /// # Errors
    ///
    /// Fails when the tag is malformed, or when it is not followed by a blank, a line
    /// break, the end of input, or (inside a flow collection) one of `,`, `]`, `}`.
    fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;

        self.input.lookahead(2);

        if self.input.byte_offset().is_none() {
            return self.scan_tag_owned(&start_mark);
        }

        let (handle, suffix): (Cow<'input, str>, Cow<'input, str>) =
            if self.input.nth_char_is(1, '<') {
                // Verbatim form: the handle is empty and the whole tag is the suffix.
                let suffix = self.scan_verbatim_tag(&start_mark)?;
                (Cow::Owned(String::new()), Cow::Owned(suffix))
            } else {
                let handle = self.scan_tag_handle_cow(&start_mark)?;
                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                    // A complete handle (`!!` or `!name!`): the suffix follows it.
                    let suffix = self.scan_tag_shorthand_suffix_cow(&start_mark, true)?;
                    (handle, suffix)
                } else {
                    // No closing `!`: what was scanned after the first `!` is really
                    // the beginning of the suffix.
                    let remaining_suffix =
                        self.scan_tag_shorthand_suffix_cow(&start_mark, false)?;

                    let suffix = if handle.len() > 1 {
                        if remaining_suffix.is_empty() {
                            // Drop the leading `!`, keeping a borrow when we have one.
                            match handle {
                                Cow::Borrowed(s) => Cow::Borrowed(&s[1..]),
                                Cow::Owned(s) => Cow::Owned(s[1..].to_owned()),
                            }
                        } else {
                            let mut combined = handle[1..].to_owned();
                            combined.push_str(&remaining_suffix);
                            Cow::Owned(combined)
                        }
                    } else {
                        remaining_suffix
                    };

                    if suffix.is_empty() {
                        // A lone `!`: empty handle, `!` as the suffix.
                        (Cow::Borrowed(""), Cow::Borrowed("!"))
                    } else {
                        (Cow::Borrowed("!"), suffix)
                    }
                }
            };

        if is_blank_or_breakz(self.input.look_ch())
            || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
        {
            Ok(Token(
                Span::new(start_mark, self.mark),
                TokenType::Tag(handle, suffix),
            ))
        } else {
            Err(ScanError::new_str(
                start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
1654
1655 fn scan_tag_owned(&mut self, start_mark: &Marker) -> Result<Token<'input>, ScanError> {
1657 let mut handle = String::new();
1658 let mut suffix;
1659
1660 if self.input.nth_char_is(1, '<') {
1661 suffix = self.scan_verbatim_tag(start_mark)?;
1662 } else {
1663 handle = self.scan_tag_handle(false, start_mark)?;
1665 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1667 let is_secondary_handle = handle == "!!";
1669 suffix =
1670 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", start_mark)?;
1671 } else {
1672 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, start_mark)?;
1673 "!".clone_into(&mut handle);
1674 if suffix.is_empty() {
1677 handle.clear();
1678 "!".clone_into(&mut suffix);
1679 }
1680 }
1681 }
1682
1683 if is_blank_or_breakz(self.input.look_ch())
1684 || (self.flow_level > 0 && matches!(self.input.peek(), ',' | ']' | '}'))
1685 {
1686 Ok(Token(
1688 Span::new(*start_mark, self.mark),
1689 TokenType::Tag(handle.into(), suffix.into()),
1690 ))
1691 } else {
1692 Err(ScanError::new_str(
1693 *start_mark,
1694 "while scanning a tag, did not find expected whitespace or line break",
1695 ))
1696 }
1697 }
1698
1699 fn scan_tag_handle_cow(&mut self, mark: &Marker) -> Result<Cow<'input, str>, ScanError> {
1704 let Some(start) = self.input.byte_offset() else {
1705 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1706 };
1707
1708 if self.input.look_ch() != '!' {
1709 return Err(ScanError::new_str(
1710 *mark,
1711 "while scanning a tag, did not find expected '!'",
1712 ));
1713 }
1714
1715 self.skip_non_blank();
1717
1718 self.input.lookahead(1);
1720 while self.input.next_is_alpha() {
1721 self.skip_non_blank();
1722 self.input.lookahead(1);
1723 }
1724
1725 if self.input.peek() == '!' {
1727 self.skip_non_blank();
1728 }
1729
1730 let Some(end) = self.input.byte_offset() else {
1731 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1732 };
1733
1734 if let Some(slice) = self.try_borrow_slice(start, end) {
1735 Ok(Cow::Borrowed(slice))
1736 } else {
1737 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1738 ScanError::new_str(
1739 *mark,
1740 "internal error: input advertised slicing but did not provide a slice",
1741 )
1742 })?;
1743 Ok(Cow::Owned(slice.to_owned()))
1744 }
1745 }
1746
1747 fn scan_tag_shorthand_suffix_cow(
1751 &mut self,
1752 mark: &Marker,
1753 require_non_empty: bool,
1754 ) -> Result<Cow<'input, str>, ScanError> {
1755 let Some(start) = self.input.byte_offset() else {
1756 return Ok(Cow::Owned(
1757 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
1758 ));
1759 };
1760
1761 while is_tag_char(self.input.look_ch()) {
1763 if self.input.peek() == '%' {
1764 let current = self
1766 .input
1767 .byte_offset()
1768 .expect("byte_offset() must remain available once enabled");
1769 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
1770 slice.to_owned()
1771 } else {
1772 String::new()
1773 };
1774
1775 while is_tag_char(self.input.look_ch()) {
1777 if self.input.peek() == '%' {
1778 out.push(self.scan_uri_escapes(mark)?);
1779 } else {
1780 out.push(self.input.peek());
1781 self.skip_non_blank();
1782 }
1783 }
1784 return Ok(Cow::Owned(out));
1785 }
1786 self.skip_non_blank();
1787 }
1788
1789 let Some(end) = self.input.byte_offset() else {
1790 return Ok(Cow::Owned(
1791 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
1792 ));
1793 };
1794
1795 if require_non_empty && start == end {
1796 return Err(ScanError::new_str(
1797 *mark,
1798 "while parsing a tag, did not find expected tag URI",
1799 ));
1800 }
1801
1802 if let Some(slice) = self.try_borrow_slice(start, end) {
1803 Ok(Cow::Borrowed(slice))
1804 } else {
1805 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1806 ScanError::new_str(
1807 *mark,
1808 "internal error: input advertised slicing but did not provide a slice",
1809 )
1810 })?;
1811 Ok(Cow::Owned(slice.to_owned()))
1812 }
1813 }
1814
1815 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1816 let mut string = String::new();
1817 if self.input.look_ch() != '!' {
1818 return Err(ScanError::new_str(
1819 *mark,
1820 "while scanning a tag, did not find expected '!'",
1821 ));
1822 }
1823
1824 string.push(self.input.peek());
1825 self.skip_non_blank();
1826
1827 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1828 self.mark.offsets.chars += n_chars;
1829 self.mark.col += n_chars;
1830 self.mark.offsets.bytes = self.input.byte_offset();
1831
1832 if self.input.peek() == '!' {
1834 string.push(self.input.peek());
1835 self.skip_non_blank();
1836 } else if directive && string != "!" {
1837 return Err(ScanError::new_str(
1841 *mark,
1842 "while parsing a tag directive, did not find expected '!'",
1843 ));
1844 }
1845 Ok(string)
1846 }
1847
1848 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1854 let mut string = String::new();
1855
1856 if self.input.look_ch() == '!' {
1857 string.push(self.input.peek());
1859 self.skip_non_blank();
1860 } else if !is_tag_char(self.input.peek()) {
1861 return Err(ScanError::new_str(
1863 *start_mark,
1864 "invalid global tag character",
1865 ));
1866 } else if self.input.peek() == '%' {
1867 string.push(self.scan_uri_escapes(start_mark)?);
1869 } else {
1870 string.push(self.input.peek());
1872 self.skip_non_blank();
1873 }
1874
1875 while is_uri_char(self.input.look_ch()) {
1876 if self.input.peek() == '%' {
1877 string.push(self.scan_uri_escapes(start_mark)?);
1878 } else {
1879 string.push(self.input.peek());
1880 self.skip_non_blank();
1881 }
1882 }
1883
1884 Ok(string)
1885 }
1886
1887 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1891 self.skip_non_blank();
1893 self.skip_non_blank();
1894
1895 let mut string = String::new();
1896 while is_uri_char(self.input.look_ch()) {
1897 if self.input.peek() == '%' {
1898 string.push(self.scan_uri_escapes(start_mark)?);
1899 } else {
1900 string.push(self.input.peek());
1901 self.skip_non_blank();
1902 }
1903 }
1904
1905 if string.is_empty() {
1906 return Err(ScanError::new_str(
1907 *start_mark,
1908 "while parsing a tag, did not find expected tag URI",
1909 ));
1910 }
1911
1912 if self.input.peek() != '>' {
1913 return Err(ScanError::new_str(
1914 *start_mark,
1915 "while scanning a verbatim tag, did not find the expected '>'",
1916 ));
1917 }
1918 self.skip_non_blank();
1919
1920 Ok(string)
1921 }
1922
1923 fn scan_tag_shorthand_suffix(
1924 &mut self,
1925 _directive: bool,
1926 _is_secondary: bool,
1927 head: &str,
1928 mark: &Marker,
1929 ) -> Result<String, ScanError> {
1930 let mut length = head.len();
1931 let mut string = String::new();
1932
1933 if length > 1 {
1936 string.extend(head.chars().skip(1));
1937 }
1938
1939 while is_tag_char(self.input.look_ch()) {
1940 if self.input.peek() == '%' {
1942 string.push(self.scan_uri_escapes(mark)?);
1943 } else {
1944 string.push(self.input.peek());
1945 self.skip_non_blank();
1946 }
1947
1948 length += 1;
1949 }
1950
1951 if length == 0 {
1952 return Err(ScanError::new_str(
1953 *mark,
1954 "while parsing a tag, did not find expected tag URI",
1955 ));
1956 }
1957
1958 Ok(string)
1959 }
1960
1961 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1962 let mut width = 0usize;
1963 let mut bytes = [0u8; 4];
1964 let mut bytes_len = 0usize;
1965 loop {
1966 self.input.lookahead(3);
1967
1968 let c = self.input.peek_nth(1);
1969 let nc = self.input.peek_nth(2);
1970
1971 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1972 return Err(ScanError::new_str(
1973 *mark,
1974 "while parsing a tag, found an invalid escape sequence",
1975 ));
1976 }
1977
1978 let byte = u8::try_from((as_hex(c) << 4) + as_hex(nc))
1979 .expect("two hex nibbles always fit in a byte");
1980 if width == 0 {
1981 width = match byte {
1982 _ if byte & 0x80 == 0x00 => 1,
1983 _ if byte & 0xE0 == 0xC0 => 2,
1984 _ if byte & 0xF0 == 0xE0 => 3,
1985 _ if byte & 0xF8 == 0xF0 => 4,
1986 _ => {
1987 return Err(ScanError::new_str(
1988 *mark,
1989 "while parsing a tag, found an incorrect leading UTF-8 byte",
1990 ));
1991 }
1992 };
1993 } else if byte & 0xc0 != 0x80 {
1994 return Err(ScanError::new_str(
1995 *mark,
1996 "while parsing a tag, found an incorrect trailing UTF-8 byte",
1997 ));
1998 }
1999
2000 bytes[bytes_len] = byte;
2001 bytes_len += 1;
2002
2003 self.skip_n_non_blank(3);
2004
2005 width -= 1;
2006 if width == 0 {
2007 break;
2008 }
2009 }
2010
2011 let s = core::str::from_utf8(&bytes[..bytes_len]).map_err(|_| {
2012 ScanError::new_str(
2013 *mark,
2014 "while parsing a tag, found an invalid UTF-8 codepoint",
2015 )
2016 })?;
2017
2018 let mut chars = s.chars();
2019 match (chars.next(), chars.next()) {
2020 (Some(ch), None) => Ok(ch),
2021 _ => Err(ScanError::new_str(
2022 *mark,
2023 "while parsing a tag, found an invalid UTF-8 codepoint",
2024 )),
2025 }
2026 }
2027
2028 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
2029 self.save_simple_key();
2030 self.disallow_simple_key();
2031
2032 let tok = self.scan_anchor(alias)?;
2033
2034 self.tokens.push_back(tok);
2035
2036 Ok(())
2037 }
2038
2039 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
2040 let start_mark = self.mark;
2041
2042 self.skip_non_blank();
2044
2045 if let Some(start) = self.input.byte_offset() {
2047 while is_anchor_char(self.input.look_ch()) {
2048 self.skip_non_blank();
2049 }
2050
2051 let end = self
2052 .input
2053 .byte_offset()
2054 .expect("byte_offset() must remain available once enabled");
2055
2056 if start == end {
2057 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2058 }
2059
2060 let cow = if let Some(slice) = self.try_borrow_slice(start, end) {
2061 Cow::Borrowed(slice)
2062 } else if let Some(slice) = self.input.slice_bytes(start, end) {
2063 Cow::Owned(slice.to_owned())
2064 } else {
2065 return Err(ScanError::new_str(
2066 start_mark,
2067 "internal error: input advertised slicing but did not provide a slice",
2068 ));
2069 };
2070
2071 let tok = if alias {
2072 TokenType::Alias(cow)
2073 } else {
2074 TokenType::Anchor(cow)
2075 };
2076 return Ok(Token(Span::new(start_mark, self.mark), tok));
2077 }
2078
2079 let mut string = String::new();
2080 while is_anchor_char(self.input.look_ch()) {
2081 string.push(self.input.peek());
2082 self.skip_non_blank();
2083 }
2084
2085 if string.is_empty() {
2086 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2087 }
2088
2089 let tok = if alias {
2090 TokenType::Alias(string.into())
2091 } else {
2092 TokenType::Anchor(string.into())
2093 };
2094 Ok(Token(Span::new(start_mark, self.mark), tok))
2095 }
2096
2097 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
2098 self.save_simple_key();
2100
2101 let start_mark = self.mark;
2102 let indicator = self.input.peek();
2103 self.flow_markers.push((start_mark, indicator));
2104
2105 self.roll_one_col_indent();
2106 self.increase_flow_level()?;
2107
2108 self.allow_simple_key();
2109
2110 self.skip_non_blank();
2111
2112 if tok == TokenType::FlowMappingStart {
2113 self.flow_mapping_started.push(true);
2114 } else {
2115 self.flow_mapping_started.push(false);
2116 self.implicit_flow_mapping_states
2117 .push(ImplicitMappingState::Possible);
2118 }
2119
2120 self.skip_ws_to_eol(SkipTabs::Yes)?;
2121
2122 self.tokens
2123 .push_back(Token(Span::new(start_mark, self.mark), tok));
2124 Ok(())
2125 }
2126
2127 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
2128 if self.flow_level == 0 {
2130 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2131 }
2132
2133 let Some((open_mark, open_ch)) = self.flow_markers.pop() else {
2134 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2135 };
2136
2137 let (expected_open, actual_close) = match tok {
2138 TokenType::FlowSequenceEnd => ('[', ']'),
2139 TokenType::FlowMappingEnd => ('{', '}'),
2140 _ => unreachable!("flow collection end called with non-closing token"),
2141 };
2142
2143 if open_ch != expected_open {
2144 return Err(ScanError::new(
2145 open_mark,
2146 format!("mismatched bracket '{open_ch}' closed by '{actual_close}'"),
2147 ));
2148 }
2149
2150 let flow_level = self.flow_level;
2151
2152 self.remove_simple_key()?;
2153
2154 if matches!(tok, TokenType::FlowSequenceEnd) {
2155 self.end_implicit_mapping(self.mark, flow_level);
2156 self.implicit_flow_mapping_states.pop();
2158 }
2159 self.flow_mapping_started.pop();
2160
2161 self.decrease_flow_level();
2162
2163 self.disallow_simple_key();
2164
2165 let start_mark = self.mark;
2166 self.skip_non_blank();
2167 self.skip_ws_to_eol(SkipTabs::Yes)?;
2168
2169 if self.flow_level > 0 {
2175 self.adjacent_value_allowed_at = self.mark.index();
2176 }
2177
2178 self.tokens
2179 .push_back(Token(Span::new(start_mark, self.mark), tok));
2180 Ok(())
2181 }
2182
2183 fn fetch_flow_entry(&mut self) -> ScanResult {
2185 self.remove_simple_key()?;
2186 self.allow_simple_key();
2187
2188 self.end_implicit_mapping(self.mark, self.flow_level);
2189 if self.current_flow_collection_is_sequence() {
2190 self.set_current_flow_mapping_started(false);
2191 }
2192
2193 let start_mark = self.mark;
2194 self.skip_non_blank();
2195 self.skip_ws_to_eol(SkipTabs::Yes)?;
2196
2197 self.tokens.push_back(Token(
2198 Span::new(start_mark, self.mark),
2199 TokenType::FlowEntry,
2200 ));
2201 Ok(())
2202 }
2203
2204 fn increase_flow_level(&mut self) -> ScanResult {
2205 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
2206 self.flow_level = self
2207 .flow_level
2208 .checked_add(1)
2209 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
2210 Ok(())
2211 }
2212
2213 fn decrease_flow_level(&mut self) {
2214 if self.flow_level > 0 {
2215 self.flow_level -= 1;
2216 self.simple_keys.pop().unwrap();
2217 }
2218 }
2219
2220 fn fetch_block_entry(&mut self) -> ScanResult {
2226 if self.flow_level > 0 {
2227 return Err(ScanError::new_str(
2229 self.mark,
2230 r#""-" is only valid inside a block"#,
2231 ));
2232 }
2233 if !self.simple_key_allowed {
2235 return Err(ScanError::new_str(
2236 self.mark,
2237 "block sequence entries are not allowed in this context",
2238 ));
2239 }
2240
2241 if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
2243 if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
2244 return Err(ScanError::new_str(
2245 span.start,
2246 "invalid indentation for anchor",
2247 ));
2248 }
2249 }
2250
2251 let mark = self.mark;
2253 self.skip_non_blank();
2254
2255 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
2257 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
2258 self.input.lookahead(2);
2259 if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
2260 {
2261 return Err(ScanError::new_str(
2262 self.mark,
2263 "'-' must be followed by a valid YAML whitespace",
2264 ));
2265 }
2266
2267 self.skip_ws_to_eol(SkipTabs::No)?;
2268 self.input.lookahead(1);
2269 if self.input.next_is_break() || self.input.next_is_flow() {
2270 self.roll_one_col_indent();
2271 }
2272
2273 self.remove_simple_key()?;
2274 self.allow_simple_key();
2275
2276 self.tokens
2277 .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
2278
2279 Ok(())
2280 }
2281
2282 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
2283 if let Some((mark, bracket)) = self.flow_markers.pop() {
2284 return Err(ScanError::new(
2285 mark,
2286 format!("unclosed bracket '{bracket}'"),
2287 ));
2288 }
2289
2290 self.unroll_indent(-1);
2291 self.remove_simple_key()?;
2292 self.disallow_simple_key();
2293
2294 let mark = self.mark;
2295
2296 self.skip_n_non_blank(3);
2297
2298 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
2299 Ok(())
2300 }
2301
2302 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
2303 self.save_simple_key();
2304 self.allow_simple_key();
2305 let tok = self.scan_block_scalar(literal)?;
2306
2307 self.tokens.push_back(tok);
2308 Ok(())
2309 }
2310
/// Scan a block scalar and return it as a `Scalar` token.
///
/// `literal` selects `ScalarStyle::Literal`; otherwise the style is
/// `ScalarStyle::Folded` and line breaks between non-blank-leading lines are folded.
///
/// The scan proceeds in phases:
/// 1. Consume the indicator character and the optional header: a chomping indicator
///    (`+` keep, `-` strip) and a single-digit indentation indicator, in either order.
/// 2. Skip the rest of the header line, which must end with a line break or the end
///    of input.
/// 3. Determine the content indentation, either from the explicit indicator or by
///    inspecting the leading lines.
/// 4. Accumulate content lines while they sit at exactly that indentation.
/// 5. Apply chomping to the trailing line breaks.
///
/// # Errors
/// - An indentation indicator of `0`.
/// - Anything other than whitespace/comment before the end of the header line.
/// - Content starting with a tab.
/// - A line indented less than the content indentation but more than the enclosing
///   indentation ("wrongly indented line in block scalar").
/// - Errors propagated from `skip_ws_to_eol`.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    // Value of the explicit indentation indicator; 0 means "not specified".
    let mut increment: usize = 0;
    // Column at which content lines must start; 0 means "not yet determined".
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        ScalarStyle::Literal
    } else {
        ScalarStyle::Folded
    };

    let mut string = String::new();
    // Line break that ended the previous content line.
    let mut leading_break = String::new();
    // Line breaks of the empty lines that followed the previous content line.
    let mut trailing_breaks = String::new();
    // Line break that ended the header line.
    let mut chomping_break = String::new();

    // Consume the block scalar indicator itself.
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header: chomping indicator then optional digit, or digit then optional
    // chomping indicator.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    // Skip whatever `skip_ws_to_eol` accepts on the remainder of the header line.
    self.skip_ws_to_eol(SkipTabs::Yes)?;

    self.input.lookahead(1);
    // The header line must now be finished.
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indicator is relative to the enclosing indentation when there is
    // one (`self.indent >= 0`), and absolute otherwise.
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    // Skip leading indentation and empty lines; with no explicit indicator this also
    // determines `indent`.
    if indent == 0 {
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Input ended before any content line: the result is made only of line breaks,
    // selected according to the chomping mode.
    if self.input.next_is_z() {
        let contents = match chomping {
            Chomping::Strip => String::new(),
            // Still on the header line: there is no line break to report.
            _ if self.mark.line == start_mark.line() => String::new(),
            Chomping::Clip => chomping_break,
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            Chomping::Keep => trailing_breaks,
        };
        return Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, contents.into()),
        ));
    }

    // A line sitting between the enclosing indentation and the content indentation
    // is an error, except at column 0 of a top-level scalar when it starts a
    // document marker or a comment.
    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        if self.indent < 0 && self.mark.col == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_start()
                || self.input.next_is_document_end()
                || self.input.peek() == '#'
            {
                // Allowed: nothing to do.
            } else {
                return Err(ScanError::new_str(
                    self.mark,
                    "wrongly indented line in block scalar",
                ));
            }
        } else {
            return Err(ScanError::new_str(
                self.mark,
                "wrongly indented line in block scalar",
            ));
        }
    }

    // Scratch buffer reused by `scan_block_scalar_content_line` for each line.
    let mut line_buffer = String::with_capacity(100);
    // From here on the token span starts at the first content position, shadowing the
    // mark taken at the indicator.
    let start_mark = self.mark;
    while self.mark.col == indent && !self.input.next_is_z() {
        if indent == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        trailing_blank = self.input.next_is_blank();
        // Folded style: a single break between two lines that do not start with a
        // blank becomes a space; otherwise the breaks are kept as they were read.
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            string.push_str(&trailing_breaks);
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        self.input.lookahead(2);
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Skip the indentation of the next line, collecting empty lines on the way.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Chomping: keep the final line break unless stripping.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        // Input ended on a content line without a line break: supply one.
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    // Chomping: keep the trailing empty lines only in "keep" mode.
    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string.into()),
    ))
}
2521
2522 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
2532 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
2534 string.push(self.input.peek());
2535 self.skip_blank();
2541 }
2542
2543 if self.input.buf_is_empty() {
2546 let mut n_chars = 0;
2554 debug_assert!(line_buffer.is_empty());
2555 while let Some(c) = self.input.raw_read_non_breakz_ch() {
2556 line_buffer.push(c);
2557 n_chars += 1;
2558 }
2559
2560 self.mark.col += n_chars;
2562 self.mark.offsets.chars += n_chars;
2563 self.mark.offsets.bytes = self.input.byte_offset();
2564
2565 string.reserve(line_buffer.len());
2567 string.push_str(line_buffer);
2568 line_buffer.clear();
2570 }
2571 }
2572
/// Skip the indentation of block scalar lines up to column `indent`.
///
/// Spaces are consumed until the mark reaches `indent` or a non-space character is
/// met. If the line then ends with a line break, the break is appended to `breaks`
/// and the process repeats on the next line; otherwise the function returns with the
/// input positioned after the skipped spaces.
fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
    loop {
        // NOTE(review): `bufmaxlen() - 2` assumes the buffer holds at least 2
        // characters — confirm against the input implementations.
        if indent < self.input.bufmaxlen() - 2 {
            // The whole indentation fits in one lookahead.
            self.input.lookahead(self.input.bufmaxlen());
            while self.mark.col < indent && self.input.peek() == ' ' {
                self.skip_blank();
            }
        } else {
            // The indentation may exceed the buffer: refill it repeatedly until the
            // target column is reached or a non-space character shows up.
            loop {
                self.input.lookahead(self.input.bufmaxlen());
                while !self.input.buf_is_empty()
                    && self.mark.col < indent
                    && self.input.peek() == ' '
                {
                    self.skip_blank();
                }
                if self.mark.col == indent
                    || (!self.input.buf_is_empty() && self.input.peek() != ' ')
                {
                    break;
                }
            }
            self.input.lookahead(2);
        }

        // An empty (or under-indented blank) line: record its break and continue
        // with the next line.
        if self.input.next_is_break() {
            self.read_break(breaks);
        } else {
            break;
        }
    }
}
2612
2613 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
2618 let mut max_indent = 0;
2619 loop {
2620 while self.input.look_ch() == ' ' {
2622 self.skip_blank();
2623 }
2624
2625 if self.mark.col > max_indent {
2626 max_indent = self.mark.col;
2627 }
2628
2629 if self.input.next_is_break() {
2630 self.input.lookahead(2);
2632 self.read_break(breaks);
2633 } else {
2634 break;
2636 }
2637 }
2638
2639 *indent = max_indent.max((self.indent + 1) as usize);
2648 if self.indent > 0 {
2649 *indent = (*indent).max(1);
2650 }
2651 }
2652
2653 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
2654 self.save_simple_key();
2655 self.disallow_simple_key();
2656
2657 let tok = self.scan_flow_scalar(single)?;
2658
2659 self.skip_to_next_token()?;
2662 self.adjacent_value_allowed_at = self.mark.index();
2663
2664 self.tokens.push_back(tok);
2665 Ok(())
2666 }
2667
/// Scan a quoted scalar and return it as a `Scalar` token.
///
/// `single` selects single-quoted scanning (`ScalarStyle::SingleQuoted`); otherwise
/// the scalar is double-quoted (`ScalarStyle::DoubleQuoted`).
///
/// When the input reports byte offsets, the content is first tracked as a byte range
/// (`FlowScalarBuf::Borrowed`) so it can be returned as a borrowed slice. The buffer
/// is promoted to an owned string as soon as the content can no longer be expressed
/// as a contiguous slice of the input (line folding here, escapes in
/// [`Self::consume_flow_scalar_non_whitespace_chars`]).
///
/// # Errors
/// - A document indicator at column 0 inside the scalar.
/// - End of input before the closing quote ("unclosed quote").
/// - A tab used as indentation on a continuation line.
/// - A continuation line not indented deeper than the enclosing block (outside flow
///   collections).
/// - Unexpected content after the closing quote.
/// - Errors propagated from the helper scanners.
#[allow(clippy::too_many_lines)]
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    let mut buf = match self.input.byte_offset() {
        // The borrowed range starts right after the character at the current
        // position, which is skipped below.
        Some(off) => FlowScalarBuf::new_borrowed(off + self.input.peek().len_utf8()),
        None => FlowScalarBuf::new_owned(),
    };

    // Receives the first line break of a folded run; its content is not used.
    let mut break_scratch = String::new();

    // Consume the opening quote.
    self.skip_non_blank();

    loop {
        self.input.lookahead(4);

        if self.mark.col == 0 && self.input.next_is_document_indicator() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a quoted scalar, found unexpected document indicator",
            ));
        }

        if self.input.next_is_z() {
            return Err(ScanError::new_str(start_mark, "unclosed quote"));
        }

        // Set once a line break (escaped or not) has been consumed in this round.
        let mut leading_blanks = false;
        self.consume_flow_scalar_non_whitespace_chars(
            single,
            &mut buf,
            &mut leading_blanks,
            &start_mark,
        )?;

        // Stop at the closing quote.
        match self.input.look_ch() {
            '\'' if single => break,
            '"' if !single => break,
            _ => {}
        }

        // Owned mode: index in the string where the current run of blanks starts.
        let mut trailing_ws_start: Option<usize> = None;
        let mut has_leading_break = false;
        let mut has_trailing_breaks = false;

        // Borrowed mode: byte offset where the current run of blanks starts.
        let mut pending_ws_start: Option<usize> = None;

        // Consume blanks and line breaks following the non-whitespace run.
        while self.input.next_is_blank() || self.input.next_is_break() {
            if self.input.next_is_blank() {
                if leading_blanks {
                    // Blanks after a line break are indentation and are dropped.
                    if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tab cannot be used as indentation",
                        ));
                    }
                    self.skip_blank();
                } else {
                    // Blanks within a line are kept tentatively: they are dropped
                    // again if a line break follows.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => {
                            if trailing_ws_start.is_none() {
                                trailing_ws_start = Some(string.len());
                            }
                            string.push(self.input.peek());
                        }
                        FlowScalarBuf::Borrowed { .. } => {
                            if pending_ws_start.is_none() {
                                pending_ws_start = self.input.byte_offset();
                            }
                        }
                    }
                    self.skip_blank();

                    if let (FlowScalarBuf::Borrowed { .. }, Some(ws_start), Some(ws_end)) =
                        (&mut buf, pending_ws_start, self.input.byte_offset())
                    {
                        buf.note_pending_ws(ws_start, ws_end);
                    }
                }
            } else {
                self.input.lookahead(2);

                if leading_blanks {
                    // Second or later line break of the run: kept in the content,
                    // which requires an owned buffer.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => self.read_break(string),
                        FlowScalarBuf::Borrowed { .. } => {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                            let Some(string) = buf.as_owned_mut() else {
                                unreachable!()
                            };
                            self.read_break(string);
                        }
                    }
                    has_trailing_breaks = true;
                } else {
                    // First line break of the run: drop the blanks that preceded it.
                    if let Some(pos) = trailing_ws_start.take() {
                        if let FlowScalarBuf::Owned(ref mut string) = buf {
                            string.truncate(pos);
                        }
                    }

                    if pending_ws_start.take().is_some() {
                        if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        }
                        buf.discard_pending_ws();
                    } else {
                        buf.commit_pending_ws();
                    }

                    break_scratch.clear();
                    self.read_break(&mut break_scratch);
                    has_leading_break = true;
                    leading_blanks = true;
                }
            }

            self.input.lookahead(1);
        }

        // In block context, a continuation line must be indented deeper than the
        // enclosing block, unless it only holds the closing quote.
        if leading_blanks && has_leading_break && self.flow_level == 0 {
            let next_ch = self.input.peek();
            let is_closing_quote = (single && next_ch == '\'') || (!single && next_ch == '"');
            if !is_closing_quote && (self.mark.col as isize) <= self.indent {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid indentation in multiline quoted scalar",
                ));
            }
        }

        if leading_blanks {
            // A lone line break folds into a single space.
            if has_leading_break && !has_trailing_breaks {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => string.push(' '),
                    FlowScalarBuf::Borrowed { .. } => {
                        self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        let Some(string) = buf.as_owned_mut() else {
                            unreachable!()
                        };
                        string.push(' ');
                    }
                }
            }
        }
    }

    // Consume the closing quote.
    self.skip_non_blank();

    self.skip_ws_to_eol(SkipTabs::Yes)?;
    // Only a restricted set of characters may follow the closing quote.
    // NOTE(review): the error message below says "double-quoted" but this path is
    // also reached for single-quoted scalars.
    match self.input.peek() {
        ',' | '}' | ']' if self.flow_level > 0 => {}
        c if is_breakz(c) => {}
        // In block context a `:` is only accepted on the line the scalar started on.
        ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
        ':' if self.flow_level > 0 => {}
        _ => {
            return Err(ScanError::new_str(
                self.mark,
                "invalid trailing content after double-quoted scalar",
            ));
        }
    }

    let style = if single {
        ScalarStyle::SingleQuoted
    } else {
        ScalarStyle::DoubleQuoted
    };

    let contents = match buf {
        FlowScalarBuf::Owned(string) => Cow::Owned(string),
        FlowScalarBuf::Borrowed {
            start,
            mut end,
            pending_ws_start,
            pending_ws_end,
        } => {
            // Blanks still pending at the closing quote belong to the content.
            if pending_ws_start.is_some() {
                end = pending_ws_end;
            }
            if let Some(slice) = self.try_borrow_slice(start, end) {
                Cow::Borrowed(slice)
            } else {
                let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
                    ScanError::new_str(
                        start_mark,
                        "internal error: input advertised offsets but did not provide a slice",
                    )
                })?;
                Cow::Owned(slice.to_owned())
            }
        }
    };

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, contents),
    ))
}
2915
/// Consume a run of non-whitespace characters inside a quoted scalar.
///
/// Stops, without consuming it, at the first character accepted by
/// `is_blank_or_breakz` or at the closing quote. Also stops right after an escaped
/// line break in a double-quoted scalar, in which case `*leading_blanks` is set.
///
/// Plain characters extend the borrowed range (or are pushed to the owned string).
/// A doubled single quote and backslash escapes force `buf` to be promoted to an
/// owned string first.
///
/// # Errors
/// Propagates errors from `promote_flow_scalar_buf_to_owned` and
/// [`Self::resolve_flow_scalar_escape_sequence`].
fn consume_flow_scalar_non_whitespace_chars(
    &mut self,
    single: bool,
    buf: &mut FlowScalarBuf,
    leading_blanks: &mut bool,
    start_mark: &Marker,
) -> Result<(), ScanError> {
    self.input.lookahead(2);
    while !is_blank_or_breakz(self.input.peek()) {
        match self.input.peek() {
            // `''` inside a single-quoted scalar stands for one literal quote.
            '\'' if self.input.peek_nth(1) == '\'' && single => {
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                let Some(string) = buf.as_owned_mut() else {
                    unreachable!()
                };
                string.push('\'');
                self.skip_n_non_blank(2);
            }
            // Closing quote: leave it for the caller.
            '\'' if single => break,
            '"' if !single => break,
            // Backslash followed by a line break: the break is consumed without
            // adding anything to the content.
            '\\' if !single && is_break(self.input.peek_nth(1)) => {
                self.input.lookahead(3);
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                self.skip_non_blank();
                self.skip_linebreak();
                *leading_blanks = true;
                break;
            }
            // Any other escape sequence in a double-quoted scalar.
            '\\' if !single => {
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                let Some(string) = buf.as_owned_mut() else {
                    unreachable!()
                };
                string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
            }
            // Ordinary character.
            c => {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => {
                        string.push(c);
                    }
                    FlowScalarBuf::Borrowed { .. } => {
                        // Blanks seen before this character are now part of the content.
                        buf.commit_pending_ws();
                    }
                }
                self.skip_non_blank();

                // In borrowed mode, extend the range to cover the character just
                // consumed.
                if let Some(new_end) = self.input.byte_offset() {
                    if let FlowScalarBuf::Borrowed { end, .. } = buf {
                        *end = new_end;
                    }
                }
            }
        }
        self.input.lookahead(2);
    }
    Ok(())
}
2994
2995 fn resolve_flow_scalar_escape_sequence(
3002 &mut self,
3003 start_mark: &Marker,
3004 ) -> Result<char, ScanError> {
3005 let mut code_length = 0usize;
3006 let mut ret = '\0';
3007
3008 match self.input.peek_nth(1) {
3009 '0' => ret = '\0',
3010 'a' => ret = '\x07',
3011 'b' => ret = '\x08',
3012 't' | '\t' => ret = '\t',
3013 'n' => ret = '\n',
3014 'v' => ret = '\x0b',
3015 'f' => ret = '\x0c',
3016 'r' => ret = '\x0d',
3017 'e' => ret = '\x1b',
3018 ' ' => ret = '\x20',
3019 '"' => ret = '"',
3020 '/' => ret = '/',
3021 '\\' => ret = '\\',
3022 'N' => ret = char::from_u32(0x85).unwrap(),
3024 '_' => ret = char::from_u32(0xA0).unwrap(),
3026 'L' => ret = char::from_u32(0x2028).unwrap(),
3028 'P' => ret = char::from_u32(0x2029).unwrap(),
3030 'x' => code_length = 2,
3031 'u' => code_length = 4,
3032 'U' => code_length = 8,
3033 _ => {
3034 return Err(ScanError::new_str(
3035 *start_mark,
3036 "while parsing a quoted scalar, found unknown escape character",
3037 ))
3038 }
3039 }
3040 self.skip_n_non_blank(2);
3041
3042 if code_length > 0 {
3044 self.input.lookahead(code_length);
3045 let mut value = 0u32;
3046 for i in 0..code_length {
3047 let c = self.input.peek_nth(i);
3048 if !is_hex(c) {
3049 return Err(ScanError::new_str(
3050 *start_mark,
3051 "while parsing a quoted scalar, did not find expected hexadecimal number",
3052 ));
3053 }
3054 value = (value << 4) + as_hex(c);
3055 }
3056
3057 self.skip_n_non_blank(code_length);
3058
3059 if code_length == 4 && (0xD800..=0xDBFF).contains(&value) {
3061 self.input.lookahead(2);
3062 if self.input.peek() == '\\' && self.input.peek_nth(1) == 'u' {
3063 self.skip_n_non_blank(2);
3064 self.input.lookahead(4);
3065 let mut low_value = 0u32;
3066 for i in 0..4 {
3067 let c = self.input.peek_nth(i);
3068 if !is_hex(c) {
3069 return Err(ScanError::new_str(
3070 *start_mark,
3071 "while parsing a quoted scalar, did not find expected hexadecimal number for low surrogate",
3072 ));
3073 }
3074 low_value = (low_value << 4) + as_hex(c);
3075 }
3076 if (0xDC00..=0xDFFF).contains(&low_value) {
3077 value = 0x10000 + (((value - 0xD800) << 10) | (low_value - 0xDC00));
3078 self.skip_n_non_blank(4);
3079 } else {
3080 return Err(ScanError::new_str(
3081 *start_mark,
3082 "while parsing a quoted scalar, found invalid low surrogate",
3083 ));
3084 }
3085 } else {
3086 return Err(ScanError::new_str(
3087 *start_mark,
3088 "while parsing a quoted scalar, found high surrogate without following low surrogate",
3089 ));
3090 }
3091 } else if code_length == 4 && (0xDC00..=0xDFFF).contains(&value) {
3092 return Err(ScanError::new_str(
3093 *start_mark,
3094 "while parsing a quoted scalar, found unpaired low surrogate",
3095 ));
3096 }
3097
3098 let Some(ch) = char::from_u32(value) else {
3099 return Err(ScanError::new_str(
3100 *start_mark,
3101 "while parsing a quoted scalar, found invalid Unicode character escape code",
3102 ));
3103 };
3104 ret = ch;
3105 }
3106 Ok(ret)
3107 }
3108
3109 fn fetch_plain_scalar(&mut self) -> ScanResult {
3110 self.save_simple_key();
3111 self.disallow_simple_key();
3112
3113 let tok = self.scan_plain_scalar()?;
3114
3115 self.tokens.push_back(tok);
3116 Ok(())
3117 }
3118
    /// Scan a plain (unquoted) scalar starting at the current mark.
    ///
    /// The scalar text is accumulated in a local `String`. Blanks and line breaks met between
    /// content runs are parked in `buf_whitespaces`, `buf_leading_break` and
    /// `buf_trailing_breaks` and only committed to the text once more content follows, so
    /// trailing blanks never end up in the scalar.
    ///
    /// The returned token spans from the mark at entry to the mark right after the last
    /// content character consumed. Its text is borrowed from the input when
    /// `try_borrow_slice` yields a slice identical to the accumulated text, and owned
    /// otherwise.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when:
    ///   - inside a flow construct, the scalar starts at a column below `self.indent + 1`;
    ///   - inside a flow construct, a `-` is directly followed by a character for which
    ///     `is_flow` is true;
    ///   - a tab is found in the indentation of a continuation line that has further content;
    ///   - `skip_ws_to_eol` fails;
    ///   - no character at all could be added to the scalar.
    #[allow(clippy::too_many_lines)]
    fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
        self.unroll_non_block_indents();
        // Continuation lines must be indented at least this far (checked in block context at
        // the bottom of the main loop).
        let indent = self.indent + 1;
        let start_mark = self.mark;

        if self.flow_level > 0 && (start_mark.col as isize) < indent {
            return Err(ScanError::new_str(
                start_mark,
                "invalid indentation in flow construct",
            ));
        }

        let mut string = String::with_capacity(32);
        self.buf_whitespaces.clear();
        self.buf_leading_break.clear();
        self.buf_trailing_breaks.clear();
        // Updated after every content run, so it never includes trailing blanks or breaks.
        let mut end_mark = self.mark;

        loop {
            self.input.lookahead(4);
            // Stop at a document indicator in column 0 or at a `#`.
            if (self.mark.col == 0 && self.input.next_is_document_indicator())
                || self.input.peek() == '#'
            {
                // Remember where a `#` cut the scalar short, but only in block context, when
                // the scalar already has content and blanks were buffered right before it.
                if self.input.peek() == '#'
                    && !string.is_empty()
                    && !self.buf_whitespaces.is_empty()
                    && self.flow_level == 0
                {
                    self.interrupted_plain_by_comment = Some(self.mark);
                }
                break;
            }

            if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
                return Err(ScanError::new_str(
                    self.mark,
                    "plain scalar cannot start with '-' followed by ,[]{}",
                ));
            }

            // A content run: commit whatever whitespace was buffered, then consume content.
            if !self.input.next_is_blank_or_breakz()
                && self.input.next_can_be_plain_scalar(self.flow_level > 0)
            {
                if self.leading_whitespace {
                    // At least one line break was seen since the previous content run.
                    if self.buf_leading_break.is_empty() {
                        string.push_str(&self.buf_leading_break);
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                        self.buf_leading_break.clear();
                    } else {
                        // Line folding: a lone break becomes a single space; when further
                        // breaks were buffered, only those are emitted.
                        if self.buf_trailing_breaks.is_empty() {
                            string.push(' ');
                        } else {
                            string.push_str(&self.buf_trailing_breaks);
                            self.buf_trailing_breaks.clear();
                        }
                        self.buf_leading_break.clear();
                    }
                    self.leading_whitespace = false;
                } else if !self.buf_whitespaces.is_empty() {
                    // Blanks between two content runs on the same line are kept verbatim.
                    string.push_str(&self.buf_whitespaces);
                    self.buf_whitespaces.clear();
                }

                // The first character of the run was already validated by the condition above.
                string.push(self.input.peek());
                self.skip_non_blank();
                string.reserve(self.input.bufmaxlen());

                // Consume the rest of the run chunk by chunk until the input reports a stop.
                let mut end = false;
                while !end {
                    self.input.lookahead(self.input.bufmaxlen());
                    let (stop, chars_consumed) = self.input.fetch_plain_scalar_chunk(
                        &mut string,
                        self.input.bufmaxlen() - 1,
                        self.flow_level > 0,
                    );
                    end = stop;
                    // The chunk was consumed by the input directly, so the mark is advanced
                    // by hand here. The column is bumped by the same count as the char offset.
                    self.mark.offsets.chars += chars_consumed;
                    self.mark.col += chars_consumed;
                    self.mark.offsets.bytes = self.input.byte_offset();
                }
                end_mark = self.mark;
            }

            // Anything other than a blank or a line break ends the scalar.
            if !(self.input.next_is_blank() || self.input.next_is_break()) {
                break;
            }

            // Buffer the blanks and line breaks that follow the content run.
            self.input.lookahead(2);
            while self.input.next_is_blank_or_break() {
                if self.input.next_is_blank() {
                    if !self.leading_whitespace {
                        // Still on the content line: keep the blank for a possible commit.
                        self.buf_whitespaces.push(self.input.peek());
                        self.skip_blank();
                    } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                        // A tab inside the indentation of a continuation line is tolerated
                        // only if `next_is_breakz` holds once the whitespace is skipped.
                        self.skip_ws_to_eol(SkipTabs::Yes)?;
                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                start_mark,
                                "while scanning a plain scalar, found a tab",
                            ));
                        }
                    } else {
                        // Leading blanks of a continuation line are dropped.
                        self.skip_blank();
                    }
                } else {
                    if self.leading_whitespace {
                        // Second or later consecutive break.
                        self.skip_break();
                        self.buf_trailing_breaks.push('\n');
                    } else {
                        // First break after content: blanks buffered before it are discarded.
                        self.buf_whitespaces.clear();
                        self.skip_break();
                        self.buf_leading_break.push('\n');
                        self.leading_whitespace = true;
                    }
                }
                self.input.lookahead(2);
            }

            // In block context, a line indented less than `indent` ends the scalar.
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        if self.leading_whitespace {
            self.allow_simple_key();
        }

        if string.is_empty() {
            // Nothing was consumed: report an error instead of returning an empty scalar.
            Err(ScanError::new_str(
                start_mark,
                "unexpected end of plain scalar",
            ))
        } else {
            // Borrow from the input when byte offsets are known and the raw slice is exactly
            // the accumulated text (i.e. no folding changed it); otherwise own the text.
            let contents = if let (Some(start), Some(end)) =
                (start_mark.byte_offset(), end_mark.byte_offset())
            {
                match self.try_borrow_slice(start, end) {
                    Some(slice) if slice == string => Cow::Borrowed(slice),
                    _ => Cow::Owned(string),
                }
            } else {
                Cow::Owned(string)
            };

            Ok(Token(
                Span::new(start_mark, end_mark),
                TokenType::Scalar(ScalarStyle::Plain, contents),
            ))
        }
    }
3296
3297 fn fetch_key(&mut self) -> ScanResult {
3298 let start_mark = self.mark;
3299 if self.flow_level == 0 {
3300 if !self.simple_key_allowed {
3302 return Err(ScanError::new_str(
3303 self.mark,
3304 "mapping keys are not allowed in this context",
3305 ));
3306 }
3307 self.roll_indent(
3308 start_mark.col,
3309 None,
3310 TokenType::BlockMappingStart,
3311 start_mark,
3312 );
3313 } else {
3314 self.set_current_flow_mapping_started(true);
3316 }
3317
3318 self.remove_simple_key()?;
3319
3320 if self.flow_level == 0 {
3321 self.allow_simple_key();
3322 } else {
3323 self.disallow_simple_key();
3324 }
3325
3326 self.skip_non_blank();
3327 self.skip_yaml_whitespace()?;
3328 if self.input.peek() == '\t' {
3329 return Err(ScanError::new_str(
3330 self.mark(),
3331 "tabs disallowed in this context",
3332 ));
3333 }
3334 self.tokens
3335 .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
3336 Ok(())
3337 }
3338
3339 fn fetch_flow_value(&mut self) -> ScanResult {
3347 let nc = self.input.peek_nth(1);
3348
3349 if self.mark.index() != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
3361 return Err(ScanError::new_str(
3362 self.mark,
3363 "':' may not precede any of `[{` in flow mapping",
3364 ));
3365 }
3366
3367 self.fetch_value()
3368 }
3369
    /// Handle a `:` value indicator and queue a `Value` token.
    ///
    /// If the most recent simple key is still possible, a `Key` token (and, where needed, a
    /// `FlowMappingStart` and/or `BlockMappingStart`) is inserted retroactively at the
    /// position where that key started. Otherwise the indicator is handled without a
    /// pending simple key, and the start tokens are appended at the current position.
    ///
    /// # Panics
    /// Panics if `simple_keys` is empty.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when:
    ///   - the indicator is followed by a tab, the whitespace skipped by `skip_ws_to_eol`
    ///     does not satisfy `has_valid_yaml_ws`, and the next character is `-` or alphabetic;
    ///   - `skip_ws_to_eol` itself fails;
    ///   - in an implicit flow mapping, the simple key started on an earlier line than the
    ///     indicator;
    ///   - without a pending simple key in block context, simple keys are not allowed.
    fn fetch_value(&mut self) -> ScanResult {
        // Work on a copy so `self` can be mutated freely while the key data is consulted.
        let sk = self.simple_keys.last().unwrap().clone();
        let start_mark = self.mark;
        // An implicit flow mapping is a `key: value` pair written directly inside a flow
        // sequence for which no mapping has been started yet.
        let is_implicit_flow_mapping = self.current_flow_collection_is_sequence()
            && !self.current_flow_mapping_started()
            && !self.implicit_flow_mapping_states.is_empty();
        if is_implicit_flow_mapping {
            // Record the flow level so `end_implicit_mapping` can close it at the same level.
            *self.implicit_flow_mapping_states.last_mut().unwrap() =
                ImplicitMappingState::Inside(self.flow_level);
        }

        // Step over the indicator character.
        self.skip_non_blank();
        // The whitespace skip only runs when a tab follows; the `&&` chain short-circuits.
        if self.input.look_ch() == '\t'
            && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
            && (self.input.peek() == '-' || self.input.next_is_alpha())
        {
            return Err(ScanError::new_str(
                self.mark,
                "':' must be followed by a valid YAML whitespace",
            ));
        }

        if sk.possible {
            // Insert the `Key` token at the queue position recorded for the simple key.
            let tok = Token(Span::empty(sk.mark), TokenType::Key);
            self.insert_token(sk.token_number - self.tokens_parsed, tok);
            if is_implicit_flow_mapping {
                if sk.mark.line < start_mark.line {
                    return Err(ScanError::new_str(
                        start_mark,
                        "illegal placement of ':' indicator",
                    ));
                }
                // Inserted at the same index, so it ends up in front of the `Key` token.
                self.insert_token(
                    sk.token_number - self.tokens_parsed,
                    Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
                );
            }

            // Queue a `BlockMappingStart` at the key position if this opens a new block.
            self.roll_indent(
                sk.mark.col,
                Some(sk.token_number),
                TokenType::BlockMappingStart,
                sk.mark,
            );
            self.roll_one_col_indent();

            // The simple key is now consumed.
            self.simple_keys.last_mut().unwrap().possible = false;
            self.disallow_simple_key();
        } else {
            if is_implicit_flow_mapping {
                self.tokens
                    .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
            }
            if self.flow_level == 0 {
                // Block context without a pending simple key.
                if !self.simple_key_allowed {
                    return Err(ScanError::new_str(
                        start_mark,
                        "mapping values are not allowed in this context",
                    ));
                }

                self.roll_indent(
                    start_mark.col,
                    None,
                    TokenType::BlockMappingStart,
                    start_mark,
                );
            }
            self.roll_one_col_indent();

            if self.flow_level == 0 {
                self.allow_simple_key();
            } else {
                self.disallow_simple_key();
            }
        }
        // The `Value` token is always appended, positioned at the indicator.
        self.tokens
            .push_back(Token(Span::empty(start_mark), TokenType::Value));

        Ok(())
    }
3462
3463 fn roll_indent(
3469 &mut self,
3470 col: usize,
3471 number: Option<usize>,
3472 tok: TokenType<'input>,
3473 mark: Marker,
3474 ) {
3475 if self.flow_level > 0 {
3476 return;
3477 }
3478
3479 if self.indent <= col as isize {
3483 if let Some(indent) = self.indents.last() {
3484 if !indent.needs_block_end {
3485 self.indent = indent.indent;
3486 self.indents.pop();
3487 }
3488 }
3489 }
3490
3491 if self.indent < col as isize {
3492 self.indents.push(Indent {
3493 indent: self.indent,
3494 needs_block_end: true,
3495 });
3496 self.indent = col as isize;
3497 let tokens_parsed = self.tokens_parsed;
3498 match number {
3499 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
3500 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
3501 }
3502 }
3503 }
3504
3505 fn unroll_indent(&mut self, col: isize) {
3511 if self.flow_level > 0 {
3512 return;
3513 }
3514 while self.indent > col {
3515 let indent = self.indents.pop().unwrap();
3516 self.indent = indent.indent;
3517 if indent.needs_block_end {
3518 self.tokens
3519 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
3520 }
3521 }
3522 }
3523
3524 fn roll_one_col_indent(&mut self) {
3530 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
3531 self.indents.push(Indent {
3532 indent: self.indent,
3533 needs_block_end: false,
3534 });
3535 self.indent += 1;
3536 }
3537 }
3538
3539 fn unroll_non_block_indents(&mut self) {
3541 while let Some(indent) = self.indents.last() {
3542 if indent.needs_block_end {
3543 break;
3544 }
3545 self.indent = indent.indent;
3546 self.indents.pop();
3547 }
3548 }
3549
3550 fn save_simple_key(&mut self) {
3552 if self.simple_key_allowed {
3553 let required = self.flow_level == 0
3554 && self.indent == (self.mark.col as isize)
3555 && self.indents.last().unwrap().needs_block_end;
3556
3557 if let Some(last) = self.simple_keys.last_mut() {
3558 *last = SimpleKey {
3559 mark: self.mark,
3560 possible: true,
3561 required,
3562 token_number: self.tokens_parsed + self.tokens.len(),
3563 };
3564 }
3565 }
3566 }
3567
3568 fn remove_simple_key(&mut self) -> ScanResult {
3569 let last = self.simple_keys.last_mut().unwrap();
3570 if last.possible && last.required {
3571 return Err(self.simple_key_expected());
3572 }
3573
3574 last.possible = false;
3575 Ok(())
3576 }
3577
3578 fn is_within_block(&self) -> bool {
3580 !self.indents.is_empty()
3581 }
3582
3583 fn end_implicit_mapping(&mut self, mark: Marker, flow_level: u8) {
3589 if self
3590 .implicit_flow_mapping_states
3591 .last()
3592 .is_some_and(|state| *state == ImplicitMappingState::Inside(flow_level))
3593 {
3594 *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Possible;
3595 self.set_current_flow_mapping_started(false);
3596 self.tokens
3597 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
3598 }
3599 }
3600
3601 fn current_flow_collection_is_sequence(&self) -> bool {
3602 self.flow_markers
3603 .last()
3604 .is_some_and(|(_, bracket)| *bracket == '[')
3605 }
3606
3607 fn current_flow_mapping_started(&self) -> bool {
3608 self.flow_mapping_started.last().copied().unwrap_or(false)
3609 }
3610
3611 fn set_current_flow_mapping_started(&mut self, started: bool) {
3612 if let Some(current) = self.flow_mapping_started.last_mut() {
3613 *current = started;
3614 }
3615 }
3616}
3617
/// Chomping mode of a block scalar: how its final line break and trailing empty lines are
/// interpreted.
///
/// The variant descriptions follow the block chomping indicator rules of the YAML
/// specification.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// The final line break and any trailing empty lines are excluded.
    Strip,
    /// The final line break is preserved, but trailing empty lines are excluded.
    Clip,
    /// The final line break and trailing empty lines are included.
    Keep,
}
3630
/// Unit tests for the scanner: bounded buffering and borrowed-versus-owned token contents.
#[cfg(test)]
mod test {
    use alloc::{borrow::Cow, rc::Rc, string::String, vec::Vec};
    use core::cell::Cell;

    use crate::{
        input::{str::StrInput, BufferedInput},
        scanner::{Scanner, Token, TokenType},
    };

    /// A `char` iterator that counts, in a shared cell, how many characters were pulled.
    struct CountingChars {
        /// The underlying characters.
        chars: alloc::vec::IntoIter<char>,
        /// Number of characters yielded so far; shared with the test body.
        read: Rc<Cell<usize>>,
    }

    impl Iterator for CountingChars {
        type Item = char;

        /// Yield the next character, bumping the counter only when one was produced.
        fn next(&mut self) -> Option<Self::Item> {
            let next = self.chars.next();
            if next.is_some() {
                self.read.set(self.read.get() + 1);
            }
            next
        }
    }

    /// `is_anchor_char` must be reachable from this module and accept a plain letter.
    #[test]
    fn test_is_anchor_char() {
        use super::is_anchor_char;
        assert!(is_anchor_char('x'));
    }

    /// The first scalar of a long, unterminated flow sequence must be yielded without the
    /// scanner reading the whole input.
    ///
    /// The input is `[` followed by `"start"` and 600 `"x"` scalars, one per line. After the
    /// `"start"` scalar is returned, the number of characters pulled from the iterator must
    /// be below the input length and at most `SIMPLE_KEY_MAX_LOOKAHEAD + 128`.
    #[test]
    fn flow_simple_key_length_limit_bounds_buffering() {
        let mut yaml = String::from("[\n\"start\"\n");
        for _ in 0..600 {
            yaml.push_str("\"x\"\n");
        }
        let total_chars = yaml.chars().count();
        let read = Rc::new(Cell::new(0));
        let chars = yaml.chars().collect::<Vec<_>>().into_iter();
        let mut scanner = Scanner::new(BufferedInput::new(CountingChars {
            chars,
            read: Rc::clone(&read),
        }));

        assert!(matches!(
            scanner.next_token().unwrap().unwrap().1,
            TokenType::StreamStart(_)
        ));

        let token = scanner.next_token().unwrap().unwrap();
        assert!(matches!(token.1, TokenType::FlowSequenceStart));

        let token = scanner.next_token().unwrap().unwrap();
        assert!(matches!(
            token.1,
            TokenType::Scalar(_, ref value) if value == "start"
        ));
        assert!(
            read.get() < total_chars,
            "scanner consumed all {total_chars} chars before yielding the first flow scalar"
        );
        assert!(
            read.get() <= super::SIMPLE_KEY_MAX_LOOKAHEAD + 128,
            "scanner read {} chars before yielding the first flow scalar",
            read.get()
        );
    }

    /// An anchor name scanned from a `StrInput` must be a `Cow::Borrowed`.
    #[test]
    fn anchor_name_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("&anch\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Anchor(name) = tok.1 {
                assert!(matches!(name, Cow::Borrowed("anch")));
                break;
            }
        }
    }

    /// An anchor name must stop before a U+0001 control character.
    ///
    /// The anchor is expected to be `foo`; if the following token is a scalar, it must begin
    /// with the control character.
    #[test]
    fn anchor_name_rejects_non_printable_control_chars() {
        let mut scanner = Scanner::new(StrInput::new("&foo\u{0001}\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("scanning should not fail")
                .expect("scanner must eventually produce a token");
            if let TokenType::Anchor(name) = tok.1 {
                assert!(matches!(name, Cow::Borrowed("foo")));
                let next = scanner.next_token().expect("scanning should not fail");
                if let Some(Token(_, TokenType::Scalar(_, rest))) = next {
                    assert!(rest.starts_with('\u{0001}'));
                }
                break;
            }
        }
    }

    /// An alias name must stop before a U+0001 control character.
    ///
    /// The alias is expected to be `foo`; if the following token is a scalar, it must begin
    /// with the control character.
    #[test]
    fn alias_name_rejects_non_printable_control_chars() {
        let mut scanner = Scanner::new(StrInput::new("*foo\u{0001}\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("scanning should not fail")
                .expect("scanner must eventually produce a token");
            if let TokenType::Alias(name) = tok.1 {
                assert!(matches!(name, Cow::Borrowed("foo")));
                let next = scanner.next_token().expect("scanning should not fail");
                if let Some(Token(_, TokenType::Scalar(_, rest))) = next {
                    assert!(rest.starts_with('\u{0001}'));
                }
                break;
            }
        }
    }

    /// An alias name scanned from a `StrInput` must be a `Cow::Borrowed`.
    #[test]
    fn alias_name_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("*anch\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Alias(name) = tok.1 {
                assert!(matches!(name, Cow::Borrowed("anch")));
                break;
            }
        }
    }

    /// Both the handle and the prefix of a `%TAG` directive must be borrowed for a
    /// `StrInput`.
    #[test]
    fn tag_directive_parts_are_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("%TAG !e! tag:example.com,2000:app/\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::TagDirective(handle, prefix) = tok.1 {
                assert!(matches!(handle, Cow::Borrowed("!e!")));
                assert!(matches!(prefix, Cow::Borrowed("tag:example.com,2000:app/")));
                break;
            }
        }
    }

    /// A single-word plain scalar must be borrowed for a `StrInput`.
    #[test]
    fn plain_scalar_is_borrowed_when_whitespace_free_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("foo\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                assert!(matches!(value, Cow::Borrowed("foo")));
                break;
            }
        }
    }

    /// A single-line plain scalar containing an inner space must still be borrowed.
    #[test]
    fn plain_scalar_is_borrowed_when_whitespace_present_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("foo bar\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                assert!(matches!(value, Cow::Borrowed("foo bar")));
                break;
            }
        }
    }

    /// A single-quoted scalar without escapes must be borrowed (quotes excluded).
    #[test]
    fn single_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("'foo bar'\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                assert!(matches!(value, Cow::Borrowed("foo bar")));
                break;
            }
        }
    }

    /// A single-quoted scalar containing an escaped quote (`''`) must be owned and unescaped.
    #[test]
    fn single_quoted_scalar_is_owned_when_quote_is_escaped_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("'foo''bar'\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                assert!(matches!(value, Cow::Owned(_)));
                assert_eq!(&*value, "foo'bar");
                break;
            }
        }
    }

    /// A double-quoted scalar without escapes must be borrowed (quotes excluded).
    #[test]
    fn double_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("\"foo bar\"\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                assert!(matches!(value, Cow::Borrowed("foo bar")));
                break;
            }
        }
    }

    /// A double-quoted scalar containing a `\n` escape must be owned, with the escape
    /// decoded to a line feed.
    #[test]
    fn double_quoted_scalar_is_owned_when_escape_sequence_present_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("\"foo\\nbar\"\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Scalar(_, value) = tok.1 {
                assert!(matches!(value, Cow::Owned(_)));
                assert_eq!(&*value, "foo\nbar");
                break;
            }
        }
    }

    /// The scalar following the `Key` token of a plain mapping key must be borrowed.
    #[test]
    fn plain_key_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("mykey: value\n"));

        let mut found_key = false;
        let mut key_value: Option<Cow<'_, str>> = None;

        // Walk the tokens until the first scalar that comes after a `Key` token.
        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors");
            let Some(tok) = tok else { break };

            if matches!(tok.1, TokenType::Key) {
                found_key = true;
            } else if found_key {
                if let TokenType::Scalar(_, value) = tok.1 {
                    key_value = Some(value);
                    break;
                }
            }
        }

        assert!(found_key, "expected to find a Key token");
        let key_value = key_value.expect("expected to find a scalar after Key token");
        assert!(
            matches!(key_value, Cow::Borrowed("mykey")),
            "key should be borrowed, got: {key_value:?}"
        );
    }

    /// The scalar following the `Key` token of a double-quoted, escape-free mapping key must
    /// be borrowed.
    #[test]
    fn quoted_key_is_borrowed_when_verbatim_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("\"mykey\": value\n"));

        let mut found_key = false;
        let mut key_value: Option<Cow<'_, str>> = None;

        // Walk the tokens until the first scalar that comes after a `Key` token.
        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors");
            let Some(tok) = tok else { break };

            if matches!(tok.1, TokenType::Key) {
                found_key = true;
            } else if found_key {
                if let TokenType::Scalar(_, value) = tok.1 {
                    key_value = Some(value);
                    break;
                }
            }
        }

        assert!(found_key, "expected to find a Key token");
        let key_value = key_value.expect("expected to find a scalar after Key token");
        assert!(
            matches!(key_value, Cow::Borrowed("mykey")),
            "quoted key should be borrowed when verbatim, got: {key_value:?}"
        );
    }

    /// For `!!str`, both the `!!` handle and the `str` suffix must be borrowed.
    #[test]
    fn tag_handle_and_suffix_are_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("!!str foo\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Tag(handle, suffix) = tok.1 {
                assert!(
                    matches!(handle, Cow::Borrowed("!!")),
                    "tag handle should be borrowed, got: {handle:?}"
                );
                assert!(
                    matches!(suffix, Cow::Borrowed("str")),
                    "tag suffix should be borrowed, got: {suffix:?}"
                );
                break;
            }
        }
    }

    /// For the local tag `!mytag`, the handle must be `!` and the suffix `mytag`, both
    /// borrowed.
    #[test]
    fn local_tag_suffix_is_borrowed_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("!mytag foo\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Tag(handle, suffix) = tok.1 {
                assert!(
                    matches!(handle, Cow::Borrowed("!")),
                    "local tag handle should be '!', got: {handle:?}"
                );
                assert!(
                    matches!(suffix, Cow::Borrowed("mytag")),
                    "local tag suffix should be borrowed, got: {suffix:?}"
                );
                break;
            }
        }
    }

    /// A tag suffix containing a `%20` URI escape must be owned and decoded to a space,
    /// while the handle stays borrowed.
    #[test]
    fn tag_with_uri_escape_is_owned_for_str_input() {
        let mut scanner = Scanner::new(StrInput::new("!!my%20tag foo\n"));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let TokenType::Tag(handle, suffix) = tok.1 {
                assert!(
                    matches!(handle, Cow::Borrowed("!!")),
                    "tag handle should still be borrowed, got: {handle:?}"
                );
                assert!(
                    matches!(suffix, Cow::Owned(_)),
                    "tag suffix with URI escape should be owned, got: {suffix:?}"
                );
                assert_eq!(&*suffix, "my tag");
                break;
            }
        }
    }
}