1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use alloc::{
13 borrow::{Cow, ToOwned},
14 collections::VecDeque,
15 string::String,
16 vec::Vec,
17};
18use core::char;
19
20use thiserror::Error;
21
22use crate::{
23 char_traits::{
24 as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
25 is_tag_char, is_uri_char,
26 },
27 input::{BorrowedInput, SkipTabs},
28};
29
/// The text encoding of a scanned stream.
///
/// `Utf8` is the only encoding this enum can express.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8.
    Utf8,
}
36
/// The presentation style a scalar was written in.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum ScalarStyle {
    /// An unquoted scalar.
    Plain,
    /// A scalar enclosed in single quotes.
    SingleQuoted,
    /// A scalar enclosed in double quotes.
    DoubleQuoted,

    /// A literal block scalar.
    Literal,
    /// A folded block scalar.
    Folded,
}
61
/// Character and (optional) byte position backing a [`Marker`].
#[derive(Clone, Copy, Debug, Default)]
pub struct MarkerOffsets {
    /// Offset counted in characters.
    chars: usize,
    /// Offset counted in bytes, when one has been recorded.
    bytes: Option<usize>,
}

impl PartialEq for MarkerOffsets {
    /// Offsets compare equal when their character offsets match.
    ///
    /// The byte offset is deliberately left out of the comparison.
    fn eq(&self, other: &Self) -> bool {
        let Self { chars: theirs, .. } = *other;
        self.chars == theirs
    }
}

impl Eq for MarkerOffsets {}

/// A position in the input: character index, line and column, plus an optional byte offset.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub struct Marker {
    /// Character / byte offsets of the position.
    offsets: MarkerOffsets,
    /// Line of the position.
    line: usize,
    /// Column of the position.
    col: usize,
}

impl Marker {
    /// Build a marker from a character index, a line and a column.
    ///
    /// The byte offset starts out as `None`; attach one with [`Marker::with_byte_offset`].
    #[must_use]
    pub fn new(index: usize, line: usize, col: usize) -> Marker {
        let offsets = MarkerOffsets {
            chars: index,
            bytes: None,
        };
        Marker { offsets, line, col }
    }

    /// Return this marker with its byte offset replaced by `byte_offset`.
    #[must_use]
    pub fn with_byte_offset(self, byte_offset: Option<usize>) -> Marker {
        let offsets = MarkerOffsets {
            bytes: byte_offset,
            ..self.offsets
        };
        Marker { offsets, ..self }
    }

    /// The character index of the marker.
    #[must_use]
    pub fn index(&self) -> usize {
        let MarkerOffsets { chars, .. } = self.offsets;
        chars
    }

    /// The byte offset of the marker, if one was recorded.
    #[must_use]
    pub fn byte_offset(&self) -> Option<usize> {
        let MarkerOffsets { bytes, .. } = self.offsets;
        bytes
    }

    /// The line of the marker.
    #[must_use]
    pub fn line(&self) -> usize {
        let Marker { line, .. } = *self;
        line
    }

    /// The column of the marker.
    #[must_use]
    pub fn col(&self) -> usize {
        let Marker { col, .. } = *self;
        col
    }
}
143
144#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
146pub struct Span {
147 pub start: Marker,
149 pub end: Marker,
151
152 pub indent: Option<usize>,
157}
158
159impl Span {
160 #[must_use]
162 pub fn new(start: Marker, end: Marker) -> Span {
163 Span {
164 start,
165 end,
166 indent: None,
167 }
168 }
169
170 #[must_use]
177 pub fn empty(mark: Marker) -> Span {
178 Span {
179 start: mark,
180 end: mark,
181 indent: None,
182 }
183 }
184
185 #[must_use]
187 pub fn with_indent(mut self, indent: Option<usize>) -> Span {
188 self.indent = indent;
189 self
190 }
191
192 #[must_use]
194 pub fn len(&self) -> usize {
195 self.end.index() - self.start.index()
196 }
197
198 #[must_use]
200 pub fn is_empty(&self) -> bool {
201 self.len() == 0
202 }
203
204 #[must_use]
206 pub fn byte_range(&self) -> Option<core::ops::Range<usize>> {
207 let start = self.start.byte_offset()?;
208 let end = self.end.byte_offset()?;
209 Some(start..end)
210 }
211}
212
213#[derive(Clone, PartialEq, Debug, Eq, Error)]
215#[error(
216 "{} at char {} line {} column {}",
217 .info,
218 .mark.index(),
219 .mark.line(),
220 .mark.col() + 1,
221)]
222pub struct ScanError {
223 mark: Marker,
225 info: String,
227}
228
229impl ScanError {
230 #[must_use]
232 #[cold]
233 pub fn new(loc: Marker, info: String) -> ScanError {
234 ScanError { mark: loc, info }
235 }
236
237 #[must_use]
239 #[cold]
240 pub fn new_str(loc: Marker, info: &str) -> ScanError {
241 ScanError {
242 mark: loc,
243 info: info.to_owned(),
244 }
245 }
246
247 #[must_use]
249 pub fn marker(&self) -> &Marker {
250 &self.mark
251 }
252
253 #[must_use]
255 pub fn info(&self) -> &str {
256 self.info.as_ref()
257 }
258}
259
/// The kind of a scanned [`Token`], together with any payload it carries.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType<'input> {
    /// Start of the stream, with its encoding.
    StreamStart(TEncoding),
    /// End of the stream.
    StreamEnd,
    /// A `%YAML` version directive.
    VersionDirective(
        /// Major version number.
        u32,
        /// Minor version number.
        u32,
    ),
    /// A `%TAG` directive.
    TagDirective(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag prefix.
        Cow<'input, str>,
    ),
    /// A document-start indicator.
    DocumentStart,
    /// A document-end indicator.
    DocumentEnd,
    /// Start of a block sequence.
    BlockSequenceStart,
    /// Start of a block mapping.
    BlockMappingStart,
    /// End of a block collection.
    BlockEnd,
    /// Start of a flow sequence (`[`).
    FlowSequenceStart,
    /// End of a flow sequence (`]`).
    FlowSequenceEnd,
    /// Start of a flow mapping (`{`).
    FlowMappingStart,
    /// End of a flow mapping (`}`).
    FlowMappingEnd,
    /// An entry of a block sequence.
    BlockEntry,
    /// An entry separator of a flow collection (`,`).
    FlowEntry,
    /// A mapping key.
    Key,
    /// A mapping value.
    Value,
    /// An alias, carrying its name.
    Alias(Cow<'input, str>),
    /// An anchor, carrying its name.
    Anchor(Cow<'input, str>),
    /// A tag.
    Tag(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag suffix.
        Cow<'input, str>,
    ),
    /// A scalar, with the style it was written in and its text.
    Scalar(ScalarStyle, Cow<'input, str>),
    /// A directive other than `YAML` or `TAG`.
    ReservedDirective(
        /// The directive name.
        String,
        /// The directive parameters.
        Vec<String>,
    ),
}
332
/// A scanned token: the [`Span`] it covers followed by its [`TokenType`].
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token<'input>(pub Span, pub TokenType<'input>);
336
337#[derive(Clone, PartialEq, Debug, Eq)]
372struct SimpleKey {
373 possible: bool,
386 required: bool,
394 token_number: usize,
400 mark: Marker,
402}
403
404impl SimpleKey {
405 fn new(mark: Marker) -> SimpleKey {
407 SimpleKey {
408 possible: false,
409 required: false,
410 token_number: 0,
411 mark,
412 }
413 }
414}
415
/// An entry of the scanner's indentation stack.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The indentation level (signed: the scanner uses `-1` as its initial indent).
    indent: isize,
    /// NOTE(review): presumably whether leaving this level must emit a `BlockEnd` token —
    /// the code that reads this flag is outside this view; confirm.
    needs_block_end: bool,
}
440
/// State tracked per flow level for implicit flow mappings.
///
/// NOTE(review): the transitions between these states happen outside this view; the variant
/// descriptions below are inferred from the names and should be confirmed.
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
    /// An implicit mapping may start.
    Possible,
    /// Inside an implicit mapping; the meaning of the `u8` payload is not visible here.
    Inside(u8),
}
474
/// The scanner: turns an input source into a stream of [`Token`]s.
///
/// It can be driven through [`Iterator`] or through `next_token`.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<'input, T> {
    /// The input source.
    input: T,
    /// Current position in the input.
    mark: Marker,
    /// Tokens scanned but not yet handed out.
    tokens: VecDeque<Token<'input>>,
    /// The error that stopped iteration, if any (set by `Iterator::next`).
    error: Option<ScanError>,

    /// Whether the stream-start token has been produced.
    stream_start_produced: bool,
    /// Whether the stream-end token has been handed out.
    stream_end_produced: bool,
    /// Character index at which a `:` is accepted as a flow value even when not followed by
    /// a blank or flow indicator.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Simple key candidates.
    simple_keys: smallvec::SmallVec<[SimpleKey; 8]>,
    /// Current indentation level; `-1` initially.
    indent: isize,
    /// Stack of indentation levels.
    indents: smallvec::SmallVec<[Indent; 8]>,
    /// Flow nesting indicator; `0` is treated as block context.
    flow_level: u8,
    /// Number of tokens handed out by `next_token`.
    tokens_parsed: usize,
    /// Set once `fetch_more_tokens` has run; cleared each time a token is handed out.
    token_available: bool,
    /// `true` after a line break until a non-blank character is skipped.
    leading_whitespace: bool,
    /// NOTE(review): usage is outside this view.
    flow_mapping_started: bool,
    /// NOTE(review): presumably one state per open flow level; usage is outside this view.
    implicit_flow_mapping_states: smallvec::SmallVec<[ImplicitMappingState; 8]>,
    /// Position recorded when a comment interrupted a plain scalar; consumed by
    /// `skip_to_next_token` to report text continuing after the comment.
    interrupted_plain_by_comment: Option<Marker>,
    /// Position and character of brackets that are still open; used to report unclosed
    /// brackets at end of stream.
    flow_markers: smallvec::SmallVec<[(Marker, char); 8]>,
    /// Scratch buffer (usage is outside this view).
    buf_leading_break: String,
    /// Scratch buffer (usage is outside this view).
    buf_trailing_breaks: String,
    /// Scratch buffer (usage is outside this view).
    buf_whitespaces: String,
}
562
563impl<'input, T: BorrowedInput<'input>> Iterator for Scanner<'input, T> {
564 type Item = Token<'input>;
565
566 fn next(&mut self) -> Option<Self::Item> {
567 if self.error.is_some() {
568 return None;
569 }
570 match self.next_token() {
571 Ok(Some(tok)) => {
572 debug_print!(
573 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
574 tok.1,
575 tok.0
576 );
577 Some(tok)
578 }
579 Ok(tok) => tok,
580 Err(e) => {
581 self.error = Some(e);
582 None
583 }
584 }
585 }
586}
587
/// Result of a scanner operation that yields nothing on success and a [`ScanError`] on failure.
pub type ScanResult = Result<(), ScanError>;
590
/// Accumulator for a scalar that is either still a byte range of the input or owned text.
#[derive(Debug)]
enum FlowScalarBuf {
    /// The scalar is described by offsets only.
    Borrowed {
        /// Start offset of the committed content.
        start: usize,
        /// End offset of the committed content.
        end: usize,
        /// Start of a whitespace run that has been seen but not committed, if any.
        pending_ws_start: Option<usize>,
        /// End of the whitespace run that has been seen but not committed.
        pending_ws_end: usize,
    },
    /// The scalar text has been copied into an owned string.
    Owned(String),
}

impl FlowScalarBuf {
    /// Start an empty borrowed buffer at `start`, with no pending whitespace.
    #[inline]
    fn new_borrowed(start: usize) -> Self {
        let (end, pending_ws_end) = (start, start);
        Self::Borrowed {
            start,
            end,
            pending_ws_start: None,
            pending_ws_end,
        }
    }

    /// Start an empty owned buffer.
    #[inline]
    fn new_owned() -> Self {
        Self::Owned(String::default())
    }

    /// The owned string, or `None` while the buffer is still borrowed.
    #[inline]
    fn as_owned_mut(&mut self) -> Option<&mut String> {
        if let Self::Owned(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Extend the committed range over the pending whitespace, if any. No-op when owned.
    #[inline]
    fn commit_pending_ws(&mut self) {
        let Self::Borrowed {
            end,
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };

        // `take()` clears the pending marker and tells us whether there was one.
        if pending_ws_start.take().is_some() {
            *end = *pending_ws_end;
        }
    }

    /// Record a whitespace run: the first start seen is kept, the end always moves to
    /// `ws_end`. No-op when owned.
    #[inline]
    fn note_pending_ws(&mut self, ws_start: usize, ws_end: usize) {
        let Self::Borrowed {
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };

        *pending_ws_start = pending_ws_start.or(Some(ws_start));
        *pending_ws_end = ws_end;
    }

    /// Forget the pending whitespace and reset its end to the committed end. No-op when owned.
    #[inline]
    fn discard_pending_ws(&mut self) {
        let Self::Borrowed {
            end,
            pending_ws_start,
            pending_ws_end,
            ..
        } = self
        else {
            return;
        };

        *pending_ws_end = *end;
        *pending_ws_start = None;
    }
}
676
677impl<'input, T: BorrowedInput<'input>> Scanner<'input, T> {
678 #[inline]
679 fn promote_flow_scalar_buf_to_owned(
680 &self,
681 start_mark: &Marker,
682 buf: &mut FlowScalarBuf,
683 ) -> Result<(), ScanError> {
684 let FlowScalarBuf::Borrowed {
685 start,
686 end,
687 pending_ws_start: _,
688 pending_ws_end: _,
689 } = *buf
690 else {
691 return Ok(());
692 };
693
694 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
695 ScanError::new_str(
696 *start_mark,
697 "internal error: input advertised offsets but did not provide a slice",
698 )
699 })?;
700 *buf = FlowScalarBuf::Owned(slice.to_owned());
701 Ok(())
702 }
    /// Ask the input for the range `start..end` as a slice living for `'input`.
    ///
    /// Forwards to the input's `slice_borrowed`; returns `None` when the input cannot lend
    /// such a slice.
    #[inline]
    fn try_borrow_slice(&self, start: usize, end: usize) -> Option<&'input str> {
        self.input.slice_borrowed(start, end)
    }
712
713 fn scan_tag_handle_directive_cow(
718 &mut self,
719 mark: &Marker,
720 ) -> Result<Cow<'input, str>, ScanError> {
721 let Some(start) = self.input.byte_offset() else {
722 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
723 };
724
725 if self.input.look_ch() != '!' {
726 return Err(ScanError::new_str(
727 *mark,
728 "while scanning a tag, did not find expected '!'",
729 ));
730 }
731
732 self.skip_non_blank();
734
735 self.input.lookahead(1);
738 while self.input.next_is_alpha() {
739 self.skip_non_blank();
740 self.input.lookahead(1);
741 }
742
743 if self.input.peek() == '!' {
745 self.skip_non_blank();
746 }
747
748 let Some(end) = self.input.byte_offset() else {
749 return Ok(Cow::Owned(self.scan_tag_handle(true, mark)?));
751 };
752
753 let Some(slice) = self.try_borrow_slice(start, end) else {
754 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
756 ScanError::new_str(
757 *mark,
758 "internal error: input advertised slicing but did not provide a slice",
759 )
760 })?;
761 if !slice.ends_with('!') && slice != "!" {
762 return Err(ScanError::new_str(
763 *mark,
764 "while parsing a tag directive, did not find expected '!'",
765 ));
766 }
767 return Ok(Cow::Owned(slice.to_owned()));
768 };
769
770 if !slice.ends_with('!') && slice != "!" {
771 return Err(ScanError::new_str(
772 *mark,
773 "while parsing a tag directive, did not find expected '!'",
774 ));
775 }
776
777 Ok(Cow::Borrowed(slice))
778 }
779
780 fn scan_tag_prefix_directive_cow(
785 &mut self,
786 start_mark: &Marker,
787 ) -> Result<Cow<'input, str>, ScanError> {
788 let Some(start) = self.input.byte_offset() else {
789 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
790 };
791
792 if self.input.look_ch() == '!' {
794 self.skip_non_blank();
795 } else if !is_tag_char(self.input.peek()) {
796 return Err(ScanError::new_str(
797 *start_mark,
798 "invalid global tag character",
799 ));
800 } else if self.input.peek() == '%' {
801 } else {
803 self.skip_non_blank();
804 }
805
806 while is_uri_char(self.input.look_ch()) {
808 if self.input.peek() == '%' {
809 break;
810 }
811 self.skip_non_blank();
812 }
813
814 if self.input.peek() == '%' {
816 let current = self
817 .input
818 .byte_offset()
819 .expect("byte_offset() must remain available once enabled");
820 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
821 slice.to_owned()
822 } else {
823 String::new()
824 };
825
826 while is_uri_char(self.input.look_ch()) {
827 if self.input.peek() == '%' {
828 out.push(self.scan_uri_escapes(start_mark)?);
829 } else {
830 out.push(self.input.peek());
831 self.skip_non_blank();
832 }
833 }
834 return Ok(Cow::Owned(out));
835 }
836
837 let Some(end) = self.input.byte_offset() else {
838 return Ok(Cow::Owned(self.scan_tag_prefix(start_mark)?));
839 };
840
841 let Some(slice) = self.try_borrow_slice(start, end) else {
842 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
844 ScanError::new_str(
845 *start_mark,
846 "internal error: input advertised slicing but did not provide a slice",
847 )
848 })?;
849 return Ok(Cow::Owned(slice.to_owned()));
850 };
851
852 Ok(Cow::Borrowed(slice))
853 }
854 pub fn new(input: T) -> Self {
856 let initial_byte_offset = input.byte_offset();
857 Scanner {
858 input,
859 mark: Marker::new(0, 1, 0).with_byte_offset(initial_byte_offset),
860 tokens: VecDeque::with_capacity(64),
861 error: None,
862
863 stream_start_produced: false,
864 stream_end_produced: false,
865 adjacent_value_allowed_at: 0,
866 simple_key_allowed: true,
867 simple_keys: smallvec::SmallVec::new(),
868 indent: -1,
869 indents: smallvec::SmallVec::new(),
870 flow_level: 0,
871 tokens_parsed: 0,
872 token_available: false,
873 leading_whitespace: true,
874 flow_mapping_started: false,
875 implicit_flow_mapping_states: smallvec::SmallVec::new(),
876 flow_markers: smallvec::SmallVec::new(),
877 interrupted_plain_by_comment: None,
878
879 buf_leading_break: String::with_capacity(128),
880 buf_trailing_breaks: String::with_capacity(128),
881 buf_whitespaces: String::with_capacity(128),
882 }
883 }
884
    /// Return a copy of the error that stopped iteration, if any.
    ///
    /// The error is recorded by `Iterator::next`; this returns `None` while no error has been
    /// recorded.
    #[inline]
    pub fn get_error(&self) -> Option<ScanError> {
        self.error.clone()
    }
893
    /// Build the "simple key expected" error at the current position.
    #[cold]
    fn simple_key_expected(&self) -> ScanError {
        ScanError::new_str(self.mark, "simple key expected")
    }
898
    /// Build the error reporting that `bracket`, located at `mark`, was never closed.
    #[cold]
    fn unclosed_bracket(mark: Marker, bracket: char) -> ScanError {
        ScanError::new(mark, format!("unclosed bracket '{bracket}'"))
    }
903
904 #[inline]
906 fn skip_blank(&mut self) {
907 self.input.skip();
908
909 self.mark.offsets.chars += 1;
910 self.mark.col += 1;
911 self.mark.offsets.bytes = self.input.byte_offset();
912 }
913
914 #[inline]
916 fn skip_non_blank(&mut self) {
917 self.input.skip();
918
919 self.mark.offsets.chars += 1;
920 self.mark.col += 1;
921 self.mark.offsets.bytes = self.input.byte_offset();
922 self.leading_whitespace = false;
923 }
924
925 #[inline]
927 fn skip_n_non_blank(&mut self, count: usize) {
928 for _ in 0..count {
929 self.input.skip();
930 self.mark.offsets.chars += 1;
931 self.mark.col += 1;
932 }
933 self.mark.offsets.bytes = self.input.byte_offset();
934 self.leading_whitespace = false;
935 }
936
937 #[inline]
939 fn skip_nl(&mut self) {
940 self.input.skip();
941
942 self.mark.offsets.chars += 1;
943 self.mark.col = 0;
944 self.mark.line += 1;
945 self.mark.offsets.bytes = self.input.byte_offset();
946 self.leading_whitespace = true;
947 }
948
949 #[inline]
951 fn skip_linebreak(&mut self) {
952 if self.input.next_2_are('\r', '\n') {
953 self.skip_blank();
956 self.skip_nl();
957 } else if self.input.next_is_break() {
958 self.skip_nl();
959 }
960 }
961
    /// Whether the stream-start token has been produced.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
967
    /// Whether the stream-end token has been handed out by `next_token`.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
973
    /// The scanner's current position in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
979
    /// Consume the line break at the current position and append a single `\n` to `s`.
    ///
    /// Whatever form the line break has in the input, exactly one `\n` is pushed.
    #[inline]
    fn read_break(&mut self, s: &mut String) {
        self.skip_break();
        s.push('\n');
    }
991
992 #[inline]
997 fn skip_break(&mut self) {
998 let c = self.input.peek();
999 let nc = self.input.peek_nth(1);
1000 debug_assert!(is_break(c));
1001 if c == '\r' && nc == '\n' {
1002 self.skip_blank();
1003 }
1004 self.skip_nl();
1005 }
1006
1007 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
1009 let old_len = self.tokens.len();
1010 assert!(pos <= old_len);
1011 self.tokens.insert(pos, tok);
1012 }
1013
    /// Mark that a simple key may start at the current position.
    #[inline]
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
1018
    /// Mark that a simple key may not start at the current position.
    #[inline]
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
1023
    /// Scan the next token(s) from the input and queue them.
    ///
    /// The very first call only queues the stream-start token. Every later call skips
    /// whitespace and comments, expires stale simple keys, unrolls the indentation to the
    /// current column and then dispatches on the next one or two characters.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when the input is malformed at the current position (invalid
    /// indentation, content after a document end marker, an unexpected character) or when
    /// one of the called fetchers fails.
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.input.lookahead(1);

        // Nothing else may be produced before the stream start.
        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        self.skip_to_next_token()?;

        debug_print!(
            " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
            self.mark,
            self.input.peek()
        );

        self.stale_simple_keys()?;

        let mark = self.mark;
        self.unroll_indent(mark.col as isize);

        // Document indicators need up to four characters of lookahead.
        self.input.lookahead(4);

        if self.input.next_is_z() {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Directives and document indicators are only looked for in the first column.
        if self.mark.col == 0 {
            if self.input.next_char_is('%') {
                return self.fetch_directive();
            } else if self.input.next_is_document_start() {
                return self.fetch_document_indicator(TokenType::DocumentStart);
            } else if self.input.next_is_document_end() {
                self.fetch_document_indicator(TokenType::DocumentEnd)?;
                // Only whitespace may follow the document end marker on its line.
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid content after document end marker",
                    ));
                }
                return Ok(());
            }
        }

        // Content left of the current indent is only tolerated for the flow indicators
        // `]`, `}` and `,` while `flow_level` is non-zero.
        if (self.mark.col as isize) < self.indent {
            self.input.lookahead(1);
            let c = self.input.peek();
            if self.flow_level == 0 || !matches!(c, ']' | '}' | ',') {
                return Err(ScanError::new_str(self.mark, "invalid indentation"));
            }
        }

        let c = self.input.peek();
        let nc = self.input.peek_nth(1);
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            // `-`, `?` and `:` are indicators only when followed by a blank, a break or the
            // end of input.
            '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
            '?' if is_blank_or_breakz(nc) => self.fetch_key(),
            ':' if is_blank_or_breakz(nc) => self.fetch_value(),
            // With a non-zero `flow_level`, `:` is also a value indicator before a flow
            // indicator or at the recorded "adjacent value" position.
            ':' if self.flow_level > 0
                && (is_flow(nc) || self.mark.index() == self.adjacent_value_allowed_at) =>
            {
                self.fetch_flow_value()
            }
            // NOTE(review): the flag presumably selects alias (`*`) versus anchor (`&`).
            '*' => self.fetch_anchor(true),
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Block scalars are only recognized while `flow_level` is zero.
            // NOTE(review): the flag presumably selects literal (`|`) versus folded (`>`).
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            // The flag is `true` for single quotes and `false` for double quotes.
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            // Indicator characters that were not consumed above start a plain scalar.
            '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
            ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
                self.fetch_plain_scalar()
            }
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                format!("unexpected character: `{c}'"),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
1120
1121 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
1125 if self.stream_end_produced {
1126 return Ok(None);
1127 }
1128
1129 if !self.token_available {
1130 self.fetch_more_tokens()?;
1131 }
1132 let Some(t) = self.tokens.pop_front() else {
1133 return Err(ScanError::new_str(
1134 self.mark,
1135 "did not find expected next token",
1136 ));
1137 };
1138 self.token_available = false;
1139 self.tokens_parsed += 1;
1140
1141 if let TokenType::StreamEnd = t.1 {
1142 self.stream_end_produced = true;
1143 }
1144 Ok(Some(t))
1145 }
1146
1147 pub fn fetch_more_tokens(&mut self) -> ScanResult {
1151 let mut need_more;
1152 loop {
1153 if self.tokens.is_empty() {
1154 need_more = true;
1155 } else {
1156 need_more = false;
1157 self.stale_simple_keys()?;
1159 for sk in &self.simple_keys {
1161 if sk.possible && sk.token_number == self.tokens_parsed {
1162 need_more = true;
1163 break;
1164 }
1165 }
1166 }
1167
1168 if let Some(token) = self.tokens.back() {
1171 if matches!(token.1, TokenType::DocumentEnd | TokenType::DocumentStart) {
1172 break;
1173 }
1174 }
1175
1176 if !need_more {
1177 break;
1178 }
1179 self.fetch_next_token()?;
1180 }
1181 self.token_available = true;
1182
1183 Ok(())
1184 }
1185
1186 fn stale_simple_keys(&mut self) -> ScanResult {
1194 for sk in &mut self.simple_keys {
1195 if sk.possible
1196 && self.flow_level == 0
1198 && (sk.mark.line < self.mark.line
1199 || sk.mark.index() + 1024 < self.mark.index())
1200 {
1201 if sk.required {
1202 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
1203 }
1204 sk.possible = false;
1205 }
1206 }
1207 Ok(())
1208 }
1209
    /// Skip blanks, tabs, line breaks and comments up to the next token.
    ///
    /// # Errors
    /// Fails when a tab is used where block indentation is expected, or when text continues
    /// on the line right after a comment that interrupted a plain scalar.
    fn skip_to_next_token(&mut self) -> ScanResult {
        // Consumes one line break; a new line re-allows simple keys while `flow_level` is 0.
        let consume_linebreak = |this: &mut Self| {
            this.input.lookahead(2);
            this.skip_linebreak();
            if this.flow_level == 0 {
                this.allow_simple_key();
            }
        };

        loop {
            match self.input.look_ch() {
                '\t' => {
                    // A tab in the leading whitespace of a block line, left of the current
                    // indent, is only accepted when nothing but whitespace follows it.
                    if self.is_within_block()
                        && self.leading_whitespace
                        && (self.mark.col as isize) < self.indent
                    {
                        self.skip_ws_to_eol(SkipTabs::Yes)?;

                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tabs disallowed within this context (block indentation)",
                            ));
                        }

                        if matches!(self.input.look_ch(), '\n' | '\r') {
                            consume_linebreak(self);
                        }
                    } else {
                        self.skip_blank();
                    }
                }

                ' ' => self.skip_blank(),

                '\n' | '\r' => consume_linebreak(self),

                '#' => {
                    // Skip the comment up to (not including) the line break or end of input.
                    let n = self.input.skip_while_non_breakz();
                    self.mark.offsets.chars += n;
                    self.mark.col += n;
                    self.mark.offsets.bytes = self.input.byte_offset();

                    if matches!(self.input.look_ch(), '\n' | '\r') {
                        consume_linebreak(self);
                    }
                }

                _ => break,
            }
        }

        // `take()` clears the pending marker whether or not an error is reported below.
        if let Some(err_mark) = self.interrupted_plain_by_comment.take() {
            let is_immediate_next_line = self.mark.line == err_mark.line + 1;

            if self.flow_level == 0
                && is_immediate_next_line
                && (self.mark.col as isize) > self.indent
            {
                self.input.lookahead(4);

                // Something that could continue the plain scalar follows the comment.
                if !self.input.next_is_z()
                    && !self.input.next_is_document_indicator()
                    && self.input.next_can_be_plain_scalar(false)
                {
                    return Err(ScanError::new_str(
                        err_mark,
                        "comment intercepting the multiline text",
                    ));
                }
            }
        }

        Ok(())
    }
1307
1308 fn skip_yaml_whitespace(&mut self) -> ScanResult {
1313 let mut need_whitespace = true;
1314 loop {
1315 match self.input.look_ch() {
1316 ' ' => {
1317 self.skip_blank();
1318
1319 need_whitespace = false;
1320 }
1321 '\n' | '\r' => {
1322 self.input.lookahead(2);
1323 self.skip_linebreak();
1324 if self.flow_level == 0 {
1325 self.allow_simple_key();
1326 }
1327 need_whitespace = false;
1328 }
1329 '#' => {
1330 let comment_length = self.input.skip_while_non_breakz();
1331 self.mark.offsets.chars += comment_length;
1332 self.mark.col += comment_length;
1333 self.mark.offsets.bytes = self.input.byte_offset();
1334 }
1335 _ => break,
1336 }
1337 }
1338
1339 if need_whitespace {
1340 Err(ScanError::new_str(self.mark(), "expected whitespace"))
1341 } else {
1342 Ok(())
1343 }
1344 }
1345
1346 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
1347 let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
1348 self.mark.col += n_bytes;
1349 self.mark.offsets.chars += n_bytes;
1350 self.mark.offsets.bytes = self.input.byte_offset();
1351 result.map_err(|msg| ScanError::new_str(self.mark, msg))
1352 }
1353
1354 fn fetch_stream_start(&mut self) {
1355 let mark = self.mark;
1356 self.indent = -1;
1357 self.stream_start_produced = true;
1358 self.allow_simple_key();
1359 self.tokens.push_back(Token(
1360 Span::empty(mark),
1361 TokenType::StreamStart(TEncoding::Utf8),
1362 ));
1363 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1364 }
1365
1366 fn fetch_stream_end(&mut self) -> ScanResult {
1367 if self.mark.col != 0 {
1369 self.mark.col = 0;
1370 self.mark.line += 1;
1371 }
1372
1373 if let Some((mark, bracket)) = self.flow_markers.pop() {
1374 return Err(Self::unclosed_bracket(mark, bracket));
1375 }
1376
1377 for sk in &mut self.simple_keys {
1380 if sk.required && sk.possible {
1381 return Err(self.simple_key_expected());
1382 }
1383 sk.possible = false;
1384 }
1385
1386 self.unroll_indent(-1);
1387 self.remove_simple_key()?;
1388 self.disallow_simple_key();
1389
1390 self.tokens
1391 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
1392 Ok(())
1393 }
1394
1395 fn fetch_directive(&mut self) -> ScanResult {
1396 self.unroll_indent(-1);
1397 self.remove_simple_key()?;
1398
1399 self.disallow_simple_key();
1400
1401 let tok = self.scan_directive()?;
1402 self.tokens.push_back(tok);
1403
1404 Ok(())
1405 }
1406
1407 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
1408 let start_mark = self.mark;
1409 self.skip_non_blank();
1410
1411 let name = self.scan_directive_name()?;
1412 let tok = match name.as_ref() {
1413 "YAML" => self.scan_version_directive_value(&start_mark)?,
1414 "TAG" => self.scan_tag_directive_value(&start_mark)?,
1415 _ => {
1416 let mut params = Vec::new();
1417 while self.input.next_is_blank() {
1418 let n_blanks = self.input.skip_while_blank();
1419 self.mark.offsets.chars += n_blanks;
1420 self.mark.col += n_blanks;
1421 self.mark.offsets.bytes = self.input.byte_offset();
1422
1423 if !is_blank_or_breakz(self.input.peek()) {
1424 let mut param = String::new();
1425 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut param);
1426 self.mark.offsets.chars += n_chars;
1427 self.mark.col += n_chars;
1428 self.mark.offsets.bytes = self.input.byte_offset();
1429 params.push(param);
1430 }
1431 }
1432
1433 Token(
1434 Span::new(start_mark, self.mark),
1435 TokenType::ReservedDirective(name, params),
1436 )
1437 }
1438 };
1439
1440 self.skip_ws_to_eol(SkipTabs::Yes)?;
1441
1442 if self.input.next_is_breakz() {
1443 self.input.lookahead(2);
1444 self.skip_linebreak();
1445 Ok(tok)
1446 } else {
1447 Err(ScanError::new_str(
1448 start_mark,
1449 "while scanning a directive, did not find expected comment or line break",
1450 ))
1451 }
1452 }
1453
1454 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1455 let n_blanks = self.input.skip_while_blank();
1456 self.mark.offsets.chars += n_blanks;
1457 self.mark.col += n_blanks;
1458 self.mark.offsets.bytes = self.input.byte_offset();
1459
1460 let major = self.scan_version_directive_number(mark)?;
1461
1462 if self.input.peek() != '.' {
1463 return Err(ScanError::new_str(
1464 *mark,
1465 "while scanning a YAML directive, did not find expected digit or '.' character",
1466 ));
1467 }
1468 self.skip_non_blank();
1469
1470 let minor = self.scan_version_directive_number(mark)?;
1471
1472 Ok(Token(
1473 Span::new(*mark, self.mark),
1474 TokenType::VersionDirective(major, minor),
1475 ))
1476 }
1477
1478 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1479 let start_mark = self.mark;
1480 let mut string = String::new();
1481
1482 let n_chars = self.input.fetch_while_is_yaml_non_space(&mut string);
1483 self.mark.offsets.chars += n_chars;
1484 self.mark.col += n_chars;
1485 self.mark.offsets.bytes = self.input.byte_offset();
1486
1487 if string.is_empty() {
1488 return Err(ScanError::new_str(
1489 start_mark,
1490 "while scanning a directive, could not find expected directive name",
1491 ));
1492 }
1493
1494 if !is_blank_or_breakz(self.input.peek()) {
1495 return Err(ScanError::new_str(
1496 start_mark,
1497 "while scanning a directive, found unexpected non-alphabetical character",
1498 ));
1499 }
1500
1501 Ok(string)
1502 }
1503
1504 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1505 let mut val = 0u32;
1506 let mut length = 0usize;
1507 while let Some(digit) = self.input.look_ch().to_digit(10) {
1508 if length + 1 > 9 {
1509 return Err(ScanError::new_str(
1510 *mark,
1511 "while scanning a YAML directive, found extremely long version number",
1512 ));
1513 }
1514 length += 1;
1515 val = val * 10 + digit;
1516 self.skip_non_blank();
1517 }
1518
1519 if length == 0 {
1520 return Err(ScanError::new_str(
1521 *mark,
1522 "while scanning a YAML directive, did not find expected version number",
1523 ));
1524 }
1525
1526 Ok(val)
1527 }
1528
1529 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1530 let n_blanks = self.input.skip_while_blank();
1531 self.mark.offsets.chars += n_blanks;
1532 self.mark.col += n_blanks;
1533 self.mark.offsets.bytes = self.input.byte_offset();
1534
1535 let handle = self.scan_tag_handle_directive_cow(mark)?;
1536
1537 let n_blanks = self.input.skip_while_blank();
1538 self.mark.offsets.chars += n_blanks;
1539 self.mark.col += n_blanks;
1540 self.mark.offsets.bytes = self.input.byte_offset();
1541
1542 let prefix = self.scan_tag_prefix_directive_cow(mark)?;
1543
1544 self.input.lookahead(1);
1545
1546 if self.input.next_is_blank_or_breakz() {
1547 Ok(Token(
1548 Span::new(*mark, self.mark),
1549 TokenType::TagDirective(handle, prefix),
1550 ))
1551 } else {
1552 Err(ScanError::new_str(
1553 *mark,
1554 "while scanning TAG, did not find expected whitespace or line break",
1555 ))
1556 }
1557 }
1558
1559 fn fetch_tag(&mut self) -> ScanResult {
1560 self.save_simple_key();
1561 self.disallow_simple_key();
1562
1563 let tok = self.scan_tag()?;
1564 self.tokens.push_back(tok);
1565 Ok(())
1566 }
1567
    /// Scan a tag, borrowing its parts from the input when possible.
    ///
    /// Inputs that report no byte offset are handled by `scan_tag_owned`. The resulting
    /// `Tag(handle, suffix)` has one of these shapes:
    /// - verbatim tag (second character is `<`): empty handle, scanned text as suffix;
    /// - named handle (`!name!` or `!!`): that handle and the scanned suffix;
    /// - anything else: handle `!` and the remaining text as suffix, or — when that text is
    ///   empty — an empty handle with suffix `!`.
    ///
    /// # Errors
    /// Fails when a part is malformed, or when the tag is not followed by a blank, a line
    /// break, the end of input or (with a non-zero `flow_level`) a flow indicator.
    fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;

        // The second character decides between verbatim and shorthand forms.
        self.input.lookahead(2);

        if self.input.byte_offset().is_none() {
            return self.scan_tag_owned(&start_mark);
        }

        let (handle, suffix): (Cow<'input, str>, Cow<'input, str>) =
            if self.input.nth_char_is(1, '<') {
                let suffix = self.scan_verbatim_tag(&start_mark)?;
                (Cow::Owned(String::new()), Cow::Owned(suffix))
            } else {
                let handle = self.scan_tag_handle_cow(&start_mark)?;
                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                    // A complete `!...!` handle: what follows is the suffix.
                    let suffix = self.scan_tag_shorthand_suffix_cow(&start_mark)?;
                    (handle, suffix)
                } else {
                    // No closing '!': the characters scanned after the leading '!' belong to
                    // the suffix.
                    let remaining_suffix = self.scan_tag_shorthand_suffix_cow(&start_mark)?;

                    let suffix = if handle.len() > 1 {
                        if remaining_suffix.is_empty() {
                            // Nothing to append: drop the leading '!' and keep the borrow
                            // when there is one.
                            match handle {
                                Cow::Borrowed(s) => Cow::Borrowed(&s[1..]),
                                Cow::Owned(s) => Cow::Owned(s[1..].to_owned()),
                            }
                        } else {
                            let mut combined = handle[1..].to_owned();
                            combined.push_str(&remaining_suffix);
                            Cow::Owned(combined)
                        }
                    } else {
                        remaining_suffix
                    };

                    if suffix.is_empty() {
                        // A lone `!`.
                        (Cow::Borrowed(""), Cow::Borrowed("!"))
                    } else {
                        (Cow::Borrowed("!"), suffix)
                    }
                }
            };

        if is_blank_or_breakz(self.input.look_ch())
            || (self.flow_level > 0 && self.input.next_is_flow())
        {
            Ok(Token(
                Span::new(start_mark, self.mark),
                TokenType::Tag(handle, suffix),
            ))
        } else {
            Err(ScanError::new_str(
                start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
1643
1644 fn scan_tag_owned(&mut self, start_mark: &Marker) -> Result<Token<'input>, ScanError> {
1646 let mut handle = String::new();
1647 let mut suffix;
1648
1649 if self.input.nth_char_is(1, '<') {
1650 suffix = self.scan_verbatim_tag(start_mark)?;
1651 } else {
1652 handle = self.scan_tag_handle(false, start_mark)?;
1654 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1656 let is_secondary_handle = handle == "!!";
1658 suffix =
1659 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", start_mark)?;
1660 } else {
1661 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, start_mark)?;
1662 "!".clone_into(&mut handle);
1663 if suffix.is_empty() {
1666 handle.clear();
1667 "!".clone_into(&mut suffix);
1668 }
1669 }
1670 }
1671
1672 if is_blank_or_breakz(self.input.look_ch())
1673 || (self.flow_level > 0 && self.input.next_is_flow())
1674 {
1675 Ok(Token(
1677 Span::new(*start_mark, self.mark),
1678 TokenType::Tag(handle.into(), suffix.into()),
1679 ))
1680 } else {
1681 Err(ScanError::new_str(
1682 *start_mark,
1683 "while scanning a tag, did not find expected whitespace or line break",
1684 ))
1685 }
1686 }
1687
1688 fn scan_tag_handle_cow(&mut self, mark: &Marker) -> Result<Cow<'input, str>, ScanError> {
1693 let Some(start) = self.input.byte_offset() else {
1694 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1695 };
1696
1697 if self.input.look_ch() != '!' {
1698 return Err(ScanError::new_str(
1699 *mark,
1700 "while scanning a tag, did not find expected '!'",
1701 ));
1702 }
1703
1704 self.skip_non_blank();
1706
1707 self.input.lookahead(1);
1709 while self.input.next_is_alpha() {
1710 self.skip_non_blank();
1711 self.input.lookahead(1);
1712 }
1713
1714 if self.input.peek() == '!' {
1716 self.skip_non_blank();
1717 }
1718
1719 let Some(end) = self.input.byte_offset() else {
1720 return Ok(Cow::Owned(self.scan_tag_handle(false, mark)?));
1721 };
1722
1723 if let Some(slice) = self.try_borrow_slice(start, end) {
1724 Ok(Cow::Borrowed(slice))
1725 } else {
1726 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1727 ScanError::new_str(
1728 *mark,
1729 "internal error: input advertised slicing but did not provide a slice",
1730 )
1731 })?;
1732 Ok(Cow::Owned(slice.to_owned()))
1733 }
1734 }
1735
1736 fn scan_tag_shorthand_suffix_cow(
1740 &mut self,
1741 mark: &Marker,
1742 ) -> Result<Cow<'input, str>, ScanError> {
1743 let Some(start) = self.input.byte_offset() else {
1744 return Ok(Cow::Owned(
1745 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
1746 ));
1747 };
1748
1749 while is_tag_char(self.input.look_ch()) {
1751 if self.input.peek() == '%' {
1752 let current = self
1754 .input
1755 .byte_offset()
1756 .expect("byte_offset() must remain available once enabled");
1757 let mut out = if let Some(slice) = self.input.slice_bytes(start, current) {
1758 slice.to_owned()
1759 } else {
1760 String::new()
1761 };
1762
1763 while is_tag_char(self.input.look_ch()) {
1765 if self.input.peek() == '%' {
1766 out.push(self.scan_uri_escapes(mark)?);
1767 } else {
1768 out.push(self.input.peek());
1769 self.skip_non_blank();
1770 }
1771 }
1772 return Ok(Cow::Owned(out));
1773 }
1774 self.skip_non_blank();
1775 }
1776
1777 let Some(end) = self.input.byte_offset() else {
1778 return Ok(Cow::Owned(
1779 self.scan_tag_shorthand_suffix(false, false, "", mark)?,
1780 ));
1781 };
1782
1783 if let Some(slice) = self.try_borrow_slice(start, end) {
1784 Ok(Cow::Borrowed(slice))
1785 } else {
1786 let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
1787 ScanError::new_str(
1788 *mark,
1789 "internal error: input advertised slicing but did not provide a slice",
1790 )
1791 })?;
1792 Ok(Cow::Owned(slice.to_owned()))
1793 }
1794 }
1795
1796 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1797 let mut string = String::new();
1798 if self.input.look_ch() != '!' {
1799 return Err(ScanError::new_str(
1800 *mark,
1801 "while scanning a tag, did not find expected '!'",
1802 ));
1803 }
1804
1805 string.push(self.input.peek());
1806 self.skip_non_blank();
1807
1808 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1809 self.mark.offsets.chars += n_chars;
1810 self.mark.col += n_chars;
1811 self.mark.offsets.bytes = self.input.byte_offset();
1812
1813 if self.input.peek() == '!' {
1815 string.push(self.input.peek());
1816 self.skip_non_blank();
1817 } else if directive && string != "!" {
1818 return Err(ScanError::new_str(
1822 *mark,
1823 "while parsing a tag directive, did not find expected '!'",
1824 ));
1825 }
1826 Ok(string)
1827 }
1828
1829 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1835 let mut string = String::new();
1836
1837 if self.input.look_ch() == '!' {
1838 string.push(self.input.peek());
1840 self.skip_non_blank();
1841 } else if !is_tag_char(self.input.peek()) {
1842 return Err(ScanError::new_str(
1844 *start_mark,
1845 "invalid global tag character",
1846 ));
1847 } else if self.input.peek() == '%' {
1848 string.push(self.scan_uri_escapes(start_mark)?);
1850 } else {
1851 string.push(self.input.peek());
1853 self.skip_non_blank();
1854 }
1855
1856 while is_uri_char(self.input.look_ch()) {
1857 if self.input.peek() == '%' {
1858 string.push(self.scan_uri_escapes(start_mark)?);
1859 } else {
1860 string.push(self.input.peek());
1861 self.skip_non_blank();
1862 }
1863 }
1864
1865 Ok(string)
1866 }
1867
1868 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1872 self.skip_non_blank();
1874 self.skip_non_blank();
1875
1876 let mut string = String::new();
1877 while is_uri_char(self.input.look_ch()) {
1878 if self.input.peek() == '%' {
1879 string.push(self.scan_uri_escapes(start_mark)?);
1880 } else {
1881 string.push(self.input.peek());
1882 self.skip_non_blank();
1883 }
1884 }
1885
1886 if self.input.peek() != '>' {
1887 return Err(ScanError::new_str(
1888 *start_mark,
1889 "while scanning a verbatim tag, did not find the expected '>'",
1890 ));
1891 }
1892 self.skip_non_blank();
1893
1894 Ok(string)
1895 }
1896
1897 fn scan_tag_shorthand_suffix(
1898 &mut self,
1899 _directive: bool,
1900 _is_secondary: bool,
1901 head: &str,
1902 mark: &Marker,
1903 ) -> Result<String, ScanError> {
1904 let mut length = head.len();
1905 let mut string = String::new();
1906
1907 if length > 1 {
1910 string.extend(head.chars().skip(1));
1911 }
1912
1913 while is_tag_char(self.input.look_ch()) {
1914 if self.input.peek() == '%' {
1916 string.push(self.scan_uri_escapes(mark)?);
1917 } else {
1918 string.push(self.input.peek());
1919 self.skip_non_blank();
1920 }
1921
1922 length += 1;
1923 }
1924
1925 if length == 0 {
1926 return Err(ScanError::new_str(
1927 *mark,
1928 "while parsing a tag, did not find expected tag URI",
1929 ));
1930 }
1931
1932 Ok(string)
1933 }
1934
1935 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1936 let mut width = 0usize;
1937 let mut code = 0u32;
1938 loop {
1939 self.input.lookahead(3);
1940
1941 let c = self.input.peek_nth(1);
1942 let nc = self.input.peek_nth(2);
1943
1944 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1945 return Err(ScanError::new_str(
1946 *mark,
1947 "while parsing a tag, found an invalid escape sequence",
1948 ));
1949 }
1950
1951 let byte = (as_hex(c) << 4) + as_hex(nc);
1952 if width == 0 {
1953 width = match byte {
1954 _ if byte & 0x80 == 0x00 => 1,
1955 _ if byte & 0xE0 == 0xC0 => 2,
1956 _ if byte & 0xF0 == 0xE0 => 3,
1957 _ if byte & 0xF8 == 0xF0 => 4,
1958 _ => {
1959 return Err(ScanError::new_str(
1960 *mark,
1961 "while parsing a tag, found an incorrect leading UTF-8 byte",
1962 ));
1963 }
1964 };
1965 code = byte;
1966 } else {
1967 if byte & 0xc0 != 0x80 {
1968 return Err(ScanError::new_str(
1969 *mark,
1970 "while parsing a tag, found an incorrect trailing UTF-8 byte",
1971 ));
1972 }
1973 code = (code << 8) + byte;
1974 }
1975
1976 self.skip_n_non_blank(3);
1977
1978 width -= 1;
1979 if width == 0 {
1980 break;
1981 }
1982 }
1983
1984 match char::from_u32(code) {
1985 Some(ch) => Ok(ch),
1986 None => Err(ScanError::new_str(
1987 *mark,
1988 "while parsing a tag, found an invalid UTF-8 codepoint",
1989 )),
1990 }
1991 }
1992
1993 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
1994 self.save_simple_key();
1995 self.disallow_simple_key();
1996
1997 let tok = self.scan_anchor(alias)?;
1998
1999 self.tokens.push_back(tok);
2000
2001 Ok(())
2002 }
2003
2004 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
2005 let start_mark = self.mark;
2006
2007 self.skip_non_blank();
2009
2010 if let Some(start) = self.input.byte_offset() {
2012 while is_anchor_char(self.input.look_ch()) {
2013 self.skip_non_blank();
2014 }
2015
2016 let end = self
2017 .input
2018 .byte_offset()
2019 .expect("byte_offset() must remain available once enabled");
2020
2021 if start == end {
2022 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2023 }
2024
2025 let cow = if let Some(slice) = self.try_borrow_slice(start, end) {
2026 Cow::Borrowed(slice)
2027 } else if let Some(slice) = self.input.slice_bytes(start, end) {
2028 Cow::Owned(slice.to_owned())
2029 } else {
2030 return Err(ScanError::new_str(
2031 start_mark,
2032 "internal error: input advertised slicing but did not provide a slice",
2033 ));
2034 };
2035
2036 let tok = if alias {
2037 TokenType::Alias(cow)
2038 } else {
2039 TokenType::Anchor(cow)
2040 };
2041 return Ok(Token(Span::new(start_mark, self.mark), tok));
2042 }
2043
2044 let mut string = String::new();
2045 while is_anchor_char(self.input.look_ch()) {
2046 string.push(self.input.peek());
2047 self.skip_non_blank();
2048 }
2049
2050 if string.is_empty() {
2051 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
2052 }
2053
2054 let tok = if alias {
2055 TokenType::Alias(string.into())
2056 } else {
2057 TokenType::Anchor(string.into())
2058 };
2059 Ok(Token(Span::new(start_mark, self.mark), tok))
2060 }
2061
2062 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
2063 self.save_simple_key();
2065
2066 let start_mark = self.mark;
2067 let indicator = self.input.peek();
2068 self.flow_markers.push((start_mark, indicator));
2069
2070 self.roll_one_col_indent();
2071 self.increase_flow_level()?;
2072
2073 self.allow_simple_key();
2074
2075 self.skip_non_blank();
2076
2077 if tok == TokenType::FlowMappingStart {
2078 self.flow_mapping_started = true;
2079 } else {
2080 self.implicit_flow_mapping_states
2081 .push(ImplicitMappingState::Possible);
2082 }
2083
2084 self.skip_ws_to_eol(SkipTabs::Yes)?;
2085
2086 self.tokens
2087 .push_back(Token(Span::new(start_mark, self.mark), tok));
2088 Ok(())
2089 }
2090
2091 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
2092 if self.flow_level == 0 {
2094 return Err(ScanError::new_str(self.mark, "misplaced bracket"));
2095 }
2096
2097 let flow_level = self.flow_level;
2098
2099 self.flow_markers.pop();
2100 self.remove_simple_key()?;
2101
2102 if matches!(tok, TokenType::FlowSequenceEnd) {
2103 self.end_implicit_mapping(self.mark, flow_level);
2104 self.implicit_flow_mapping_states.pop();
2106 }
2107
2108 self.decrease_flow_level();
2109
2110 self.disallow_simple_key();
2111
2112 let start_mark = self.mark;
2113 self.skip_non_blank();
2114 self.skip_ws_to_eol(SkipTabs::Yes)?;
2115
2116 if self.flow_level > 0 {
2122 self.adjacent_value_allowed_at = self.mark.index();
2123 }
2124
2125 self.tokens
2126 .push_back(Token(Span::new(start_mark, self.mark), tok));
2127 Ok(())
2128 }
2129
2130 fn fetch_flow_entry(&mut self) -> ScanResult {
2132 self.remove_simple_key()?;
2133 self.allow_simple_key();
2134
2135 self.end_implicit_mapping(self.mark, self.flow_level);
2136
2137 let start_mark = self.mark;
2138 self.skip_non_blank();
2139 self.skip_ws_to_eol(SkipTabs::Yes)?;
2140
2141 self.tokens.push_back(Token(
2142 Span::new(start_mark, self.mark),
2143 TokenType::FlowEntry,
2144 ));
2145 Ok(())
2146 }
2147
2148 fn increase_flow_level(&mut self) -> ScanResult {
2149 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
2150 self.flow_level = self
2151 .flow_level
2152 .checked_add(1)
2153 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
2154 Ok(())
2155 }
2156
2157 fn decrease_flow_level(&mut self) {
2158 if self.flow_level > 0 {
2159 self.flow_level -= 1;
2160 self.simple_keys.pop().unwrap();
2161 }
2162 }
2163
2164 fn fetch_block_entry(&mut self) -> ScanResult {
2170 if self.flow_level > 0 {
2171 return Err(ScanError::new_str(
2173 self.mark,
2174 r#""-" is only valid inside a block"#,
2175 ));
2176 }
2177 if !self.simple_key_allowed {
2179 return Err(ScanError::new_str(
2180 self.mark,
2181 "block sequence entries are not allowed in this context",
2182 ));
2183 }
2184
2185 if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
2187 if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
2188 return Err(ScanError::new_str(
2189 span.start,
2190 "invalid indentation for anchor",
2191 ));
2192 }
2193 }
2194
2195 let mark = self.mark;
2197 self.skip_non_blank();
2198
2199 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
2201 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
2202 self.input.lookahead(2);
2203 if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
2204 {
2205 return Err(ScanError::new_str(
2206 self.mark,
2207 "'-' must be followed by a valid YAML whitespace",
2208 ));
2209 }
2210
2211 self.skip_ws_to_eol(SkipTabs::No)?;
2212 self.input.lookahead(1);
2213 if self.input.next_is_break() || self.input.next_is_flow() {
2214 self.roll_one_col_indent();
2215 }
2216
2217 self.remove_simple_key()?;
2218 self.allow_simple_key();
2219
2220 self.tokens
2221 .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
2222
2223 Ok(())
2224 }
2225
2226 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
2227 if let Some((mark, bracket)) = self.flow_markers.pop() {
2228 return Err(ScanError::new(
2229 mark,
2230 format!("unclosed bracket '{bracket}'"),
2231 ));
2232 }
2233
2234 self.unroll_indent(-1);
2235 self.remove_simple_key()?;
2236 self.disallow_simple_key();
2237
2238 let mark = self.mark;
2239
2240 self.skip_n_non_blank(3);
2241
2242 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
2243 Ok(())
2244 }
2245
2246 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
2247 self.save_simple_key();
2248 self.allow_simple_key();
2249 let tok = self.scan_block_scalar(literal)?;
2250
2251 self.tokens.push_back(tok);
2252 Ok(())
2253 }
2254
/// Scan a block scalar and return it as a `Scalar` token.
///
/// `literal` selects the style recorded on the token: `ScalarStyle::Literal` when `true`,
/// `ScalarStyle::Folded` otherwise. In folded style, a single line break between two lines
/// that do not start with a blank is replaced by a space.
///
/// The header may carry a chomping indicator (`+` keeps trailing breaks, `-` strips them;
/// clipping is the default) and a one-digit indentation indicator, in either order.
///
/// # Errors
/// Fails when the indentation indicator is `0`, when the rest of the header line is not
/// terminated by a line break or end of input, when the content starts with a tab, or when
/// the first content line is indented less than the scalar's indentation but more than the
/// enclosing indentation. Errors from `skip_ws_to_eol` are propagated.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    // Value of the explicit indentation indicator; 0 means "none given".
    let mut increment: usize = 0;
    // Column at which content lines start; 0 means "not determined yet".
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        ScalarStyle::Literal
    } else {
        ScalarStyle::Folded
    };

    let mut string = String::new();
    // Line break that ended the previous content line.
    let mut leading_break = String::new();
    // Line breaks of the empty lines that followed `leading_break`.
    let mut trailing_breaks = String::new();
    // Line break that ended the header line.
    let mut chomping_break = String::new();

    // Skip the character that introduced the block scalar.
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header: chomping indicator then optional digit, or digit then optional chomping
    // indicator.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    // Skip what remains of the header line.
    self.skip_ws_to_eol(SkipTabs::Yes)?;

    // The header must be terminated by a line break or the end of the input.
    self.input.lookahead(1);
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indicator is relative to the current indentation when there is one.
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    // Skip leading empty lines; without an explicit indicator this also determines
    // `indent` from the first lines.
    if indent == 0 {
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // End of input reached before any content line: the result depends only on the
    // chomping mode and on the breaks seen so far.
    if self.input.next_is_z() {
        let contents = match chomping {
            // Stripping leaves nothing.
            Chomping::Strip => String::new(),
            // Still on the header line: there was no line break at all.
            _ if self.mark.line == start_mark.line() => String::new(),
            // Clipping keeps only the break that ended the header line.
            Chomping::Clip => chomping_break,
            // Keeping with no further break: only the header's break remains.
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            // Keeping with further breaks: those are the content.
            Chomping::Keep => trailing_breaks,
        };
        return Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, contents.into()),
        ));
    }

    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        return Err(ScanError::new_str(
            self.mark,
            "wrongly indented line in block scalar",
        ));
    }

    // Scratch buffer reused by `scan_block_scalar_content_line` for every line.
    let mut line_buffer = String::with_capacity(100);
    // From here on, the token span starts at the first content character.
    let start_mark = self.mark;
    while self.mark.col == indent && !self.input.next_is_z() {
        // At column 0 a document end marker terminates the scalar.
        if indent == 0 {
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        trailing_blank = self.input.next_is_blank();
        // Folding: join this line with the previous one unless either starts with a blank.
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            string.push_str(&trailing_breaks);
            // A lone line break folds into a single space.
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        // Remembered for the folding decision of the next line.
        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        self.input.lookahead(2);
        // Stop at the end of the input.
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Skip the indentation and the empty lines that follow.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Apply chomping to the tail.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        // The input ended without a final line break while on a line indented at least
        // as much as the content (and not at column 0): add the missing line break.
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string.into()),
    ))
}
2448
2449 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
2459 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
2461 string.push(self.input.peek());
2462 self.skip_blank();
2468 }
2469
2470 if self.input.buf_is_empty() {
2473 let mut n_chars = 0;
2481 debug_assert!(line_buffer.is_empty());
2482 while let Some(c) = self.input.raw_read_non_breakz_ch() {
2483 line_buffer.push(c);
2484 n_chars += 1;
2485 }
2486
2487 self.mark.col += n_chars;
2489 self.mark.offsets.chars += n_chars;
2490 self.mark.offsets.bytes = self.input.byte_offset();
2491
2492 string.reserve(line_buffer.len());
2494 string.push_str(line_buffer);
2495 line_buffer.clear();
2497 }
2498 }
2499
/// Skip the indentation of block-scalar lines, collecting the line breaks of empty lines.
///
/// On each line, spaces are consumed until column `indent` is reached or a non-space
/// character is met. If the line then ends with a line break, the break is appended to
/// `breaks` and the next line is processed the same way; otherwise the function returns
/// with the scanner positioned on that line.
fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
    loop {
        // Only spaces are consumed as indentation here.
        if indent < self.input.bufmaxlen() - 2 {
            // NOTE(review): presumably a single lookahead is enough here because the
            // whole indentation fits in the buffer — confirm against `bufmaxlen`.
            self.input.lookahead(self.input.bufmaxlen());
            while self.mark.col < indent && self.input.peek() == ' ' {
                self.skip_blank();
            }
        } else {
            // Otherwise the buffer is refilled as many times as needed.
            loop {
                self.input.lookahead(self.input.bufmaxlen());
                while !self.input.buf_is_empty()
                    && self.mark.col < indent
                    && self.input.peek() == ' '
                {
                    self.skip_blank();
                }
                // Stop once the target column is reached, or when the buffer still
                // holds a character that is not a space.
                if self.mark.col == indent
                    || (!self.input.buf_is_empty() && self.input.peek() != ' ')
                {
                    break;
                }
            }
            self.input.lookahead(2);
        }

        if self.input.next_is_break() {
            // Empty line: record its break and move on to the next line.
            self.read_break(breaks);
        } else {
            // Something other than a line break follows: hand control back.
            break;
        }
    }
}
2539
2540 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
2545 let mut max_indent = 0;
2546 loop {
2547 while self.input.look_ch() == ' ' {
2549 self.skip_blank();
2550 }
2551
2552 if self.mark.col > max_indent {
2553 max_indent = self.mark.col;
2554 }
2555
2556 if self.input.next_is_break() {
2557 self.input.lookahead(2);
2559 self.read_break(breaks);
2560 } else {
2561 break;
2563 }
2564 }
2565
2566 *indent = max_indent.max((self.indent + 1) as usize);
2575 if self.indent > 0 {
2576 *indent = (*indent).max(1);
2577 }
2578 }
2579
2580 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
2581 self.save_simple_key();
2582 self.disallow_simple_key();
2583
2584 let tok = self.scan_flow_scalar(single)?;
2585
2586 self.skip_to_next_token()?;
2589 self.adjacent_value_allowed_at = self.mark.index();
2590
2591 self.tokens.push_back(tok);
2592 Ok(())
2593 }
2594
/// Scan a quoted scalar and return it as a `Scalar` token.
///
/// `single` selects single-quoted (`true`) or double-quoted (`false`) scanning and the
/// matching `ScalarStyle`.
///
/// When the input exposes byte offsets, the content is tracked as a byte range of the
/// input (`FlowScalarBuf::Borrowed`) for as long as possible; the buffer is promoted to an
/// owned string through `promote_flow_scalar_buf_to_owned` when line folding requires it
/// (escapes are handled in `consume_flow_scalar_non_whitespace_chars`).
///
/// # Errors
/// Fails when:
/// - a document indicator is found at column 0 inside the scalar;
/// - the end of the input is reached before the closing quote;
/// - a tab is used where indentation is expected on a continuation line;
/// - outside flow collections, a continuation line is not indented more than the current
///   indentation (unless it starts with the closing quote);
/// - the closing quote is followed by unexpected content.
///
/// Errors from the helpers it calls are propagated.
#[allow(clippy::too_many_lines)]
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    let mut buf = match self.input.byte_offset() {
        // The borrowed range starts right after the character at the current position
        // (the opening quote).
        Some(off) => FlowScalarBuf::new_borrowed(off + self.input.peek().len_utf8()),
        None => FlowScalarBuf::new_owned(),
    };

    // Receives the first line break of each fold; its contents are not read afterwards.
    let mut break_scratch = String::new();

    // Skip the opening quote.
    self.skip_non_blank();

    loop {
        self.input.lookahead(4);

        if self.mark.col == 0 && self.input.next_is_document_indicator() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a quoted scalar, found unexpected document indicator",
            ));
        }

        if self.input.next_is_z() {
            return Err(ScanError::new_str(start_mark, "unclosed quote"));
        }

        // Set once a line break has been met in the current run of whitespace (or by the
        // helper below when it consumes an escaped line break).
        let mut leading_blanks = false;
        self.consume_flow_scalar_non_whitespace_chars(
            single,
            &mut buf,
            &mut leading_blanks,
            &start_mark,
        )?;

        // The closing quote ends the scalar.
        match self.input.look_ch() {
            '\'' if single => break,
            '"' if !single => break,
            _ => {}
        }

        // Owned mode: length of the string before the current run of blanks, so that the
        // run can be dropped if a line break follows it.
        let mut trailing_ws_start: Option<usize> = None;
        let mut has_leading_break = false;
        let mut has_trailing_breaks = false;

        // Borrowed mode: byte offset at which the current run of blanks started.
        let mut pending_ws_start: Option<usize> = None;

        while self.input.next_is_blank() || self.input.next_is_break() {
            if self.input.next_is_blank() {
                if leading_blanks {
                    // Blanks at the start of a continuation line are dropped.
                    if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tab cannot be used as indentation",
                        ));
                    }
                    self.skip_blank();
                } else {
                    // Blanks after content: kept tentatively until it is known whether a
                    // line break follows.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => {
                            if trailing_ws_start.is_none() {
                                trailing_ws_start = Some(string.len());
                            }
                            string.push(self.input.peek());
                        }
                        FlowScalarBuf::Borrowed { .. } => {
                            if pending_ws_start.is_none() {
                                pending_ws_start = self.input.byte_offset();
                            }
                        }
                    }
                    self.skip_blank();

                    // Borrowed mode: record the extent of the run seen so far.
                    if let (FlowScalarBuf::Borrowed { .. }, Some(ws_start), Some(ws_end)) =
                        (&mut buf, pending_ws_start, self.input.byte_offset())
                    {
                        buf.note_pending_ws(ws_start, ws_end);
                    }
                }
            } else {
                self.input.lookahead(2);

                if leading_blanks {
                    // Second and later line breaks of a fold are kept in the content,
                    // which requires an owned buffer.
                    match buf {
                        FlowScalarBuf::Owned(ref mut string) => self.read_break(string),
                        FlowScalarBuf::Borrowed { .. } => {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                            let Some(string) = buf.as_owned_mut() else {
                                unreachable!()
                            };
                            self.read_break(string);
                        }
                    }
                    has_trailing_breaks = true;
                } else {
                    // First line break of a fold: the blanks before it are dropped.
                    if let Some(pos) = trailing_ws_start.take() {
                        if let FlowScalarBuf::Owned(ref mut string) = buf {
                            string.truncate(pos);
                        }
                    }

                    if pending_ws_start.take().is_some() {
                        if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                            self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        }
                        buf.discard_pending_ws();
                    } else {
                        buf.commit_pending_ws();
                    }

                    // The break itself is consumed but not stored in the content.
                    break_scratch.clear();
                    self.read_break(&mut break_scratch);
                    has_leading_break = true;
                    leading_blanks = true;
                }
            }

            self.input.lookahead(1);
        }

        // Outside flow collections, a continuation line must be indented more than the
        // current indentation, unless it holds the closing quote.
        if leading_blanks && has_leading_break && self.flow_level == 0 {
            let next_ch = self.input.peek();
            let is_closing_quote = (single && next_ch == '\'') || (!single && next_ch == '"');
            if !is_closing_quote && (self.mark.col as isize) <= self.indent {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid indentation in multiline quoted scalar",
                ));
            }
        }

        if leading_blanks {
            // A single line break folds into a space; this needs an owned buffer.
            if has_leading_break && !has_trailing_breaks {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => string.push(' '),
                    FlowScalarBuf::Borrowed { .. } => {
                        self.promote_flow_scalar_buf_to_owned(&start_mark, &mut buf)?;
                        let Some(string) = buf.as_owned_mut() else {
                            unreachable!()
                        };
                        string.push(' ');
                    }
                }
            }
        }
    }
    // Skip the closing quote.
    self.skip_non_blank();

    self.skip_ws_to_eol(SkipTabs::Yes)?;
    // Only a limited set of characters may follow the closing quote.
    match self.input.peek() {
        // Flow indicators, inside a flow collection.
        ',' | '}' | ']' if self.flow_level > 0 => {}
        // A line break or the end of the input.
        c if is_breakz(c) => {}
        // Outside flow collections, `:` only if the scalar started on this very line.
        ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
        // Inside flow collections, `:` is always accepted.
        ':' if self.flow_level > 0 => {}
        _ => {
            return Err(ScanError::new_str(
                self.mark,
                "invalid trailing content after double-quoted scalar",
            ));
        }
    }

    let style = if single {
        ScalarStyle::SingleQuoted
    } else {
        ScalarStyle::DoubleQuoted
    };

    let contents = match buf {
        FlowScalarBuf::Owned(string) => Cow::Owned(string),
        FlowScalarBuf::Borrowed {
            start,
            mut end,
            pending_ws_start,
            pending_ws_end,
        } => {
            // Blanks still pending at this point sit right before the closing quote and
            // belong to the content.
            if pending_ws_start.is_some() {
                end = pending_ws_end;
            }
            if let Some(slice) = self.try_borrow_slice(start, end) {
                Cow::Borrowed(slice)
            } else {
                let slice = self.input.slice_bytes(start, end).ok_or_else(|| {
                    ScanError::new_str(
                        start_mark,
                        "internal error: input advertised offsets but did not provide a slice",
                    )
                })?;
                Cow::Owned(slice.to_owned())
            }
        }
    };

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, contents),
    ))
}
2842
/// Consume the characters of a quoted scalar up to the next blank, line break, end of
/// input or closing quote, feeding them into `buf`.
///
/// `single` tells whether the scalar is single-quoted. `leading_blanks` is set to `true`
/// when a backslash-escaped line break is consumed (double-quoted scalars only).
/// `start_mark` is used to position errors.
///
/// While `buf` is in borrowed mode, plain characters only extend its end offset; the
/// buffer is promoted to an owned string as soon as an escaped quote, an escaped line
/// break or an escape sequence is met.
///
/// # Errors
/// Propagates errors from `promote_flow_scalar_buf_to_owned` and
/// `resolve_flow_scalar_escape_sequence`.
fn consume_flow_scalar_non_whitespace_chars(
    &mut self,
    single: bool,
    buf: &mut FlowScalarBuf,
    leading_blanks: &mut bool,
    start_mark: &Marker,
) -> Result<(), ScanError> {
    self.input.lookahead(2);
    while !is_blank_or_breakz(self.input.peek()) {
        match self.input.peek() {
            // Two consecutive single quotes inside a single-quoted scalar stand for one
            // literal quote.
            '\'' if self.input.peek_nth(1) == '\'' && single => {
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                let Some(string) = buf.as_owned_mut() else {
                    unreachable!()
                };
                string.push('\'');
                self.skip_n_non_blank(2);
            }
            // Closing quote: left in place for the caller.
            '\'' if single => break,
            '"' if !single => break,
            // Backslash directly followed by a line break (double-quoted only): both are
            // consumed and nothing is added to the content.
            '\\' if !single && is_break(self.input.peek_nth(1)) => {
                self.input.lookahead(3);
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                self.skip_non_blank();
                self.skip_linebreak();
                *leading_blanks = true;
                break;
            }
            // Any other escape sequence (double-quoted only) is resolved to a character.
            '\\' if !single => {
                if matches!(buf, FlowScalarBuf::Borrowed { .. }) {
                    buf.commit_pending_ws();
                    self.promote_flow_scalar_buf_to_owned(start_mark, buf)?;
                }
                let Some(string) = buf.as_owned_mut() else {
                    unreachable!()
                };
                string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
            }
            // Plain character.
            c => {
                match buf {
                    FlowScalarBuf::Owned(ref mut string) => {
                        string.push(c);
                    }
                    FlowScalarBuf::Borrowed { .. } => {
                        // Content follows the pending blanks, so they are part of the
                        // scalar.
                        buf.commit_pending_ws();
                    }
                }
                self.skip_non_blank();

                // Borrowed mode: extend the range to just past the consumed character.
                if let Some(new_end) = self.input.byte_offset() {
                    if let FlowScalarBuf::Borrowed { end, .. } = buf {
                        *end = new_end;
                    }
                }
            }
        }
        self.input.lookahead(2);
    }
    Ok(())
}
2921
2922 fn resolve_flow_scalar_escape_sequence(
2929 &mut self,
2930 start_mark: &Marker,
2931 ) -> Result<char, ScanError> {
2932 let mut code_length = 0usize;
2933 let mut ret = '\0';
2934
2935 match self.input.peek_nth(1) {
2936 '0' => ret = '\0',
2937 'a' => ret = '\x07',
2938 'b' => ret = '\x08',
2939 't' | '\t' => ret = '\t',
2940 'n' => ret = '\n',
2941 'v' => ret = '\x0b',
2942 'f' => ret = '\x0c',
2943 'r' => ret = '\x0d',
2944 'e' => ret = '\x1b',
2945 ' ' => ret = '\x20',
2946 '"' => ret = '"',
2947 '/' => ret = '/',
2948 '\\' => ret = '\\',
2949 'N' => ret = char::from_u32(0x85).unwrap(),
2951 '_' => ret = char::from_u32(0xA0).unwrap(),
2953 'L' => ret = char::from_u32(0x2028).unwrap(),
2955 'P' => ret = char::from_u32(0x2029).unwrap(),
2957 'x' => code_length = 2,
2958 'u' => code_length = 4,
2959 'U' => code_length = 8,
2960 _ => {
2961 return Err(ScanError::new_str(
2962 *start_mark,
2963 "while parsing a quoted scalar, found unknown escape character",
2964 ))
2965 }
2966 }
2967 self.skip_n_non_blank(2);
2968
2969 if code_length > 0 {
2971 self.input.lookahead(code_length);
2972 let mut value = 0u32;
2973 for i in 0..code_length {
2974 let c = self.input.peek_nth(i);
2975 if !is_hex(c) {
2976 return Err(ScanError::new_str(
2977 *start_mark,
2978 "while parsing a quoted scalar, did not find expected hexadecimal number",
2979 ));
2980 }
2981 value = (value << 4) + as_hex(c);
2982 }
2983
2984 self.skip_n_non_blank(code_length);
2985
2986 if code_length == 4 && (0xD800..=0xDBFF).contains(&value) {
2988 self.input.lookahead(2);
2989 if self.input.peek() == '\\' && self.input.peek_nth(1) == 'u' {
2990 self.skip_n_non_blank(2);
2991 self.input.lookahead(4);
2992 let mut low_value = 0u32;
2993 for i in 0..4 {
2994 let c = self.input.peek_nth(i);
2995 if !is_hex(c) {
2996 return Err(ScanError::new_str(
2997 *start_mark,
2998 "while parsing a quoted scalar, did not find expected hexadecimal number for low surrogate",
2999 ));
3000 }
3001 low_value = (low_value << 4) + as_hex(c);
3002 }
3003 if (0xDC00..=0xDFFF).contains(&low_value) {
3004 value = 0x10000 + (((value - 0xD800) << 10) | (low_value - 0xDC00));
3005 self.skip_n_non_blank(4);
3006 } else {
3007 return Err(ScanError::new_str(
3008 *start_mark,
3009 "while parsing a quoted scalar, found invalid low surrogate",
3010 ));
3011 }
3012 } else {
3013 return Err(ScanError::new_str(
3014 *start_mark,
3015 "while parsing a quoted scalar, found high surrogate without following low surrogate",
3016 ));
3017 }
3018 } else if code_length == 4 && (0xDC00..=0xDFFF).contains(&value) {
3019 return Err(ScanError::new_str(
3020 *start_mark,
3021 "while parsing a quoted scalar, found unpaired low surrogate",
3022 ));
3023 }
3024
3025 let Some(ch) = char::from_u32(value) else {
3026 return Err(ScanError::new_str(
3027 *start_mark,
3028 "while parsing a quoted scalar, found invalid Unicode character escape code",
3029 ));
3030 };
3031 ret = ch;
3032 }
3033 Ok(ret)
3034 }
3035
3036 fn fetch_plain_scalar(&mut self) -> ScanResult {
3037 self.save_simple_key();
3038 self.disallow_simple_key();
3039
3040 let tok = self.scan_plain_scalar()?;
3041
3042 self.tokens.push_back(tok);
3043 Ok(())
3044 }
3045
/// Scan a plain (unquoted) scalar starting at the current mark.
///
/// The scalar text is accumulated in a local `String`. Whitespace and line
/// breaks met between content runs are parked in `buf_whitespaces`,
/// `buf_leading_break` and `buf_trailing_breaks` and are only copied into the
/// result once more scalar content follows them.
///
/// # Returns
/// A `Scalar` token with `ScalarStyle::Plain`. The contents are
/// `Cow::Borrowed` when both marks carry a byte offset and the input slice
/// between them equals the accumulated string; otherwise they are `Cow::Owned`.
///
/// # Errors
/// * the scalar starts left of the required indentation inside a flow construct;
/// * inside a flow construct, a `-` is immediately followed by a flow indicator;
/// * a tab is found in the indentation of a line that is not otherwise empty;
/// * no character at all was consumed.
#[allow(clippy::too_many_lines)]
fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
    // Drop indents that do not need a block end before computing the minimum column.
    self.unroll_non_block_indents();
    let indent = self.indent + 1;
    let start_mark = self.mark;

    if self.flow_level > 0 && (start_mark.col as isize) < indent {
        return Err(ScanError::new_str(
            start_mark,
            "invalid indentation in flow construct",
        ));
    }

    let mut string = String::with_capacity(32);
    // Start from clean scratch buffers; they are shared scanner state.
    self.buf_whitespaces.clear();
    self.buf_leading_break.clear();
    self.buf_trailing_breaks.clear();
    // End of the last content character consumed so far (excludes trailing blanks).
    let mut end_mark = self.mark;

    loop {
        self.input.lookahead(4);
        // A document indicator at column 0 or a `#` ends the scalar.
        if (self.mark.col == 0 && self.input.next_is_document_indicator())
            || self.input.peek() == '#'
        {
            // Record where a `#` cut the scalar short: only in block context, and only
            // when content was already read and blanks are buffered before the `#`.
            if self.input.peek() == '#'
                && !string.is_empty()
                && !self.buf_whitespaces.is_empty()
                && self.flow_level == 0
            {
                self.interrupted_plain_by_comment = Some(self.mark);
            }
            break;
        }

        if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
            return Err(ScanError::new_str(
                self.mark,
                "plain scalar cannot start with '-' followed by ,[]{}",
            ));
        }

        if !self.input.next_is_blank_or_breakz()
            && self.input.next_can_be_plain_scalar(self.flow_level > 0)
        {
            // More content follows: flush whatever separators were buffered before it.
            if self.leading_whitespace {
                if self.buf_leading_break.is_empty() {
                    // No leading break was buffered, so only the trailing breaks
                    // (if any) actually contribute text here.
                    string.push_str(&self.buf_leading_break);
                    string.push_str(&self.buf_trailing_breaks);
                    self.buf_trailing_breaks.clear();
                    self.buf_leading_break.clear();
                } else {
                    // One line break was buffered: it becomes a single space unless
                    // further breaks followed, in which case those are kept instead.
                    if self.buf_trailing_breaks.is_empty() {
                        string.push(' ');
                    } else {
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                    }
                    self.buf_leading_break.clear();
                }
                self.leading_whitespace = false;
            } else if !self.buf_whitespaces.is_empty() {
                // Blanks inside a line are kept verbatim.
                string.push_str(&self.buf_whitespaces);
                self.buf_whitespaces.clear();
            }

            // Consume the first content character through the regular path ...
            string.push(self.input.peek());
            self.skip_non_blank();
            string.reserve(self.input.bufmaxlen());

            // ... then pull the rest of the run in chunks, keeping the mark in sync
            // with the number of characters the input reports as consumed.
            let mut end = false;
            while !end {
                self.input.lookahead(self.input.bufmaxlen());
                let (stop, chars_consumed) = self.input.fetch_plain_scalar_chunk(
                    &mut string,
                    self.input.bufmaxlen() - 1,
                    self.flow_level > 0,
                );
                end = stop;
                self.mark.offsets.chars += chars_consumed;
                self.mark.col += chars_consumed;
                self.mark.offsets.bytes = self.input.byte_offset();
            }
            end_mark = self.mark;
        }

        // Anything other than a blank or a line break terminates the scalar.
        if !(self.input.next_is_blank() || self.input.next_is_break()) {
            break;
        }

        self.input.lookahead(2);
        // Consume blanks and line breaks, buffering them until we know whether more
        // scalar content follows.
        while self.input.next_is_blank_or_break() {
            if self.input.next_is_blank() {
                if !self.leading_whitespace {
                    // Blank after content on the same line: remember it.
                    self.buf_whitespaces.push(self.input.peek());
                    self.skip_blank();
                } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                    // A tab left of the required indentation is accepted only when
                    // nothing but whitespace remains on the line.
                    self.skip_ws_to_eol(SkipTabs::Yes)?;
                    if !self.input.next_is_breakz() {
                        return Err(ScanError::new_str(
                            start_mark,
                            "while scanning a plain scalar, found a tab",
                        ));
                    }
                } else {
                    // Leading blanks of a line are skipped without being recorded.
                    self.skip_blank();
                }
            } else {
                if self.leading_whitespace {
                    // Second or later consecutive break.
                    self.skip_break();
                    self.buf_trailing_breaks.push('\n');
                } else {
                    // First break after content: blanks before it are discarded.
                    self.buf_whitespaces.clear();
                    self.skip_break();
                    self.buf_leading_break.push('\n');
                    self.leading_whitespace = true;
                }
            }
            self.input.lookahead(2);
        }

        // In block context the scalar ends once the next line is less indented.
        if self.flow_level == 0 && (self.mark.col as isize) < indent {
            break;
        }
    }

    // The scan stopped after at least one line break (or the flag was already set on
    // entry): a simple key may start at the current position.
    if self.leading_whitespace {
        self.allow_simple_key();
    }

    if string.is_empty() {
        // Nothing was consumed; report an error rather than emit an empty scalar.
        Err(ScanError::new_str(
            start_mark,
            "unexpected end of plain scalar",
        ))
    } else {
        // Borrow from the input when the raw slice is exactly the scalar text;
        // otherwise hand out the accumulated string.
        let contents = if let (Some(start), Some(end)) =
            (start_mark.byte_offset(), end_mark.byte_offset())
        {
            match self.try_borrow_slice(start, end) {
                Some(slice) if slice == string => Cow::Borrowed(slice),
                _ => Cow::Owned(string),
            }
        } else {
            Cow::Owned(string)
        };

        Ok(Token(
            Span::new(start_mark, end_mark),
            TokenType::Scalar(ScalarStyle::Plain, contents),
        ))
    }
}
3223
3224 fn fetch_key(&mut self) -> ScanResult {
3225 let start_mark = self.mark;
3226 if self.flow_level == 0 {
3227 if !self.simple_key_allowed {
3229 return Err(ScanError::new_str(
3230 self.mark,
3231 "mapping keys are not allowed in this context",
3232 ));
3233 }
3234 self.roll_indent(
3235 start_mark.col,
3236 None,
3237 TokenType::BlockMappingStart,
3238 start_mark,
3239 );
3240 } else {
3241 self.flow_mapping_started = true;
3243 }
3244
3245 self.remove_simple_key()?;
3246
3247 if self.flow_level == 0 {
3248 self.allow_simple_key();
3249 } else {
3250 self.disallow_simple_key();
3251 }
3252
3253 self.skip_non_blank();
3254 self.skip_yaml_whitespace()?;
3255 if self.input.peek() == '\t' {
3256 return Err(ScanError::new_str(
3257 self.mark(),
3258 "tabs disallowed in this context",
3259 ));
3260 }
3261 self.tokens
3262 .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
3263 Ok(())
3264 }
3265
3266 fn fetch_flow_value(&mut self) -> ScanResult {
3274 let nc = self.input.peek_nth(1);
3275
3276 if self.mark.index() != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
3288 return Err(ScanError::new_str(
3289 self.mark,
3290 "':' may not precede any of `[{` in flow mapping",
3291 ));
3292 }
3293
3294 self.fetch_value()
3295 }
3296
/// Handle a `:` value indicator and queue a `Value` token.
///
/// If the most recent simple-key candidate is still possible, a `Key` token
/// (and, for an implicit flow mapping, a `FlowMappingStart`) is inserted
/// retroactively at the candidate's token position. Otherwise the `:` is
/// treated as following a non-simple key.
///
/// # Errors
/// * "':' must be followed by a valid YAML whitespace";
/// * "illegal placement of ':' indicator" when an implicit flow mapping key
///   starts on an earlier line than the `:`;
/// * "mapping values are not allowed in this context" in block context when no
///   simple key is allowed;
/// * any error from `skip_ws_to_eol`.
///
/// # Panics
/// Panics if `simple_keys` is empty.
fn fetch_value(&mut self) -> ScanResult {
    // Work on a copy of the current simple-key candidate.
    let sk = self.simple_keys.last().unwrap().clone();
    let start_mark = self.mark;
    // An implicit flow mapping: an implicit-mapping state is being tracked and
    // `flow_mapping_started` is not set.
    let is_implicit_flow_mapping =
        !self.implicit_flow_mapping_states.is_empty() && !self.flow_mapping_started;
    if is_implicit_flow_mapping {
        *self.implicit_flow_mapping_states.last_mut().unwrap() =
            ImplicitMappingState::Inside(self.flow_level);
    }

    // Skip over the ':' itself.
    self.skip_non_blank();
    // Note: `skip_ws_to_eol` runs (and consumes input) only when the next character
    // is a tab, because of short-circuit evaluation.
    if self.input.look_ch() == '\t'
        && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
        && (self.input.peek() == '-' || self.input.next_is_alpha())
    {
        return Err(ScanError::new_str(
            self.mark,
            "':' must be followed by a valid YAML whitespace",
        ));
    }

    if sk.possible {
        // Insert the Key token at the position where the candidate was saved.
        let tok = Token(Span::empty(sk.mark), TokenType::Key);
        self.insert_token(sk.token_number - self.tokens_parsed, tok);
        if is_implicit_flow_mapping {
            if sk.mark.line < start_mark.line {
                return Err(ScanError::new_str(
                    start_mark,
                    "illegal placement of ':' indicator",
                ));
            }
            // Inserted at the same index, so it ends up in front of the Key token.
            self.insert_token(
                sk.token_number - self.tokens_parsed,
                Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
            );
        }

        // Add a BlockMappingStart at the key's column if the indentation grew
        // (`roll_indent` is a no-op in flow context).
        self.roll_indent(
            sk.mark.col,
            Some(sk.token_number),
            TokenType::BlockMappingStart,
            sk.mark,
        );
        self.roll_one_col_indent();

        // The candidate has been consumed.
        self.simple_keys.last_mut().unwrap().possible = false;
        self.disallow_simple_key();
    } else {
        if is_implicit_flow_mapping {
            self.tokens
                .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
        }
        // No simple-key candidate: the ':' follows some other kind of key.
        if self.flow_level == 0 {
            if !self.simple_key_allowed {
                return Err(ScanError::new_str(
                    start_mark,
                    "mapping values are not allowed in this context",
                ));
            }

            self.roll_indent(
                start_mark.col,
                None,
                TokenType::BlockMappingStart,
                start_mark,
            );
        }
        self.roll_one_col_indent();

        if self.flow_level == 0 {
            self.allow_simple_key();
        } else {
            self.disallow_simple_key();
        }
    }
    self.tokens
        .push_back(Token(Span::empty(start_mark), TokenType::Value));

    Ok(())
}
3388
3389 fn roll_indent(
3395 &mut self,
3396 col: usize,
3397 number: Option<usize>,
3398 tok: TokenType<'input>,
3399 mark: Marker,
3400 ) {
3401 if self.flow_level > 0 {
3402 return;
3403 }
3404
3405 if self.indent <= col as isize {
3409 if let Some(indent) = self.indents.last() {
3410 if !indent.needs_block_end {
3411 self.indent = indent.indent;
3412 self.indents.pop();
3413 }
3414 }
3415 }
3416
3417 if self.indent < col as isize {
3418 self.indents.push(Indent {
3419 indent: self.indent,
3420 needs_block_end: true,
3421 });
3422 self.indent = col as isize;
3423 let tokens_parsed = self.tokens_parsed;
3424 match number {
3425 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
3426 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
3427 }
3428 }
3429 }
3430
3431 fn unroll_indent(&mut self, col: isize) {
3437 if self.flow_level > 0 {
3438 return;
3439 }
3440 while self.indent > col {
3441 let indent = self.indents.pop().unwrap();
3442 self.indent = indent.indent;
3443 if indent.needs_block_end {
3444 self.tokens
3445 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
3446 }
3447 }
3448 }
3449
3450 fn roll_one_col_indent(&mut self) {
3456 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
3457 self.indents.push(Indent {
3458 indent: self.indent,
3459 needs_block_end: false,
3460 });
3461 self.indent += 1;
3462 }
3463 }
3464
3465 fn unroll_non_block_indents(&mut self) {
3467 while let Some(indent) = self.indents.last() {
3468 if indent.needs_block_end {
3469 break;
3470 }
3471 self.indent = indent.indent;
3472 self.indents.pop();
3473 }
3474 }
3475
3476 fn save_simple_key(&mut self) {
3478 if self.simple_key_allowed {
3479 let required = self.flow_level == 0
3480 && self.indent == (self.mark.col as isize)
3481 && self.indents.last().unwrap().needs_block_end;
3482
3483 if let Some(last) = self.simple_keys.last_mut() {
3484 *last = SimpleKey {
3485 mark: self.mark,
3486 possible: true,
3487 required,
3488 token_number: self.tokens_parsed + self.tokens.len(),
3489 };
3490 }
3491 }
3492 }
3493
3494 fn remove_simple_key(&mut self) -> ScanResult {
3495 let last = self.simple_keys.last_mut().unwrap();
3496 if last.possible && last.required {
3497 return Err(self.simple_key_expected());
3498 }
3499
3500 last.possible = false;
3501 Ok(())
3502 }
3503
3504 fn is_within_block(&self) -> bool {
3506 !self.indents.is_empty()
3507 }
3508
3509 fn end_implicit_mapping(&mut self, mark: Marker, flow_level: u8) {
3515 if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
3516 if *implicit_mapping == ImplicitMappingState::Inside(flow_level) {
3517 self.flow_mapping_started = false;
3518 *implicit_mapping = ImplicitMappingState::Possible;
3519 self.tokens
3520 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
3521 }
3522 }
3523 }
3524}
3525
/// Chomping behaviour for block scalars.
///
/// The variant names follow the YAML 1.2 block chomping indicator
/// (spec section 8.1.1.2), which controls how the final line break and any
/// trailing empty lines of a block scalar are interpreted.
///
/// The type is a plain field-less enum, so it is `Copy` and `Debug` in addition
/// to being comparable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Chomping {
    /// Strip (`-` indicator in YAML): the final line break and any trailing
    /// empty lines are excluded.
    Strip,
    /// Clip (YAML's default): the final line break is kept, trailing empty
    /// lines are excluded.
    Clip,
    /// Keep (`+` indicator in YAML): the final line break and any trailing
    /// empty lines are kept.
    Keep,
}
3538
#[cfg(test)]
mod test {
    //! Scanner tests checking which token payloads are borrowed from a
    //! `StrInput` and which have to be owned.

    use alloc::borrow::Cow;

    use crate::{
        input::str::StrInput,
        scanner::{Scanner, TokenType},
    };

    /// Scans `yaml` token by token and returns the first value `select` extracts.
    ///
    /// Panics if scanning fails or the input ends before `select` returns `Some`.
    fn first_token_matching<'a, T>(
        yaml: &'a str,
        mut select: impl FnMut(TokenType<'a>) -> Option<T>,
    ) -> T {
        let mut scanner = Scanner::new(StrInput::new(yaml));

        loop {
            let tok = scanner
                .next_token()
                .expect("valid YAML must scan without errors")
                .expect("scanner must eventually produce a token");
            if let Some(found) = select(tok.1) {
                return found;
            }
        }
    }

    /// Returns the contents of the first `Scalar` token in `yaml`.
    fn first_scalar(yaml: &str) -> Cow<'_, str> {
        first_token_matching(yaml, |tok| match tok {
            TokenType::Scalar(_, value) => Some(value),
            _ => None,
        })
    }

    /// Returns the `(handle, suffix)` pair of the first `Tag` token in `yaml`.
    fn first_tag(yaml: &str) -> (Cow<'_, str>, Cow<'_, str>) {
        first_token_matching(yaml, |tok| match tok {
            TokenType::Tag(handle, suffix) => Some((handle, suffix)),
            _ => None,
        })
    }

    /// Returns the contents of the first `Scalar` token that follows a `Key` token.
    ///
    /// Panics if scanning fails, if no `Key` token is produced, or if no scalar
    /// follows it before the end of the input.
    fn scalar_after_key(yaml: &str) -> Cow<'_, str> {
        let mut scanner = Scanner::new(StrInput::new(yaml));
        let mut found_key = false;

        while let Some(tok) = scanner
            .next_token()
            .expect("valid YAML must scan without errors")
        {
            match tok.1 {
                TokenType::Key => found_key = true,
                TokenType::Scalar(_, value) if found_key => return value,
                _ => {}
            }
        }

        assert!(found_key, "expected to find a Key token");
        panic!("expected to find a scalar after Key token");
    }

    #[test]
    fn test_is_anchor_char() {
        use super::is_anchor_char;
        assert!(is_anchor_char('x'));
    }

    #[test]
    fn anchor_name_is_borrowed_for_str_input() {
        let name = first_token_matching("&anch\n", |tok| match tok {
            TokenType::Anchor(name) => Some(name),
            _ => None,
        });
        assert!(matches!(name, Cow::Borrowed("anch")));
    }

    #[test]
    fn alias_name_is_borrowed_for_str_input() {
        let name = first_token_matching("*anch\n", |tok| match tok {
            TokenType::Alias(name) => Some(name),
            _ => None,
        });
        assert!(matches!(name, Cow::Borrowed("anch")));
    }

    #[test]
    fn tag_directive_parts_are_borrowed_for_str_input() {
        let (handle, prefix) =
            first_token_matching("%TAG !e! tag:example.com,2000:app/\n", |tok| match tok {
                TokenType::TagDirective(handle, prefix) => Some((handle, prefix)),
                _ => None,
            });
        assert!(matches!(handle, Cow::Borrowed("!e!")));
        assert!(matches!(prefix, Cow::Borrowed("tag:example.com,2000:app/")));
    }

    #[test]
    fn plain_scalar_is_borrowed_when_whitespace_free_for_str_input() {
        let value = first_scalar("foo\n");
        assert!(matches!(value, Cow::Borrowed("foo")));
    }

    #[test]
    fn plain_scalar_is_borrowed_when_whitespace_present_for_str_input() {
        let value = first_scalar("foo bar\n");
        assert!(matches!(value, Cow::Borrowed("foo bar")));
    }

    #[test]
    fn single_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
        let value = first_scalar("'foo bar'\n");
        assert!(matches!(value, Cow::Borrowed("foo bar")));
    }

    #[test]
    fn single_quoted_scalar_is_owned_when_quote_is_escaped_for_str_input() {
        let value = first_scalar("'foo''bar'\n");
        assert!(matches!(value, Cow::Owned(_)));
        assert_eq!(&*value, "foo'bar");
    }

    #[test]
    fn double_quoted_scalar_is_borrowed_when_verbatim_for_str_input() {
        let value = first_scalar("\"foo bar\"\n");
        assert!(matches!(value, Cow::Borrowed("foo bar")));
    }

    #[test]
    fn double_quoted_scalar_is_owned_when_escape_sequence_present_for_str_input() {
        let value = first_scalar("\"foo\\nbar\"\n");
        assert!(matches!(value, Cow::Owned(_)));
        assert_eq!(&*value, "foo\nbar");
    }

    #[test]
    fn plain_key_is_borrowed_for_str_input() {
        let key_value = scalar_after_key("mykey: value\n");
        assert!(
            matches!(key_value, Cow::Borrowed("mykey")),
            "key should be borrowed, got: {key_value:?}"
        );
    }

    #[test]
    fn quoted_key_is_borrowed_when_verbatim_for_str_input() {
        let key_value = scalar_after_key("\"mykey\": value\n");
        assert!(
            matches!(key_value, Cow::Borrowed("mykey")),
            "quoted key should be borrowed when verbatim, got: {key_value:?}"
        );
    }

    #[test]
    fn tag_handle_and_suffix_are_borrowed_for_str_input() {
        let (handle, suffix) = first_tag("!!str foo\n");
        assert!(
            matches!(handle, Cow::Borrowed("!!")),
            "tag handle should be borrowed, got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Borrowed("str")),
            "tag suffix should be borrowed, got: {suffix:?}"
        );
    }

    #[test]
    fn local_tag_suffix_is_borrowed_for_str_input() {
        let (handle, suffix) = first_tag("!mytag foo\n");
        assert!(
            matches!(handle, Cow::Borrowed("!")),
            "local tag handle should be '!', got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Borrowed("mytag")),
            "local tag suffix should be borrowed, got: {suffix:?}"
        );
    }

    #[test]
    fn tag_with_uri_escape_is_owned_for_str_input() {
        let (handle, suffix) = first_tag("!!my%20tag foo\n");
        assert!(
            matches!(handle, Cow::Borrowed("!!")),
            "tag handle should still be borrowed, got: {handle:?}"
        );
        assert!(
            matches!(suffix, Cow::Owned(_)),
            "tag suffix with URI escape should be owned, got: {suffix:?}"
        );
        assert_eq!(&*suffix, "my tag");
    }
}