1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use std::{borrow::Cow, char, collections::VecDeque, error::Error, fmt};
13
14use crate::{
15 char_traits::{
16 as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
17 is_tag_char, is_uri_char,
18 },
19 input::{Input, SkipTabs},
20};
21
/// The character encoding of a YAML stream, as carried by [`TokenType::StreamStart`].
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8. This is the only encoding this enum can express.
    Utf8,
}
28
/// The presentation style of a scalar, carried by [`TokenType::Scalar`].
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum ScalarStyle {
    /// Presumably an unquoted scalar; the code producing it is outside this view.
    Plain,
    /// Presumably a `'`-delimited scalar; the code producing it is outside this view.
    SingleQuoted,
    /// Presumably a `"`-delimited scalar; the code producing it is outside this view.
    DoubleQuoted,

    /// A block scalar introduced by the `|` indicator.
    Literal,
    /// A block scalar introduced by the `>` indicator.
    Folded,
}
53
/// A position inside the scanned input.
///
/// The scanner starts at index 0, line 1, column 0, so lines are 1-based while indices and
/// columns are 0-based.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub struct Marker {
    /// Offset from the beginning of the input.
    index: usize,
    /// Line number.
    line: usize,
    /// Column within the line.
    col: usize,
}

impl Marker {
    /// Build a marker from its three coordinates.
    #[must_use]
    pub fn new(index: usize, line: usize, col: usize) -> Self {
        Self { index, line, col }
    }

    /// Offset from the beginning of the input.
    #[must_use]
    pub fn index(&self) -> usize {
        let Self { index, .. } = *self;
        index
    }

    /// Line number of the position.
    #[must_use]
    pub fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Column of the position within its line.
    #[must_use]
    pub fn col(&self) -> usize {
        let Self { col, .. } = *self;
        col
    }
}
90
91#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
93pub struct Span {
94 pub start: Marker,
96 pub end: Marker,
98}
99
100impl Span {
101 #[must_use]
103 pub fn new(start: Marker, end: Marker) -> Span {
104 Span { start, end }
105 }
106
107 #[must_use]
114 pub fn empty(mark: Marker) -> Span {
115 Span {
116 start: mark,
117 end: mark,
118 }
119 }
120}
121
122#[derive(Clone, PartialEq, Debug, Eq)]
124pub struct ScanError {
125 mark: Marker,
127 info: String,
129}
130
131impl ScanError {
132 #[must_use]
134 pub fn new(loc: Marker, info: String) -> ScanError {
135 ScanError { mark: loc, info }
136 }
137
138 #[must_use]
140 pub fn new_str(loc: Marker, info: &str) -> ScanError {
141 ScanError {
142 mark: loc,
143 info: info.to_owned(),
144 }
145 }
146
147 #[must_use]
149 pub fn marker(&self) -> &Marker {
150 &self.mark
151 }
152
153 #[must_use]
155 pub fn info(&self) -> &str {
156 self.info.as_ref()
157 }
158}
159
160impl Error for ScanError {
161 fn source(&self) -> Option<&(dyn Error + 'static)> {
162 None
163 }
164}
165
166impl fmt::Display for ScanError {
167 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
168 write!(
169 formatter,
170 "{} at byte {} line {} column {}",
171 self.info,
172 self.mark.index,
173 self.mark.line,
174 self.mark.col + 1,
175 )
176 }
177}
178
/// The payload of a [`Token`] produced by the [`Scanner`].
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType<'input> {
    /// Start of the stream, with its encoding. Emitted once, before anything else.
    StreamStart(TEncoding),
    /// End of the stream. Once it has been returned, `next_token` only yields `Ok(None)`.
    StreamEnd,
    /// A `%YAML` directive.
    VersionDirective(
        /// Major version number.
        u32,
        /// Minor version number.
        u32,
    ),
    /// A `%TAG` directive. Also emitted with two empty strings for a directive whose name is
    /// neither `YAML` nor `TAG`.
    TagDirective(
        /// The tag handle.
        Cow<'input, str>,
        /// The tag prefix.
        Cow<'input, str>,
    ),
    /// A document start indicator (three characters at column 0).
    DocumentStart,
    /// A document end indicator (three characters at column 0).
    DocumentEnd,
    /// Start of a block sequence; passed to `roll_indent` when a `-` entry is scanned.
    BlockSequenceStart,
    /// Start of a block mapping. NOTE(review): emitted outside the visible part of this file.
    BlockMappingStart,
    /// End of a block collection. NOTE(review): emitted outside the visible part of this file.
    BlockEnd,
    /// The `[` character.
    FlowSequenceStart,
    /// The `]` character.
    FlowSequenceEnd,
    /// The `{` character.
    FlowMappingStart,
    /// The `}` character.
    FlowMappingEnd,
    /// A `-` followed by a blank, a break or the end of input, in block context.
    BlockEntry,
    /// The `,` character.
    FlowEntry,
    /// A mapping key. NOTE(review): emitted outside the visible part of this file.
    Key,
    /// A mapping value. NOTE(review): emitted outside the visible part of this file.
    Value,
    /// An alias (`*name`); holds the name without the `*`.
    Alias(Cow<'input, str>),
    /// An anchor (`&name`); holds the name without the `&`.
    Anchor(Cow<'input, str>),
    /// A tag.
    Tag(
        /// The handle; empty for verbatim tags and for the lone `!` tag.
        String,
        /// The suffix; `"!"` for the lone `!` tag.
        String,
    ),
    /// A scalar, with its presentation style and its contents.
    Scalar(ScalarStyle, Cow<'input, str>),
}
244
/// A scanned token: the [`Span`] it covers in the input, and its [`TokenType`] payload.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token<'input>(pub Span, pub TokenType<'input>);
248
249#[derive(Clone, PartialEq, Debug, Eq)]
284struct SimpleKey {
285 possible: bool,
298 required: bool,
306 token_number: usize,
312 mark: Marker,
314}
315
316impl SimpleKey {
317 fn new(mark: Marker) -> SimpleKey {
319 SimpleKey {
320 possible: false,
321 required: false,
322 token_number: 0,
323 mark,
324 }
325 }
326}
327
/// An entry of the scanner's stack of indentation levels.
///
/// NOTE(review): the code that pushes and pops these entries is outside the visible part of
/// this file; the field descriptions below are inferred from names and should be confirmed.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// The saved indentation level (signed, since the scanner uses `-1` for "no indentation").
    indent: isize,
    /// Presumably whether leaving this level must emit a `BlockEnd` token — TODO confirm.
    needs_block_end: bool,
}
352
/// State of a possible implicit mapping inside a flow sequence.
///
/// One entry is pushed when a flow sequence is opened and popped when it is closed.
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
    /// Initial state, pushed when the flow sequence starts.
    Possible,
    /// Presumably set once an implicit mapping has been entered — the transition is outside
    /// the visible part of this file; TODO confirm.
    Inside,
}
386
/// The YAML scanner: turns characters read from an input into a stream of [`Token`]s.
///
/// Tokens are obtained either through the [`Iterator`] implementation or by calling
/// `next_token` directly.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<'input, T> {
    /// The character source.
    input: T,
    /// Current position in the input; advanced by the `skip_*` helpers.
    mark: Marker,
    /// Queue of tokens already scanned but not yet handed out.
    tokens: VecDeque<Token<'input>>,
    /// The error recorded by the iterator; once set, iteration only yields `None`.
    error: Option<ScanError>,

    /// Whether the stream-start token has been queued.
    stream_start_produced: bool,
    /// Whether the stream-end token has been handed out.
    stream_end_produced: bool,
    /// Input index at which a `:` in flow context is accepted as a value indicator regardless
    /// of the character that follows it. Updated after closing a nested flow collection.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Stack of potential simple keys: one entry pushed at stream start, plus one per open
    /// flow level.
    simple_keys: Vec<SimpleKey>,
    /// Current block indentation; `-1` when there is none.
    indent: isize,
    /// Stack of indentation levels. NOTE(review): managed outside the visible part of this file.
    indents: Vec<Indent>,
    /// Nesting depth of flow collections; `0` means block context. Overflowing it is reported
    /// as "recursion limit exceeded".
    flow_level: u8,
    /// Number of tokens handed out by `next_token` so far.
    tokens_parsed: usize,
    /// Set by `fetch_more_tokens` once the head of the queue may be returned; cleared when a
    /// token is handed out.
    token_available: bool,
    /// `true` at the start of each line, until a non-blank character is skipped.
    leading_whitespace: bool,
    /// Set when a `{` is scanned. NOTE(review): it is read outside the visible part of this file.
    flow_mapping_started: bool,
    /// One entry per open flow sequence, tracking a possible implicit mapping inside it.
    implicit_flow_mapping_states: Vec<ImplicitMappingState>,
    /// Scratch buffer, created empty. NOTE(review): its users are outside the visible part of
    /// this file.
    buf_leading_break: String,
    /// Scratch buffer, created empty. NOTE(review): its users are outside the visible part of
    /// this file.
    buf_trailing_breaks: String,
    /// Scratch buffer, created empty. NOTE(review): its users are outside the visible part of
    /// this file.
    buf_whitespaces: String,
}
469
470impl<'input, T: Input> Iterator for Scanner<'input, T> {
471 type Item = Token<'input>;
472
473 fn next(&mut self) -> Option<Self::Item> {
474 if self.error.is_some() {
475 return None;
476 }
477 match self.next_token() {
478 Ok(Some(tok)) => {
479 debug_print!(
480 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
481 tok.1,
482 tok.0
483 );
484 Some(tok)
485 }
486 Ok(tok) => tok,
487 Err(e) => {
488 self.error = Some(e);
489 None
490 }
491 }
492 }
493}
494
/// Result of a scanning step that produces no value: `Ok(())` or a [`ScanError`].
pub type ScanResult = Result<(), ScanError>;
497
498impl<'input, T: Input> Scanner<'input, T> {
499 pub fn new(input: T) -> Self {
501 Scanner {
502 input,
503 mark: Marker::new(0, 1, 0),
504 tokens: VecDeque::new(),
505 error: None,
506
507 stream_start_produced: false,
508 stream_end_produced: false,
509 adjacent_value_allowed_at: 0,
510 simple_key_allowed: true,
511 simple_keys: Vec::new(),
512 indent: -1,
513 indents: Vec::new(),
514 flow_level: 0,
515 tokens_parsed: 0,
516 token_available: false,
517 leading_whitespace: true,
518 flow_mapping_started: false,
519 implicit_flow_mapping_states: vec![],
520
521 buf_leading_break: String::new(),
522 buf_trailing_breaks: String::new(),
523 buf_whitespaces: String::new(),
524 }
525 }
526
    /// Return a copy of the error recorded by the iterator, if any.
    ///
    /// Only the [`Iterator`] implementation stores an error here; errors returned directly by
    /// `next_token` and the `fetch_*` methods are not recorded.
    #[inline]
    pub fn get_error(&self) -> Option<ScanError> {
        self.error.clone()
    }
535
536 #[inline]
538 fn skip_blank(&mut self) {
539 self.input.skip();
540
541 self.mark.index += 1;
542 self.mark.col += 1;
543 }
544
545 #[inline]
547 fn skip_non_blank(&mut self) {
548 self.input.skip();
549
550 self.mark.index += 1;
551 self.mark.col += 1;
552 self.leading_whitespace = false;
553 }
554
555 #[inline]
557 fn skip_n_non_blank(&mut self, count: usize) {
558 self.input.skip_n(count);
559
560 self.mark.index += count;
561 self.mark.col += count;
562 self.leading_whitespace = false;
563 }
564
565 #[inline]
567 fn skip_nl(&mut self) {
568 self.input.skip();
569
570 self.mark.index += 1;
571 self.mark.col = 0;
572 self.mark.line += 1;
573 self.leading_whitespace = true;
574 }
575
576 #[inline]
578 fn skip_linebreak(&mut self) {
579 if self.input.next_2_are('\r', '\n') {
580 self.skip_blank();
583 self.skip_nl();
584 } else if self.input.next_is_break() {
585 self.skip_nl();
586 }
587 }
588
    /// Whether the stream-start token has already been queued.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
594
    /// Whether the stream-end token has already been returned by `next_token`.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
600
    /// The scanner's current position in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
606
    /// Consume the line break at the cursor and append a single `'\n'` to `s`.
    ///
    /// Whatever form the break has in the input, it is normalised to `'\n'` in `s`.
    /// The cursor must be on a line break (see `skip_break`).
    #[inline]
    fn read_break(&mut self, s: &mut String) {
        self.skip_break();
        s.push('\n');
    }
618
619 #[inline]
624 fn skip_break(&mut self) {
625 let c = self.input.peek();
626 let nc = self.input.peek_nth(1);
627 debug_assert!(is_break(c));
628 if c == '\r' && nc == '\n' {
629 self.skip_blank();
630 }
631 self.skip_nl();
632 }
633
634 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
636 let old_len = self.tokens.len();
637 assert!(pos <= old_len);
638 self.tokens.insert(pos, tok);
639 }
640
    /// Record that a simple key may start at the current position.
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
644
    /// Record that a simple key may not start at the current position.
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
648
    /// Scan the input at the cursor and append the resulting token(s) to the token queue.
    ///
    /// The very first call only queues the stream-start token. Later calls skip whitespace
    /// and comments, retire stale simple keys, unroll the indentation to the current column,
    /// and then dispatch on the next character(s).
    ///
    /// # Errors
    /// Returns a [`ScanError`] for invalid indentation, for content following a document end
    /// marker, for a token starting with `%` (outside column 0), `@` or `` ` ``, and for any
    /// error raised by the specific `fetch_*` routine that gets dispatched.
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.input.lookahead(1);

        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        self.skip_to_next_token()?;

        debug_print!(
            " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
            self.mark,
            self.input.peek()
        );

        self.stale_simple_keys()?;

        let mark = self.mark;
        self.unroll_indent(mark.col as isize);

        // Document indicators need three characters plus the one that follows them.
        self.input.lookahead(4);

        if self.input.next_is_z() {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Directives and document indicators are only recognised at the start of a line.
        if self.mark.col == 0 {
            if self.input.next_char_is('%') {
                return self.fetch_directive();
            } else if self.input.next_is_document_start() {
                return self.fetch_document_indicator(TokenType::DocumentStart);
            } else if self.input.next_is_document_end() {
                self.fetch_document_indicator(TokenType::DocumentEnd)?;
                // Only blanks (and whatever `skip_ws_to_eol` accepts) may follow the marker
                // on its line.
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid content after document end marker",
                    ));
                }
                return Ok(());
            }
        }

        if (self.mark.col as isize) < self.indent {
            return Err(ScanError::new_str(self.mark, "invalid indentation"));
        }

        let c = self.input.peek();
        let nc = self.input.peek_nth(1);
        // Arm order matters: the guarded indicator arms must come before the arms that fall
        // back to a plain scalar.
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
            '?' if is_blank_or_breakz(nc) => self.fetch_key(),
            ':' if is_blank_or_breakz(nc) => self.fetch_value(),
            // In flow context a `:` is also a value indicator when followed by a flow
            // indicator, or when it sits exactly where an adjacent value was allowed.
            ':' if self.flow_level > 0
                && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at) =>
            {
                self.fetch_flow_value()
            }
            // Alias.
            '*' => self.fetch_anchor(true),
            // Anchor.
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Literal block scalar (block context only).
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            // Folded block scalar (block context only).
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            // A `-` not followed by a blank starts a plain scalar.
            '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
            // In block context, `:` and `?` not followed by a blank start a plain scalar.
            ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
                self.fetch_plain_scalar()
            }
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                format!("unexpected character: `{c}'"),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
741
742 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
746 if self.stream_end_produced {
747 return Ok(None);
748 }
749
750 if !self.token_available {
751 self.fetch_more_tokens()?;
752 }
753 let Some(t) = self.tokens.pop_front() else {
754 return Err(ScanError::new_str(
755 self.mark,
756 "did not find expected next token",
757 ));
758 };
759 self.token_available = false;
760 self.tokens_parsed += 1;
761
762 if let TokenType::StreamEnd = t.1 {
763 self.stream_end_produced = true;
764 }
765 Ok(Some(t))
766 }
767
768 pub fn fetch_more_tokens(&mut self) -> ScanResult {
772 let mut need_more;
773 loop {
774 if self.tokens.is_empty() {
775 need_more = true;
776 } else {
777 need_more = false;
778 self.stale_simple_keys()?;
780 for sk in &self.simple_keys {
782 if sk.possible && sk.token_number == self.tokens_parsed {
783 need_more = true;
784 break;
785 }
786 }
787 }
788
789 if !need_more {
790 break;
791 }
792 self.fetch_next_token()?;
793 }
794 self.token_available = true;
795
796 Ok(())
797 }
798
799 fn stale_simple_keys(&mut self) -> ScanResult {
807 for sk in &mut self.simple_keys {
808 if sk.possible
809 && self.flow_level == 0
811 && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
812 {
813 if sk.required {
814 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
815 }
816 sk.possible = false;
817 }
818 }
819 Ok(())
820 }
821
    /// Skip blanks, tabs, line breaks and comments until the next character that can start
    /// a token.
    ///
    /// In block context (`flow_level == 0`), each skipped line break re-allows simple keys.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when a tab is found in a line's leading whitespace, inside a
    /// block, left of the current indentation, and the rest of that line is not empty. Also
    /// propagates errors from `skip_ws_to_eol`.
    fn skip_to_next_token(&mut self) -> ScanResult {
        loop {
            match self.input.look_ch() {
                // This guarded arm must stay ahead of the generic `'\t' | ' '` arm below.
                '\t' if self.is_within_block()
                    && self.leading_whitespace
                    && (self.mark.col as isize) < self.indent =>
                {
                    self.skip_ws_to_eol(SkipTabs::Yes)?;
                    // The tab is tolerated only if nothing but whitespace follows on the line.
                    if !self.input.next_is_breakz() {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tabs disallowed within this context (block indentation)",
                        ));
                    }
                }
                '\t' | ' ' => self.skip_blank(),
                '\n' | '\r' => {
                    // Two characters are needed to recognise `\r\n`.
                    self.input.lookahead(2);
                    self.skip_linebreak();
                    if self.flow_level == 0 {
                        self.allow_simple_key();
                    }
                }
                '#' => {
                    // A comment runs up to, but not including, the end of the line.
                    let comment_length = self.input.skip_while_non_breakz();
                    self.mark.index += comment_length;
                    self.mark.col += comment_length;
                }
                _ => break,
            }
        }
        Ok(())
    }
868
869 fn skip_yaml_whitespace(&mut self) -> ScanResult {
874 let mut need_whitespace = true;
875 loop {
876 match self.input.look_ch() {
877 ' ' => {
878 self.skip_blank();
879
880 need_whitespace = false;
881 }
882 '\n' | '\r' => {
883 self.input.lookahead(2);
884 self.skip_linebreak();
885 if self.flow_level == 0 {
886 self.allow_simple_key();
887 }
888 need_whitespace = false;
889 }
890 '#' => {
891 let comment_length = self.input.skip_while_non_breakz();
892 self.mark.index += comment_length;
893 self.mark.col += comment_length;
894 }
895 _ => break,
896 }
897 }
898
899 if need_whitespace {
900 Err(ScanError::new_str(self.mark(), "expected whitespace"))
901 } else {
902 Ok(())
903 }
904 }
905
906 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
907 let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
908 self.mark.col += n_bytes;
909 self.mark.index += n_bytes;
910 result.map_err(|msg| ScanError::new_str(self.mark, msg))
911 }
912
913 fn fetch_stream_start(&mut self) {
914 let mark = self.mark;
915 self.indent = -1;
916 self.stream_start_produced = true;
917 self.allow_simple_key();
918 self.tokens.push_back(Token(
919 Span::empty(mark),
920 TokenType::StreamStart(TEncoding::Utf8),
921 ));
922 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
923 }
924
925 fn fetch_stream_end(&mut self) -> ScanResult {
926 if self.mark.col != 0 {
928 self.mark.col = 0;
929 self.mark.line += 1;
930 }
931
932 for sk in &mut self.simple_keys {
935 if sk.required && sk.possible {
936 return Err(ScanError::new_str(self.mark, "simple key expected"));
937 }
938 sk.possible = false;
939 }
940
941 self.unroll_indent(-1);
942 self.remove_simple_key()?;
943 self.disallow_simple_key();
944
945 self.tokens
946 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
947 Ok(())
948 }
949
950 fn fetch_directive(&mut self) -> ScanResult {
951 self.unroll_indent(-1);
952 self.remove_simple_key()?;
953
954 self.disallow_simple_key();
955
956 let tok = self.scan_directive()?;
957 self.tokens.push_back(tok);
958
959 Ok(())
960 }
961
962 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
963 let start_mark = self.mark;
964 self.skip_non_blank();
965
966 let name = self.scan_directive_name()?;
967 let tok = match name.as_ref() {
968 "YAML" => self.scan_version_directive_value(&start_mark)?,
969 "TAG" => self.scan_tag_directive_value(&start_mark)?,
970 _ => {
972 let line_len = self.input.skip_while_non_breakz();
974 self.mark.index += line_len;
975 self.mark.col += line_len;
976 Token(
978 Span::new(start_mark, self.mark),
979 TokenType::TagDirective(Cow::default(), Cow::default()),
980 )
981 }
984 };
985
986 self.skip_ws_to_eol(SkipTabs::Yes)?;
987
988 if self.input.next_is_breakz() {
989 self.input.lookahead(2);
990 self.skip_linebreak();
991 Ok(tok)
992 } else {
993 Err(ScanError::new_str(
994 start_mark,
995 "while scanning a directive, did not find expected comment or line break",
996 ))
997 }
998 }
999
1000 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1001 let n_blanks = self.input.skip_while_blank();
1002 self.mark.index += n_blanks;
1003 self.mark.col += n_blanks;
1004
1005 let major = self.scan_version_directive_number(mark)?;
1006
1007 if self.input.peek() != '.' {
1008 return Err(ScanError::new_str(
1009 *mark,
1010 "while scanning a YAML directive, did not find expected digit or '.' character",
1011 ));
1012 }
1013 self.skip_non_blank();
1014
1015 let minor = self.scan_version_directive_number(mark)?;
1016
1017 Ok(Token(
1018 Span::new(*mark, self.mark),
1019 TokenType::VersionDirective(major, minor),
1020 ))
1021 }
1022
1023 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1024 let start_mark = self.mark;
1025 let mut string = String::new();
1026
1027 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1028 self.mark.index += n_chars;
1029 self.mark.col += n_chars;
1030
1031 if string.is_empty() {
1032 return Err(ScanError::new_str(
1033 start_mark,
1034 "while scanning a directive, could not find expected directive name",
1035 ));
1036 }
1037
1038 if !is_blank_or_breakz(self.input.peek()) {
1039 return Err(ScanError::new_str(
1040 start_mark,
1041 "while scanning a directive, found unexpected non-alphabetical character",
1042 ));
1043 }
1044
1045 Ok(string)
1046 }
1047
1048 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1049 let mut val = 0u32;
1050 let mut length = 0usize;
1051 while let Some(digit) = self.input.look_ch().to_digit(10) {
1052 if length + 1 > 9 {
1053 return Err(ScanError::new_str(
1054 *mark,
1055 "while scanning a YAML directive, found extremely long version number",
1056 ));
1057 }
1058 length += 1;
1059 val = val * 10 + digit;
1060 self.skip_non_blank();
1061 }
1062
1063 if length == 0 {
1064 return Err(ScanError::new_str(
1065 *mark,
1066 "while scanning a YAML directive, did not find expected version number",
1067 ));
1068 }
1069
1070 Ok(val)
1071 }
1072
1073 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1074 let n_blanks = self.input.skip_while_blank();
1075 self.mark.index += n_blanks;
1076 self.mark.col += n_blanks;
1077
1078 let handle = self.scan_tag_handle(true, mark)?;
1079
1080 let n_blanks = self.input.skip_while_blank();
1081 self.mark.index += n_blanks;
1082 self.mark.col += n_blanks;
1083
1084 let prefix = self.scan_tag_prefix(mark)?;
1085
1086 self.input.lookahead(1);
1087
1088 if self.input.next_is_blank_or_breakz() {
1089 Ok(Token(
1090 Span::new(*mark, self.mark),
1091 TokenType::TagDirective(handle.into(), prefix.into()),
1092 ))
1093 } else {
1094 Err(ScanError::new_str(
1095 *mark,
1096 "while scanning TAG, did not find expected whitespace or line break",
1097 ))
1098 }
1099 }
1100
1101 fn fetch_tag(&mut self) -> ScanResult {
1102 self.save_simple_key();
1103 self.disallow_simple_key();
1104
1105 let tok = self.scan_tag()?;
1106 self.tokens.push_back(tok);
1107 Ok(())
1108 }
1109
    /// Scan a tag starting at its `!` and return a `Tag(handle, suffix)` token.
    ///
    /// Three forms are handled:
    /// - verbatim (`!<...>`): the handle is empty and the suffix is the text between the
    ///   angle brackets;
    /// - named or secondary handle (`!name!suffix`, `!!suffix`): handle and suffix are
    ///   scanned separately;
    /// - primary handle (`!suffix`): what was scanned as a handle is really the start of the
    ///   suffix, so the handle becomes `!`. A lone `!` yields an empty handle and the
    ///   suffix `!`.
    ///
    /// # Errors
    /// Returns a [`ScanError`] if the tag is malformed, or if it is not followed by a blank,
    /// a line break, the end of input or, in flow context, a flow indicator.
    fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;
        let mut handle = String::new();
        let mut suffix;

        // The character after the `!` tells verbatim tags apart.
        self.input.lookahead(2);

        if self.input.nth_char_is(1, '<') {
            suffix = self.scan_verbatim_tag(&start_mark)?;
        } else {
            handle = self.scan_tag_handle(false, &start_mark)?;
            // A complete handle is delimited by `!` on both sides.
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                let is_secondary_handle = handle == "!!";
                suffix =
                    self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?;
            } else {
                // No closing `!`: the text scanned so far is the head of the suffix.
                suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?;
                "!".clone_into(&mut handle);
                if suffix.is_empty() {
                    // The tag is just `!`.
                    handle.clear();
                    "!".clone_into(&mut suffix);
                }
            }
        }

        if is_blank_or_breakz(self.input.look_ch())
            || (self.flow_level > 0 && self.input.next_is_flow())
        {
            Ok(Token(
                Span::new(start_mark, self.mark),
                TokenType::Tag(handle, suffix),
            ))
        } else {
            Err(ScanError::new_str(
                start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
1156
1157 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1158 let mut string = String::new();
1159 if self.input.look_ch() != '!' {
1160 return Err(ScanError::new_str(
1161 *mark,
1162 "while scanning a tag, did not find expected '!'",
1163 ));
1164 }
1165
1166 string.push(self.input.peek());
1167 self.skip_non_blank();
1168
1169 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1170 self.mark.index += n_chars;
1171 self.mark.col += n_chars;
1172
1173 if self.input.peek() == '!' {
1175 string.push(self.input.peek());
1176 self.skip_non_blank();
1177 } else if directive && string != "!" {
1178 return Err(ScanError::new_str(
1182 *mark,
1183 "while parsing a tag directive, did not find expected '!'",
1184 ));
1185 }
1186 Ok(string)
1187 }
1188
    /// Scan the prefix of a `%TAG` directive.
    ///
    /// The first character must be `!` or satisfy `is_tag_char`; the following characters
    /// are taken while they satisfy `is_uri_char`. `%XX` escapes are decoded by
    /// `scan_uri_escapes`.
    ///
    /// # Errors
    /// Returns a [`ScanError`] located at `start_mark` if the first character is not
    /// acceptable, or if an escape sequence is invalid.
    fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
        let mut string = String::new();

        if self.input.look_ch() == '!' {
            // A prefix starting with `!`.
            string.push(self.input.peek());
            self.skip_non_blank();
        } else if !is_tag_char(self.input.peek()) {
            // Any other first character must be a tag character.
            return Err(ScanError::new_str(
                *start_mark,
                "invalid global tag character",
            ));
        } else if self.input.peek() == '%' {
            // Only reached if `is_tag_char('%')` holds — NOTE(review): confirm in `char_traits`.
            string.push(self.scan_uri_escapes(start_mark)?);
        } else {
            string.push(self.input.peek());
            self.skip_non_blank();
        }

        while is_uri_char(self.input.look_ch()) {
            if self.input.peek() == '%' {
                string.push(self.scan_uri_escapes(start_mark)?);
            } else {
                string.push(self.input.peek());
                self.skip_non_blank();
            }
        }

        Ok(string)
    }
1227
1228 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1232 self.skip_non_blank();
1234 self.skip_non_blank();
1235
1236 let mut string = String::new();
1237 while is_uri_char(self.input.look_ch()) {
1238 if self.input.peek() == '%' {
1239 string.push(self.scan_uri_escapes(start_mark)?);
1240 } else {
1241 string.push(self.input.peek());
1242 self.skip_non_blank();
1243 }
1244 }
1245
1246 if self.input.peek() != '>' {
1247 return Err(ScanError::new_str(
1248 *start_mark,
1249 "while scanning a verbatim tag, did not find the expected '>'",
1250 ));
1251 }
1252 self.skip_non_blank();
1253
1254 Ok(string)
1255 }
1256
1257 fn scan_tag_shorthand_suffix(
1258 &mut self,
1259 _directive: bool,
1260 _is_secondary: bool,
1261 head: &str,
1262 mark: &Marker,
1263 ) -> Result<String, ScanError> {
1264 let mut length = head.len();
1265 let mut string = String::new();
1266
1267 if length > 1 {
1270 string.extend(head.chars().skip(1));
1271 }
1272
1273 while is_tag_char(self.input.look_ch()) {
1274 if self.input.peek() == '%' {
1276 string.push(self.scan_uri_escapes(mark)?);
1277 } else {
1278 string.push(self.input.peek());
1279 self.skip_non_blank();
1280 }
1281
1282 length += 1;
1283 }
1284
1285 if length == 0 {
1286 return Err(ScanError::new_str(
1287 *mark,
1288 "while parsing a tag, did not find expected tag URI",
1289 ));
1290 }
1291
1292 Ok(string)
1293 }
1294
1295 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1296 let mut width = 0usize;
1297 let mut code = 0u32;
1298 loop {
1299 self.input.lookahead(3);
1300
1301 let c = self.input.peek_nth(1);
1302 let nc = self.input.peek_nth(2);
1303
1304 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1305 return Err(ScanError::new_str(
1306 *mark,
1307 "while parsing a tag, found an invalid escape sequence",
1308 ));
1309 }
1310
1311 let byte = (as_hex(c) << 4) + as_hex(nc);
1312 if width == 0 {
1313 width = match byte {
1314 _ if byte & 0x80 == 0x00 => 1,
1315 _ if byte & 0xE0 == 0xC0 => 2,
1316 _ if byte & 0xF0 == 0xE0 => 3,
1317 _ if byte & 0xF8 == 0xF0 => 4,
1318 _ => {
1319 return Err(ScanError::new_str(
1320 *mark,
1321 "while parsing a tag, found an incorrect leading UTF-8 byte",
1322 ));
1323 }
1324 };
1325 code = byte;
1326 } else {
1327 if byte & 0xc0 != 0x80 {
1328 return Err(ScanError::new_str(
1329 *mark,
1330 "while parsing a tag, found an incorrect trailing UTF-8 byte",
1331 ));
1332 }
1333 code = (code << 8) + byte;
1334 }
1335
1336 self.skip_n_non_blank(3);
1337
1338 width -= 1;
1339 if width == 0 {
1340 break;
1341 }
1342 }
1343
1344 match char::from_u32(code) {
1345 Some(ch) => Ok(ch),
1346 None => Err(ScanError::new_str(
1347 *mark,
1348 "while parsing a tag, found an invalid UTF-8 codepoint",
1349 )),
1350 }
1351 }
1352
1353 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
1354 self.save_simple_key();
1355 self.disallow_simple_key();
1356
1357 let tok = self.scan_anchor(alias)?;
1358
1359 self.tokens.push_back(tok);
1360
1361 Ok(())
1362 }
1363
1364 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
1365 let mut string = String::new();
1366 let start_mark = self.mark;
1367
1368 self.skip_non_blank();
1369 while is_anchor_char(self.input.look_ch()) {
1370 string.push(self.input.peek());
1371 self.skip_non_blank();
1372 }
1373
1374 if string.is_empty() {
1375 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
1376 }
1377
1378 let tok = if alias {
1379 TokenType::Alias(string.into())
1380 } else {
1381 TokenType::Anchor(string.into())
1382 };
1383 Ok(Token(Span::new(start_mark, self.mark), tok))
1384 }
1385
1386 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
1387 self.save_simple_key();
1389
1390 self.roll_one_col_indent();
1391 self.increase_flow_level()?;
1392
1393 self.allow_simple_key();
1394
1395 let start_mark = self.mark;
1396 self.skip_non_blank();
1397
1398 if tok == TokenType::FlowMappingStart {
1399 self.flow_mapping_started = true;
1400 } else {
1401 self.implicit_flow_mapping_states
1402 .push(ImplicitMappingState::Possible);
1403 }
1404
1405 self.skip_ws_to_eol(SkipTabs::Yes)?;
1406
1407 self.tokens
1408 .push_back(Token(Span::new(start_mark, self.mark), tok));
1409 Ok(())
1410 }
1411
1412 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
1413 self.remove_simple_key()?;
1414 self.decrease_flow_level();
1415
1416 self.disallow_simple_key();
1417
1418 if matches!(tok, TokenType::FlowSequenceEnd) {
1419 self.end_implicit_mapping(self.mark);
1420 self.implicit_flow_mapping_states.pop();
1422 }
1423
1424 let start_mark = self.mark;
1425 self.skip_non_blank();
1426 self.skip_ws_to_eol(SkipTabs::Yes)?;
1427
1428 if self.flow_level > 0 {
1434 self.adjacent_value_allowed_at = self.mark.index;
1435 }
1436
1437 self.tokens
1438 .push_back(Token(Span::new(start_mark, self.mark), tok));
1439 Ok(())
1440 }
1441
1442 fn fetch_flow_entry(&mut self) -> ScanResult {
1444 self.remove_simple_key()?;
1445 self.allow_simple_key();
1446
1447 self.end_implicit_mapping(self.mark);
1448
1449 let start_mark = self.mark;
1450 self.skip_non_blank();
1451 self.skip_ws_to_eol(SkipTabs::Yes)?;
1452
1453 self.tokens.push_back(Token(
1454 Span::new(start_mark, self.mark),
1455 TokenType::FlowEntry,
1456 ));
1457 Ok(())
1458 }
1459
1460 fn increase_flow_level(&mut self) -> ScanResult {
1461 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1462 self.flow_level = self
1463 .flow_level
1464 .checked_add(1)
1465 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
1466 Ok(())
1467 }
1468
1469 fn decrease_flow_level(&mut self) {
1470 if self.flow_level > 0 {
1471 self.flow_level -= 1;
1472 self.simple_keys.pop().unwrap();
1473 }
1474 }
1475
    /// Handle a `-` block sequence entry indicator and queue a `BlockEntry` token.
    ///
    /// The indentation is rolled to the column of the `-` (passing `BlockSequenceStart` to
    /// `roll_indent`), trailing whitespace is skipped, and simple keys are allowed for the
    /// entry's content. The queued token has an empty span located after that whitespace.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when:
    /// - the `-` appears in flow context;
    /// - block sequence entries are not allowed at this position;
    /// - the previous token is an anchor or tag at column 0, the `-` is also at column 0 and
    ///   an indentation level is active;
    /// - tabs separate this `-` from another entry indicator;
    /// - `skip_ws_to_eol` or `remove_simple_key` fails.
    fn fetch_block_entry(&mut self) -> ScanResult {
        if self.flow_level > 0 {
            return Err(ScanError::new_str(
                self.mark,
                r#""-" is only valid inside a block"#,
            ));
        }
        // An entry is allowed exactly where a simple key would be.
        if !self.simple_key_allowed {
            return Err(ScanError::new_str(
                self.mark,
                "block sequence entries are not allowed in this context",
            ));
        }

        // An anchor or tag at column 0 followed by a `-` at column 0 is rejected when an
        // indentation level is active.
        if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
            if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
                return Err(ScanError::new_str(
                    span.start,
                    "invalid indentation for anchor",
                ));
            }
        }

        let mark = self.mark;
        // Step over the `-`.
        self.skip_non_blank();

        self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
        let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
        self.input.lookahead(2);
        // Tabs may not be what separates two consecutive entry indicators.
        if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
        {
            return Err(ScanError::new_str(
                self.mark,
                "'-' must be followed by a valid YAML whitespace",
            ));
        }

        self.skip_ws_to_eol(SkipTabs::No)?;
        self.input.lookahead(1);
        // When the entry's content is a line break or a flow indicator, the indentation is
        // rolled by one more column.
        if self.input.next_is_break() || self.input.next_is_flow() {
            self.roll_one_col_indent();
        }

        self.remove_simple_key()?;
        self.allow_simple_key();

        self.tokens
            .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));

        Ok(())
    }
1537
1538 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
1539 self.unroll_indent(-1);
1540 self.remove_simple_key()?;
1541 self.disallow_simple_key();
1542
1543 let mark = self.mark;
1544
1545 self.skip_n_non_blank(3);
1546
1547 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
1548 Ok(())
1549 }
1550
1551 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1552 self.save_simple_key();
1553 self.allow_simple_key();
1554 let tok = self.scan_block_scalar(literal)?;
1555
1556 self.tokens.push_back(tok);
1557 Ok(())
1558 }
1559
1560 #[allow(clippy::too_many_lines)]
1561 fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
1562 let start_mark = self.mark;
1563 let mut chomping = Chomping::Clip;
1564 let mut increment: usize = 0;
1565 let mut indent: usize = 0;
1566 let mut trailing_blank: bool;
1567 let mut leading_blank: bool = false;
1568 let style = if literal {
1569 ScalarStyle::Literal
1570 } else {
1571 ScalarStyle::Folded
1572 };
1573
1574 let mut string = String::new();
1575 let mut leading_break = String::new();
1576 let mut trailing_breaks = String::new();
1577 let mut chomping_break = String::new();
1578
1579 self.skip_non_blank();
1581 self.unroll_non_block_indents();
1582
1583 if self.input.look_ch() == '+' || self.input.peek() == '-' {
1584 if self.input.peek() == '+' {
1585 chomping = Chomping::Keep;
1586 } else {
1587 chomping = Chomping::Strip;
1588 }
1589 self.skip_non_blank();
1590 self.input.lookahead(1);
1591 if self.input.next_is_digit() {
1592 if self.input.peek() == '0' {
1593 return Err(ScanError::new_str(
1594 start_mark,
1595 "while scanning a block scalar, found an indentation indicator equal to 0",
1596 ));
1597 }
1598 increment = (self.input.peek() as usize) - ('0' as usize);
1599 self.skip_non_blank();
1600 }
1601 } else if self.input.next_is_digit() {
1602 if self.input.peek() == '0' {
1603 return Err(ScanError::new_str(
1604 start_mark,
1605 "while scanning a block scalar, found an indentation indicator equal to 0",
1606 ));
1607 }
1608
1609 increment = (self.input.peek() as usize) - ('0' as usize);
1610 self.skip_non_blank();
1611 self.input.lookahead(1);
1612 if self.input.peek() == '+' || self.input.peek() == '-' {
1613 if self.input.peek() == '+' {
1614 chomping = Chomping::Keep;
1615 } else {
1616 chomping = Chomping::Strip;
1617 }
1618 self.skip_non_blank();
1619 }
1620 }
1621
1622 self.skip_ws_to_eol(SkipTabs::Yes)?;
1623
1624 self.input.lookahead(1);
1626 if !self.input.next_is_breakz() {
1627 return Err(ScanError::new_str(
1628 start_mark,
1629 "while scanning a block scalar, did not find expected comment or line break",
1630 ));
1631 }
1632
1633 if self.input.next_is_break() {
1634 self.input.lookahead(2);
1635 self.read_break(&mut chomping_break);
1636 }
1637
1638 if self.input.look_ch() == '\t' {
1639 return Err(ScanError::new_str(
1640 start_mark,
1641 "a block scalar content cannot start with a tab",
1642 ));
1643 }
1644
1645 if increment > 0 {
1646 indent = if self.indent >= 0 {
1647 (self.indent + increment as isize) as usize
1648 } else {
1649 increment
1650 }
1651 }
1652
1653 if indent == 0 {
1655 self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
1656 } else {
1657 self.skip_block_scalar_indent(indent, &mut trailing_breaks);
1658 }
1659
1660 if self.input.next_is_z() {
1665 let contents = match chomping {
1666 Chomping::Strip => String::new(),
1668 _ if self.mark.line == start_mark.line() => String::new(),
1670 Chomping::Clip => chomping_break,
1673 Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
1676 Chomping::Keep => trailing_breaks,
1678 };
1679 return Ok(Token(
1680 Span::new(start_mark, self.mark),
1681 TokenType::Scalar(style, contents.into()),
1682 ));
1683 }
1684
1685 if self.mark.col < indent && (self.mark.col as isize) > self.indent {
1686 return Err(ScanError::new_str(
1687 self.mark,
1688 "wrongly indented line in block scalar",
1689 ));
1690 }
1691
1692 let mut line_buffer = String::with_capacity(100);
1693 let start_mark = self.mark;
1694 while self.mark.col == indent && !self.input.next_is_z() {
1695 if indent == 0 {
1696 self.input.lookahead(4);
1697 if self.input.next_is_document_end() {
1698 break;
1699 }
1700 }
1701
1702 trailing_blank = self.input.next_is_blank();
1704 if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1705 string.push_str(&trailing_breaks);
1706 if trailing_breaks.is_empty() {
1707 string.push(' ');
1708 }
1709 } else {
1710 string.push_str(&leading_break);
1711 string.push_str(&trailing_breaks);
1712 }
1713
1714 leading_break.clear();
1715 trailing_breaks.clear();
1716
1717 leading_blank = self.input.next_is_blank();
1718
1719 self.scan_block_scalar_content_line(&mut string, &mut line_buffer);
1720
1721 self.input.lookahead(2);
1723 if self.input.next_is_z() {
1724 break;
1725 }
1726
1727 self.read_break(&mut leading_break);
1728
1729 self.skip_block_scalar_indent(indent, &mut trailing_breaks);
1731 }
1732
1733 if chomping != Chomping::Strip {
1735 string.push_str(&leading_break);
1736 if self.input.next_is_z() && self.mark.col >= indent.max(1) {
1740 string.push('\n');
1741 }
1742 }
1743
1744 if chomping == Chomping::Keep {
1745 string.push_str(&trailing_breaks);
1746 }
1747
1748 Ok(Token(
1749 Span::new(start_mark, self.mark),
1750 TokenType::Scalar(style, string.into()),
1751 ))
1752 }
1753
1754 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
1764 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
1766 string.push(self.input.peek());
1767 self.skip_blank();
1773 }
1774
1775 if self.input.buf_is_empty() {
1778 while let Some(c) = self.input.raw_read_non_breakz_ch() {
1784 line_buffer.push(c);
1785 }
1786
1787 let n_chars = line_buffer.chars().count();
1789 self.mark.col += n_chars;
1790 self.mark.index += n_chars;
1791
1792 string.reserve(line_buffer.len());
1794 string.push_str(line_buffer);
1795 line_buffer.clear();
1797 }
1798 }
1799
1800 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
1802 loop {
1803 if indent < self.input.bufmaxlen() - 2 {
1805 self.input.lookahead(self.input.bufmaxlen());
1806 while self.mark.col < indent && self.input.peek() == ' ' {
1807 self.skip_blank();
1808 }
1809 } else {
1810 loop {
1811 self.input.lookahead(self.input.bufmaxlen());
1812 while !self.input.buf_is_empty()
1813 && self.mark.col < indent
1814 && self.input.peek() == ' '
1815 {
1816 self.skip_blank();
1817 }
1818 if self.mark.col == indent
1822 || (!self.input.buf_is_empty() && self.input.peek() != ' ')
1823 {
1824 break;
1825 }
1826 }
1827 self.input.lookahead(2);
1828 }
1829
1830 if self.input.next_is_break() {
1832 self.read_break(breaks);
1833 } else {
1834 break;
1836 }
1837 }
1838 }
1839
1840 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
1845 let mut max_indent = 0;
1846 loop {
1847 while self.input.look_ch() == ' ' {
1849 self.skip_blank();
1850 }
1851
1852 if self.mark.col > max_indent {
1853 max_indent = self.mark.col;
1854 }
1855
1856 if self.input.next_is_break() {
1857 self.input.lookahead(2);
1859 self.read_break(breaks);
1860 } else {
1861 break;
1863 }
1864 }
1865
1866 *indent = max_indent.max((self.indent + 1) as usize);
1875 if self.indent > 0 {
1876 *indent = (*indent).max(1);
1877 }
1878 }
1879
1880 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1881 self.save_simple_key();
1882 self.disallow_simple_key();
1883
1884 let tok = self.scan_flow_scalar(single)?;
1885
1886 self.skip_to_next_token()?;
1889 self.adjacent_value_allowed_at = self.mark.index;
1890
1891 self.tokens.push_back(tok);
1892 Ok(())
1893 }
1894
    /// Scan a quoted flow scalar and build the corresponding `Scalar` token.
    ///
    /// `single` selects the quoting style: `true` scans up to a closing `'`, `false` up to a
    /// closing `"`. The token's span runs from the position on entry to the position after the
    /// closing quote and any trailing whitespace skipped on that line.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when:
    /// - a document indicator is found at column 0 inside the scalar,
    /// - the input ends before the closing quote,
    /// - a line starts at a column lower than the current indentation,
    /// - a tab is used as indentation on a continuation line,
    /// - consuming the scalar content fails (e.g. an invalid escape sequence),
    /// - unexpected content follows the closing quote.
    #[allow(clippy::too_many_lines)]
    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
        let start_mark = self.mark;

        let mut string = String::new();
        let mut leading_break = String::new();
        let mut trailing_breaks = String::new();
        let mut whitespaces = String::new();
        let mut leading_blanks;

        // Step over the first character (presumably the opening quote that triggered this scan).
        self.skip_non_blank();

        loop {
            self.input.lookahead(4);

            if self.mark.col == 0 && self.input.next_is_document_indicator() {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected document indicator",
                ));
            }

            if self.input.next_is_z() {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected end of stream",
                ));
            }

            if (self.mark.col as isize) < self.indent {
                return Err(ScanError::new_str(
                    start_mark,
                    "invalid indentation in quoted scalar",
                ));
            }

            // Consume a run of non-blank characters. The callee sets `leading_blanks` when it
            // consumed an escaped line break.
            leading_blanks = false;
            self.consume_flow_scalar_non_whitespace_chars(
                single,
                &mut string,
                &mut leading_blanks,
                &start_mark,
            )?;

            // Stop on the closing quote matching the requested style.
            match self.input.look_ch() {
                '\'' if single => break,
                '"' if !single => break,
                _ => {}
            }

            // Consume blanks and line breaks following the run of non-blank characters.
            while self.input.next_is_blank() || self.input.next_is_break() {
                if self.input.next_is_blank() {
                    if leading_blanks {
                        // After a line break, blanks are dropped; a tab before the current
                        // indentation column is rejected.
                        if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tab cannot be used as indentation",
                            ));
                        }
                        self.skip_blank();
                    } else {
                        // Blanks before any line break are kept; they are only emitted if no
                        // break follows (see the `whitespaces.clear()` below).
                        whitespaces.push(self.input.peek());
                        self.skip_blank();
                    }
                } else {
                    self.input.lookahead(2);
                    if leading_blanks {
                        // Second or later break: goes to the trailing breaks.
                        self.read_break(&mut trailing_breaks);
                    } else {
                        // First break: discard the blanks collected on this line.
                        whitespaces.clear();
                        self.read_break(&mut leading_break);
                        leading_blanks = true;
                    }
                }
                self.input.lookahead(1);
            }

            // Join the collected whitespace, or fold the line breaks.
            if leading_blanks {
                if leading_break.is_empty() {
                    // No leading break was read (pushing the empty `leading_break` is a no-op):
                    // only the trailing breaks are appended.
                    string.push_str(&leading_break);
                    string.push_str(&trailing_breaks);
                    trailing_breaks.clear();
                    leading_break.clear();
                } else {
                    // A single break folds into a space; additional breaks are kept as-is.
                    if trailing_breaks.is_empty() {
                        string.push(' ');
                    } else {
                        string.push_str(&trailing_breaks);
                        trailing_breaks.clear();
                    }
                    leading_break.clear();
                }
            } else {
                string.push_str(&whitespaces);
                whitespaces.clear();
            }
        }

        // Step over the closing quote.
        self.skip_non_blank();
        // Skip whitespace up to the end of the line, then validate what follows the scalar.
        self.skip_ws_to_eol(SkipTabs::Yes)?;
        match self.input.peek() {
            // Flow indicators are accepted inside a flow context.
            ',' | '}' | ']' if self.flow_level > 0 => {}
            // A line break or the end of input is accepted.
            c if is_breakz(c) => {}
            // Outside a flow context, ':' is accepted only if the scalar fits on one line.
            ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
            // Inside a flow context, ':' is accepted regardless of line.
            ':' if self.flow_level > 0 => {}
            _ => {
                // NOTE(review): this message says "double-quoted" even when `single` is true.
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid trailing content after double-quoted scalar",
                ));
            }
        }

        let style = if single {
            ScalarStyle::SingleQuoted
        } else {
            ScalarStyle::DoubleQuoted
        };
        Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, string.into()),
        ))
    }
2031
2032 fn consume_flow_scalar_non_whitespace_chars(
2041 &mut self,
2042 single: bool,
2043 string: &mut String,
2044 leading_blanks: &mut bool,
2045 start_mark: &Marker,
2046 ) -> Result<(), ScanError> {
2047 self.input.lookahead(2);
2048 while !is_blank_or_breakz(self.input.peek()) {
2049 match self.input.peek() {
2050 '\'' if self.input.peek_nth(1) == '\'' && single => {
2052 string.push('\'');
2053 self.skip_n_non_blank(2);
2054 }
2055 '\'' if single => break,
2057 '"' if !single => break,
2058 '\\' if !single && is_break(self.input.peek_nth(1)) => {
2060 self.input.lookahead(3);
2061 self.skip_non_blank();
2062 self.skip_linebreak();
2063 *leading_blanks = true;
2064 break;
2065 }
2066 '\\' if !single => {
2068 string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
2069 }
2070 c => {
2071 string.push(c);
2072 self.skip_non_blank();
2073 }
2074 }
2075 self.input.lookahead(2);
2076 }
2077 Ok(())
2078 }
2079
2080 fn resolve_flow_scalar_escape_sequence(
2087 &mut self,
2088 start_mark: &Marker,
2089 ) -> Result<char, ScanError> {
2090 let mut code_length = 0usize;
2091 let mut ret = '\0';
2092
2093 match self.input.peek_nth(1) {
2094 '0' => ret = '\0',
2095 'a' => ret = '\x07',
2096 'b' => ret = '\x08',
2097 't' | '\t' => ret = '\t',
2098 'n' => ret = '\n',
2099 'v' => ret = '\x0b',
2100 'f' => ret = '\x0c',
2101 'r' => ret = '\x0d',
2102 'e' => ret = '\x1b',
2103 ' ' => ret = '\x20',
2104 '"' => ret = '"',
2105 '/' => ret = '/',
2106 '\\' => ret = '\\',
2107 'N' => ret = char::from_u32(0x85).unwrap(),
2109 '_' => ret = char::from_u32(0xA0).unwrap(),
2111 'L' => ret = char::from_u32(0x2028).unwrap(),
2113 'P' => ret = char::from_u32(0x2029).unwrap(),
2115 'x' => code_length = 2,
2116 'u' => code_length = 4,
2117 'U' => code_length = 8,
2118 _ => {
2119 return Err(ScanError::new_str(
2120 *start_mark,
2121 "while parsing a quoted scalar, found unknown escape character",
2122 ))
2123 }
2124 }
2125 self.skip_n_non_blank(2);
2126
2127 if code_length > 0 {
2129 self.input.lookahead(code_length);
2130 let mut value = 0u32;
2131 for i in 0..code_length {
2132 let c = self.input.peek_nth(i);
2133 if !is_hex(c) {
2134 return Err(ScanError::new_str(
2135 *start_mark,
2136 "while parsing a quoted scalar, did not find expected hexadecimal number",
2137 ));
2138 }
2139 value = (value << 4) + as_hex(c);
2140 }
2141
2142 let Some(ch) = char::from_u32(value) else {
2143 return Err(ScanError::new_str(
2144 *start_mark,
2145 "while parsing a quoted scalar, found invalid Unicode character escape code",
2146 ));
2147 };
2148 ret = ch;
2149
2150 self.skip_n_non_blank(code_length);
2151 }
2152 Ok(ret)
2153 }
2154
2155 fn fetch_plain_scalar(&mut self) -> ScanResult {
2156 self.save_simple_key();
2157 self.disallow_simple_key();
2158
2159 let tok = self.scan_plain_scalar()?;
2160
2161 self.tokens.push_back(tok);
2162 Ok(())
2163 }
2164
    /// Scan a plain (unquoted) scalar and build the corresponding `Scalar` token.
    ///
    /// Non-block indents are unrolled first; continuation lines must then be indented by at
    /// least `self.indent + 1` outside of flow contexts. The token's span ends right after the
    /// last non-blank character pushed into the scalar (`end_mark`), not after trailing
    /// whitespace.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when:
    /// - inside a flow context, the scalar starts at a column lower than the required indent,
    /// - inside a flow context, a `-` is directly followed by a flow indicator,
    /// - a tab is found in the indentation of a line that is not otherwise empty,
    /// - no character at all could be consumed for the scalar.
    #[allow(clippy::too_many_lines)]
    fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
        self.unroll_non_block_indents();
        let indent = self.indent + 1;
        let start_mark = self.mark;

        if self.flow_level > 0 && (start_mark.col as isize) < indent {
            return Err(ScanError::new_str(
                start_mark,
                "invalid indentation in flow construct",
            ));
        }

        let mut string = String::with_capacity(32);
        self.buf_whitespaces.clear();
        self.buf_leading_break.clear();
        self.buf_trailing_breaks.clear();
        let mut end_mark = self.mark;

        loop {
            self.input.lookahead(4);
            // A document indicator after leading whitespace, or a `#`, ends the scalar.
            if (self.leading_whitespace && self.input.next_is_document_indicator())
                || self.input.peek() == '#'
            {
                break;
            }

            if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
                return Err(ScanError::new_str(
                    self.mark,
                    "plain scalar cannot start with '-' followed by ,[]{}",
                ));
            }

            if !self.input.next_is_blank_or_breakz()
                && self.input.next_can_be_plain_scalar(self.flow_level > 0)
            {
                // Before adding content, flush the whitespace / line breaks gathered since the
                // previous run of content.
                if self.leading_whitespace {
                    if self.buf_leading_break.is_empty() {
                        // Pushing the empty leading break is a no-op; only trailing breaks are
                        // appended here.
                        string.push_str(&self.buf_leading_break);
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                        self.buf_leading_break.clear();
                    } else {
                        // A single break folds into a space; additional breaks are kept as-is.
                        if self.buf_trailing_breaks.is_empty() {
                            string.push(' ');
                        } else {
                            string.push_str(&self.buf_trailing_breaks);
                            self.buf_trailing_breaks.clear();
                        }
                        self.buf_leading_break.clear();
                    }
                    self.leading_whitespace = false;
                } else if !self.buf_whitespaces.is_empty() {
                    string.push_str(&self.buf_whitespaces);
                    self.buf_whitespaces.clear();
                }

                // The condition above already validated the first character: take it directly.
                string.push(self.input.peek());
                self.skip_non_blank();
                string.reserve(self.input.bufmaxlen());

                // Consume the following content characters, one buffer fill at a time.
                let mut end = false;
                while !end {
                    self.input.lookahead(self.input.bufmaxlen());
                    // One iteration fewer than the buffer size; presumably because
                    // `next_can_be_plain_scalar` inspects more than one character — TODO confirm.
                    for _ in 0..self.input.bufmaxlen() - 1 {
                        if self.input.next_is_blank_or_breakz()
                            || !self.input.next_can_be_plain_scalar(self.flow_level > 0)
                        {
                            end = true;
                            break;
                        }
                        string.push(self.input.peek());
                        self.skip_non_blank();
                    }
                }
                end_mark = self.mark;
            }

            // Anything other than a blank or a line break here ends the scalar.
            if !(self.input.next_is_blank() || self.input.next_is_break()) {
                break;
            }

            // Process the blanks and line breaks following the content.
            self.input.lookahead(2);
            while self.input.next_is_blank_or_break() {
                if self.input.next_is_blank() {
                    if !self.leading_whitespace {
                        // Blanks on the content line are buffered until more content follows.
                        self.buf_whitespaces.push(self.input.peek());
                        self.skip_blank();
                    } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                        // A tab within the indentation is tolerated only if nothing but
                        // whitespace follows on that line.
                        self.skip_ws_to_eol(SkipTabs::Yes)?;
                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                start_mark,
                                "while scanning a plain scalar, found a tab",
                            ));
                        }
                    } else {
                        self.skip_blank();
                    }
                } else {
                    if self.leading_whitespace {
                        // Second or later break: recorded as a trailing break.
                        self.skip_break();
                        self.buf_trailing_breaks.push('\n');
                    } else {
                        // First break: drop the blanks buffered on the content line.
                        self.buf_whitespaces.clear();
                        self.skip_break();
                        self.buf_leading_break.push('\n');
                        self.leading_whitespace = true;
                    }
                }
                self.input.lookahead(2);
            }

            // Outside flow contexts, a line indented less than required ends the scalar.
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        if self.leading_whitespace {
            self.allow_simple_key();
        }

        if string.is_empty() {
            // Nothing was consumed for the scalar: report it rather than emit an empty token.
            Err(ScanError::new_str(
                start_mark,
                "unexpected end of plain scalar",
            ))
        } else {
            Ok(Token(
                Span::new(start_mark, end_mark),
                TokenType::Scalar(ScalarStyle::Plain, string.into()),
            ))
        }
    }
2321
2322 fn fetch_key(&mut self) -> ScanResult {
2323 let start_mark = self.mark;
2324 if self.flow_level == 0 {
2325 if !self.simple_key_allowed {
2327 return Err(ScanError::new_str(
2328 self.mark,
2329 "mapping keys are not allowed in this context",
2330 ));
2331 }
2332 self.roll_indent(
2333 start_mark.col,
2334 None,
2335 TokenType::BlockMappingStart,
2336 start_mark,
2337 );
2338 } else {
2339 self.flow_mapping_started = true;
2341 }
2342
2343 self.remove_simple_key()?;
2344
2345 if self.flow_level == 0 {
2346 self.allow_simple_key();
2347 } else {
2348 self.disallow_simple_key();
2349 }
2350
2351 self.skip_non_blank();
2352 self.skip_yaml_whitespace()?;
2353 if self.input.peek() == '\t' {
2354 return Err(ScanError::new_str(
2355 self.mark(),
2356 "tabs disallowed in this context",
2357 ));
2358 }
2359 self.tokens
2360 .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
2361 Ok(())
2362 }
2363
2364 fn fetch_flow_value(&mut self) -> ScanResult {
2372 let nc = self.input.peek_nth(1);
2373
2374 if self.mark.index != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
2386 return Err(ScanError::new_str(
2387 self.mark,
2388 "':' may not precede any of `[{` in flow mapping",
2389 ));
2390 }
2391
2392 self.fetch_value()
2393 }
2394
    /// Handle a `:` value indicator and queue a `Value` token.
    ///
    /// If a simple key is pending, a `Key` token (and, for an implicit flow mapping, a
    /// `FlowMappingStart` token) is inserted retroactively at the key's position, and a block
    /// mapping is opened at the key's column if needed. Otherwise the indicator follows a
    /// non-simple key and the mapping start, if any, is emitted at the current position.
    ///
    /// # Panics
    /// Panics if `simple_keys` is empty.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when:
    /// - the `:` is followed by a tab, no valid YAML whitespace, and then `-` or an alphabetic
    ///   character,
    /// - in an implicit flow mapping, the pending key starts on an earlier line than the `:`,
    /// - no simple key is pending and a value is not allowed here in block context.
    fn fetch_value(&mut self) -> ScanResult {
        let sk = self.simple_keys.last().unwrap().clone();
        let start_mark = self.mark;
        // Inside a flow collection with no explicitly started mapping, this `:` creates an
        // implicit mapping.
        let is_implicit_flow_mapping =
            !self.implicit_flow_mapping_states.is_empty() && !self.flow_mapping_started;
        if is_implicit_flow_mapping {
            *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Inside;
        }

        // Step over the ':' indicator.
        self.skip_non_blank();
        // Note: `skip_ws_to_eol` is only called, and thus whitespace only consumed here, when
        // the next character is a tab.
        if self.input.look_ch() == '\t'
            && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
            && (self.input.peek() == '-' || self.input.next_is_alpha())
        {
            return Err(ScanError::new_str(
                self.mark,
                "':' must be followed by a valid YAML whitespace",
            ));
        }

        if sk.possible {
            // Insert the `Key` token at the position where the simple key started.
            let tok = Token(Span::empty(sk.mark), TokenType::Key);
            self.insert_token(sk.token_number - self.tokens_parsed, tok);
            if is_implicit_flow_mapping {
                if sk.mark.line < start_mark.line {
                    return Err(ScanError::new_str(
                        start_mark,
                        "illegal placement of ':' indicator",
                    ));
                }
                // Inserted at the same index, hence ending up before the `Key` token.
                self.insert_token(
                    sk.token_number - self.tokens_parsed,
                    Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
                );
            }

            // Open a block mapping at the key's column if one is needed (no-op in flow context).
            self.roll_indent(
                sk.mark.col,
                Some(sk.token_number),
                TokenType::BlockMappingStart,
                sk.mark,
            );
            self.roll_one_col_indent();

            self.simple_keys.last_mut().unwrap().possible = false;
            self.disallow_simple_key();
        } else {
            if is_implicit_flow_mapping {
                self.tokens
                    .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
            }
            // No simple key is pending: the ':' stands on its own.
            if self.flow_level == 0 {
                if !self.simple_key_allowed {
                    return Err(ScanError::new_str(
                        start_mark,
                        "mapping values are not allowed in this context",
                    ));
                }

                self.roll_indent(
                    start_mark.col,
                    None,
                    TokenType::BlockMappingStart,
                    start_mark,
                );
            }
            self.roll_one_col_indent();

            if self.flow_level == 0 {
                self.allow_simple_key();
            } else {
                self.disallow_simple_key();
            }
        }
        self.tokens
            .push_back(Token(Span::empty(start_mark), TokenType::Value));

        Ok(())
    }
2479
2480 fn roll_indent(
2486 &mut self,
2487 col: usize,
2488 number: Option<usize>,
2489 tok: TokenType<'input>,
2490 mark: Marker,
2491 ) {
2492 if self.flow_level > 0 {
2493 return;
2494 }
2495
2496 if self.indent <= col as isize {
2500 if let Some(indent) = self.indents.last() {
2501 if !indent.needs_block_end {
2502 self.indent = indent.indent;
2503 self.indents.pop();
2504 }
2505 }
2506 }
2507
2508 if self.indent < col as isize {
2509 self.indents.push(Indent {
2510 indent: self.indent,
2511 needs_block_end: true,
2512 });
2513 self.indent = col as isize;
2514 let tokens_parsed = self.tokens_parsed;
2515 match number {
2516 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
2517 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
2518 }
2519 }
2520 }
2521
2522 fn unroll_indent(&mut self, col: isize) {
2528 if self.flow_level > 0 {
2529 return;
2530 }
2531 while self.indent > col {
2532 let indent = self.indents.pop().unwrap();
2533 self.indent = indent.indent;
2534 if indent.needs_block_end {
2535 self.tokens
2536 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
2537 }
2538 }
2539 }
2540
2541 fn roll_one_col_indent(&mut self) {
2547 if self.flow_level == 0 && self.indents.last().is_some_and(|x| x.needs_block_end) {
2548 self.indents.push(Indent {
2549 indent: self.indent,
2550 needs_block_end: false,
2551 });
2552 self.indent += 1;
2553 }
2554 }
2555
2556 fn unroll_non_block_indents(&mut self) {
2558 while let Some(indent) = self.indents.last() {
2559 if indent.needs_block_end {
2560 break;
2561 }
2562 self.indent = indent.indent;
2563 self.indents.pop();
2564 }
2565 }
2566
2567 fn save_simple_key(&mut self) {
2569 if self.simple_key_allowed {
2570 let required = self.flow_level == 0
2571 && self.indent == (self.mark.col as isize)
2572 && self.indents.last().unwrap().needs_block_end;
2573 let mut sk = SimpleKey::new(self.mark);
2574 sk.possible = true;
2575 sk.required = required;
2576 sk.token_number = self.tokens_parsed + self.tokens.len();
2577
2578 self.simple_keys.pop();
2579 self.simple_keys.push(sk);
2580 }
2581 }
2582
2583 fn remove_simple_key(&mut self) -> ScanResult {
2584 let last = self.simple_keys.last_mut().unwrap();
2585 if last.possible && last.required {
2586 return Err(ScanError::new_str(self.mark, "simple key expected"));
2587 }
2588
2589 last.possible = false;
2590 Ok(())
2591 }
2592
2593 fn is_within_block(&self) -> bool {
2595 !self.indents.is_empty()
2596 }
2597
2598 fn end_implicit_mapping(&mut self, mark: Marker) {
2604 if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
2605 if *implicit_mapping == ImplicitMappingState::Inside {
2606 self.flow_mapping_started = false;
2607 *implicit_mapping = ImplicitMappingState::Possible;
2608 self.tokens
2609 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
2610 }
2611 }
2612 }
2613}
2614
/// Chomping behavior of a block scalar, i.e. how its final line break and trailing empty lines
/// are treated.
///
/// The block scalar scanner selects [`Chomping::Keep`] on a `+` indicator and
/// [`Chomping::Strip`] on a `-` indicator.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Chomping {
    /// The final line break and any trailing empty lines are excluded from the scalar.
    Strip,
    /// The final line break is preserved, but trailing empty lines are excluded.
    Clip,
    /// The final line break and all trailing empty lines are preserved.
    Keep,
}
2627
#[cfg(test)]
mod test {
    use super::is_anchor_char;

    /// Checks that `is_anchor_char`, imported by the parent module, accepts a plain letter.
    #[test]
    fn test_is_anchor_char() {
        assert!(is_anchor_char('x'));
    }
}