1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use std::{borrow::Cow, char, collections::VecDeque, error::Error, fmt};
13
14use crate::{
15 char_traits::{
16 as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
17 is_tag_char, is_uri_char,
18 },
19 input::{Input, SkipTabs},
20};
21
/// The character encoding of a YAML stream.
///
/// Only UTF-8 is represented.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8 encoding.
    Utf8,
}
28
/// The presentation style a scalar was written in.
///
/// The derived ordering follows the declaration order of the variants, so the
/// variants must not be reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ScalarStyle {
    /// A plain (unquoted) scalar.
    Plain,
    /// A single-quoted scalar.
    SingleQuoted,
    /// A double-quoted scalar.
    DoubleQuoted,

    /// A literal block scalar (introduced by `|`).
    Literal,
    /// A folded block scalar (introduced by `>`).
    Folded,
}
53
/// A position in the input being scanned.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Marker {
    /// Offset from the start of the input (reported as "byte" in error messages).
    index: usize,
    /// Line number; the scanner starts counting at 1.
    line: usize,
    /// Column within the line, starting at 0.
    col: usize,
}

impl Marker {
    /// Build a marker from an explicit offset, line and column.
    #[must_use]
    pub fn new(index: usize, line: usize, col: usize) -> Self {
        Self { index, line, col }
    }

    /// Offset of this marker from the start of the input.
    #[must_use]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Line of this marker.
    #[must_use]
    pub fn line(&self) -> usize {
        self.line
    }

    /// Column of this marker (0-based).
    #[must_use]
    pub fn col(&self) -> usize {
        self.col
    }
}
90
91#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
93pub struct Span {
94 pub start: Marker,
96 pub end: Marker,
98}
99
100impl Span {
101 #[must_use]
103 pub fn new(start: Marker, end: Marker) -> Span {
104 Span { start, end }
105 }
106
107 #[must_use]
114 pub fn empty(mark: Marker) -> Span {
115 Span {
116 start: mark,
117 end: mark,
118 }
119 }
120
121 #[must_use]
123 pub fn len(&self) -> usize {
124 self.end.index - self.start.index
125 }
126
127 #[must_use]
129 pub fn is_empty(&self) -> bool {
130 self.len() == 0
131 }
132}
133
134#[derive(Clone, PartialEq, Debug, Eq)]
136pub struct ScanError {
137 mark: Marker,
139 info: String,
141}
142
143impl ScanError {
144 #[must_use]
146 pub fn new(loc: Marker, info: String) -> ScanError {
147 ScanError { mark: loc, info }
148 }
149
150 #[must_use]
152 pub fn new_str(loc: Marker, info: &str) -> ScanError {
153 ScanError {
154 mark: loc,
155 info: info.to_owned(),
156 }
157 }
158
159 #[must_use]
161 pub fn marker(&self) -> &Marker {
162 &self.mark
163 }
164
165 #[must_use]
167 pub fn info(&self) -> &str {
168 self.info.as_ref()
169 }
170}
171
172impl Error for ScanError {
173 fn source(&self) -> Option<&(dyn Error + 'static)> {
174 None
175 }
176}
177
178impl fmt::Display for ScanError {
179 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
180 write!(
181 formatter,
182 "{} at byte {} line {} column {}",
183 self.info,
184 self.mark.index,
185 self.mark.line,
186 self.mark.col + 1,
187 )
188 }
189}
190
191#[derive(Clone, PartialEq, Debug, Eq)]
193pub enum TokenType<'input> {
194 StreamStart(TEncoding),
196 StreamEnd,
198 VersionDirective(
200 u32,
202 u32,
204 ),
205 TagDirective(
207 Cow<'input, str>,
209 Cow<'input, str>,
211 ),
212 DocumentStart,
214 DocumentEnd,
216 BlockSequenceStart,
220 BlockMappingStart,
224 BlockEnd,
226 FlowSequenceStart,
228 FlowSequenceEnd,
230 FlowMappingStart,
232 FlowMappingEnd,
234 BlockEntry,
236 FlowEntry,
238 Key,
240 Value,
242 Alias(Cow<'input, str>),
244 Anchor(Cow<'input, str>),
246 Tag(
248 String,
250 String,
252 ),
253 Scalar(ScalarStyle, Cow<'input, str>),
255}
256
257#[derive(Clone, PartialEq, Debug, Eq)]
259pub struct Token<'input>(pub Span, pub TokenType<'input>);
260
261#[derive(Clone, PartialEq, Debug, Eq)]
296struct SimpleKey {
297 possible: bool,
310 required: bool,
318 token_number: usize,
324 mark: Marker,
326}
327
328impl SimpleKey {
329 fn new(mark: Marker) -> SimpleKey {
331 SimpleKey {
332 possible: false,
333 required: false,
334 token_number: 0,
335 mark,
336 }
337 }
338}
339
/// One entry of the scanner's indentation stack.
#[derive(Debug, Default, Clone)]
struct Indent {
    /// The indentation level of this entry.
    indent: isize,
    /// NOTE(review): presumably whether a `BlockEnd` token must be emitted when this
    /// entry is popped — the code using it is not in this part of the file; confirm.
    needs_block_end: bool,
}
364
/// State of implicit-mapping tracking for one flow sequence.
#[derive(PartialEq, Debug)]
enum ImplicitMappingState {
    /// An implicit mapping may start here; this is the state pushed when a flow
    /// sequence is opened.
    Possible,
    /// NOTE(review): presumably "currently inside an implicit mapping" — the code
    /// that sets this state is not in this part of the file; confirm.
    Inside,
}
398
399#[derive(Debug)]
409#[allow(clippy::struct_excessive_bools)]
410pub struct Scanner<'input, T> {
411 input: T,
415 mark: Marker,
417 tokens: VecDeque<Token<'input>>,
424 error: Option<ScanError>,
426
427 stream_start_produced: bool,
429 stream_end_produced: bool,
431 adjacent_value_allowed_at: usize,
434 simple_key_allowed: bool,
438 simple_keys: Vec<SimpleKey>,
443 indent: isize,
445 indents: Vec<Indent>,
447 flow_level: u8,
449 tokens_parsed: usize,
453 token_available: bool,
455 leading_whitespace: bool,
457 flow_mapping_started: bool,
464 implicit_flow_mapping_states: Vec<ImplicitMappingState>,
477 buf_leading_break: String,
478 buf_trailing_breaks: String,
479 buf_whitespaces: String,
480}
481
482impl<'input, T: Input> Iterator for Scanner<'input, T> {
483 type Item = Token<'input>;
484
485 fn next(&mut self) -> Option<Self::Item> {
486 if self.error.is_some() {
487 return None;
488 }
489 match self.next_token() {
490 Ok(Some(tok)) => {
491 debug_print!(
492 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
493 tok.1,
494 tok.0
495 );
496 Some(tok)
497 }
498 Ok(tok) => tok,
499 Err(e) => {
500 self.error = Some(e);
501 None
502 }
503 }
504 }
505}
506
507pub type ScanResult = Result<(), ScanError>;
509
510impl<'input, T: Input> Scanner<'input, T> {
511 pub fn new(input: T) -> Self {
513 Scanner {
514 input,
515 mark: Marker::new(0, 1, 0),
516 tokens: VecDeque::new(),
517 error: None,
518
519 stream_start_produced: false,
520 stream_end_produced: false,
521 adjacent_value_allowed_at: 0,
522 simple_key_allowed: true,
523 simple_keys: Vec::new(),
524 indent: -1,
525 indents: Vec::new(),
526 flow_level: 0,
527 tokens_parsed: 0,
528 token_available: false,
529 leading_whitespace: true,
530 flow_mapping_started: false,
531 implicit_flow_mapping_states: vec![],
532
533 buf_leading_break: String::new(),
534 buf_trailing_breaks: String::new(),
535 buf_whitespaces: String::new(),
536 }
537 }
538
539 #[inline]
544 pub fn get_error(&self) -> Option<ScanError> {
545 self.error.clone()
546 }
547
548 #[inline]
550 fn skip_blank(&mut self) {
551 self.input.skip();
552
553 self.mark.index += 1;
554 self.mark.col += 1;
555 }
556
557 #[inline]
559 fn skip_non_blank(&mut self) {
560 self.input.skip();
561
562 self.mark.index += 1;
563 self.mark.col += 1;
564 self.leading_whitespace = false;
565 }
566
567 #[inline]
569 fn skip_n_non_blank(&mut self, count: usize) {
570 self.input.skip_n(count);
571
572 self.mark.index += count;
573 self.mark.col += count;
574 self.leading_whitespace = false;
575 }
576
577 #[inline]
579 fn skip_nl(&mut self) {
580 self.input.skip();
581
582 self.mark.index += 1;
583 self.mark.col = 0;
584 self.mark.line += 1;
585 self.leading_whitespace = true;
586 }
587
588 #[inline]
590 fn skip_linebreak(&mut self) {
591 if self.input.next_2_are('\r', '\n') {
592 self.skip_blank();
595 self.skip_nl();
596 } else if self.input.next_is_break() {
597 self.skip_nl();
598 }
599 }
600
601 #[inline]
603 pub fn stream_started(&self) -> bool {
604 self.stream_start_produced
605 }
606
607 #[inline]
609 pub fn stream_ended(&self) -> bool {
610 self.stream_end_produced
611 }
612
613 #[inline]
615 pub fn mark(&self) -> Marker {
616 self.mark
617 }
618
619 #[inline]
626 fn read_break(&mut self, s: &mut String) {
627 self.skip_break();
628 s.push('\n');
629 }
630
631 #[inline]
636 fn skip_break(&mut self) {
637 let c = self.input.peek();
638 let nc = self.input.peek_nth(1);
639 debug_assert!(is_break(c));
640 if c == '\r' && nc == '\n' {
641 self.skip_blank();
642 }
643 self.skip_nl();
644 }
645
646 fn insert_token(&mut self, pos: usize, tok: Token<'input>) {
648 let old_len = self.tokens.len();
649 assert!(pos <= old_len);
650 self.tokens.insert(pos, tok);
651 }
652
653 fn allow_simple_key(&mut self) {
654 self.simple_key_allowed = true;
655 }
656
657 fn disallow_simple_key(&mut self) {
658 self.simple_key_allowed = false;
659 }
660
/// Scan the next token(s) from the input and append them to the token queue.
///
/// The first call only queues `StreamStart`. Later calls skip to the next token,
/// expire stale simple keys, unroll the indentation to the current column, and
/// dispatch on the next character(s).
///
/// # Errors
/// Returns a `ScanError` if the input is invalid at the current position, or if
/// one of the called fetchers fails.
pub fn fetch_next_token(&mut self) -> ScanResult {
    self.input.lookahead(1);

    // Nothing has been produced yet: emit the stream start and stop here.
    if !self.stream_start_produced {
        self.fetch_stream_start();
        return Ok(());
    }
    self.skip_to_next_token()?;

    debug_print!(
        " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
        self.mark,
        self.input.peek()
    );

    self.stale_simple_keys()?;

    // Unroll the indentation to the column the next token starts at.
    let mark = self.mark;
    self.unroll_indent(mark.col as isize);

    self.input.lookahead(4);

    // End of input.
    if self.input.next_is_z() {
        self.fetch_stream_end()?;
        return Ok(());
    }

    // Directives and document indicators are only recognised at column 0.
    if self.mark.col == 0 {
        if self.input.next_char_is('%') {
            return self.fetch_directive();
        } else if self.input.next_is_document_start() {
            return self.fetch_document_indicator(TokenType::DocumentStart);
        } else if self.input.next_is_document_end() {
            self.fetch_document_indicator(TokenType::DocumentEnd)?;
            // Only whitespace may follow a document end marker on its line.
            self.skip_ws_to_eol(SkipTabs::Yes)?;
            if !self.input.next_is_breakz() {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid content after document end marker",
                ));
            }
            return Ok(());
        }
    }

    if (self.mark.col as isize) < self.indent {
        return Err(ScanError::new_str(self.mark, "invalid indentation"));
    }

    // Dispatch on the current character `c`, with `nc` as one character of
    // lookahead. The arms are order-sensitive: guarded arms for `-`, `?` and `:`
    // must be tried before the plain-scalar fallbacks.
    let c = self.input.peek();
    let nc = self.input.peek_nth(1);
    match c {
        '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
        '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
        ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
        '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
        ',' => self.fetch_flow_entry(),
        '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
        '?' if is_blank_or_breakz(nc) => self.fetch_key(),
        ':' if is_blank_or_breakz(nc) => self.fetch_value(),
        // Inside a flow collection a `:` is also a value indicator when followed by
        // a flow indicator, or when it sits at the recorded "adjacent value" index.
        ':' if self.flow_level > 0
            && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at) =>
        {
            self.fetch_flow_value()
        }
        // `true` selects an alias, `false` an anchor.
        '*' => self.fetch_anchor(true),
        '&' => self.fetch_anchor(false),
        '!' => self.fetch_tag(),
        // Block scalars are only recognised outside flow collections.
        // `true` selects a literal scalar, `false` a folded one.
        '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
        '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
        // `true` selects a single-quoted scalar, `false` a double-quoted one.
        '\'' => self.fetch_flow_scalar(true),
        '"' => self.fetch_flow_scalar(false),
        // `-`, `:` and `?` not followed by a blank start a plain scalar.
        '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
        ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
            self.fetch_plain_scalar()
        }
        // These characters cannot start a token.
        '%' | '@' | '`' => Err(ScanError::new(
            self.mark,
            format!("unexpected character: `{c}'"),
        )),
        _ => self.fetch_plain_scalar(),
    }
}
753
754 pub fn next_token(&mut self) -> Result<Option<Token<'input>>, ScanError> {
758 if self.stream_end_produced {
759 return Ok(None);
760 }
761
762 if !self.token_available {
763 self.fetch_more_tokens()?;
764 }
765 let Some(t) = self.tokens.pop_front() else {
766 return Err(ScanError::new_str(
767 self.mark,
768 "did not find expected next token",
769 ));
770 };
771 self.token_available = false;
772 self.tokens_parsed += 1;
773
774 if let TokenType::StreamEnd = t.1 {
775 self.stream_end_produced = true;
776 }
777 Ok(Some(t))
778 }
779
780 pub fn fetch_more_tokens(&mut self) -> ScanResult {
784 let mut need_more;
785 loop {
786 if self.tokens.is_empty() {
787 need_more = true;
788 } else {
789 need_more = false;
790 self.stale_simple_keys()?;
792 for sk in &self.simple_keys {
794 if sk.possible && sk.token_number == self.tokens_parsed {
795 need_more = true;
796 break;
797 }
798 }
799 }
800
801 if !need_more {
802 break;
803 }
804 self.fetch_next_token()?;
805 }
806 self.token_available = true;
807
808 Ok(())
809 }
810
811 fn stale_simple_keys(&mut self) -> ScanResult {
819 for sk in &mut self.simple_keys {
820 if sk.possible
821 && self.flow_level == 0
823 && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
824 {
825 if sk.required {
826 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
827 }
828 sk.possible = false;
829 }
830 }
831 Ok(())
832 }
833
/// Skip whitespace, line breaks and comments up to the start of the next token.
///
/// # Errors
/// Returns a `ScanError` if a tab is used as block indentation on a line that
/// has content, or if skipping whitespace fails.
fn skip_to_next_token(&mut self) -> ScanResult {
    loop {
        match self.input.look_ch() {
            // A tab in the leading whitespace of a line, before the current
            // indentation column, while `is_within_block()` holds: only tolerated
            // if the rest of the line is blank.
            '\t' if self.is_within_block()
                && self.leading_whitespace
                && (self.mark.col as isize) < self.indent =>
            {
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "tabs disallowed within this context (block indentation)",
                    ));
                }
            }
            '\t' | ' ' => self.skip_blank(),
            '\n' | '\r' => {
                self.input.lookahead(2);
                self.skip_linebreak();
                // In block context a new line makes simple keys possible again.
                if self.flow_level == 0 {
                    self.allow_simple_key();
                }
            }
            // A comment: skip to (but not past) the end of the line.
            '#' => {
                let comment_length = self.input.skip_while_non_breakz();
                self.mark.index += comment_length;
                self.mark.col += comment_length;
            }
            _ => break,
        }
    }
    Ok(())
}
880
881 fn skip_yaml_whitespace(&mut self) -> ScanResult {
886 let mut need_whitespace = true;
887 loop {
888 match self.input.look_ch() {
889 ' ' => {
890 self.skip_blank();
891
892 need_whitespace = false;
893 }
894 '\n' | '\r' => {
895 self.input.lookahead(2);
896 self.skip_linebreak();
897 if self.flow_level == 0 {
898 self.allow_simple_key();
899 }
900 need_whitespace = false;
901 }
902 '#' => {
903 let comment_length = self.input.skip_while_non_breakz();
904 self.mark.index += comment_length;
905 self.mark.col += comment_length;
906 }
907 _ => break,
908 }
909 }
910
911 if need_whitespace {
912 Err(ScanError::new_str(self.mark(), "expected whitespace"))
913 } else {
914 Ok(())
915 }
916 }
917
918 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
919 let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
920 self.mark.col += n_bytes;
921 self.mark.index += n_bytes;
922 result.map_err(|msg| ScanError::new_str(self.mark, msg))
923 }
924
925 fn fetch_stream_start(&mut self) {
926 let mark = self.mark;
927 self.indent = -1;
928 self.stream_start_produced = true;
929 self.allow_simple_key();
930 self.tokens.push_back(Token(
931 Span::empty(mark),
932 TokenType::StreamStart(TEncoding::Utf8),
933 ));
934 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
935 }
936
937 fn fetch_stream_end(&mut self) -> ScanResult {
938 if self.mark.col != 0 {
940 self.mark.col = 0;
941 self.mark.line += 1;
942 }
943
944 for sk in &mut self.simple_keys {
947 if sk.required && sk.possible {
948 return Err(ScanError::new_str(self.mark, "simple key expected"));
949 }
950 sk.possible = false;
951 }
952
953 self.unroll_indent(-1);
954 self.remove_simple_key()?;
955 self.disallow_simple_key();
956
957 self.tokens
958 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
959 Ok(())
960 }
961
962 fn fetch_directive(&mut self) -> ScanResult {
963 self.unroll_indent(-1);
964 self.remove_simple_key()?;
965
966 self.disallow_simple_key();
967
968 let tok = self.scan_directive()?;
969 self.tokens.push_back(tok);
970
971 Ok(())
972 }
973
974 fn scan_directive(&mut self) -> Result<Token<'input>, ScanError> {
975 let start_mark = self.mark;
976 self.skip_non_blank();
977
978 let name = self.scan_directive_name()?;
979 let tok = match name.as_ref() {
980 "YAML" => self.scan_version_directive_value(&start_mark)?,
981 "TAG" => self.scan_tag_directive_value(&start_mark)?,
982 _ => {
984 let line_len = self.input.skip_while_non_breakz();
986 self.mark.index += line_len;
987 self.mark.col += line_len;
988 Token(
990 Span::new(start_mark, self.mark),
991 TokenType::TagDirective(Cow::default(), Cow::default()),
992 )
993 }
996 };
997
998 self.skip_ws_to_eol(SkipTabs::Yes)?;
999
1000 if self.input.next_is_breakz() {
1001 self.input.lookahead(2);
1002 self.skip_linebreak();
1003 Ok(tok)
1004 } else {
1005 Err(ScanError::new_str(
1006 start_mark,
1007 "while scanning a directive, did not find expected comment or line break",
1008 ))
1009 }
1010 }
1011
1012 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1013 let n_blanks = self.input.skip_while_blank();
1014 self.mark.index += n_blanks;
1015 self.mark.col += n_blanks;
1016
1017 let major = self.scan_version_directive_number(mark)?;
1018
1019 if self.input.peek() != '.' {
1020 return Err(ScanError::new_str(
1021 *mark,
1022 "while scanning a YAML directive, did not find expected digit or '.' character",
1023 ));
1024 }
1025 self.skip_non_blank();
1026
1027 let minor = self.scan_version_directive_number(mark)?;
1028
1029 Ok(Token(
1030 Span::new(*mark, self.mark),
1031 TokenType::VersionDirective(major, minor),
1032 ))
1033 }
1034
1035 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1036 let start_mark = self.mark;
1037 let mut string = String::new();
1038
1039 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1040 self.mark.index += n_chars;
1041 self.mark.col += n_chars;
1042
1043 if string.is_empty() {
1044 return Err(ScanError::new_str(
1045 start_mark,
1046 "while scanning a directive, could not find expected directive name",
1047 ));
1048 }
1049
1050 if !is_blank_or_breakz(self.input.peek()) {
1051 return Err(ScanError::new_str(
1052 start_mark,
1053 "while scanning a directive, found unexpected non-alphabetical character",
1054 ));
1055 }
1056
1057 Ok(string)
1058 }
1059
1060 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1061 let mut val = 0u32;
1062 let mut length = 0usize;
1063 while let Some(digit) = self.input.look_ch().to_digit(10) {
1064 if length + 1 > 9 {
1065 return Err(ScanError::new_str(
1066 *mark,
1067 "while scanning a YAML directive, found extremely long version number",
1068 ));
1069 }
1070 length += 1;
1071 val = val * 10 + digit;
1072 self.skip_non_blank();
1073 }
1074
1075 if length == 0 {
1076 return Err(ScanError::new_str(
1077 *mark,
1078 "while scanning a YAML directive, did not find expected version number",
1079 ));
1080 }
1081
1082 Ok(val)
1083 }
1084
1085 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token<'input>, ScanError> {
1086 let n_blanks = self.input.skip_while_blank();
1087 self.mark.index += n_blanks;
1088 self.mark.col += n_blanks;
1089
1090 let handle = self.scan_tag_handle(true, mark)?;
1091
1092 let n_blanks = self.input.skip_while_blank();
1093 self.mark.index += n_blanks;
1094 self.mark.col += n_blanks;
1095
1096 let prefix = self.scan_tag_prefix(mark)?;
1097
1098 self.input.lookahead(1);
1099
1100 if self.input.next_is_blank_or_breakz() {
1101 Ok(Token(
1102 Span::new(*mark, self.mark),
1103 TokenType::TagDirective(handle.into(), prefix.into()),
1104 ))
1105 } else {
1106 Err(ScanError::new_str(
1107 *mark,
1108 "while scanning TAG, did not find expected whitespace or line break",
1109 ))
1110 }
1111 }
1112
1113 fn fetch_tag(&mut self) -> ScanResult {
1114 self.save_simple_key();
1115 self.disallow_simple_key();
1116
1117 let tok = self.scan_tag()?;
1118 self.tokens.push_back(tok);
1119 Ok(())
1120 }
1121
1122 fn scan_tag(&mut self) -> Result<Token<'input>, ScanError> {
1123 let start_mark = self.mark;
1124 let mut handle = String::new();
1125 let mut suffix;
1126
1127 self.input.lookahead(2);
1129
1130 if self.input.nth_char_is(1, '<') {
1131 suffix = self.scan_verbatim_tag(&start_mark)?;
1132 } else {
1133 handle = self.scan_tag_handle(false, &start_mark)?;
1135 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1137 let is_secondary_handle = handle == "!!";
1139 suffix =
1140 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?;
1141 } else {
1142 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?;
1143 "!".clone_into(&mut handle);
1144 if suffix.is_empty() {
1147 handle.clear();
1148 "!".clone_into(&mut suffix);
1149 }
1150 }
1151 }
1152
1153 if is_blank_or_breakz(self.input.look_ch())
1154 || (self.flow_level > 0 && self.input.next_is_flow())
1155 {
1156 Ok(Token(
1158 Span::new(start_mark, self.mark),
1159 TokenType::Tag(handle, suffix),
1160 ))
1161 } else {
1162 Err(ScanError::new_str(
1163 start_mark,
1164 "while scanning a tag, did not find expected whitespace or line break",
1165 ))
1166 }
1167 }
1168
1169 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1170 let mut string = String::new();
1171 if self.input.look_ch() != '!' {
1172 return Err(ScanError::new_str(
1173 *mark,
1174 "while scanning a tag, did not find expected '!'",
1175 ));
1176 }
1177
1178 string.push(self.input.peek());
1179 self.skip_non_blank();
1180
1181 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1182 self.mark.index += n_chars;
1183 self.mark.col += n_chars;
1184
1185 if self.input.peek() == '!' {
1187 string.push(self.input.peek());
1188 self.skip_non_blank();
1189 } else if directive && string != "!" {
1190 return Err(ScanError::new_str(
1194 *mark,
1195 "while parsing a tag directive, did not find expected '!'",
1196 ));
1197 }
1198 Ok(string)
1199 }
1200
1201 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1207 let mut string = String::new();
1208
1209 if self.input.look_ch() == '!' {
1210 string.push(self.input.peek());
1212 self.skip_non_blank();
1213 } else if !is_tag_char(self.input.peek()) {
1214 return Err(ScanError::new_str(
1216 *start_mark,
1217 "invalid global tag character",
1218 ));
1219 } else if self.input.peek() == '%' {
1220 string.push(self.scan_uri_escapes(start_mark)?);
1222 } else {
1223 string.push(self.input.peek());
1225 self.skip_non_blank();
1226 }
1227
1228 while is_uri_char(self.input.look_ch()) {
1229 if self.input.peek() == '%' {
1230 string.push(self.scan_uri_escapes(start_mark)?);
1231 } else {
1232 string.push(self.input.peek());
1233 self.skip_non_blank();
1234 }
1235 }
1236
1237 Ok(string)
1238 }
1239
1240 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1244 self.skip_non_blank();
1246 self.skip_non_blank();
1247
1248 let mut string = String::new();
1249 while is_uri_char(self.input.look_ch()) {
1250 if self.input.peek() == '%' {
1251 string.push(self.scan_uri_escapes(start_mark)?);
1252 } else {
1253 string.push(self.input.peek());
1254 self.skip_non_blank();
1255 }
1256 }
1257
1258 if self.input.peek() != '>' {
1259 return Err(ScanError::new_str(
1260 *start_mark,
1261 "while scanning a verbatim tag, did not find the expected '>'",
1262 ));
1263 }
1264 self.skip_non_blank();
1265
1266 Ok(string)
1267 }
1268
1269 fn scan_tag_shorthand_suffix(
1270 &mut self,
1271 _directive: bool,
1272 _is_secondary: bool,
1273 head: &str,
1274 mark: &Marker,
1275 ) -> Result<String, ScanError> {
1276 let mut length = head.len();
1277 let mut string = String::new();
1278
1279 if length > 1 {
1282 string.extend(head.chars().skip(1));
1283 }
1284
1285 while is_tag_char(self.input.look_ch()) {
1286 if self.input.peek() == '%' {
1288 string.push(self.scan_uri_escapes(mark)?);
1289 } else {
1290 string.push(self.input.peek());
1291 self.skip_non_blank();
1292 }
1293
1294 length += 1;
1295 }
1296
1297 if length == 0 {
1298 return Err(ScanError::new_str(
1299 *mark,
1300 "while parsing a tag, did not find expected tag URI",
1301 ));
1302 }
1303
1304 Ok(string)
1305 }
1306
1307 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1308 let mut width = 0usize;
1309 let mut code = 0u32;
1310 loop {
1311 self.input.lookahead(3);
1312
1313 let c = self.input.peek_nth(1);
1314 let nc = self.input.peek_nth(2);
1315
1316 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1317 return Err(ScanError::new_str(
1318 *mark,
1319 "while parsing a tag, found an invalid escape sequence",
1320 ));
1321 }
1322
1323 let byte = (as_hex(c) << 4) + as_hex(nc);
1324 if width == 0 {
1325 width = match byte {
1326 _ if byte & 0x80 == 0x00 => 1,
1327 _ if byte & 0xE0 == 0xC0 => 2,
1328 _ if byte & 0xF0 == 0xE0 => 3,
1329 _ if byte & 0xF8 == 0xF0 => 4,
1330 _ => {
1331 return Err(ScanError::new_str(
1332 *mark,
1333 "while parsing a tag, found an incorrect leading UTF-8 byte",
1334 ));
1335 }
1336 };
1337 code = byte;
1338 } else {
1339 if byte & 0xc0 != 0x80 {
1340 return Err(ScanError::new_str(
1341 *mark,
1342 "while parsing a tag, found an incorrect trailing UTF-8 byte",
1343 ));
1344 }
1345 code = (code << 8) + byte;
1346 }
1347
1348 self.skip_n_non_blank(3);
1349
1350 width -= 1;
1351 if width == 0 {
1352 break;
1353 }
1354 }
1355
1356 match char::from_u32(code) {
1357 Some(ch) => Ok(ch),
1358 None => Err(ScanError::new_str(
1359 *mark,
1360 "while parsing a tag, found an invalid UTF-8 codepoint",
1361 )),
1362 }
1363 }
1364
1365 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
1366 self.save_simple_key();
1367 self.disallow_simple_key();
1368
1369 let tok = self.scan_anchor(alias)?;
1370
1371 self.tokens.push_back(tok);
1372
1373 Ok(())
1374 }
1375
1376 fn scan_anchor(&mut self, alias: bool) -> Result<Token<'input>, ScanError> {
1377 let mut string = String::new();
1378 let start_mark = self.mark;
1379
1380 self.skip_non_blank();
1381 while is_anchor_char(self.input.look_ch()) {
1382 string.push(self.input.peek());
1383 self.skip_non_blank();
1384 }
1385
1386 if string.is_empty() {
1387 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
1388 }
1389
1390 let tok = if alias {
1391 TokenType::Alias(string.into())
1392 } else {
1393 TokenType::Anchor(string.into())
1394 };
1395 Ok(Token(Span::new(start_mark, self.mark), tok))
1396 }
1397
1398 fn fetch_flow_collection_start(&mut self, tok: TokenType<'input>) -> ScanResult {
1399 self.save_simple_key();
1401
1402 self.roll_one_col_indent();
1403 self.increase_flow_level()?;
1404
1405 self.allow_simple_key();
1406
1407 let start_mark = self.mark;
1408 self.skip_non_blank();
1409
1410 if tok == TokenType::FlowMappingStart {
1411 self.flow_mapping_started = true;
1412 } else {
1413 self.implicit_flow_mapping_states
1414 .push(ImplicitMappingState::Possible);
1415 }
1416
1417 self.skip_ws_to_eol(SkipTabs::Yes)?;
1418
1419 self.tokens
1420 .push_back(Token(Span::new(start_mark, self.mark), tok));
1421 Ok(())
1422 }
1423
1424 fn fetch_flow_collection_end(&mut self, tok: TokenType<'input>) -> ScanResult {
1425 self.remove_simple_key()?;
1426 self.decrease_flow_level();
1427
1428 self.disallow_simple_key();
1429
1430 if matches!(tok, TokenType::FlowSequenceEnd) {
1431 self.end_implicit_mapping(self.mark);
1432 self.implicit_flow_mapping_states.pop();
1434 }
1435
1436 let start_mark = self.mark;
1437 self.skip_non_blank();
1438 self.skip_ws_to_eol(SkipTabs::Yes)?;
1439
1440 if self.flow_level > 0 {
1446 self.adjacent_value_allowed_at = self.mark.index;
1447 }
1448
1449 self.tokens
1450 .push_back(Token(Span::new(start_mark, self.mark), tok));
1451 Ok(())
1452 }
1453
1454 fn fetch_flow_entry(&mut self) -> ScanResult {
1456 self.remove_simple_key()?;
1457 self.allow_simple_key();
1458
1459 self.end_implicit_mapping(self.mark);
1460
1461 let start_mark = self.mark;
1462 self.skip_non_blank();
1463 self.skip_ws_to_eol(SkipTabs::Yes)?;
1464
1465 self.tokens.push_back(Token(
1466 Span::new(start_mark, self.mark),
1467 TokenType::FlowEntry,
1468 ));
1469 Ok(())
1470 }
1471
1472 fn increase_flow_level(&mut self) -> ScanResult {
1473 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1474 self.flow_level = self
1475 .flow_level
1476 .checked_add(1)
1477 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
1478 Ok(())
1479 }
1480
1481 fn decrease_flow_level(&mut self) {
1482 if self.flow_level > 0 {
1483 self.flow_level -= 1;
1484 self.simple_keys.pop().unwrap();
1485 }
1486 }
1487
/// Handle a `-` block sequence entry indicator: queue a `BlockEntry` token.
///
/// `roll_indent` is called with `BlockSequenceStart` for the entry's column.
/// NOTE(review): presumably this opens a new block sequence when needed —
/// `roll_indent` is defined outside this part of the file; confirm.
///
/// # Errors
/// Returns a `ScanError` if the entry appears inside a flow collection, where
/// block entries are not allowed, after a misplaced anchor/tag, or if it is
/// separated from a following `-` by tabs.
fn fetch_block_entry(&mut self) -> ScanResult {
    // Block entries cannot appear inside flow collections.
    if self.flow_level > 0 {
        return Err(ScanError::new_str(
            self.mark,
            r#""-" is only valid inside a block"#,
        ));
    }
    // A block entry is only accepted where a simple key would be allowed.
    if !self.simple_key_allowed {
        return Err(ScanError::new_str(
            self.mark,
            "block sequence entries are not allowed in this context",
        ));
    }

    // An anchor or tag at column 0, directly followed by a `-` at column 0 while
    // an indentation level is active, is rejected.
    if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
        if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
            return Err(ScanError::new_str(
                span.start,
                "invalid indentation for anchor",
            ));
        }
    }

    // Skip over the `-`.
    let mark = self.mark;
    self.skip_non_blank();

    self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
    // A nested `- -` must be separated by spaces, not tabs.
    let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
    self.input.lookahead(2);
    if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
    {
        return Err(ScanError::new_str(
            self.mark,
            "'-' must be followed by a valid YAML whitespace",
        ));
    }

    self.skip_ws_to_eol(SkipTabs::No)?;
    self.input.lookahead(1);
    // The entry's content starts on a later line, or is a flow indicator.
    if self.input.next_is_break() || self.input.next_is_flow() {
        self.roll_one_col_indent();
    }

    self.remove_simple_key()?;
    self.allow_simple_key();

    // The token is an empty span at the position reached after the skipped whitespace.
    self.tokens
        .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));

    Ok(())
}
1549
1550 fn fetch_document_indicator(&mut self, t: TokenType<'input>) -> ScanResult {
1551 self.unroll_indent(-1);
1552 self.remove_simple_key()?;
1553 self.disallow_simple_key();
1554
1555 let mark = self.mark;
1556
1557 self.skip_n_non_blank(3);
1558
1559 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
1560 Ok(())
1561 }
1562
1563 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1564 self.save_simple_key();
1565 self.allow_simple_key();
1566 let tok = self.scan_block_scalar(literal)?;
1567
1568 self.tokens.push_back(tok);
1569 Ok(())
1570 }
1571
1572 #[allow(clippy::too_many_lines)]
1573 fn scan_block_scalar(&mut self, literal: bool) -> Result<Token<'input>, ScanError> {
1574 let start_mark = self.mark;
1575 let mut chomping = Chomping::Clip;
1576 let mut increment: usize = 0;
1577 let mut indent: usize = 0;
1578 let mut trailing_blank: bool;
1579 let mut leading_blank: bool = false;
1580 let style = if literal {
1581 ScalarStyle::Literal
1582 } else {
1583 ScalarStyle::Folded
1584 };
1585
1586 let mut string = String::new();
1587 let mut leading_break = String::new();
1588 let mut trailing_breaks = String::new();
1589 let mut chomping_break = String::new();
1590
1591 self.skip_non_blank();
1593 self.unroll_non_block_indents();
1594
1595 if self.input.look_ch() == '+' || self.input.peek() == '-' {
1596 if self.input.peek() == '+' {
1597 chomping = Chomping::Keep;
1598 } else {
1599 chomping = Chomping::Strip;
1600 }
1601 self.skip_non_blank();
1602 self.input.lookahead(1);
1603 if self.input.next_is_digit() {
1604 if self.input.peek() == '0' {
1605 return Err(ScanError::new_str(
1606 start_mark,
1607 "while scanning a block scalar, found an indentation indicator equal to 0",
1608 ));
1609 }
1610 increment = (self.input.peek() as usize) - ('0' as usize);
1611 self.skip_non_blank();
1612 }
1613 } else if self.input.next_is_digit() {
1614 if self.input.peek() == '0' {
1615 return Err(ScanError::new_str(
1616 start_mark,
1617 "while scanning a block scalar, found an indentation indicator equal to 0",
1618 ));
1619 }
1620
1621 increment = (self.input.peek() as usize) - ('0' as usize);
1622 self.skip_non_blank();
1623 self.input.lookahead(1);
1624 if self.input.peek() == '+' || self.input.peek() == '-' {
1625 if self.input.peek() == '+' {
1626 chomping = Chomping::Keep;
1627 } else {
1628 chomping = Chomping::Strip;
1629 }
1630 self.skip_non_blank();
1631 }
1632 }
1633
1634 self.skip_ws_to_eol(SkipTabs::Yes)?;
1635
1636 self.input.lookahead(1);
1638 if !self.input.next_is_breakz() {
1639 return Err(ScanError::new_str(
1640 start_mark,
1641 "while scanning a block scalar, did not find expected comment or line break",
1642 ));
1643 }
1644
1645 if self.input.next_is_break() {
1646 self.input.lookahead(2);
1647 self.read_break(&mut chomping_break);
1648 }
1649
1650 if self.input.look_ch() == '\t' {
1651 return Err(ScanError::new_str(
1652 start_mark,
1653 "a block scalar content cannot start with a tab",
1654 ));
1655 }
1656
1657 if increment > 0 {
1658 indent = if self.indent >= 0 {
1659 (self.indent + increment as isize) as usize
1660 } else {
1661 increment
1662 }
1663 }
1664
1665 if indent == 0 {
1667 self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
1668 } else {
1669 self.skip_block_scalar_indent(indent, &mut trailing_breaks);
1670 }
1671
1672 if self.input.next_is_z() {
1677 let contents = match chomping {
1678 Chomping::Strip => String::new(),
1680 _ if self.mark.line == start_mark.line() => String::new(),
1682 Chomping::Clip => chomping_break,
1685 Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
1688 Chomping::Keep => trailing_breaks,
1690 };
1691 return Ok(Token(
1692 Span::new(start_mark, self.mark),
1693 TokenType::Scalar(style, contents.into()),
1694 ));
1695 }
1696
1697 if self.mark.col < indent && (self.mark.col as isize) > self.indent {
1698 return Err(ScanError::new_str(
1699 self.mark,
1700 "wrongly indented line in block scalar",
1701 ));
1702 }
1703
1704 let mut line_buffer = String::with_capacity(100);
1705 let start_mark = self.mark;
1706 while self.mark.col == indent && !self.input.next_is_z() {
1707 if indent == 0 {
1708 self.input.lookahead(4);
1709 if self.input.next_is_document_end() {
1710 break;
1711 }
1712 }
1713
1714 trailing_blank = self.input.next_is_blank();
1716 if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1717 string.push_str(&trailing_breaks);
1718 if trailing_breaks.is_empty() {
1719 string.push(' ');
1720 }
1721 } else {
1722 string.push_str(&leading_break);
1723 string.push_str(&trailing_breaks);
1724 }
1725
1726 leading_break.clear();
1727 trailing_breaks.clear();
1728
1729 leading_blank = self.input.next_is_blank();
1730
1731 self.scan_block_scalar_content_line(&mut string, &mut line_buffer);
1732
1733 self.input.lookahead(2);
1735 if self.input.next_is_z() {
1736 break;
1737 }
1738
1739 self.read_break(&mut leading_break);
1740
1741 self.skip_block_scalar_indent(indent, &mut trailing_breaks);
1743 }
1744
1745 if chomping != Chomping::Strip {
1747 string.push_str(&leading_break);
1748 if self.input.next_is_z() && self.mark.col >= indent.max(1) {
1752 string.push('\n');
1753 }
1754 }
1755
1756 if chomping == Chomping::Keep {
1757 string.push_str(&trailing_breaks);
1758 }
1759
1760 Ok(Token(
1761 Span::new(start_mark, self.mark),
1762 TokenType::Scalar(style, string.into()),
1763 ))
1764 }
1765
1766 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
1776 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
1778 string.push(self.input.peek());
1779 self.skip_blank();
1785 }
1786
1787 if self.input.buf_is_empty() {
1790 while let Some(c) = self.input.raw_read_non_breakz_ch() {
1796 line_buffer.push(c);
1797 }
1798
1799 let n_chars = line_buffer.chars().count();
1801 self.mark.col += n_chars;
1802 self.mark.index += n_chars;
1803
1804 string.reserve(line_buffer.len());
1806 string.push_str(line_buffer);
1807 line_buffer.clear();
1809 }
1810 }
1811
1812 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
1814 loop {
1815 if indent < self.input.bufmaxlen() - 2 {
1817 self.input.lookahead(self.input.bufmaxlen());
1818 while self.mark.col < indent && self.input.peek() == ' ' {
1819 self.skip_blank();
1820 }
1821 } else {
1822 loop {
1823 self.input.lookahead(self.input.bufmaxlen());
1824 while !self.input.buf_is_empty()
1825 && self.mark.col < indent
1826 && self.input.peek() == ' '
1827 {
1828 self.skip_blank();
1829 }
1830 if self.mark.col == indent
1834 || (!self.input.buf_is_empty() && self.input.peek() != ' ')
1835 {
1836 break;
1837 }
1838 }
1839 self.input.lookahead(2);
1840 }
1841
1842 if self.input.next_is_break() {
1844 self.read_break(breaks);
1845 } else {
1846 break;
1848 }
1849 }
1850 }
1851
1852 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
1857 let mut max_indent = 0;
1858 loop {
1859 while self.input.look_ch() == ' ' {
1861 self.skip_blank();
1862 }
1863
1864 if self.mark.col > max_indent {
1865 max_indent = self.mark.col;
1866 }
1867
1868 if self.input.next_is_break() {
1869 self.input.lookahead(2);
1871 self.read_break(breaks);
1872 } else {
1873 break;
1875 }
1876 }
1877
1878 *indent = max_indent.max((self.indent + 1) as usize);
1887 if self.indent > 0 {
1888 *indent = (*indent).max(1);
1889 }
1890 }
1891
1892 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1893 self.save_simple_key();
1894 self.disallow_simple_key();
1895
1896 let tok = self.scan_flow_scalar(single)?;
1897
1898 self.skip_to_next_token()?;
1901 self.adjacent_value_allowed_at = self.mark.index;
1902
1903 self.tokens.push_back(tok);
1904 Ok(())
1905 }
1906
#[allow(clippy::too_many_lines)]
/// Scan a single- or double-quoted scalar and build its `Scalar` token.
///
/// `single` selects the style: `true` scans up to a closing `'`, `false` up to a closing
/// `"`. The first character under the cursor is skipped without being inspected
/// (presumably the opening quote — the caller is expected to have checked it).
///
/// Line breaks inside the scalar are folded: a lone break becomes a space, additional
/// breaks are kept, and blanks around breaks are dropped.
///
/// # Errors
/// Returns a `ScanError` when:
/// - a document indicator is found at column 0 inside the scalar,
/// - the end of the stream is reached before the closing quote,
/// - a line of the scalar starts at a column below the current indentation,
/// - a tab is used in the indentation columns of a continuation line,
/// - an escape sequence is invalid (from `consume_flow_scalar_non_whitespace_chars`),
/// - unexpected content follows the closing quote.
fn scan_flow_scalar(&mut self, single: bool) -> Result<Token<'input>, ScanError> {
    let start_mark = self.mark;

    let mut string = String::new();
    // First line break of the current whitespace run, and the breaks that follow it.
    let mut leading_break = String::new();
    let mut trailing_breaks = String::new();
    // Blanks seen since the last content character while no break has been met.
    let mut whitespaces = String::new();
    // True once a line break has been consumed in the current whitespace run.
    let mut leading_blanks;

    // Step over the first character (see the function documentation).
    self.skip_non_blank();

    loop {
        self.input.lookahead(4);

        // `---` / `...` style indicators at the start of a line cannot be scalar content.
        if self.mark.col == 0 && self.input.next_is_document_indicator() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a quoted scalar, found unexpected document indicator",
            ));
        }

        // The closing quote was never found.
        if self.input.next_is_z() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a quoted scalar, found unexpected end of stream",
            ));
        }

        if (self.mark.col as isize) < self.indent {
            return Err(ScanError::new_str(
                start_mark,
                "invalid indentation in quoted scalar",
            ));
        }

        // Consume content up to the next blank, break, end of input or closing quote.
        // The helper sets `leading_blanks` itself when it consumes an escaped line break.
        leading_blanks = false;
        self.consume_flow_scalar_non_whitespace_chars(
            single,
            &mut string,
            &mut leading_blanks,
            &start_mark,
        )?;

        // Stop on the closing quote that matches the scalar's style.
        match self.input.look_ch() {
            '\'' if single => break,
            '"' if !single => break,
            _ => {}
        }

        // Consume the run of blanks and line breaks that follows the content.
        while self.input.next_is_blank() || self.input.next_is_break() {
            if self.input.next_is_blank() {
                if leading_blanks {
                    // After a line break, blanks are indentation and are dropped; a tab
                    // is rejected while still below the current indentation level.
                    if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tab cannot be used as indentation",
                        ));
                    }
                    self.skip_blank();
                } else {
                    // Blanks before any line break are kept; they may be part of the
                    // content.
                    whitespaces.push(self.input.peek());
                    self.skip_blank();
                }
            } else {
                self.input.lookahead(2);
                if leading_blanks {
                    // Not the first break of this run.
                    self.read_break(&mut trailing_breaks);
                } else {
                    // First break of this run: blanks collected before it are discarded.
                    whitespaces.clear();
                    self.read_break(&mut leading_break);
                    leading_blanks = true;
                }
            }
            self.input.lookahead(1);
        }

        // Either fold the line breaks or append the collected blanks.
        if leading_blanks {
            if leading_break.is_empty() {
                // `leading_blanks` is set but no break was recorded in `leading_break`:
                // this happens when the helper consumed an escaped line break. Only the
                // subsequent breaks are appended (`leading_break` is empty here, so
                // pushing it adds nothing).
                string.push_str(&leading_break);
                string.push_str(&trailing_breaks);
                trailing_breaks.clear();
                leading_break.clear();
            } else {
                // A single break folds into one space; with several breaks, the first is
                // dropped and the others are kept as they were read.
                if trailing_breaks.is_empty() {
                    string.push(' ');
                } else {
                    string.push_str(&trailing_breaks);
                    trailing_breaks.clear();
                }
                leading_break.clear();
            }
        } else {
            string.push_str(&whitespaces);
            whitespaces.clear();
        }
    }

    // Step over the closing quote.
    self.skip_non_blank();
    // Only specific characters may follow the scalar on the same line.
    self.skip_ws_to_eol(SkipTabs::Yes)?;
    match self.input.peek() {
        // Flow indicators, when inside a flow collection.
        ',' | '}' | ']' if self.flow_level > 0 => {}
        // Line break or end of input.
        c if is_breakz(c) => {}
        // `:` outside a flow collection: accepted only if the scalar started on this
        // same line.
        ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
        // `:` inside a flow collection is accepted regardless of the line.
        ':' if self.flow_level > 0 => {}
        _ => {
            // NOTE(review): this message says "double-quoted" but is also produced for
            // single-quoted scalars.
            return Err(ScanError::new_str(
                self.mark,
                "invalid trailing content after double-quoted scalar",
            ));
        }
    }

    let style = if single {
        ScalarStyle::SingleQuoted
    } else {
        ScalarStyle::DoubleQuoted
    };
    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string.into()),
    ))
}
2043
2044 fn consume_flow_scalar_non_whitespace_chars(
2053 &mut self,
2054 single: bool,
2055 string: &mut String,
2056 leading_blanks: &mut bool,
2057 start_mark: &Marker,
2058 ) -> Result<(), ScanError> {
2059 self.input.lookahead(2);
2060 while !is_blank_or_breakz(self.input.peek()) {
2061 match self.input.peek() {
2062 '\'' if self.input.peek_nth(1) == '\'' && single => {
2064 string.push('\'');
2065 self.skip_n_non_blank(2);
2066 }
2067 '\'' if single => break,
2069 '"' if !single => break,
2070 '\\' if !single && is_break(self.input.peek_nth(1)) => {
2072 self.input.lookahead(3);
2073 self.skip_non_blank();
2074 self.skip_linebreak();
2075 *leading_blanks = true;
2076 break;
2077 }
2078 '\\' if !single => {
2080 string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
2081 }
2082 c => {
2083 string.push(c);
2084 self.skip_non_blank();
2085 }
2086 }
2087 self.input.lookahead(2);
2088 }
2089 Ok(())
2090 }
2091
2092 fn resolve_flow_scalar_escape_sequence(
2099 &mut self,
2100 start_mark: &Marker,
2101 ) -> Result<char, ScanError> {
2102 let mut code_length = 0usize;
2103 let mut ret = '\0';
2104
2105 match self.input.peek_nth(1) {
2106 '0' => ret = '\0',
2107 'a' => ret = '\x07',
2108 'b' => ret = '\x08',
2109 't' | '\t' => ret = '\t',
2110 'n' => ret = '\n',
2111 'v' => ret = '\x0b',
2112 'f' => ret = '\x0c',
2113 'r' => ret = '\x0d',
2114 'e' => ret = '\x1b',
2115 ' ' => ret = '\x20',
2116 '"' => ret = '"',
2117 '/' => ret = '/',
2118 '\\' => ret = '\\',
2119 'N' => ret = char::from_u32(0x85).unwrap(),
2121 '_' => ret = char::from_u32(0xA0).unwrap(),
2123 'L' => ret = char::from_u32(0x2028).unwrap(),
2125 'P' => ret = char::from_u32(0x2029).unwrap(),
2127 'x' => code_length = 2,
2128 'u' => code_length = 4,
2129 'U' => code_length = 8,
2130 _ => {
2131 return Err(ScanError::new_str(
2132 *start_mark,
2133 "while parsing a quoted scalar, found unknown escape character",
2134 ))
2135 }
2136 }
2137 self.skip_n_non_blank(2);
2138
2139 if code_length > 0 {
2141 self.input.lookahead(code_length);
2142 let mut value = 0u32;
2143 for i in 0..code_length {
2144 let c = self.input.peek_nth(i);
2145 if !is_hex(c) {
2146 return Err(ScanError::new_str(
2147 *start_mark,
2148 "while parsing a quoted scalar, did not find expected hexadecimal number",
2149 ));
2150 }
2151 value = (value << 4) + as_hex(c);
2152 }
2153
2154 let Some(ch) = char::from_u32(value) else {
2155 return Err(ScanError::new_str(
2156 *start_mark,
2157 "while parsing a quoted scalar, found invalid Unicode character escape code",
2158 ));
2159 };
2160 ret = ch;
2161
2162 self.skip_n_non_blank(code_length);
2163 }
2164 Ok(ret)
2165 }
2166
2167 fn fetch_plain_scalar(&mut self) -> ScanResult {
2168 self.save_simple_key();
2169 self.disallow_simple_key();
2170
2171 let tok = self.scan_plain_scalar()?;
2172
2173 self.tokens.push_back(tok);
2174 Ok(())
2175 }
2176
#[allow(clippy::too_many_lines)]
/// Scan a plain (unquoted) scalar and build its `Scalar` token.
///
/// The scalar may span several lines; line breaks are folded (a lone break becomes a
/// space, additional breaks are kept). Whitespace met after the last content character is
/// buffered in `self.buf_whitespaces` / `self.buf_leading_break` /
/// `self.buf_trailing_breaks` and only appended if more content follows, so trailing
/// whitespace never ends up in the scalar. The token's span ends right after the last
/// content character.
///
/// # Errors
/// Returns a `ScanError` when:
/// - the scalar starts below the required indentation inside a flow construct,
/// - inside a flow construct, a `-` is directly followed by a flow indicator,
/// - a tab is found in the indentation columns of a line that is not otherwise empty,
/// - no character could be consumed at all.
fn scan_plain_scalar(&mut self) -> Result<Token<'input>, ScanError> {
    self.unroll_non_block_indents();
    // Minimum column for the scalar's lines: one past the current indentation level.
    let indent = self.indent + 1;
    let start_mark = self.mark;

    if self.flow_level > 0 && (start_mark.col as isize) < indent {
        return Err(ScanError::new_str(
            start_mark,
            "invalid indentation in flow construct",
        ));
    }

    let mut string = String::with_capacity(32);
    self.buf_whitespaces.clear();
    self.buf_leading_break.clear();
    self.buf_trailing_breaks.clear();
    // Position just after the last content character; used as the end of the span.
    let mut end_mark = self.mark;

    loop {
        self.input.lookahead(4);
        // Stop on a document indicator (only checked while `self.leading_whitespace` is
        // set) or on a `#`.
        if (self.leading_whitespace && self.input.next_is_document_indicator())
            || self.input.peek() == '#'
        {
            break;
        }

        if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
            return Err(ScanError::new_str(
                self.mark,
                "plain scalar cannot start with '-' followed by ,[]{}",
            ));
        }

        if !self.input.next_is_blank_or_breakz()
            && self.input.next_can_be_plain_scalar(self.flow_level > 0)
        {
            // More content follows: flush the whitespace buffered since the previous
            // content character.
            if self.leading_whitespace {
                if self.buf_leading_break.is_empty() {
                    // No first break recorded (`buf_leading_break` is empty, so pushing
                    // it adds nothing); only the subsequent breaks are appended.
                    string.push_str(&self.buf_leading_break);
                    string.push_str(&self.buf_trailing_breaks);
                    self.buf_trailing_breaks.clear();
                    self.buf_leading_break.clear();
                } else {
                    // A single break folds into one space; with several breaks, the
                    // first is dropped and the others are kept.
                    if self.buf_trailing_breaks.is_empty() {
                        string.push(' ');
                    } else {
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                    }
                    self.buf_leading_break.clear();
                }
                self.leading_whitespace = false;
            } else if !self.buf_whitespaces.is_empty() {
                // Blanks between two words on the same line are kept as they are.
                string.push_str(&self.buf_whitespaces);
                self.buf_whitespaces.clear();
            }

            // The character under the cursor was already validated by the enclosing
            // condition; take it before entering the batched loop below.
            string.push(self.input.peek());
            self.skip_non_blank();
            string.reserve(self.input.bufmaxlen());

            // Consume content characters in batches: one lookahead of the full buffer
            // size, then up to `bufmaxlen() - 1` characters before looking ahead again.
            let mut end = false;
            while !end {
                self.input.lookahead(self.input.bufmaxlen());
                for _ in 0..self.input.bufmaxlen() - 1 {
                    if self.input.next_is_blank_or_breakz()
                        || !self.input.next_can_be_plain_scalar(self.flow_level > 0)
                    {
                        end = true;
                        break;
                    }
                    string.push(self.input.peek());
                    self.skip_non_blank();
                }
            }
            end_mark = self.mark;
        }

        // Content stopped on something that is neither a blank nor a line break: the
        // scalar is over.
        if !(self.input.next_is_blank() || self.input.next_is_break()) {
            break;
        }

        // Buffer the blanks and line breaks that follow; they are only added to the
        // scalar if more content comes after them.
        self.input.lookahead(2);
        while self.input.next_is_blank_or_break() {
            if self.input.next_is_blank() {
                if !self.leading_whitespace {
                    // Blank on the same line as the previous content.
                    self.buf_whitespaces.push(self.input.peek());
                    self.skip_blank();
                } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                    // A tab in the indentation columns is tolerated only if the rest of
                    // the line is empty.
                    self.skip_ws_to_eol(SkipTabs::Yes)?;
                    if !self.input.next_is_breakz() {
                        return Err(ScanError::new_str(
                            start_mark,
                            "while scanning a plain scalar, found a tab",
                        ));
                    }
                } else {
                    // Indentation of a continuation line: dropped.
                    self.skip_blank();
                }
            } else {
                if self.leading_whitespace {
                    // Not the first break of this run. Breaks are recorded as `'\n'`.
                    self.skip_break();
                    self.buf_trailing_breaks.push('\n');
                } else {
                    // First break of this run: blanks buffered before it are discarded.
                    self.buf_whitespaces.clear();
                    self.skip_break();
                    self.buf_leading_break.push('\n');
                    self.leading_whitespace = true;
                }
            }
            self.input.lookahead(2);
        }

        // In block context, a line starting below the required column ends the scalar.
        if self.flow_level == 0 && (self.mark.col as isize) < indent {
            break;
        }
    }

    // If the scalar was ended after a line break, a simple key may start here.
    if self.leading_whitespace {
        self.allow_simple_key();
    }

    if string.is_empty() {
        // Nothing was consumed. NOTE(review): presumably reported as an error so that the
        // caller always makes progress — confirm against the token-fetching loop.
        Err(ScanError::new_str(
            start_mark,
            "unexpected end of plain scalar",
        ))
    } else {
        Ok(Token(
            Span::new(start_mark, end_mark),
            TokenType::Scalar(ScalarStyle::Plain, string.into()),
        ))
    }
}
2333
2334 fn fetch_key(&mut self) -> ScanResult {
2335 let start_mark = self.mark;
2336 if self.flow_level == 0 {
2337 if !self.simple_key_allowed {
2339 return Err(ScanError::new_str(
2340 self.mark,
2341 "mapping keys are not allowed in this context",
2342 ));
2343 }
2344 self.roll_indent(
2345 start_mark.col,
2346 None,
2347 TokenType::BlockMappingStart,
2348 start_mark,
2349 );
2350 } else {
2351 self.flow_mapping_started = true;
2353 }
2354
2355 self.remove_simple_key()?;
2356
2357 if self.flow_level == 0 {
2358 self.allow_simple_key();
2359 } else {
2360 self.disallow_simple_key();
2361 }
2362
2363 self.skip_non_blank();
2364 self.skip_yaml_whitespace()?;
2365 if self.input.peek() == '\t' {
2366 return Err(ScanError::new_str(
2367 self.mark(),
2368 "tabs disallowed in this context",
2369 ));
2370 }
2371 self.tokens
2372 .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
2373 Ok(())
2374 }
2375
2376 fn fetch_flow_value(&mut self) -> ScanResult {
2384 let nc = self.input.peek_nth(1);
2385
2386 if self.mark.index != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
2398 return Err(ScanError::new_str(
2399 self.mark,
2400 "':' may not precede any of `[{` in flow mapping",
2401 ));
2402 }
2403
2404 self.fetch_value()
2405 }
2406
/// Handle a `:` value indicator and queue a `Value` token.
///
/// If a simple key candidate is pending, a `Key` token (and, where needed, a
/// `FlowMappingStart` or `BlockMappingStart`) is inserted retroactively at the position
/// where that key started. Otherwise the indicator follows a complex key, or a key is
/// absent, and mapping-start tokens are emitted at the current position.
///
/// Assumes `self.simple_keys` is not empty (it is unwrapped).
///
/// # Errors
/// Returns a `ScanError` if the indicator is followed by a tab that is not valid YAML
/// whitespace, if the key of an implicit flow mapping is on an earlier line than the
/// indicator, or if a value is not allowed here in block context.
fn fetch_value(&mut self) -> ScanResult {
    // Work on a copy of the pending simple key; the stored one is updated further down.
    let sk = self.simple_keys.last().unwrap().clone();
    let start_mark = self.mark;
    // A `:` met while implicit-mapping state is being tracked and no flow mapping was
    // explicitly started: this entry is a single-pair implicit mapping.
    let is_implicit_flow_mapping =
        !self.implicit_flow_mapping_states.is_empty() && !self.flow_mapping_started;
    if is_implicit_flow_mapping {
        *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Inside;
    }

    // Step over the `:`.
    self.skip_non_blank();
    // A tab right after the indicator is rejected when the skipped whitespace contained
    // no valid YAML whitespace and is followed by `-` or an alphabetic character.
    if self.input.look_ch() == '\t'
        && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
        && (self.input.peek() == '-' || self.input.next_is_alpha())
    {
        return Err(ScanError::new_str(
            self.mark,
            "':' must be followed by a valid YAML whitespace",
        ));
    }

    if sk.possible {
        // The preceding token was a simple key: insert `Key` at the queue position it
        // was saved with (`token_number` is absolute, the queue index is relative to
        // the tokens already handed out).
        let tok = Token(Span::empty(sk.mark), TokenType::Key);
        self.insert_token(sk.token_number - self.tokens_parsed, tok);
        if is_implicit_flow_mapping {
            // The key of an implicit flow mapping must be on the same line as the `:`.
            if sk.mark.line < start_mark.line {
                return Err(ScanError::new_str(
                    start_mark,
                    "illegal placement of ':' indicator",
                ));
            }
            // Inserted at the same index, so it ends up in front of the `Key` token.
            self.insert_token(
                sk.token_number - self.tokens_parsed,
                Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
            );
        }

        // Open a block mapping at the key's column if it is deeper than the current
        // indent (no-op in flow context).
        self.roll_indent(
            sk.mark.col,
            Some(sk.token_number),
            TokenType::BlockMappingStart,
            sk.mark,
        );
        self.roll_one_col_indent();

        // The candidate has been used up.
        self.simple_keys.last_mut().unwrap().possible = false;
        self.disallow_simple_key();
    } else {
        if is_implicit_flow_mapping {
            self.tokens
                .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
        }
        // No simple key is pending: the `:` follows a complex key or stands alone.
        if self.flow_level == 0 {
            if !self.simple_key_allowed {
                return Err(ScanError::new_str(
                    start_mark,
                    "mapping values are not allowed in this context",
                ));
            }

            self.roll_indent(
                start_mark.col,
                None,
                TokenType::BlockMappingStart,
                start_mark,
            );
        }
        self.roll_one_col_indent();

        // A simple key may follow the indicator in block context only.
        if self.flow_level == 0 {
            self.allow_simple_key();
        } else {
            self.disallow_simple_key();
        }
    }
    self.tokens
        .push_back(Token(Span::empty(start_mark), TokenType::Value));

    Ok(())
}
2491
2492 fn roll_indent(
2498 &mut self,
2499 col: usize,
2500 number: Option<usize>,
2501 tok: TokenType<'input>,
2502 mark: Marker,
2503 ) {
2504 if self.flow_level > 0 {
2505 return;
2506 }
2507
2508 if self.indent <= col as isize {
2512 if let Some(indent) = self.indents.last() {
2513 if !indent.needs_block_end {
2514 self.indent = indent.indent;
2515 self.indents.pop();
2516 }
2517 }
2518 }
2519
2520 if self.indent < col as isize {
2521 self.indents.push(Indent {
2522 indent: self.indent,
2523 needs_block_end: true,
2524 });
2525 self.indent = col as isize;
2526 let tokens_parsed = self.tokens_parsed;
2527 match number {
2528 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
2529 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
2530 }
2531 }
2532 }
2533
2534 fn unroll_indent(&mut self, col: isize) {
2540 if self.flow_level > 0 {
2541 return;
2542 }
2543 while self.indent > col {
2544 let indent = self.indents.pop().unwrap();
2545 self.indent = indent.indent;
2546 if indent.needs_block_end {
2547 self.tokens
2548 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
2549 }
2550 }
2551 }
2552
2553 fn roll_one_col_indent(&mut self) {
2559 if self.flow_level == 0 && self.indents.last().map_or(false, |x| x.needs_block_end) {
2560 self.indents.push(Indent {
2561 indent: self.indent,
2562 needs_block_end: false,
2563 });
2564 self.indent += 1;
2565 }
2566 }
2567
2568 fn unroll_non_block_indents(&mut self) {
2570 while let Some(indent) = self.indents.last() {
2571 if indent.needs_block_end {
2572 break;
2573 }
2574 self.indent = indent.indent;
2575 self.indents.pop();
2576 }
2577 }
2578
2579 fn save_simple_key(&mut self) {
2581 if self.simple_key_allowed {
2582 let required = self.flow_level == 0
2583 && self.indent == (self.mark.col as isize)
2584 && self.indents.last().unwrap().needs_block_end;
2585 let mut sk = SimpleKey::new(self.mark);
2586 sk.possible = true;
2587 sk.required = required;
2588 sk.token_number = self.tokens_parsed + self.tokens.len();
2589
2590 self.simple_keys.pop();
2591 self.simple_keys.push(sk);
2592 }
2593 }
2594
2595 fn remove_simple_key(&mut self) -> ScanResult {
2596 let last = self.simple_keys.last_mut().unwrap();
2597 if last.possible && last.required {
2598 return Err(ScanError::new_str(self.mark, "simple key expected"));
2599 }
2600
2601 last.possible = false;
2602 Ok(())
2603 }
2604
2605 fn is_within_block(&self) -> bool {
2607 !self.indents.is_empty()
2608 }
2609
2610 fn end_implicit_mapping(&mut self, mark: Marker) {
2616 if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
2617 if *implicit_mapping == ImplicitMappingState::Inside {
2618 self.flow_mapping_started = false;
2619 *implicit_mapping = ImplicitMappingState::Possible;
2620 self.tokens
2621 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
2622 }
2623 }
2624 }
2625}
2626
/// How the line breaks at the end of a block scalar are treated.
///
/// The block scalar header selects `Keep` with a `+` indicator and `Strip` with a `-`
/// indicator.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// Neither the final line break nor the trailing empty lines are kept.
    Strip,
    /// The final line break is kept, the trailing empty lines are not.
    Clip,
    /// The final line break and the trailing empty lines are both kept.
    Keep,
}
2639
#[cfg(test)]
mod test {
    use super::is_anchor_char;

    /// Smoke test: an ASCII letter is accepted as an anchor character.
    #[test]
    fn test_is_anchor_char() {
        assert!(is_anchor_char('x'));
    }
}