1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use std::{char, collections::VecDeque, error::Error, fmt};
13
14use crate::{
15 char_traits::{
16 as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
17 is_tag_char, is_uri_char,
18 },
19 input::{Input, SkipTabs},
20};
21
/// Encoding carried by the [`TokenType::StreamStart`] token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8; the only encoding this scanner reports.
    Utf8,
}
28
/// Presentation style of a scalar, stored alongside its text in
/// [`TokenType::Scalar`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TScalarStyle {
    /// Scalar written without any quoting or block indicator.
    Plain,
    /// Scalar written between single quotes.
    SingleQuoted,
    /// Scalar written between double quotes.
    DoubleQuoted,

    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>`.
    Folded,
}
44
/// A position in the input, tracked as an index, a line and a column.
///
/// The scanner in this file starts at index 0, line 1, column 0 and advances
/// the index and column by one per consumed character.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Marker {
    /// Offset from the start of the input.
    index: usize,
    /// Line number.
    line: usize,
    /// Column number within the line.
    col: usize,
}

impl Marker {
    /// Build a marker from its three components.
    #[must_use]
    pub fn new(index: usize, line: usize, col: usize) -> Marker {
        Self { index, line, col }
    }

    /// Offset from the start of the input.
    #[must_use]
    pub fn index(&self) -> usize {
        let Self { index, .. } = *self;
        index
    }

    /// Line number of the position.
    #[must_use]
    pub fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Column number of the position.
    #[must_use]
    pub fn col(&self) -> usize {
        let Self { col, .. } = *self;
        col
    }
}
81
82#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
84pub struct Span {
85 pub start: Marker,
87 pub end: Marker,
89}
90
91impl Span {
92 #[must_use]
94 pub fn new(start: Marker, end: Marker) -> Span {
95 Span { start, end }
96 }
97
98 #[must_use]
105 pub fn empty(mark: Marker) -> Span {
106 Span {
107 start: mark,
108 end: mark,
109 }
110 }
111}
112
113#[derive(Clone, PartialEq, Debug, Eq)]
115pub struct ScanError {
116 mark: Marker,
118 info: String,
120}
121
122impl ScanError {
123 #[must_use]
125 pub fn new(loc: Marker, info: String) -> ScanError {
126 ScanError { mark: loc, info }
127 }
128
129 #[must_use]
131 pub fn new_str(loc: Marker, info: &str) -> ScanError {
132 ScanError {
133 mark: loc,
134 info: info.to_owned(),
135 }
136 }
137
138 #[must_use]
140 pub fn marker(&self) -> &Marker {
141 &self.mark
142 }
143
144 #[must_use]
146 pub fn info(&self) -> &str {
147 self.info.as_ref()
148 }
149}
150
151impl Error for ScanError {
152 fn source(&self) -> Option<&(dyn Error + 'static)> {
153 None
154 }
155}
156
157impl fmt::Display for ScanError {
158 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
159 write!(
160 formatter,
161 "{} at byte {} line {} column {}",
162 self.info,
163 self.mark.index,
164 self.mark.line,
165 self.mark.col + 1,
166 )
167 }
168}
169
170#[derive(Clone, PartialEq, Debug, Eq)]
172pub enum TokenType {
173 StreamStart(TEncoding),
175 StreamEnd,
177 VersionDirective(
179 u32,
181 u32,
183 ),
184 TagDirective(
186 String,
188 String,
190 ),
191 DocumentStart,
193 DocumentEnd,
195 BlockSequenceStart,
199 BlockMappingStart,
203 BlockEnd,
205 FlowSequenceStart,
207 FlowSequenceEnd,
209 FlowMappingStart,
211 FlowMappingEnd,
213 BlockEntry,
215 FlowEntry,
217 Key,
219 Value,
221 Alias(String),
223 Anchor(String),
225 Tag(
227 String,
229 String,
231 ),
232 Scalar(TScalarStyle, String),
234}
235
236#[derive(Clone, PartialEq, Debug, Eq)]
238pub struct Token(pub Span, pub TokenType);
239
240#[derive(Clone, PartialEq, Debug, Eq)]
275struct SimpleKey {
276 possible: bool,
289 required: bool,
297 token_number: usize,
303 mark: Marker,
305}
306
307impl SimpleKey {
308 fn new(mark: Marker) -> SimpleKey {
310 SimpleKey {
311 possible: false,
312 required: false,
313 token_number: 0,
314 mark,
315 }
316 }
317}
318
/// One entry of the scanner's indentation stack.
#[derive(Debug, Default, Clone)]
struct Indent {
    /// The indentation column recorded for this level.
    indent: isize,
    /// Flag recording whether this level needs a block-end when it is left.
    needs_block_end: bool,
}
343
/// State tracked per open flow sequence regarding implicit mappings.
#[derive(PartialEq, Debug)]
enum ImplicitMappingState {
    /// An implicit mapping may still start; this is the state pushed when a
    /// flow sequence is opened.
    Possible,
    /// The scanner is currently inside an implicit mapping.
    Inside,
}
377
378#[derive(Debug)]
388#[allow(clippy::struct_excessive_bools)]
389pub struct Scanner<T> {
390 input: T,
394 mark: Marker,
396 tokens: VecDeque<Token>,
403 error: Option<ScanError>,
405
406 stream_start_produced: bool,
408 stream_end_produced: bool,
410 adjacent_value_allowed_at: usize,
413 simple_key_allowed: bool,
417 simple_keys: Vec<SimpleKey>,
422 indent: isize,
424 indents: Vec<Indent>,
426 flow_level: u8,
428 tokens_parsed: usize,
432 token_available: bool,
434 leading_whitespace: bool,
436 flow_mapping_started: bool,
443 implicit_flow_mapping_states: Vec<ImplicitMappingState>,
456 buf_leading_break: String,
457 buf_trailing_breaks: String,
458 buf_whitespaces: String,
459}
460
461impl<T: Input> Iterator for Scanner<T> {
462 type Item = Token;
463 fn next(&mut self) -> Option<Token> {
464 if self.error.is_some() {
465 return None;
466 }
467 match self.next_token() {
468 Ok(Some(tok)) => {
469 debug_print!(
470 " \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
471 tok.1,
472 tok.0
473 );
474 Some(tok)
475 }
476 Ok(tok) => tok,
477 Err(e) => {
478 self.error = Some(e);
479 None
480 }
481 }
482 }
483}
484
/// Result type of scanner operations that produce no value on success and a
/// [`ScanError`] on failure.
pub type ScanResult = Result<(), ScanError>;
487
488impl<T: Input> Scanner<T> {
489 pub fn new(input: T) -> Scanner<T> {
491 Scanner {
492 input,
493 mark: Marker::new(0, 1, 0),
494 tokens: VecDeque::new(),
495 error: None,
496
497 stream_start_produced: false,
498 stream_end_produced: false,
499 adjacent_value_allowed_at: 0,
500 simple_key_allowed: true,
501 simple_keys: Vec::new(),
502 indent: -1,
503 indents: Vec::new(),
504 flow_level: 0,
505 tokens_parsed: 0,
506 token_available: false,
507 leading_whitespace: true,
508 flow_mapping_started: false,
509 implicit_flow_mapping_states: vec![],
510
511 buf_leading_break: String::new(),
512 buf_trailing_breaks: String::new(),
513 buf_whitespaces: String::new(),
514 }
515 }
516
517 #[inline]
522 pub fn get_error(&self) -> Option<ScanError> {
523 self.error.clone()
524 }
525
526 #[inline]
528 fn skip_blank(&mut self) {
529 self.input.skip();
530
531 self.mark.index += 1;
532 self.mark.col += 1;
533 }
534
535 #[inline]
537 fn skip_non_blank(&mut self) {
538 self.input.skip();
539
540 self.mark.index += 1;
541 self.mark.col += 1;
542 self.leading_whitespace = false;
543 }
544
545 #[inline]
547 fn skip_n_non_blank(&mut self, count: usize) {
548 self.input.skip_n(count);
549
550 self.mark.index += count;
551 self.mark.col += count;
552 self.leading_whitespace = false;
553 }
554
555 #[inline]
557 fn skip_nl(&mut self) {
558 self.input.skip();
559
560 self.mark.index += 1;
561 self.mark.col = 0;
562 self.mark.line += 1;
563 self.leading_whitespace = true;
564 }
565
566 #[inline]
568 fn skip_linebreak(&mut self) {
569 if self.input.next_2_are('\r', '\n') {
570 self.skip_blank();
573 self.skip_nl();
574 } else if self.input.next_is_break() {
575 self.skip_nl();
576 }
577 }
578
579 #[inline]
581 pub fn stream_started(&self) -> bool {
582 self.stream_start_produced
583 }
584
585 #[inline]
587 pub fn stream_ended(&self) -> bool {
588 self.stream_end_produced
589 }
590
591 #[inline]
593 pub fn mark(&self) -> Marker {
594 self.mark
595 }
596
597 #[inline]
604 fn read_break(&mut self, s: &mut String) {
605 self.skip_break();
606 s.push('\n');
607 }
608
609 #[inline]
614 fn skip_break(&mut self) {
615 let c = self.input.peek();
616 let nc = self.input.peek_nth(1);
617 debug_assert!(is_break(c));
618 if c == '\r' && nc == '\n' {
619 self.skip_blank();
620 }
621 self.skip_nl();
622 }
623
624 fn insert_token(&mut self, pos: usize, tok: Token) {
626 let old_len = self.tokens.len();
627 assert!(pos <= old_len);
628 self.tokens.insert(pos, tok);
629 }
630
631 fn allow_simple_key(&mut self) {
632 self.simple_key_allowed = true;
633 }
634
635 fn disallow_simple_key(&mut self) {
636 self.simple_key_allowed = false;
637 }
638
639 pub fn fetch_next_token(&mut self) -> ScanResult {
644 self.input.lookahead(1);
645
646 if !self.stream_start_produced {
647 self.fetch_stream_start();
648 return Ok(());
649 }
650 self.skip_to_next_token()?;
651
652 debug_print!(
653 " \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
654 self.mark,
655 self.input.peek()
656 );
657
658 self.stale_simple_keys()?;
659
660 let mark = self.mark;
661 self.unroll_indent(mark.col as isize);
662
663 self.input.lookahead(4);
664
665 if self.input.next_is_z() {
666 self.fetch_stream_end()?;
667 return Ok(());
668 }
669
670 if self.mark.col == 0 {
671 if self.input.next_char_is('%') {
672 return self.fetch_directive();
673 } else if self.input.next_is_document_start() {
674 return self.fetch_document_indicator(TokenType::DocumentStart);
675 } else if self.input.next_is_document_end() {
676 self.fetch_document_indicator(TokenType::DocumentEnd)?;
677 self.skip_ws_to_eol(SkipTabs::Yes)?;
678 if !self.input.next_is_breakz() {
679 return Err(ScanError::new_str(
680 self.mark,
681 "invalid content after document end marker",
682 ));
683 }
684 return Ok(());
685 }
686 }
687
688 if (self.mark.col as isize) < self.indent {
689 return Err(ScanError::new_str(self.mark, "invalid indentation"));
690 }
691
692 let c = self.input.peek();
693 let nc = self.input.peek_nth(1);
694 match c {
695 '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
696 '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
697 ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
698 '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
699 ',' => self.fetch_flow_entry(),
700 '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
701 '?' if is_blank_or_breakz(nc) => self.fetch_key(),
702 ':' if is_blank_or_breakz(nc) => self.fetch_value(),
703 ':' if self.flow_level > 0
704 && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at) =>
705 {
706 self.fetch_flow_value()
707 }
708 '*' => self.fetch_anchor(true),
710 '&' => self.fetch_anchor(false),
712 '!' => self.fetch_tag(),
713 '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
715 '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
717 '\'' => self.fetch_flow_scalar(true),
718 '"' => self.fetch_flow_scalar(false),
719 '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
721 ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
722 self.fetch_plain_scalar()
723 }
724 '%' | '@' | '`' => Err(ScanError::new(
725 self.mark,
726 format!("unexpected character: `{c}'"),
727 )),
728 _ => self.fetch_plain_scalar(),
729 }
730 }
731
732 pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
736 if self.stream_end_produced {
737 return Ok(None);
738 }
739
740 if !self.token_available {
741 self.fetch_more_tokens()?;
742 }
743 let Some(t) = self.tokens.pop_front() else {
744 return Err(ScanError::new_str(
745 self.mark,
746 "did not find expected next token",
747 ));
748 };
749 self.token_available = false;
750 self.tokens_parsed += 1;
751
752 if let TokenType::StreamEnd = t.1 {
753 self.stream_end_produced = true;
754 }
755 Ok(Some(t))
756 }
757
758 pub fn fetch_more_tokens(&mut self) -> ScanResult {
762 let mut need_more;
763 loop {
764 if self.tokens.is_empty() {
765 need_more = true;
766 } else {
767 need_more = false;
768 self.stale_simple_keys()?;
770 for sk in &self.simple_keys {
772 if sk.possible && sk.token_number == self.tokens_parsed {
773 need_more = true;
774 break;
775 }
776 }
777 }
778
779 if !need_more {
780 break;
781 }
782 self.fetch_next_token()?;
783 }
784 self.token_available = true;
785
786 Ok(())
787 }
788
789 fn stale_simple_keys(&mut self) -> ScanResult {
797 for sk in &mut self.simple_keys {
798 if sk.possible
799 && self.flow_level == 0
801 && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
802 {
803 if sk.required {
804 return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
805 }
806 sk.possible = false;
807 }
808 }
809 Ok(())
810 }
811
812 fn skip_to_next_token(&mut self) -> ScanResult {
818 loop {
819 match self.input.look_ch() {
821 '\t' if self.is_within_block()
828 && self.leading_whitespace
829 && (self.mark.col as isize) < self.indent =>
830 {
831 self.skip_ws_to_eol(SkipTabs::Yes)?;
832 if !self.input.next_is_breakz() {
834 return Err(ScanError::new_str(
835 self.mark,
836 "tabs disallowed within this context (block indentation)",
837 ));
838 }
839 }
840 '\t' | ' ' => self.skip_blank(),
841 '\n' | '\r' => {
842 self.input.lookahead(2);
843 self.skip_linebreak();
844 if self.flow_level == 0 {
845 self.allow_simple_key();
846 }
847 }
848 '#' => {
849 let comment_length = self.input.skip_while_non_breakz();
850 self.mark.index += comment_length;
851 self.mark.col += comment_length;
852 }
853 _ => break,
854 }
855 }
856 Ok(())
857 }
858
859 fn skip_yaml_whitespace(&mut self) -> ScanResult {
864 let mut need_whitespace = true;
865 loop {
866 match self.input.look_ch() {
867 ' ' => {
868 self.skip_blank();
869
870 need_whitespace = false;
871 }
872 '\n' | '\r' => {
873 self.input.lookahead(2);
874 self.skip_linebreak();
875 if self.flow_level == 0 {
876 self.allow_simple_key();
877 }
878 need_whitespace = false;
879 }
880 '#' => {
881 let comment_length = self.input.skip_while_non_breakz();
882 self.mark.index += comment_length;
883 self.mark.col += comment_length;
884 }
885 _ => break,
886 }
887 }
888
889 if need_whitespace {
890 Err(ScanError::new_str(self.mark(), "expected whitespace"))
891 } else {
892 Ok(())
893 }
894 }
895
896 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
897 let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
898 self.mark.col += n_bytes;
899 self.mark.index += n_bytes;
900 result.map_err(|msg| ScanError::new_str(self.mark, msg))
901 }
902
903 fn fetch_stream_start(&mut self) {
904 let mark = self.mark;
905 self.indent = -1;
906 self.stream_start_produced = true;
907 self.allow_simple_key();
908 self.tokens.push_back(Token(
909 Span::empty(mark),
910 TokenType::StreamStart(TEncoding::Utf8),
911 ));
912 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
913 }
914
915 fn fetch_stream_end(&mut self) -> ScanResult {
916 if self.mark.col != 0 {
918 self.mark.col = 0;
919 self.mark.line += 1;
920 }
921
922 for sk in &mut self.simple_keys {
925 if sk.required && sk.possible {
926 return Err(ScanError::new_str(self.mark, "simple key expected"));
927 }
928 sk.possible = false;
929 }
930
931 self.unroll_indent(-1);
932 self.remove_simple_key()?;
933 self.disallow_simple_key();
934
935 self.tokens
936 .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
937 Ok(())
938 }
939
940 fn fetch_directive(&mut self) -> ScanResult {
941 self.unroll_indent(-1);
942 self.remove_simple_key()?;
943
944 self.disallow_simple_key();
945
946 let tok = self.scan_directive()?;
947 self.tokens.push_back(tok);
948
949 Ok(())
950 }
951
952 fn scan_directive(&mut self) -> Result<Token, ScanError> {
953 let start_mark = self.mark;
954 self.skip_non_blank();
955
956 let name = self.scan_directive_name()?;
957 let tok = match name.as_ref() {
958 "YAML" => self.scan_version_directive_value(&start_mark)?,
959 "TAG" => self.scan_tag_directive_value(&start_mark)?,
960 _ => {
962 let line_len = self.input.skip_while_non_breakz();
964 self.mark.index += line_len;
965 self.mark.col += line_len;
966 Token(
968 Span::new(start_mark, self.mark),
969 TokenType::TagDirective(String::new(), String::new()),
970 )
971 }
974 };
975
976 self.skip_ws_to_eol(SkipTabs::Yes)?;
977
978 if self.input.next_is_breakz() {
979 self.input.lookahead(2);
980 self.skip_linebreak();
981 Ok(tok)
982 } else {
983 Err(ScanError::new_str(
984 start_mark,
985 "while scanning a directive, did not find expected comment or line break",
986 ))
987 }
988 }
989
990 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
991 let n_blanks = self.input.skip_while_blank();
992 self.mark.index += n_blanks;
993 self.mark.col += n_blanks;
994
995 let major = self.scan_version_directive_number(mark)?;
996
997 if self.input.peek() != '.' {
998 return Err(ScanError::new_str(
999 *mark,
1000 "while scanning a YAML directive, did not find expected digit or '.' character",
1001 ));
1002 }
1003 self.skip_non_blank();
1004
1005 let minor = self.scan_version_directive_number(mark)?;
1006
1007 Ok(Token(
1008 Span::new(*mark, self.mark),
1009 TokenType::VersionDirective(major, minor),
1010 ))
1011 }
1012
1013 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1014 let start_mark = self.mark;
1015 let mut string = String::new();
1016
1017 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1018 self.mark.index += n_chars;
1019 self.mark.col += n_chars;
1020
1021 if string.is_empty() {
1022 return Err(ScanError::new_str(
1023 start_mark,
1024 "while scanning a directive, could not find expected directive name",
1025 ));
1026 }
1027
1028 if !is_blank_or_breakz(self.input.peek()) {
1029 return Err(ScanError::new_str(
1030 start_mark,
1031 "while scanning a directive, found unexpected non-alphabetical character",
1032 ));
1033 }
1034
1035 Ok(string)
1036 }
1037
1038 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1039 let mut val = 0u32;
1040 let mut length = 0usize;
1041 while let Some(digit) = self.input.look_ch().to_digit(10) {
1042 if length + 1 > 9 {
1043 return Err(ScanError::new_str(
1044 *mark,
1045 "while scanning a YAML directive, found extremely long version number",
1046 ));
1047 }
1048 length += 1;
1049 val = val * 10 + digit;
1050 self.skip_non_blank();
1051 }
1052
1053 if length == 0 {
1054 return Err(ScanError::new_str(
1055 *mark,
1056 "while scanning a YAML directive, did not find expected version number",
1057 ));
1058 }
1059
1060 Ok(val)
1061 }
1062
1063 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
1064 let n_blanks = self.input.skip_while_blank();
1065 self.mark.index += n_blanks;
1066 self.mark.col += n_blanks;
1067
1068 let handle = self.scan_tag_handle(true, mark)?;
1069
1070 let n_blanks = self.input.skip_while_blank();
1071 self.mark.index += n_blanks;
1072 self.mark.col += n_blanks;
1073
1074 let prefix = self.scan_tag_prefix(mark)?;
1075
1076 self.input.lookahead(1);
1077
1078 if self.input.next_is_blank_or_breakz() {
1079 Ok(Token(
1080 Span::new(*mark, self.mark),
1081 TokenType::TagDirective(handle, prefix),
1082 ))
1083 } else {
1084 Err(ScanError::new_str(
1085 *mark,
1086 "while scanning TAG, did not find expected whitespace or line break",
1087 ))
1088 }
1089 }
1090
1091 fn fetch_tag(&mut self) -> ScanResult {
1092 self.save_simple_key();
1093 self.disallow_simple_key();
1094
1095 let tok = self.scan_tag()?;
1096 self.tokens.push_back(tok);
1097 Ok(())
1098 }
1099
1100 fn scan_tag(&mut self) -> Result<Token, ScanError> {
1101 let start_mark = self.mark;
1102 let mut handle = String::new();
1103 let mut suffix;
1104
1105 self.input.lookahead(2);
1107
1108 if self.input.nth_char_is(1, '<') {
1109 suffix = self.scan_verbatim_tag(&start_mark)?;
1110 } else {
1111 handle = self.scan_tag_handle(false, &start_mark)?;
1113 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
1115 let is_secondary_handle = handle == "!!";
1117 suffix =
1118 self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?;
1119 } else {
1120 suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?;
1121 "!".clone_into(&mut handle);
1122 if suffix.is_empty() {
1125 handle.clear();
1126 "!".clone_into(&mut suffix);
1127 }
1128 }
1129 }
1130
1131 if is_blank_or_breakz(self.input.look_ch())
1132 || (self.flow_level > 0 && self.input.next_is_flow())
1133 {
1134 Ok(Token(
1136 Span::new(start_mark, self.mark),
1137 TokenType::Tag(handle, suffix),
1138 ))
1139 } else {
1140 Err(ScanError::new_str(
1141 start_mark,
1142 "while scanning a tag, did not find expected whitespace or line break",
1143 ))
1144 }
1145 }
1146
1147 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1148 let mut string = String::new();
1149 if self.input.look_ch() != '!' {
1150 return Err(ScanError::new_str(
1151 *mark,
1152 "while scanning a tag, did not find expected '!'",
1153 ));
1154 }
1155
1156 string.push(self.input.peek());
1157 self.skip_non_blank();
1158
1159 let n_chars = self.input.fetch_while_is_alpha(&mut string);
1160 self.mark.index += n_chars;
1161 self.mark.col += n_chars;
1162
1163 if self.input.peek() == '!' {
1165 string.push(self.input.peek());
1166 self.skip_non_blank();
1167 } else if directive && string != "!" {
1168 return Err(ScanError::new_str(
1172 *mark,
1173 "while parsing a tag directive, did not find expected '!'",
1174 ));
1175 }
1176 Ok(string)
1177 }
1178
1179 fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1185 let mut string = String::new();
1186
1187 if self.input.look_ch() == '!' {
1188 string.push(self.input.peek());
1190 self.skip_non_blank();
1191 } else if !is_tag_char(self.input.peek()) {
1192 return Err(ScanError::new_str(
1194 *start_mark,
1195 "invalid global tag character",
1196 ));
1197 } else if self.input.peek() == '%' {
1198 string.push(self.scan_uri_escapes(start_mark)?);
1200 } else {
1201 string.push(self.input.peek());
1203 self.skip_non_blank();
1204 }
1205
1206 while is_uri_char(self.input.look_ch()) {
1207 if self.input.peek() == '%' {
1208 string.push(self.scan_uri_escapes(start_mark)?);
1209 } else {
1210 string.push(self.input.peek());
1211 self.skip_non_blank();
1212 }
1213 }
1214
1215 Ok(string)
1216 }
1217
1218 fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1222 self.skip_non_blank();
1224 self.skip_non_blank();
1225
1226 let mut string = String::new();
1227 while is_uri_char(self.input.look_ch()) {
1228 if self.input.peek() == '%' {
1229 string.push(self.scan_uri_escapes(start_mark)?);
1230 } else {
1231 string.push(self.input.peek());
1232 self.skip_non_blank();
1233 }
1234 }
1235
1236 if self.input.peek() != '>' {
1237 return Err(ScanError::new_str(
1238 *start_mark,
1239 "while scanning a verbatim tag, did not find the expected '>'",
1240 ));
1241 }
1242 self.skip_non_blank();
1243
1244 Ok(string)
1245 }
1246
1247 fn scan_tag_shorthand_suffix(
1248 &mut self,
1249 _directive: bool,
1250 _is_secondary: bool,
1251 head: &str,
1252 mark: &Marker,
1253 ) -> Result<String, ScanError> {
1254 let mut length = head.len();
1255 let mut string = String::new();
1256
1257 if length > 1 {
1260 string.extend(head.chars().skip(1));
1261 }
1262
1263 while is_tag_char(self.input.look_ch()) {
1264 if self.input.peek() == '%' {
1266 string.push(self.scan_uri_escapes(mark)?);
1267 } else {
1268 string.push(self.input.peek());
1269 self.skip_non_blank();
1270 }
1271
1272 length += 1;
1273 }
1274
1275 if length == 0 {
1276 return Err(ScanError::new_str(
1277 *mark,
1278 "while parsing a tag, did not find expected tag URI",
1279 ));
1280 }
1281
1282 Ok(string)
1283 }
1284
1285 fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1286 let mut width = 0usize;
1287 let mut code = 0u32;
1288 loop {
1289 self.input.lookahead(3);
1290
1291 let c = self.input.peek_nth(1);
1292 let nc = self.input.peek_nth(2);
1293
1294 if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1295 return Err(ScanError::new_str(
1296 *mark,
1297 "while parsing a tag, found an invalid escape sequence",
1298 ));
1299 }
1300
1301 let byte = (as_hex(c) << 4) + as_hex(nc);
1302 if width == 0 {
1303 width = match byte {
1304 _ if byte & 0x80 == 0x00 => 1,
1305 _ if byte & 0xE0 == 0xC0 => 2,
1306 _ if byte & 0xF0 == 0xE0 => 3,
1307 _ if byte & 0xF8 == 0xF0 => 4,
1308 _ => {
1309 return Err(ScanError::new_str(
1310 *mark,
1311 "while parsing a tag, found an incorrect leading UTF-8 byte",
1312 ));
1313 }
1314 };
1315 code = byte;
1316 } else {
1317 if byte & 0xc0 != 0x80 {
1318 return Err(ScanError::new_str(
1319 *mark,
1320 "while parsing a tag, found an incorrect trailing UTF-8 byte",
1321 ));
1322 }
1323 code = (code << 8) + byte;
1324 }
1325
1326 self.skip_n_non_blank(3);
1327
1328 width -= 1;
1329 if width == 0 {
1330 break;
1331 }
1332 }
1333
1334 match char::from_u32(code) {
1335 Some(ch) => Ok(ch),
1336 None => Err(ScanError::new_str(
1337 *mark,
1338 "while parsing a tag, found an invalid UTF-8 codepoint",
1339 )),
1340 }
1341 }
1342
1343 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
1344 self.save_simple_key();
1345 self.disallow_simple_key();
1346
1347 let tok = self.scan_anchor(alias)?;
1348
1349 self.tokens.push_back(tok);
1350
1351 Ok(())
1352 }
1353
1354 fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
1355 let mut string = String::new();
1356 let start_mark = self.mark;
1357
1358 self.skip_non_blank();
1359 while is_anchor_char(self.input.look_ch()) {
1360 string.push(self.input.peek());
1361 self.skip_non_blank();
1362 }
1363
1364 if string.is_empty() {
1365 return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
1366 }
1367
1368 let tok = if alias {
1369 TokenType::Alias(string)
1370 } else {
1371 TokenType::Anchor(string)
1372 };
1373 Ok(Token(Span::new(start_mark, self.mark), tok))
1374 }
1375
1376 fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
1377 self.save_simple_key();
1379
1380 self.roll_one_col_indent();
1381 self.increase_flow_level()?;
1382
1383 self.allow_simple_key();
1384
1385 let start_mark = self.mark;
1386 self.skip_non_blank();
1387
1388 if tok == TokenType::FlowMappingStart {
1389 self.flow_mapping_started = true;
1390 } else {
1391 self.implicit_flow_mapping_states
1392 .push(ImplicitMappingState::Possible);
1393 }
1394
1395 self.skip_ws_to_eol(SkipTabs::Yes)?;
1396
1397 self.tokens
1398 .push_back(Token(Span::new(start_mark, self.mark), tok));
1399 Ok(())
1400 }
1401
1402 fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
1403 self.remove_simple_key()?;
1404 self.decrease_flow_level();
1405
1406 self.disallow_simple_key();
1407
1408 if matches!(tok, TokenType::FlowSequenceEnd) {
1409 self.end_implicit_mapping(self.mark);
1410 self.implicit_flow_mapping_states.pop();
1412 }
1413
1414 let start_mark = self.mark;
1415 self.skip_non_blank();
1416 self.skip_ws_to_eol(SkipTabs::Yes)?;
1417
1418 if self.flow_level > 0 {
1424 self.adjacent_value_allowed_at = self.mark.index;
1425 }
1426
1427 self.tokens
1428 .push_back(Token(Span::new(start_mark, self.mark), tok));
1429 Ok(())
1430 }
1431
1432 fn fetch_flow_entry(&mut self) -> ScanResult {
1434 self.remove_simple_key()?;
1435 self.allow_simple_key();
1436
1437 self.end_implicit_mapping(self.mark);
1438
1439 let start_mark = self.mark;
1440 self.skip_non_blank();
1441 self.skip_ws_to_eol(SkipTabs::Yes)?;
1442
1443 self.tokens.push_back(Token(
1444 Span::new(start_mark, self.mark),
1445 TokenType::FlowEntry,
1446 ));
1447 Ok(())
1448 }
1449
1450 fn increase_flow_level(&mut self) -> ScanResult {
1451 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1452 self.flow_level = self
1453 .flow_level
1454 .checked_add(1)
1455 .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
1456 Ok(())
1457 }
1458
1459 fn decrease_flow_level(&mut self) {
1460 if self.flow_level > 0 {
1461 self.flow_level -= 1;
1462 self.simple_keys.pop().unwrap();
1463 }
1464 }
1465
1466 fn fetch_block_entry(&mut self) -> ScanResult {
1472 if self.flow_level > 0 {
1473 return Err(ScanError::new_str(
1475 self.mark,
1476 r#""-" is only valid inside a block"#,
1477 ));
1478 }
1479 if !self.simple_key_allowed {
1481 return Err(ScanError::new_str(
1482 self.mark,
1483 "block sequence entries are not allowed in this context",
1484 ));
1485 }
1486
1487 if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
1489 if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
1490 return Err(ScanError::new_str(
1491 span.start,
1492 "invalid indentation for anchor",
1493 ));
1494 }
1495 }
1496
1497 let mark = self.mark;
1499 self.skip_non_blank();
1500
1501 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1503 let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
1504 self.input.lookahead(2);
1505 if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
1506 {
1507 return Err(ScanError::new_str(
1508 self.mark,
1509 "'-' must be followed by a valid YAML whitespace",
1510 ));
1511 }
1512
1513 self.skip_ws_to_eol(SkipTabs::No)?;
1514 self.input.lookahead(1);
1515 if self.input.next_is_break() || self.input.next_is_flow() {
1516 self.roll_one_col_indent();
1517 }
1518
1519 self.remove_simple_key()?;
1520 self.allow_simple_key();
1521
1522 self.tokens
1523 .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));
1524
1525 Ok(())
1526 }
1527
1528 fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1529 self.unroll_indent(-1);
1530 self.remove_simple_key()?;
1531 self.disallow_simple_key();
1532
1533 let mark = self.mark;
1534
1535 self.skip_n_non_blank(3);
1536
1537 self.tokens.push_back(Token(Span::new(mark, self.mark), t));
1538 Ok(())
1539 }
1540
1541 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1542 self.save_simple_key();
1543 self.allow_simple_key();
1544 let tok = self.scan_block_scalar(literal)?;
1545
1546 self.tokens.push_back(tok);
1547 Ok(())
1548 }
1549
/// Scan a block scalar starting at its `|` or `>` indicator and return the
/// resulting `Scalar` token.
///
/// `literal` selects the style: `true` yields `TScalarStyle::Literal`,
/// `false` yields `TScalarStyle::Folded` (line breaks between adjacent
/// non-blank-leading lines are folded into spaces).
///
/// The header may carry a chomping indicator (`+` keep, `-` strip) and an
/// indentation digit (1-9), in either order.
///
/// # Errors
/// Fails if the indentation indicator is `0`, if the header is followed by
/// anything other than blanks, a comment or a line break, if the content
/// starts with a tab, or if the first content line is wrongly indented.
#[allow(clippy::too_many_lines)]
fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
    let start_mark = self.mark;
    let mut chomping = Chomping::Clip;
    // Explicit indentation indicator from the header; 0 means "not given".
    let mut increment: usize = 0;
    // Content indentation column; 0 means "to be detected from the content".
    let mut indent: usize = 0;
    let mut trailing_blank: bool;
    let mut leading_blank: bool = false;
    let style = if literal {
        TScalarStyle::Literal
    } else {
        TScalarStyle::Folded
    };

    let mut string = String::new();
    let mut leading_break = String::new();
    let mut trailing_breaks = String::new();
    // The line break that terminates the header line, if any.
    let mut chomping_break = String::new();

    // Step over the `|` or `>` indicator.
    self.skip_non_blank();
    self.unroll_non_block_indents();

    // Header form 1: chomping indicator first, optional indentation digit after.
    if self.input.look_ch() == '+' || self.input.peek() == '-' {
        if self.input.peek() == '+' {
            chomping = Chomping::Keep;
        } else {
            chomping = Chomping::Strip;
        }
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }
            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
        }
    // Header form 2: indentation digit first, optional chomping indicator after.
    } else if self.input.next_is_digit() {
        if self.input.peek() == '0' {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, found an indentation indicator equal to 0",
            ));
        }

        increment = (self.input.peek() as usize) - ('0' as usize);
        self.skip_non_blank();
        self.input.lookahead(1);
        if self.input.peek() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
        }
    }

    // Only blanks and a comment may follow the header on its line.
    self.skip_ws_to_eol(SkipTabs::Yes)?;

    self.input.lookahead(1);
    if !self.input.next_is_breakz() {
        return Err(ScanError::new_str(
            start_mark,
            "while scanning a block scalar, did not find expected comment or line break",
        ));
    }

    // Consume the header's line break, remembering it for the chomping rules.
    if self.input.next_is_break() {
        self.input.lookahead(2);
        self.read_break(&mut chomping_break);
    }

    if self.input.look_ch() == '\t' {
        return Err(ScanError::new_str(
            start_mark,
            "a block scalar content cannot start with a tab",
        ));
    }

    // An explicit indicator is relative to the current indentation level
    // when there is one, and absolute otherwise.
    if increment > 0 {
        indent = if self.indent >= 0 {
            (self.indent + increment as isize) as usize
        } else {
            increment
        }
    }

    // Skip leading indentation and blank lines, detecting the content
    // indentation if the header did not specify it.
    if indent == 0 {
        self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
    } else {
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // The input ended before any content line: the scalar consists only of
    // whatever line breaks the chomping mode retains.
    if self.input.next_is_z() {
        let contents = match chomping {
            // Strip: nothing is kept.
            Chomping::Strip => String::new(),
            // Still on the header line: there was no line break at all.
            _ if self.mark.line == start_mark.line() => String::new(),
            // Clip: only the header's line break is kept.
            Chomping::Clip => chomping_break,
            // Keep with no blank lines collected: only the header's line break.
            Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
            // Keep: the collected blank lines are kept.
            Chomping::Keep => trailing_breaks,
        };
        return Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, contents),
        ));
    }

    // Content that is indented less than the scalar but more than the
    // enclosing block belongs to neither.
    if self.mark.col < indent && (self.mark.col as isize) > self.indent {
        return Err(ScanError::new_str(
            self.mark,
            "wrongly indented line in block scalar",
        ));
    }

    let mut line_buffer = String::with_capacity(100);
    // From here on the token's span starts at the first content line.
    let start_mark = self.mark;
    // One iteration per content line, as long as lines sit at the content indent.
    while self.mark.col == indent && !self.input.next_is_z() {
        if indent == 0 {
            // At column 0 a document end marker terminates the scalar.
            self.input.lookahead(4);
            if self.input.next_is_document_end() {
                break;
            }
        }

        trailing_blank = self.input.next_is_blank();
        // Folding: in folded style, a single break between two lines that do
        // not start with a blank becomes a space; extra blank lines are kept.
        if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
            string.push_str(&trailing_breaks);
            if trailing_breaks.is_empty() {
                string.push(' ');
            }
        } else {
            string.push_str(&leading_break);
            string.push_str(&trailing_breaks);
        }

        leading_break.clear();
        trailing_breaks.clear();

        leading_blank = self.input.next_is_blank();

        self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

        self.input.lookahead(2);
        // The input ended without a final line break.
        if self.input.next_is_z() {
            break;
        }

        self.read_break(&mut leading_break);

        // Skip the next line's indentation, collecting blank lines on the way.
        self.skip_block_scalar_indent(indent, &mut trailing_breaks);
    }

    // Chomping: unless stripping, keep the final line break.
    if chomping != Chomping::Strip {
        string.push_str(&leading_break);
        // If the input ended on a line indented at least to the content
        // column (and past column 0), a final newline is added.
        if self.input.next_is_z() && self.mark.col >= indent.max(1) {
            string.push('\n');
        }
    }

    // Keep mode additionally retains the trailing blank lines.
    if chomping == Chomping::Keep {
        string.push_str(&trailing_breaks);
    }

    Ok(Token(
        Span::new(start_mark, self.mark),
        TokenType::Scalar(style, string),
    ))
}
1743
1744 fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
1754 while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
1756 string.push(self.input.peek());
1757 self.skip_blank();
1763 }
1764
1765 if self.input.buf_is_empty() {
1768 while let Some(c) = self.input.raw_read_non_breakz_ch() {
1774 line_buffer.push(c);
1775 }
1776
1777 let n_chars = line_buffer.chars().count();
1779 self.mark.col += n_chars;
1780 self.mark.index += n_chars;
1781
1782 string.reserve(line_buffer.as_bytes().len());
1784 string.push_str(line_buffer);
1785 line_buffer.clear();
1787 }
1788 }
1789
1790 fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
1792 loop {
1793 if indent < self.input.bufmaxlen() - 2 {
1795 self.input.lookahead(self.input.bufmaxlen());
1796 while self.mark.col < indent && self.input.peek() == ' ' {
1797 self.skip_blank();
1798 }
1799 } else {
1800 loop {
1801 self.input.lookahead(self.input.bufmaxlen());
1802 while !self.input.buf_is_empty()
1803 && self.mark.col < indent
1804 && self.input.peek() == ' '
1805 {
1806 self.skip_blank();
1807 }
1808 if self.mark.col == indent
1812 || (!self.input.buf_is_empty() && self.input.peek() != ' ')
1813 {
1814 break;
1815 }
1816 }
1817 self.input.lookahead(2);
1818 }
1819
1820 if self.input.next_is_break() {
1822 self.read_break(breaks);
1823 } else {
1824 break;
1826 }
1827 }
1828 }
1829
1830 fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
1835 let mut max_indent = 0;
1836 loop {
1837 while self.input.look_ch() == ' ' {
1839 self.skip_blank();
1840 }
1841
1842 if self.mark.col > max_indent {
1843 max_indent = self.mark.col;
1844 }
1845
1846 if self.input.next_is_break() {
1847 self.input.lookahead(2);
1849 self.read_break(breaks);
1850 } else {
1851 break;
1853 }
1854 }
1855
1856 *indent = max_indent.max((self.indent + 1) as usize);
1865 if self.indent > 0 {
1866 *indent = (*indent).max(1);
1867 }
1868 }
1869
1870 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1871 self.save_simple_key();
1872 self.disallow_simple_key();
1873
1874 let tok = self.scan_flow_scalar(single)?;
1875
1876 self.skip_to_next_token()?;
1879 self.adjacent_value_allowed_at = self.mark.index;
1880
1881 self.tokens.push_back(tok);
1882 Ok(())
1883 }
1884
1885 #[allow(clippy::too_many_lines)]
1886 fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1887 let start_mark = self.mark;
1888
1889 let mut string = String::new();
1890 let mut leading_break = String::new();
1891 let mut trailing_breaks = String::new();
1892 let mut whitespaces = String::new();
1893 let mut leading_blanks;
1894
1895 self.skip_non_blank();
1897
1898 loop {
1899 self.input.lookahead(4);
1901
1902 if self.mark.col == 0 && self.input.next_is_document_indicator() {
1903 return Err(ScanError::new_str(
1904 start_mark,
1905 "while scanning a quoted scalar, found unexpected document indicator",
1906 ));
1907 }
1908
1909 if self.input.next_is_z() {
1910 return Err(ScanError::new_str(
1911 start_mark,
1912 "while scanning a quoted scalar, found unexpected end of stream",
1913 ));
1914 }
1915
1916 if (self.mark.col as isize) < self.indent {
1917 return Err(ScanError::new_str(
1918 start_mark,
1919 "invalid indentation in quoted scalar",
1920 ));
1921 }
1922
1923 leading_blanks = false;
1924 self.consume_flow_scalar_non_whitespace_chars(
1925 single,
1926 &mut string,
1927 &mut leading_blanks,
1928 &start_mark,
1929 )?;
1930
1931 match self.input.look_ch() {
1932 '\'' if single => break,
1933 '"' if !single => break,
1934 _ => {}
1935 }
1936
1937 while self.input.next_is_blank() || self.input.next_is_break() {
1939 if self.input.next_is_blank() {
1940 if leading_blanks {
1942 if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
1943 return Err(ScanError::new_str(
1944 self.mark,
1945 "tab cannot be used as indentation",
1946 ));
1947 }
1948 self.skip_blank();
1949 } else {
1950 whitespaces.push(self.input.peek());
1951 self.skip_blank();
1952 }
1953 } else {
1954 self.input.lookahead(2);
1955 if leading_blanks {
1957 self.read_break(&mut trailing_breaks);
1958 } else {
1959 whitespaces.clear();
1960 self.read_break(&mut leading_break);
1961 leading_blanks = true;
1962 }
1963 }
1964 self.input.lookahead(1);
1965 }
1966
1967 if leading_blanks {
1969 if leading_break.is_empty() {
1970 string.push_str(&leading_break);
1971 string.push_str(&trailing_breaks);
1972 trailing_breaks.clear();
1973 leading_break.clear();
1974 } else {
1975 if trailing_breaks.is_empty() {
1976 string.push(' ');
1977 } else {
1978 string.push_str(&trailing_breaks);
1979 trailing_breaks.clear();
1980 }
1981 leading_break.clear();
1982 }
1983 } else {
1984 string.push_str(&whitespaces);
1985 whitespaces.clear();
1986 }
1987 } self.skip_non_blank();
1991 self.skip_ws_to_eol(SkipTabs::Yes)?;
1993 match self.input.peek() {
1994 ',' | '}' | ']' if self.flow_level > 0 => {}
1996 c if is_breakz(c) => {}
1998 ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
2001 ':' if self.flow_level > 0 => {}
2003 _ => {
2004 return Err(ScanError::new_str(
2005 self.mark,
2006 "invalid trailing content after double-quoted scalar",
2007 ));
2008 }
2009 }
2010
2011 let style = if single {
2012 TScalarStyle::SingleQuoted
2013 } else {
2014 TScalarStyle::DoubleQuoted
2015 };
2016 Ok(Token(
2017 Span::new(start_mark, self.mark),
2018 TokenType::Scalar(style, string),
2019 ))
2020 }
2021
2022 fn consume_flow_scalar_non_whitespace_chars(
2031 &mut self,
2032 single: bool,
2033 string: &mut String,
2034 leading_blanks: &mut bool,
2035 start_mark: &Marker,
2036 ) -> Result<(), ScanError> {
2037 self.input.lookahead(2);
2038 while !is_blank_or_breakz(self.input.peek()) {
2039 match self.input.peek() {
2040 '\'' if self.input.peek_nth(1) == '\'' && single => {
2042 string.push('\'');
2043 self.skip_n_non_blank(2);
2044 }
2045 '\'' if single => break,
2047 '"' if !single => break,
2048 '\\' if !single && is_break(self.input.peek_nth(1)) => {
2050 self.input.lookahead(3);
2051 self.skip_non_blank();
2052 self.skip_linebreak();
2053 *leading_blanks = true;
2054 break;
2055 }
2056 '\\' if !single => {
2058 string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
2059 }
2060 c => {
2061 string.push(c);
2062 self.skip_non_blank();
2063 }
2064 }
2065 self.input.lookahead(2);
2066 }
2067 Ok(())
2068 }
2069
2070 fn resolve_flow_scalar_escape_sequence(
2077 &mut self,
2078 start_mark: &Marker,
2079 ) -> Result<char, ScanError> {
2080 let mut code_length = 0usize;
2081 let mut ret = '\0';
2082
2083 match self.input.peek_nth(1) {
2084 '0' => ret = '\0',
2085 'a' => ret = '\x07',
2086 'b' => ret = '\x08',
2087 't' | '\t' => ret = '\t',
2088 'n' => ret = '\n',
2089 'v' => ret = '\x0b',
2090 'f' => ret = '\x0c',
2091 'r' => ret = '\x0d',
2092 'e' => ret = '\x1b',
2093 ' ' => ret = '\x20',
2094 '"' => ret = '"',
2095 '/' => ret = '/',
2096 '\\' => ret = '\\',
2097 'N' => ret = char::from_u32(0x85).unwrap(),
2099 '_' => ret = char::from_u32(0xA0).unwrap(),
2101 'L' => ret = char::from_u32(0x2028).unwrap(),
2103 'P' => ret = char::from_u32(0x2029).unwrap(),
2105 'x' => code_length = 2,
2106 'u' => code_length = 4,
2107 'U' => code_length = 8,
2108 _ => {
2109 return Err(ScanError::new_str(
2110 *start_mark,
2111 "while parsing a quoted scalar, found unknown escape character",
2112 ))
2113 }
2114 }
2115 self.skip_n_non_blank(2);
2116
2117 if code_length > 0 {
2119 self.input.lookahead(code_length);
2120 let mut value = 0u32;
2121 for i in 0..code_length {
2122 let c = self.input.peek_nth(i);
2123 if !is_hex(c) {
2124 return Err(ScanError::new_str(
2125 *start_mark,
2126 "while parsing a quoted scalar, did not find expected hexadecimal number",
2127 ));
2128 }
2129 value = (value << 4) + as_hex(c);
2130 }
2131
2132 let Some(ch) = char::from_u32(value) else {
2133 return Err(ScanError::new_str(
2134 *start_mark,
2135 "while parsing a quoted scalar, found invalid Unicode character escape code",
2136 ));
2137 };
2138 ret = ch;
2139
2140 self.skip_n_non_blank(code_length);
2141 }
2142 Ok(ret)
2143 }
2144
2145 fn fetch_plain_scalar(&mut self) -> ScanResult {
2146 self.save_simple_key();
2147 self.disallow_simple_key();
2148
2149 let tok = self.scan_plain_scalar()?;
2150
2151 self.tokens.push_back(tok);
2152 Ok(())
2153 }
2154
2155 #[allow(clippy::too_many_lines)]
2160 fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
2161 self.unroll_non_block_indents();
2162 let indent = self.indent + 1;
2163 let start_mark = self.mark;
2164
2165 if self.flow_level > 0 && (start_mark.col as isize) < indent {
2166 return Err(ScanError::new_str(
2167 start_mark,
2168 "invalid indentation in flow construct",
2169 ));
2170 }
2171
2172 let mut string = String::with_capacity(32);
2173 self.buf_whitespaces.clear();
2174 self.buf_leading_break.clear();
2175 self.buf_trailing_breaks.clear();
2176 let mut end_mark = self.mark;
2177
2178 loop {
2179 self.input.lookahead(4);
2180 if self.input.next_is_document_end()
2181 || (self.leading_whitespace && self.input.next_is_document_start())
2182 || self.input.peek() == '#'
2183 {
2184 break;
2185 }
2186
2187 if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
2188 return Err(ScanError::new_str(
2189 self.mark,
2190 "plain scalar cannot start with '-' followed by ,[]{}",
2191 ));
2192 }
2193
2194 if !self.input.next_is_blank_or_breakz()
2195 && self.input.next_can_be_plain_scalar(self.flow_level > 0)
2196 {
2197 if self.leading_whitespace {
2198 if self.buf_leading_break.is_empty() {
2199 string.push_str(&self.buf_leading_break);
2200 string.push_str(&self.buf_trailing_breaks);
2201 self.buf_trailing_breaks.clear();
2202 self.buf_leading_break.clear();
2203 } else {
2204 if self.buf_trailing_breaks.is_empty() {
2205 string.push(' ');
2206 } else {
2207 string.push_str(&self.buf_trailing_breaks);
2208 self.buf_trailing_breaks.clear();
2209 }
2210 self.buf_leading_break.clear();
2211 }
2212 self.leading_whitespace = false;
2213 } else if !self.buf_whitespaces.is_empty() {
2214 string.push_str(&self.buf_whitespaces);
2215 self.buf_whitespaces.clear();
2216 }
2217
2218 string.push(self.input.peek());
2220 self.skip_non_blank();
2221 string.reserve(self.input.bufmaxlen());
2222
2223 let mut end = false;
2225 while !end {
2226 self.input.lookahead(self.input.bufmaxlen());
2230 for _ in 0..self.input.bufmaxlen() - 1 {
2231 if self.input.next_is_blank_or_breakz()
2232 || !self.input.next_can_be_plain_scalar(self.flow_level > 0)
2233 {
2234 end = true;
2235 break;
2236 }
2237 string.push(self.input.peek());
2238 self.skip_non_blank();
2239 }
2240 }
2241 end_mark = self.mark;
2242 }
2243
2244 if !(self.input.next_is_blank() || self.input.next_is_break()) {
2249 break;
2250 }
2251
2252 self.input.lookahead(2);
2254 while self.input.next_is_blank_or_break() {
2255 if self.input.next_is_blank() {
2256 if !self.leading_whitespace {
2257 self.buf_whitespaces.push(self.input.peek());
2258 self.skip_blank();
2259 } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
2260 self.skip_ws_to_eol(SkipTabs::Yes)?;
2263 if !self.input.next_is_breakz() {
2264 return Err(ScanError::new_str(
2265 start_mark,
2266 "while scanning a plain scalar, found a tab",
2267 ));
2268 }
2269 } else {
2270 self.skip_blank();
2271 }
2272 } else {
2273 if self.leading_whitespace {
2275 self.skip_break();
2276 self.buf_trailing_breaks.push('\n');
2277 } else {
2278 self.buf_whitespaces.clear();
2279 self.skip_break();
2280 self.buf_leading_break.push('\n');
2281 self.leading_whitespace = true;
2282 }
2283 }
2284 self.input.lookahead(2);
2285 }
2286
2287 if self.flow_level == 0 && (self.mark.col as isize) < indent {
2289 break;
2290 }
2291 }
2292
2293 if self.leading_whitespace {
2294 self.allow_simple_key();
2295 }
2296
2297 if string.is_empty() {
2298 Err(ScanError::new_str(
2302 start_mark,
2303 "unexpected end of plain scalar",
2304 ))
2305 } else {
2306 Ok(Token(
2307 Span::new(start_mark, end_mark),
2308 TokenType::Scalar(TScalarStyle::Plain, string),
2309 ))
2310 }
2311 }
2312
2313 fn fetch_key(&mut self) -> ScanResult {
2314 let start_mark = self.mark;
2315 if self.flow_level == 0 {
2316 if !self.simple_key_allowed {
2318 return Err(ScanError::new_str(
2319 self.mark,
2320 "mapping keys are not allowed in this context",
2321 ));
2322 }
2323 self.roll_indent(
2324 start_mark.col,
2325 None,
2326 TokenType::BlockMappingStart,
2327 start_mark,
2328 );
2329 } else {
2330 self.flow_mapping_started = true;
2332 }
2333
2334 self.remove_simple_key()?;
2335
2336 if self.flow_level == 0 {
2337 self.allow_simple_key();
2338 } else {
2339 self.disallow_simple_key();
2340 }
2341
2342 self.skip_non_blank();
2343 self.skip_yaml_whitespace()?;
2344 if self.input.peek() == '\t' {
2345 return Err(ScanError::new_str(
2346 self.mark(),
2347 "tabs disallowed in this context",
2348 ));
2349 }
2350 self.tokens
2351 .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
2352 Ok(())
2353 }
2354
2355 fn fetch_flow_value(&mut self) -> ScanResult {
2363 let nc = self.input.peek_nth(1);
2364
2365 if self.mark.index != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
2377 return Err(ScanError::new_str(
2378 self.mark,
2379 "':' may not precede any of `[{` in flow mapping",
2380 ));
2381 }
2382
2383 self.fetch_value()
2384 }
2385
2386 fn fetch_value(&mut self) -> ScanResult {
2388 let sk = self.simple_keys.last().unwrap().clone();
2389 let start_mark = self.mark;
2390 let is_implicit_flow_mapping =
2391 !self.implicit_flow_mapping_states.is_empty() && !self.flow_mapping_started;
2392 if is_implicit_flow_mapping {
2393 *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Inside;
2394 }
2395
2396 self.skip_non_blank();
2398 if self.input.look_ch() == '\t'
2399 && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
2400 && (self.input.peek() == '-' || self.input.next_is_alpha())
2401 {
2402 return Err(ScanError::new_str(
2403 self.mark,
2404 "':' must be followed by a valid YAML whitespace",
2405 ));
2406 }
2407
2408 if sk.possible {
2409 let tok = Token(Span::empty(sk.mark), TokenType::Key);
2411 self.insert_token(sk.token_number - self.tokens_parsed, tok);
2412 if is_implicit_flow_mapping {
2413 if sk.mark.line < start_mark.line {
2414 return Err(ScanError::new_str(
2415 start_mark,
2416 "illegal placement of ':' indicator",
2417 ));
2418 }
2419 self.insert_token(
2420 sk.token_number - self.tokens_parsed,
2421 Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
2422 );
2423 }
2424
2425 self.roll_indent(
2427 sk.mark.col,
2428 Some(sk.token_number),
2429 TokenType::BlockMappingStart,
2430 sk.mark,
2431 );
2432 self.roll_one_col_indent();
2433
2434 self.simple_keys.last_mut().unwrap().possible = false;
2435 self.disallow_simple_key();
2436 } else {
2437 if is_implicit_flow_mapping {
2438 self.tokens
2439 .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
2440 }
2441 if self.flow_level == 0 {
2443 if !self.simple_key_allowed {
2444 return Err(ScanError::new_str(
2445 start_mark,
2446 "mapping values are not allowed in this context",
2447 ));
2448 }
2449
2450 self.roll_indent(
2451 start_mark.col,
2452 None,
2453 TokenType::BlockMappingStart,
2454 start_mark,
2455 );
2456 }
2457 self.roll_one_col_indent();
2458
2459 if self.flow_level == 0 {
2460 self.allow_simple_key();
2461 } else {
2462 self.disallow_simple_key();
2463 }
2464 }
2465 self.tokens
2466 .push_back(Token(Span::empty(start_mark), TokenType::Value));
2467
2468 Ok(())
2469 }
2470
2471 fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
2477 if self.flow_level > 0 {
2478 return;
2479 }
2480
2481 if self.indent <= col as isize {
2485 if let Some(indent) = self.indents.last() {
2486 if !indent.needs_block_end {
2487 self.indent = indent.indent;
2488 self.indents.pop();
2489 }
2490 }
2491 }
2492
2493 if self.indent < col as isize {
2494 self.indents.push(Indent {
2495 indent: self.indent,
2496 needs_block_end: true,
2497 });
2498 self.indent = col as isize;
2499 let tokens_parsed = self.tokens_parsed;
2500 match number {
2501 Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
2502 None => self.tokens.push_back(Token(Span::empty(mark), tok)),
2503 }
2504 }
2505 }
2506
2507 fn unroll_indent(&mut self, col: isize) {
2513 if self.flow_level > 0 {
2514 return;
2515 }
2516 while self.indent > col {
2517 let indent = self.indents.pop().unwrap();
2518 self.indent = indent.indent;
2519 if indent.needs_block_end {
2520 self.tokens
2521 .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
2522 }
2523 }
2524 }
2525
2526 fn roll_one_col_indent(&mut self) {
2532 if self.flow_level == 0 && self.indents.last().map_or(false, |x| x.needs_block_end) {
2533 self.indents.push(Indent {
2534 indent: self.indent,
2535 needs_block_end: false,
2536 });
2537 self.indent += 1;
2538 }
2539 }
2540
2541 fn unroll_non_block_indents(&mut self) {
2543 while let Some(indent) = self.indents.last() {
2544 if indent.needs_block_end {
2545 break;
2546 }
2547 self.indent = indent.indent;
2548 self.indents.pop();
2549 }
2550 }
2551
2552 fn save_simple_key(&mut self) {
2554 if self.simple_key_allowed {
2555 let required = self.flow_level == 0
2556 && self.indent == (self.mark.col as isize)
2557 && self.indents.last().unwrap().needs_block_end;
2558 let mut sk = SimpleKey::new(self.mark);
2559 sk.possible = true;
2560 sk.required = required;
2561 sk.token_number = self.tokens_parsed + self.tokens.len();
2562
2563 self.simple_keys.pop();
2564 self.simple_keys.push(sk);
2565 }
2566 }
2567
2568 fn remove_simple_key(&mut self) -> ScanResult {
2569 let last = self.simple_keys.last_mut().unwrap();
2570 if last.possible && last.required {
2571 return Err(ScanError::new_str(self.mark, "simple key expected"));
2572 }
2573
2574 last.possible = false;
2575 Ok(())
2576 }
2577
2578 fn is_within_block(&self) -> bool {
2580 !self.indents.is_empty()
2581 }
2582
2583 fn end_implicit_mapping(&mut self, mark: Marker) {
2589 if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
2590 if *implicit_mapping == ImplicitMappingState::Inside {
2591 self.flow_mapping_started = false;
2592 *implicit_mapping = ImplicitMappingState::Possible;
2593 self.tokens
2594 .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
2595 }
2596 }
2597 }
2598}
2599
/// How the final line break and trailing empty lines of a block scalar are treated.
///
/// The scanner selects `Keep` for a `+` indicator in the block scalar header and `Strip`
/// for a `-` indicator.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// Neither the final line break nor the trailing empty lines are kept.
    Strip,
    /// The final line break is kept, the trailing empty lines are not.
    Clip,
    /// Both the final line break and the trailing empty lines are kept.
    Keep,
}
2612
/// Unit tests for this module.
#[cfg(test)]
mod test {
    use super::is_anchor_char;

    /// `is_anchor_char` accepts an ordinary ASCII letter.
    #[test]
    fn test_is_anchor_char() {
        assert!(is_anchor_char('x'));
    }
}