1#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11
12use std::{char, collections::VecDeque, error::Error, fmt};
13
14use crate::{
15    char_traits::{
16        as_hex, is_anchor_char, is_blank_or_breakz, is_break, is_breakz, is_flow, is_hex,
17        is_tag_char, is_uri_char,
18    },
19    input::{Input, SkipTabs},
20};
21
/// Character encoding of the stream, as carried by [`TokenType::StreamStart`].
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8. This is the only encoding the enum can express.
    Utf8,
}
28
/// Presentation style of a scalar, as carried by [`TokenType::Scalar`].
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TScalarStyle {
    /// A plain (unquoted) scalar.
    Plain,
    /// A scalar written between single quotes.
    SingleQuoted,
    /// A scalar written between double quotes.
    DoubleQuoted,

    /// A literal block scalar.
    // NOTE(review): presumably the `|` form fetched by `fetch_block_scalar(true)` — confirm.
    Literal,
    /// A folded block scalar.
    // NOTE(review): presumably the `>` form fetched by `fetch_block_scalar(false)` — confirm.
    Folded,
}
44
/// A position in the scanned input.
///
/// The default marker has all three coordinates set to zero.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
pub struct Marker {
    /// Offset from the start of the input; the scanner bumps it once per skipped character.
    index: usize,
    /// Line number. The scanner starts counting at 1.
    line: usize,
    /// Column within the line. The scanner resets it to 0 after each line break.
    col: usize,
}

impl Marker {
    /// Builds a marker from its three coordinates.
    #[must_use]
    pub fn new(index: usize, line: usize, col: usize) -> Self {
        Self { index, line, col }
    }

    /// Offset of this marker from the start of the input.
    #[must_use]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Line of this marker.
    #[must_use]
    pub fn line(&self) -> usize {
        self.line
    }

    /// Column of this marker.
    #[must_use]
    pub fn col(&self) -> usize {
        self.col
    }
}
81
82#[derive(Clone, Copy, PartialEq, Debug, Eq, Default)]
84pub struct Span {
85    pub start: Marker,
87    pub end: Marker,
89}
90
91impl Span {
92    #[must_use]
94    pub fn new(start: Marker, end: Marker) -> Span {
95        Span { start, end }
96    }
97
98    #[must_use]
105    pub fn empty(mark: Marker) -> Span {
106        Span {
107            start: mark,
108            end: mark,
109        }
110    }
111}
112
113#[derive(Clone, PartialEq, Debug, Eq)]
115pub struct ScanError {
116    mark: Marker,
118    info: String,
120}
121
122impl ScanError {
123    #[must_use]
125    pub fn new(loc: Marker, info: String) -> ScanError {
126        ScanError { mark: loc, info }
127    }
128
129    #[must_use]
131    pub fn new_str(loc: Marker, info: &str) -> ScanError {
132        ScanError {
133            mark: loc,
134            info: info.to_owned(),
135        }
136    }
137
138    #[must_use]
140    pub fn marker(&self) -> &Marker {
141        &self.mark
142    }
143
144    #[must_use]
146    pub fn info(&self) -> &str {
147        self.info.as_ref()
148    }
149}
150
151impl Error for ScanError {
152    fn source(&self) -> Option<&(dyn Error + 'static)> {
153        None
154    }
155}
156
157impl fmt::Display for ScanError {
158    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
159        write!(
160            formatter,
161            "{} at byte {} line {} column {}",
162            self.info,
163            self.mark.index,
164            self.mark.line,
165            self.mark.col + 1,
166        )
167    }
168}
169
/// The kind of a [`Token`], together with any payload it carries.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType {
    /// Start of the stream; the first token the scanner queues.
    StreamStart(TEncoding),
    /// End of the stream; once it has been returned, `next_token` yields `Ok(None)`.
    StreamEnd,
    /// A `%YAML` directive.
    VersionDirective(
        /// Major version number.
        u32,
        /// Minor version number.
        u32,
    ),
    /// A `%TAG` directive. Also produced with two empty strings when the scanner meets a
    /// directive whose name is neither `YAML` nor `TAG`.
    TagDirective(
        /// The tag handle.
        String,
        /// The tag prefix.
        String,
    ),
    /// A document start indicator, recognized at column 0.
    DocumentStart,
    /// A document end indicator, recognized at column 0.
    DocumentEnd,
    /// Start of a block sequence.
    BlockSequenceStart,
    /// Start of a block mapping.
    BlockMappingStart,
    /// End of a block collection.
    BlockEnd,
    /// Start of a flow sequence (`[`).
    FlowSequenceStart,
    /// End of a flow sequence (`]`).
    FlowSequenceEnd,
    /// Start of a flow mapping (`{`).
    FlowMappingStart,
    /// End of a flow mapping (`}`).
    FlowMappingEnd,
    /// An entry of a block sequence.
    // NOTE(review): presumably emitted for `-` followed by a blank or line end — confirm in `fetch_block_entry`.
    BlockEntry,
    /// An entry separator in a flow collection.
    // NOTE(review): presumably emitted for `,` — confirm in `fetch_flow_entry`.
    FlowEntry,
    /// A mapping key indicator.
    Key,
    /// A mapping value indicator.
    Value,
    /// An alias (`*name`); the payload is the name without the `*`.
    Alias(String),
    /// An anchor (`&name`); the payload is the name without the `&`.
    Anchor(String),
    /// A tag.
    Tag(
        /// The tag handle.
        String,
        /// The tag suffix.
        String,
    ),
    /// A scalar: its presentation style and its text.
    Scalar(TScalarStyle, String),
}
235
/// A scanned token: the [`Span`] of input it covers (field 0) and its [`TokenType`] (field 1).
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Span, pub TokenType);
239
240#[derive(Clone, PartialEq, Debug, Eq)]
275struct SimpleKey {
276    possible: bool,
289    required: bool,
297    token_number: usize,
303    mark: Marker,
305}
306
307impl SimpleKey {
308    fn new(mark: Marker) -> SimpleKey {
310        SimpleKey {
311            possible: false,
312            required: false,
313            token_number: 0,
314            mark,
315        }
316    }
317}
318
/// One entry of the scanner's `indents` vector.
// NOTE(review): the code that pushes and pops these entries is outside this view;
// the field notes below are partly inferred from names — confirm against it.
#[derive(Clone, Debug, Default)]
struct Indent {
    /// Indentation value of this entry (same signed type as `Scanner::indent`).
    indent: isize,
    /// NOTE(review): presumably whether leaving this level must emit a `BlockEnd` token — confirm.
    needs_block_end: bool,
}
343
/// State of a potential implicit mapping inside a flow sequence. The scanner pushes one
/// entry when a flow sequence starts and pops it when that sequence ends.
#[derive(Debug, PartialEq)]
enum ImplicitMappingState {
    /// Initial state pushed when a flow sequence starts.
    Possible,
    /// NOTE(review): presumably set once an implicit mapping has actually been opened — confirm
    /// against the code that transitions this state.
    Inside,
}
377
/// The scanner: turns characters read from an input `T` into a queue of [`Token`]s.
#[derive(Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct Scanner<T> {
    /// The character source being scanned.
    input: T,
    /// Current position in the input; advanced by the `skip_*` helpers.
    mark: Marker,
    /// Tokens fetched but not yet handed out; `next_token` pops from the front.
    tokens: VecDeque<Token>,
    /// First error hit while iterating; once set, the iterator only yields `None`.
    error: Option<ScanError>,

    /// Whether the `StreamStart` token has been queued.
    stream_start_produced: bool,
    /// Whether the `StreamEnd` token has been returned by `next_token`.
    stream_end_produced: bool,
    /// Input index at which, inside a flow collection, a `:` is accepted as a value indicator
    /// even without a following blank. Recorded after a nested flow collection is closed.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// Simple-key candidates currently being tracked.
    simple_keys: Vec<SimpleKey>,
    /// Current indentation level; `-1` before any block is opened.
    indent: isize,
    /// Saved indentation entries.
    // NOTE(review): presumably a stack of enclosing levels — confirm in `roll_indent`/`unroll_indent`.
    indents: Vec<Indent>,
    /// Flow nesting depth; `0` means block context.
    flow_level: u8,
    /// Number of tokens already returned by `next_token`.
    tokens_parsed: usize,
    /// Set by `fetch_more_tokens` when the front of the queue is ready; cleared by `next_token`.
    token_available: bool,
    /// `true` after a line break until a non-blank character is skipped.
    leading_whitespace: bool,
    /// Set when a `FlowMappingStart` token is fetched.
    flow_mapping_started: bool,
    /// One entry per open flow sequence, tracking implicit-mapping state.
    implicit_flow_mapping_states: Vec<ImplicitMappingState>,
    /// Scratch buffers.
    // NOTE(review): presumably reused across scalar scans to avoid reallocating — their use
    // is outside this view; confirm.
    buf_leading_break: String,
    buf_trailing_breaks: String,
    buf_whitespaces: String,
}
460
461impl<T: Input> Iterator for Scanner<T> {
462    type Item = Token;
463    fn next(&mut self) -> Option<Token> {
464        if self.error.is_some() {
465            return None;
466        }
467        match self.next_token() {
468            Ok(Some(tok)) => {
469                debug_print!(
470                    "    \x1B[;32m\u{21B3} {:?} \x1B[;36m{:?}\x1B[;m",
471                    tok.1,
472                    tok.0
473                );
474                Some(tok)
475            }
476            Ok(tok) => tok,
477            Err(e) => {
478                self.error = Some(e);
479                None
480            }
481        }
482    }
483}
484
/// Result of a scanning step that yields no value: `Ok(())` on success, a [`ScanError`] otherwise.
pub type ScanResult = Result<(), ScanError>;
487
488impl<T: Input> Scanner<T> {
489    pub fn new(input: T) -> Scanner<T> {
491        Scanner {
492            input,
493            mark: Marker::new(0, 1, 0),
494            tokens: VecDeque::new(),
495            error: None,
496
497            stream_start_produced: false,
498            stream_end_produced: false,
499            adjacent_value_allowed_at: 0,
500            simple_key_allowed: true,
501            simple_keys: Vec::new(),
502            indent: -1,
503            indents: Vec::new(),
504            flow_level: 0,
505            tokens_parsed: 0,
506            token_available: false,
507            leading_whitespace: true,
508            flow_mapping_started: false,
509            implicit_flow_mapping_states: vec![],
510
511            buf_leading_break: String::new(),
512            buf_trailing_breaks: String::new(),
513            buf_whitespaces: String::new(),
514        }
515    }
516
    /// Returns a clone of the error recorded while iterating, if any.
    #[inline]
    pub fn get_error(&self) -> Option<ScanError> {
        self.error.clone()
    }
525
526    #[inline]
528    fn skip_blank(&mut self) {
529        self.input.skip();
530
531        self.mark.index += 1;
532        self.mark.col += 1;
533    }
534
535    #[inline]
537    fn skip_non_blank(&mut self) {
538        self.input.skip();
539
540        self.mark.index += 1;
541        self.mark.col += 1;
542        self.leading_whitespace = false;
543    }
544
545    #[inline]
547    fn skip_n_non_blank(&mut self, count: usize) {
548        self.input.skip_n(count);
549
550        self.mark.index += count;
551        self.mark.col += count;
552        self.leading_whitespace = false;
553    }
554
555    #[inline]
557    fn skip_nl(&mut self) {
558        self.input.skip();
559
560        self.mark.index += 1;
561        self.mark.col = 0;
562        self.mark.line += 1;
563        self.leading_whitespace = true;
564    }
565
566    #[inline]
568    fn skip_linebreak(&mut self) {
569        if self.input.next_2_are('\r', '\n') {
570            self.skip_blank();
573            self.skip_nl();
574        } else if self.input.next_is_break() {
575            self.skip_nl();
576        }
577    }
578
    /// Returns whether the `StreamStart` token has been queued yet.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
584
    /// Returns whether the `StreamEnd` token has already been returned by `next_token`.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
590
    /// Returns a copy of the scanner's current position in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
596
597    #[inline]
604    fn read_break(&mut self, s: &mut String) {
605        self.skip_break();
606        s.push('\n');
607    }
608
609    #[inline]
614    fn skip_break(&mut self) {
615        let c = self.input.peek();
616        let nc = self.input.peek_nth(1);
617        debug_assert!(is_break(c));
618        if c == '\r' && nc == '\n' {
619            self.skip_blank();
620        }
621        self.skip_nl();
622    }
623
624    fn insert_token(&mut self, pos: usize, tok: Token) {
626        let old_len = self.tokens.len();
627        assert!(pos <= old_len);
628        self.tokens.insert(pos, tok);
629    }
630
    /// Marks the current position as one where a simple key may start.
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
634
    /// Marks the current position as one where a simple key may not start.
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
638
    /// Fetches the next token by dispatching on the character at the current position.
    ///
    /// On the very first call this only queues `StreamStart`. Otherwise it skips whitespace
    /// and comments, expires stale simple keys, unrolls indentation to the current column and
    /// then hands over to the matching `fetch_*` routine.
    ///
    /// # Errors
    /// Returns a [`ScanError`] when a sub-scanner fails, when content follows a document end
    /// marker on the same line, when the current column is left of the current indentation,
    /// or when the current character is one of `%`, `@` or `` ` `` (outside a directive).
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.input.lookahead(1);

        // The stream start token is produced before anything is read.
        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        self.skip_to_next_token()?;

        debug_print!(
            "  \x1B[38;5;244m\u{2192} fetch_next_token after whitespace {:?} {:?}\x1B[m",
            self.mark,
            self.input.peek()
        );

        self.stale_simple_keys()?;

        let mark = self.mark;
        self.unroll_indent(mark.col as isize);

        self.input.lookahead(4);

        // End of input.
        if self.input.next_is_z() {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Directives and document indicators are only recognized at column 0.
        if self.mark.col == 0 {
            if self.input.next_char_is('%') {
                return self.fetch_directive();
            } else if self.input.next_is_document_start() {
                return self.fetch_document_indicator(TokenType::DocumentStart);
            } else if self.input.next_is_document_end() {
                self.fetch_document_indicator(TokenType::DocumentEnd)?;
                // Only blanks may follow the document end marker on its line.
                self.skip_ws_to_eol(SkipTabs::Yes)?;
                if !self.input.next_is_breakz() {
                    return Err(ScanError::new_str(
                        self.mark,
                        "invalid content after document end marker",
                    ));
                }
                return Ok(());
            }
        }

        if (self.mark.col as isize) < self.indent {
            return Err(ScanError::new_str(self.mark, "invalid indentation"));
        }

        let c = self.input.peek();
        let nc = self.input.peek_nth(1);
        // Arm order matters: the guarded indicator arms must be tried before the
        // plain-scalar fallbacks further down.
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            '-' if is_blank_or_breakz(nc) => self.fetch_block_entry(),
            '?' if is_blank_or_breakz(nc) => self.fetch_key(),
            ':' if is_blank_or_breakz(nc) => self.fetch_value(),
            // Inside a flow collection, `:` is also a value indicator when it is followed by a
            // flow character or sits at the recorded adjacent-value position.
            ':' if self.flow_level > 0
                && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at) =>
            {
                self.fetch_flow_value()
            }
            // `true` selects an alias, `false` an anchor.
            '*' => self.fetch_anchor(true),
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Block scalars are only recognized in block context.
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            // An indicator character not followed by a blank starts a plain scalar.
            '-' if !is_blank_or_breakz(nc) => self.fetch_plain_scalar(),
            ':' | '?' if !is_blank_or_breakz(nc) && self.flow_level == 0 => {
                self.fetch_plain_scalar()
            }
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                format!("unexpected character: `{c}'"),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
731
732    pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
736        if self.stream_end_produced {
737            return Ok(None);
738        }
739
740        if !self.token_available {
741            self.fetch_more_tokens()?;
742        }
743        let Some(t) = self.tokens.pop_front() else {
744            return Err(ScanError::new_str(
745                self.mark,
746                "did not find expected next token",
747            ));
748        };
749        self.token_available = false;
750        self.tokens_parsed += 1;
751
752        if let TokenType::StreamEnd = t.1 {
753            self.stream_end_produced = true;
754        }
755        Ok(Some(t))
756    }
757
758    pub fn fetch_more_tokens(&mut self) -> ScanResult {
762        let mut need_more;
763        loop {
764            if self.tokens.is_empty() {
765                need_more = true;
766            } else {
767                need_more = false;
768                self.stale_simple_keys()?;
770                for sk in &self.simple_keys {
772                    if sk.possible && sk.token_number == self.tokens_parsed {
773                        need_more = true;
774                        break;
775                    }
776                }
777            }
778
779            if !need_more {
780                break;
781            }
782            self.fetch_next_token()?;
783        }
784        self.token_available = true;
785
786        Ok(())
787    }
788
789    fn stale_simple_keys(&mut self) -> ScanResult {
797        for sk in &mut self.simple_keys {
798            if sk.possible
799                && self.flow_level == 0
801                    && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
802            {
803                if sk.required {
804                    return Err(ScanError::new_str(self.mark, "simple key expect ':'"));
805                }
806                sk.possible = false;
807            }
808        }
809        Ok(())
810    }
811
    /// Skips blanks, line breaks and comments until the start of the next token.
    ///
    /// # Errors
    /// Fails with "tabs disallowed within this context (block indentation)" when a tab is
    /// found in the leading whitespace of a block line, left of the current indentation, and
    /// the rest of that line is not blank. Also propagates errors from `skip_ws_to_eol`.
    fn skip_to_next_token(&mut self) -> ScanResult {
        loop {
            match self.input.look_ch() {
                // A tab used as block indentation is only tolerated on an otherwise blank line.
                '\t' if self.is_within_block()
                    && self.leading_whitespace
                    && (self.mark.col as isize) < self.indent =>
                {
                    self.skip_ws_to_eol(SkipTabs::Yes)?;
                    if !self.input.next_is_breakz() {
                        return Err(ScanError::new_str(
                            self.mark,
                            "tabs disallowed within this context (block indentation)",
                        ));
                    }
                }
                '\t' | ' ' => self.skip_blank(),
                '\n' | '\r' => {
                    self.input.lookahead(2);
                    self.skip_linebreak();
                    // In block context a new line re-enables simple keys.
                    if self.flow_level == 0 {
                        self.allow_simple_key();
                    }
                }
                '#' => {
                    // Skip the comment up to, but not including, the line break.
                    let comment_length = self.input.skip_while_non_breakz();
                    self.mark.index += comment_length;
                    self.mark.col += comment_length;
                }
                _ => break,
            }
        }
        Ok(())
    }
858
859    fn skip_yaml_whitespace(&mut self) -> ScanResult {
864        let mut need_whitespace = true;
865        loop {
866            match self.input.look_ch() {
867                ' ' => {
868                    self.skip_blank();
869
870                    need_whitespace = false;
871                }
872                '\n' | '\r' => {
873                    self.input.lookahead(2);
874                    self.skip_linebreak();
875                    if self.flow_level == 0 {
876                        self.allow_simple_key();
877                    }
878                    need_whitespace = false;
879                }
880                '#' => {
881                    let comment_length = self.input.skip_while_non_breakz();
882                    self.mark.index += comment_length;
883                    self.mark.col += comment_length;
884                }
885                _ => break,
886            }
887        }
888
889        if need_whitespace {
890            Err(ScanError::new_str(self.mark(), "expected whitespace"))
891        } else {
892            Ok(())
893        }
894    }
895
896    fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> Result<SkipTabs, ScanError> {
897        let (n_bytes, result) = self.input.skip_ws_to_eol(skip_tabs);
898        self.mark.col += n_bytes;
899        self.mark.index += n_bytes;
900        result.map_err(|msg| ScanError::new_str(self.mark, msg))
901    }
902
903    fn fetch_stream_start(&mut self) {
904        let mark = self.mark;
905        self.indent = -1;
906        self.stream_start_produced = true;
907        self.allow_simple_key();
908        self.tokens.push_back(Token(
909            Span::empty(mark),
910            TokenType::StreamStart(TEncoding::Utf8),
911        ));
912        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
913    }
914
915    fn fetch_stream_end(&mut self) -> ScanResult {
916        if self.mark.col != 0 {
918            self.mark.col = 0;
919            self.mark.line += 1;
920        }
921
922        for sk in &mut self.simple_keys {
925            if sk.required && sk.possible {
926                return Err(ScanError::new_str(self.mark, "simple key expected"));
927            }
928            sk.possible = false;
929        }
930
931        self.unroll_indent(-1);
932        self.remove_simple_key()?;
933        self.disallow_simple_key();
934
935        self.tokens
936            .push_back(Token(Span::empty(self.mark), TokenType::StreamEnd));
937        Ok(())
938    }
939
940    fn fetch_directive(&mut self) -> ScanResult {
941        self.unroll_indent(-1);
942        self.remove_simple_key()?;
943
944        self.disallow_simple_key();
945
946        let tok = self.scan_directive()?;
947        self.tokens.push_back(tok);
948
949        Ok(())
950    }
951
952    fn scan_directive(&mut self) -> Result<Token, ScanError> {
953        let start_mark = self.mark;
954        self.skip_non_blank();
955
956        let name = self.scan_directive_name()?;
957        let tok = match name.as_ref() {
958            "YAML" => self.scan_version_directive_value(&start_mark)?,
959            "TAG" => self.scan_tag_directive_value(&start_mark)?,
960            _ => {
962                let line_len = self.input.skip_while_non_breakz();
964                self.mark.index += line_len;
965                self.mark.col += line_len;
966                Token(
968                    Span::new(start_mark, self.mark),
969                    TokenType::TagDirective(String::new(), String::new()),
970                )
971                }
974        };
975
976        self.skip_ws_to_eol(SkipTabs::Yes)?;
977
978        if self.input.next_is_breakz() {
979            self.input.lookahead(2);
980            self.skip_linebreak();
981            Ok(tok)
982        } else {
983            Err(ScanError::new_str(
984                start_mark,
985                "while scanning a directive, did not find expected comment or line break",
986            ))
987        }
988    }
989
990    fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
991        let n_blanks = self.input.skip_while_blank();
992        self.mark.index += n_blanks;
993        self.mark.col += n_blanks;
994
995        let major = self.scan_version_directive_number(mark)?;
996
997        if self.input.peek() != '.' {
998            return Err(ScanError::new_str(
999                *mark,
1000                "while scanning a YAML directive, did not find expected digit or '.' character",
1001            ));
1002        }
1003        self.skip_non_blank();
1004
1005        let minor = self.scan_version_directive_number(mark)?;
1006
1007        Ok(Token(
1008            Span::new(*mark, self.mark),
1009            TokenType::VersionDirective(major, minor),
1010        ))
1011    }
1012
1013    fn scan_directive_name(&mut self) -> Result<String, ScanError> {
1014        let start_mark = self.mark;
1015        let mut string = String::new();
1016
1017        let n_chars = self.input.fetch_while_is_alpha(&mut string);
1018        self.mark.index += n_chars;
1019        self.mark.col += n_chars;
1020
1021        if string.is_empty() {
1022            return Err(ScanError::new_str(
1023                start_mark,
1024                "while scanning a directive, could not find expected directive name",
1025            ));
1026        }
1027
1028        if !is_blank_or_breakz(self.input.peek()) {
1029            return Err(ScanError::new_str(
1030                start_mark,
1031                "while scanning a directive, found unexpected non-alphabetical character",
1032            ));
1033        }
1034
1035        Ok(string)
1036    }
1037
1038    fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
1039        let mut val = 0u32;
1040        let mut length = 0usize;
1041        while let Some(digit) = self.input.look_ch().to_digit(10) {
1042            if length + 1 > 9 {
1043                return Err(ScanError::new_str(
1044                    *mark,
1045                    "while scanning a YAML directive, found extremely long version number",
1046                ));
1047            }
1048            length += 1;
1049            val = val * 10 + digit;
1050            self.skip_non_blank();
1051        }
1052
1053        if length == 0 {
1054            return Err(ScanError::new_str(
1055                *mark,
1056                "while scanning a YAML directive, did not find expected version number",
1057            ));
1058        }
1059
1060        Ok(val)
1061    }
1062
1063    fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
1064        let n_blanks = self.input.skip_while_blank();
1065        self.mark.index += n_blanks;
1066        self.mark.col += n_blanks;
1067
1068        let handle = self.scan_tag_handle(true, mark)?;
1069
1070        let n_blanks = self.input.skip_while_blank();
1071        self.mark.index += n_blanks;
1072        self.mark.col += n_blanks;
1073
1074        let prefix = self.scan_tag_prefix(mark)?;
1075
1076        self.input.lookahead(1);
1077
1078        if self.input.next_is_blank_or_breakz() {
1079            Ok(Token(
1080                Span::new(*mark, self.mark),
1081                TokenType::TagDirective(handle, prefix),
1082            ))
1083        } else {
1084            Err(ScanError::new_str(
1085                *mark,
1086                "while scanning TAG, did not find expected whitespace or line break",
1087            ))
1088        }
1089    }
1090
1091    fn fetch_tag(&mut self) -> ScanResult {
1092        self.save_simple_key();
1093        self.disallow_simple_key();
1094
1095        let tok = self.scan_tag()?;
1096        self.tokens.push_back(tok);
1097        Ok(())
1098    }
1099
    /// Scans a tag at the current position and returns a `Tag(handle, suffix)` token.
    ///
    /// Three shapes are handled: a verbatim tag (second character is `<`), which yields an
    /// empty handle; a shorthand with a complete `!…!` handle; and a shorthand whose handle
    /// is just the leading `!`.
    ///
    /// # Errors
    /// Propagates sub-scanner errors, and fails if the tag is not followed by a blank, a line
    /// break, the end of input or, inside a flow collection, a flow character.
    fn scan_tag(&mut self) -> Result<Token, ScanError> {
        let start_mark = self.mark;
        let mut handle = String::new();
        let mut suffix;

        self.input.lookahead(2);

        if self.input.nth_char_is(1, '<') {
            // Verbatim tag: the handle stays empty.
            suffix = self.scan_verbatim_tag(&start_mark)?;
        } else {
            handle = self.scan_tag_handle(false, &start_mark)?;
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                // A complete `!…!` handle was read; the suffix follows it.
                let is_secondary_handle = handle == "!!";
                suffix =
                    self.scan_tag_shorthand_suffix(false, is_secondary_handle, "", &start_mark)?;
            } else {
                // What was read as a handle is really the start of the suffix: pass it on as
                // `head`, and the handle becomes the bare `!`.
                suffix = self.scan_tag_shorthand_suffix(false, false, &handle, &start_mark)?;
                "!".clone_into(&mut handle);
                if suffix.is_empty() {
                    // A lone `!`: empty handle, suffix `!`.
                    handle.clear();
                    "!".clone_into(&mut suffix);
                }
            }
        }

        if is_blank_or_breakz(self.input.look_ch())
            || (self.flow_level > 0 && self.input.next_is_flow())
        {
            Ok(Token(
                Span::new(start_mark, self.mark),
                TokenType::Tag(handle, suffix),
            ))
        } else {
            Err(ScanError::new_str(
                start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
1146
1147    fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
1148        let mut string = String::new();
1149        if self.input.look_ch() != '!' {
1150            return Err(ScanError::new_str(
1151                *mark,
1152                "while scanning a tag, did not find expected '!'",
1153            ));
1154        }
1155
1156        string.push(self.input.peek());
1157        self.skip_non_blank();
1158
1159        let n_chars = self.input.fetch_while_is_alpha(&mut string);
1160        self.mark.index += n_chars;
1161        self.mark.col += n_chars;
1162
1163        if self.input.peek() == '!' {
1165            string.push(self.input.peek());
1166            self.skip_non_blank();
1167        } else if directive && string != "!" {
1168            return Err(ScanError::new_str(
1172                *mark,
1173                "while parsing a tag directive, did not find expected '!'",
1174            ));
1175        }
1176        Ok(string)
1177    }
1178
1179    fn scan_tag_prefix(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1185        let mut string = String::new();
1186
1187        if self.input.look_ch() == '!' {
1188            string.push(self.input.peek());
1190            self.skip_non_blank();
1191        } else if !is_tag_char(self.input.peek()) {
1192            return Err(ScanError::new_str(
1194                *start_mark,
1195                "invalid global tag character",
1196            ));
1197        } else if self.input.peek() == '%' {
1198            string.push(self.scan_uri_escapes(start_mark)?);
1200        } else {
1201            string.push(self.input.peek());
1203            self.skip_non_blank();
1204        }
1205
1206        while is_uri_char(self.input.look_ch()) {
1207            if self.input.peek() == '%' {
1208                string.push(self.scan_uri_escapes(start_mark)?);
1209            } else {
1210                string.push(self.input.peek());
1211                self.skip_non_blank();
1212            }
1213        }
1214
1215        Ok(string)
1216    }
1217
1218    fn scan_verbatim_tag(&mut self, start_mark: &Marker) -> Result<String, ScanError> {
1222        self.skip_non_blank();
1224        self.skip_non_blank();
1225
1226        let mut string = String::new();
1227        while is_uri_char(self.input.look_ch()) {
1228            if self.input.peek() == '%' {
1229                string.push(self.scan_uri_escapes(start_mark)?);
1230            } else {
1231                string.push(self.input.peek());
1232                self.skip_non_blank();
1233            }
1234        }
1235
1236        if self.input.peek() != '>' {
1237            return Err(ScanError::new_str(
1238                *start_mark,
1239                "while scanning a verbatim tag, did not find the expected '>'",
1240            ));
1241        }
1242        self.skip_non_blank();
1243
1244        Ok(string)
1245    }
1246
1247    fn scan_tag_shorthand_suffix(
1248        &mut self,
1249        _directive: bool,
1250        _is_secondary: bool,
1251        head: &str,
1252        mark: &Marker,
1253    ) -> Result<String, ScanError> {
1254        let mut length = head.len();
1255        let mut string = String::new();
1256
1257        if length > 1 {
1260            string.extend(head.chars().skip(1));
1261        }
1262
1263        while is_tag_char(self.input.look_ch()) {
1264            if self.input.peek() == '%' {
1266                string.push(self.scan_uri_escapes(mark)?);
1267            } else {
1268                string.push(self.input.peek());
1269                self.skip_non_blank();
1270            }
1271
1272            length += 1;
1273        }
1274
1275        if length == 0 {
1276            return Err(ScanError::new_str(
1277                *mark,
1278                "while parsing a tag, did not find expected tag URI",
1279            ));
1280        }
1281
1282        Ok(string)
1283    }
1284
1285    fn scan_uri_escapes(&mut self, mark: &Marker) -> Result<char, ScanError> {
1286        let mut width = 0usize;
1287        let mut code = 0u32;
1288        loop {
1289            self.input.lookahead(3);
1290
1291            let c = self.input.peek_nth(1);
1292            let nc = self.input.peek_nth(2);
1293
1294            if !(self.input.peek() == '%' && is_hex(c) && is_hex(nc)) {
1295                return Err(ScanError::new_str(
1296                    *mark,
1297                    "while parsing a tag, found an invalid escape sequence",
1298                ));
1299            }
1300
1301            let byte = (as_hex(c) << 4) + as_hex(nc);
1302            if width == 0 {
1303                width = match byte {
1304                    _ if byte & 0x80 == 0x00 => 1,
1305                    _ if byte & 0xE0 == 0xC0 => 2,
1306                    _ if byte & 0xF0 == 0xE0 => 3,
1307                    _ if byte & 0xF8 == 0xF0 => 4,
1308                    _ => {
1309                        return Err(ScanError::new_str(
1310                            *mark,
1311                            "while parsing a tag, found an incorrect leading UTF-8 byte",
1312                        ));
1313                    }
1314                };
1315                code = byte;
1316            } else {
1317                if byte & 0xc0 != 0x80 {
1318                    return Err(ScanError::new_str(
1319                        *mark,
1320                        "while parsing a tag, found an incorrect trailing UTF-8 byte",
1321                    ));
1322                }
1323                code = (code << 8) + byte;
1324            }
1325
1326            self.skip_n_non_blank(3);
1327
1328            width -= 1;
1329            if width == 0 {
1330                break;
1331            }
1332        }
1333
1334        match char::from_u32(code) {
1335            Some(ch) => Ok(ch),
1336            None => Err(ScanError::new_str(
1337                *mark,
1338                "while parsing a tag, found an invalid UTF-8 codepoint",
1339            )),
1340        }
1341    }
1342
1343    fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
1344        self.save_simple_key();
1345        self.disallow_simple_key();
1346
1347        let tok = self.scan_anchor(alias)?;
1348
1349        self.tokens.push_back(tok);
1350
1351        Ok(())
1352    }
1353
1354    fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
1355        let mut string = String::new();
1356        let start_mark = self.mark;
1357
1358        self.skip_non_blank();
1359        while is_anchor_char(self.input.look_ch()) {
1360            string.push(self.input.peek());
1361            self.skip_non_blank();
1362        }
1363
1364        if string.is_empty() {
1365            return Err(ScanError::new_str(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
1366        }
1367
1368        let tok = if alias {
1369            TokenType::Alias(string)
1370        } else {
1371            TokenType::Anchor(string)
1372        };
1373        Ok(Token(Span::new(start_mark, self.mark), tok))
1374    }
1375
1376    fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
1377        self.save_simple_key();
1379
1380        self.roll_one_col_indent();
1381        self.increase_flow_level()?;
1382
1383        self.allow_simple_key();
1384
1385        let start_mark = self.mark;
1386        self.skip_non_blank();
1387
1388        if tok == TokenType::FlowMappingStart {
1389            self.flow_mapping_started = true;
1390        } else {
1391            self.implicit_flow_mapping_states
1392                .push(ImplicitMappingState::Possible);
1393        }
1394
1395        self.skip_ws_to_eol(SkipTabs::Yes)?;
1396
1397        self.tokens
1398            .push_back(Token(Span::new(start_mark, self.mark), tok));
1399        Ok(())
1400    }
1401
1402    fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
1403        self.remove_simple_key()?;
1404        self.decrease_flow_level();
1405
1406        self.disallow_simple_key();
1407
1408        if matches!(tok, TokenType::FlowSequenceEnd) {
1409            self.end_implicit_mapping(self.mark);
1410            self.implicit_flow_mapping_states.pop();
1412        }
1413
1414        let start_mark = self.mark;
1415        self.skip_non_blank();
1416        self.skip_ws_to_eol(SkipTabs::Yes)?;
1417
1418        if self.flow_level > 0 {
1424            self.adjacent_value_allowed_at = self.mark.index;
1425        }
1426
1427        self.tokens
1428            .push_back(Token(Span::new(start_mark, self.mark), tok));
1429        Ok(())
1430    }
1431
1432    fn fetch_flow_entry(&mut self) -> ScanResult {
1434        self.remove_simple_key()?;
1435        self.allow_simple_key();
1436
1437        self.end_implicit_mapping(self.mark);
1438
1439        let start_mark = self.mark;
1440        self.skip_non_blank();
1441        self.skip_ws_to_eol(SkipTabs::Yes)?;
1442
1443        self.tokens.push_back(Token(
1444            Span::new(start_mark, self.mark),
1445            TokenType::FlowEntry,
1446        ));
1447        Ok(())
1448    }
1449
1450    fn increase_flow_level(&mut self) -> ScanResult {
1451        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1452        self.flow_level = self
1453            .flow_level
1454            .checked_add(1)
1455            .ok_or_else(|| ScanError::new_str(self.mark, "recursion limit exceeded"))?;
1456        Ok(())
1457    }
1458
1459    fn decrease_flow_level(&mut self) {
1460        if self.flow_level > 0 {
1461            self.flow_level -= 1;
1462            self.simple_keys.pop().unwrap();
1463        }
1464    }
1465
    /// Queue the token(s) for a block sequence entry and skip over its `-` indicator.
    ///
    /// `roll_indent` is asked to open a `BlockSequenceStart` at the indicator's column, then a
    /// `BlockEntry` token is queued. Only the indicator and the whitespace following it are
    /// consumed; the entry's value is not fetched here.
    ///
    /// # Errors
    /// Returns a `ScanError` when:
    /// - the scanner is inside a flow collection,
    /// - `simple_key_allowed` is not set,
    /// - the last queued token is an anchor or tag at column 0, the indicator is at column 0 and
    ///   an indentation level is active,
    /// - tabs were skipped after the indicator and are followed by another `-` entry indicator.
    ///
    /// Errors from `skip_ws_to_eol` and `remove_simple_key` are propagated.
    fn fetch_block_entry(&mut self) -> ScanResult {
        // Block entries are rejected inside flow collections.
        if self.flow_level > 0 {
            return Err(ScanError::new_str(
                self.mark,
                r#""-" is only valid inside a block"#,
            ));
        }
        // A new entry may only start where `simple_key_allowed` is set.
        if !self.simple_key_allowed {
            return Err(ScanError::new_str(
                self.mark,
                "block sequence entries are not allowed in this context",
            ));
        }

        // Reject an entry at column 0 that directly follows a column-0 anchor or tag while an
        // indentation level is active (`self.indent > -1`). The error points at the anchor/tag.
        if let Some(Token(span, TokenType::Anchor(..) | TokenType::Tag(..))) = self.tokens.back() {
            if self.mark.col == 0 && span.start.col == 0 && self.indent > -1 {
                return Err(ScanError::new_str(
                    span.start,
                    "invalid indentation for anchor",
                ));
            }
        }

        // Remember where the indicator is, then step over it.
        let mark = self.mark;
        self.skip_non_blank();

        // Register the indicator's column as a (possibly new) block sequence indentation level.
        self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
        let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes)?.found_tabs();
        self.input.lookahead(2);
        // Tabs between this indicator and a following `-` entry indicator are rejected.
        if found_tabs && self.input.next_char_is('-') && is_blank_or_breakz(self.input.peek_nth(1))
        {
            return Err(ScanError::new_str(
                self.mark,
                "'-' must be followed by a valid YAML whitespace",
            ));
        }

        self.skip_ws_to_eol(SkipTabs::No)?;
        self.input.lookahead(1);
        // Nothing but a line break or a flow indicator follows on this line.
        if self.input.next_is_break() || self.input.next_is_flow() {
            self.roll_one_col_indent();
        }

        self.remove_simple_key()?;
        self.allow_simple_key();

        // The token is an empty span at the position reached after the skipped whitespace.
        self.tokens
            .push_back(Token(Span::empty(self.mark), TokenType::BlockEntry));

        Ok(())
    }
1527
1528    fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1529        self.unroll_indent(-1);
1530        self.remove_simple_key()?;
1531        self.disallow_simple_key();
1532
1533        let mark = self.mark;
1534
1535        self.skip_n_non_blank(3);
1536
1537        self.tokens.push_back(Token(Span::new(mark, self.mark), t));
1538        Ok(())
1539    }
1540
1541    fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1542        self.save_simple_key();
1543        self.allow_simple_key();
1544        let tok = self.scan_block_scalar(literal)?;
1545
1546        self.tokens.push_back(tok);
1547        Ok(())
1548    }
1549
    /// Scan a block scalar (literal or folded) and return its `Scalar` token.
    ///
    /// The scan proceeds in four phases:
    /// 1. Skip the indicator and parse the optional header: a chomping indicator (`+` keeps,
    ///    `-` strips, none clips) and an indentation indicator digit, in either order.
    /// 2. Require the rest of the header line to be empty and determine the content
    ///    indentation, either from the explicit indicator or from the first lines.
    /// 3. Read content lines while they sit exactly at the content indentation, folding line
    ///    breaks for the folded style.
    /// 4. Apply chomping to the trailing line breaks.
    ///
    /// # Arguments
    /// * `literal` - `true` produces `TScalarStyle::Literal`, `false` `TScalarStyle::Folded`.
    ///
    /// # Errors
    /// Returns a `ScanError` when the indentation indicator is `0`, when the header line holds
    /// unexpected content, when the line after the header starts with a tab, or when the first
    /// content line is indented less than the content indentation but more than the enclosing
    /// indentation. Errors from `skip_ws_to_eol` are propagated.
    #[allow(clippy::too_many_lines)]
    fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
        let start_mark = self.mark;
        let mut chomping = Chomping::Clip;
        // Value of the explicit indentation indicator; 0 means "none given".
        let mut increment: usize = 0;
        // Column at which content lines must start; 0 means "not determined yet".
        let mut indent: usize = 0;
        let mut trailing_blank: bool;
        let mut leading_blank: bool = false;
        let style = if literal {
            TScalarStyle::Literal
        } else {
            TScalarStyle::Folded
        };

        let mut string = String::new();
        // Line break that ended the previous content line.
        let mut leading_break = String::new();
        // Line breaks of the empty lines that followed it.
        let mut trailing_breaks = String::new();
        // Line break that ended the header line.
        let mut chomping_break = String::new();

        // Skip the block scalar indicator.
        self.skip_non_blank();
        self.unroll_non_block_indents();

        // Header: chomping indicator first, optionally followed by an indentation digit...
        if self.input.look_ch() == '+' || self.input.peek() == '-' {
            if self.input.peek() == '+' {
                chomping = Chomping::Keep;
            } else {
                chomping = Chomping::Strip;
            }
            self.skip_non_blank();
            self.input.lookahead(1);
            if self.input.next_is_digit() {
                if self.input.peek() == '0' {
                    return Err(ScanError::new_str(
                        start_mark,
                        "while scanning a block scalar, found an indentation indicator equal to 0",
                    ));
                }
                increment = (self.input.peek() as usize) - ('0' as usize);
                self.skip_non_blank();
            }
        // ... or indentation digit first, optionally followed by a chomping indicator.
        } else if self.input.next_is_digit() {
            if self.input.peek() == '0' {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }

            increment = (self.input.peek() as usize) - ('0' as usize);
            self.skip_non_blank();
            self.input.lookahead(1);
            if self.input.peek() == '+' || self.input.peek() == '-' {
                if self.input.peek() == '+' {
                    chomping = Chomping::Keep;
                } else {
                    chomping = Chomping::Strip;
                }
                self.skip_non_blank();
            }
        }

        self.skip_ws_to_eol(SkipTabs::Yes)?;

        // Nothing else may remain on the header line.
        self.input.lookahead(1);
        if !self.input.next_is_breakz() {
            return Err(ScanError::new_str(
                start_mark,
                "while scanning a block scalar, did not find expected comment or line break",
            ));
        }

        // Keep the header's line break aside: it is only used if the scalar has no content.
        if self.input.next_is_break() {
            self.input.lookahead(2);
            self.read_break(&mut chomping_break);
        }

        if self.input.look_ch() == '\t' {
            return Err(ScanError::new_str(
                start_mark,
                "a block scalar content cannot start with a tab",
            ));
        }

        // An explicit indentation indicator is relative to the enclosing indentation, when
        // there is one (`self.indent >= 0`).
        if increment > 0 {
            indent = if self.indent >= 0 {
                (self.indent + increment as isize) as usize
            } else {
                increment
            }
        }

        // Skip leading empty lines (collected in `trailing_breaks`), detecting the content
        // indentation from them if it was not given explicitly.
        if indent == 0 {
            self.skip_block_scalar_first_line_indent(&mut indent, &mut trailing_breaks);
        } else {
            self.skip_block_scalar_indent(indent, &mut trailing_breaks);
        }

        // End of stream reached without any content line: the result only depends on chomping.
        if self.input.next_is_z() {
            let contents = match chomping {
                // Stripping removes every trailing line break: nothing is left.
                Chomping::Strip => String::new(),
                // Still on the header's line: no line break was read at all.
                _ if self.mark.line == start_mark.line() => String::new(),
                // Clipping keeps only the header's line break.
                Chomping::Clip => chomping_break,
                // Keeping, with no empty line after the header: the header's line break.
                Chomping::Keep if trailing_breaks.is_empty() => chomping_break,
                // Keeping, with empty lines: their line breaks (the header's one is dropped).
                Chomping::Keep => trailing_breaks,
            };
            return Ok(Token(
                Span::new(start_mark, self.mark),
                TokenType::Scalar(style, contents),
            ));
        }

        // The first non-empty line is deeper than the enclosing indentation but shallower than
        // the content indentation.
        if self.mark.col < indent && (self.mark.col as isize) > self.indent {
            return Err(ScanError::new_str(
                self.mark,
                "wrongly indented line in block scalar",
            ));
        }

        // Scratch buffer reused by `scan_block_scalar_content_line` for every line.
        let mut line_buffer = String::with_capacity(100);
        // From here on `start_mark` is shadowed: the token span starts at the first content
        // position, not at the block scalar indicator.
        let start_mark = self.mark;
        while self.mark.col == indent && !self.input.next_is_z() {
            // At column 0, a document-end marker terminates the scalar.
            if indent == 0 {
                self.input.lookahead(4);
                if self.input.next_is_document_end() {
                    break;
                }
            }

            // The cursor is on the first character of a content line.
            trailing_blank = self.input.next_is_blank();
            // Folded style: a single line break between two lines that both start with a
            // non-blank character becomes a space; if empty lines separate them, their line
            // breaks are kept instead and the first break is dropped.
            if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
                string.push_str(&trailing_breaks);
                if trailing_breaks.is_empty() {
                    string.push(' ');
                }
            } else {
                // Otherwise every line break is kept verbatim.
                string.push_str(&leading_break);
                string.push_str(&trailing_breaks);
            }

            leading_break.clear();
            trailing_breaks.clear();

            // Remember whether this line starts with a blank, for the next iteration's folding.
            leading_blank = self.input.next_is_blank();

            self.scan_block_scalar_content_line(&mut string, &mut line_buffer);

            // Stop at end of stream: there is no line break to read.
            self.input.lookahead(2);
            if self.input.next_is_z() {
                break;
            }

            self.read_break(&mut leading_break);

            // Consume the next line's indentation and any empty lines that follow.
            self.skip_block_scalar_indent(indent, &mut trailing_breaks);
        }

        // Chomping: unless stripping, keep the line break that ended the last content line.
        if chomping != Chomping::Strip {
            string.push_str(&leading_break);
            // At end of stream with the cursor at column `indent.max(1)` or beyond (i.e. not at
            // the very start of a line), a final newline is appended.
            if self.input.next_is_z() && self.mark.col >= indent.max(1) {
                string.push('\n');
            }
        }

        // Keeping: the line breaks of the trailing empty lines are preserved too.
        if chomping == Chomping::Keep {
            string.push_str(&trailing_breaks);
        }

        Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, string),
        ))
    }
1743
1744    fn scan_block_scalar_content_line(&mut self, string: &mut String, line_buffer: &mut String) {
1754        while !self.input.buf_is_empty() && !self.input.next_is_breakz() {
1756            string.push(self.input.peek());
1757            self.skip_blank();
1763        }
1764
1765        if self.input.buf_is_empty() {
1768            while let Some(c) = self.input.raw_read_non_breakz_ch() {
1774                line_buffer.push(c);
1775            }
1776
1777            let n_chars = line_buffer.chars().count();
1779            self.mark.col += n_chars;
1780            self.mark.index += n_chars;
1781
1782            string.reserve(line_buffer.as_bytes().len());
1784            string.push_str(line_buffer);
1785            line_buffer.clear();
1787        }
1788    }
1789
    /// Skip the indentation of block scalar lines, up to column `indent`, along with any empty
    /// lines.
    ///
    /// On each line, spaces are consumed until column `indent` is reached or a non-space
    /// character is found. If the line then ends immediately, its line break is appended to
    /// `breaks` and the next line is processed. The function returns as soon as the cursor is
    /// on something other than a line break.
    ///
    /// # Arguments
    /// * `indent` - Column up to which spaces are consumed.
    /// * `breaks` - Receives the line breaks of the skipped empty lines.
    fn skip_block_scalar_indent(&mut self, indent: usize, breaks: &mut String) {
        loop {
            // Only spaces are consumed as indentation.
            if indent < self.input.bufmaxlen() - 2 {
                // Short indentation: a single lookahead of `bufmaxlen()` characters is done.
                self.input.lookahead(self.input.bufmaxlen());
                while self.mark.col < indent && self.input.peek() == ' ' {
                    self.skip_blank();
                }
            } else {
                // Long indentation: refill the buffer as many times as needed.
                loop {
                    self.input.lookahead(self.input.bufmaxlen());
                    while !self.input.buf_is_empty()
                        && self.mark.col < indent
                        && self.input.peek() == ' '
                    {
                        self.skip_blank();
                    }
                    // Stop once the indentation column is reached, or when the buffer still
                    // holds a character that is not a space.
                    if self.mark.col == indent
                        || (!self.input.buf_is_empty() && self.input.peek() != ' ')
                    {
                        break;
                    }
                }
                self.input.lookahead(2);
            }

            if self.input.next_is_break() {
                // Empty line: record its break and continue with the next line.
                self.read_break(breaks);
            } else {
                // Anything else: hand control back to the caller.
                break;
            }
        }
    }
1829
1830    fn skip_block_scalar_first_line_indent(&mut self, indent: &mut usize, breaks: &mut String) {
1835        let mut max_indent = 0;
1836        loop {
1837            while self.input.look_ch() == ' ' {
1839                self.skip_blank();
1840            }
1841
1842            if self.mark.col > max_indent {
1843                max_indent = self.mark.col;
1844            }
1845
1846            if self.input.next_is_break() {
1847                self.input.lookahead(2);
1849                self.read_break(breaks);
1850            } else {
1851                break;
1853            }
1854        }
1855
1856        *indent = max_indent.max((self.indent + 1) as usize);
1865        if self.indent > 0 {
1866            *indent = (*indent).max(1);
1867        }
1868    }
1869
1870    fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1871        self.save_simple_key();
1872        self.disallow_simple_key();
1873
1874        let tok = self.scan_flow_scalar(single)?;
1875
1876        self.skip_to_next_token()?;
1879        self.adjacent_value_allowed_at = self.mark.index;
1880
1881        self.tokens.push_back(tok);
1882        Ok(())
1883    }
1884
    /// Scan a single- or double-quoted scalar and return its `Scalar` token.
    ///
    /// The opening quote is skipped, then the content is read in alternating runs of
    /// non-whitespace characters (see `consume_flow_scalar_non_whitespace_chars`) and
    /// whitespace / line breaks, which are joined or folded. After the closing quote, the rest
    /// of the line is checked for content that may not follow a quoted scalar.
    ///
    /// # Arguments
    /// * `single` - `true` for a single-quoted scalar, `false` for a double-quoted one.
    ///
    /// # Errors
    /// Returns a `ScanError` when a document indicator is found at column 0, when the stream
    /// ends before the closing quote, when a line is indented less than the current
    /// indentation, when a tab is used as indentation on a continuation line, or when invalid
    /// content follows the closing quote. Errors from the helpers are propagated.
    #[allow(clippy::too_many_lines)]
    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
        let start_mark = self.mark;

        let mut string = String::new();
        // First line break of a run of whitespace, and the line breaks after it.
        let mut leading_break = String::new();
        let mut trailing_breaks = String::new();
        // Blanks seen on the current line, before any line break.
        let mut whitespaces = String::new();
        let mut leading_blanks;

        // Skip the opening quote.
        self.skip_non_blank();

        loop {
            self.input.lookahead(4);

            // A document indicator at the start of a line cannot appear inside the scalar.
            if self.mark.col == 0 && self.input.next_is_document_indicator() {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected document indicator",
                ));
            }

            // The closing quote is missing.
            if self.input.next_is_z() {
                return Err(ScanError::new_str(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected end of stream",
                ));
            }

            if (self.mark.col as isize) < self.indent {
                return Err(ScanError::new_str(
                    start_mark,
                    "invalid indentation in quoted scalar",
                ));
            }

            // Read non-whitespace content; `leading_blanks` comes back `true` if the run ended
            // on an escaped line break.
            leading_blanks = false;
            self.consume_flow_scalar_non_whitespace_chars(
                single,
                &mut string,
                &mut leading_blanks,
                &start_mark,
            )?;

            // Stop on the closing quote (it is consumed after the loop).
            match self.input.look_ch() {
                '\'' if single => break,
                '"' if !single => break,
                _ => {}
            }

            // Consume blanks and line breaks.
            while self.input.next_is_blank() || self.input.next_is_break() {
                if self.input.next_is_blank() {
                    if leading_blanks {
                        // Blanks at the start of a continuation line are dropped; a tab is
                        // rejected while still left of the current indentation.
                        if self.input.peek() == '\t' && (self.mark.col as isize) < self.indent {
                            return Err(ScanError::new_str(
                                self.mark,
                                "tab cannot be used as indentation",
                            ));
                        }
                        self.skip_blank();
                    } else {
                        // Blanks on the current line are kept, for now.
                        whitespaces.push(self.input.peek());
                        self.skip_blank();
                    }
                } else {
                    self.input.lookahead(2);
                    if leading_blanks {
                        // Not the first line break of this run.
                        self.read_break(&mut trailing_breaks);
                    } else {
                        // First line break: the blanks that preceded it are discarded.
                        whitespaces.clear();
                        self.read_break(&mut leading_break);
                        leading_blanks = true;
                    }
                }
                self.input.lookahead(1);
            }

            // Join the blanks or fold the line breaks into `string`.
            if leading_blanks {
                if leading_break.is_empty() {
                    // `leading_blanks` is set but no break was read into `leading_break`; in
                    // this function that happens when the helper above consumed an escaped
                    // line break. No space is inserted; only later line breaks are kept.
                    string.push_str(&leading_break);
                    string.push_str(&trailing_breaks);
                    trailing_breaks.clear();
                    leading_break.clear();
                } else {
                    // A lone line break folds into a space; otherwise the first break is
                    // dropped and the following ones are kept.
                    if trailing_breaks.is_empty() {
                        string.push(' ');
                    } else {
                        string.push_str(&trailing_breaks);
                        trailing_breaks.clear();
                    }
                    leading_break.clear();
                }
            } else {
                string.push_str(&whitespaces);
                whitespaces.clear();
            }
            // The statement following the loop's closing brace steps over the closing quote.
        } self.skip_non_blank();
        // Check what follows the scalar on the same line.
        self.skip_ws_to_eol(SkipTabs::Yes)?;
        match self.input.peek() {
            // Flow indicators are accepted inside a flow collection.
            ',' | '}' | ']' if self.flow_level > 0 => {}
            // A line break or the end of the stream is always accepted.
            c if is_breakz(c) => {}
            // Outside flow collections, `:` is accepted only if the scalar ends on the line it
            // started on.
            ':' if self.flow_level == 0 && start_mark.line == self.mark.line => {}
            // Inside a flow collection, `:` is always accepted.
            ':' if self.flow_level > 0 => {}
            // NOTE(review): this message says "double-quoted" but is also emitted for
            // single-quoted scalars (the match does not depend on `single`).
            _ => {
                return Err(ScanError::new_str(
                    self.mark,
                    "invalid trailing content after double-quoted scalar",
                ));
            }
        }

        let style = if single {
            TScalarStyle::SingleQuoted
        } else {
            TScalarStyle::DoubleQuoted
        };
        Ok(Token(
            Span::new(start_mark, self.mark),
            TokenType::Scalar(style, string),
        ))
    }
2021
2022    fn consume_flow_scalar_non_whitespace_chars(
2031        &mut self,
2032        single: bool,
2033        string: &mut String,
2034        leading_blanks: &mut bool,
2035        start_mark: &Marker,
2036    ) -> Result<(), ScanError> {
2037        self.input.lookahead(2);
2038        while !is_blank_or_breakz(self.input.peek()) {
2039            match self.input.peek() {
2040                '\'' if self.input.peek_nth(1) == '\'' && single => {
2042                    string.push('\'');
2043                    self.skip_n_non_blank(2);
2044                }
2045                '\'' if single => break,
2047                '"' if !single => break,
2048                '\\' if !single && is_break(self.input.peek_nth(1)) => {
2050                    self.input.lookahead(3);
2051                    self.skip_non_blank();
2052                    self.skip_linebreak();
2053                    *leading_blanks = true;
2054                    break;
2055                }
2056                '\\' if !single => {
2058                    string.push(self.resolve_flow_scalar_escape_sequence(start_mark)?);
2059                }
2060                c => {
2061                    string.push(c);
2062                    self.skip_non_blank();
2063                }
2064            }
2065            self.input.lookahead(2);
2066        }
2067        Ok(())
2068    }
2069
2070    fn resolve_flow_scalar_escape_sequence(
2077        &mut self,
2078        start_mark: &Marker,
2079    ) -> Result<char, ScanError> {
2080        let mut code_length = 0usize;
2081        let mut ret = '\0';
2082
2083        match self.input.peek_nth(1) {
2084            '0' => ret = '\0',
2085            'a' => ret = '\x07',
2086            'b' => ret = '\x08',
2087            't' | '\t' => ret = '\t',
2088            'n' => ret = '\n',
2089            'v' => ret = '\x0b',
2090            'f' => ret = '\x0c',
2091            'r' => ret = '\x0d',
2092            'e' => ret = '\x1b',
2093            ' ' => ret = '\x20',
2094            '"' => ret = '"',
2095            '/' => ret = '/',
2096            '\\' => ret = '\\',
2097            'N' => ret = char::from_u32(0x85).unwrap(),
2099            '_' => ret = char::from_u32(0xA0).unwrap(),
2101            'L' => ret = char::from_u32(0x2028).unwrap(),
2103            'P' => ret = char::from_u32(0x2029).unwrap(),
2105            'x' => code_length = 2,
2106            'u' => code_length = 4,
2107            'U' => code_length = 8,
2108            _ => {
2109                return Err(ScanError::new_str(
2110                    *start_mark,
2111                    "while parsing a quoted scalar, found unknown escape character",
2112                ))
2113            }
2114        }
2115        self.skip_n_non_blank(2);
2116
2117        if code_length > 0 {
2119            self.input.lookahead(code_length);
2120            let mut value = 0u32;
2121            for i in 0..code_length {
2122                let c = self.input.peek_nth(i);
2123                if !is_hex(c) {
2124                    return Err(ScanError::new_str(
2125                        *start_mark,
2126                        "while parsing a quoted scalar, did not find expected hexadecimal number",
2127                    ));
2128                }
2129                value = (value << 4) + as_hex(c);
2130            }
2131
2132            let Some(ch) = char::from_u32(value) else {
2133                return Err(ScanError::new_str(
2134                    *start_mark,
2135                    "while parsing a quoted scalar, found invalid Unicode character escape code",
2136                ));
2137            };
2138            ret = ch;
2139
2140            self.skip_n_non_blank(code_length);
2141        }
2142        Ok(ret)
2143    }
2144
2145    fn fetch_plain_scalar(&mut self) -> ScanResult {
2146        self.save_simple_key();
2147        self.disallow_simple_key();
2148
2149        let tok = self.scan_plain_scalar()?;
2150
2151        self.tokens.push_back(tok);
2152        Ok(())
2153    }
2154
    /// Scan a plain (unquoted) scalar and return its `Scalar` token.
    ///
    /// Content is read in alternating runs of scalar characters and whitespace / line breaks,
    /// which are joined or folded. Scanning ends on a document marker, on `#`, on a character
    /// that cannot continue a plain scalar, or (outside flow collections) on a line indented
    /// less than required. The token span ends after the last content character, not after the
    /// trailing whitespace that may have been consumed.
    ///
    /// # Errors
    /// Returns a `ScanError` when the scalar starts left of the required indentation inside a
    /// flow collection, when `-` is directly followed by a flow indicator inside a flow
    /// collection, when a tab is used as indentation on a non-empty line, or when no character
    /// could be consumed at all. Errors from `skip_ws_to_eol` are propagated.
    #[allow(clippy::too_many_lines)]
    fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
        self.unroll_non_block_indents();
        // Minimum column for continuation lines.
        let indent = self.indent + 1;
        let start_mark = self.mark;

        if self.flow_level > 0 && (start_mark.col as isize) < indent {
            return Err(ScanError::new_str(
                start_mark,
                "invalid indentation in flow construct",
            ));
        }

        let mut string = String::with_capacity(32);
        // Scratch buffers kept on the scanner, reset here before use.
        self.buf_whitespaces.clear();
        self.buf_leading_break.clear();
        self.buf_trailing_breaks.clear();
        let mut end_mark = self.mark;

        loop {
            self.input.lookahead(4);
            // Stop on a document end marker, on a document start marker reached after leading
            // whitespace, or on `#`.
            if self.input.next_is_document_end()
                || (self.leading_whitespace && self.input.next_is_document_start())
                || self.input.peek() == '#'
            {
                break;
            }

            if self.flow_level > 0 && self.input.peek() == '-' && is_flow(self.input.peek_nth(1)) {
                return Err(ScanError::new_str(
                    self.mark,
                    "plain scalar cannot start with '-' followed by ,[]{}",
                ));
            }

            if !self.input.next_is_blank_or_breakz()
                && self.input.next_can_be_plain_scalar(self.flow_level > 0)
            {
                // Flush the whitespace gathered since the previous run of content.
                if self.leading_whitespace {
                    if self.buf_leading_break.is_empty() {
                        // No first line break recorded: only later line breaks are kept.
                        string.push_str(&self.buf_leading_break);
                        string.push_str(&self.buf_trailing_breaks);
                        self.buf_trailing_breaks.clear();
                        self.buf_leading_break.clear();
                    } else {
                        // A lone line break folds into a space; otherwise the first break is
                        // dropped and the following ones are kept.
                        if self.buf_trailing_breaks.is_empty() {
                            string.push(' ');
                        } else {
                            string.push_str(&self.buf_trailing_breaks);
                            self.buf_trailing_breaks.clear();
                        }
                        self.buf_leading_break.clear();
                    }
                    self.leading_whitespace = false;
                } else if !self.buf_whitespaces.is_empty() {
                    // Blanks within a line are kept as they are.
                    string.push_str(&self.buf_whitespaces);
                    self.buf_whitespaces.clear();
                }

                // The condition above already validated the first character: take it directly.
                string.push(self.input.peek());
                self.skip_non_blank();
                string.reserve(self.input.bufmaxlen());

                // Take the following content characters, one buffer fill at a time.
                let mut end = false;
                while !end {
                    self.input.lookahead(self.input.bufmaxlen());
                    // One iteration less than the buffer size is run per fill.
                    // NOTE(review): presumably because `next_can_be_plain_scalar` looks one
                    // character past the current one - confirm against its definition.
                    for _ in 0..self.input.bufmaxlen() - 1 {
                        if self.input.next_is_blank_or_breakz()
                            || !self.input.next_can_be_plain_scalar(self.flow_level > 0)
                        {
                            end = true;
                            break;
                        }
                        string.push(self.input.peek());
                        self.skip_non_blank();
                    }
                }
                end_mark = self.mark;
            }

            // Anything other than a blank or a line break here ends the scalar.
            if !(self.input.next_is_blank() || self.input.next_is_break()) {
                break;
            }

            // Consume blanks and line breaks.
            self.input.lookahead(2);
            while self.input.next_is_blank_or_break() {
                if self.input.next_is_blank() {
                    if !self.leading_whitespace {
                        // Blank within a line: keep it, for now.
                        self.buf_whitespaces.push(self.input.peek());
                        self.skip_blank();
                    } else if (self.mark.col as isize) < indent && self.input.peek() == '\t' {
                        // A tab left of the required indentation is tolerated only if nothing
                        // but whitespace follows it on the line.
                        self.skip_ws_to_eol(SkipTabs::Yes)?;
                        if !self.input.next_is_breakz() {
                            return Err(ScanError::new_str(
                                start_mark,
                                "while scanning a plain scalar, found a tab",
                            ));
                        }
                    } else {
                        // Blank at the start of a continuation line: dropped.
                        self.skip_blank();
                    }
                } else {
                    if self.leading_whitespace {
                        // Not the first line break of this run.
                        self.skip_break();
                        self.buf_trailing_breaks.push('\n');
                    } else {
                        // First line break: the blanks that preceded it are discarded.
                        self.buf_whitespaces.clear();
                        self.skip_break();
                        self.buf_leading_break.push('\n');
                        self.leading_whitespace = true;
                    }
                }
                self.input.lookahead(2);
            }

            // Outside flow collections, a line left of the required indentation ends the
            // scalar.
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        // The scalar ended after at least one line break.
        if self.leading_whitespace {
            self.allow_simple_key();
        }

        if string.is_empty() {
            // Nothing could be consumed as scalar content.
            Err(ScanError::new_str(
                start_mark,
                "unexpected end of plain scalar",
            ))
        } else {
            Ok(Token(
                Span::new(start_mark, end_mark),
                TokenType::Scalar(TScalarStyle::Plain, string),
            ))
        }
    }
2312
2313    fn fetch_key(&mut self) -> ScanResult {
2314        let start_mark = self.mark;
2315        if self.flow_level == 0 {
2316            if !self.simple_key_allowed {
2318                return Err(ScanError::new_str(
2319                    self.mark,
2320                    "mapping keys are not allowed in this context",
2321                ));
2322            }
2323            self.roll_indent(
2324                start_mark.col,
2325                None,
2326                TokenType::BlockMappingStart,
2327                start_mark,
2328            );
2329        } else {
2330            self.flow_mapping_started = true;
2332        }
2333
2334        self.remove_simple_key()?;
2335
2336        if self.flow_level == 0 {
2337            self.allow_simple_key();
2338        } else {
2339            self.disallow_simple_key();
2340        }
2341
2342        self.skip_non_blank();
2343        self.skip_yaml_whitespace()?;
2344        if self.input.peek() == '\t' {
2345            return Err(ScanError::new_str(
2346                self.mark(),
2347                "tabs disallowed in this context",
2348            ));
2349        }
2350        self.tokens
2351            .push_back(Token(Span::new(start_mark, self.mark), TokenType::Key));
2352        Ok(())
2353    }
2354
2355    fn fetch_flow_value(&mut self) -> ScanResult {
2363        let nc = self.input.peek_nth(1);
2364
2365        if self.mark.index != self.adjacent_value_allowed_at && (nc == '[' || nc == '{') {
2377            return Err(ScanError::new_str(
2378                self.mark,
2379                "':' may not precede any of `[{` in flow mapping",
2380            ));
2381        }
2382
2383        self.fetch_value()
2384    }
2385
2386    fn fetch_value(&mut self) -> ScanResult {
2388        let sk = self.simple_keys.last().unwrap().clone();
2389        let start_mark = self.mark;
2390        let is_implicit_flow_mapping =
2391            !self.implicit_flow_mapping_states.is_empty() && !self.flow_mapping_started;
2392        if is_implicit_flow_mapping {
2393            *self.implicit_flow_mapping_states.last_mut().unwrap() = ImplicitMappingState::Inside;
2394        }
2395
2396        self.skip_non_blank();
2398        if self.input.look_ch() == '\t'
2399            && !self.skip_ws_to_eol(SkipTabs::Yes)?.has_valid_yaml_ws()
2400            && (self.input.peek() == '-' || self.input.next_is_alpha())
2401        {
2402            return Err(ScanError::new_str(
2403                self.mark,
2404                "':' must be followed by a valid YAML whitespace",
2405            ));
2406        }
2407
2408        if sk.possible {
2409            let tok = Token(Span::empty(sk.mark), TokenType::Key);
2411            self.insert_token(sk.token_number - self.tokens_parsed, tok);
2412            if is_implicit_flow_mapping {
2413                if sk.mark.line < start_mark.line {
2414                    return Err(ScanError::new_str(
2415                        start_mark,
2416                        "illegal placement of ':' indicator",
2417                    ));
2418                }
2419                self.insert_token(
2420                    sk.token_number - self.tokens_parsed,
2421                    Token(Span::empty(sk.mark), TokenType::FlowMappingStart),
2422                );
2423            }
2424
2425            self.roll_indent(
2427                sk.mark.col,
2428                Some(sk.token_number),
2429                TokenType::BlockMappingStart,
2430                sk.mark,
2431            );
2432            self.roll_one_col_indent();
2433
2434            self.simple_keys.last_mut().unwrap().possible = false;
2435            self.disallow_simple_key();
2436        } else {
2437            if is_implicit_flow_mapping {
2438                self.tokens
2439                    .push_back(Token(Span::empty(start_mark), TokenType::FlowMappingStart));
2440            }
2441            if self.flow_level == 0 {
2443                if !self.simple_key_allowed {
2444                    return Err(ScanError::new_str(
2445                        start_mark,
2446                        "mapping values are not allowed in this context",
2447                    ));
2448                }
2449
2450                self.roll_indent(
2451                    start_mark.col,
2452                    None,
2453                    TokenType::BlockMappingStart,
2454                    start_mark,
2455                );
2456            }
2457            self.roll_one_col_indent();
2458
2459            if self.flow_level == 0 {
2460                self.allow_simple_key();
2461            } else {
2462                self.disallow_simple_key();
2463            }
2464        }
2465        self.tokens
2466            .push_back(Token(Span::empty(start_mark), TokenType::Value));
2467
2468        Ok(())
2469    }
2470
2471    fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
2477        if self.flow_level > 0 {
2478            return;
2479        }
2480
2481        if self.indent <= col as isize {
2485            if let Some(indent) = self.indents.last() {
2486                if !indent.needs_block_end {
2487                    self.indent = indent.indent;
2488                    self.indents.pop();
2489                }
2490            }
2491        }
2492
2493        if self.indent < col as isize {
2494            self.indents.push(Indent {
2495                indent: self.indent,
2496                needs_block_end: true,
2497            });
2498            self.indent = col as isize;
2499            let tokens_parsed = self.tokens_parsed;
2500            match number {
2501                Some(n) => self.insert_token(n - tokens_parsed, Token(Span::empty(mark), tok)),
2502                None => self.tokens.push_back(Token(Span::empty(mark), tok)),
2503            }
2504        }
2505    }
2506
2507    fn unroll_indent(&mut self, col: isize) {
2513        if self.flow_level > 0 {
2514            return;
2515        }
2516        while self.indent > col {
2517            let indent = self.indents.pop().unwrap();
2518            self.indent = indent.indent;
2519            if indent.needs_block_end {
2520                self.tokens
2521                    .push_back(Token(Span::empty(self.mark), TokenType::BlockEnd));
2522            }
2523        }
2524    }
2525
2526    fn roll_one_col_indent(&mut self) {
2532        if self.flow_level == 0 && self.indents.last().map_or(false, |x| x.needs_block_end) {
2533            self.indents.push(Indent {
2534                indent: self.indent,
2535                needs_block_end: false,
2536            });
2537            self.indent += 1;
2538        }
2539    }
2540
2541    fn unroll_non_block_indents(&mut self) {
2543        while let Some(indent) = self.indents.last() {
2544            if indent.needs_block_end {
2545                break;
2546            }
2547            self.indent = indent.indent;
2548            self.indents.pop();
2549        }
2550    }
2551
2552    fn save_simple_key(&mut self) {
2554        if self.simple_key_allowed {
2555            let required = self.flow_level == 0
2556                && self.indent == (self.mark.col as isize)
2557                && self.indents.last().unwrap().needs_block_end;
2558            let mut sk = SimpleKey::new(self.mark);
2559            sk.possible = true;
2560            sk.required = required;
2561            sk.token_number = self.tokens_parsed + self.tokens.len();
2562
2563            self.simple_keys.pop();
2564            self.simple_keys.push(sk);
2565        }
2566    }
2567
2568    fn remove_simple_key(&mut self) -> ScanResult {
2569        let last = self.simple_keys.last_mut().unwrap();
2570        if last.possible && last.required {
2571            return Err(ScanError::new_str(self.mark, "simple key expected"));
2572        }
2573
2574        last.possible = false;
2575        Ok(())
2576    }
2577
2578    fn is_within_block(&self) -> bool {
2580        !self.indents.is_empty()
2581    }
2582
2583    fn end_implicit_mapping(&mut self, mark: Marker) {
2589        if let Some(implicit_mapping) = self.implicit_flow_mapping_states.last_mut() {
2590            if *implicit_mapping == ImplicitMappingState::Inside {
2591                self.flow_mapping_started = false;
2592                *implicit_mapping = ImplicitMappingState::Possible;
2593                self.tokens
2594                    .push_back(Token(Span::empty(mark), TokenType::FlowMappingEnd));
2595            }
2596        }
2597    }
2598}
2599
/// Chomping mode of a block scalar.
///
/// The variant names follow the three chomping behaviors of the YAML specification
/// (block chomping indicator), which control how the final line break and trailing empty
/// lines of a block scalar are treated.
///
/// The derives match the other public enums of this module (`TEncoding`, `TScalarStyle`),
/// so values can be copied, compared and printed in diagnostics.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum Chomping {
    /// "Strip" chomping: per the YAML specification, the final line break and any trailing
    /// empty lines are excluded.
    Strip,
    /// "Clip" chomping: per the YAML specification, the final line break is kept but trailing
    /// empty lines are excluded.
    Clip,
    /// "Keep" chomping: per the YAML specification, the final line break and any trailing
    /// empty lines are kept.
    Keep,
}
2612
#[cfg(test)]
mod test {
    use super::is_anchor_char;

    /// Smoke test: a plain ASCII letter is accepted as an anchor character.
    #[test]
    fn test_is_anchor_char() {
        let accepted = is_anchor_char('x');
        assert!(accepted);
    }
}