unity_yaml_rust/
scanner.rs

1use std::collections::VecDeque;
2use std::error::Error;
3use std::{char, fmt};
4
/// Character encoding carried by the `TokenType::StreamStart` token.
///
/// `Utf8` is the only encoding declared here.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8 encoded input.
    Utf8,
}
9
/// Presentation style attached to a `TokenType::Scalar` token.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TScalarStyle {
    /// No specific style.
    Any,
    /// Unquoted scalar.
    Plain,
    /// Scalar written between single quotes.
    SingleQuoted,
    /// Scalar written between double quotes.
    DoubleQuoted,

    /// Literal block scalar.
    Literal,
    /// NOTE(review): presumably the folded block style; "Foled" looks like a
    /// misspelling of "Folded". Renaming a public variant would break callers,
    /// so the name is left as is.
    Foled,
}
20
/// A position in the scanned input.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    /// Number of characters consumed before this position.
    index: usize,
    /// Line number of this position.
    line: usize,
    /// Zero-based column of this position.
    col: usize,
}

impl Marker {
    /// Builds a marker from its three coordinates.
    fn new(index: usize, line: usize, col: usize) -> Self {
        Self { index, line, col }
    }

    /// Character offset of this position.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Line of this position.
    pub fn line(&self) -> usize {
        self.line
    }

    /// Zero-based column of this position.
    pub fn col(&self) -> usize {
        self.col
    }
}
45
/// Error produced while scanning: a message plus the position it refers to.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct ScanError {
    /// Position reported with the error.
    mark: Marker,
    /// Human-readable description of what went wrong.
    info: String,
}
51
52impl ScanError {
53    pub fn new(loc: Marker, info: &str) -> ScanError {
54        ScanError {
55            mark: loc,
56            info: info.to_owned(),
57        }
58    }
59
60    pub fn marker(&self) -> &Marker {
61        &self.mark
62    }
63}
64
65impl Error for ScanError {
66    fn description(&self) -> &str {
67        self.info.as_ref()
68    }
69
70    fn cause(&self) -> Option<&dyn Error> {
71        None
72    }
73}
74
75impl fmt::Display for ScanError {
76    // col starts from 0
77    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
78        write!(
79            formatter,
80            "{} at line {} column {}",
81            self.info,
82            self.mark.line,
83            self.mark.col + 1
84        )
85    }
86}
87
/// The kinds of token the scanner can produce.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType {
    /// NOTE(review): not produced by any scanner code visible here;
    /// presumably a placeholder value — confirm against the parser.
    NoToken,
    /// First token of every stream, carrying the input encoding.
    StreamStart(TEncoding),
    /// Last token of the stream; nothing is produced after it.
    StreamEnd,
    /// `%YAML` directive: major, minor
    VersionDirective(u32, u32),
    /// `%TAG` directive: handle, prefix.
    ///
    /// Also emitted with two empty strings for an unrecognised directive name.
    TagDirective(String, String),
    /// `---` document start marker at column 0.
    ///
    /// NOTE(review): the two numbers appear to be ids parsed from the
    /// `!…!<digits> &…` header that follows the marker — confirm against
    /// `fetch_document_start`.
    DocumentStart(u64, u64),
    /// `...` document end marker at column 0.
    DocumentEnd,
    /// Start of a block sequence.
    BlockSequenceStart,
    /// Start of a block mapping.
    BlockMappingStart,
    /// End of a block collection.
    BlockEnd,
    /// `[`
    FlowSequenceStart,
    /// `]`
    FlowSequenceEnd,
    /// `{`
    FlowMappingStart,
    /// `}`
    FlowMappingEnd,
    /// `-` entry indicator of a block sequence.
    BlockEntry,
    /// `,` separator inside a flow collection.
    FlowEntry,
    /// Mapping key indicator.
    Key,
    /// Mapping value indicator.
    Value,
    /// `*name` alias; the payload is the name without the `*`.
    Alias(String),
    /// `&name` anchor; the payload is the name without the `&`.
    Anchor(String),
    /// handle, suffix
    Tag(String, String),
    /// Scalar value together with the style it was written in.
    Scalar(TScalarStyle, String),
}
116
/// A scanned token: the position it is associated with, paired with its kind.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType);
119
120#[derive(Clone, PartialEq, Debug, Eq)]
121struct SimpleKey {
122    possible: bool,
123    required: bool,
124    token_number: usize,
125    mark: Marker,
126}
127
128impl SimpleKey {
129    fn new(mark: Marker) -> SimpleKey {
130        SimpleKey {
131            possible: false,
132            required: false,
133            token_number: 0,
134            mark,
135        }
136    }
137}
138
/// Tokenizer that turns a stream of characters into `Token`s.
#[derive(Debug)]
pub struct Scanner<T> {
    /// Character source; pulled from on demand by `lookahead`.
    rdr: T,
    /// Position of the next character to be consumed.
    mark: Marker,
    /// Tokens produced but not yet handed out by `next_token`.
    tokens: VecDeque<Token>,
    /// Look-ahead characters; exhausted input is padded with `'\0'`.
    buffer: VecDeque<char>,
    /// Error recorded by `Iterator::next`; once set, iteration yields `None`.
    error: Option<ScanError>,

    /// Set once the `StreamStart` token has been queued.
    stream_start_produced: bool,
    /// Set once the `StreamEnd` token has been handed out.
    stream_end_produced: bool,
    /// Character index compared with the current one when deciding whether a
    /// `:` inside a flow collection is a value indicator.
    adjacent_value_allowed_at: usize,
    /// Whether a simple key may start at the current position.
    simple_key_allowed: bool,
    /// One candidate per nesting level: pushed at stream start and on every
    /// flow level increase, popped on every decrease.
    simple_keys: Vec<SimpleKey>,
    /// Current indentation column; starts at -1.
    indent: isize,
    /// NOTE(review): presumably the stack of enclosing indentation levels
    /// maintained by `roll_indent`/`unroll_indent` — confirm.
    indents: Vec<isize>,
    /// Nesting depth of flow collections; 0 means block context.
    flow_level: u8,
    /// Number of tokens handed out by `next_token` so far.
    tokens_parsed: usize,
    /// Set by `fetch_more_tokens`, cleared by `next_token`.
    token_available: bool,
}
158
159impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
160    type Item = Token;
161    fn next(&mut self) -> Option<Token> {
162        if self.error.is_some() {
163            return None;
164        }
165        match self.next_token() {
166            Ok(tok) => tok,
167            Err(e) => {
168                self.error = Some(e);
169                None
170            }
171        }
172    }
173}
174
/// True for the NUL character, which the scanner uses as its end-of-input marker.
#[inline]
fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
/// True for a line feed or a carriage return.
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
183#[inline]
184fn is_breakz(c: char) -> bool {
185    is_break(c) || is_z(c)
186}
/// True for a space or a horizontal tab.
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
191#[inline]
192fn is_blankz(c: char) -> bool {
193    is_blank(c) || is_breakz(c)
194}
/// True for the ASCII digits `0` through `9`.
#[inline]
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// True for ASCII letters and digits, plus `_` and `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
/// True for an ASCII hexadecimal digit in either case.
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Numeric value (0–15) of a hexadecimal digit.
///
/// # Panics
///
/// Panics if `c` is not a hexadecimal digit; callers check with `is_hex` first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
/// True for the flow-collection indicators `,`, `[`, `]`, `{` and `}`.
#[inline]
fn is_flow(c: char) -> bool {
    [',', '[', ']', '{', '}'].contains(&c)
}
220
/// Result of a scanning step that produces no value of its own: `Ok(())` on
/// success, a `ScanError` on failure.
pub type ScanResult = Result<(), ScanError>;
222
223impl<T: Iterator<Item = char>> Scanner<T> {
224    /// Creates the YAML tokenizer.
225    pub fn new(rdr: T) -> Scanner<T> {
226        Scanner {
227            rdr,
228            buffer: VecDeque::new(),
229            mark: Marker::new(0, 1, 0),
230            tokens: VecDeque::new(),
231            error: None,
232
233            stream_start_produced: false,
234            stream_end_produced: false,
235            adjacent_value_allowed_at: 0,
236            simple_key_allowed: true,
237            simple_keys: Vec::new(),
238            indent: -1,
239            indents: Vec::new(),
240            flow_level: 0,
241            tokens_parsed: 0,
242            token_available: false,
243        }
244    }
245    #[inline]
246    pub fn get_error(&self) -> Option<ScanError> {
247        self.error.as_ref().cloned()
248    }
249
250    #[inline]
251    fn lookahead(&mut self, count: usize) {
252        if self.buffer.len() >= count {
253            return;
254        }
255        for _ in 0..(count - self.buffer.len()) {
256            self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
257        }
258    }
259    #[inline]
260    fn skip(&mut self) {
261        let c = self.buffer.pop_front().unwrap();
262
263        self.mark.index += 1;
264        if c == '\n' {
265            self.mark.line += 1;
266            self.mark.col = 0;
267        } else {
268            self.mark.col += 1;
269        }
270    }
271    #[inline]
272    fn skip_line(&mut self) {
273        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
274            self.skip();
275            self.skip();
276        } else if is_break(self.buffer[0]) {
277            self.skip();
278        }
279    }
    /// Returns the first buffered character without consuming it.
    ///
    /// Panics if the buffer is empty, so call `lookahead` first.
    #[inline]
    fn ch(&self) -> char {
        self.buffer[0]
    }
284    #[inline]
285    fn ch_is(&self, c: char) -> bool {
286        self.buffer[0] == c
287    }
288    #[allow(dead_code)]
289    #[inline]
290    fn eof(&self) -> bool {
291        self.ch_is('\0')
292    }
    /// True once the `StreamStart` token has been queued.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
    /// True once the `StreamEnd` token has been handed out by `next_token`.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
    /// Returns a copy of the scanner's current position.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
305    #[inline]
306    fn read_break(&mut self, s: &mut String) {
307        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
308            s.push('\n');
309            self.skip();
310            self.skip();
311        } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
312            s.push('\n');
313            self.skip();
314        } else {
315            unreachable!();
316        }
317    }
318    fn insert_token(&mut self, pos: usize, tok: Token) {
319        let old_len = self.tokens.len();
320        assert!(pos <= old_len);
321        self.tokens.push_back(tok);
322        for i in 0..old_len - pos {
323            self.tokens.swap(old_len - i, old_len - i - 1);
324        }
325    }
    /// Marks the current position as one where a simple key may start.
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
    /// Marks the current position as one where a simple key may not start.
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
332
333    pub fn fetch_next_token(&mut self) -> ScanResult {
334        self.lookahead(1);
335        // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
336
337        if !self.stream_start_produced {
338            self.fetch_stream_start();
339            return Ok(());
340        }
341        self.skip_to_next_token();
342
343        self.stale_simple_keys()?;
344
345        let mark = self.mark;
346        self.unroll_indent(mark.col as isize);
347
348        self.lookahead(4);
349
350        if is_z(self.ch()) {
351            self.fetch_stream_end()?;
352            return Ok(());
353        }
354
355        // Is it a directive?
356        if self.mark.col == 0 && self.ch_is('%') {
357            return self.fetch_directive();
358        }
359
360        if self.mark.col == 0
361            && self.buffer[0] == '-'
362            && self.buffer[1] == '-'
363            && self.buffer[2] == '-'
364            && is_blankz(self.buffer[3])
365        {
366            return self.fetch_document_start();
367        }
368
369        if self.mark.col == 0
370            && self.buffer[0] == '.'
371            && self.buffer[1] == '.'
372            && self.buffer[2] == '.'
373            && is_blankz(self.buffer[3])
374        {
375            self.fetch_document_indicator(TokenType::DocumentEnd)?;
376            return Ok(());
377        }
378
379        let c = self.buffer[0];
380        let nc = self.buffer[1];
381        match c {
382            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
383            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
384            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
385            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
386            ',' => self.fetch_flow_entry(),
387            '-' if is_blankz(nc) => self.fetch_block_entry(),
388            '?' if is_blankz(nc) => self.fetch_key(),
389            ':' if is_blankz(nc)
390                || (self.flow_level > 0
391                    && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
392            {
393                self.fetch_value()
394            }
395            // Is it an alias?
396            '*' => self.fetch_anchor(true),
397            // Is it an anchor?
398            '&' => self.fetch_anchor(false),
399            '!' => self.fetch_tag(),
400            // Is it a literal scalar?
401            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
402            // Is it a folded scalar?
403            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
404            '\'' => self.fetch_flow_scalar(true),
405            '"' => self.fetch_flow_scalar(false),
406            // plain scalar
407            '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
408            ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
409            '%' | '@' | '`' => Err(ScanError::new(
410                self.mark,
411                &format!("unexpected character: `{}'", c),
412            )),
413            _ => self.fetch_plain_scalar(),
414        }
415    }
416
417    pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
418        if self.stream_end_produced {
419            return Ok(None);
420        }
421
422        if !self.token_available {
423            self.fetch_more_tokens()?;
424        }
425        let t = self.tokens.pop_front().unwrap();
426        self.token_available = false;
427        self.tokens_parsed += 1;
428
429        if let TokenType::StreamEnd = t.1 {
430            self.stream_end_produced = true;
431        }
432        Ok(Some(t))
433    }
434
435    pub fn fetch_more_tokens(&mut self) -> ScanResult {
436        let mut need_more;
437        loop {
438            need_more = false;
439            if self.tokens.is_empty() {
440                need_more = true;
441            } else {
442                self.stale_simple_keys()?;
443                for sk in &self.simple_keys {
444                    if sk.possible && sk.token_number == self.tokens_parsed {
445                        need_more = true;
446                        break;
447                    }
448                }
449            }
450
451            if !need_more {
452                break;
453            }
454            self.fetch_next_token()?;
455        }
456        self.token_available = true;
457
458        Ok(())
459    }
460
461    fn stale_simple_keys(&mut self) -> ScanResult {
462        for sk in &mut self.simple_keys {
463            if sk.possible
464                && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
465            {
466                if sk.required {
467                    return Err(ScanError::new(self.mark, "simple key expect ':'"));
468                }
469                sk.possible = false;
470            }
471        }
472        Ok(())
473    }
474
475    fn skip_to_next_token(&mut self) {
476        loop {
477            self.lookahead(1);
478            // TODO(chenyh) BOM
479            match self.ch() {
480                ' ' => self.skip(),
481                '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
482                '\n' | '\r' => {
483                    self.lookahead(2);
484                    self.skip_line();
485                    if self.flow_level == 0 {
486                        self.allow_simple_key();
487                    }
488                }
489                '#' => {
490                    while !is_breakz(self.ch()) {
491                        self.skip();
492                        self.lookahead(1);
493                    }
494                }
495                _ => break,
496            }
497        }
498    }
499
500    fn fetch_stream_start(&mut self) {
501        let mark = self.mark;
502        self.indent = -1;
503        self.stream_start_produced = true;
504        self.allow_simple_key();
505        self.tokens
506            .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
507        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
508    }
509
510    fn fetch_stream_end(&mut self) -> ScanResult {
511        // force new line
512        if self.mark.col != 0 {
513            self.mark.col = 0;
514            self.mark.line += 1;
515        }
516
517        self.unroll_indent(-1);
518        self.remove_simple_key()?;
519        self.disallow_simple_key();
520
521        self.tokens
522            .push_back(Token(self.mark, TokenType::StreamEnd));
523        Ok(())
524    }
525
526    fn fetch_directive(&mut self) -> ScanResult {
527        self.unroll_indent(-1);
528        self.remove_simple_key()?;
529
530        self.disallow_simple_key();
531
532        let tok = self.scan_directive()?;
533
534        self.tokens.push_back(tok);
535
536        Ok(())
537    }
538
539    fn scan_directive(&mut self) -> Result<Token, ScanError> {
540        let start_mark = self.mark;
541        self.skip();
542
543        let name = self.scan_directive_name()?;
544        let tok = match name.as_ref() {
545            "YAML" => self.scan_version_directive_value(&start_mark)?,
546            "TAG" => self.scan_tag_directive_value(&start_mark)?,
547            // XXX This should be a warning instead of an error
548            _ => {
549                // skip current line
550                self.lookahead(1);
551                while !is_breakz(self.ch()) {
552                    self.skip();
553                    self.lookahead(1);
554                }
555                // XXX return an empty TagDirective token
556                Token(
557                    start_mark,
558                    TokenType::TagDirective(String::new(), String::new()),
559                )
560                // return Err(ScanError::new(start_mark,
561                //     "while scanning a directive, found unknown directive name"))
562            }
563        };
564        self.lookahead(1);
565
566        while is_blank(self.ch()) {
567            self.skip();
568            self.lookahead(1);
569        }
570
571        if self.ch() == '#' {
572            while !is_breakz(self.ch()) {
573                self.skip();
574                self.lookahead(1);
575            }
576        }
577
578        if !is_breakz(self.ch()) {
579            return Err(ScanError::new(
580                start_mark,
581                "while scanning a directive, did not find expected comment or line break",
582            ));
583        }
584
585        // Eat a line break
586        if is_break(self.ch()) {
587            self.lookahead(2);
588            self.skip_line();
589        }
590
591        Ok(tok)
592    }
593
594    fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
595        self.lookahead(1);
596
597        while is_blank(self.ch()) {
598            self.skip();
599            self.lookahead(1);
600        }
601
602        let major = self.scan_version_directive_number(mark)?;
603
604        if self.ch() != '.' {
605            return Err(ScanError::new(
606                *mark,
607                "while scanning a YAML directive, did not find expected digit or '.' character",
608            ));
609        }
610
611        self.skip();
612
613        let minor = self.scan_version_directive_number(mark)?;
614
615        Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
616    }
617
618    fn scan_directive_name(&mut self) -> Result<String, ScanError> {
619        let start_mark = self.mark;
620        let mut string = String::new();
621        self.lookahead(1);
622        while is_alpha(self.ch()) {
623            string.push(self.ch());
624            self.skip();
625            self.lookahead(1);
626        }
627
628        if string.is_empty() {
629            return Err(ScanError::new(
630                start_mark,
631                "while scanning a directive, could not find expected directive name",
632            ));
633        }
634
635        if !is_blankz(self.ch()) {
636            return Err(ScanError::new(
637                start_mark,
638                "while scanning a directive, found unexpected non-alphabetical character",
639            ));
640        }
641
642        Ok(string)
643    }
644
645    fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
646        let mut val = 0u32;
647        let mut length = 0usize;
648        self.lookahead(1);
649        while is_digit(self.ch()) {
650            if length + 1 > 9 {
651                return Err(ScanError::new(
652                    *mark,
653                    "while scanning a YAML directive, found extremely long version number",
654                ));
655            }
656            length += 1;
657            val = val * 10 + ((self.ch() as u32) - ('0' as u32));
658            self.skip();
659            self.lookahead(1);
660        }
661
662        if length == 0 {
663            return Err(ScanError::new(
664                *mark,
665                "while scanning a YAML directive, did not find expected version number",
666            ));
667        }
668
669        Ok(val)
670    }
671
672    fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
673        self.lookahead(1);
674        /* Eat whitespaces. */
675        while is_blank(self.ch()) {
676            self.skip();
677            self.lookahead(1);
678        }
679        let handle = self.scan_tag_handle(true, mark)?;
680
681        self.lookahead(1);
682        /* Eat whitespaces. */
683        while is_blank(self.ch()) {
684            self.skip();
685            self.lookahead(1);
686        }
687
688        let is_secondary = handle == "!!";
689        let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?;
690
691        self.lookahead(1);
692
693        if is_blankz(self.ch()) {
694            Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
695        } else {
696            Err(ScanError::new(
697                *mark,
698                "while scanning TAG, did not find expected whitespace or line break",
699            ))
700        }
701    }
702
703    fn fetch_tag(&mut self) -> ScanResult {
704        self.save_simple_key()?;
705        self.disallow_simple_key();
706
707        let tok = self.scan_tag()?;
708        self.tokens.push_back(tok);
709        Ok(())
710    }
711
712    fn scan_tag(&mut self) -> Result<Token, ScanError> {
713        let start_mark = self.mark;
714        let mut handle = String::new();
715        let mut suffix;
716        let mut secondary = false;
717
718        // Check if the tag is in the canonical form (verbatim).
719        self.lookahead(2);
720
721        if self.buffer[1] == '<' {
722            // Eat '!<'
723            self.skip();
724            self.skip();
725            suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?;
726
727            if self.ch() != '>' {
728                return Err(ScanError::new(
729                    start_mark,
730                    "while scanning a tag, did not find the expected '>'",
731                ));
732            }
733
734            self.skip();
735        } else {
736            // The tag has either the '!suffix' or the '!handle!suffix'
737            handle = self.scan_tag_handle(false, &start_mark)?;
738            // Check if it is, indeed, handle.
739            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
740                if handle == "!!" {
741                    secondary = true;
742                }
743                suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?;
744            } else {
745                suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
746                handle = "!".to_owned();
747                // A special case: the '!' tag.  Set the handle to '' and the
748                // suffix to '!'.
749                if suffix.is_empty() {
750                    handle.clear();
751                    suffix = "!".to_owned();
752                }
753            }
754        }
755
756        self.lookahead(1);
757        if is_blankz(self.ch()) {
758            // XXX: ex 7.2, an empty scalar can follow a secondary tag
759            Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
760        } else {
761            Err(ScanError::new(
762                start_mark,
763                "while scanning a tag, did not find expected whitespace or line break",
764            ))
765        }
766    }
767
768    fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
769        let mut string = String::new();
770        self.lookahead(1);
771        if self.ch() != '!' {
772            return Err(ScanError::new(
773                *mark,
774                "while scanning a tag, did not find expected '!'",
775            ));
776        }
777
778        string.push(self.ch());
779        self.skip();
780
781        self.lookahead(1);
782        while is_alpha(self.ch()) {
783            string.push(self.ch());
784            self.skip();
785            self.lookahead(1);
786        }
787
788        // Check if the trailing character is '!' and copy it.
789        if self.ch() == '!' {
790            string.push(self.ch());
791            self.skip();
792        } else if directive && string != "!" {
793            // It's either the '!' tag or not really a tag handle.  If it's a %TAG
794            // directive, it's an error.  If it's a tag token, it must be a part of
795            // URI.
796            return Err(ScanError::new(
797                *mark,
798                "while parsing a tag directive, did not find expected '!'",
799            ));
800        }
801        Ok(string)
802    }
803
804    fn scan_tag_uri(
805        &mut self,
806        directive: bool,
807        _is_secondary: bool,
808        head: &str,
809        mark: &Marker,
810    ) -> Result<String, ScanError> {
811        let mut length = head.len();
812        let mut string = String::new();
813
814        // Copy the head if needed.
815        // Note that we don't copy the leading '!' character.
816        if length > 1 {
817            string.extend(head.chars().skip(1));
818        }
819
820        self.lookahead(1);
821        /*
822         * The set of characters that may appear in URI is as follows:
823         *
824         *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
825         *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
826         *      '%'.
827         */
828        while match self.ch() {
829            ';' | '/' | '?' | ':' | '@' | '&' => true,
830            '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
831            '%' => true,
832            c if is_alpha(c) => true,
833            _ => false,
834        } {
835            // Check if it is a URI-escape sequence.
836            if self.ch() == '%' {
837                string.push(self.scan_uri_escapes(directive, mark)?);
838            } else {
839                string.push(self.ch());
840                self.skip();
841            }
842
843            length += 1;
844            self.lookahead(1);
845        }
846
847        if length == 0 {
848            return Err(ScanError::new(
849                *mark,
850                "while parsing a tag, did not find expected tag URI",
851            ));
852        }
853
854        Ok(string)
855    }
856
857    fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
858        let mut width = 0usize;
859        let mut code = 0u32;
860        loop {
861            self.lookahead(3);
862
863            if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
864                return Err(ScanError::new(
865                    *mark,
866                    "while parsing a tag, did not find URI escaped octet",
867                ));
868            }
869
870            let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
871            if width == 0 {
872                width = match octet {
873                    _ if octet & 0x80 == 0x00 => 1,
874                    _ if octet & 0xE0 == 0xC0 => 2,
875                    _ if octet & 0xF0 == 0xE0 => 3,
876                    _ if octet & 0xF8 == 0xF0 => 4,
877                    _ => {
878                        return Err(ScanError::new(
879                            *mark,
880                            "while parsing a tag, found an incorrect leading UTF-8 octet",
881                        ));
882                    }
883                };
884                code = octet;
885            } else {
886                if octet & 0xc0 != 0x80 {
887                    return Err(ScanError::new(
888                        *mark,
889                        "while parsing a tag, found an incorrect trailing UTF-8 octet",
890                    ));
891                }
892                code = (code << 8) + octet;
893            }
894
895            self.skip();
896            self.skip();
897            self.skip();
898
899            width -= 1;
900            if width == 0 {
901                break;
902            }
903        }
904
905        match char::from_u32(code) {
906            Some(ch) => Ok(ch),
907            None => Err(ScanError::new(
908                *mark,
909                "while parsing a tag, found an invalid UTF-8 codepoint",
910            )),
911        }
912    }
913
914    fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
915        self.save_simple_key()?;
916        self.disallow_simple_key();
917
918        let tok = self.scan_anchor(alias)?;
919
920        self.tokens.push_back(tok);
921
922        Ok(())
923    }
924
925    fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
926        let mut string = String::new();
927        let start_mark = self.mark;
928
929        self.skip();
930        self.lookahead(1);
931        while is_alpha(self.ch()) {
932            string.push(self.ch());
933            self.skip();
934            self.lookahead(1);
935        }
936
937        if string.is_empty()
938            || match self.ch() {
939                c if is_blankz(c) => false,
940                '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
941                _ => true,
942            }
943        {
944            return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
945        }
946
947        if alias {
948            Ok(Token(start_mark, TokenType::Alias(string)))
949        } else {
950            Ok(Token(start_mark, TokenType::Anchor(string)))
951        }
952    }
953
954    fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
955        // The indicators '[' and '{' may start a simple key.
956        self.save_simple_key()?;
957
958        self.increase_flow_level()?;
959
960        self.allow_simple_key();
961
962        let start_mark = self.mark;
963        self.skip();
964
965        self.tokens.push_back(Token(start_mark, tok));
966        Ok(())
967    }
968
969    fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
970        self.remove_simple_key()?;
971        self.decrease_flow_level();
972
973        self.disallow_simple_key();
974
975        let start_mark = self.mark;
976        self.skip();
977
978        self.tokens.push_back(Token(start_mark, tok));
979        Ok(())
980    }
981
982    fn fetch_flow_entry(&mut self) -> ScanResult {
983        self.remove_simple_key()?;
984        self.allow_simple_key();
985
986        let start_mark = self.mark;
987        self.skip();
988
989        self.tokens
990            .push_back(Token(start_mark, TokenType::FlowEntry));
991        Ok(())
992    }
993
994    fn increase_flow_level(&mut self) -> ScanResult {
995        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
996        self.flow_level = self
997            .flow_level
998            .checked_add(1)
999            .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
1000        Ok(())
1001    }
1002    fn decrease_flow_level(&mut self) {
1003        if self.flow_level > 0 {
1004            self.flow_level -= 1;
1005            self.simple_keys.pop().unwrap();
1006        }
1007    }
1008
1009    fn fetch_block_entry(&mut self) -> ScanResult {
1010        if self.flow_level == 0 {
1011            // Check if we are allowed to start a new entry.
1012            if !self.simple_key_allowed {
1013                return Err(ScanError::new(
1014                    self.mark,
1015                    "block sequence entries are not allowed in this context",
1016                ));
1017            }
1018
1019            let mark = self.mark;
1020            // generate BLOCK-SEQUENCE-START if indented
1021            self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1022        } else {
1023            // - * only allowed in block
1024            return Err(ScanError::new(
1025                self.mark,
1026                r#""-" is only valid inside a block"#,
1027            ));
1028        }
1029        self.remove_simple_key()?;
1030        self.allow_simple_key();
1031
1032        let start_mark = self.mark;
1033        self.skip();
1034
1035        self.tokens
1036            .push_back(Token(start_mark, TokenType::BlockEntry));
1037        Ok(())
1038    }
1039
1040    fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1041        self.unroll_indent(-1);
1042        self.remove_simple_key()?;
1043        self.disallow_simple_key();
1044
1045        let mark = self.mark;
1046
1047        self.skip();
1048        self.skip();
1049        self.skip();
1050
1051        self.tokens.push_back(Token(mark, t));
1052        Ok(())
1053    }
1054
1055    fn fetch_document_start(&mut self) -> ScanResult {
1056        self.unroll_indent(-1);
1057        self.remove_simple_key()?;
1058        self.disallow_simple_key();
1059
1060        let mark = self.mark;
1061
1062        self.skip();
1063        self.skip();
1064        self.skip();
1065        self.skip();
1066
1067        self.lookahead(1);
1068        if self.ch() != '!' {
1069            return Err(ScanError::new(
1070                mark,
1071                "while scanning a tag, did not find expected '!'",
1072            ));
1073        }
1074        // string.push(self.ch());
1075        self.skip();
1076
1077        self.lookahead(1);
1078        while is_alpha(self.ch()) {
1079            // string.push(self.ch());
1080            self.skip();
1081            self.lookahead(1);
1082        }
1083        if self.ch() != '!' {
1084            return Err(ScanError::new(
1085                mark,
1086                "while scanning a tag, did not find expected '!'",
1087            ));
1088        }
1089        self.skip();
1090        let mut class_id = 0u64;
1091        self.lookahead(1);
1092        while is_digit(self.ch()) {
1093            class_id = class_id * 10 + (self.ch() as usize - '0' as usize) as u64;
1094            self.skip();
1095            self.lookahead(1);
1096        }
1097        while is_blank(self.ch()) {
1098            self.skip();
1099            self.lookahead(1);
1100        }
1101        if self.ch() != '&' {
1102            return Err(ScanError::new(
1103                mark,
1104                "while scanning a tag, did not find expected '&'",
1105            ));
1106        }
1107        self.skip();
1108        self.lookahead(1);
1109
1110        let mut object_id = 0u64;
1111        while is_digit(self.ch()) {
1112            object_id = object_id * 10 + (self.ch() as usize - '0' as usize) as u64;
1113            self.skip();
1114            self.lookahead(1);
1115        }
1116
1117        self.tokens.push_back(Token(mark, TokenType::DocumentStart(class_id, object_id)));
1118        Ok(())
1119    }
1120
1121    fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1122        self.save_simple_key()?;
1123        self.allow_simple_key();
1124        let tok = self.scan_block_scalar(literal)?;
1125
1126        self.tokens.push_back(tok);
1127        Ok(())
1128    }
1129
1130    fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
1131        let start_mark = self.mark;
1132        let mut chomping: i32 = 0;
1133        let mut increment: usize = 0;
1134        let mut indent: usize = 0;
1135        let mut trailing_blank: bool;
1136        let mut leading_blank: bool = false;
1137
1138        let mut string = String::new();
1139        let mut leading_break = String::new();
1140        let mut trailing_breaks = String::new();
1141
1142        // skip '|' or '>'
1143        self.skip();
1144        self.lookahead(1);
1145
1146        if self.ch() == '+' || self.ch() == '-' {
1147            if self.ch() == '+' {
1148                chomping = 1;
1149            } else {
1150                chomping = -1;
1151            }
1152            self.skip();
1153            self.lookahead(1);
1154            if is_digit(self.ch()) {
1155                if self.ch() == '0' {
1156                    return Err(ScanError::new(
1157                        start_mark,
1158                        "while scanning a block scalar, found an indentation indicator equal to 0",
1159                    ));
1160                }
1161                increment = (self.ch() as usize) - ('0' as usize);
1162                self.skip();
1163            }
1164        } else if is_digit(self.ch()) {
1165            if self.ch() == '0' {
1166                return Err(ScanError::new(
1167                    start_mark,
1168                    "while scanning a block scalar, found an indentation indicator equal to 0",
1169                ));
1170            }
1171
1172            increment = (self.ch() as usize) - ('0' as usize);
1173            self.skip();
1174            self.lookahead(1);
1175            if self.ch() == '+' || self.ch() == '-' {
1176                if self.ch() == '+' {
1177                    chomping = 1;
1178                } else {
1179                    chomping = -1;
1180                }
1181                self.skip();
1182            }
1183        }
1184
1185        // Eat whitespaces and comments to the end of the line.
1186        self.lookahead(1);
1187
1188        while is_blank(self.ch()) {
1189            self.skip();
1190            self.lookahead(1);
1191        }
1192
1193        if self.ch() == '#' {
1194            while !is_breakz(self.ch()) {
1195                self.skip();
1196                self.lookahead(1);
1197            }
1198        }
1199
1200        // Check if we are at the end of the line.
1201        if !is_breakz(self.ch()) {
1202            return Err(ScanError::new(
1203                start_mark,
1204                "while scanning a block scalar, did not find expected comment or line break",
1205            ));
1206        }
1207
1208        if is_break(self.ch()) {
1209            self.lookahead(2);
1210            self.skip_line();
1211        }
1212
1213        if increment > 0 {
1214            indent = if self.indent >= 0 {
1215                (self.indent + increment as isize) as usize
1216            } else {
1217                increment
1218            }
1219        }
1220        // Scan the leading line breaks and determine the indentation level if needed.
1221        self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1222
1223        self.lookahead(1);
1224
1225        let start_mark = self.mark;
1226
1227        while self.mark.col == indent && !is_z(self.ch()) {
1228            // We are at the beginning of a non-empty line.
1229            trailing_blank = is_blank(self.ch());
1230            if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1231                if trailing_breaks.is_empty() {
1232                    string.push(' ');
1233                }
1234                leading_break.clear();
1235            } else {
1236                string.push_str(&leading_break);
1237                leading_break.clear();
1238            }
1239
1240            string.push_str(&trailing_breaks);
1241            trailing_breaks.clear();
1242
1243            leading_blank = is_blank(self.ch());
1244
1245            while !is_breakz(self.ch()) {
1246                string.push(self.ch());
1247                self.skip();
1248                self.lookahead(1);
1249            }
1250            // break on EOF
1251            if is_z(self.ch()) {
1252                break;
1253            }
1254
1255            self.lookahead(2);
1256            self.read_break(&mut leading_break);
1257
1258            // Eat the following indentation spaces and line breaks.
1259            self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1260        }
1261
1262        // Chomp the tail.
1263        if chomping != -1 {
1264            string.push_str(&leading_break);
1265        }
1266
1267        if chomping == 1 {
1268            string.push_str(&trailing_breaks);
1269        }
1270
1271        if literal {
1272            Ok(Token(
1273                start_mark,
1274                TokenType::Scalar(TScalarStyle::Literal, string),
1275            ))
1276        } else {
1277            Ok(Token(
1278                start_mark,
1279                TokenType::Scalar(TScalarStyle::Foled, string),
1280            ))
1281        }
1282    }
1283
1284    fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
1285        let mut max_indent = 0;
1286        loop {
1287            self.lookahead(1);
1288            while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
1289                self.skip();
1290                self.lookahead(1);
1291            }
1292
1293            if self.mark.col > max_indent {
1294                max_indent = self.mark.col;
1295            }
1296
1297            // Check for a tab character messing the indentation.
1298            if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
1299                return Err(ScanError::new(self.mark,
1300                        "while scanning a block scalar, found a tab character where an indentation space is expected"));
1301            }
1302
1303            if !is_break(self.ch()) {
1304                break;
1305            }
1306
1307            self.lookahead(2);
1308            // Consume the line break.
1309            self.read_break(breaks);
1310        }
1311
1312        if *indent == 0 {
1313            *indent = max_indent;
1314            if *indent < (self.indent + 1) as usize {
1315                *indent = (self.indent + 1) as usize;
1316            }
1317            if *indent < 1 {
1318                *indent = 1;
1319            }
1320        }
1321        Ok(())
1322    }
1323
1324    fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1325        self.save_simple_key()?;
1326        self.disallow_simple_key();
1327
1328        let tok = self.scan_flow_scalar(single)?;
1329
1330        // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
1331        // YAML allows the following value to be specified adjacent to the “:”.
1332        self.adjacent_value_allowed_at = self.mark.index;
1333
1334        self.tokens.push_back(tok);
1335        Ok(())
1336    }
1337
1338    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1339        let start_mark = self.mark;
1340
1341        let mut string = String::new();
1342        let mut leading_break = String::new();
1343        let mut trailing_breaks = String::new();
1344        let mut whitespaces = String::new();
1345        let mut leading_blanks;
1346
1347        /* Eat the left quote. */
1348        self.skip();
1349
1350        loop {
1351            /* Check for a document indicator. */
1352            self.lookahead(4);
1353
1354            if self.mark.col == 0
1355                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1356                    || ((self.buffer[0] == '.')
1357                        && (self.buffer[1] == '.')
1358                        && (self.buffer[2] == '.')))
1359                && is_blankz(self.buffer[3])
1360            {
1361                return Err(ScanError::new(
1362                    start_mark,
1363                    "while scanning a quoted scalar, found unexpected document indicator",
1364                ));
1365            }
1366
1367            if is_z(self.ch()) {
1368                return Err(ScanError::new(
1369                    start_mark,
1370                    "while scanning a quoted scalar, found unexpected end of stream",
1371                ));
1372            }
1373
1374            self.lookahead(2);
1375
1376            leading_blanks = false;
1377            // Consume non-blank characters.
1378
1379            while !is_blankz(self.ch()) {
1380                match self.ch() {
1381                    // Check for an escaped single quote.
1382                    '\'' if self.buffer[1] == '\'' && single => {
1383                        string.push('\'');
1384                        self.skip();
1385                        self.skip();
1386                    }
1387                    // Check for the right quote.
1388                    '\'' if single => break,
1389                    '"' if !single => break,
1390                    // Check for an escaped line break.
1391                    '\\' if !single && is_break(self.buffer[1]) => {
1392                        self.lookahead(3);
1393                        self.skip();
1394                        self.skip_line();
1395                        leading_blanks = true;
1396                        break;
1397                    }
1398                    // Check for an escape sequence.
1399                    '\\' if !single => {
1400                        let mut code_length = 0usize;
1401                        match self.buffer[1] {
1402                            '0' => string.push('\0'),
1403                            'a' => string.push('\x07'),
1404                            'b' => string.push('\x08'),
1405                            't' | '\t' => string.push('\t'),
1406                            'n' => string.push('\n'),
1407                            'v' => string.push('\x0b'),
1408                            'f' => string.push('\x0c'),
1409                            'r' => string.push('\x0d'),
1410                            'e' => string.push('\x1b'),
1411                            ' ' => string.push('\x20'),
1412                            '"' => string.push('"'),
1413                            '\'' => string.push('\''),
1414                            '\\' => string.push('\\'),
1415                            // NEL (#x85)
1416                            'N' => string.push(char::from_u32(0x85).unwrap()),
1417                            // #xA0
1418                            '_' => string.push(char::from_u32(0xA0).unwrap()),
1419                            // LS (#x2028)
1420                            'L' => string.push(char::from_u32(0x2028).unwrap()),
1421                            // PS (#x2029)
1422                            'P' => string.push(char::from_u32(0x2029).unwrap()),
1423                            'x' => code_length = 2,
1424                            'u' => code_length = 4,
1425                            'U' => code_length = 8,
1426                            _ => {
1427                                return Err(ScanError::new(
1428                                    start_mark,
1429                                    "while parsing a quoted scalar, found unknown escape character",
1430                                ))
1431                            }
1432                        }
1433                        self.skip();
1434                        self.skip();
1435                        // Consume an arbitrary escape code.
1436                        if code_length > 0 {
1437                            self.lookahead(code_length);
1438                            let mut value = 0u32;
1439                            for i in 0..code_length {
1440                                if !is_hex(self.buffer[i]) {
1441                                    return Err(ScanError::new(start_mark,
1442                                        "while parsing a quoted scalar, did not find expected hexadecimal number"));
1443                                }
1444                                value = (value << 4) + as_hex(self.buffer[i]);
1445                            }
1446
1447                            let ch = match char::from_u32(value) {
1448                                Some(v) => v,
1449                                None => {
1450                                    return Err(ScanError::new(start_mark,
1451                                        "while parsing a quoted scalar, found invalid Unicode character escape code"));
1452                                }
1453                            };
1454                            string.push(ch);
1455
1456                            for _ in 0..code_length {
1457                                self.skip();
1458                            }
1459                        }
1460                    }
1461                    c => {
1462                        string.push(c);
1463                        self.skip();
1464                    }
1465                }
1466                self.lookahead(2);
1467            }
1468            self.lookahead(1);
1469            match self.ch() {
1470                '\'' if single => break,
1471                '"' if !single => break,
1472                _ => {}
1473            }
1474
1475            // Consume blank characters.
1476            while is_blank(self.ch()) || is_break(self.ch()) {
1477                if is_blank(self.ch()) {
1478                    // Consume a space or a tab character.
1479                    if leading_blanks {
1480                        self.skip();
1481                    } else {
1482                        whitespaces.push(self.ch());
1483                        self.skip();
1484                    }
1485                } else {
1486                    self.lookahead(2);
1487                    // Check if it is a first line break.
1488                    if leading_blanks {
1489                        self.read_break(&mut trailing_breaks);
1490                    } else {
1491                        whitespaces.clear();
1492                        self.read_break(&mut leading_break);
1493                        leading_blanks = true;
1494                    }
1495                }
1496                self.lookahead(1);
1497            }
1498            // Join the whitespaces or fold line breaks.
1499            if leading_blanks {
1500                if leading_break.is_empty() {
1501                    string.push_str(&leading_break);
1502                    string.push_str(&trailing_breaks);
1503                    trailing_breaks.clear();
1504                    leading_break.clear();
1505                } else {
1506                    if trailing_breaks.is_empty() {
1507                        string.push(' ');
1508                    } else {
1509                        string.push_str(&trailing_breaks);
1510                        trailing_breaks.clear();
1511                    }
1512                    leading_break.clear();
1513                }
1514            } else {
1515                string.push_str(&whitespaces);
1516                whitespaces.clear();
1517            }
1518        } // loop
1519
1520        // Eat the right quote.
1521        self.skip();
1522
1523        if single {
1524            Ok(Token(
1525                start_mark,
1526                TokenType::Scalar(TScalarStyle::SingleQuoted, string),
1527            ))
1528        } else {
1529            Ok(Token(
1530                start_mark,
1531                TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
1532            ))
1533        }
1534    }
1535
1536    fn fetch_plain_scalar(&mut self) -> ScanResult {
1537        self.save_simple_key()?;
1538        self.disallow_simple_key();
1539
1540        let tok = self.scan_plain_scalar()?;
1541
1542        self.tokens.push_back(tok);
1543        Ok(())
1544    }
1545
1546    fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
1547        let indent = self.indent + 1;
1548        let start_mark = self.mark;
1549
1550        let mut string = String::new();
1551        let mut leading_break = String::new();
1552        let mut trailing_breaks = String::new();
1553        let mut whitespaces = String::new();
1554        let mut leading_blanks = false;
1555
1556        loop {
1557            /* Check for a document indicator. */
1558            self.lookahead(4);
1559
1560            if self.mark.col == 0
1561                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1562                    || ((self.buffer[0] == '.')
1563                        && (self.buffer[1] == '.')
1564                        && (self.buffer[2] == '.')))
1565                && is_blankz(self.buffer[3])
1566            {
1567                break;
1568            }
1569
1570            if self.ch() == '#' {
1571                break;
1572            }
1573            while !is_blankz(self.ch()) {
1574                // indicators can end a plain scalar, see 7.3.3. Plain Style
1575                match self.ch() {
1576                    ':' if is_blankz(self.buffer[1])
1577                        || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
1578                    {
1579                        break;
1580                    }
1581                    ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
1582                    _ => {}
1583                }
1584
1585                if leading_blanks || !whitespaces.is_empty() {
1586                    if leading_blanks {
1587                        if leading_break.is_empty() {
1588                            string.push_str(&leading_break);
1589                            string.push_str(&trailing_breaks);
1590                            trailing_breaks.clear();
1591                            leading_break.clear();
1592                        } else {
1593                            if trailing_breaks.is_empty() {
1594                                string.push(' ');
1595                            } else {
1596                                string.push_str(&trailing_breaks);
1597                                trailing_breaks.clear();
1598                            }
1599                            leading_break.clear();
1600                        }
1601                        leading_blanks = false;
1602                    } else {
1603                        string.push_str(&whitespaces);
1604                        whitespaces.clear();
1605                    }
1606                }
1607
1608                string.push(self.ch());
1609                self.skip();
1610                self.lookahead(2);
1611            }
1612            // is the end?
1613            if !(is_blank(self.ch()) || is_break(self.ch())) {
1614                break;
1615            }
1616            self.lookahead(1);
1617
1618            while is_blank(self.ch()) || is_break(self.ch()) {
1619                if is_blank(self.ch()) {
1620                    if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
1621                        return Err(ScanError::new(
1622                            start_mark,
1623                            "while scanning a plain scalar, found a tab",
1624                        ));
1625                    }
1626
1627                    if leading_blanks {
1628                        self.skip();
1629                    } else {
1630                        whitespaces.push(self.ch());
1631                        self.skip();
1632                    }
1633                } else {
1634                    self.lookahead(2);
1635                    // Check if it is a first line break
1636                    if leading_blanks {
1637                        self.read_break(&mut trailing_breaks);
1638                    } else {
1639                        whitespaces.clear();
1640                        self.read_break(&mut leading_break);
1641                        leading_blanks = true;
1642                    }
1643                }
1644                self.lookahead(1);
1645            }
1646
1647            // check indentation level
1648            if self.flow_level == 0 && (self.mark.col as isize) < indent {
1649                break;
1650            }
1651        }
1652
1653        if leading_blanks {
1654            self.allow_simple_key();
1655        }
1656
1657        Ok(Token(
1658            start_mark,
1659            TokenType::Scalar(TScalarStyle::Plain, string),
1660        ))
1661    }
1662
1663    fn fetch_key(&mut self) -> ScanResult {
1664        let start_mark = self.mark;
1665        if self.flow_level == 0 {
1666            // Check if we are allowed to start a new key (not necessarily simple).
1667            if !self.simple_key_allowed {
1668                return Err(ScanError::new(
1669                    self.mark,
1670                    "mapping keys are not allowed in this context",
1671                ));
1672            }
1673            self.roll_indent(
1674                start_mark.col,
1675                None,
1676                TokenType::BlockMappingStart,
1677                start_mark,
1678            );
1679        }
1680
1681        self.remove_simple_key()?;
1682
1683        if self.flow_level == 0 {
1684            self.allow_simple_key();
1685        } else {
1686            self.disallow_simple_key();
1687        }
1688
1689        self.skip();
1690        self.tokens.push_back(Token(start_mark, TokenType::Key));
1691        Ok(())
1692    }
1693
1694    fn fetch_value(&mut self) -> ScanResult {
1695        let sk = self.simple_keys.last().unwrap().clone();
1696        let start_mark = self.mark;
1697        if sk.possible {
1698            // insert simple key
1699            let tok = Token(sk.mark, TokenType::Key);
1700            let tokens_parsed = self.tokens_parsed;
1701            self.insert_token(sk.token_number - tokens_parsed, tok);
1702
1703            // Add the BLOCK-MAPPING-START token if needed.
1704            self.roll_indent(
1705                sk.mark.col,
1706                Some(sk.token_number),
1707                TokenType::BlockMappingStart,
1708                start_mark,
1709            );
1710
1711            self.simple_keys.last_mut().unwrap().possible = false;
1712            self.disallow_simple_key();
1713        } else {
1714            // The ':' indicator follows a complex key.
1715            if self.flow_level == 0 {
1716                if !self.simple_key_allowed {
1717                    return Err(ScanError::new(
1718                        start_mark,
1719                        "mapping values are not allowed in this context",
1720                    ));
1721                }
1722
1723                self.roll_indent(
1724                    start_mark.col,
1725                    None,
1726                    TokenType::BlockMappingStart,
1727                    start_mark,
1728                );
1729            }
1730
1731            if self.flow_level == 0 {
1732                self.allow_simple_key();
1733            } else {
1734                self.disallow_simple_key();
1735            }
1736        }
1737        self.skip();
1738        self.tokens.push_back(Token(start_mark, TokenType::Value));
1739
1740        Ok(())
1741    }
1742
1743    fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
1744        if self.flow_level > 0 {
1745            return;
1746        }
1747
1748        if self.indent < col as isize {
1749            self.indents.push(self.indent);
1750            self.indent = col as isize;
1751            let tokens_parsed = self.tokens_parsed;
1752            match number {
1753                Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
1754                None => self.tokens.push_back(Token(mark, tok)),
1755            }
1756        }
1757    }
1758
1759    fn unroll_indent(&mut self, col: isize) {
1760        if self.flow_level > 0 {
1761            return;
1762        }
1763        while self.indent > col {
1764            self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
1765            self.indent = self.indents.pop().unwrap();
1766        }
1767    }
1768
1769    fn save_simple_key(&mut self) -> Result<(), ScanError> {
1770        let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
1771        if self.simple_key_allowed {
1772            let mut sk = SimpleKey::new(self.mark);
1773            sk.possible = true;
1774            sk.required = required;
1775            sk.token_number = self.tokens_parsed + self.tokens.len();
1776
1777            self.remove_simple_key()?;
1778
1779            self.simple_keys.pop();
1780            self.simple_keys.push(sk);
1781        }
1782        Ok(())
1783    }
1784
1785    fn remove_simple_key(&mut self) -> ScanResult {
1786        let last = self.simple_keys.last_mut().unwrap();
1787        if last.possible && last.required {
1788            return Err(ScanError::new(self.mark, "simple key expected"));
1789        }
1790
1791        last.possible = false;
1792        Ok(())
1793    }
1794}
1795
1796#[cfg(test)]
1797mod test {
1798    use super::TokenType::*;
1799    use super::*;
1800
1801    macro_rules! next {
1802        ($p:ident, $tk:pat) => {{
1803            let tok = $p.next().unwrap();
1804            match tok.1 {
1805                $tk => {}
1806                _ => panic!("unexpected token: {:?}", tok),
1807            }
1808        }};
1809    }
1810
1811    macro_rules! next_scalar {
1812        ($p:ident, $tk:expr, $v:expr) => {{
1813            let tok = $p.next().unwrap();
1814            match tok.1 {
1815                Scalar(style, ref v) => {
1816                    assert_eq!(style, $tk);
1817                    assert_eq!(v, $v);
1818                }
1819                _ => panic!("unexpected token: {:?}", tok),
1820            }
1821        }};
1822    }
1823
1824    macro_rules! end {
1825        ($p:ident) => {{
1826            assert_eq!($p.next(), None);
1827        }};
1828    }
1829    /// test cases in libyaml scanner.c
1830    #[test]
1831    fn test_empty() {
1832        let s = "";
1833        let mut p = Scanner::new(s.chars());
1834        next!(p, StreamStart(..));
1835        next!(p, StreamEnd);
1836        end!(p);
1837    }
1838
1839    #[test]
1840    fn test_scalar() {
1841        let s = "a scalar";
1842        let mut p = Scanner::new(s.chars());
1843        next!(p, StreamStart(..));
1844        next!(p, Scalar(TScalarStyle::Plain, _));
1845        next!(p, StreamEnd);
1846        end!(p);
1847    }
1848
1849    #[test]
1850    fn test_explicit_scalar() {
1851        let s = "---
1852'a scalar'
1853...
1854";
1855        let mut p = Scanner::new(s.chars());
1856        next!(p, StreamStart(..));
1857        next!(p, DocumentStart(..));
1858        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1859        next!(p, DocumentEnd);
1860        next!(p, StreamEnd);
1861        end!(p);
1862    }
1863
1864    #[test]
1865    fn test_multiple_documents() {
1866        let s = "
1867'a scalar'
1868---
1869'a scalar'
1870---
1871'a scalar'
1872";
1873        let mut p = Scanner::new(s.chars());
1874        next!(p, StreamStart(..));
1875        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1876        next!(p, DocumentStart(..));
1877        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1878        next!(p, DocumentStart(..));
1879        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1880        next!(p, StreamEnd);
1881        end!(p);
1882    }
1883
1884    #[test]
1885    fn test_a_flow_sequence() {
1886        let s = "[item 1, item 2, item 3]";
1887        let mut p = Scanner::new(s.chars());
1888        next!(p, StreamStart(..));
1889        next!(p, FlowSequenceStart);
1890        next_scalar!(p, TScalarStyle::Plain, "item 1");
1891        next!(p, FlowEntry);
1892        next!(p, Scalar(TScalarStyle::Plain, _));
1893        next!(p, FlowEntry);
1894        next!(p, Scalar(TScalarStyle::Plain, _));
1895        next!(p, FlowSequenceEnd);
1896        next!(p, StreamEnd);
1897        end!(p);
1898    }
1899
1900    #[test]
1901    fn test_a_flow_mapping() {
1902        let s = "
1903{
1904    a simple key: a value, # Note that the KEY token is produced.
1905    ? a complex key: another value,
1906}
1907";
1908        let mut p = Scanner::new(s.chars());
1909        next!(p, StreamStart(..));
1910        next!(p, FlowMappingStart);
1911        next!(p, Key);
1912        next!(p, Scalar(TScalarStyle::Plain, _));
1913        next!(p, Value);
1914        next!(p, Scalar(TScalarStyle::Plain, _));
1915        next!(p, FlowEntry);
1916        next!(p, Key);
1917        next_scalar!(p, TScalarStyle::Plain, "a complex key");
1918        next!(p, Value);
1919        next!(p, Scalar(TScalarStyle::Plain, _));
1920        next!(p, FlowEntry);
1921        next!(p, FlowMappingEnd);
1922        next!(p, StreamEnd);
1923        end!(p);
1924    }
1925
1926    #[test]
1927    fn test_block_sequences() {
1928        let s = "
1929- item 1
1930- item 2
1931-
1932  - item 3.1
1933  - item 3.2
1934-
1935  key 1: value 1
1936  key 2: value 2
1937";
1938        let mut p = Scanner::new(s.chars());
1939        next!(p, StreamStart(..));
1940        next!(p, BlockSequenceStart);
1941        next!(p, BlockEntry);
1942        next_scalar!(p, TScalarStyle::Plain, "item 1");
1943        next!(p, BlockEntry);
1944        next_scalar!(p, TScalarStyle::Plain, "item 2");
1945        next!(p, BlockEntry);
1946        next!(p, BlockSequenceStart);
1947        next!(p, BlockEntry);
1948        next_scalar!(p, TScalarStyle::Plain, "item 3.1");
1949        next!(p, BlockEntry);
1950        next_scalar!(p, TScalarStyle::Plain, "item 3.2");
1951        next!(p, BlockEnd);
1952        next!(p, BlockEntry);
1953        next!(p, BlockMappingStart);
1954        next!(p, Key);
1955        next_scalar!(p, TScalarStyle::Plain, "key 1");
1956        next!(p, Value);
1957        next_scalar!(p, TScalarStyle::Plain, "value 1");
1958        next!(p, Key);
1959        next_scalar!(p, TScalarStyle::Plain, "key 2");
1960        next!(p, Value);
1961        next_scalar!(p, TScalarStyle::Plain, "value 2");
1962        next!(p, BlockEnd);
1963        next!(p, BlockEnd);
1964        next!(p, StreamEnd);
1965        end!(p);
1966    }
1967
1968    #[test]
1969    fn test_block_mappings() {
1970        let s = "
1971a simple key: a value   # The KEY token is produced here.
1972? a complex key
1973: another value
1974a mapping:
1975  key 1: value 1
1976  key 2: value 2
1977a sequence:
1978  - item 1
1979  - item 2
1980";
1981        let mut p = Scanner::new(s.chars());
1982        next!(p, StreamStart(..));
1983        next!(p, BlockMappingStart);
1984        next!(p, Key);
1985        next!(p, Scalar(_, _));
1986        next!(p, Value);
1987        next!(p, Scalar(_, _));
1988        next!(p, Key);
1989        next!(p, Scalar(_, _));
1990        next!(p, Value);
1991        next!(p, Scalar(_, _));
1992        next!(p, Key);
1993        next!(p, Scalar(_, _));
1994        next!(p, Value); // libyaml comment seems to be wrong
1995        next!(p, BlockMappingStart);
1996        next!(p, Key);
1997        next!(p, Scalar(_, _));
1998        next!(p, Value);
1999        next!(p, Scalar(_, _));
2000        next!(p, Key);
2001        next!(p, Scalar(_, _));
2002        next!(p, Value);
2003        next!(p, Scalar(_, _));
2004        next!(p, BlockEnd);
2005        next!(p, Key);
2006        next!(p, Scalar(_, _));
2007        next!(p, Value);
2008        next!(p, BlockSequenceStart);
2009        next!(p, BlockEntry);
2010        next!(p, Scalar(_, _));
2011        next!(p, BlockEntry);
2012        next!(p, Scalar(_, _));
2013        next!(p, BlockEnd);
2014        next!(p, BlockEnd);
2015        next!(p, StreamEnd);
2016        end!(p);
2017    }
2018
2019    #[test]
2020    fn test_no_block_sequence_start() {
2021        let s = "
2022key:
2023- item 1
2024- item 2
2025";
2026        let mut p = Scanner::new(s.chars());
2027        next!(p, StreamStart(..));
2028        next!(p, BlockMappingStart);
2029        next!(p, Key);
2030        next_scalar!(p, TScalarStyle::Plain, "key");
2031        next!(p, Value);
2032        next!(p, BlockEntry);
2033        next_scalar!(p, TScalarStyle::Plain, "item 1");
2034        next!(p, BlockEntry);
2035        next_scalar!(p, TScalarStyle::Plain, "item 2");
2036        next!(p, BlockEnd);
2037        next!(p, StreamEnd);
2038        end!(p);
2039    }
2040
2041    #[test]
2042    fn test_collections_in_sequence() {
2043        let s = "
2044- - item 1
2045  - item 2
2046- key 1: value 1
2047  key 2: value 2
2048- ? complex key
2049  : complex value
2050";
2051        let mut p = Scanner::new(s.chars());
2052        next!(p, StreamStart(..));
2053        next!(p, BlockSequenceStart);
2054        next!(p, BlockEntry);
2055        next!(p, BlockSequenceStart);
2056        next!(p, BlockEntry);
2057        next_scalar!(p, TScalarStyle::Plain, "item 1");
2058        next!(p, BlockEntry);
2059        next_scalar!(p, TScalarStyle::Plain, "item 2");
2060        next!(p, BlockEnd);
2061        next!(p, BlockEntry);
2062        next!(p, BlockMappingStart);
2063        next!(p, Key);
2064        next_scalar!(p, TScalarStyle::Plain, "key 1");
2065        next!(p, Value);
2066        next_scalar!(p, TScalarStyle::Plain, "value 1");
2067        next!(p, Key);
2068        next_scalar!(p, TScalarStyle::Plain, "key 2");
2069        next!(p, Value);
2070        next_scalar!(p, TScalarStyle::Plain, "value 2");
2071        next!(p, BlockEnd);
2072        next!(p, BlockEntry);
2073        next!(p, BlockMappingStart);
2074        next!(p, Key);
2075        next_scalar!(p, TScalarStyle::Plain, "complex key");
2076        next!(p, Value);
2077        next_scalar!(p, TScalarStyle::Plain, "complex value");
2078        next!(p, BlockEnd);
2079        next!(p, BlockEnd);
2080        next!(p, StreamEnd);
2081        end!(p);
2082    }
2083
2084    #[test]
2085    fn test_collections_in_mapping() {
2086        let s = "
2087? a sequence
2088: - item 1
2089  - item 2
2090? a mapping
2091: key 1: value 1
2092  key 2: value 2
2093";
2094        let mut p = Scanner::new(s.chars());
2095        next!(p, StreamStart(..));
2096        next!(p, BlockMappingStart);
2097        next!(p, Key);
2098        next_scalar!(p, TScalarStyle::Plain, "a sequence");
2099        next!(p, Value);
2100        next!(p, BlockSequenceStart);
2101        next!(p, BlockEntry);
2102        next_scalar!(p, TScalarStyle::Plain, "item 1");
2103        next!(p, BlockEntry);
2104        next_scalar!(p, TScalarStyle::Plain, "item 2");
2105        next!(p, BlockEnd);
2106        next!(p, Key);
2107        next_scalar!(p, TScalarStyle::Plain, "a mapping");
2108        next!(p, Value);
2109        next!(p, BlockMappingStart);
2110        next!(p, Key);
2111        next_scalar!(p, TScalarStyle::Plain, "key 1");
2112        next!(p, Value);
2113        next_scalar!(p, TScalarStyle::Plain, "value 1");
2114        next!(p, Key);
2115        next_scalar!(p, TScalarStyle::Plain, "key 2");
2116        next!(p, Value);
2117        next_scalar!(p, TScalarStyle::Plain, "value 2");
2118        next!(p, BlockEnd);
2119        next!(p, BlockEnd);
2120        next!(p, StreamEnd);
2121        end!(p);
2122    }
2123
2124    #[test]
2125    fn test_spec_ex7_3() {
2126        let s = "
2127{
2128    ? foo :,
2129    : bar,
2130}
2131";
2132        let mut p = Scanner::new(s.chars());
2133        next!(p, StreamStart(..));
2134        next!(p, FlowMappingStart);
2135        next!(p, Key);
2136        next_scalar!(p, TScalarStyle::Plain, "foo");
2137        next!(p, Value);
2138        next!(p, FlowEntry);
2139        next!(p, Value);
2140        next_scalar!(p, TScalarStyle::Plain, "bar");
2141        next!(p, FlowEntry);
2142        next!(p, FlowMappingEnd);
2143        next!(p, StreamEnd);
2144        end!(p);
2145    }
2146
2147    #[test]
2148    fn test_plain_scalar_starting_with_indicators_in_flow() {
2149        // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
2150        // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
2151        // character if followed by a non-space “safe” character, as this causes no ambiguity."
2152
2153        let s = "{a: :b}";
2154        let mut p = Scanner::new(s.chars());
2155        next!(p, StreamStart(..));
2156        next!(p, FlowMappingStart);
2157        next!(p, Key);
2158        next_scalar!(p, TScalarStyle::Plain, "a");
2159        next!(p, Value);
2160        next_scalar!(p, TScalarStyle::Plain, ":b");
2161        next!(p, FlowMappingEnd);
2162        next!(p, StreamEnd);
2163        end!(p);
2164
2165        let s = "{a: ?b}";
2166        let mut p = Scanner::new(s.chars());
2167        next!(p, StreamStart(..));
2168        next!(p, FlowMappingStart);
2169        next!(p, Key);
2170        next_scalar!(p, TScalarStyle::Plain, "a");
2171        next!(p, Value);
2172        next_scalar!(p, TScalarStyle::Plain, "?b");
2173        next!(p, FlowMappingEnd);
2174        next!(p, StreamEnd);
2175        end!(p);
2176    }
2177
2178    #[test]
2179    fn test_plain_scalar_starting_with_indicators_in_block() {
2180        let s = ":a";
2181        let mut p = Scanner::new(s.chars());
2182        next!(p, StreamStart(..));
2183        next_scalar!(p, TScalarStyle::Plain, ":a");
2184        next!(p, StreamEnd);
2185        end!(p);
2186
2187        let s = "?a";
2188        let mut p = Scanner::new(s.chars());
2189        next!(p, StreamStart(..));
2190        next_scalar!(p, TScalarStyle::Plain, "?a");
2191        next!(p, StreamEnd);
2192        end!(p);
2193    }
2194
2195    #[test]
2196    fn test_plain_scalar_containing_indicators_in_block() {
2197        let s = "a:,b";
2198        let mut p = Scanner::new(s.chars());
2199        next!(p, StreamStart(..));
2200        next_scalar!(p, TScalarStyle::Plain, "a:,b");
2201        next!(p, StreamEnd);
2202        end!(p);
2203
2204        let s = ":,b";
2205        let mut p = Scanner::new(s.chars());
2206        next!(p, StreamStart(..));
2207        next_scalar!(p, TScalarStyle::Plain, ":,b");
2208        next!(p, StreamEnd);
2209        end!(p);
2210    }
2211
2212    #[test]
2213    fn test_scanner_cr() {
2214        let s = "---\r\n- tok1\r\n- tok2";
2215        let mut p = Scanner::new(s.chars());
2216        next!(p, StreamStart(..));
2217        next!(p, DocumentStart(..));
2218        next!(p, BlockSequenceStart);
2219        next!(p, BlockEntry);
2220        next_scalar!(p, TScalarStyle::Plain, "tok1");
2221        next!(p, BlockEntry);
2222        next_scalar!(p, TScalarStyle::Plain, "tok2");
2223        next!(p, BlockEnd);
2224        next!(p, StreamEnd);
2225        end!(p);
2226    }
2227
2228    #[test]
2229    fn test_uri() {
2230        // TODO
2231    }
2232
2233    #[test]
2234    fn test_uri_escapes() {
2235        // TODO
2236    }
2237}