1use std::collections::VecDeque;
2use std::error::Error;
3use std::{char, fmt};
4
/// Character encoding carried by the stream-start token.
///
/// UTF-8 is the only encoding this type can express.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8 text.
    Utf8,
}
9
/// Presentation style attached to a scalar token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TScalarStyle {
    /// No particular style.
    Any,
    /// Unquoted scalar.
    Plain,
    /// Scalar delimited by `'`.
    SingleQuoted,
    /// Scalar delimited by `"`.
    DoubleQuoted,

    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>`.
    ///
    /// NOTE(review): the name looks like a misspelling of `Folded`; it is kept
    /// as-is because renaming a public variant would break callers.
    Foled,
}
20
/// A position in the scanned input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Marker {
    /// Number of characters consumed before this position.
    index: usize,
    /// Line number of this position.
    line: usize,
    /// Zero-based column of this position.
    col: usize,
}

impl Marker {
    /// Builds a marker from its three coordinates.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        Self { index, line, col }
    }

    /// Returns the character offset.
    pub fn index(&self) -> usize {
        let Self { index, .. } = *self;
        index
    }

    /// Returns the line number.
    pub fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Returns the zero-based column.
    pub fn col(&self) -> usize {
        let Self { col, .. } = *self;
        col
    }
}
45
46#[derive(Clone, PartialEq, Debug, Eq)]
47pub struct ScanError {
48 mark: Marker,
49 info: String,
50}
51
52impl ScanError {
53 pub fn new(loc: Marker, info: &str) -> ScanError {
54 ScanError {
55 mark: loc,
56 info: info.to_owned(),
57 }
58 }
59
60 pub fn marker(&self) -> &Marker {
61 &self.mark
62 }
63}
64
65impl Error for ScanError {
66 fn description(&self) -> &str {
67 self.info.as_ref()
68 }
69
70 fn cause(&self) -> Option<&dyn Error> {
71 None
72 }
73}
74
75impl fmt::Display for ScanError {
76 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
78 write!(
79 formatter,
80 "{} at line {} column {}",
81 self.info,
82 self.mark.line,
83 self.mark.col + 1
84 )
85 }
86}
87
88#[derive(Clone, PartialEq, Debug, Eq)]
89pub enum TokenType {
90 NoToken,
91 StreamStart(TEncoding),
92 StreamEnd,
93 VersionDirective(u32, u32),
95 TagDirective(String, String),
97 DocumentStart(u64, u64),
98 DocumentEnd,
99 BlockSequenceStart,
100 BlockMappingStart,
101 BlockEnd,
102 FlowSequenceStart,
103 FlowSequenceEnd,
104 FlowMappingStart,
105 FlowMappingEnd,
106 BlockEntry,
107 FlowEntry,
108 Key,
109 Value,
110 Alias(String),
111 Anchor(String),
112 Tag(String, String),
114 Scalar(TScalarStyle, String),
115}
116
117#[derive(Clone, PartialEq, Debug, Eq)]
118pub struct Token(pub Marker, pub TokenType);
119
120#[derive(Clone, PartialEq, Debug, Eq)]
121struct SimpleKey {
122 possible: bool,
123 required: bool,
124 token_number: usize,
125 mark: Marker,
126}
127
128impl SimpleKey {
129 fn new(mark: Marker) -> SimpleKey {
130 SimpleKey {
131 possible: false,
132 required: false,
133 token_number: 0,
134 mark,
135 }
136 }
137}
138
139#[derive(Debug)]
140pub struct Scanner<T> {
141 rdr: T,
142 mark: Marker,
143 tokens: VecDeque<Token>,
144 buffer: VecDeque<char>,
145 error: Option<ScanError>,
146
147 stream_start_produced: bool,
148 stream_end_produced: bool,
149 adjacent_value_allowed_at: usize,
150 simple_key_allowed: bool,
151 simple_keys: Vec<SimpleKey>,
152 indent: isize,
153 indents: Vec<isize>,
154 flow_level: u8,
155 tokens_parsed: usize,
156 token_available: bool,
157}
158
159impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
160 type Item = Token;
161 fn next(&mut self) -> Option<Token> {
162 if self.error.is_some() {
163 return None;
164 }
165 match self.next_token() {
166 Ok(tok) => tok,
167 Err(e) => {
168 self.error = Some(e);
169 None
170 }
171 }
172 }
173}
174
/// True for the NUL character, which the scanner uses as its end-of-input marker.
#[inline]
fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
/// True for a line-break character (`\n` or `\r`).
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
183#[inline]
184fn is_breakz(c: char) -> bool {
185 is_break(c) || is_z(c)
186}
/// True for a space or a tab.
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
191#[inline]
192fn is_blankz(c: char) -> bool {
193 is_blank(c) || is_breakz(c)
194}
/// True for an ASCII decimal digit.
#[inline]
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// True for an ASCII letter or digit, `_`, or `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '-'
}
/// True for an ASCII hexadecimal digit in either case.
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Converts a hexadecimal digit to its numeric value.
///
/// # Panics
///
/// Panics if `c` is not an ASCII hexadecimal digit; callers are expected to
/// check with `is_hex` first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
/// True for a flow-collection indicator: `,`, `[`, `]`, `{` or `}`.
#[inline]
fn is_flow(c: char) -> bool {
    [',', '[', ']', '{', '}'].contains(&c)
}
220
221pub type ScanResult = Result<(), ScanError>;
222
223impl<T: Iterator<Item = char>> Scanner<T> {
224 pub fn new(rdr: T) -> Scanner<T> {
226 Scanner {
227 rdr,
228 buffer: VecDeque::new(),
229 mark: Marker::new(0, 1, 0),
230 tokens: VecDeque::new(),
231 error: None,
232
233 stream_start_produced: false,
234 stream_end_produced: false,
235 adjacent_value_allowed_at: 0,
236 simple_key_allowed: true,
237 simple_keys: Vec::new(),
238 indent: -1,
239 indents: Vec::new(),
240 flow_level: 0,
241 tokens_parsed: 0,
242 token_available: false,
243 }
244 }
245 #[inline]
246 pub fn get_error(&self) -> Option<ScanError> {
247 self.error.as_ref().cloned()
248 }
249
250 #[inline]
251 fn lookahead(&mut self, count: usize) {
252 if self.buffer.len() >= count {
253 return;
254 }
255 for _ in 0..(count - self.buffer.len()) {
256 self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
257 }
258 }
259 #[inline]
260 fn skip(&mut self) {
261 let c = self.buffer.pop_front().unwrap();
262
263 self.mark.index += 1;
264 if c == '\n' {
265 self.mark.line += 1;
266 self.mark.col = 0;
267 } else {
268 self.mark.col += 1;
269 }
270 }
271 #[inline]
272 fn skip_line(&mut self) {
273 if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
274 self.skip();
275 self.skip();
276 } else if is_break(self.buffer[0]) {
277 self.skip();
278 }
279 }
280 #[inline]
281 fn ch(&self) -> char {
282 self.buffer[0]
283 }
284 #[inline]
285 fn ch_is(&self, c: char) -> bool {
286 self.buffer[0] == c
287 }
288 #[allow(dead_code)]
289 #[inline]
290 fn eof(&self) -> bool {
291 self.ch_is('\0')
292 }
293 #[inline]
294 pub fn stream_started(&self) -> bool {
295 self.stream_start_produced
296 }
297 #[inline]
298 pub fn stream_ended(&self) -> bool {
299 self.stream_end_produced
300 }
301 #[inline]
302 pub fn mark(&self) -> Marker {
303 self.mark
304 }
305 #[inline]
306 fn read_break(&mut self, s: &mut String) {
307 if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
308 s.push('\n');
309 self.skip();
310 self.skip();
311 } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
312 s.push('\n');
313 self.skip();
314 } else {
315 unreachable!();
316 }
317 }
318 fn insert_token(&mut self, pos: usize, tok: Token) {
319 let old_len = self.tokens.len();
320 assert!(pos <= old_len);
321 self.tokens.push_back(tok);
322 for i in 0..old_len - pos {
323 self.tokens.swap(old_len - i, old_len - i - 1);
324 }
325 }
326 fn allow_simple_key(&mut self) {
327 self.simple_key_allowed = true;
328 }
329 fn disallow_simple_key(&mut self) {
330 self.simple_key_allowed = false;
331 }
332
333 pub fn fetch_next_token(&mut self) -> ScanResult {
334 self.lookahead(1);
335 if !self.stream_start_produced {
338 self.fetch_stream_start();
339 return Ok(());
340 }
341 self.skip_to_next_token();
342
343 self.stale_simple_keys()?;
344
345 let mark = self.mark;
346 self.unroll_indent(mark.col as isize);
347
348 self.lookahead(4);
349
350 if is_z(self.ch()) {
351 self.fetch_stream_end()?;
352 return Ok(());
353 }
354
355 if self.mark.col == 0 && self.ch_is('%') {
357 return self.fetch_directive();
358 }
359
360 if self.mark.col == 0
361 && self.buffer[0] == '-'
362 && self.buffer[1] == '-'
363 && self.buffer[2] == '-'
364 && is_blankz(self.buffer[3])
365 {
366 return self.fetch_document_start();
367 }
368
369 if self.mark.col == 0
370 && self.buffer[0] == '.'
371 && self.buffer[1] == '.'
372 && self.buffer[2] == '.'
373 && is_blankz(self.buffer[3])
374 {
375 self.fetch_document_indicator(TokenType::DocumentEnd)?;
376 return Ok(());
377 }
378
379 let c = self.buffer[0];
380 let nc = self.buffer[1];
381 match c {
382 '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
383 '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
384 ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
385 '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
386 ',' => self.fetch_flow_entry(),
387 '-' if is_blankz(nc) => self.fetch_block_entry(),
388 '?' if is_blankz(nc) => self.fetch_key(),
389 ':' if is_blankz(nc)
390 || (self.flow_level > 0
391 && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
392 {
393 self.fetch_value()
394 }
395 '*' => self.fetch_anchor(true),
397 '&' => self.fetch_anchor(false),
399 '!' => self.fetch_tag(),
400 '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
402 '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
404 '\'' => self.fetch_flow_scalar(true),
405 '"' => self.fetch_flow_scalar(false),
406 '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
408 ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
409 '%' | '@' | '`' => Err(ScanError::new(
410 self.mark,
411 &format!("unexpected character: `{}'", c),
412 )),
413 _ => self.fetch_plain_scalar(),
414 }
415 }
416
417 pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
418 if self.stream_end_produced {
419 return Ok(None);
420 }
421
422 if !self.token_available {
423 self.fetch_more_tokens()?;
424 }
425 let t = self.tokens.pop_front().unwrap();
426 self.token_available = false;
427 self.tokens_parsed += 1;
428
429 if let TokenType::StreamEnd = t.1 {
430 self.stream_end_produced = true;
431 }
432 Ok(Some(t))
433 }
434
435 pub fn fetch_more_tokens(&mut self) -> ScanResult {
436 let mut need_more;
437 loop {
438 need_more = false;
439 if self.tokens.is_empty() {
440 need_more = true;
441 } else {
442 self.stale_simple_keys()?;
443 for sk in &self.simple_keys {
444 if sk.possible && sk.token_number == self.tokens_parsed {
445 need_more = true;
446 break;
447 }
448 }
449 }
450
451 if !need_more {
452 break;
453 }
454 self.fetch_next_token()?;
455 }
456 self.token_available = true;
457
458 Ok(())
459 }
460
461 fn stale_simple_keys(&mut self) -> ScanResult {
462 for sk in &mut self.simple_keys {
463 if sk.possible
464 && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
465 {
466 if sk.required {
467 return Err(ScanError::new(self.mark, "simple key expect ':'"));
468 }
469 sk.possible = false;
470 }
471 }
472 Ok(())
473 }
474
475 fn skip_to_next_token(&mut self) {
476 loop {
477 self.lookahead(1);
478 match self.ch() {
480 ' ' => self.skip(),
481 '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
482 '\n' | '\r' => {
483 self.lookahead(2);
484 self.skip_line();
485 if self.flow_level == 0 {
486 self.allow_simple_key();
487 }
488 }
489 '#' => {
490 while !is_breakz(self.ch()) {
491 self.skip();
492 self.lookahead(1);
493 }
494 }
495 _ => break,
496 }
497 }
498 }
499
500 fn fetch_stream_start(&mut self) {
501 let mark = self.mark;
502 self.indent = -1;
503 self.stream_start_produced = true;
504 self.allow_simple_key();
505 self.tokens
506 .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
507 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
508 }
509
510 fn fetch_stream_end(&mut self) -> ScanResult {
511 if self.mark.col != 0 {
513 self.mark.col = 0;
514 self.mark.line += 1;
515 }
516
517 self.unroll_indent(-1);
518 self.remove_simple_key()?;
519 self.disallow_simple_key();
520
521 self.tokens
522 .push_back(Token(self.mark, TokenType::StreamEnd));
523 Ok(())
524 }
525
526 fn fetch_directive(&mut self) -> ScanResult {
527 self.unroll_indent(-1);
528 self.remove_simple_key()?;
529
530 self.disallow_simple_key();
531
532 let tok = self.scan_directive()?;
533
534 self.tokens.push_back(tok);
535
536 Ok(())
537 }
538
539 fn scan_directive(&mut self) -> Result<Token, ScanError> {
540 let start_mark = self.mark;
541 self.skip();
542
543 let name = self.scan_directive_name()?;
544 let tok = match name.as_ref() {
545 "YAML" => self.scan_version_directive_value(&start_mark)?,
546 "TAG" => self.scan_tag_directive_value(&start_mark)?,
547 _ => {
549 self.lookahead(1);
551 while !is_breakz(self.ch()) {
552 self.skip();
553 self.lookahead(1);
554 }
555 Token(
557 start_mark,
558 TokenType::TagDirective(String::new(), String::new()),
559 )
560 }
563 };
564 self.lookahead(1);
565
566 while is_blank(self.ch()) {
567 self.skip();
568 self.lookahead(1);
569 }
570
571 if self.ch() == '#' {
572 while !is_breakz(self.ch()) {
573 self.skip();
574 self.lookahead(1);
575 }
576 }
577
578 if !is_breakz(self.ch()) {
579 return Err(ScanError::new(
580 start_mark,
581 "while scanning a directive, did not find expected comment or line break",
582 ));
583 }
584
585 if is_break(self.ch()) {
587 self.lookahead(2);
588 self.skip_line();
589 }
590
591 Ok(tok)
592 }
593
594 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
595 self.lookahead(1);
596
597 while is_blank(self.ch()) {
598 self.skip();
599 self.lookahead(1);
600 }
601
602 let major = self.scan_version_directive_number(mark)?;
603
604 if self.ch() != '.' {
605 return Err(ScanError::new(
606 *mark,
607 "while scanning a YAML directive, did not find expected digit or '.' character",
608 ));
609 }
610
611 self.skip();
612
613 let minor = self.scan_version_directive_number(mark)?;
614
615 Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
616 }
617
618 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
619 let start_mark = self.mark;
620 let mut string = String::new();
621 self.lookahead(1);
622 while is_alpha(self.ch()) {
623 string.push(self.ch());
624 self.skip();
625 self.lookahead(1);
626 }
627
628 if string.is_empty() {
629 return Err(ScanError::new(
630 start_mark,
631 "while scanning a directive, could not find expected directive name",
632 ));
633 }
634
635 if !is_blankz(self.ch()) {
636 return Err(ScanError::new(
637 start_mark,
638 "while scanning a directive, found unexpected non-alphabetical character",
639 ));
640 }
641
642 Ok(string)
643 }
644
645 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
646 let mut val = 0u32;
647 let mut length = 0usize;
648 self.lookahead(1);
649 while is_digit(self.ch()) {
650 if length + 1 > 9 {
651 return Err(ScanError::new(
652 *mark,
653 "while scanning a YAML directive, found extremely long version number",
654 ));
655 }
656 length += 1;
657 val = val * 10 + ((self.ch() as u32) - ('0' as u32));
658 self.skip();
659 self.lookahead(1);
660 }
661
662 if length == 0 {
663 return Err(ScanError::new(
664 *mark,
665 "while scanning a YAML directive, did not find expected version number",
666 ));
667 }
668
669 Ok(val)
670 }
671
672 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
673 self.lookahead(1);
674 while is_blank(self.ch()) {
676 self.skip();
677 self.lookahead(1);
678 }
679 let handle = self.scan_tag_handle(true, mark)?;
680
681 self.lookahead(1);
682 while is_blank(self.ch()) {
684 self.skip();
685 self.lookahead(1);
686 }
687
688 let is_secondary = handle == "!!";
689 let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?;
690
691 self.lookahead(1);
692
693 if is_blankz(self.ch()) {
694 Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
695 } else {
696 Err(ScanError::new(
697 *mark,
698 "while scanning TAG, did not find expected whitespace or line break",
699 ))
700 }
701 }
702
703 fn fetch_tag(&mut self) -> ScanResult {
704 self.save_simple_key()?;
705 self.disallow_simple_key();
706
707 let tok = self.scan_tag()?;
708 self.tokens.push_back(tok);
709 Ok(())
710 }
711
712 fn scan_tag(&mut self) -> Result<Token, ScanError> {
713 let start_mark = self.mark;
714 let mut handle = String::new();
715 let mut suffix;
716 let mut secondary = false;
717
718 self.lookahead(2);
720
721 if self.buffer[1] == '<' {
722 self.skip();
724 self.skip();
725 suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?;
726
727 if self.ch() != '>' {
728 return Err(ScanError::new(
729 start_mark,
730 "while scanning a tag, did not find the expected '>'",
731 ));
732 }
733
734 self.skip();
735 } else {
736 handle = self.scan_tag_handle(false, &start_mark)?;
738 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
740 if handle == "!!" {
741 secondary = true;
742 }
743 suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?;
744 } else {
745 suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
746 handle = "!".to_owned();
747 if suffix.is_empty() {
750 handle.clear();
751 suffix = "!".to_owned();
752 }
753 }
754 }
755
756 self.lookahead(1);
757 if is_blankz(self.ch()) {
758 Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
760 } else {
761 Err(ScanError::new(
762 start_mark,
763 "while scanning a tag, did not find expected whitespace or line break",
764 ))
765 }
766 }
767
768 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
769 let mut string = String::new();
770 self.lookahead(1);
771 if self.ch() != '!' {
772 return Err(ScanError::new(
773 *mark,
774 "while scanning a tag, did not find expected '!'",
775 ));
776 }
777
778 string.push(self.ch());
779 self.skip();
780
781 self.lookahead(1);
782 while is_alpha(self.ch()) {
783 string.push(self.ch());
784 self.skip();
785 self.lookahead(1);
786 }
787
788 if self.ch() == '!' {
790 string.push(self.ch());
791 self.skip();
792 } else if directive && string != "!" {
793 return Err(ScanError::new(
797 *mark,
798 "while parsing a tag directive, did not find expected '!'",
799 ));
800 }
801 Ok(string)
802 }
803
804 fn scan_tag_uri(
805 &mut self,
806 directive: bool,
807 _is_secondary: bool,
808 head: &str,
809 mark: &Marker,
810 ) -> Result<String, ScanError> {
811 let mut length = head.len();
812 let mut string = String::new();
813
814 if length > 1 {
817 string.extend(head.chars().skip(1));
818 }
819
820 self.lookahead(1);
821 while match self.ch() {
829 ';' | '/' | '?' | ':' | '@' | '&' => true,
830 '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
831 '%' => true,
832 c if is_alpha(c) => true,
833 _ => false,
834 } {
835 if self.ch() == '%' {
837 string.push(self.scan_uri_escapes(directive, mark)?);
838 } else {
839 string.push(self.ch());
840 self.skip();
841 }
842
843 length += 1;
844 self.lookahead(1);
845 }
846
847 if length == 0 {
848 return Err(ScanError::new(
849 *mark,
850 "while parsing a tag, did not find expected tag URI",
851 ));
852 }
853
854 Ok(string)
855 }
856
857 fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
858 let mut width = 0usize;
859 let mut code = 0u32;
860 loop {
861 self.lookahead(3);
862
863 if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
864 return Err(ScanError::new(
865 *mark,
866 "while parsing a tag, did not find URI escaped octet",
867 ));
868 }
869
870 let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
871 if width == 0 {
872 width = match octet {
873 _ if octet & 0x80 == 0x00 => 1,
874 _ if octet & 0xE0 == 0xC0 => 2,
875 _ if octet & 0xF0 == 0xE0 => 3,
876 _ if octet & 0xF8 == 0xF0 => 4,
877 _ => {
878 return Err(ScanError::new(
879 *mark,
880 "while parsing a tag, found an incorrect leading UTF-8 octet",
881 ));
882 }
883 };
884 code = octet;
885 } else {
886 if octet & 0xc0 != 0x80 {
887 return Err(ScanError::new(
888 *mark,
889 "while parsing a tag, found an incorrect trailing UTF-8 octet",
890 ));
891 }
892 code = (code << 8) + octet;
893 }
894
895 self.skip();
896 self.skip();
897 self.skip();
898
899 width -= 1;
900 if width == 0 {
901 break;
902 }
903 }
904
905 match char::from_u32(code) {
906 Some(ch) => Ok(ch),
907 None => Err(ScanError::new(
908 *mark,
909 "while parsing a tag, found an invalid UTF-8 codepoint",
910 )),
911 }
912 }
913
914 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
915 self.save_simple_key()?;
916 self.disallow_simple_key();
917
918 let tok = self.scan_anchor(alias)?;
919
920 self.tokens.push_back(tok);
921
922 Ok(())
923 }
924
925 fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
926 let mut string = String::new();
927 let start_mark = self.mark;
928
929 self.skip();
930 self.lookahead(1);
931 while is_alpha(self.ch()) {
932 string.push(self.ch());
933 self.skip();
934 self.lookahead(1);
935 }
936
937 if string.is_empty()
938 || match self.ch() {
939 c if is_blankz(c) => false,
940 '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
941 _ => true,
942 }
943 {
944 return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
945 }
946
947 if alias {
948 Ok(Token(start_mark, TokenType::Alias(string)))
949 } else {
950 Ok(Token(start_mark, TokenType::Anchor(string)))
951 }
952 }
953
954 fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
955 self.save_simple_key()?;
957
958 self.increase_flow_level()?;
959
960 self.allow_simple_key();
961
962 let start_mark = self.mark;
963 self.skip();
964
965 self.tokens.push_back(Token(start_mark, tok));
966 Ok(())
967 }
968
969 fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
970 self.remove_simple_key()?;
971 self.decrease_flow_level();
972
973 self.disallow_simple_key();
974
975 let start_mark = self.mark;
976 self.skip();
977
978 self.tokens.push_back(Token(start_mark, tok));
979 Ok(())
980 }
981
982 fn fetch_flow_entry(&mut self) -> ScanResult {
983 self.remove_simple_key()?;
984 self.allow_simple_key();
985
986 let start_mark = self.mark;
987 self.skip();
988
989 self.tokens
990 .push_back(Token(start_mark, TokenType::FlowEntry));
991 Ok(())
992 }
993
994 fn increase_flow_level(&mut self) -> ScanResult {
995 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
996 self.flow_level = self
997 .flow_level
998 .checked_add(1)
999 .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
1000 Ok(())
1001 }
1002 fn decrease_flow_level(&mut self) {
1003 if self.flow_level > 0 {
1004 self.flow_level -= 1;
1005 self.simple_keys.pop().unwrap();
1006 }
1007 }
1008
1009 fn fetch_block_entry(&mut self) -> ScanResult {
1010 if self.flow_level == 0 {
1011 if !self.simple_key_allowed {
1013 return Err(ScanError::new(
1014 self.mark,
1015 "block sequence entries are not allowed in this context",
1016 ));
1017 }
1018
1019 let mark = self.mark;
1020 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1022 } else {
1023 return Err(ScanError::new(
1025 self.mark,
1026 r#""-" is only valid inside a block"#,
1027 ));
1028 }
1029 self.remove_simple_key()?;
1030 self.allow_simple_key();
1031
1032 let start_mark = self.mark;
1033 self.skip();
1034
1035 self.tokens
1036 .push_back(Token(start_mark, TokenType::BlockEntry));
1037 Ok(())
1038 }
1039
1040 fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1041 self.unroll_indent(-1);
1042 self.remove_simple_key()?;
1043 self.disallow_simple_key();
1044
1045 let mark = self.mark;
1046
1047 self.skip();
1048 self.skip();
1049 self.skip();
1050
1051 self.tokens.push_back(Token(mark, t));
1052 Ok(())
1053 }
1054
1055 fn fetch_document_start(&mut self) -> ScanResult {
1056 self.unroll_indent(-1);
1057 self.remove_simple_key()?;
1058 self.disallow_simple_key();
1059
1060 let mark = self.mark;
1061
1062 self.skip();
1063 self.skip();
1064 self.skip();
1065 self.skip();
1066
1067 self.lookahead(1);
1068 if self.ch() != '!' {
1069 return Err(ScanError::new(
1070 mark,
1071 "while scanning a tag, did not find expected '!'",
1072 ));
1073 }
1074 self.skip();
1076
1077 self.lookahead(1);
1078 while is_alpha(self.ch()) {
1079 self.skip();
1081 self.lookahead(1);
1082 }
1083 if self.ch() != '!' {
1084 return Err(ScanError::new(
1085 mark,
1086 "while scanning a tag, did not find expected '!'",
1087 ));
1088 }
1089 self.skip();
1090 let mut class_id = 0u64;
1091 self.lookahead(1);
1092 while is_digit(self.ch()) {
1093 class_id = class_id * 10 + (self.ch() as usize - '0' as usize) as u64;
1094 self.skip();
1095 self.lookahead(1);
1096 }
1097 while is_blank(self.ch()) {
1098 self.skip();
1099 self.lookahead(1);
1100 }
1101 if self.ch() != '&' {
1102 return Err(ScanError::new(
1103 mark,
1104 "while scanning a tag, did not find expected '&'",
1105 ));
1106 }
1107 self.skip();
1108 self.lookahead(1);
1109
1110 let mut object_id = 0u64;
1111 while is_digit(self.ch()) {
1112 object_id = object_id * 10 + (self.ch() as usize - '0' as usize) as u64;
1113 self.skip();
1114 self.lookahead(1);
1115 }
1116
1117 self.tokens.push_back(Token(mark, TokenType::DocumentStart(class_id, object_id)));
1118 Ok(())
1119 }
1120
1121 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1122 self.save_simple_key()?;
1123 self.allow_simple_key();
1124 let tok = self.scan_block_scalar(literal)?;
1125
1126 self.tokens.push_back(tok);
1127 Ok(())
1128 }
1129
1130 fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
1131 let start_mark = self.mark;
1132 let mut chomping: i32 = 0;
1133 let mut increment: usize = 0;
1134 let mut indent: usize = 0;
1135 let mut trailing_blank: bool;
1136 let mut leading_blank: bool = false;
1137
1138 let mut string = String::new();
1139 let mut leading_break = String::new();
1140 let mut trailing_breaks = String::new();
1141
1142 self.skip();
1144 self.lookahead(1);
1145
1146 if self.ch() == '+' || self.ch() == '-' {
1147 if self.ch() == '+' {
1148 chomping = 1;
1149 } else {
1150 chomping = -1;
1151 }
1152 self.skip();
1153 self.lookahead(1);
1154 if is_digit(self.ch()) {
1155 if self.ch() == '0' {
1156 return Err(ScanError::new(
1157 start_mark,
1158 "while scanning a block scalar, found an indentation indicator equal to 0",
1159 ));
1160 }
1161 increment = (self.ch() as usize) - ('0' as usize);
1162 self.skip();
1163 }
1164 } else if is_digit(self.ch()) {
1165 if self.ch() == '0' {
1166 return Err(ScanError::new(
1167 start_mark,
1168 "while scanning a block scalar, found an indentation indicator equal to 0",
1169 ));
1170 }
1171
1172 increment = (self.ch() as usize) - ('0' as usize);
1173 self.skip();
1174 self.lookahead(1);
1175 if self.ch() == '+' || self.ch() == '-' {
1176 if self.ch() == '+' {
1177 chomping = 1;
1178 } else {
1179 chomping = -1;
1180 }
1181 self.skip();
1182 }
1183 }
1184
1185 self.lookahead(1);
1187
1188 while is_blank(self.ch()) {
1189 self.skip();
1190 self.lookahead(1);
1191 }
1192
1193 if self.ch() == '#' {
1194 while !is_breakz(self.ch()) {
1195 self.skip();
1196 self.lookahead(1);
1197 }
1198 }
1199
1200 if !is_breakz(self.ch()) {
1202 return Err(ScanError::new(
1203 start_mark,
1204 "while scanning a block scalar, did not find expected comment or line break",
1205 ));
1206 }
1207
1208 if is_break(self.ch()) {
1209 self.lookahead(2);
1210 self.skip_line();
1211 }
1212
1213 if increment > 0 {
1214 indent = if self.indent >= 0 {
1215 (self.indent + increment as isize) as usize
1216 } else {
1217 increment
1218 }
1219 }
1220 self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1222
1223 self.lookahead(1);
1224
1225 let start_mark = self.mark;
1226
1227 while self.mark.col == indent && !is_z(self.ch()) {
1228 trailing_blank = is_blank(self.ch());
1230 if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1231 if trailing_breaks.is_empty() {
1232 string.push(' ');
1233 }
1234 leading_break.clear();
1235 } else {
1236 string.push_str(&leading_break);
1237 leading_break.clear();
1238 }
1239
1240 string.push_str(&trailing_breaks);
1241 trailing_breaks.clear();
1242
1243 leading_blank = is_blank(self.ch());
1244
1245 while !is_breakz(self.ch()) {
1246 string.push(self.ch());
1247 self.skip();
1248 self.lookahead(1);
1249 }
1250 if is_z(self.ch()) {
1252 break;
1253 }
1254
1255 self.lookahead(2);
1256 self.read_break(&mut leading_break);
1257
1258 self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1260 }
1261
1262 if chomping != -1 {
1264 string.push_str(&leading_break);
1265 }
1266
1267 if chomping == 1 {
1268 string.push_str(&trailing_breaks);
1269 }
1270
1271 if literal {
1272 Ok(Token(
1273 start_mark,
1274 TokenType::Scalar(TScalarStyle::Literal, string),
1275 ))
1276 } else {
1277 Ok(Token(
1278 start_mark,
1279 TokenType::Scalar(TScalarStyle::Foled, string),
1280 ))
1281 }
1282 }
1283
1284 fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
1285 let mut max_indent = 0;
1286 loop {
1287 self.lookahead(1);
1288 while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
1289 self.skip();
1290 self.lookahead(1);
1291 }
1292
1293 if self.mark.col > max_indent {
1294 max_indent = self.mark.col;
1295 }
1296
1297 if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
1299 return Err(ScanError::new(self.mark,
1300 "while scanning a block scalar, found a tab character where an indentation space is expected"));
1301 }
1302
1303 if !is_break(self.ch()) {
1304 break;
1305 }
1306
1307 self.lookahead(2);
1308 self.read_break(breaks);
1310 }
1311
1312 if *indent == 0 {
1313 *indent = max_indent;
1314 if *indent < (self.indent + 1) as usize {
1315 *indent = (self.indent + 1) as usize;
1316 }
1317 if *indent < 1 {
1318 *indent = 1;
1319 }
1320 }
1321 Ok(())
1322 }
1323
1324 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1325 self.save_simple_key()?;
1326 self.disallow_simple_key();
1327
1328 let tok = self.scan_flow_scalar(single)?;
1329
1330 self.adjacent_value_allowed_at = self.mark.index;
1333
1334 self.tokens.push_back(tok);
1335 Ok(())
1336 }
1337
1338 fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1339 let start_mark = self.mark;
1340
1341 let mut string = String::new();
1342 let mut leading_break = String::new();
1343 let mut trailing_breaks = String::new();
1344 let mut whitespaces = String::new();
1345 let mut leading_blanks;
1346
1347 self.skip();
1349
1350 loop {
1351 self.lookahead(4);
1353
1354 if self.mark.col == 0
1355 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1356 || ((self.buffer[0] == '.')
1357 && (self.buffer[1] == '.')
1358 && (self.buffer[2] == '.')))
1359 && is_blankz(self.buffer[3])
1360 {
1361 return Err(ScanError::new(
1362 start_mark,
1363 "while scanning a quoted scalar, found unexpected document indicator",
1364 ));
1365 }
1366
1367 if is_z(self.ch()) {
1368 return Err(ScanError::new(
1369 start_mark,
1370 "while scanning a quoted scalar, found unexpected end of stream",
1371 ));
1372 }
1373
1374 self.lookahead(2);
1375
1376 leading_blanks = false;
1377 while !is_blankz(self.ch()) {
1380 match self.ch() {
1381 '\'' if self.buffer[1] == '\'' && single => {
1383 string.push('\'');
1384 self.skip();
1385 self.skip();
1386 }
1387 '\'' if single => break,
1389 '"' if !single => break,
1390 '\\' if !single && is_break(self.buffer[1]) => {
1392 self.lookahead(3);
1393 self.skip();
1394 self.skip_line();
1395 leading_blanks = true;
1396 break;
1397 }
1398 '\\' if !single => {
1400 let mut code_length = 0usize;
1401 match self.buffer[1] {
1402 '0' => string.push('\0'),
1403 'a' => string.push('\x07'),
1404 'b' => string.push('\x08'),
1405 't' | '\t' => string.push('\t'),
1406 'n' => string.push('\n'),
1407 'v' => string.push('\x0b'),
1408 'f' => string.push('\x0c'),
1409 'r' => string.push('\x0d'),
1410 'e' => string.push('\x1b'),
1411 ' ' => string.push('\x20'),
1412 '"' => string.push('"'),
1413 '\'' => string.push('\''),
1414 '\\' => string.push('\\'),
1415 'N' => string.push(char::from_u32(0x85).unwrap()),
1417 '_' => string.push(char::from_u32(0xA0).unwrap()),
1419 'L' => string.push(char::from_u32(0x2028).unwrap()),
1421 'P' => string.push(char::from_u32(0x2029).unwrap()),
1423 'x' => code_length = 2,
1424 'u' => code_length = 4,
1425 'U' => code_length = 8,
1426 _ => {
1427 return Err(ScanError::new(
1428 start_mark,
1429 "while parsing a quoted scalar, found unknown escape character",
1430 ))
1431 }
1432 }
1433 self.skip();
1434 self.skip();
1435 if code_length > 0 {
1437 self.lookahead(code_length);
1438 let mut value = 0u32;
1439 for i in 0..code_length {
1440 if !is_hex(self.buffer[i]) {
1441 return Err(ScanError::new(start_mark,
1442 "while parsing a quoted scalar, did not find expected hexadecimal number"));
1443 }
1444 value = (value << 4) + as_hex(self.buffer[i]);
1445 }
1446
1447 let ch = match char::from_u32(value) {
1448 Some(v) => v,
1449 None => {
1450 return Err(ScanError::new(start_mark,
1451 "while parsing a quoted scalar, found invalid Unicode character escape code"));
1452 }
1453 };
1454 string.push(ch);
1455
1456 for _ in 0..code_length {
1457 self.skip();
1458 }
1459 }
1460 }
1461 c => {
1462 string.push(c);
1463 self.skip();
1464 }
1465 }
1466 self.lookahead(2);
1467 }
1468 self.lookahead(1);
1469 match self.ch() {
1470 '\'' if single => break,
1471 '"' if !single => break,
1472 _ => {}
1473 }
1474
1475 while is_blank(self.ch()) || is_break(self.ch()) {
1477 if is_blank(self.ch()) {
1478 if leading_blanks {
1480 self.skip();
1481 } else {
1482 whitespaces.push(self.ch());
1483 self.skip();
1484 }
1485 } else {
1486 self.lookahead(2);
1487 if leading_blanks {
1489 self.read_break(&mut trailing_breaks);
1490 } else {
1491 whitespaces.clear();
1492 self.read_break(&mut leading_break);
1493 leading_blanks = true;
1494 }
1495 }
1496 self.lookahead(1);
1497 }
1498 if leading_blanks {
1500 if leading_break.is_empty() {
1501 string.push_str(&leading_break);
1502 string.push_str(&trailing_breaks);
1503 trailing_breaks.clear();
1504 leading_break.clear();
1505 } else {
1506 if trailing_breaks.is_empty() {
1507 string.push(' ');
1508 } else {
1509 string.push_str(&trailing_breaks);
1510 trailing_breaks.clear();
1511 }
1512 leading_break.clear();
1513 }
1514 } else {
1515 string.push_str(&whitespaces);
1516 whitespaces.clear();
1517 }
1518 } self.skip();
1522
1523 if single {
1524 Ok(Token(
1525 start_mark,
1526 TokenType::Scalar(TScalarStyle::SingleQuoted, string),
1527 ))
1528 } else {
1529 Ok(Token(
1530 start_mark,
1531 TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
1532 ))
1533 }
1534 }
1535
1536 fn fetch_plain_scalar(&mut self) -> ScanResult {
1537 self.save_simple_key()?;
1538 self.disallow_simple_key();
1539
1540 let tok = self.scan_plain_scalar()?;
1541
1542 self.tokens.push_back(tok);
1543 Ok(())
1544 }
1545
1546 fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
1547 let indent = self.indent + 1;
1548 let start_mark = self.mark;
1549
1550 let mut string = String::new();
1551 let mut leading_break = String::new();
1552 let mut trailing_breaks = String::new();
1553 let mut whitespaces = String::new();
1554 let mut leading_blanks = false;
1555
1556 loop {
1557 self.lookahead(4);
1559
1560 if self.mark.col == 0
1561 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1562 || ((self.buffer[0] == '.')
1563 && (self.buffer[1] == '.')
1564 && (self.buffer[2] == '.')))
1565 && is_blankz(self.buffer[3])
1566 {
1567 break;
1568 }
1569
1570 if self.ch() == '#' {
1571 break;
1572 }
1573 while !is_blankz(self.ch()) {
1574 match self.ch() {
1576 ':' if is_blankz(self.buffer[1])
1577 || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
1578 {
1579 break;
1580 }
1581 ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
1582 _ => {}
1583 }
1584
1585 if leading_blanks || !whitespaces.is_empty() {
1586 if leading_blanks {
1587 if leading_break.is_empty() {
1588 string.push_str(&leading_break);
1589 string.push_str(&trailing_breaks);
1590 trailing_breaks.clear();
1591 leading_break.clear();
1592 } else {
1593 if trailing_breaks.is_empty() {
1594 string.push(' ');
1595 } else {
1596 string.push_str(&trailing_breaks);
1597 trailing_breaks.clear();
1598 }
1599 leading_break.clear();
1600 }
1601 leading_blanks = false;
1602 } else {
1603 string.push_str(&whitespaces);
1604 whitespaces.clear();
1605 }
1606 }
1607
1608 string.push(self.ch());
1609 self.skip();
1610 self.lookahead(2);
1611 }
1612 if !(is_blank(self.ch()) || is_break(self.ch())) {
1614 break;
1615 }
1616 self.lookahead(1);
1617
1618 while is_blank(self.ch()) || is_break(self.ch()) {
1619 if is_blank(self.ch()) {
1620 if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
1621 return Err(ScanError::new(
1622 start_mark,
1623 "while scanning a plain scalar, found a tab",
1624 ));
1625 }
1626
1627 if leading_blanks {
1628 self.skip();
1629 } else {
1630 whitespaces.push(self.ch());
1631 self.skip();
1632 }
1633 } else {
1634 self.lookahead(2);
1635 if leading_blanks {
1637 self.read_break(&mut trailing_breaks);
1638 } else {
1639 whitespaces.clear();
1640 self.read_break(&mut leading_break);
1641 leading_blanks = true;
1642 }
1643 }
1644 self.lookahead(1);
1645 }
1646
1647 if self.flow_level == 0 && (self.mark.col as isize) < indent {
1649 break;
1650 }
1651 }
1652
1653 if leading_blanks {
1654 self.allow_simple_key();
1655 }
1656
1657 Ok(Token(
1658 start_mark,
1659 TokenType::Scalar(TScalarStyle::Plain, string),
1660 ))
1661 }
1662
1663 fn fetch_key(&mut self) -> ScanResult {
1664 let start_mark = self.mark;
1665 if self.flow_level == 0 {
1666 if !self.simple_key_allowed {
1668 return Err(ScanError::new(
1669 self.mark,
1670 "mapping keys are not allowed in this context",
1671 ));
1672 }
1673 self.roll_indent(
1674 start_mark.col,
1675 None,
1676 TokenType::BlockMappingStart,
1677 start_mark,
1678 );
1679 }
1680
1681 self.remove_simple_key()?;
1682
1683 if self.flow_level == 0 {
1684 self.allow_simple_key();
1685 } else {
1686 self.disallow_simple_key();
1687 }
1688
1689 self.skip();
1690 self.tokens.push_back(Token(start_mark, TokenType::Key));
1691 Ok(())
1692 }
1693
1694 fn fetch_value(&mut self) -> ScanResult {
1695 let sk = self.simple_keys.last().unwrap().clone();
1696 let start_mark = self.mark;
1697 if sk.possible {
1698 let tok = Token(sk.mark, TokenType::Key);
1700 let tokens_parsed = self.tokens_parsed;
1701 self.insert_token(sk.token_number - tokens_parsed, tok);
1702
1703 self.roll_indent(
1705 sk.mark.col,
1706 Some(sk.token_number),
1707 TokenType::BlockMappingStart,
1708 start_mark,
1709 );
1710
1711 self.simple_keys.last_mut().unwrap().possible = false;
1712 self.disallow_simple_key();
1713 } else {
1714 if self.flow_level == 0 {
1716 if !self.simple_key_allowed {
1717 return Err(ScanError::new(
1718 start_mark,
1719 "mapping values are not allowed in this context",
1720 ));
1721 }
1722
1723 self.roll_indent(
1724 start_mark.col,
1725 None,
1726 TokenType::BlockMappingStart,
1727 start_mark,
1728 );
1729 }
1730
1731 if self.flow_level == 0 {
1732 self.allow_simple_key();
1733 } else {
1734 self.disallow_simple_key();
1735 }
1736 }
1737 self.skip();
1738 self.tokens.push_back(Token(start_mark, TokenType::Value));
1739
1740 Ok(())
1741 }
1742
1743 fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
1744 if self.flow_level > 0 {
1745 return;
1746 }
1747
1748 if self.indent < col as isize {
1749 self.indents.push(self.indent);
1750 self.indent = col as isize;
1751 let tokens_parsed = self.tokens_parsed;
1752 match number {
1753 Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
1754 None => self.tokens.push_back(Token(mark, tok)),
1755 }
1756 }
1757 }
1758
1759 fn unroll_indent(&mut self, col: isize) {
1760 if self.flow_level > 0 {
1761 return;
1762 }
1763 while self.indent > col {
1764 self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
1765 self.indent = self.indents.pop().unwrap();
1766 }
1767 }
1768
1769 fn save_simple_key(&mut self) -> Result<(), ScanError> {
1770 let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
1771 if self.simple_key_allowed {
1772 let mut sk = SimpleKey::new(self.mark);
1773 sk.possible = true;
1774 sk.required = required;
1775 sk.token_number = self.tokens_parsed + self.tokens.len();
1776
1777 self.remove_simple_key()?;
1778
1779 self.simple_keys.pop();
1780 self.simple_keys.push(sk);
1781 }
1782 Ok(())
1783 }
1784
1785 fn remove_simple_key(&mut self) -> ScanResult {
1786 let last = self.simple_keys.last_mut().unwrap();
1787 if last.possible && last.required {
1788 return Err(ScanError::new(self.mark, "simple key expected"));
1789 }
1790
1791 last.possible = false;
1792 Ok(())
1793 }
1794}
1795
1796#[cfg(test)]
1797mod test {
1798 use super::TokenType::*;
1799 use super::*;
1800
/// Pulls the next token from the scanner and panics unless its token type
/// matches the given pattern.
macro_rules! next {
    ($scanner:ident, $expected:pat) => {{
        match $scanner.next().unwrap() {
            Token(_, $expected) => {}
            token => panic!("unexpected token: {:?}", token),
        }
    }};
}
1810
/// Pulls the next token from the scanner and asserts that it is a scalar
/// with the given style and text; panics on any other token.
macro_rules! next_scalar {
    ($scanner:ident, $style:expr, $text:expr) => {{
        match $scanner.next().unwrap() {
            Token(_, Scalar(found_style, ref found_text)) => {
                assert_eq!(found_style, $style);
                assert_eq!(found_text, $text);
            }
            token => panic!("unexpected token: {:?}", token),
        }
    }};
}
1823
/// Asserts that the scanner has no further tokens to yield.
macro_rules! end {
    ($scanner:ident) => {{
        let leftover = $scanner.next();
        assert_eq!(leftover, None);
    }};
}
/// An empty input produces only the stream start and stream end tokens.
#[test]
fn test_empty() {
    let mut scanner = Scanner::new("".chars());
    next!(scanner, StreamStart(..));
    next!(scanner, StreamEnd);
    end!(scanner);
}
1838
/// A bare line of text is scanned as a single plain scalar.
#[test]
fn test_scalar() {
    let mut scanner = Scanner::new("a scalar".chars());
    next!(scanner, StreamStart(..));
    next!(scanner, Scalar(TScalarStyle::Plain, _));
    next!(scanner, StreamEnd);
    end!(scanner);
}
1848
/// A single-quoted scalar wrapped in explicit `---` / `...` document markers.
#[test]
fn test_explicit_scalar() {
    let input = "---\n'a scalar'\n...\n";
    let mut scanner = Scanner::new(input.chars());
    next!(scanner, StreamStart(..));
    next!(scanner, DocumentStart(..));
    next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
    next!(scanner, DocumentEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
1863
/// Three single-quoted scalars separated by `---` yield a document start
/// token before the second and third scalar only.
#[test]
fn test_multiple_documents() {
    let input = "\n'a scalar'\n---\n'a scalar'\n---\n'a scalar'\n";
    let mut scanner = Scanner::new(input.chars());
    next!(scanner, StreamStart(..));
    next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
    next!(scanner, DocumentStart(..));
    next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
    next!(scanner, DocumentStart(..));
    next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
    next!(scanner, StreamEnd);
    end!(scanner);
}
1883
/// A flow sequence of three plain scalars separated by flow entries.
#[test]
fn test_a_flow_sequence() {
    let mut scanner = Scanner::new("[item 1, item 2, item 3]".chars());
    next!(scanner, StreamStart(..));
    next!(scanner, FlowSequenceStart);
    next_scalar!(scanner, TScalarStyle::Plain, "item 1");
    next!(scanner, FlowEntry);
    next!(scanner, Scalar(TScalarStyle::Plain, _));
    next!(scanner, FlowEntry);
    next!(scanner, Scalar(TScalarStyle::Plain, _));
    next!(scanner, FlowSequenceEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
1899
// Scans a flow mapping holding one simple key and one explicit (`?`) key,
// each followed by a trailing comma, and checks the full token sequence.
1900 #[test]
1901 fn test_a_flow_mapping() {
1902 let s = "
1903{
1904 a simple key: a value, # Note that the KEY token is produced.
1905 ? a complex key: another value,
1906}
1907";
1908 let mut p = Scanner::new(s.chars());
// Expected: a Key/Value pair per entry, with a FlowEntry after each one.
1909 next!(p, StreamStart(..));
1910 next!(p, FlowMappingStart);
1911 next!(p, Key);
1912 next!(p, Scalar(TScalarStyle::Plain, _));
1913 next!(p, Value);
1914 next!(p, Scalar(TScalarStyle::Plain, _));
1915 next!(p, FlowEntry);
1916 next!(p, Key);
1917 next_scalar!(p, TScalarStyle::Plain, "a complex key");
1918 next!(p, Value);
1919 next!(p, Scalar(TScalarStyle::Plain, _));
1920 next!(p, FlowEntry);
1921 next!(p, FlowMappingEnd);
1922 next!(p, StreamEnd);
1923 end!(p);
1924 }
1925
// Scans a block sequence whose third entry is a nested block sequence and
// whose fourth entry is a block mapping, and checks the full token sequence.
1926 #[test]
1927 fn test_block_sequences() {
1928 let s = "
1929- item 1
1930- item 2
1931-
1932 - item 3.1
1933 - item 3.2
1934-
1935 key 1: value 1
1936 key 2: value 2
1937";
1938 let mut p = Scanner::new(s.chars());
1939 next!(p, StreamStart(..));
1940 next!(p, BlockSequenceStart);
1941 next!(p, BlockEntry);
1942 next_scalar!(p, TScalarStyle::Plain, "item 1");
1943 next!(p, BlockEntry);
1944 next_scalar!(p, TScalarStyle::Plain, "item 2");
1945 next!(p, BlockEntry);
// Nested sequence: opened here and closed by the first BlockEnd below.
1946 next!(p, BlockSequenceStart);
1947 next!(p, BlockEntry);
1948 next_scalar!(p, TScalarStyle::Plain, "item 3.1");
1949 next!(p, BlockEntry);
1950 next_scalar!(p, TScalarStyle::Plain, "item 3.2");
1951 next!(p, BlockEnd);
1952 next!(p, BlockEntry);
// Nested mapping inside the last entry.
1953 next!(p, BlockMappingStart);
1954 next!(p, Key);
1955 next_scalar!(p, TScalarStyle::Plain, "key 1");
1956 next!(p, Value);
1957 next_scalar!(p, TScalarStyle::Plain, "value 1");
1958 next!(p, Key);
1959 next_scalar!(p, TScalarStyle::Plain, "key 2");
1960 next!(p, Value);
1961 next_scalar!(p, TScalarStyle::Plain, "value 2");
1962 next!(p, BlockEnd);
1963 next!(p, BlockEnd);
1964 next!(p, StreamEnd);
1965 end!(p);
1966 }
1967
// Scans a block mapping that mixes a simple key, an explicit (`?`) key, a
// nested mapping and a nested sequence, and checks the token kinds in order
// (scalar styles and contents are not checked here).
1968 #[test]
1969 fn test_block_mappings() {
1970 let s = "
1971a simple key: a value # The KEY token is produced here.
1972? a complex key
1973: another value
1974a mapping:
1975 key 1: value 1
1976 key 2: value 2
1977a sequence:
1978 - item 1
1979 - item 2
1980";
1981 let mut p = Scanner::new(s.chars());
1982 next!(p, StreamStart(..));
1983 next!(p, BlockMappingStart);
1984 next!(p, Key);
1985 next!(p, Scalar(_, _));
1986 next!(p, Value);
1987 next!(p, Scalar(_, _));
1988 next!(p, Key);
1989 next!(p, Scalar(_, _));
1990 next!(p, Value);
1991 next!(p, Scalar(_, _));
1992 next!(p, Key);
1993 next!(p, Scalar(_, _));
// The Value token comes first, then the nested mapping is opened.
1994 next!(p, Value); next!(p, BlockMappingStart);
1996 next!(p, Key);
1997 next!(p, Scalar(_, _));
1998 next!(p, Value);
1999 next!(p, Scalar(_, _));
2000 next!(p, Key);
2001 next!(p, Scalar(_, _));
2002 next!(p, Value);
2003 next!(p, Scalar(_, _));
2004 next!(p, BlockEnd);
2005 next!(p, Key);
2006 next!(p, Scalar(_, _));
2007 next!(p, Value);
2008 next!(p, BlockSequenceStart);
2009 next!(p, BlockEntry);
2010 next!(p, Scalar(_, _));
2011 next!(p, BlockEntry);
2012 next!(p, Scalar(_, _));
2013 next!(p, BlockEnd);
2014 next!(p, BlockEnd);
2015 next!(p, StreamEnd);
2016 end!(p);
2017 }
2018
/// Sequence entries written directly under a mapping key produce block
/// entries without a preceding `BlockSequenceStart` token.
#[test]
fn test_no_block_sequence_start() {
    let input = "\nkey:\n- item 1\n- item 2\n";
    let mut scanner = Scanner::new(input.chars());
    next!(scanner, StreamStart(..));
    next!(scanner, BlockMappingStart);
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "key");
    next!(scanner, Value);
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 1");
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 2");
    next!(scanner, BlockEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
2040
// Scans a block sequence whose entries are, in order, a nested sequence, a
// mapping with simple keys, and a mapping with an explicit (`?`) key, and
// checks the full token sequence.
2041 #[test]
2042 fn test_collections_in_sequence() {
2043 let s = "
2044- - item 1
2045 - item 2
2046- key 1: value 1
2047 key 2: value 2
2048- ? complex key
2049 : complex value
2050";
2051 let mut p = Scanner::new(s.chars());
2052 next!(p, StreamStart(..));
2053 next!(p, BlockSequenceStart);
2054 next!(p, BlockEntry);
// First entry: nested sequence.
2055 next!(p, BlockSequenceStart);
2056 next!(p, BlockEntry);
2057 next_scalar!(p, TScalarStyle::Plain, "item 1");
2058 next!(p, BlockEntry);
2059 next_scalar!(p, TScalarStyle::Plain, "item 2");
2060 next!(p, BlockEnd);
2061 next!(p, BlockEntry);
// Second entry: mapping with two simple keys.
2062 next!(p, BlockMappingStart);
2063 next!(p, Key);
2064 next_scalar!(p, TScalarStyle::Plain, "key 1");
2065 next!(p, Value);
2066 next_scalar!(p, TScalarStyle::Plain, "value 1");
2067 next!(p, Key);
2068 next_scalar!(p, TScalarStyle::Plain, "key 2");
2069 next!(p, Value);
2070 next_scalar!(p, TScalarStyle::Plain, "value 2");
2071 next!(p, BlockEnd);
2072 next!(p, BlockEntry);
// Third entry: mapping with an explicit key.
2073 next!(p, BlockMappingStart);
2074 next!(p, Key);
2075 next_scalar!(p, TScalarStyle::Plain, "complex key");
2076 next!(p, Value);
2077 next_scalar!(p, TScalarStyle::Plain, "complex value");
2078 next!(p, BlockEnd);
2079 next!(p, BlockEnd);
2080 next!(p, StreamEnd);
2081 end!(p);
2082 }
2083
// Scans a block mapping with two explicit (`?`) keys whose values are a
// block sequence and a nested block mapping, and checks the full token
// sequence.
2084 #[test]
2085 fn test_collections_in_mapping() {
2086 let s = "
2087? a sequence
2088: - item 1
2089 - item 2
2090? a mapping
2091: key 1: value 1
2092 key 2: value 2
2093";
2094 let mut p = Scanner::new(s.chars());
2095 next!(p, StreamStart(..));
2096 next!(p, BlockMappingStart);
2097 next!(p, Key);
2098 next_scalar!(p, TScalarStyle::Plain, "a sequence");
2099 next!(p, Value);
// Value of the first key: a block sequence.
2100 next!(p, BlockSequenceStart);
2101 next!(p, BlockEntry);
2102 next_scalar!(p, TScalarStyle::Plain, "item 1");
2103 next!(p, BlockEntry);
2104 next_scalar!(p, TScalarStyle::Plain, "item 2");
2105 next!(p, BlockEnd);
2106 next!(p, Key);
2107 next_scalar!(p, TScalarStyle::Plain, "a mapping");
2108 next!(p, Value);
// Value of the second key: a nested block mapping.
2109 next!(p, BlockMappingStart);
2110 next!(p, Key);
2111 next_scalar!(p, TScalarStyle::Plain, "key 1");
2112 next!(p, Value);
2113 next_scalar!(p, TScalarStyle::Plain, "value 1");
2114 next!(p, Key);
2115 next_scalar!(p, TScalarStyle::Plain, "key 2");
2116 next!(p, Value);
2117 next_scalar!(p, TScalarStyle::Plain, "value 2");
2118 next!(p, BlockEnd);
2119 next!(p, BlockEnd);
2120 next!(p, StreamEnd);
2121 end!(p);
2122 }
2123
// Scans a flow mapping with an explicit key that has an empty value and an
// entry that has a value but no key, and checks the full token sequence.
2124 #[test]
2125 fn test_spec_ex7_3() {
2126 let s = "
2127{
2128 ? foo :,
2129 : bar,
2130}
2131";
2132 let mut p = Scanner::new(s.chars());
2133 next!(p, StreamStart(..));
2134 next!(p, FlowMappingStart);
2135 next!(p, Key);
2136 next_scalar!(p, TScalarStyle::Plain, "foo");
2137 next!(p, Value);
2138 next!(p, FlowEntry);
// Second entry: a Value token with no Key token before it.
2139 next!(p, Value);
2140 next_scalar!(p, TScalarStyle::Plain, "bar");
2141 next!(p, FlowEntry);
2142 next!(p, FlowMappingEnd);
2143 next!(p, StreamEnd);
2144 end!(p);
2145 }
2146
/// Inside a flow mapping, a value that begins with `:` or `?` and is
/// immediately followed by text is scanned as a plain scalar.
#[test]
fn test_plain_scalar_starting_with_indicators_in_flow() {
    let cases = [("{a: :b}", ":b"), ("{a: ?b}", "?b")];
    for &(input, value) in &cases {
        let mut scanner = Scanner::new(input.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, value);
        next!(scanner, FlowMappingEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }
}
2177
/// In block context, `:a` and `?a` are each scanned as one plain scalar whose
/// text equals the whole input.
#[test]
fn test_plain_scalar_starting_with_indicators_in_block() {
    for &input in &[":a", "?a"] {
        let mut scanner = Scanner::new(input.chars());
        next!(scanner, StreamStart(..));
        next_scalar!(scanner, TScalarStyle::Plain, input);
        next!(scanner, StreamEnd);
        end!(scanner);
    }
}
2194
/// In block context, `a:,b` and `:,b` are each scanned as one plain scalar
/// whose text equals the whole input.
#[test]
fn test_plain_scalar_containing_indicators_in_block() {
    for &input in &["a:,b", ":,b"] {
        let mut scanner = Scanner::new(input.chars());
        next!(scanner, StreamStart(..));
        next_scalar!(scanner, TScalarStyle::Plain, input);
        next!(scanner, StreamEnd);
        end!(scanner);
    }
}
2211
/// Input using `\r\n` line endings scans to a document start followed by a
/// two-entry block sequence.
#[test]
fn test_scanner_cr() {
    let mut scanner = Scanner::new("---\r\n- tok1\r\n- tok2".chars());
    next!(scanner, StreamStart(..));
    next!(scanner, DocumentStart(..));
    next!(scanner, BlockSequenceStart);
    for &expected in &["tok1", "tok2"] {
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, expected);
    }
    next!(scanner, BlockEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
2227
/// Placeholder test with an empty body; it currently asserts nothing.
#[test]
fn test_uri() {}
2232
/// Placeholder test with an empty body; it currently asserts nothing.
#[test]
fn test_uri_escapes() {}
2237}