1use std::collections::VecDeque;
2use std::io::BufRead;
3
4use crate::macros::{is_blankz, is_break};
5use crate::reader::yaml_parser_update_buffer;
6use crate::{
7 Encoding, Error, INPUT_BUFFER_SIZE, Mark, Result, ScalarStyle, SimpleKey, Token, TokenData,
8};
9
/// Maximum number of decimal digits accepted for a single component of a
/// `%YAML` directive version number; longer digit runs are rejected by the
/// scanner with an "extremely long version number" error.
const MAX_NUMBER_LENGTH: u64 = 9_u64;
11
/// Tokenizer state: the input source, the look-ahead character buffer, the
/// current position, and the queue of tokens produced so far.
pub struct Scanner<R> {
    /// Input source; `None` until `set_input` / `set_input_string` is called.
    pub(crate) read_handler: Option<R>,
    /// End-of-input flag.
    /// NOTE(review): not touched by the scanner code in this part of the
    /// file; presumably maintained by the reader — confirm.
    pub(crate) eof: bool,
    /// Look-ahead buffer of characters; the scanner consumes from the front.
    pub(crate) buffer: VecDeque<char>,
    /// Input encoding; stays `Encoding::Any` until `set_encoding` is called.
    pub(crate) encoding: Encoding,
    /// NOTE(review): presumably the reader's offset into the raw input; it
    /// is only initialised/reset in this part of the file — confirm.
    pub(crate) offset: usize,
    /// Position (index, line, column) of the next unconsumed character.
    pub(crate) mark: Mark,
    /// Set once the `StreamStart` token has been queued.
    pub(crate) stream_start_produced: bool,
    /// Set once the `StreamEnd` token has been handed out of the queue.
    pub(crate) stream_end_produced: bool,
    /// Nesting depth of open flow collections; 0 means block context.
    pub(crate) flow_level: i32,
    /// Queue of scanned tokens that have not been handed out yet.
    pub(crate) tokens: VecDeque<Token>,
    /// Number of tokens already removed from the front of `tokens`.
    pub(crate) tokens_parsed: usize,
    /// True when the head of `tokens` is ready to be returned; set by
    /// `fetch_more_tokens` and cleared whenever a token is dequeued.
    pub(crate) token_available: bool,
    /// Stack of enclosing block indentation levels.
    pub(crate) indents: Vec<i32>,
    /// Current block indentation column; set to -1 when the stream starts.
    pub(crate) indent: i32,
    /// Whether a simple key may start at the current position.
    pub(crate) simple_key_allowed: bool,
    /// Stack of simple-key candidates: one entry is pushed at stream start
    /// and one more for every open flow collection.
    pub(crate) simple_keys: Vec<SimpleKey>,
}
52
53impl<'r, 'b> Scanner<&'r mut &'b [u8]> {
54 pub fn set_input_string(&mut self, input: &'r mut &'b [u8]) {
56 assert!((self.read_handler).is_none());
57 self.read_handler = Some(input);
58 }
59}
60
61impl<R> Scanner<R> {
62 pub fn new() -> Scanner<R> {
63 Self {
64 read_handler: None,
65 eof: false,
66 buffer: VecDeque::with_capacity(INPUT_BUFFER_SIZE),
67 encoding: Encoding::Any,
68 offset: 0,
69 mark: Mark::default(),
70 stream_start_produced: false,
71 stream_end_produced: false,
72 flow_level: 0,
73 tokens: VecDeque::with_capacity(16),
74 tokens_parsed: 0,
75 token_available: false,
76 indents: Vec::with_capacity(16),
77 indent: 0,
78 simple_key_allowed: false,
79 simple_keys: Vec::with_capacity(16),
80 }
81 }
82
83 pub fn set_input(&mut self, input: R) {
85 assert!((self.read_handler).is_none());
86 self.read_handler = Some(input);
87 }
88
89 pub fn set_encoding(&mut self, encoding: Encoding) {
91 assert!(self.encoding == Encoding::Any);
92 self.encoding = encoding;
93 }
94
95 pub fn reset(&mut self) {
97 self.read_handler = None;
98 self.eof = false;
99 self.buffer.clear();
100 self.encoding = Encoding::Any;
101 self.offset = 0;
102 self.mark = Mark::default();
103 self.stream_start_produced = false;
104 self.stream_end_produced = false;
105 self.flow_level = 0;
106 self.tokens.clear();
107 self.tokens_parsed = 0;
108 self.token_available = false;
109 self.indents.clear();
110 self.indent = 0;
111 self.simple_key_allowed = false;
112 self.simple_keys.clear();
113 }
114}
115
116impl<R: BufRead> Scanner<R> {
117 fn cache(&mut self, length: usize) -> Result<()> {
118 if self.buffer.len() >= length {
119 Ok(())
120 } else {
121 yaml_parser_update_buffer(self, length)
122 }
123 }
124
125 fn skip_char(&mut self) {
127 let popped = self.buffer.pop_front().expect("unexpected end of tokens");
128 let width = popped.len_utf8();
129 self.mark.index += width as u64;
130 self.mark.column += 1;
131 }
132
133 fn skip_line_break(&mut self) {
135 if let Some(front) = self.buffer.front().copied() {
136 if let ('\r', Some('\n')) = (front, self.buffer.get(1).copied()) {
137 self.mark.index += 2;
138 self.mark.column = 0;
139 self.mark.line += 1;
140 self.buffer.drain(0..2);
141 } else if is_break(front) {
142 let width = front.len_utf8();
143 self.mark.index += width as u64;
144 self.mark.column = 0;
145 self.mark.line += 1;
146 self.buffer.pop_front();
147 }
148 }
149 }
150
151 fn read_char(&mut self, string: &mut String) {
153 if let Some(popped) = self.buffer.pop_front() {
154 string.push(popped);
155 self.mark.index += popped.len_utf8() as u64;
156 self.mark.column += 1;
157 } else {
158 panic!("unexpected end of input")
159 }
160 }
161
162 fn read_line_break(&mut self, string: &mut String) {
164 let front = match self.buffer.front().copied() {
165 Some(front) => front,
166 None => panic!("unexpected end of input"),
167 };
168
169 if let ('\r', Some('\n')) = (front, self.buffer.get(1).copied()) {
170 string.push('\n');
171 self.buffer.drain(0..2);
172 self.mark.index += 2;
173 self.mark.column = 0;
174 self.mark.line += 1;
175 } else if is_break(front) {
176 self.buffer.pop_front();
177 let char_len = front.len_utf8();
178 if char_len == 3 {
179 string.push(front);
181 } else {
182 string.push('\n');
183 }
184 self.mark.index += char_len as u64;
185 self.mark.column = 0;
186 self.mark.line += 1;
187 }
188 }
189
190 pub fn scan(&mut self) -> Result<Token> {
197 if self.stream_end_produced {
198 return Ok(Token {
199 data: TokenData::StreamEnd,
200 start_mark: Mark::default(),
201 end_mark: Mark::default(),
202 });
203 }
204 if !self.token_available {
205 self.fetch_more_tokens()?;
206 }
207 if let Some(token) = self.tokens.pop_front() {
208 self.token_available = false;
209 self.tokens_parsed += 1;
210 if let TokenData::StreamEnd = &token.data {
211 self.stream_end_produced = true;
212 }
213 Ok(token)
214 } else {
215 unreachable!("no more tokens, but stream-end was not produced")
216 }
217 }
218
219 pub(crate) fn peek(&mut self) -> Result<&Token> {
221 if self.token_available {
222 return Ok(self
223 .tokens
224 .front()
225 .expect("token_available is true, but token queue is empty"));
226 }
227 self.fetch_more_tokens()?;
228 assert!(
229 self.token_available,
230 "fetch_more_tokens() did not produce any tokens, nor an error"
231 );
232 Ok(self
233 .tokens
234 .front()
235 .expect("token_available is true, but token queue is empty"))
236 }
237
238 pub(crate) fn peek_mut(&mut self) -> Result<&mut Token> {
240 if self.token_available {
241 return Ok(self
242 .tokens
243 .front_mut()
244 .expect("token_available is true, but token queue is empty"));
245 }
246 self.fetch_more_tokens()?;
247 assert!(
248 self.token_available,
249 "fetch_more_tokens() did not produce any tokens, nor an error"
250 );
251 Ok(self
252 .tokens
253 .front_mut()
254 .expect("token_available is true, but token queue is empty"))
255 }
256
257 pub(crate) fn skip_token(&mut self) {
259 self.token_available = false;
260 self.tokens_parsed = self.tokens_parsed.wrapping_add(1);
261 let skipped = self.tokens.pop_front().expect("SKIP_TOKEN but EOF");
262 self.stream_end_produced = matches!(
263 skipped,
264 Token {
265 data: TokenData::StreamEnd,
266 ..
267 }
268 );
269 }
270
271 fn set_scanner_error<T>(
272 &mut self,
273 context: &'static str,
274 context_mark: Mark,
275 problem: &'static str,
276 ) -> Result<T> {
277 Err(Error::scanner(context, context_mark, problem, self.mark))
278 }
279
280 pub(crate) fn fetch_more_tokens(&mut self) -> Result<()> {
281 let mut need_more_tokens;
282 loop {
283 need_more_tokens = false;
284 if self.tokens.is_empty() {
285 need_more_tokens = true;
286 } else {
287 self.stale_simple_keys()?;
288 for simple_key in &self.simple_keys {
289 if simple_key.possible && simple_key.token_number == self.tokens_parsed {
290 need_more_tokens = true;
291 break;
292 }
293 }
294 }
295 if !need_more_tokens {
296 break;
297 }
298 self.fetch_next_token()?;
299 }
300 self.token_available = true;
301 Ok(())
302 }
303
304 fn fetch_next_token(&mut self) -> Result<()> {
305 self.cache(1)?;
306 if !self.stream_start_produced {
307 self.fetch_stream_start();
308 return Ok(());
309 }
310 self.scan_to_next_token()?;
311 self.stale_simple_keys()?;
312 self.unroll_indent(self.mark.column as i64);
313 self.cache(4)?;
314 if IS_Z!(self.buffer) {
315 return self.fetch_stream_end();
316 }
317 if self.mark.column == 0_u64 && self.buffer[0] == '%' {
318 return self.fetch_directive();
319 }
320 if self.mark.column == 0_u64
321 && CHECK_AT!(self.buffer, '-', 0)
322 && CHECK_AT!(self.buffer, '-', 1)
323 && CHECK_AT!(self.buffer, '-', 2)
324 && is_blankz(self.buffer.get(3).copied())
325 {
326 return self.fetch_document_indicator(TokenData::DocumentStart);
327 }
328 if self.mark.column == 0_u64
329 && CHECK_AT!(self.buffer, '.', 0)
330 && CHECK_AT!(self.buffer, '.', 1)
331 && CHECK_AT!(self.buffer, '.', 2)
332 && is_blankz(self.buffer.get(3).copied())
333 {
334 return self.fetch_document_indicator(TokenData::DocumentEnd);
335 }
336 if CHECK!(self.buffer, '[') {
337 return self.fetch_flow_collection_start(TokenData::FlowSequenceStart);
338 }
339 if CHECK!(self.buffer, '{') {
340 return self.fetch_flow_collection_start(TokenData::FlowMappingStart);
341 }
342 if CHECK!(self.buffer, ']') {
343 return self.fetch_flow_collection_end(TokenData::FlowSequenceEnd);
344 }
345 if CHECK!(self.buffer, '}') {
346 return self.fetch_flow_collection_end(TokenData::FlowMappingEnd);
347 }
348 if CHECK!(self.buffer, ',') {
349 return self.fetch_flow_entry();
350 }
351 if CHECK!(self.buffer, '-') && IS_BLANKZ_AT!(self.buffer, 1) {
352 return self.fetch_block_entry();
353 }
354 if CHECK!(self.buffer, '?') && (self.flow_level != 0 || IS_BLANKZ_AT!(self.buffer, 1)) {
355 return self.fetch_key();
356 }
357 if CHECK!(self.buffer, ':') && (self.flow_level != 0 || IS_BLANKZ_AT!(self.buffer, 1)) {
358 return self.fetch_value();
359 }
360 if CHECK!(self.buffer, '*') {
361 return self.fetch_anchor(true);
362 }
363 if CHECK!(self.buffer, '&') {
364 return self.fetch_anchor(false);
365 }
366 if CHECK!(self.buffer, '!') {
367 return self.fetch_tag();
368 }
369 if CHECK!(self.buffer, '|') && self.flow_level == 0 {
370 return self.fetch_block_scalar(true);
371 }
372 if CHECK!(self.buffer, '>') && self.flow_level == 0 {
373 return self.fetch_block_scalar(false);
374 }
375 if CHECK!(self.buffer, '\'') {
376 return self.fetch_flow_scalar(true);
377 }
378 if CHECK!(self.buffer, '"') {
379 return self.fetch_flow_scalar(false);
380 }
381 if !(IS_BLANKZ!(self.buffer)
382 || CHECK!(self.buffer, '-')
383 || CHECK!(self.buffer, '?')
384 || CHECK!(self.buffer, ':')
385 || CHECK!(self.buffer, ',')
386 || CHECK!(self.buffer, '[')
387 || CHECK!(self.buffer, ']')
388 || CHECK!(self.buffer, '{')
389 || CHECK!(self.buffer, '}')
390 || CHECK!(self.buffer, '#')
391 || CHECK!(self.buffer, '&')
392 || CHECK!(self.buffer, '*')
393 || CHECK!(self.buffer, '!')
394 || CHECK!(self.buffer, '|')
395 || CHECK!(self.buffer, '>')
396 || CHECK!(self.buffer, '\'')
397 || CHECK!(self.buffer, '"')
398 || CHECK!(self.buffer, '%')
399 || CHECK!(self.buffer, '@')
400 || CHECK!(self.buffer, '`'))
401 || CHECK!(self.buffer, '-') && !IS_BLANK_AT!(self.buffer, 1)
402 || self.flow_level == 0
403 && (CHECK!(self.buffer, '?') || CHECK!(self.buffer, ':'))
404 && !IS_BLANKZ_AT!(self.buffer, 1)
405 {
406 return self.fetch_plain_scalar();
407 }
408 self.set_scanner_error(
409 "while scanning for the next token",
410 self.mark,
411 "found character that cannot start any token",
412 )
413 }
414
415 fn stale_simple_keys(&mut self) -> Result<()> {
416 for simple_key in &mut self.simple_keys {
417 let mark = simple_key.mark;
418 if simple_key.possible
419 && (mark.line < self.mark.line || mark.index + 1024 < self.mark.index)
420 {
421 if simple_key.required {
422 return self.set_scanner_error(
423 "while scanning a simple key",
424 mark,
425 "could not find expected ':'",
426 );
427 }
428 simple_key.possible = false;
429 }
430 }
431
432 Ok(())
433 }
434
435 fn save_simple_key(&mut self) -> Result<()> {
436 let required = self.flow_level == 0 && self.indent as u64 == self.mark.column;
437 if self.simple_key_allowed {
438 let simple_key = SimpleKey {
439 possible: true,
440 required,
441 token_number: self.tokens_parsed + self.tokens.len(),
442 mark: self.mark,
443 };
444 self.remove_simple_key()?;
445 *self.simple_keys.last_mut().unwrap() = simple_key;
446 }
447 Ok(())
448 }
449
450 fn remove_simple_key(&mut self) -> Result<()> {
451 let simple_key: &mut SimpleKey = self.simple_keys.last_mut().unwrap();
452 if simple_key.possible {
453 let mark = simple_key.mark;
454 if simple_key.required {
455 return self.set_scanner_error(
456 "while scanning a simple key",
457 mark,
458 "could not find expected ':'",
459 );
460 }
461 }
462 simple_key.possible = false;
463 Ok(())
464 }
465
466 fn increase_flow_level(&mut self) -> Result<()> {
467 let empty_simple_key = SimpleKey {
468 possible: false,
469 required: false,
470 token_number: 0,
471 mark: Mark {
472 index: 0_u64,
473 line: 0_u64,
474 column: 0_u64,
475 },
476 };
477 self.simple_keys.push(empty_simple_key);
478 assert!(
479 self.flow_level != i32::MAX,
480 "parser.flow_level integer overflow"
481 );
482 self.flow_level += 1;
483 Ok(())
484 }
485
486 fn decrease_flow_level(&mut self) {
487 if self.flow_level != 0 {
488 self.flow_level -= 1;
489 let _ = self.simple_keys.pop();
490 }
491 }
492
493 fn roll_indent(&mut self, column: i64, number: i64, data: TokenData, mark: Mark) -> Result<()> {
494 if self.flow_level != 0 {
495 return Ok(());
496 }
497 if self.indent < column as i32 {
498 self.indents.push(self.indent);
499 assert!(column <= i32::MAX as i64, "integer overflow");
500 self.indent = column as i32;
501 let token = Token {
502 data,
503 start_mark: mark,
504 end_mark: mark,
505 };
506 if number == -1_i64 {
507 self.tokens.push_back(token);
508 } else {
509 self.tokens
510 .insert((number as usize).wrapping_sub(self.tokens_parsed), token);
511 }
512 }
513 Ok(())
514 }
515
516 fn unroll_indent(&mut self, column: i64) {
517 if self.flow_level != 0 {
518 return;
519 }
520 while self.indent as i64 > column {
521 let token = Token {
522 data: TokenData::BlockEnd,
523 start_mark: self.mark,
524 end_mark: self.mark,
525 };
526 self.tokens.push_back(token);
527 self.indent = self.indents.pop().unwrap();
528 }
529 }
530
531 fn fetch_stream_start(&mut self) {
532 let simple_key = SimpleKey {
533 possible: false,
534 required: false,
535 token_number: 0,
536 mark: Mark {
537 index: 0,
538 line: 0,
539 column: 0,
540 },
541 };
542 self.indent = -1;
543 self.simple_keys.push(simple_key);
544 self.simple_key_allowed = true;
545 self.stream_start_produced = true;
546 let token = Token {
547 data: TokenData::StreamStart {
548 encoding: self.encoding,
549 },
550 start_mark: self.mark,
551 end_mark: self.mark,
552 };
553 self.tokens.push_back(token);
554 }
555
556 fn fetch_stream_end(&mut self) -> Result<()> {
557 if self.mark.column != 0_u64 {
558 self.mark.column = 0_u64;
559 self.mark.line += 1;
560 }
561 self.unroll_indent(-1_i64);
562 self.remove_simple_key()?;
563 self.simple_key_allowed = false;
564 let token = Token {
565 data: TokenData::StreamEnd,
566 start_mark: self.mark,
567 end_mark: self.mark,
568 };
569 self.tokens.push_back(token);
570 Ok(())
571 }
572
573 fn fetch_directive(&mut self) -> Result<()> {
574 self.unroll_indent(-1_i64);
575 self.remove_simple_key()?;
576 self.simple_key_allowed = false;
577 let token = self.scan_directive()?;
578 self.tokens.push_back(token);
579 Ok(())
580 }
581
582 fn fetch_document_indicator(&mut self, data: TokenData) -> Result<()> {
583 self.unroll_indent(-1_i64);
584 self.remove_simple_key()?;
585 self.simple_key_allowed = false;
586 let start_mark: Mark = self.mark;
587 self.skip_char();
588 self.skip_char();
589 self.skip_char();
590 let end_mark: Mark = self.mark;
591
592 let token = Token {
593 data,
594 start_mark,
595 end_mark,
596 };
597 self.tokens.push_back(token);
598 Ok(())
599 }
600
601 fn fetch_flow_collection_start(&mut self, data: TokenData) -> Result<()> {
602 self.save_simple_key()?;
603 self.increase_flow_level()?;
604 self.simple_key_allowed = true;
605 let start_mark: Mark = self.mark;
606 self.skip_char();
607 let end_mark: Mark = self.mark;
608 let token = Token {
609 data,
610 start_mark,
611 end_mark,
612 };
613 self.tokens.push_back(token);
614 Ok(())
615 }
616
617 fn fetch_flow_collection_end(&mut self, data: TokenData) -> Result<()> {
618 self.remove_simple_key()?;
619 self.decrease_flow_level();
620 self.simple_key_allowed = false;
621 let start_mark: Mark = self.mark;
622 self.skip_char();
623 let end_mark: Mark = self.mark;
624 let token = Token {
625 data,
626 start_mark,
627 end_mark,
628 };
629 self.tokens.push_back(token);
630 Ok(())
631 }
632
633 fn fetch_flow_entry(&mut self) -> Result<()> {
634 self.remove_simple_key()?;
635 self.simple_key_allowed = true;
636 let start_mark: Mark = self.mark;
637 self.skip_char();
638 let end_mark: Mark = self.mark;
639 let token = Token {
640 data: TokenData::FlowEntry,
641 start_mark,
642 end_mark,
643 };
644 self.tokens.push_back(token);
645 Ok(())
646 }
647
648 fn fetch_block_entry(&mut self) -> Result<()> {
649 if self.flow_level == 0 {
650 if !self.simple_key_allowed {
651 return self.set_scanner_error(
652 "",
653 self.mark,
654 "block sequence entries are not allowed in this context",
655 );
656 }
657 self.roll_indent(
658 self.mark.column as _,
659 -1_i64,
660 TokenData::BlockSequenceStart,
661 self.mark,
662 )?;
663 }
664 self.remove_simple_key()?;
665 self.simple_key_allowed = true;
666 let start_mark: Mark = self.mark;
667 self.skip_char();
668 let end_mark: Mark = self.mark;
669 let token = Token {
670 data: TokenData::BlockEntry,
671 start_mark,
672 end_mark,
673 };
674 self.tokens.push_back(token);
675 Ok(())
676 }
677
678 fn fetch_key(&mut self) -> Result<()> {
679 if self.flow_level == 0 {
680 if !self.simple_key_allowed {
681 return self.set_scanner_error(
682 "",
683 self.mark,
684 "mapping keys are not allowed in this context",
685 );
686 }
687 self.roll_indent(
688 self.mark.column as _,
689 -1_i64,
690 TokenData::BlockMappingStart,
691 self.mark,
692 )?;
693 }
694 self.remove_simple_key()?;
695 self.simple_key_allowed = self.flow_level == 0;
696 let start_mark: Mark = self.mark;
697 self.skip_char();
698 let end_mark: Mark = self.mark;
699 let token = Token {
700 data: TokenData::Key,
701 start_mark,
702 end_mark,
703 };
704 self.tokens.push_back(token);
705 Ok(())
706 }
707
708 fn fetch_value(&mut self) -> Result<()> {
709 let simple_key: &mut SimpleKey = self.simple_keys.last_mut().unwrap();
710 if simple_key.possible {
711 let token = Token {
712 data: TokenData::Key,
713 start_mark: simple_key.mark,
714 end_mark: simple_key.mark,
715 };
716 self.tokens.insert(
717 simple_key.token_number.wrapping_sub(self.tokens_parsed),
718 token,
719 );
720 let mark_column = simple_key.mark.column as _;
721 let token_number = simple_key.token_number as _;
722 let mark = simple_key.mark;
723 simple_key.possible = false;
724 self.roll_indent(
725 mark_column,
726 token_number,
727 TokenData::BlockMappingStart,
728 mark,
729 )?;
730 self.simple_key_allowed = false;
731 } else {
732 if self.flow_level == 0 {
733 if !self.simple_key_allowed {
734 return self.set_scanner_error(
735 "",
736 self.mark,
737 "mapping values are not allowed in this context",
738 );
739 }
740 self.roll_indent(
741 self.mark.column as _,
742 -1_i64,
743 TokenData::BlockMappingStart,
744 self.mark,
745 )?;
746 }
747 self.simple_key_allowed = self.flow_level == 0;
748 }
749 let start_mark: Mark = self.mark;
750 self.skip_char();
751 let end_mark: Mark = self.mark;
752 let token = Token {
753 data: TokenData::Value,
754 start_mark,
755 end_mark,
756 };
757 self.tokens.push_back(token);
758 Ok(())
759 }
760
761 fn fetch_anchor(&mut self, fetch_alias_instead_of_anchor: bool) -> Result<()> {
762 self.save_simple_key()?;
763 self.simple_key_allowed = false;
764 let token = self.scan_anchor(fetch_alias_instead_of_anchor)?;
765 self.tokens.push_back(token);
766 Ok(())
767 }
768
769 fn fetch_tag(&mut self) -> Result<()> {
770 self.save_simple_key()?;
771 self.simple_key_allowed = false;
772 let token = self.scan_tag()?;
773 self.tokens.push_back(token);
774 Ok(())
775 }
776
777 fn fetch_block_scalar(&mut self, literal: bool) -> Result<()> {
778 self.remove_simple_key()?;
779 self.simple_key_allowed = true;
780 let token = self.scan_block_scalar(literal)?;
781 self.tokens.push_back(token);
782 Ok(())
783 }
784
785 fn fetch_flow_scalar(&mut self, single: bool) -> Result<()> {
786 self.save_simple_key()?;
787 self.simple_key_allowed = false;
788 let token = self.scan_flow_scalar(single)?;
789 self.tokens.push_back(token);
790 Ok(())
791 }
792
793 fn fetch_plain_scalar(&mut self) -> Result<()> {
794 self.save_simple_key()?;
795 self.simple_key_allowed = false;
796 let token = self.scan_plain_scalar()?;
797 self.tokens.push_back(token);
798 Ok(())
799 }
800
801 fn scan_to_next_token(&mut self) -> Result<()> {
802 loop {
803 self.cache(1)?;
804 if self.mark.column == 0 && IS_BOM!(self.buffer) {
805 self.skip_char();
806 }
807 self.cache(1)?;
808 while CHECK!(self.buffer, ' ')
809 || (self.flow_level != 0 || !self.simple_key_allowed) && CHECK!(self.buffer, '\t')
810 {
811 self.skip_char();
812 self.cache(1)?;
813 }
814 if CHECK!(self.buffer, '#') {
815 while !IS_BREAKZ!(self.buffer) {
816 self.skip_char();
817 self.cache(1)?;
818 }
819 }
820 if !IS_BREAK!(self.buffer) {
821 break;
822 }
823 self.cache(2)?;
824 self.skip_line_break();
825 if self.flow_level == 0 {
826 self.simple_key_allowed = true;
827 }
828 }
829 Ok(())
830 }
831
832 fn scan_directive(&mut self) -> Result<Token> {
833 let end_mark: Mark;
834 let mut major: i32 = 0;
835 let mut minor: i32 = 0;
836 let start_mark: Mark = self.mark;
837 self.skip_char();
838 let name = self.scan_directive_name(start_mark)?;
839 let token = if name == "YAML" {
840 self.scan_version_directive_value(start_mark, &mut major, &mut minor)?;
841
842 end_mark = self.mark;
843 Token {
844 data: TokenData::VersionDirective { major, minor },
845 start_mark,
846 end_mark,
847 }
848 } else if name == "TAG" {
849 let (handle, prefix) = self.scan_tag_directive_value(start_mark)?;
850 end_mark = self.mark;
851 Token {
852 data: TokenData::TagDirective { handle, prefix },
853 start_mark,
854 end_mark,
855 }
856 } else {
857 return self.set_scanner_error(
858 "while scanning a directive",
859 start_mark,
860 "found unknown directive name",
861 );
862 };
863 self.cache(1)?;
864 loop {
865 if !IS_BLANK!(self.buffer) {
866 break;
867 }
868 self.skip_char();
869 self.cache(1)?;
870 }
871
872 if CHECK!(self.buffer, '#') {
873 loop {
874 if IS_BREAKZ!(self.buffer) {
875 break;
876 }
877 self.skip_char();
878 self.cache(1)?;
879 }
880 }
881
882 if IS_BREAKZ!(self.buffer) {
883 if IS_BREAK!(self.buffer) {
884 self.cache(2)?;
885 self.skip_line_break();
886 }
887 Ok(token)
888 } else {
889 self.set_scanner_error(
890 "while scanning a directive",
891 start_mark,
892 "did not find expected comment or line break",
893 )
894 }
895 }
896
897 fn scan_directive_name(&mut self, start_mark: Mark) -> Result<String> {
898 let mut string = String::new();
899 self.cache(1)?;
900
901 loop {
902 if !IS_ALPHA!(self.buffer) {
903 break;
904 }
905 self.read_char(&mut string);
906 self.cache(1)?;
907 }
908
909 if string.is_empty() {
910 self.set_scanner_error(
911 "while scanning a directive",
912 start_mark,
913 "could not find expected directive name",
914 )
915 } else if !IS_BLANKZ!(self.buffer) {
916 self.set_scanner_error(
917 "while scanning a directive",
918 start_mark,
919 "found unexpected non-alphabetical character",
920 )
921 } else {
922 Ok(string)
923 }
924 }
925
926 fn scan_version_directive_value(
927 &mut self,
928 start_mark: Mark,
929 major: &mut i32,
930 minor: &mut i32,
931 ) -> Result<()> {
932 self.cache(1)?;
933 while IS_BLANK!(self.buffer) {
934 self.skip_char();
935 self.cache(1)?;
936 }
937 self.scan_version_directive_number(start_mark, major)?;
938 if !CHECK!(self.buffer, '.') {
939 return self.set_scanner_error(
940 "while scanning a %YAML directive",
941 start_mark,
942 "did not find expected digit or '.' character",
943 );
944 }
945 self.skip_char();
946 self.scan_version_directive_number(start_mark, minor)
947 }
948
949 fn scan_version_directive_number(&mut self, start_mark: Mark, number: &mut i32) -> Result<()> {
950 let mut value: i32 = 0;
951 let mut length = 0;
952 self.cache(1)?;
953 while IS_DIGIT!(self.buffer) {
954 length += 1;
955 if length > MAX_NUMBER_LENGTH {
956 return self.set_scanner_error(
957 "while scanning a %YAML directive",
958 start_mark,
959 "found extremely long version number",
960 );
961 }
962 value = (value * 10) + AS_DIGIT!(self.buffer) as i32;
963 self.skip_char();
964 self.cache(1)?;
965 }
966 if length == 0 {
967 return self.set_scanner_error(
968 "while scanning a %YAML directive",
969 start_mark,
970 "did not find expected version number",
971 );
972 }
973 *number = value;
974 Ok(())
975 }
976
977 fn scan_tag_directive_value(&mut self, start_mark: Mark) -> Result<(String, String)> {
979 self.cache(1)?;
980
981 loop {
982 if IS_BLANK!(self.buffer) {
983 self.skip_char();
984 self.cache(1)?;
985 } else {
986 let handle_value = self.scan_tag_handle(true, start_mark)?;
987
988 self.cache(1)?;
989
990 if !IS_BLANK!(self.buffer) {
991 return self.set_scanner_error(
992 "while scanning a %TAG directive",
993 start_mark,
994 "did not find expected whitespace",
995 );
996 }
997
998 while IS_BLANK!(self.buffer) {
999 self.skip_char();
1000 self.cache(1)?;
1001 }
1002
1003 let prefix_value = self.scan_tag_uri(true, true, None, start_mark)?;
1004 self.cache(1)?;
1005
1006 if !IS_BLANKZ!(self.buffer) {
1007 return self.set_scanner_error(
1008 "while scanning a %TAG directive",
1009 start_mark,
1010 "did not find expected whitespace or line break",
1011 );
1012 }
1013 return Ok((handle_value, prefix_value));
1014 }
1015 }
1016 }
1017
1018 fn scan_anchor(&mut self, scan_alias_instead_of_anchor: bool) -> Result<Token> {
1019 let mut length: i32 = 0;
1020
1021 let mut string = String::new();
1022 let start_mark: Mark = self.mark;
1023 self.skip_char();
1024 self.cache(1)?;
1025
1026 loop {
1027 if !IS_ALPHA!(self.buffer) {
1028 break;
1029 }
1030 self.read_char(&mut string);
1031 self.cache(1)?;
1032 length += 1;
1033 }
1034 let end_mark: Mark = self.mark;
1035 if length == 0
1036 || !(IS_BLANKZ!(self.buffer)
1037 || CHECK!(self.buffer, '?')
1038 || CHECK!(self.buffer, ':')
1039 || CHECK!(self.buffer, ',')
1040 || CHECK!(self.buffer, ']')
1041 || CHECK!(self.buffer, '}')
1042 || CHECK!(self.buffer, '%')
1043 || CHECK!(self.buffer, '@')
1044 || CHECK!(self.buffer, '`'))
1045 {
1046 self.set_scanner_error(
1047 if scan_alias_instead_of_anchor {
1048 "while scanning an alias"
1049 } else {
1050 "while scanning an anchor"
1051 },
1052 start_mark,
1053 "did not find expected alphabetic or numeric character",
1054 )
1055 } else {
1056 Ok(Token {
1057 data: if scan_alias_instead_of_anchor {
1058 TokenData::Alias { value: string }
1059 } else {
1060 TokenData::Anchor { value: string }
1061 },
1062 start_mark,
1063 end_mark,
1064 })
1065 }
1066 }
1067
1068 fn scan_tag(&mut self) -> Result<Token> {
1069 let mut handle;
1070 let mut suffix;
1071
1072 let start_mark: Mark = self.mark;
1073
1074 self.cache(2)?;
1075
1076 if CHECK_AT!(self.buffer, '<', 1) {
1077 handle = String::new();
1078 self.skip_char();
1079 self.skip_char();
1080 suffix = self.scan_tag_uri(true, false, None, start_mark)?;
1081
1082 if !CHECK!(self.buffer, '>') {
1083 return self.set_scanner_error(
1084 "while scanning a tag",
1085 start_mark,
1086 "did not find the expected '>'",
1087 );
1088 }
1089 self.skip_char();
1090 } else {
1091 handle = self.scan_tag_handle(false, start_mark)?;
1092 if handle.starts_with('!') && handle.len() > 1 && handle.ends_with('!') {
1093 suffix = self.scan_tag_uri(false, false, None, start_mark)?;
1094 } else {
1095 suffix = self.scan_tag_uri(false, false, Some(&handle), start_mark)?;
1096 handle = String::from("!");
1097 if suffix.is_empty() {
1098 core::mem::swap(&mut handle, &mut suffix);
1099 }
1100 }
1101 }
1102
1103 self.cache(1)?;
1104 if !IS_BLANKZ!(self.buffer) {
1105 if self.flow_level == 0 || !CHECK!(self.buffer, ',') {
1106 return self.set_scanner_error(
1107 "while scanning a tag",
1108 start_mark,
1109 "did not find expected whitespace or line break",
1110 );
1111 }
1112 panic!("TODO: What is expected here?");
1113 }
1114
1115 let end_mark: Mark = self.mark;
1116 Ok(Token {
1117 data: TokenData::Tag { handle, suffix },
1118 start_mark,
1119 end_mark,
1120 })
1121 }
1122
1123 fn scan_tag_handle(&mut self, directive: bool, start_mark: Mark) -> Result<String> {
1124 let mut string = String::new();
1125 self.cache(1)?;
1126
1127 if !CHECK!(self.buffer, '!') {
1128 return self.set_scanner_error(
1129 if directive {
1130 "while scanning a tag directive"
1131 } else {
1132 "while scanning a tag"
1133 },
1134 start_mark,
1135 "did not find expected '!'",
1136 );
1137 }
1138
1139 self.read_char(&mut string);
1140 self.cache(1)?;
1141 loop {
1142 if !IS_ALPHA!(self.buffer) {
1143 break;
1144 }
1145 self.read_char(&mut string);
1146 self.cache(1)?;
1147 }
1148 if CHECK!(self.buffer, '!') {
1149 self.read_char(&mut string);
1150 } else if directive && string != "!" {
1151 return self.set_scanner_error(
1152 "while parsing a tag directive",
1153 start_mark,
1154 "did not find expected '!'",
1155 );
1156 }
1157 Ok(string)
1158 }
1159
1160 fn scan_tag_uri(
1161 &mut self,
1162 uri_char: bool,
1163 directive: bool,
1164 head: Option<&str>,
1165 start_mark: Mark,
1166 ) -> Result<String> {
1167 let head = head.unwrap_or("");
1168 let mut length = head.len();
1169 let mut string = String::new();
1170
1171 if length > 1 {
1172 string = String::from(&head[1..]);
1173 }
1174 self.cache(1)?;
1175
1176 while IS_ALPHA!(self.buffer)
1177 || CHECK!(self.buffer, ';')
1178 || CHECK!(self.buffer, '/')
1179 || CHECK!(self.buffer, '?')
1180 || CHECK!(self.buffer, ':')
1181 || CHECK!(self.buffer, '@')
1182 || CHECK!(self.buffer, '&')
1183 || CHECK!(self.buffer, '=')
1184 || CHECK!(self.buffer, '+')
1185 || CHECK!(self.buffer, '$')
1186 || CHECK!(self.buffer, '.')
1187 || CHECK!(self.buffer, '%')
1188 || CHECK!(self.buffer, '!')
1189 || CHECK!(self.buffer, '~')
1190 || CHECK!(self.buffer, '*')
1191 || CHECK!(self.buffer, '\'')
1192 || CHECK!(self.buffer, '(')
1193 || CHECK!(self.buffer, ')')
1194 || uri_char
1195 && (CHECK!(self.buffer, ',')
1196 || CHECK!(self.buffer, '[')
1197 || CHECK!(self.buffer, ']'))
1198 {
1199 if CHECK!(self.buffer, '%') {
1200 self.scan_uri_escapes(directive, start_mark, &mut string)?;
1201 } else {
1202 self.read_char(&mut string);
1203 }
1204 length += 1;
1205 self.cache(1)?;
1206 }
1207 if length == 0 {
1208 self.set_scanner_error(
1209 if directive {
1210 "while parsing a %TAG directive"
1211 } else {
1212 "while parsing a tag"
1213 },
1214 start_mark,
1215 "did not find expected tag URI",
1216 )
1217 } else {
1218 Ok(string)
1219 }
1220 }
1221
1222 fn scan_uri_escapes(
1223 &mut self,
1224 directive: bool,
1225 start_mark: Mark,
1226 string: &mut String,
1227 ) -> Result<()> {
1228 let mut width: i32 = 0;
1229 loop {
1230 self.cache(3)?;
1231 if !(CHECK!(self.buffer, '%')
1232 && IS_HEX_AT!(self.buffer, 1)
1233 && IS_HEX_AT!(self.buffer, 2))
1234 {
1235 return self.set_scanner_error(
1236 if directive {
1237 "while parsing a %TAG directive"
1238 } else {
1239 "while parsing a tag"
1240 },
1241 start_mark,
1242 "did not find URI escaped octet",
1243 );
1244 }
1245 let octet = ((AS_HEX_AT!(self.buffer, 1) << 4) + AS_HEX_AT!(self.buffer, 2)) as u8;
1246 if width == 0 {
1247 width = if octet & 0x80 == 0 {
1248 1
1249 } else if octet & 0xE0 == 0xC0 {
1250 2
1251 } else if octet & 0xF0 == 0xE0 {
1252 3
1253 } else if octet & 0xF8 == 0xF0 {
1254 4
1255 } else {
1256 0
1257 };
1258 if width == 0 {
1260 return self.set_scanner_error(
1261 if directive {
1262 "while parsing a %TAG directive"
1263 } else {
1264 "while parsing a tag"
1265 },
1266 start_mark,
1267 "found an incorrect leading UTF-8 octet",
1268 );
1269 }
1270 } else if octet & 0xC0 != 0x80 {
1271 return self.set_scanner_error(
1272 if directive {
1273 "while parsing a %TAG directive"
1274 } else {
1275 "while parsing a tag"
1276 },
1277 start_mark,
1278 "found an incorrect trailing UTF-8 octet",
1279 );
1280 }
1281 string.push(char::from_u32(octet as _).expect("invalid Unicode"));
1282 self.skip_char();
1283 self.skip_char();
1284 self.skip_char();
1285 width -= 1;
1286 if width == 0 {
1287 break;
1288 }
1289 }
1290 Ok(())
1291 }
1292
1293 fn scan_block_scalar(&mut self, literal: bool) -> Result<Token> {
1294 let mut end_mark: Mark;
1295 let mut string = String::new();
1296 let mut leading_break = String::new();
1297 let mut trailing_breaks = String::new();
1298 let mut chomping: i32 = 0;
1299 let mut increment: i32 = 0;
1300 let mut indent: i32 = 0;
1301 let mut leading_blank: i32 = 0;
1302 let mut trailing_blank: i32;
1303 let start_mark: Mark = self.mark;
1304 self.skip_char();
1305 self.cache(1)?;
1306
1307 if CHECK!(self.buffer, '+') || CHECK!(self.buffer, '-') {
1308 chomping = if CHECK!(self.buffer, '+') { 1 } else { -1 };
1309 self.skip_char();
1310 self.cache(1)?;
1311 if IS_DIGIT!(self.buffer) {
1312 if CHECK!(self.buffer, '0') {
1313 return self.set_scanner_error(
1314 "while scanning a block scalar",
1315 start_mark,
1316 "found an indentation indicator equal to 0",
1317 );
1318 }
1319 increment = AS_DIGIT!(self.buffer) as i32;
1320 self.skip_char();
1321 }
1322 } else if IS_DIGIT!(self.buffer) {
1323 if CHECK!(self.buffer, '0') {
1324 return self.set_scanner_error(
1325 "while scanning a block scalar",
1326 start_mark,
1327 "found an indentation indicator equal to 0",
1328 );
1329 }
1330 increment = AS_DIGIT!(self.buffer) as i32;
1331 self.skip_char();
1332 self.cache(1)?;
1333 if CHECK!(self.buffer, '+') || CHECK!(self.buffer, '-') {
1334 chomping = if CHECK!(self.buffer, '+') { 1 } else { -1 };
1335 self.skip_char();
1336 }
1337 }
1338
1339 self.cache(1)?;
1340 loop {
1341 if !IS_BLANK!(self.buffer) {
1342 break;
1343 }
1344 self.skip_char();
1345 self.cache(1)?;
1346 }
1347
1348 if CHECK!(self.buffer, '#') {
1349 loop {
1350 if IS_BREAKZ!(self.buffer) {
1351 break;
1352 }
1353 self.skip_char();
1354 self.cache(1)?;
1355 }
1356 }
1357
1358 if !IS_BREAKZ!(self.buffer) {
1359 return self.set_scanner_error(
1360 "while scanning a block scalar",
1361 start_mark,
1362 "did not find expected comment or line break",
1363 );
1364 }
1365
1366 if IS_BREAK!(self.buffer) {
1367 self.cache(2)?;
1368 self.skip_line_break();
1369 }
1370
1371 end_mark = self.mark;
1372 if increment != 0 {
1373 indent = if self.indent >= 0 {
1374 self.indent + increment
1375 } else {
1376 increment
1377 };
1378 }
1379 self.scan_block_scalar_breaks(
1380 &mut indent,
1381 &mut trailing_breaks,
1382 start_mark,
1383 &mut end_mark,
1384 )?;
1385
1386 self.cache(1)?;
1387
1388 loop {
1389 if self.mark.column as i32 != indent || IS_Z!(self.buffer) {
1390 break;
1391 }
1392 trailing_blank = IS_BLANK!(self.buffer) as i32;
1393 if !literal
1394 && leading_break.starts_with('\n')
1395 && leading_blank == 0
1396 && trailing_blank == 0
1397 {
1398 if trailing_breaks.is_empty() {
1399 string.push(' ');
1400 }
1401 leading_break.clear();
1402 } else {
1403 string.push_str(&leading_break);
1404 leading_break.clear();
1405 }
1406 string.push_str(&trailing_breaks);
1407 trailing_breaks.clear();
1408 leading_blank = IS_BLANK!(self.buffer) as i32;
1409 while !IS_BREAKZ!(self.buffer) {
1410 self.read_char(&mut string);
1411 self.cache(1)?;
1412 }
1413 self.cache(2)?;
1414 self.read_line_break(&mut leading_break);
1415 self.scan_block_scalar_breaks(
1416 &mut indent,
1417 &mut trailing_breaks,
1418 start_mark,
1419 &mut end_mark,
1420 )?;
1421 }
1422
1423 if chomping != -1 {
1424 string.push_str(&leading_break);
1425 }
1426
1427 if chomping == 1 {
1428 string.push_str(&trailing_breaks);
1429 }
1430
1431 Ok(Token {
1432 data: TokenData::Scalar {
1433 value: string,
1434 style: if literal {
1435 ScalarStyle::Literal
1436 } else {
1437 ScalarStyle::Folded
1438 },
1439 },
1440 start_mark,
1441 end_mark,
1442 })
1443 }
1444
1445 fn scan_block_scalar_breaks(
1446 &mut self,
1447 indent: &mut i32,
1448 breaks: &mut String,
1449 start_mark: Mark,
1450 end_mark: &mut Mark,
1451 ) -> Result<()> {
1452 let mut max_indent: i32 = 0;
1453 *end_mark = self.mark;
1454 loop {
1455 self.cache(1)?;
1456 while (*indent == 0 || (self.mark.column as i32) < *indent) && IS_SPACE!(self.buffer) {
1457 self.skip_char();
1458 self.cache(1)?;
1459 }
1460 if self.mark.column as i32 > max_indent {
1461 max_indent = self.mark.column as i32;
1462 }
1463 if (*indent == 0 || (self.mark.column as i32) < *indent) && IS_TAB!(self.buffer) {
1464 return self.set_scanner_error(
1465 "while scanning a block scalar",
1466 start_mark,
1467 "found a tab character where an indentation space is expected",
1468 );
1469 }
1470 if !IS_BREAK!(self.buffer) {
1471 break;
1472 }
1473 self.cache(2)?;
1474 self.read_line_break(breaks);
1475 *end_mark = self.mark;
1476 }
1477 if *indent == 0 {
1478 *indent = max_indent;
1479 if *indent < self.indent + 1 {
1480 *indent = self.indent + 1;
1481 }
1482 if *indent < 1 {
1483 *indent = 1;
1484 }
1485 }
1486 Ok(())
1487 }
1488
1489 fn scan_flow_scalar(&mut self, single: bool) -> Result<Token> {
1490 let mut string = String::new();
1491 let mut leading_break = String::new();
1492 let mut trailing_breaks = String::new();
1493 let mut whitespaces = String::new();
1494 let mut leading_blanks;
1495
1496 let start_mark: Mark = self.mark;
1497 self.skip_char();
1498 loop {
1499 self.cache(4)?;
1500
1501 if self.mark.column == 0
1502 && (CHECK_AT!(self.buffer, '-', 0)
1503 && CHECK_AT!(self.buffer, '-', 1)
1504 && CHECK_AT!(self.buffer, '-', 2)
1505 || CHECK_AT!(self.buffer, '.', 0)
1506 && CHECK_AT!(self.buffer, '.', 1)
1507 && CHECK_AT!(self.buffer, '.', 2))
1508 && IS_BLANKZ_AT!(self.buffer, 3)
1509 {
1510 return self.set_scanner_error(
1511 "while scanning a quoted scalar",
1512 start_mark,
1513 "found unexpected document indicator",
1514 );
1515 } else if IS_Z!(self.buffer) {
1516 return self.set_scanner_error(
1517 "while scanning a quoted scalar",
1518 start_mark,
1519 "found unexpected end of stream",
1520 );
1521 }
1522 self.cache(2)?;
1523 leading_blanks = false;
1524 while !IS_BLANKZ!(self.buffer) {
1525 if single && CHECK_AT!(self.buffer, '\'', 0) && CHECK_AT!(self.buffer, '\'', 1) {
1526 string.push('\'');
1527 self.skip_char();
1528 self.skip_char();
1529 } else {
1530 if CHECK!(self.buffer, if single { '\'' } else { '"' }) {
1531 break;
1532 }
1533 if !single && CHECK!(self.buffer, '\\') && IS_BREAK_AT!(self.buffer, 1) {
1534 self.cache(3)?;
1535 self.skip_char();
1536 self.skip_line_break();
1537 leading_blanks = true;
1538 break;
1539 } else if !single && CHECK!(self.buffer, '\\') {
1540 let mut code_length = 0usize;
1541 match self.buffer.get(1).copied().unwrap() {
1542 '0' => {
1543 string.push('\0');
1544 }
1545 'a' => {
1546 string.push('\x07');
1547 }
1548 'b' => {
1549 string.push('\x08');
1550 }
1551 't' | '\t' => {
1552 string.push('\t');
1553 }
1554 'n' => {
1555 string.push('\n');
1556 }
1557 'v' => {
1558 string.push('\x0B');
1559 }
1560 'f' => {
1561 string.push('\x0C');
1562 }
1563 'r' => {
1564 string.push('\r');
1565 }
1566 'e' => {
1567 string.push('\x1B');
1568 }
1569 ' ' => {
1570 string.push(' ');
1571 }
1572 '"' => {
1573 string.push('"');
1574 }
1575 '/' => {
1576 string.push('/');
1577 }
1578 '\\' => {
1579 string.push('\\');
1580 }
1581 'N' => {
1583 string.push('\u{0085}');
1584 }
1585 '_' => {
1587 string.push('\u{00a0}');
1588 }
1591 'L' => {
1593 string.push('\u{2028}');
1594 }
1598 'P' => {
1600 string.push('\u{2029}');
1601 }
1605 'x' => {
1606 code_length = 2;
1607 }
1608 'u' => {
1609 code_length = 4;
1610 }
1611 'U' => {
1612 code_length = 8;
1613 }
1614 _ => {
1615 return self.set_scanner_error(
1616 "while parsing a quoted scalar",
1617 start_mark,
1618 "found unknown escape character",
1619 );
1620 }
1621 }
1622 self.skip_char();
1623 self.skip_char();
1624 if code_length != 0 {
1625 let mut value: u32 = 0;
1626 let mut k = 0;
1627 self.cache(code_length)?;
1628 while k < code_length {
1629 if !IS_HEX_AT!(self.buffer, k) {
1630 return self.set_scanner_error(
1631 "while parsing a quoted scalar",
1632 start_mark,
1633 "did not find expected hexdecimal number",
1634 );
1635 }
1636 value = (value << 4) + AS_HEX_AT!(self.buffer, k);
1637 k += 1;
1638 }
1639 if let Some(ch) = char::from_u32(value) {
1640 string.push(ch);
1641 } else {
1642 return self.set_scanner_error(
1643 "while parsing a quoted scalar",
1644 start_mark,
1645 "found invalid Unicode character escape code",
1646 );
1647 }
1648
1649 k = 0;
1650 while k < code_length {
1651 self.skip_char();
1652 k += 1;
1653 }
1654 }
1655 } else {
1656 self.read_char(&mut string);
1657 }
1658 }
1659 self.cache(2)?;
1660 }
1661 self.cache(1)?;
1662 if CHECK!(self.buffer, if single { '\'' } else { '"' }) {
1663 break;
1664 }
1665 self.cache(1)?;
1666 while IS_BLANK!(self.buffer) || IS_BREAK!(self.buffer) {
1667 if IS_BLANK!(self.buffer) {
1668 if leading_blanks {
1669 self.skip_char();
1670 } else {
1671 self.read_char(&mut whitespaces);
1672 }
1673 } else {
1674 self.cache(2)?;
1675 if leading_blanks {
1676 self.read_line_break(&mut trailing_breaks);
1677 } else {
1678 whitespaces.clear();
1679 self.read_line_break(&mut leading_break);
1680 leading_blanks = true;
1681 }
1682 }
1683 self.cache(1)?;
1684 }
1685 if leading_blanks {
1686 if leading_break.starts_with('\n') {
1687 if trailing_breaks.is_empty() {
1688 string.push(' ');
1689 } else {
1690 string.push_str(&trailing_breaks);
1691 trailing_breaks.clear();
1692 }
1693 leading_break.clear();
1694 } else {
1695 string.push_str(&leading_break);
1696 string.push_str(&trailing_breaks);
1697 leading_break.clear();
1698 trailing_breaks.clear();
1699 }
1700 } else {
1701 string.push_str(&whitespaces);
1702 whitespaces.clear();
1703 }
1704 }
1705
1706 self.skip_char();
1707 let end_mark: Mark = self.mark;
1708 Ok(Token {
1709 data: TokenData::Scalar {
1710 value: string,
1711 style: if single {
1712 ScalarStyle::SingleQuoted
1713 } else {
1714 ScalarStyle::DoubleQuoted
1715 },
1716 },
1717 start_mark,
1718 end_mark,
1719 })
1720 }
1721
1722 fn scan_plain_scalar(&mut self) -> Result<Token> {
1723 let mut end_mark: Mark;
1724 let mut string = String::new();
1725 let mut leading_break = String::new();
1726 let mut trailing_breaks = String::new();
1727 let mut whitespaces = String::new();
1728 let mut leading_blanks = false;
1729 let indent: i32 = self.indent + 1;
1730 end_mark = self.mark;
1731 let start_mark: Mark = end_mark;
1732 loop {
1733 self.cache(4)?;
1734 if self.mark.column == 0
1735 && (CHECK_AT!(self.buffer, '-', 0)
1736 && CHECK_AT!(self.buffer, '-', 1)
1737 && CHECK_AT!(self.buffer, '-', 2)
1738 || CHECK_AT!(self.buffer, '.', 0)
1739 && CHECK_AT!(self.buffer, '.', 1)
1740 && CHECK_AT!(self.buffer, '.', 2))
1741 && IS_BLANKZ_AT!(self.buffer, 3)
1742 {
1743 break;
1744 }
1745 if CHECK!(self.buffer, '#') {
1746 break;
1747 }
1748 while !IS_BLANKZ!(self.buffer) {
1749 if self.flow_level != 0
1750 && CHECK!(self.buffer, ':')
1751 && (CHECK_AT!(self.buffer, ',', 1)
1752 || CHECK_AT!(self.buffer, '?', 1)
1753 || CHECK_AT!(self.buffer, '[', 1)
1754 || CHECK_AT!(self.buffer, ']', 1)
1755 || CHECK_AT!(self.buffer, '{', 1)
1756 || CHECK_AT!(self.buffer, '}', 1))
1757 {
1758 return self.set_scanner_error(
1759 "while scanning a plain scalar",
1760 start_mark,
1761 "found unexpected ':'",
1762 );
1763 }
1764
1765 if CHECK!(self.buffer, ':') && IS_BLANKZ_AT!(self.buffer, 1)
1766 || self.flow_level != 0
1767 && (CHECK!(self.buffer, ',')
1768 || CHECK!(self.buffer, '[')
1769 || CHECK!(self.buffer, ']')
1770 || CHECK!(self.buffer, '{')
1771 || CHECK!(self.buffer, '}'))
1772 {
1773 break;
1774 }
1775 if leading_blanks || !whitespaces.is_empty() {
1776 if leading_blanks {
1777 if leading_break.starts_with('\n') {
1778 if trailing_breaks.is_empty() {
1779 string.push(' ');
1780 } else {
1781 string.push_str(&trailing_breaks);
1782 trailing_breaks.clear();
1783 }
1784 leading_break.clear();
1785 } else {
1786 string.push_str(&leading_break);
1787 string.push_str(&trailing_breaks);
1788 leading_break.clear();
1789 trailing_breaks.clear();
1790 }
1791 leading_blanks = false;
1792 } else {
1793 string.push_str(&whitespaces);
1794 whitespaces.clear();
1795 }
1796 }
1797 self.read_char(&mut string);
1798 end_mark = self.mark;
1799 self.cache(2)?;
1800 }
1801 if !(IS_BLANK!(self.buffer) || IS_BREAK!(self.buffer)) {
1802 break;
1803 }
1804 self.cache(1)?;
1805
1806 while IS_BLANK!(self.buffer) || IS_BREAK!(self.buffer) {
1807 if IS_BLANK!(self.buffer) {
1808 if leading_blanks && (self.mark.column as i32) < indent && IS_TAB!(self.buffer)
1809 {
1810 return self.set_scanner_error(
1811 "while scanning a plain scalar",
1812 start_mark,
1813 "found a tab character that violates indentation",
1814 );
1815 } else if !leading_blanks {
1816 self.read_char(&mut whitespaces);
1817 } else {
1818 self.skip_char();
1819 }
1820 } else {
1821 self.cache(2)?;
1822
1823 if leading_blanks {
1824 self.read_line_break(&mut trailing_breaks);
1825 } else {
1826 whitespaces.clear();
1827 self.read_line_break(&mut leading_break);
1828 leading_blanks = true;
1829 }
1830 }
1831 self.cache(1)?;
1832 }
1833 if self.flow_level == 0 && (self.mark.column as i32) < indent {
1834 break;
1835 }
1836 }
1837
1838 if leading_blanks {
1839 self.simple_key_allowed = true;
1840 }
1841
1842 Ok(Token {
1843 data: TokenData::Scalar {
1844 value: string,
1845 style: ScalarStyle::Plain,
1846 },
1847 start_mark,
1848 end_mark,
1849 })
1850 }
1851}
1852
1853impl<R> Default for Scanner<R> {
1854 fn default() -> Self {
1855 Self::new()
1856 }
1857}
1858
1859impl<R: BufRead> Iterator for Scanner<R> {
1860 type Item = Result<Token>;
1861
1862 fn next(&mut self) -> Option<Self::Item> {
1863 if self.stream_end_produced {
1864 None
1865 } else {
1866 Some(self.scan())
1867 }
1868 }
1869}
1870
1871impl<R: BufRead> core::iter::FusedIterator for Scanner<R> {}