1#![allow(dead_code)]
17
18use std::collections::VecDeque;
19
20use super::model::{YamlDiagnostic, diagnostic_codes};
21
/// A position inside the scanner input.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) struct Mark {
    /// Byte offset from the start of the input.
    pub index: usize,
    /// Zero-based line number.
    pub line: usize,
    /// Zero-based column, counted in characters since the last line break.
    pub column: usize,
}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub(crate) struct SimpleKey {
39 pub token_number: usize,
40 pub required: bool,
41 pub mark: Mark,
42}
43
/// The syntactic form a scalar token was written in.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ScalarStyle {
    /// Unquoted text.
    Plain,
    /// Text delimited by `'`.
    SingleQuoted,
    /// Text delimited by `"`.
    DoubleQuoted,
    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>`.
    Folded,
}
54
/// Kinds of non-semantic input that are still emitted as tokens.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum TriviaKind {
    /// A run of spaces and/or tabs.
    Whitespace,
    /// One logical line break (`\n`, `\r\n`, or a lone `\r`).
    Newline,
    /// A `#` comment, up to but excluding the line break.
    Comment,
}
63
64#[derive(Debug, Clone, Copy, PartialEq, Eq)]
65pub(crate) enum TokenKind {
66 StreamStart,
67 StreamEnd,
68 DocumentStart,
69 DocumentEnd,
70 Directive,
71 BlockSequenceStart,
72 BlockMappingStart,
73 BlockEnd,
74 FlowSequenceStart,
75 FlowSequenceEnd,
76 FlowMappingStart,
77 FlowMappingEnd,
78 BlockEntry,
79 FlowEntry,
80 Key,
81 Value,
82 Alias,
83 Anchor,
84 Tag,
85 Scalar(ScalarStyle),
86 Trivia(TriviaKind),
87}
88
89#[derive(Debug, Clone, Copy, PartialEq, Eq)]
90pub(crate) struct Token {
91 pub kind: TokenKind,
92 pub start: Mark,
93 pub end: Mark,
94}
95
/// Incremental tokenizer over a YAML source string.
///
/// Tokens are produced on demand by `next_token`; problems are collected as
/// diagnostics instead of aborting the scan.
#[derive(Debug)]
pub(crate) struct Scanner<'a> {
    /// The complete text being scanned.
    input: &'a str,
    /// Current read position.
    cursor: Mark,
    /// Tokens already scanned but not yet handed out.
    tokens: VecDeque<Token>,
    /// Number of tokens handed out so far; together with the queue length this
    /// gives absolute token numbers for simple keys.
    tokens_taken: usize,
    /// Column of the innermost open block collection, or `-1` when none is open.
    indent: i32,
    /// Indents of the enclosing block collections, restored on unwind.
    indent_stack: Vec<i32>,
    /// One simple-key slot per flow level; indexed by `flow_level`.
    simple_keys: Vec<Option<SimpleKey>>,
    /// Nesting depth of `[`/`{` flow collections (0 means block context).
    flow_level: usize,
    /// Whether a simple key may start at the current position.
    allow_simple_key: bool,
    /// Problems found so far.
    diagnostics: Vec<YamlDiagnostic>,
    /// Set once `StreamEnd` has been queued; no further scanning happens afterwards.
    stream_end_emitted: bool,
}
125
126impl<'a> Scanner<'a> {
127 pub(crate) fn new(input: &'a str) -> Self {
128 let mut scanner = Self {
129 input,
130 cursor: Mark::default(),
131 tokens: VecDeque::new(),
132 tokens_taken: 0,
133 indent: -1,
134 indent_stack: Vec::new(),
135 simple_keys: vec![None],
138 flow_level: 0,
139 allow_simple_key: true,
140 diagnostics: Vec::new(),
141 stream_end_emitted: false,
142 };
143 let mark = scanner.cursor;
144 scanner.tokens.push_back(Token {
145 kind: TokenKind::StreamStart,
146 start: mark,
147 end: mark,
148 });
149 scanner
150 }
151
152 pub(crate) fn next_token(&mut self) -> Option<Token> {
153 while self.need_more_tokens() {
154 self.fetch_more_tokens();
155 }
156 let tok = self.tokens.pop_front();
157 if tok.is_some() {
158 self.tokens_taken += 1;
159 }
160 tok
161 }
162
163 fn need_more_tokens(&mut self) -> bool {
171 if self.stream_end_emitted {
172 return false;
173 }
174 if self.tokens.is_empty() {
175 return true;
176 }
177 self.stale_simple_keys();
178 matches!(
179 self.next_possible_simple_key_index(),
180 Some(min) if min == self.tokens_taken
181 )
182 }
183
184 fn next_possible_simple_key_index(&self) -> Option<usize> {
185 self.simple_keys
186 .iter()
187 .filter_map(|slot| slot.as_ref().map(|k| k.token_number))
188 .min()
189 }
190
191 fn fetch_more_tokens(&mut self) {
194 self.scan_trivia();
195 self.stale_simple_keys();
196 self.unwind_indent(self.cursor.column as i32);
197 if self.at_eof() {
198 self.fetch_stream_end();
199 return;
200 }
201 if self.flow_level == 0 && self.cursor.column == 0 {
204 if self.check_document_indicator(b"---") {
205 self.fetch_document_marker(TokenKind::DocumentStart);
206 return;
207 }
208 if self.check_document_indicator(b"...") {
209 self.fetch_document_marker(TokenKind::DocumentEnd);
210 return;
211 }
212 if self.peek_char() == Some('%') {
213 self.fetch_directive();
214 return;
215 }
216 }
217 match self.peek_char() {
218 Some('[') => {
219 self.fetch_flow_collection_start(TokenKind::FlowSequenceStart);
220 return;
221 }
222 Some('{') => {
223 self.fetch_flow_collection_start(TokenKind::FlowMappingStart);
224 return;
225 }
226 Some(']') => {
227 self.fetch_flow_collection_end(TokenKind::FlowSequenceEnd);
228 return;
229 }
230 Some('}') => {
231 self.fetch_flow_collection_end(TokenKind::FlowMappingEnd);
232 return;
233 }
234 Some(',') if self.flow_level > 0 => {
235 self.fetch_flow_entry();
236 return;
237 }
238 Some('-') if self.check_block_entry() => {
239 self.fetch_block_entry();
240 return;
241 }
242 Some('?') if self.check_key() => {
243 self.fetch_key();
244 return;
245 }
246 Some(':') if self.check_value() => {
247 self.fetch_value();
248 return;
249 }
250 Some('\'') => {
251 self.fetch_flow_scalar(ScalarStyle::SingleQuoted);
252 return;
253 }
254 Some('"') => {
255 self.fetch_flow_scalar(ScalarStyle::DoubleQuoted);
256 return;
257 }
258 Some('|') if self.flow_level == 0 => {
259 self.fetch_block_scalar(ScalarStyle::Literal);
260 return;
261 }
262 Some('>') if self.flow_level == 0 => {
263 self.fetch_block_scalar(ScalarStyle::Folded);
264 return;
265 }
266 _ => {}
267 }
268 self.fetch_plain_scalar();
272 }
273
274 fn fetch_flow_collection_start(&mut self, kind: TokenKind) {
275 let start = self.cursor;
276 self.advance();
277 let end = self.cursor;
278 self.flow_level += 1;
279 self.simple_keys.push(None);
282 self.tokens.push_back(Token { kind, start, end });
283 }
284
285 fn fetch_flow_collection_end(&mut self, kind: TokenKind) {
286 let start = self.cursor;
287 self.advance();
288 let end = self.cursor;
289 if self.flow_level > 0 {
290 self.flow_level -= 1;
291 self.simple_keys.pop();
292 }
293 self.tokens.push_back(Token { kind, start, end });
294 }
295
296 fn fetch_flow_entry(&mut self) {
297 self.allow_simple_key = true;
300 self.remove_simple_key();
301 let start = self.cursor;
302 self.advance();
303 let end = self.cursor;
304 self.tokens.push_back(Token {
305 kind: TokenKind::FlowEntry,
306 start,
307 end,
308 });
309 }
310
311 fn fetch_block_entry(&mut self) {
312 if self.flow_level == 0 {
313 if !self.allow_simple_key {
314 self.push_diagnostic(
315 diagnostic_codes::LEX_BLOCK_ENTRY_NOT_ALLOWED,
316 "block sequence entry not allowed here",
317 );
318 }
319 if self.add_indent(self.cursor.column as i32) {
320 let mark = self.cursor;
321 self.tokens.push_back(Token {
322 kind: TokenKind::BlockSequenceStart,
323 start: mark,
324 end: mark,
325 });
326 }
327 }
328 self.allow_simple_key = true;
329 self.remove_simple_key();
330 let start = self.cursor;
331 self.advance();
332 let end = self.cursor;
333 self.tokens.push_back(Token {
334 kind: TokenKind::BlockEntry,
335 start,
336 end,
337 });
338 }
339
340 fn fetch_key(&mut self) {
341 if self.flow_level == 0 {
342 if !self.allow_simple_key {
343 self.push_diagnostic(
344 diagnostic_codes::LEX_KEY_INDICATOR_NOT_ALLOWED,
345 "explicit key indicator not allowed here",
346 );
347 }
348 if self.add_indent(self.cursor.column as i32) {
349 let mark = self.cursor;
350 self.tokens.push_back(Token {
351 kind: TokenKind::BlockMappingStart,
352 start: mark,
353 end: mark,
354 });
355 }
356 }
357 self.allow_simple_key = self.flow_level == 0;
360 self.remove_simple_key();
361 let start = self.cursor;
362 self.advance();
363 let end = self.cursor;
364 self.tokens.push_back(Token {
365 kind: TokenKind::Key,
366 start,
367 end,
368 });
369 }
370
371 fn fetch_value(&mut self) {
372 if let Some(key) = self.simple_keys[self.flow_level].take() {
373 let queue_pos = key.token_number.saturating_sub(self.tokens_taken);
378 self.tokens.insert(
379 queue_pos,
380 Token {
381 kind: TokenKind::Key,
382 start: key.mark,
383 end: key.mark,
384 },
385 );
386 if self.flow_level == 0 && self.add_indent(key.mark.column as i32) {
387 self.tokens.insert(
388 queue_pos,
389 Token {
390 kind: TokenKind::BlockMappingStart,
391 start: key.mark,
392 end: key.mark,
393 },
394 );
395 }
396 self.allow_simple_key = false;
397 } else {
398 if self.flow_level == 0 {
402 if !self.allow_simple_key {
403 self.push_diagnostic(
404 diagnostic_codes::LEX_VALUE_INDICATOR_NOT_ALLOWED,
405 "value indicator not allowed here",
406 );
407 }
408 if self.add_indent(self.cursor.column as i32) {
409 let mark = self.cursor;
410 self.tokens.push_back(Token {
411 kind: TokenKind::BlockMappingStart,
412 start: mark,
413 end: mark,
414 });
415 }
416 }
417 self.allow_simple_key = self.flow_level == 0;
418 self.remove_simple_key();
419 }
420 let start = self.cursor;
421 self.advance();
422 let end = self.cursor;
423 self.tokens.push_back(Token {
424 kind: TokenKind::Value,
425 start,
426 end,
427 });
428 }
429
430 fn fetch_plain_scalar(&mut self) {
445 self.save_simple_key();
446 self.allow_simple_key = false;
447 let start = self.cursor;
448 let min_indent = self.indent + 1;
449 loop {
450 let chunk_start = self.cursor.index;
451 self.consume_plain_chunk();
452 if self.cursor.index == chunk_start {
453 break;
454 }
455 let saved = self.cursor;
459 while matches!(self.peek_char(), Some(' ' | '\t')) {
460 self.advance();
461 }
462 match self.peek_char() {
463 None | Some('#') => {
464 self.cursor = saved;
465 break;
466 }
467 Some('\n' | '\r') => {
468 if !self.try_consume_plain_line_break(min_indent) {
469 self.cursor = saved;
470 break;
471 }
472 }
473 Some(_) => {
474 }
477 }
478 }
479 let end = self.cursor;
480 if start.index == end.index {
481 self.advance();
486 let end = self.cursor;
487 self.tokens.push_back(Token {
488 kind: TokenKind::Scalar(ScalarStyle::Plain),
489 start,
490 end,
491 });
492 return;
493 }
494 self.tokens.push_back(Token {
495 kind: TokenKind::Scalar(ScalarStyle::Plain),
496 start,
497 end,
498 });
499 }
500
501 fn consume_plain_chunk(&mut self) {
505 loop {
506 match self.peek_char() {
507 None | Some('\n' | '\r' | ' ' | '\t') => break,
508 Some(':') => {
509 let next = self.peek_at(1);
510 if matches!(next, None | Some(' ' | '\t' | '\n' | '\r')) {
511 break;
512 }
513 if self.flow_level > 0 && matches!(next, Some(',' | ']' | '}')) {
514 break;
515 }
516 self.advance();
517 }
518 Some(',' | '[' | ']' | '{' | '}') if self.flow_level > 0 => break,
519 _ => {
520 self.advance();
521 }
522 }
523 }
524 }
525
526 fn try_consume_plain_line_break(&mut self, min_indent: i32) -> bool {
533 let saved = self.cursor;
534 self.consume_one_line_break();
535 loop {
536 while matches!(self.peek_char(), Some(' ' | '\t')) {
537 self.advance();
538 }
539 match self.peek_char() {
540 None => {
541 self.cursor = saved;
542 return false;
543 }
544 Some('\n' | '\r') => {
545 self.consume_one_line_break();
546 continue;
547 }
548 Some('#') => {
549 self.cursor = saved;
550 return false;
551 }
552 Some(_) => {
553 let col = self.cursor.column as i32;
554 if col < min_indent {
555 self.cursor = saved;
556 return false;
557 }
558 if self.flow_level == 0 {
559 if col == 0
561 && (self.check_document_indicator(b"---")
562 || self.check_document_indicator(b"..."))
563 {
564 self.cursor = saved;
565 return false;
566 }
567 if matches!(self.peek_char(), Some('-' | '?' | ':'))
573 && matches!(self.peek_at(1), None | Some(' ' | '\t' | '\n' | '\r'))
574 {
575 self.cursor = saved;
576 return false;
577 }
578 } else if matches!(self.peek_char(), Some(',' | ']' | '}')) {
579 self.cursor = saved;
584 return false;
585 }
586 return true;
587 }
588 }
589 }
590 }
591
592 fn fetch_flow_scalar(&mut self, style: ScalarStyle) {
598 self.save_simple_key();
599 self.allow_simple_key = false;
600 let start = self.cursor;
601 let quote = match style {
602 ScalarStyle::SingleQuoted => '\'',
603 ScalarStyle::DoubleQuoted => '"',
604 _ => unreachable!("fetch_flow_scalar called with non-quoted style"),
605 };
606 self.advance();
608 let mut closed = false;
609 while let Some(c) = self.peek_char() {
610 if c == quote {
611 if style == ScalarStyle::SingleQuoted && self.peek_at(1) == Some('\'') {
612 self.advance();
615 self.advance();
616 continue;
617 }
618 self.advance();
619 closed = true;
620 break;
621 }
622 if style == ScalarStyle::DoubleQuoted && c == '\\' {
623 self.advance();
624 self.consume_double_quoted_escape();
625 continue;
626 }
627 if self.flow_level == 0
632 && self.cursor.column == 0
633 && (self.check_document_indicator(b"---") || self.check_document_indicator(b"..."))
634 {
635 break;
636 }
637 self.advance();
638 }
639 if !closed {
640 self.diagnostics.push(YamlDiagnostic {
641 code: diagnostic_codes::LEX_UNTERMINATED_QUOTED_SCALAR,
642 message: "unterminated quoted scalar",
643 byte_start: start.index,
644 byte_end: self.cursor.index,
645 });
646 }
647 let end = self.cursor;
648 self.tokens.push_back(Token {
649 kind: TokenKind::Scalar(style),
650 start,
651 end,
652 });
653 }
654
655 fn consume_double_quoted_escape(&mut self) {
662 let backslash_index = self.cursor.index.saturating_sub(1);
665 match self.peek_char() {
666 None => {
667 }
670 Some('\n') => {
671 self.advance();
672 }
673 Some('\r') => {
674 self.advance();
675 if self.peek_char() == Some('\n') {
676 self.advance();
677 }
678 }
679 Some('x') => {
680 self.advance();
681 self.consume_hex_digits(2, backslash_index);
682 }
683 Some('u') => {
684 self.advance();
685 self.consume_hex_digits(4, backslash_index);
686 }
687 Some('U') => {
688 self.advance();
689 self.consume_hex_digits(8, backslash_index);
690 }
691 Some(c) if Self::is_double_quoted_single_byte_escape(c) => {
692 self.advance();
693 }
694 Some(_) => {
695 let invalid_end = self.cursor.index + self.peek_char().unwrap().len_utf8();
696 self.diagnostics.push(YamlDiagnostic {
697 code: diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
698 message: "invalid double-quoted escape",
699 byte_start: backslash_index,
700 byte_end: invalid_end,
701 });
702 self.advance();
703 }
704 }
705 }
706
707 fn consume_hex_digits(&mut self, count: usize, backslash_index: usize) {
708 let mut consumed = 0;
709 while consumed < count {
710 match self.peek_char() {
711 Some(c) if c.is_ascii_hexdigit() => {
712 self.advance();
713 consumed += 1;
714 }
715 _ => break,
716 }
717 }
718 if consumed < count {
719 self.diagnostics.push(YamlDiagnostic {
720 code: diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
721 message: "incomplete hex escape in double-quoted scalar",
722 byte_start: backslash_index,
723 byte_end: self.cursor.index,
724 });
725 }
726 }
727
/// Returns `true` when `c` completes a one-character escape (such as `\n` or
/// `\"`) in a double-quoted scalar.
fn is_double_quoted_single_byte_escape(c: char) -> bool {
    // Every character accepted directly after a backslash, in one table.
    const SINGLE_CHAR_ESCAPES: &str = "0abt\tnvfre \"/\\N_LP";
    SINGLE_CHAR_ESCAPES.contains(c)
}
751
752 fn fetch_block_scalar(&mut self, style: ScalarStyle) {
763 self.allow_simple_key = true;
768 self.remove_simple_key();
769 let start = self.cursor;
770 let parent_indent = self.indent;
771 self.advance();
773 let mut explicit_increment: Option<u32> = None;
775 for _ in 0..2 {
776 match self.peek_char() {
777 Some('+' | '-') => {
778 self.advance();
779 }
780 Some(d @ '1'..='9') if explicit_increment.is_none() => {
781 explicit_increment = Some(d.to_digit(10).expect("hex digit"));
782 self.advance();
783 }
784 _ => break,
785 }
786 }
787 while matches!(self.peek_char(), Some(' ' | '\t')) {
789 self.advance();
790 }
791 if self.peek_char() == Some('#') {
793 while !matches!(self.peek_char(), None | Some('\n' | '\r')) {
794 self.advance();
795 }
796 }
797 match self.peek_char() {
801 Some('\n') => {
802 self.advance();
803 }
804 Some('\r') => {
805 self.advance();
806 if self.peek_char() == Some('\n') {
807 self.advance();
808 }
809 }
810 None => {
811 let end = self.cursor;
813 self.tokens.push_back(Token {
814 kind: TokenKind::Scalar(style),
815 start,
816 end,
817 });
818 return;
819 }
820 Some(_) => {
821 while !matches!(self.peek_char(), None | Some('\n' | '\r')) {
823 self.advance();
824 }
825 match self.peek_char() {
826 Some('\n') => {
827 self.advance();
828 }
829 Some('\r') => {
830 self.advance();
831 if self.peek_char() == Some('\n') {
832 self.advance();
833 }
834 }
835 _ => {}
836 }
837 }
838 }
839 let base = parent_indent.max(0);
844 let min_indent = match explicit_increment {
845 Some(m) => base + m as i32,
846 None => self
847 .auto_detect_block_scalar_indent()
848 .unwrap_or(base + 1)
849 .max(base + 1),
850 };
851 loop {
854 let line_start = self.cursor.index;
855 let bytes = self.input.as_bytes();
856 let mut probe = line_start;
857 while bytes.get(probe) == Some(&b' ') {
858 probe += 1;
859 }
860 let leading_spaces = probe - line_start;
861 match bytes.get(probe) {
862 None => break,
863 Some(b'\n' | b'\r') => {
864 while self.cursor.index < probe {
867 self.advance();
868 }
869 self.consume_one_line_break();
870 continue;
871 }
872 _ => {}
873 }
874 if (leading_spaces as i32) < min_indent {
875 break;
877 }
878 if leading_spaces == 0
879 && (bytes.get(probe..probe + 3) == Some(b"---")
880 || bytes.get(probe..probe + 3) == Some(b"..."))
881 && matches!(
882 bytes.get(probe + 3),
883 None | Some(b' ' | b'\t' | b'\n' | b'\r')
884 )
885 {
886 break;
888 }
889 while !matches!(self.peek_char(), None | Some('\n' | '\r')) {
891 self.advance();
892 }
893 self.consume_one_line_break();
894 if self.at_eof() {
895 break;
896 }
897 }
898 let end = self.cursor;
899 self.tokens.push_back(Token {
900 kind: TokenKind::Scalar(style),
901 start,
902 end,
903 });
904 }
905
906 fn auto_detect_block_scalar_indent(&self) -> Option<i32> {
910 let bytes = self.input.as_bytes();
911 let mut i = self.cursor.index;
912 while i < bytes.len() {
913 let line_start = i;
914 while bytes.get(i) == Some(&b' ') {
915 i += 1;
916 }
917 match bytes.get(i) {
918 None => return None,
919 Some(b'\n') => {
920 i += 1;
921 continue;
922 }
923 Some(b'\r') => {
924 i += 1;
925 if bytes.get(i) == Some(&b'\n') {
926 i += 1;
927 }
928 continue;
929 }
930 _ => {
931 return Some((i - line_start) as i32);
932 }
933 }
934 }
935 None
936 }
937
938 fn consume_one_line_break(&mut self) {
939 match self.peek_char() {
940 Some('\n') => {
941 self.advance();
942 }
943 Some('\r') => {
944 self.advance();
945 if self.peek_char() == Some('\n') {
946 self.advance();
947 }
948 }
949 _ => {}
950 }
951 }
952
953 fn fetch_stream_end(&mut self) {
954 if self.stream_end_emitted {
955 return;
956 }
957 self.unwind_indent(-1);
958 for slot in self.simple_keys.iter_mut() {
962 if let Some(key) = slot.take()
963 && key.required
964 {
965 self.diagnostics.push(YamlDiagnostic {
966 code: diagnostic_codes::LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND,
967 message: "could not find expected ':' for required simple key",
968 byte_start: key.mark.index,
969 byte_end: key.mark.index,
970 });
971 }
972 }
973 self.allow_simple_key = false;
974 self.stream_end_emitted = true;
975 let mark = self.cursor;
976 self.tokens.push_back(Token {
977 kind: TokenKind::StreamEnd,
978 start: mark,
979 end: mark,
980 });
981 }
982
983 fn check_block_entry(&self) -> bool {
984 matches!(self.peek_at(1), None | Some(' ' | '\t' | '\n' | '\r'))
985 }
986
987 fn check_key(&self) -> bool {
993 matches!(self.peek_at(1), None | Some(' ' | '\t' | '\n' | '\r'))
994 }
995
996 fn check_value(&self) -> bool {
1001 if self.flow_level > 0 {
1002 return true;
1003 }
1004 matches!(self.peek_at(1), None | Some(' ' | '\t' | '\n' | '\r'))
1005 }
1006
1007 fn add_indent(&mut self, column: i32) -> bool {
1011 if self.indent < column {
1012 self.indent_stack.push(self.indent);
1013 self.indent = column;
1014 true
1015 } else {
1016 false
1017 }
1018 }
1019
1020 fn unwind_indent(&mut self, column: i32) {
1023 if self.flow_level > 0 {
1024 return;
1025 }
1026 while self.indent > column {
1027 let mark = self.cursor;
1028 self.indent = self.indent_stack.pop().unwrap_or(-1);
1029 self.tokens.push_back(Token {
1030 kind: TokenKind::BlockEnd,
1031 start: mark,
1032 end: mark,
1033 });
1034 }
1035 }
1036
1037 fn save_simple_key(&mut self) {
1044 if !self.allow_simple_key {
1045 return;
1046 }
1047 let required = self.flow_level == 0 && self.indent == self.cursor.column as i32;
1048 self.remove_simple_key();
1049 let token_number = self.tokens_taken + self.tokens.len();
1050 self.simple_keys[self.flow_level] = Some(SimpleKey {
1051 token_number,
1052 required,
1053 mark: self.cursor,
1054 });
1055 }
1056
1057 fn remove_simple_key(&mut self) {
1062 if let Some(key) = self.simple_keys[self.flow_level].take()
1063 && key.required
1064 {
1065 self.diagnostics.push(YamlDiagnostic {
1066 code: diagnostic_codes::LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND,
1067 message: "could not find expected ':' for required simple key",
1068 byte_start: key.mark.index,
1069 byte_end: key.mark.index,
1070 });
1071 }
1072 }
1073
1074 fn stale_simple_keys(&mut self) {
1079 let line = self.cursor.line;
1080 for slot in self.simple_keys.iter_mut() {
1081 let stale = match slot {
1082 Some(key) => key.mark.line != line,
1083 None => false,
1084 };
1085 if stale
1086 && let Some(key) = slot.take()
1087 && key.required
1088 {
1089 self.diagnostics.push(YamlDiagnostic {
1090 code: diagnostic_codes::LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND,
1091 message: "could not find expected ':' for required simple key",
1092 byte_start: key.mark.index,
1093 byte_end: key.mark.index,
1094 });
1095 }
1096 }
1097 }
1098
1099 fn push_diagnostic(&mut self, code: &'static str, message: &'static str) {
1100 self.diagnostics.push(YamlDiagnostic {
1101 code,
1102 message,
1103 byte_start: self.cursor.index,
1104 byte_end: self.cursor.index,
1105 });
1106 }
1107
1108 fn check_document_indicator(&self, marker: &[u8; 3]) -> bool {
1112 let bytes = self.input.as_bytes();
1113 let i = self.cursor.index;
1114 if bytes.get(i..i + 3) != Some(marker.as_slice()) {
1115 return false;
1116 }
1117 matches!(bytes.get(i + 3), None | Some(b' ' | b'\t' | b'\n' | b'\r'))
1118 }
1119
1120 fn fetch_document_marker(&mut self, kind: TokenKind) {
1121 self.unwind_indent(-1);
1132 self.remove_simple_key();
1133 self.allow_simple_key = false;
1134 let start = self.cursor;
1135 self.advance();
1136 self.advance();
1137 self.advance();
1138 let end = self.cursor;
1139 self.tokens.push_back(Token { kind, start, end });
1140 }
1141
1142 fn fetch_directive(&mut self) {
1146 let start = self.cursor;
1147 debug_assert_eq!(self.peek_char(), Some('%'));
1148 self.advance();
1149 while let Some(c) = self.peek_char() {
1150 if c == '\n' || c == '\r' {
1151 break;
1152 }
1153 self.advance();
1154 }
1155 let end = self.cursor;
1156 self.tokens.push_back(Token {
1157 kind: TokenKind::Directive,
1158 start,
1159 end,
1160 });
1161 }
1162
1163 fn scan_trivia(&mut self) {
1167 while !self.at_eof() {
1168 match self.peek_char() {
1169 Some(' ' | '\t') => self.scan_whitespace_run(),
1170 Some('\n' | '\r') => self.scan_newline(),
1171 Some('#') => self.scan_comment(),
1172 _ => break,
1173 }
1174 }
1175 }
1176
1177 fn scan_whitespace_run(&mut self) {
1178 let start = self.cursor;
1179 while matches!(self.peek_char(), Some(' ' | '\t')) {
1180 self.advance();
1181 }
1182 let end = self.cursor;
1183 self.tokens.push_back(Token {
1184 kind: TokenKind::Trivia(TriviaKind::Whitespace),
1185 start,
1186 end,
1187 });
1188 }
1189
1190 fn scan_newline(&mut self) {
1191 let start = self.cursor;
1192 match self.peek_char() {
1193 Some('\n') => {
1194 self.advance();
1195 }
1196 Some('\r') => {
1197 self.advance();
1198 if self.peek_char() == Some('\n') {
1199 self.advance();
1200 }
1201 }
1202 _ => unreachable!("scan_newline called on non-newline char"),
1203 }
1204 let end = self.cursor;
1205 if self.flow_level == 0 {
1210 self.allow_simple_key = true;
1211 }
1212 self.tokens.push_back(Token {
1213 kind: TokenKind::Trivia(TriviaKind::Newline),
1214 start,
1215 end,
1216 });
1217 }
1218
1219 fn scan_comment(&mut self) {
1220 let start = self.cursor;
1221 debug_assert_eq!(self.peek_char(), Some('#'));
1222 self.advance();
1223 while let Some(c) = self.peek_char() {
1224 if c == '\n' || c == '\r' {
1225 break;
1226 }
1227 self.advance();
1228 }
1229 let end = self.cursor;
1230 self.tokens.push_back(Token {
1231 kind: TokenKind::Trivia(TriviaKind::Comment),
1232 start,
1233 end,
1234 });
1235 }
1236
1237 pub(crate) fn diagnostics(&self) -> &[YamlDiagnostic] {
1238 &self.diagnostics
1239 }
1240
1241 pub(crate) fn cursor(&self) -> Mark {
1242 self.cursor
1243 }
1244
1245 pub(crate) fn at_eof(&self) -> bool {
1246 self.cursor.index >= self.input.len()
1247 }
1248
1249 fn remaining(&self) -> &str {
1250 &self.input[self.cursor.index..]
1251 }
1252
1253 pub(crate) fn peek_char(&self) -> Option<char> {
1254 self.remaining().chars().next()
1255 }
1256
1257 pub(crate) fn peek_at(&self, offset: usize) -> Option<char> {
1260 self.remaining().chars().nth(offset)
1261 }
1262
1263 pub(crate) fn advance(&mut self) -> Option<char> {
1267 let c = self.peek_char()?;
1268 self.cursor.index += c.len_utf8();
1269 match c {
1270 '\n' => {
1271 self.cursor.line += 1;
1272 self.cursor.column = 0;
1273 }
1274 '\r' => {
1275 if self.peek_char() != Some('\n') {
1279 self.cursor.line += 1;
1280 self.cursor.column = 0;
1281 }
1282 }
1283 _ => {
1284 self.cursor.column += 1;
1285 }
1286 }
1287 Some(c)
1288 }
1289}
1290
/// Summary of one scanner run, describing how well the emitted tokens tile
/// the input.
#[derive(Clone, Debug)]
pub struct ShadowScannerReport {
    /// `true` when the tokens cover the input with no gap, no overlap, and
    /// reach its end.
    pub byte_complete: bool,
    /// Total number of tokens produced, stream markers included.
    pub token_count: usize,
    /// Codes of all diagnostics the scanner recorded.
    pub diagnostic_codes: Vec<&'static str>,
    /// Largest token end offset seen (stream markers excluded).
    pub last_token_end: usize,
    /// Length of the input in bytes.
    pub input_len: usize,
    /// Byte offset at which the first uncovered gap begins, if any.
    pub gap_at: Option<usize>,
    /// `true` when some token started before the end of the bytes already covered.
    pub overlapping: bool,
}
1314
1315pub fn shadow_scanner_check(input: &str) -> ShadowScannerReport {
1320 let mut scanner = Scanner::new(input);
1321 let mut tokens = Vec::new();
1322 while let Some(tok) = scanner.next_token() {
1323 tokens.push(tok);
1324 }
1325 let mut cursor = 0usize;
1326 let mut overlapping = false;
1327 let mut gap_at: Option<usize> = None;
1328 for tok in &tokens {
1329 match tok.kind {
1330 TokenKind::StreamStart | TokenKind::StreamEnd => {}
1331 _ => {
1332 if tok.start.index < cursor {
1333 overlapping = true;
1334 } else if tok.start.index > cursor && gap_at.is_none() {
1335 gap_at = Some(cursor);
1336 }
1337 if tok.end.index > cursor {
1338 cursor = tok.end.index;
1339 }
1340 }
1341 }
1342 }
1343 let byte_complete = !overlapping && gap_at.is_none() && cursor == input.len();
1344 ShadowScannerReport {
1345 byte_complete,
1346 token_count: tokens.len(),
1347 diagnostic_codes: scanner.diagnostics.iter().map(|d| d.code).collect(),
1348 last_token_end: cursor,
1349 input_len: input.len(),
1350 gap_at,
1351 overlapping,
1352 }
1353}
1354
1355#[cfg(test)]
1356mod tests {
1357 use super::*;
1358
/// An empty input yields exactly the two stream markers, then nothing.
#[test]
fn empty_input_emits_stream_start_then_stream_end() {
    let mut scanner = Scanner::new("");
    for expected in [TokenKind::StreamStart, TokenKind::StreamEnd] {
        let kind = scanner.next_token().map(|t| t.kind);
        assert_eq!(kind, Some(expected));
    }
    assert_eq!(scanner.next_token(), None);
}
1372
/// Real content is always bracketed by `StreamStart` and `StreamEnd`.
#[test]
fn first_and_last_tokens_are_always_stream_markers() {
    let mut scanner = Scanner::new("foo: bar\n");
    let first = scanner.next_token().map(|t| t.kind);
    assert_eq!(first, Some(TokenKind::StreamStart));

    let last = std::iter::from_fn(|| scanner.next_token()).last();
    assert_eq!(last.map(|t| t.kind), Some(TokenKind::StreamEnd));
}
1386
/// After trivia-only input, `StreamEnd` is zero-width and sits at the end of the input.
#[test]
fn stream_end_marks_cursor_position_after_trivia_only_input() {
    let input = " \n";
    let mut scanner = Scanner::new(input);
    let final_token = std::iter::from_fn(|| scanner.next_token()).last();
    let end = final_token.expect("stream end");
    assert_eq!(end.kind, TokenKind::StreamEnd);
    assert_eq!(end.start.index, input.len());
    assert_eq!(end.end.index, input.len());
}
1401
/// A freshly created scanner has recorded no diagnostics.
#[test]
fn diagnostics_start_empty() {
    let fresh = Scanner::new("");
    let recorded = fresh.diagnostics();
    assert!(recorded.is_empty());
}
1407
/// The cursor of a new scanner is at index 0, line 0, column 0.
#[test]
fn cursor_starts_at_origin() {
    let scanner = Scanner::new("anything");
    let origin = Mark {
        index: 0,
        line: 0,
        column: 0,
    };
    assert_eq!(scanner.cursor(), origin);
}
1420
/// With empty input the scanner is at end of input immediately and peeks nothing.
#[test]
fn at_eof_is_true_for_empty_input() {
    let empty = Scanner::new("");
    assert!(empty.at_eof());
    assert_eq!(empty.peek_char(), None);
}
1427
/// Peeking, at any offset, leaves the cursor where it was.
#[test]
fn peek_does_not_advance_cursor() {
    let scanner = Scanner::new("abc");
    assert_eq!(scanner.peek_char(), Some('a'));
    for (offset, expected) in [(1, Some('b')), (2, Some('c')), (3, None)] {
        assert_eq!(scanner.peek_at(offset), expected);
    }
    assert_eq!(scanner.cursor().index, 0);
}
1437
/// Each ASCII character moves index and column by one and leaves the line unchanged.
#[test]
fn advance_moves_through_ascii_one_column_per_char() {
    let mut scanner = Scanner::new("abc");
    for (step, expected_char) in ['a', 'b', 'c'].into_iter().enumerate() {
        assert_eq!(scanner.advance(), Some(expected_char));
        let consumed = step + 1;
        assert_eq!(
            scanner.cursor(),
            Mark {
                index: consumed,
                line: 0,
                column: consumed
            }
        );
    }
    assert_eq!(scanner.advance(), None);
    assert!(scanner.at_eof());
}
1471
/// `\n` starts a new line at column 0.
#[test]
fn lf_increments_line_and_resets_column() {
    let mut scanner = Scanner::new("a\nb");
    // (characters to consume, cursor expected afterwards)
    let checkpoints = [
        (
            2,
            Mark {
                index: 2,
                line: 1,
                column: 0,
            },
        ),
        (
            1,
            Mark {
                index: 3,
                line: 1,
                column: 1,
            },
        ),
    ];
    for (steps, expected) in checkpoints {
        for _ in 0..steps {
            scanner.advance();
        }
        assert_eq!(scanner.cursor(), expected);
    }
}
1495
/// In `\r\n` only the `\n` bumps the line counter.
#[test]
fn crlf_counts_as_one_line_break() {
    let mut scanner = Scanner::new("a\r\nb");
    // Consume 'a' and '\r': still on the first line.
    scanner.advance();
    scanner.advance();
    let after_cr = scanner.cursor();
    assert_eq!(after_cr.line, 0);
    assert_eq!(after_cr.index, 2);

    // Consume '\n'.
    scanner.advance();
    assert_eq!(
        scanner.cursor(),
        Mark {
            index: 3,
            line: 1,
            column: 0
        }
    );

    // Consume 'b'.
    scanner.advance();
    assert_eq!(
        scanner.cursor(),
        Mark {
            index: 4,
            line: 1,
            column: 1
        }
    );
}
1522
/// A `\r` not followed by `\n` is a line break by itself.
#[test]
fn lone_cr_takes_its_own_line_break() {
    let mut scanner = Scanner::new("a\rb");
    // (characters to consume, cursor expected afterwards)
    let checkpoints = [
        (
            2,
            Mark {
                index: 2,
                line: 1,
                column: 0,
            },
        ),
        (
            1,
            Mark {
                index: 3,
                line: 1,
                column: 1,
            },
        ),
    ];
    for (steps, expected) in checkpoints {
        for _ in 0..steps {
            scanner.advance();
        }
        assert_eq!(scanner.cursor(), expected);
    }
}
1546
/// A multi-byte character moves the index by its byte length but the column by one.
#[test]
fn multibyte_utf8_advances_index_by_byte_length_and_column_by_one() {
    let mut scanner = Scanner::new("é!");
    let after_each_char = [
        Mark {
            index: 2,
            line: 0,
            column: 1,
        },
        Mark {
            index: 3,
            line: 0,
            column: 2,
        },
    ];
    for expected in after_each_char {
        scanner.advance();
        assert_eq!(scanner.cursor(), expected);
    }
}
1570
/// LF, CRLF and lone CR each count as exactly one line break.
#[test]
fn mixed_line_endings_track_correctly() {
    let mut scanner = Scanner::new("a\nb\r\nc\rd");
    loop {
        if scanner.advance().is_none() {
            break;
        }
    }
    let Mark {
        index,
        line,
        column,
    } = scanner.cursor();
    assert_eq!(line, 3);
    assert_eq!(column, 1);
    assert_eq!(index, 8);
}
1580
1581 fn collect_tokens(input: &str) -> Vec<Token> {
1582 let mut scanner = Scanner::new(input);
1583 let mut out = Vec::new();
1584 while let Some(tok) = scanner.next_token() {
1585 out.push(tok);
1586 }
1587 out
1588 }
1589
1590 fn trivia_kinds(tokens: &[Token]) -> Vec<TriviaKind> {
1591 tokens
1592 .iter()
1593 .filter_map(|t| match t.kind {
1594 TokenKind::Trivia(k) => Some(k),
1595 _ => None,
1596 })
1597 .collect()
1598 }
1599
1600 fn assert_byte_complete(input: &str, tokens: &[Token]) {
1601 let mut cursor = 0usize;
1604 for tok in tokens {
1605 match tok.kind {
1606 TokenKind::StreamStart | TokenKind::StreamEnd => {
1607 assert_eq!(tok.start.index, tok.end.index, "synthetic token has extent");
1608 }
1609 _ => {
1610 assert_eq!(tok.start.index, cursor, "token starts at expected position");
1611 assert!(tok.end.index >= tok.start.index);
1612 cursor = tok.end.index;
1613 }
1614 }
1615 }
1616 assert_eq!(cursor, input.len(), "all bytes covered");
1617 }
1618
/// Mixed spaces and tabs are emitted as a single whitespace token.
#[test]
fn pure_whitespace_yields_one_whitespace_trivia_token() {
    let input = " \t ";
    let tokens = collect_tokens(input);
    let kinds = trivia_kinds(&tokens);
    assert_eq!(
        kinds,
        vec![TriviaKind::Whitespace],
        "whitespace coalesces into a single run"
    );
    assert_byte_complete(input, &tokens);
}
1629
1630 #[test]
1631 fn newline_emits_one_newline_per_logical_break() {
1632 let input = "\n\r\n\r";
1633 let tokens = collect_tokens(input);
1634 assert_eq!(
1635 trivia_kinds(&tokens),
1636 vec![
1637 TriviaKind::Newline,
1638 TriviaKind::Newline,
1639 TriviaKind::Newline
1640 ],
1641 );
1642 assert_byte_complete(input, &tokens);
1643 }
1644
1645 #[test]
1646 fn comment_runs_to_end_of_line_excluding_break() {
1647 let input = "# hello\n# next\n";
1648 let tokens = collect_tokens(input);
1649 assert_eq!(
1650 trivia_kinds(&tokens),
1651 vec![
1652 TriviaKind::Comment,
1653 TriviaKind::Newline,
1654 TriviaKind::Comment,
1655 TriviaKind::Newline,
1656 ],
1657 );
1658 let comment_tok = tokens
1660 .iter()
1661 .find(|t| matches!(t.kind, TokenKind::Trivia(TriviaKind::Comment)))
1662 .unwrap();
1663 assert_eq!(
1664 &input[comment_tok.start.index..comment_tok.end.index],
1665 "# hello"
1666 );
1667 assert_byte_complete(input, &tokens);
1668 }
1669
1670 #[test]
1671 fn whitespace_then_comment_then_newline_separates_into_three_tokens() {
1672 let input = " # comment\n";
1673 let tokens = collect_tokens(input);
1674 assert_eq!(
1675 trivia_kinds(&tokens),
1676 vec![
1677 TriviaKind::Whitespace,
1678 TriviaKind::Comment,
1679 TriviaKind::Newline
1680 ],
1681 );
1682 assert_byte_complete(input, &tokens);
1683 }
1684
1685 #[test]
1686 fn pure_trivia_input_round_trips_byte_complete() {
1687 let input = " \t# c1\r\n\n # c2\n\r";
1691 let tokens = collect_tokens(input);
1692 assert_byte_complete(input, &tokens);
1693 assert!(matches!(
1694 tokens.last().map(|t| t.kind),
1695 Some(TokenKind::StreamEnd),
1696 ));
1697 }
1698
1699 #[test]
1700 fn empty_input_emits_only_stream_markers() {
1701 let tokens = collect_tokens("");
1702 assert_eq!(tokens.len(), 2);
1703 assert_eq!(tokens[0].kind, TokenKind::StreamStart);
1704 assert_eq!(tokens[1].kind, TokenKind::StreamEnd);
1705 }
1706
1707 fn meaningful_kinds(tokens: &[Token]) -> Vec<TokenKind> {
1708 tokens
1709 .iter()
1710 .map(|t| t.kind)
1711 .filter(|k| !matches!(k, TokenKind::Trivia(_)))
1712 .collect()
1713 }
1714
1715 #[test]
1716 fn document_start_marker_at_column_zero_emits_token() {
1717 let input = "---\n";
1718 let tokens = collect_tokens(input);
1719 assert_eq!(
1720 meaningful_kinds(&tokens),
1721 vec![
1722 TokenKind::StreamStart,
1723 TokenKind::DocumentStart,
1724 TokenKind::StreamEnd
1725 ],
1726 );
1727 assert_byte_complete(input, &tokens);
1728 }
1729
1730 #[test]
1731 fn document_end_marker_at_column_zero_emits_token() {
1732 let input = "...\n";
1733 let tokens = collect_tokens(input);
1734 assert_eq!(
1735 meaningful_kinds(&tokens),
1736 vec![
1737 TokenKind::StreamStart,
1738 TokenKind::DocumentEnd,
1739 TokenKind::StreamEnd
1740 ],
1741 );
1742 assert_byte_complete(input, &tokens);
1743 }
1744
1745 #[test]
1746 fn document_marker_at_eof_without_trailing_break_still_emits() {
1747 let input = "---";
1748 let tokens = collect_tokens(input);
1749 assert_eq!(
1750 meaningful_kinds(&tokens),
1751 vec![
1752 TokenKind::StreamStart,
1753 TokenKind::DocumentStart,
1754 TokenKind::StreamEnd
1755 ],
1756 );
1757 }
1758
1759 #[test]
1760 fn three_dashes_followed_by_non_break_is_not_a_marker() {
1761 let tokens = collect_tokens("---abc\n");
1763 let kinds = meaningful_kinds(&tokens);
1764 assert!(!kinds.contains(&TokenKind::DocumentStart), "got {kinds:?}",);
1765 assert!(
1766 kinds.contains(&TokenKind::Scalar(ScalarStyle::Plain)),
1767 "got {kinds:?}",
1768 );
1769 }
1770
1771 #[test]
1772 fn three_dashes_indented_is_not_a_marker() {
1773 let tokens = collect_tokens(" ---\n");
1775 let kinds = meaningful_kinds(&tokens);
1776 assert!(!kinds.contains(&TokenKind::DocumentStart), "got {kinds:?}",);
1777 }
1778
1779 #[test]
1780 fn directive_at_column_zero_emits_directive_token() {
1781 let input = "%YAML 1.2\n";
1782 let tokens = collect_tokens(input);
1783 let directive = tokens
1784 .iter()
1785 .find(|t| matches!(t.kind, TokenKind::Directive))
1786 .expect("directive token");
1787 assert_eq!(
1788 &input[directive.start.index..directive.end.index],
1789 "%YAML 1.2",
1790 );
1791 assert_byte_complete(input, &tokens);
1792 }
1793
1794 #[test]
1795 fn directive_indented_is_not_recognized() {
1796 let tokens = collect_tokens(" %YAML 1.2\n");
1798 let kinds = meaningful_kinds(&tokens);
1799 assert!(!kinds.contains(&TokenKind::Directive), "got {kinds:?}",);
1800 }
1801
1802 #[test]
1803 fn document_start_then_marker_on_new_line() {
1804 let input = "---\n...\n";
1806 let tokens = collect_tokens(input);
1807 assert_eq!(
1808 meaningful_kinds(&tokens),
1809 vec![
1810 TokenKind::StreamStart,
1811 TokenKind::DocumentStart,
1812 TokenKind::DocumentEnd,
1813 TokenKind::StreamEnd,
1814 ],
1815 );
1816 assert_byte_complete(input, &tokens);
1817 }
1818
1819 #[test]
1820 fn directive_followed_by_doc_start_emits_both_in_order() {
1821 let input = "%YAML 1.2\n---\n";
1822 let tokens = collect_tokens(input);
1823 assert_eq!(
1824 meaningful_kinds(&tokens),
1825 vec![
1826 TokenKind::StreamStart,
1827 TokenKind::Directive,
1828 TokenKind::DocumentStart,
1829 TokenKind::StreamEnd,
1830 ],
1831 );
1832 assert_byte_complete(input, &tokens);
1833 }
1834
1835 #[test]
1836 fn document_marker_followed_by_space_emits_marker_then_content_scalar() {
1837 let input = "--- foo\n";
1838 let tokens = collect_tokens(input);
1839 let kinds = meaningful_kinds(&tokens);
1840 assert_eq!(kinds[0], TokenKind::StreamStart);
1841 assert_eq!(kinds[1], TokenKind::DocumentStart);
1842 assert_eq!(kinds[2], TokenKind::Scalar(ScalarStyle::Plain));
1844 assert_eq!(*kinds.last().unwrap(), TokenKind::StreamEnd);
1845 assert_byte_complete(input, &tokens);
1846 }
1847
1848 #[test]
1849 fn empty_flow_sequence_emits_start_then_end() {
1850 let input = "[]";
1851 let tokens = collect_tokens(input);
1852 assert_eq!(
1853 meaningful_kinds(&tokens),
1854 vec![
1855 TokenKind::StreamStart,
1856 TokenKind::FlowSequenceStart,
1857 TokenKind::FlowSequenceEnd,
1858 TokenKind::StreamEnd,
1859 ],
1860 );
1861 assert_byte_complete(input, &tokens);
1862 }
1863
1864 #[test]
1865 fn empty_flow_mapping_emits_start_then_end() {
1866 let input = "{}";
1867 let tokens = collect_tokens(input);
1868 assert_eq!(
1869 meaningful_kinds(&tokens),
1870 vec![
1871 TokenKind::StreamStart,
1872 TokenKind::FlowMappingStart,
1873 TokenKind::FlowMappingEnd,
1874 TokenKind::StreamEnd,
1875 ],
1876 );
1877 assert_byte_complete(input, &tokens);
1878 }
1879
1880 #[test]
1881 fn nested_flow_sequence_brackets_emit_in_order() {
1882 let input = "[[]]";
1883 let tokens = collect_tokens(input);
1884 assert_eq!(
1885 meaningful_kinds(&tokens),
1886 vec![
1887 TokenKind::StreamStart,
1888 TokenKind::FlowSequenceStart,
1889 TokenKind::FlowSequenceStart,
1890 TokenKind::FlowSequenceEnd,
1891 TokenKind::FlowSequenceEnd,
1892 TokenKind::StreamEnd,
1893 ],
1894 );
1895 assert_byte_complete(input, &tokens);
1896 }
1897
1898 #[test]
1899 fn nested_flow_mixed_brackets_emit_in_order() {
1900 let input = "[{}]";
1901 let tokens = collect_tokens(input);
1902 assert_eq!(
1903 meaningful_kinds(&tokens),
1904 vec![
1905 TokenKind::StreamStart,
1906 TokenKind::FlowSequenceStart,
1907 TokenKind::FlowMappingStart,
1908 TokenKind::FlowMappingEnd,
1909 TokenKind::FlowSequenceEnd,
1910 TokenKind::StreamEnd,
1911 ],
1912 );
1913 assert_byte_complete(input, &tokens);
1914 }
1915
1916 #[test]
1917 fn comma_inside_flow_emits_flow_entry() {
1918 let input = "[,,]";
1919 let tokens = collect_tokens(input);
1920 assert_eq!(
1921 meaningful_kinds(&tokens),
1922 vec![
1923 TokenKind::StreamStart,
1924 TokenKind::FlowSequenceStart,
1925 TokenKind::FlowEntry,
1926 TokenKind::FlowEntry,
1927 TokenKind::FlowSequenceEnd,
1928 TokenKind::StreamEnd,
1929 ],
1930 );
1931 assert_byte_complete(input, &tokens);
1932 }
1933
1934 #[test]
1935 fn comma_outside_flow_is_not_a_flow_entry() {
1936 let tokens = collect_tokens(",");
1938 let kinds = meaningful_kinds(&tokens);
1939 assert!(!kinds.contains(&TokenKind::FlowEntry), "got {kinds:?}");
1940 }
1941
1942 #[test]
1943 fn doc_markers_inside_flow_context_are_not_recognized() {
1944 let tokens = collect_tokens("[---]");
1947 let kinds = meaningful_kinds(&tokens);
1948 assert!(!kinds.contains(&TokenKind::DocumentStart), "got {kinds:?}");
1949 assert_eq!(kinds[1], TokenKind::FlowSequenceStart);
1950 }
1951
1952 #[test]
1953 fn flow_brackets_with_whitespace_emit_trivia_between() {
1954 let input = "[ , ]";
1955 let tokens = collect_tokens(input);
1956 assert_eq!(
1958 tokens
1959 .iter()
1960 .map(|t| t.kind)
1961 .filter(|k| !matches!(k, TokenKind::StreamStart | TokenKind::StreamEnd))
1962 .collect::<Vec<_>>(),
1963 vec![
1964 TokenKind::FlowSequenceStart,
1965 TokenKind::Trivia(TriviaKind::Whitespace),
1966 TokenKind::FlowEntry,
1967 TokenKind::Trivia(TriviaKind::Whitespace),
1968 TokenKind::FlowSequenceEnd,
1969 ],
1970 );
1971 assert_byte_complete(input, &tokens);
1972 }
1973
1974 #[test]
1975 fn block_mapping_implicit_key_splices_block_mapping_start_and_key() {
1976 let input = "key: value";
1980 let tokens = collect_tokens(input);
1981 assert_eq!(
1982 meaningful_kinds(&tokens),
1983 vec![
1984 TokenKind::StreamStart,
1985 TokenKind::BlockMappingStart,
1986 TokenKind::Key,
1987 TokenKind::Scalar(ScalarStyle::Plain),
1988 TokenKind::Value,
1989 TokenKind::Scalar(ScalarStyle::Plain),
1990 TokenKind::BlockEnd,
1991 TokenKind::StreamEnd,
1992 ],
1993 );
1994 assert_byte_complete(input, &tokens);
1995 }
1996
1997 #[test]
1998 fn block_sequence_emits_block_sequence_start_then_entries() {
1999 let input = "- a\n- b\n";
2000 let tokens = collect_tokens(input);
2001 assert_eq!(
2002 meaningful_kinds(&tokens),
2003 vec![
2004 TokenKind::StreamStart,
2005 TokenKind::BlockSequenceStart,
2006 TokenKind::BlockEntry,
2007 TokenKind::Scalar(ScalarStyle::Plain),
2008 TokenKind::BlockEntry,
2009 TokenKind::Scalar(ScalarStyle::Plain),
2010 TokenKind::BlockEnd,
2011 TokenKind::StreamEnd,
2012 ],
2013 );
2014 assert_byte_complete(input, &tokens);
2015 }
2016
2017 #[test]
2018 fn explicit_key_indicator_emits_key_and_value_without_splice() {
2019 let input = "? a\n: b\n";
2023 let tokens = collect_tokens(input);
2024 let kinds = meaningful_kinds(&tokens);
2025 assert_eq!(
2026 kinds,
2027 vec![
2028 TokenKind::StreamStart,
2029 TokenKind::BlockMappingStart,
2030 TokenKind::Key,
2031 TokenKind::Scalar(ScalarStyle::Plain),
2032 TokenKind::Value,
2033 TokenKind::Scalar(ScalarStyle::Plain),
2034 TokenKind::BlockEnd,
2035 TokenKind::StreamEnd,
2036 ],
2037 );
2038 assert_byte_complete(input, &tokens);
2039 }
2040
2041 #[test]
2042 fn multi_line_plain_scalar_does_not_confirm_simple_key_on_next_line() {
2043 let input = "a\nb: c\n";
2049 let tokens = collect_tokens(input);
2050 let kinds = meaningful_kinds(&tokens);
2051 let scalar_pos = kinds
2054 .iter()
2055 .position(|&k| k == TokenKind::Scalar(ScalarStyle::Plain))
2056 .expect("plain scalar present");
2057 if let Some(key_pos) = kinds.iter().position(|&k| k == TokenKind::Key) {
2058 assert!(
2059 scalar_pos < key_pos,
2060 "multi-line scalar must precede any key: {kinds:?}",
2061 );
2062 }
2063 let scalar = tokens
2065 .iter()
2066 .find(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2067 .unwrap();
2068 assert_eq!(&input[scalar.start.index..scalar.end.index], "a\nb");
2069 }
2070
2071 #[test]
2072 fn flow_mapping_with_implicit_key_emits_only_flow_indicators() {
2073 let input = "{a: b}";
2076 let tokens = collect_tokens(input);
2077 let kinds = meaningful_kinds(&tokens);
2078 assert_eq!(
2079 kinds,
2080 vec![
2081 TokenKind::StreamStart,
2082 TokenKind::FlowMappingStart,
2083 TokenKind::Key,
2084 TokenKind::Scalar(ScalarStyle::Plain),
2085 TokenKind::Value,
2086 TokenKind::Scalar(ScalarStyle::Plain),
2087 TokenKind::FlowMappingEnd,
2088 TokenKind::StreamEnd,
2089 ],
2090 );
2091 assert!(
2092 !kinds.contains(&TokenKind::BlockMappingStart),
2093 "got {kinds:?}",
2094 );
2095 assert_byte_complete(input, &tokens);
2096 }
2097
2098 #[test]
2099 fn flow_explicit_key_indicator_emits_key_token() {
2100 let input = "{? a: b}";
2103 let tokens = collect_tokens(input);
2104 let kinds = meaningful_kinds(&tokens);
2105 assert_eq!(kinds[0], TokenKind::StreamStart);
2106 assert_eq!(kinds[1], TokenKind::FlowMappingStart);
2107 assert_eq!(kinds[2], TokenKind::Key);
2108 assert!(kinds.contains(&TokenKind::Value));
2111 assert_byte_complete(input, &tokens);
2112 }
2113
2114 #[test]
2115 fn nested_block_mapping_emits_block_end_on_dedent() {
2116 let input = "outer:\n inner: x\ny: z\n";
2122 let tokens = collect_tokens(input);
2123 let kinds = meaningful_kinds(&tokens);
2124 let block_ends = kinds.iter().filter(|&&k| k == TokenKind::BlockEnd).count();
2125 assert_eq!(block_ends, 2, "got {kinds:?}");
2128 assert_byte_complete(input, &tokens);
2129 }
2130
2131 #[test]
2132 fn nested_block_sequence_inside_mapping_unwinds_correctly() {
2133 let input = "items:\n - a\n - b\nstatus: ok\n";
2141 let tokens = collect_tokens(input);
2142 let kinds = meaningful_kinds(&tokens);
2143 let key_positions: Vec<_> = kinds
2146 .iter()
2147 .enumerate()
2148 .filter_map(|(i, &k)| (k == TokenKind::Key).then_some(i))
2149 .collect();
2150 assert_eq!(key_positions.len(), 2, "expected 2 keys: {kinds:?}");
2151 let second_key = key_positions[1];
2152 let preceding_block_end = kinds[..second_key]
2153 .iter()
2154 .rposition(|&k| k == TokenKind::BlockEnd);
2155 assert!(
2156 preceding_block_end.is_some(),
2157 "BlockEnd must precede second key: {kinds:?}",
2158 );
2159 let n = kinds.len();
2161 assert_eq!(kinds[n - 1], TokenKind::StreamEnd);
2162 assert_eq!(kinds[n - 2], TokenKind::BlockEnd);
2163 assert_byte_complete(input, &tokens);
2164 }
2165
2166 #[test]
2167 fn value_indicator_with_no_simple_key_emits_block_mapping_start() {
2168 let input = ": value\n";
2172 let tokens = collect_tokens(input);
2173 let kinds = meaningful_kinds(&tokens);
2174 assert_eq!(kinds[0], TokenKind::StreamStart);
2175 assert_eq!(kinds[1], TokenKind::BlockMappingStart);
2176 assert_eq!(kinds[2], TokenKind::Value);
2177 assert!(!kinds[..3].contains(&TokenKind::Key), "got {kinds:?}",);
2179 assert_byte_complete(input, &tokens);
2180 }
2181
2182 #[test]
2183 fn block_mapping_unwinds_indents_at_stream_end() {
2184 let input = "a:\n b: c";
2189 let tokens = collect_tokens(input);
2190 let kinds = meaningful_kinds(&tokens);
2191 let n = kinds.len();
2193 assert_eq!(kinds[n - 1], TokenKind::StreamEnd);
2194 assert_eq!(kinds[n - 2], TokenKind::BlockEnd);
2195 assert_eq!(kinds[n - 3], TokenKind::BlockEnd);
2196 assert_byte_complete(input, &tokens);
2197 }
2198
2199 #[test]
2200 fn colon_inside_plain_scalar_token_does_not_break_scalar() {
2201 let input = "https://example.com";
2204 let tokens = collect_tokens(input);
2205 let scalar = tokens
2206 .iter()
2207 .find(|t| matches!(t.kind, TokenKind::Scalar(_)))
2208 .expect("plain scalar token");
2209 assert_eq!(
2210 &input[scalar.start.index..scalar.end.index],
2211 "https://example.com",
2212 );
2213 assert_byte_complete(input, &tokens);
2214 }
2215
2216 #[test]
2217 fn diagnostics_remain_empty_for_well_formed_inputs() {
2218 for input in ["key: value", "- a\n- b\n", "{a: b, c: d}", "? k\n: v\n"] {
2219 let mut scanner = Scanner::new(input);
2220 while scanner.next_token().is_some() {}
2221 assert!(
2222 scanner.diagnostics().is_empty(),
2223 "{input:?} produced unexpected diagnostics: {:?}",
2224 scanner.diagnostics(),
2225 );
2226 }
2227 }
2228
2229 fn find_scalar(tokens: &[Token]) -> &Token {
2230 tokens
2231 .iter()
2232 .find(|t| matches!(t.kind, TokenKind::Scalar(_)))
2233 .expect("expected scalar token")
2234 }
2235
2236 #[test]
2237 fn single_quoted_scalar_emits_token_spanning_quotes() {
2238 let input = "'hello'";
2239 let tokens = collect_tokens(input);
2240 let scalar = find_scalar(&tokens);
2241 assert_eq!(scalar.kind, TokenKind::Scalar(ScalarStyle::SingleQuoted));
2242 assert_eq!(&input[scalar.start.index..scalar.end.index], "'hello'");
2243 assert_byte_complete(input, &tokens);
2244 }
2245
2246 #[test]
2247 fn double_quoted_scalar_emits_token_spanning_quotes() {
2248 let input = "\"hello\"";
2249 let tokens = collect_tokens(input);
2250 let scalar = find_scalar(&tokens);
2251 assert_eq!(scalar.kind, TokenKind::Scalar(ScalarStyle::DoubleQuoted));
2252 assert_eq!(&input[scalar.start.index..scalar.end.index], "\"hello\"");
2253 assert_byte_complete(input, &tokens);
2254 }
2255
2256 #[test]
2257 fn single_quoted_scalar_treats_doubled_quote_as_escape() {
2258 let input = "'it''s'";
2261 let tokens = collect_tokens(input);
2262 let scalars: Vec<_> = tokens
2263 .iter()
2264 .filter(|t| matches!(t.kind, TokenKind::Scalar(_)))
2265 .collect();
2266 assert_eq!(scalars.len(), 1, "got {:?}", tokens);
2267 assert_eq!(
2268 &input[scalars[0].start.index..scalars[0].end.index],
2269 "'it''s'",
2270 );
2271 }
2272
2273 #[test]
2274 fn double_quoted_scalar_with_escaped_quote_does_not_terminate_early() {
2275 let input = "\"a\\\"b\"";
2278 let tokens = collect_tokens(input);
2279 let scalars: Vec<_> = tokens
2280 .iter()
2281 .filter(|t| matches!(t.kind, TokenKind::Scalar(_)))
2282 .collect();
2283 assert_eq!(scalars.len(), 1, "got {tokens:?}");
2284 assert_eq!(
2285 &input[scalars[0].start.index..scalars[0].end.index],
2286 "\"a\\\"b\"",
2287 );
2288 assert_byte_complete(input, &tokens);
2289 }
2290
2291 #[test]
2292 fn double_quoted_scalar_recognises_common_single_byte_escapes() {
2293 let input = "\"\\n\\t\\r\\0\\\\\\\"\"";
2295 let tokens = collect_tokens(input);
2296 let scalar = find_scalar(&tokens);
2297 assert_eq!(scalar.kind, TokenKind::Scalar(ScalarStyle::DoubleQuoted));
2298 assert_eq!(scalar.start.index, 0);
2300 assert_eq!(scalar.end.index, input.len());
2301 let mut scanner = Scanner::new(input);
2302 while scanner.next_token().is_some() {}
2303 assert!(scanner.diagnostics().is_empty());
2304 }
2305
2306 #[test]
2307 fn double_quoted_scalar_recognises_hex_escapes() {
2308 let input = "\"\\x41\\u00E9\\U0001F600\"";
2310 let mut scanner = Scanner::new(input);
2311 while scanner.next_token().is_some() {}
2312 assert!(
2313 scanner.diagnostics().is_empty(),
2314 "got {:?}",
2315 scanner.diagnostics()
2316 );
2317 }
2318
2319 #[test]
2320 fn double_quoted_scalar_with_invalid_escape_emits_diagnostic() {
2321 let input = "\"\\q\"";
2322 let mut scanner = Scanner::new(input);
2323 while scanner.next_token().is_some() {}
2324 assert_eq!(
2325 scanner.diagnostics().len(),
2326 1,
2327 "got {:?}",
2328 scanner.diagnostics(),
2329 );
2330 assert_eq!(
2331 scanner.diagnostics()[0].code,
2332 diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
2333 );
2334 }
2335
2336 #[test]
2337 fn double_quoted_scalar_with_short_hex_escape_emits_diagnostic() {
2338 let input = "\"\\x4\"";
2341 let mut scanner = Scanner::new(input);
2342 while scanner.next_token().is_some() {}
2343 assert!(
2344 scanner
2345 .diagnostics()
2346 .iter()
2347 .any(|d| d.code == diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE),
2348 "got {:?}",
2349 scanner.diagnostics(),
2350 );
2351 }
2352
2353 #[test]
2354 fn double_quoted_scalar_spans_multiple_lines() {
2355 let input = "\"line1\nline2\"";
2357 let tokens = collect_tokens(input);
2358 let scalar = find_scalar(&tokens);
2359 assert_eq!(scalar.kind, TokenKind::Scalar(ScalarStyle::DoubleQuoted));
2360 assert_eq!(scalar.start.index, 0);
2364 assert_eq!(scalar.end.index, input.len());
2365 }
2366
2367 #[test]
2368 fn line_continuation_escape_consumes_newline_inside_quoted_scalar() {
2369 let input = "\"a\\\nb\"";
2372 let mut scanner = Scanner::new(input);
2373 while scanner.next_token().is_some() {}
2374 assert!(
2375 scanner.diagnostics().is_empty(),
2376 "got {:?}",
2377 scanner.diagnostics(),
2378 );
2379 }
2380
2381 #[test]
2382 fn unterminated_quoted_scalar_emits_diagnostic() {
2383 for input in ["'oops", "\"oops"] {
2384 let mut scanner = Scanner::new(input);
2385 while scanner.next_token().is_some() {}
2386 assert!(
2387 scanner
2388 .diagnostics()
2389 .iter()
2390 .any(|d| d.code == diagnostic_codes::LEX_UNTERMINATED_QUOTED_SCALAR),
2391 "{input:?} produced {:?}",
2392 scanner.diagnostics(),
2393 );
2394 }
2395 }
2396
2397 #[test]
2398 fn quoted_scalar_can_be_implicit_key() {
2399 let input = "\"key\": value";
2400 let tokens = collect_tokens(input);
2401 let kinds = meaningful_kinds(&tokens);
2402 assert_eq!(
2403 kinds,
2404 vec![
2405 TokenKind::StreamStart,
2406 TokenKind::BlockMappingStart,
2407 TokenKind::Key,
2408 TokenKind::Scalar(ScalarStyle::DoubleQuoted),
2409 TokenKind::Value,
2410 TokenKind::Scalar(ScalarStyle::Plain),
2411 TokenKind::BlockEnd,
2412 TokenKind::StreamEnd,
2413 ],
2414 );
2415 assert_byte_complete(input, &tokens);
2416 }
2417
2418 #[test]
2419 fn multi_line_quoted_scalar_cannot_be_implicit_key() {
2420 let input = "\"line1\nline2\": value\n";
2425 let tokens = collect_tokens(input);
2426 let kinds = meaningful_kinds(&tokens);
2427 assert_eq!(kinds[0], TokenKind::StreamStart);
2431 assert_eq!(kinds[1], TokenKind::Scalar(ScalarStyle::DoubleQuoted));
2432 assert_eq!(kinds[2], TokenKind::BlockMappingStart);
2433 assert_eq!(kinds[3], TokenKind::Value);
2434 assert!(!kinds[..3].contains(&TokenKind::Key), "got {kinds:?}",);
2435 }
2436
2437 #[test]
2438 fn quoted_scalar_inside_flow_mapping_terminates_at_closing_quote() {
2439 let input = "{\"a\": \"b\"}";
2440 let tokens = collect_tokens(input);
2441 let kinds = meaningful_kinds(&tokens);
2442 assert_eq!(
2443 kinds,
2444 vec![
2445 TokenKind::StreamStart,
2446 TokenKind::FlowMappingStart,
2447 TokenKind::Key,
2448 TokenKind::Scalar(ScalarStyle::DoubleQuoted),
2449 TokenKind::Value,
2450 TokenKind::Scalar(ScalarStyle::DoubleQuoted),
2451 TokenKind::FlowMappingEnd,
2452 TokenKind::StreamEnd,
2453 ],
2454 );
2455 assert_byte_complete(input, &tokens);
2456 }
2457
2458 #[test]
2459 fn literal_block_scalar_at_top_level_spans_to_eof() {
2460 let input = "|\n hello\n world\n";
2461 let tokens = collect_tokens(input);
2462 let scalar = tokens
2463 .iter()
2464 .find(|t| t.kind == TokenKind::Scalar(ScalarStyle::Literal))
2465 .expect("literal scalar");
2466 assert_eq!(scalar.start.index, 0);
2469 assert_eq!(scalar.end.index, input.len());
2470 assert_byte_complete(input, &tokens);
2471 }
2472
2473 #[test]
2474 fn folded_block_scalar_emits_folded_style() {
2475 let input = ">\n hello\n";
2476 let tokens = collect_tokens(input);
2477 assert!(
2478 tokens
2479 .iter()
2480 .any(|t| t.kind == TokenKind::Scalar(ScalarStyle::Folded)),
2481 "got {tokens:?}",
2482 );
2483 }
2484
2485 #[test]
2486 fn block_scalar_terminates_on_dedent_to_parent_indent() {
2487 let input = "key: |\n line1\n line2\nnext: x\n";
2496 let tokens = collect_tokens(input);
2497 let kinds = meaningful_kinds(&tokens);
2498 let scalar = tokens
2501 .iter()
2502 .find(|t| t.kind == TokenKind::Scalar(ScalarStyle::Literal))
2503 .expect("literal scalar");
2504 let next_idx = input.find("next:").expect("next key in fixture");
2505 assert!(
2506 scalar.end.index <= next_idx,
2507 "scalar should end before `next:` at {next_idx}: scalar ends at {}",
2508 scalar.end.index,
2509 );
2510 let key_count = kinds.iter().filter(|&&k| k == TokenKind::Key).count();
2512 assert_eq!(key_count, 2, "got {kinds:?}");
2513 }
2514
2515 #[test]
2516 fn block_scalar_with_keep_chomping_indicator_in_header() {
2517 let input = "|+\n text\n\n";
2518 let tokens = collect_tokens(input);
2519 let scalar = tokens
2520 .iter()
2521 .find(|t| t.kind == TokenKind::Scalar(ScalarStyle::Literal))
2522 .expect("literal scalar");
2523 assert_eq!(scalar.start.index, 0);
2526 assert_eq!(scalar.end.index, input.len());
2527 assert_byte_complete(input, &tokens);
2528 }
2529
2530 #[test]
2531 fn block_scalar_with_explicit_indent_indicator_uses_that_indent() {
2532 let input = "key: |2\n hi\nbye: x\n";
2536 let tokens = collect_tokens(input);
2537 let scalar = tokens
2538 .iter()
2539 .find(|t| t.kind == TokenKind::Scalar(ScalarStyle::Literal))
2540 .expect("literal scalar");
2541 let bye_idx = input.find("bye:").expect("bye key in fixture");
2542 assert!(
2543 scalar.end.index <= bye_idx,
2544 "scalar must end before `bye`: {} vs {}",
2545 scalar.end.index,
2546 bye_idx,
2547 );
2548 assert_byte_complete(input, &tokens);
2549 }
2550
2551 #[test]
2552 fn block_scalar_at_eof_without_trailing_newline_still_emits() {
2553 let input = "|\n text";
2554 let tokens = collect_tokens(input);
2555 let scalar = tokens
2556 .iter()
2557 .find(|t| t.kind == TokenKind::Scalar(ScalarStyle::Literal))
2558 .expect("literal scalar");
2559 assert_eq!(scalar.end.index, input.len());
2560 }
2561
2562 #[test]
2563 fn block_scalar_with_internal_blank_lines_includes_them() {
2564 let input = "|\n a\n\n b\n";
2566 let tokens = collect_tokens(input);
2567 let scalar = tokens
2568 .iter()
2569 .find(|t| t.kind == TokenKind::Scalar(ScalarStyle::Literal))
2570 .expect("literal scalar");
2571 assert_eq!(scalar.end.index, input.len());
2572 assert_byte_complete(input, &tokens);
2573 }
2574
2575 #[test]
2576 fn pipe_inside_flow_context_is_part_of_plain_scalar_not_block() {
2577 let input = "[|]";
2579 let tokens = collect_tokens(input);
2580 let kinds = meaningful_kinds(&tokens);
2581 assert!(
2584 !kinds.contains(&TokenKind::Scalar(ScalarStyle::Literal)),
2585 "got {kinds:?}",
2586 );
2587 assert_eq!(kinds[1], TokenKind::FlowSequenceStart);
2588 assert!(kinds.contains(&TokenKind::Scalar(ScalarStyle::Plain)));
2589 }
2590
2591 #[test]
2592 fn block_scalar_terminates_on_document_marker() {
2593 let input = "|\n text\n---\nnext\n";
2594 let tokens = collect_tokens(input);
2595 let kinds = meaningful_kinds(&tokens);
2596 assert!(kinds.contains(&TokenKind::DocumentStart), "got {kinds:?}");
2598 }
2599
2600 #[test]
2601 fn plain_scalar_with_internal_whitespace_is_one_token() {
2602 let input = "hello world";
2603 let tokens = collect_tokens(input);
2604 let scalars: Vec<_> = tokens
2605 .iter()
2606 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2607 .collect();
2608 assert_eq!(scalars.len(), 1, "got {tokens:?}");
2609 assert_eq!(
2610 &input[scalars[0].start.index..scalars[0].end.index],
2611 "hello world",
2612 );
2613 assert_byte_complete(input, &tokens);
2614 }
2615
2616 #[test]
2617 fn plain_scalar_with_multiple_internal_spaces_is_one_token() {
2618 let input = "a b c";
2619 let tokens = collect_tokens(input);
2620 let scalars: Vec<_> = tokens
2621 .iter()
2622 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2623 .collect();
2624 assert_eq!(scalars.len(), 1, "got {tokens:?}");
2625 assert_eq!(
2626 &input[scalars[0].start.index..scalars[0].end.index],
2627 "a b c",
2628 );
2629 }
2630
2631 #[test]
2632 fn plain_scalar_drops_trailing_whitespace_before_eof() {
2633 let input = "hello ";
2635 let tokens = collect_tokens(input);
2636 let scalar = tokens
2637 .iter()
2638 .find(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2639 .expect("plain scalar");
2640 assert_eq!(&input[scalar.start.index..scalar.end.index], "hello");
2641 assert!(
2643 tokens
2644 .iter()
2645 .any(|t| t.kind == TokenKind::Trivia(TriviaKind::Whitespace)),
2646 "expected trailing whitespace as trivia: {tokens:?}",
2647 );
2648 assert_byte_complete(input, &tokens);
2649 }
2650
2651 #[test]
2652 fn plain_scalar_drops_trailing_whitespace_before_comment() {
2653 let input = "hello # comment";
2656 let tokens = collect_tokens(input);
2657 let scalar = tokens
2658 .iter()
2659 .find(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2660 .expect("plain scalar");
2661 assert_eq!(&input[scalar.start.index..scalar.end.index], "hello");
2662 assert!(
2663 tokens
2664 .iter()
2665 .any(|t| t.kind == TokenKind::Trivia(TriviaKind::Comment)),
2666 "expected comment trivia: {tokens:?}",
2667 );
2668 }
2669
2670 #[test]
2671 fn colon_inside_url_does_not_break_plain_scalar() {
2672 let input = "url: https://example.com\n";
2675 let tokens = collect_tokens(input);
2676 let scalars: Vec<_> = tokens
2677 .iter()
2678 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2679 .map(|t| &input[t.start.index..t.end.index])
2680 .collect();
2681 assert_eq!(scalars, vec!["url", "https://example.com"]);
2682 }
2683
2684 #[test]
2685 fn multi_line_plain_scalar_continues_under_indent() {
2686 let input = "key: hello\n world\n";
2689 let tokens = collect_tokens(input);
2690 let plain_scalars: Vec<_> = tokens
2691 .iter()
2692 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2693 .collect();
2694 assert_eq!(plain_scalars.len(), 2, "got {tokens:?}");
2696 let value = plain_scalars[1];
2698 assert!(
2699 input[value.start.index..value.end.index].contains("hello"),
2700 "scalar text: {:?}",
2701 &input[value.start.index..value.end.index],
2702 );
2703 assert!(
2704 input[value.start.index..value.end.index].contains("world"),
2705 "scalar text: {:?}",
2706 &input[value.start.index..value.end.index],
2707 );
2708 }
2709
2710 #[test]
2711 fn plain_scalar_terminates_at_blank_line_continuation() {
2712 let input = "key: hello\n\n world\n";
2714 let tokens = collect_tokens(input);
2715 let plain_scalars: Vec<_> = tokens
2716 .iter()
2717 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2718 .map(|t| &input[t.start.index..t.end.index])
2719 .collect();
2720 let merged = plain_scalars.iter().any(|s| s.contains("world"));
2725 assert!(
2726 merged || plain_scalars.contains(&"world"),
2727 "got {plain_scalars:?}"
2728 );
2729 }
2730
2731 #[test]
2732 fn plain_scalar_terminates_on_dedent() {
2733 let input = "outer:\n hello\nnext: x\n";
2737 let tokens = collect_tokens(input);
2738 let kinds = meaningful_kinds(&tokens);
2739 let key_count = kinds.iter().filter(|&&k| k == TokenKind::Key).count();
2741 assert_eq!(key_count, 2, "got {kinds:?}");
2742 let plain_count = kinds
2744 .iter()
2745 .filter(|&&k| k == TokenKind::Scalar(ScalarStyle::Plain))
2746 .count();
2747 assert_eq!(plain_count, 4, "got {kinds:?}");
2748 }
2749
2750 #[test]
2751 fn plain_scalar_terminates_on_following_block_entry_indicator() {
2752 let input = "outer:\n - a\n - b\n";
2756 let tokens = collect_tokens(input);
2757 let kinds = meaningful_kinds(&tokens);
2758 let block_entry_count = kinds
2762 .iter()
2763 .filter(|&&k| k == TokenKind::BlockEntry)
2764 .count();
2765 assert!(block_entry_count >= 1, "got {kinds:?}");
2766 }
2767
2768 #[test]
2769 fn flow_context_plain_scalar_does_not_absorb_terminator_line_break() {
2770 let input = "{a: 42\n}\n";
2775 let tokens = collect_tokens(input);
2776 let scalars: Vec<_> = tokens
2777 .iter()
2778 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2779 .map(|t| &input[t.start.index..t.end.index])
2780 .collect();
2781 assert!(scalars.contains(&"42"), "got {scalars:?}");
2782 assert_byte_complete(input, &tokens);
2783 }
2784
2785 #[test]
2786 fn plain_scalar_in_flow_context_terminates_on_flow_indicators() {
2787 let input = "[a b, c]";
2788 let tokens = collect_tokens(input);
2789 let plain_scalars: Vec<_> = tokens
2790 .iter()
2791 .filter(|t| matches!(t.kind, TokenKind::Scalar(ScalarStyle::Plain)))
2792 .map(|t| &input[t.start.index..t.end.index])
2793 .collect();
2794 assert_eq!(plain_scalars, vec!["a b", "c"]);
2797 }
2798
2799 #[test]
2800 fn multi_line_plain_scalar_does_not_register_as_simple_key() {
2801 let input = "hello\n world: value\n";
2809 let tokens = collect_tokens(input);
2810 let kinds = meaningful_kinds(&tokens);
2811 let scalar_pos = kinds
2813 .iter()
2814 .position(|&k| k == TokenKind::Scalar(ScalarStyle::Plain));
2815 let key_pos = kinds.iter().position(|&k| k == TokenKind::Key);
2816 assert!(scalar_pos.is_some(), "no scalar: {kinds:?}");
2817 if let Some(k) = key_pos {
2824 let s = scalar_pos.unwrap();
2825 assert!(s < k, "multi-line scalar must precede any key: {kinds:?}",);
2826 }
2827 }
2828
2829 #[test]
2830 fn plain_scalar_preserves_single_line_simple_key_behaviour() {
2831 let input = "hello world: value\n";
2835 let tokens = collect_tokens(input);
2836 let kinds = meaningful_kinds(&tokens);
2837 assert_eq!(
2838 kinds,
2839 vec![
2840 TokenKind::StreamStart,
2841 TokenKind::BlockMappingStart,
2842 TokenKind::Key,
2843 TokenKind::Scalar(ScalarStyle::Plain),
2844 TokenKind::Value,
2845 TokenKind::Scalar(ScalarStyle::Plain),
2846 TokenKind::BlockEnd,
2847 TokenKind::StreamEnd,
2848 ],
2849 );
2850 }
2851}