/// A whitespace-related diagnostic produced while lexing YAML source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WhitespaceError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Byte range in the input the diagnostic applies to.
    pub range: std::ops::Range<usize>,
    /// Machine-readable classification of the problem.
    pub category: WhitespaceErrorCategory,
}
13
/// Classification of [`WhitespaceError`] diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WhitespaceErrorCategory {
    /// Tab characters used to indent a line (forbidden in YAML).
    TabIndentation,
    /// Line exceeds the configured maximum length.
    LineTooLong,
    /// Line ending differs from the first ending style seen in the input.
    MixedLineEndings,
    /// Indentation problem. NOTE(review): not emitted by the lexer in this
    /// file — confirm whether other modules construct this variant.
    InvalidIndentation,
}
26
27#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
130#[repr(u16)]
131#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
132pub enum SyntaxKind {
133 ROOT = 0,
136 DOCUMENT,
138 SEQUENCE,
140 MAPPING,
142 SCALAR,
144 ALIAS,
146 TAGGED_NODE,
148 ERROR,
150
151 DASH,
154 PLUS,
156 COLON,
158 QUESTION,
160 LEFT_BRACKET,
162 RIGHT_BRACKET,
164 LEFT_BRACE,
166 RIGHT_BRACE,
168 COMMA,
170 PIPE,
172 GREATER,
174 AMPERSAND,
176 ASTERISK,
178 EXCLAMATION,
180 PERCENT,
182 AT,
184 BACKTICK,
186 QUOTE,
188 SINGLE_QUOTE,
190
191 DOC_START,
194 DOC_END,
196
197 KEY,
200 VALUE,
202 MAPPING_ENTRY,
204 SEQUENCE_ENTRY,
206
207 STRING,
210 UNTERMINATED_STRING,
212 INT,
214 FLOAT,
216 BOOL,
218 NULL,
220 TAG,
222 ANCHOR,
224 REFERENCE,
226 MERGE_KEY,
228 DIRECTIVE,
230
231 WHITESPACE,
234 NEWLINE,
236 INDENT,
238 COMMENT,
240
241 BOM,
244 EOF,
246}
247
impl From<SyntaxKind> for rowan::SyntaxKind {
    /// Convert to rowan's raw kind via the stable `u16` discriminant.
    fn from(kind: SyntaxKind) -> Self {
        Self(kind as u16)
    }
}
253
254fn read_scalar_from<'a>(
256 chars: &mut std::iter::Peekable<std::str::CharIndices<'a>>,
257 input: &'a str,
258 start_idx: usize,
259 exclude_chars: &str,
260) -> &'a str {
261 let mut end_idx = start_idx;
262 while let Some((idx, ch)) = chars.peek() {
263 if ch.is_whitespace() || is_yaml_special_except(*ch, exclude_chars) {
264 break;
265 }
266 end_idx = *idx + ch.len_utf8();
267 chars.next();
268 }
269 &input[start_idx..end_idx]
270}
271
/// Tokenize `input` into `(SyntaxKind, text)` pairs, discarding the
/// whitespace-validation diagnostics collected along the way.
pub fn lex(input: &str) -> Vec<(SyntaxKind, &str)> {
    let (tokens, _) = lex_with_validation(input);
    tokens
}
277
/// Tunable whitespace-validation rules applied while lexing.
pub struct ValidationConfig {
    /// Maximum allowed line length; `None` disables the check.
    /// NOTE(review): lengths are measured in bytes, not characters.
    pub max_line_length: Option<usize>,
    /// When true, report lines whose ending style differs from the first
    /// line ending seen in the input.
    pub enforce_consistent_line_endings: bool,
}
285
286impl Default for ValidationConfig {
287 fn default() -> Self {
288 Self {
289 max_line_length: Some(120), enforce_consistent_line_endings: true,
291 }
292 }
293}
294
/// Tokenize `input` and collect whitespace diagnostics using the default
/// [`ValidationConfig`] (120-column limit, consistent line endings enforced).
pub fn lex_with_validation(input: &str) -> (Vec<(SyntaxKind, &str)>, Vec<WhitespaceError>) {
    lex_with_validation_config(input, &ValidationConfig::default())
}
299
/// Tokenize `input` into `(SyntaxKind, text)` pairs while collecting
/// whitespace diagnostics (tab indentation, over-long lines, mixed line
/// endings) according to `config`.
///
/// Every token text is a slice of `input`, so concatenating all token
/// texts reproduces the original source exactly (the lexer is lossless).
pub fn lex_with_validation_config<'a>(
    input: &'a str,
    config: &ValidationConfig,
) -> (Vec<(SyntaxKind, &'a str)>, Vec<WhitespaceError>) {
    use SyntaxKind::*;

    // Rough pre-allocation guess: tokens average several bytes each.
    let mut tokens = Vec::with_capacity(input.len() / 8);
    let mut chars = input.char_indices().peekable();
    let mut whitespace_errors = Vec::new();
    let bytes = input.as_bytes();

    // Byte offset of the start of the current line; used for line-length
    // checks and indentation detection. NOTE(review): line lengths below
    // are measured in bytes although the message says "characters".
    let mut current_line_start = 0;
    // First line-ending style seen ("\n", "\r\n", or "\r"); later lines
    // that differ are reported as MixedLineEndings.
    let mut detected_line_ending: Option<&str> = None;

    // Nesting depth of flow collections ([...]/{...}); ':' and '%' lex
    // differently inside flow context.
    let mut flow_depth: u32 = 0;

    // A leading byte-order mark becomes its own token.
    if let Some((0, '\u{FEFF}')) = chars.peek() {
        chars.next();
        tokens.push((BOM, "\u{FEFF}"));
    }

    while let Some((start_idx, ch)) = chars.next() {
        let token_start = start_idx;

        match ch {
            '-' => {
                if let Some((_, '-')) = chars.peek() {
                    chars.next();
                    if let Some((_, '-')) = chars.peek() {
                        // Three dashes: document-start marker "---".
                        chars.next();
                        tokens.push((DOC_START, &input[token_start..start_idx + 3]));
                    } else {
                        // Exactly two dashes: emitted as two DASH tokens
                        // (e.g. CLI-style "--flag" starts DASH DASH STRING).
                        tokens.push((DASH, &input[token_start..start_idx + 1]));
                        tokens.push((DASH, &input[start_idx + 1..start_idx + 2]));
                    }
                } else {
                    // Single dash: either a block-sequence marker or the
                    // start of a plain scalar like "-40" or "-value".
                    let line_start_pos = input[..token_start]
                        .rfind(['\n', '\r'])
                        .map(|pos| pos + 1)
                        .unwrap_or(0);
                    let before_dash = &input[line_start_pos..token_start];
                    let only_whitespace_before = before_dash.chars().all(|c| c == ' ' || c == '\t');

                    // A dash whose most recent non-trivia token is "?" or
                    // ":" can also begin a sequence entry.
                    let after_value_indicator = tokens
                        .iter()
                        .rev()
                        .find(|(kind, _)| !matches!(kind, WHITESPACE | INDENT))
                        .is_some_and(|(kind, _)| matches!(kind, QUESTION | COLON));

                    // Sequence markers must be followed by whitespace or
                    // end-of-input ("- item" yes, "-item" no).
                    let followed_by_whitespace_or_end = chars
                        .peek()
                        .map_or(true, |(_, next_ch)| next_ch.is_whitespace());

                    let is_sequence_marker = (only_whitespace_before || after_value_indicator)
                        && followed_by_whitespace_or_end;

                    if is_sequence_marker {
                        tokens.push((DASH, &input[token_start..start_idx + 1]));
                    } else {
                        // Consume the rest of the scalar; '-' remains
                        // allowed inside it (e.g. "example-project").
                        let text = read_scalar_from(&mut chars, input, start_idx + 1, "-");
                        let full_text = &input[token_start..token_start + 1 + text.len()];
                        let token_kind = classify_scalar(full_text);
                        tokens.push((token_kind, full_text));
                    }
                }
            }
            '+' => tokens.push((PLUS, &input[token_start..start_idx + 1])),
            ':' => {
                if flow_depth > 0 {
                    // Inside flow collections ':' is always a separator.
                    tokens.push((COLON, &input[token_start..start_idx + 1]));
                } else if let Some((_, next_ch)) = chars.peek() {
                    if next_ch.is_whitespace() {
                        // "key: value" — ':' followed by space is a
                        // mapping separator.
                        tokens.push((COLON, &input[token_start..start_idx + 1]));
                    } else {
                        // ':' not followed by whitespace starts/continues a
                        // plain scalar (e.g. ":8080", "a:b:c"); further ':'
                        // characters stay inside it.
                        let mut end_idx = start_idx + 1;
                        while let Some((idx, next_ch)) = chars.peek() {
                            if next_ch.is_whitespace() {
                                break;
                            }
                            if is_yaml_special_except(*next_ch, ":") {
                                break;
                            }
                            end_idx = *idx + next_ch.len_utf8();
                            chars.next();
                        }
                        let text = &input[token_start..end_idx];
                        tokens.push((classify_scalar(text), text));
                    }
                } else {
                    // ':' at end of input.
                    tokens.push((COLON, &input[token_start..start_idx + 1]));
                }
            }
            '?' => tokens.push((QUESTION, &input[token_start..start_idx + 1])),
            '[' => {
                flow_depth += 1;
                tokens.push((LEFT_BRACKET, &input[token_start..start_idx + 1]));
            }
            ']' => {
                // saturating_sub guards against unbalanced closers.
                flow_depth = flow_depth.saturating_sub(1);
                tokens.push((RIGHT_BRACKET, &input[token_start..start_idx + 1]));
            }
            '{' => {
                flow_depth += 1;
                tokens.push((LEFT_BRACE, &input[token_start..start_idx + 1]));
            }
            '}' => {
                flow_depth = flow_depth.saturating_sub(1);
                tokens.push((RIGHT_BRACE, &input[token_start..start_idx + 1]));
            }
            ',' => tokens.push((COMMA, &input[token_start..start_idx + 1])),
            '|' => tokens.push((PIPE, &input[token_start..start_idx + 1])),
            '>' => tokens.push((GREATER, &input[token_start..start_idx + 1])),
            '<' => {
                if let Some((_, '<')) = chars.peek() {
                    // "<<" is the merge-key indicator.
                    chars.next();
                    tokens.push((MERGE_KEY, &input[token_start..start_idx + 2]));
                } else {
                    // A lone '<' begins a plain scalar.
                    let mut end_idx = start_idx + 1;
                    while let Some((idx, ch)) = chars.peek() {
                        if ch.is_whitespace() || is_yaml_special(*ch) {
                            break;
                        }
                        end_idx = *idx + ch.len_utf8();
                        chars.next();
                    }
                    let text = &input[token_start..end_idx];
                    let token_kind = classify_scalar(text);
                    tokens.push((token_kind, text));
                }
            }
            '&' => {
                // "&name" is an anchor; a bare '&' stays AMPERSAND.
                let name = read_scalar_from(&mut chars, input, start_idx + 1, "");
                if !name.is_empty() {
                    tokens.push((ANCHOR, &input[token_start..start_idx + 1 + name.len()]));
                } else {
                    tokens.push((AMPERSAND, &input[token_start..start_idx + 1]));
                }
            }
            '*' => {
                // "*name" is an alias reference; a bare '*' stays ASTERISK.
                let name = read_scalar_from(&mut chars, input, start_idx + 1, "");
                if !name.is_empty() {
                    tokens.push((REFERENCE, &input[token_start..start_idx + 1 + name.len()]));
                } else {
                    tokens.push((ASTERISK, &input[token_start..start_idx + 1]));
                }
            }
            '"' => {
                // Double-quoted string with backslash escapes; an unclosed
                // string becomes UNTERMINATED_STRING (consumes to EOF).
                let mut end_idx = start_idx + 1;
                let mut escaped = false;
                let mut found_closing = false;

                while let Some((idx, ch)) = chars.peek() {
                    let current_idx = *idx;
                    let current_ch = *ch;

                    if escaped {
                        // Character after '\' is consumed verbatim.
                        escaped = false;
                        end_idx = current_idx + current_ch.len_utf8();
                        chars.next();
                        continue;
                    }

                    if current_ch == '\\' {
                        escaped = true;
                        end_idx = current_idx + current_ch.len_utf8();
                        chars.next();
                    } else if current_ch == '"' {
                        end_idx = current_idx + current_ch.len_utf8();
                        chars.next();
                        found_closing = true;
                        break;
                    } else {
                        end_idx = current_idx + current_ch.len_utf8();
                        chars.next();
                    }
                }

                if found_closing {
                    tokens.push((STRING, &input[token_start..end_idx]));
                } else {
                    tokens.push((UNTERMINATED_STRING, &input[token_start..end_idx]));
                }
            }
            '\'' => {
                // Single-quoted string; '' is the escaped quote, so a quote
                // followed by another quote continues the string.
                let mut end_idx = start_idx + 1;
                let mut found_closing = false;

                while let Some((idx, ch)) = chars.peek() {
                    let current_idx = *idx;
                    let current_ch = *ch;

                    if current_ch == '\'' {
                        end_idx = current_idx + current_ch.len_utf8();
                        chars.next();
                        if let Some((next_idx, '\'')) = chars.peek() {
                            // Escaped quote '' — consume and keep scanning.
                            end_idx = *next_idx + 1;
                            chars.next();
                        } else {
                            found_closing = true;
                            break;
                        }
                    } else {
                        end_idx = current_idx + current_ch.len_utf8();
                        chars.next();
                    }
                }

                if found_closing {
                    tokens.push((STRING, &input[token_start..end_idx]));
                } else {
                    tokens.push((UNTERMINATED_STRING, &input[token_start..end_idx]));
                }
            }

            '.' => {
                if chars.peek() == Some(&(start_idx + 1, '.')) {
                    chars.next();
                    if chars.peek() == Some(&(start_idx + 2, '.')) {
                        // Three dots: document-end marker "...".
                        chars.next();
                        tokens.push((DOC_END, &input[token_start..start_idx + 3]));
                    } else {
                        // ".." followed by something else — plain scalar.
                        let rest = read_scalar_from(&mut chars, input, start_idx + 2, "");
                        let text = &input[token_start..start_idx + 2 + rest.len()];
                        let token_kind = classify_scalar(text);
                        tokens.push((token_kind, text));
                    }
                } else {
                    // Single dot starts a plain scalar (e.g. ".inf").
                    let rest = read_scalar_from(&mut chars, input, start_idx + 1, "");
                    let text = &input[token_start..start_idx + 1 + rest.len()];
                    let token_kind = classify_scalar(text);
                    tokens.push((token_kind, text));
                }
            }

            '#' => {
                // Comment runs to end of line (exclusive of the newline).
                let mut end_idx = start_idx + 1;
                while let Some((idx, ch)) = chars.peek() {
                    if *ch == '\n' || *ch == '\r' {
                        break;
                    }
                    end_idx = *idx + ch.len_utf8();
                    chars.next();
                }
                tokens.push((COMMENT, &input[token_start..end_idx]));
            }

            '!' => {
                // Tag: "!" or "!!" followed by the tag name.
                let mut end_idx = start_idx + 1;

                if let Some((_, '!')) = chars.peek() {
                    chars.next();
                    end_idx = start_idx + 2;
                }

                while let Some((idx, ch)) = chars.peek() {
                    if ch.is_whitespace() || is_yaml_special(*ch) {
                        break;
                    }
                    end_idx = *idx + ch.len_utf8();
                    chars.next();
                }

                tokens.push((TAG, &input[token_start..end_idx]));
            }

            '%' => {
                if flow_depth > 0 {
                    // Inside flow context '%' cannot start a directive, so
                    // lex it as part of a plain scalar instead.
                    let mut end_idx = start_idx + 1;
                    while let Some((idx, next_ch)) = chars.peek() {
                        if next_ch.is_whitespace() {
                            break;
                        }
                        if is_yaml_special_except(*next_ch, "%") {
                            break;
                        }
                        end_idx = *idx + next_ch.len_utf8();
                        chars.next();
                    }
                    let text = &input[token_start..end_idx];
                    tokens.push((classify_scalar(text), text));
                } else {
                    // Directive (e.g. "%YAML 1.2") runs to end of line.
                    let mut end_idx = start_idx + 1;
                    while let Some((idx, ch)) = chars.peek() {
                        if *ch == '\n' || *ch == '\r' {
                            break;
                        }
                        end_idx = *idx + ch.len_utf8();
                        chars.next();
                    }
                    tokens.push((DIRECTIVE, &input[token_start..end_idx]));
                }
            }

            '\n' => {
                // Line-length check for the line just completed.
                if let Some(max_len) = config.max_line_length {
                    let line_length = start_idx - current_line_start;
                    if line_length > max_len {
                        whitespace_errors.push(WhitespaceError {
                            message: format!(
                                "Line too long ({} > {} characters)",
                                line_length, max_len
                            ),
                            range: current_line_start..start_idx,
                            category: WhitespaceErrorCategory::LineTooLong,
                        });
                    }
                }

                let line_ending = "\n";
                if config.enforce_consistent_line_endings {
                    if let Some(detected) = detected_line_ending {
                        if detected != line_ending {
                            whitespace_errors.push(WhitespaceError {
                                message: "Inconsistent line endings detected".to_string(),
                                range: token_start..start_idx + 1,
                                category: WhitespaceErrorCategory::MixedLineEndings,
                            });
                        }
                    } else {
                        // First line ending seen sets the expected style.
                        detected_line_ending = Some(line_ending);
                    }
                }

                tokens.push((NEWLINE, &input[token_start..start_idx + 1]));
                current_line_start = start_idx + 1;
            }
            '\r' => {
                if let Some(max_len) = config.max_line_length {
                    let line_length = start_idx - current_line_start;
                    if line_length > max_len {
                        whitespace_errors.push(WhitespaceError {
                            message: format!(
                                "Line too long ({} > {} characters)",
                                line_length, max_len
                            ),
                            range: current_line_start..start_idx,
                            category: WhitespaceErrorCategory::LineTooLong,
                        });
                    }
                }

                // "\r\n" is consumed as one NEWLINE token; a lone "\r"
                // (classic Mac style) also counts as a line ending.
                let (line_ending, end_pos) = if let Some((_, '\n')) = chars.peek() {
                    chars.next();
                    ("\r\n", start_idx + 2)
                } else {
                    ("\r", start_idx + 1)
                };

                if config.enforce_consistent_line_endings {
                    if let Some(detected) = detected_line_ending {
                        if detected != line_ending {
                            whitespace_errors.push(WhitespaceError {
                                message: "Inconsistent line endings detected".to_string(),
                                range: token_start..end_pos,
                                category: WhitespaceErrorCategory::MixedLineEndings,
                            });
                        }
                    } else {
                        detected_line_ending = Some(line_ending);
                    }
                }

                tokens.push((NEWLINE, &input[token_start..end_pos]));
                current_line_start = end_pos;
            }

            ' ' | '\t' => {
                // Consume the whole run of spaces/tabs at once.
                let mut end_idx = start_idx + 1;
                let mut has_tabs = ch == '\t';

                while let Some((idx, ch)) = chars.peek() {
                    if *ch != ' ' && *ch != '\t' {
                        break;
                    }
                    if *ch == '\t' {
                        has_tabs = true;
                    }
                    end_idx = *idx + 1;
                    chars.next();
                }

                // Whitespace at the very start of a line is INDENT;
                // anywhere else it is plain WHITESPACE.
                let is_indentation = token_start == 0
                    || (token_start > 0
                        && (bytes[token_start - 1] == b'\n' || bytes[token_start - 1] == b'\r'));

                if is_indentation {
                    if has_tabs {
                        whitespace_errors.push(WhitespaceError {
                            message: "Tab character used for indentation (forbidden in YAML)"
                                .to_string(),
                            range: token_start..end_idx,
                            category: WhitespaceErrorCategory::TabIndentation,
                        });
                    }
                    tokens.push((INDENT, &input[token_start..end_idx]));
                } else {
                    tokens.push((WHITESPACE, &input[token_start..end_idx]));
                }
            }

            _ => {
                // Anything else starts a plain scalar.
                let mut end_idx = start_idx + ch.len_utf8();

                while let Some((idx, next_ch)) = chars.peek() {
                    if next_ch.is_whitespace() {
                        break;
                    }

                    if *next_ch == ':' {
                        // A ':' followed by whitespace or end-of-input ends
                        // the scalar (mapping separator); otherwise it stays
                        // inside (URLs, "a:b:c", timestamps).
                        let next_idx = *idx + next_ch.len_utf8();
                        if next_idx >= input.len() {
                            break;
                        } else if let Some(after) = input[next_idx..].chars().next() {
                            if after.is_whitespace() {
                                break;
                            }
                        }
                        end_idx = *idx + next_ch.len_utf8();
                        chars.next();
                        continue;
                    }

                    if is_yaml_special_except(*next_ch, "-:") {
                        if flow_depth == 0 && matches!(*next_ch, '[' | ']' | '{' | '}' | ',') {
                            // Outside flow context, flow indicators may
                            // appear inside plain scalars — don't break.
                        } else {
                            break;
                        }
                    }

                    if *next_ch == '-' {
                        // Defensive: stop before a '-' that would be a
                        // sequence marker at line start. NOTE(review): for a
                        // mid-line scalar `before_hyphen` contains the scalar
                        // text itself, so this branch rarely fires.
                        let line_start = input[..(*idx)].rfind('\n').map(|p| p + 1).unwrap_or(0);
                        let before_hyphen = &input[line_start..*idx];

                        if before_hyphen.chars().all(|c| c == ' ' || c == '\t') && *idx == end_idx {
                            break;
                        }
                    }

                    end_idx = *idx + next_ch.len_utf8();
                    chars.next();
                }

                let text = &input[token_start..end_idx];
                tokens.push((classify_scalar(text), text));
            }
        }
    }

    // The final line may not end with a newline; length-check it too.
    if let Some(max_len) = config.max_line_length {
        let final_line_length = input.len() - current_line_start;
        if final_line_length > max_len && final_line_length > 0 {
            whitespace_errors.push(WhitespaceError {
                message: format!(
                    "Line too long ({} > {} characters)",
                    final_line_length, max_len
                ),
                range: current_line_start..input.len(),
                category: WhitespaceErrorCategory::LineTooLong,
            });
        }
    }

    (tokens, whitespace_errors)
}
840
841fn classify_scalar(text: &str) -> SyntaxKind {
843 use SyntaxKind::*;
844
845 match text {
847 "true" | "false" | "True" | "False" | "TRUE" | "FALSE" => return BOOL,
848 "null" | "Null" | "NULL" | "~" => return NULL,
849 _ => {}
850 }
851
852 if crate::scalar::ScalarValue::parse_integer(text).is_some() {
854 return INT;
855 }
856
857 match text {
862 ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" | "-.inf" | "-.Inf" | "-.INF"
863 | ".nan" | ".NaN" | ".NAN" => return FLOAT,
864 "infinity" | "inf" | "Infinity" | "Inf" | "INFINITY" | "INF" | "-infinity" | "-inf"
867 | "-Infinity" | "-Inf" | "-INFINITY" | "-INF" | "+infinity" | "+inf" | "+Infinity"
868 | "+Inf" | "+INFINITY" | "+INF" | "nan" | "NaN" | "NAN" => return STRING,
869 _ => {}
870 }
871
872 if text.parse::<f64>().is_ok() {
874 return FLOAT;
875 }
876
877 STRING
879}
880
/// Characters with structural (indicator) meaning to this lexer.
const YAML_SPECIAL_CHARS: &str = ":+-?[]{},'|>&*!%\"#";

/// Returns true when `ch` is one of the YAML indicator characters.
fn is_yaml_special(ch: char) -> bool {
    is_yaml_special_except(ch, "")
}

/// Like [`is_yaml_special`], except characters listed in `exclude` are not
/// treated as special (used to keep e.g. '-' or ':' inside plain scalars).
fn is_yaml_special_except(ch: char, exclude: &str) -> bool {
    !exclude.contains(ch) && YAML_SPECIAL_CHARS.contains(ch)
}
893
894#[cfg(test)]
895mod tests {
896 use super::*;
897
    // "key: value" lexes to exactly STRING COLON WHITESPACE STRING.
    #[test]
    fn test_simple_mapping() {
        let input = "key: value";
        let tokens = lex(input);

        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "key"));
        assert_eq!(tokens[1], (SyntaxKind::COLON, ":"));
        assert_eq!(tokens[2], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[3], (SyntaxKind::STRING, "value"));
    }

    // Plain scalars are classified into INT / FLOAT / BOOL / NULL.
    #[test]
    fn test_scalar_types() {
        let tokens = lex("age: 42");
        assert_eq!(tokens[0], (SyntaxKind::STRING, "age"));
        assert_eq!(tokens[3], (SyntaxKind::INT, "42"));

        let tokens = lex("pi: 3.14");
        assert_eq!(tokens[0], (SyntaxKind::STRING, "pi"));
        assert_eq!(tokens[3], (SyntaxKind::FLOAT, "3.14"));

        let tokens = lex("enabled: true");
        assert_eq!(tokens[0], (SyntaxKind::STRING, "enabled"));
        assert_eq!(tokens[3], (SyntaxKind::BOOL, "true"));

        let tokens = lex("disabled: false");
        assert_eq!(tokens[3], (SyntaxKind::BOOL, "false"));

        let tokens = lex("value: null");
        assert_eq!(tokens[3], (SyntaxKind::NULL, "null"));

        let tokens = lex("value: ~");
        assert_eq!(tokens[3], (SyntaxKind::NULL, "~"));
    }

    // Block-sequence dashes at line start become DASH tokens.
    #[test]
    fn test_sequences() {
        let input = "- item1\n- item2";
        let tokens = lex(input);

        assert_eq!(tokens[0], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[1], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[2], (SyntaxKind::STRING, "item1"));
        assert_eq!(tokens[3], (SyntaxKind::NEWLINE, "\n"));
        assert_eq!(tokens[4], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[5], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[6], (SyntaxKind::STRING, "item2"));
    }

    // A '-' embedded in a word stays inside one STRING token.
    #[test]
    fn test_hyphen_in_scalars() {
        let input = "Name: example-project";
        let tokens = lex(input);

        println!("Hyphen test tokens:");
        for (i, (kind, text)) in tokens.iter().enumerate() {
            println!("  {}: {:?} = {:?}", i, kind, text);
        }

        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "Name"));
        assert_eq!(tokens[1], (SyntaxKind::COLON, ":"));
        assert_eq!(tokens[2], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[3], (SyntaxKind::STRING, "example-project"));
    }

    // Sequence-marker dash vs. hyphenated scalar disambiguation.
    #[test]
    fn test_hyphen_sequence_vs_scalar() {
        let sequence_input = "- example-item";
        let tokens = lex(sequence_input);

        println!("Sequence hyphen tokens:");
        for (i, (kind, text)) in tokens.iter().enumerate() {
            println!("  {}: {:?} = {:?}", i, kind, text);
        }

        assert_eq!(tokens[0], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[1], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[2], (SyntaxKind::STRING, "example-item"));

        let scalar_input = "package-name: my-awesome-package";
        let tokens = lex(scalar_input);

        println!("Package hyphen tokens:");
        for (i, (kind, text)) in tokens.iter().enumerate() {
            println!("  {}: {:?} = {:?}", i, kind, text);
        }

        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "package-name"));
        assert_eq!(tokens[3], (SyntaxKind::STRING, "my-awesome-package"));
    }

    // Flow sequences and flow mappings produce bracket/brace tokens.
    #[test]
    fn test_flow_style() {
        let tokens = lex("[1, 2, 3]");
        assert_eq!(tokens[0], (SyntaxKind::LEFT_BRACKET, "["));
        assert_eq!(tokens[1], (SyntaxKind::INT, "1"));
        assert_eq!(tokens[2], (SyntaxKind::COMMA, ","));
        assert_eq!(tokens[3], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[4], (SyntaxKind::INT, "2"));
        assert_eq!(tokens[5], (SyntaxKind::COMMA, ","));
        assert_eq!(tokens[6], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[7], (SyntaxKind::INT, "3"));
        assert_eq!(tokens[8], (SyntaxKind::RIGHT_BRACKET, "]"));

        let tokens = lex("{a: 1, b: 2}");
        assert_eq!(tokens[0], (SyntaxKind::LEFT_BRACE, "{"));
        assert_eq!(tokens[1], (SyntaxKind::STRING, "a"));
        assert_eq!(tokens[2], (SyntaxKind::COLON, ":"));
        assert_eq!(tokens[3], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[4], (SyntaxKind::INT, "1"));
    }
1026
    // Trailing and full-line comments both run to end of line.
    #[test]
    fn test_comments() {
        let input = "key: value # this is a comment\n# full line comment";
        let tokens = lex(input);

        let comments: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::COMMENT)
            .collect();

        assert_eq!(comments.len(), 2);
        assert_eq!(comments[0].1, "# this is a comment");
        assert_eq!(comments[1].1, "# full line comment");
    }

    // Leading whitespace after a newline is INDENT, not WHITESPACE.
    #[test]
    fn test_multiline_scalar() {
        let input = "key: value\n  continued";
        let tokens = lex(input);

        let indents: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::INDENT)
            .collect();
        assert_eq!(indents.len(), 1);
        assert_eq!(indents[0].1, "  ");
    }

    // Quoted strings keep their quote characters in the token text.
    #[test]
    fn test_quoted_strings() {
        let input = r#"single: 'quoted'
double: "quoted""#;
        let tokens = lex(input);

        let quoted_strings: Vec<_> = tokens
            .iter()
            .filter(|(kind, text)| {
                *kind == SyntaxKind::STRING && (text.starts_with('\'') || text.starts_with('"'))
            })
            .collect();
        assert_eq!(quoted_strings.len(), 2);
        let quoted_texts: Vec<&str> = {
            let mut v: Vec<&str> = quoted_strings.iter().map(|(_, t)| *t).collect();
            v.sort();
            v
        };
        assert_eq!(quoted_texts, ["\"quoted\"", "'quoted'"]);
    }

    // "---" and "..." become DOC_START / DOC_END markers.
    #[test]
    fn test_document_markers() {
        let input = "---\nkey: value\n...";
        let tokens = lex(input);

        println!("Document tokens:");
        for (i, (kind, text)) in tokens.iter().enumerate() {
            println!("  {}: {:?} = {:?}", i, kind, text);
        }

        let doc_start_count = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::DOC_START)
            .count();
        let doc_end_count = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::DOC_END)
            .count();
        assert_eq!(doc_start_count, 1);
        assert_eq!(doc_end_count, 1);
    }

    // Empty input yields no tokens at all (no EOF token either).
    #[test]
    fn test_empty_input() {
        let input = "";
        let tokens = lex(input);
        println!("Empty input tokens: {:?}", tokens);
        assert_eq!(tokens.len(), 0);
    }

    // "&name"/"*name" lex as ANCHOR/REFERENCE; bare '&'/'*' fall back
    // to AMPERSAND/ASTERISK.
    #[test]
    fn test_anchors_and_aliases() {
        let input = "key: &anchor_name value";
        let tokens = lex(input);
        println!("Anchor tokens: {:?}", tokens);

        let anchors: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::ANCHOR)
            .collect();
        assert_eq!(anchors.len(), 1);
        assert_eq!(anchors[0].1, "&anchor_name");

        let input = "key: *reference_name";
        let tokens = lex(input);
        println!("Reference tokens: {:?}", tokens);

        let references: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::REFERENCE)
            .collect();
        assert_eq!(references.len(), 1);
        assert_eq!(references[0].1, "*reference_name");

        let input = "key: & *";
        let tokens = lex(input);

        let ampersands: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::AMPERSAND)
            .collect();
        assert_eq!(ampersands.len(), 1);

        let asterisks: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::ASTERISK)
            .collect();
        assert_eq!(asterisks.len(), 1);
    }
1154
    // "<<" is MERGE_KEY; a single '<' is not.
    #[test]
    fn test_merge_key_token() {
        let input = "<<: *defaults";
        let tokens = lex(input);

        let merge_keys: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::MERGE_KEY)
            .collect();
        assert_eq!(merge_keys.len(), 1);
        assert_eq!(merge_keys[0].1, "<<");

        let input2 = "key: < value";
        let tokens2 = lex(input2);

        let merge_keys2: Vec<_> = tokens2
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::MERGE_KEY)
            .collect();
        assert_eq!(merge_keys2.len(), 0, "Single < should not be a merge key");
    }

    // '+' (block-scalar keep indicator) lexes as PLUS.
    #[test]
    fn test_plus_token() {
        let input = "key: |+ value";
        let tokens = lex(input);

        let plus_tokens: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::PLUS)
            .collect();
        assert_eq!(plus_tokens.len(), 1);
        assert_eq!(plus_tokens[0].1, "+");
    }

    // Block-scalar headers ("|+", ">-", "|2+") split into their parts.
    #[test]
    fn test_block_scalar_indicators() {
        let input1 = "key: |+ content";
        let tokens1 = lex(input1);

        assert!(tokens1
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::PIPE && *text == "|"));
        assert!(tokens1
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::PLUS && *text == "+"));

        let input2 = "key: >- content";
        let tokens2 = lex(input2);

        assert!(tokens2
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::GREATER && *text == ">"));
        assert!(tokens2
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "-"));

        let input3 = "key: |2+ content";
        let tokens3 = lex(input3);

        assert!(tokens3
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::PIPE && *text == "|"));
        assert!(tokens3
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::INT && *text == "2"));
        assert!(tokens3
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::PLUS && *text == "+"));
    }

    // Indicator characters inside free text still tokenize sensibly.
    #[test]
    fn test_special_characters_in_block_content() {
        let input = "line with - and + and : characters";
        let tokens = lex(input);

        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "-"));

        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::PLUS && *text == "+"));
        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::COLON && *text == ":"));

        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "line"));
        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "with"));
        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "and"));
        assert!(tokens
            .iter()
            .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "characters"));
    }

    // End-to-end token counts over a multi-line block-scalar snippet.
    #[test]
    fn test_token_recognition() {
        let input = "key: |2+ \n  content with - and : and > chars\n  more content";
        let tokens = lex(input);

        println!("Comprehensive tokens:");
        for (i, (kind, text)) in tokens.iter().enumerate() {
            println!("  {}: {:?} = {:?}", i, kind, text);
        }

        let count = |k: SyntaxKind| tokens.iter().filter(|(kind, _)| *kind == k).count();
        assert_eq!(count(SyntaxKind::COLON), 2);
        assert_eq!(count(SyntaxKind::PIPE), 1);
        assert_eq!(count(SyntaxKind::INT), 1);
        assert_eq!(count(SyntaxKind::PLUS), 1);
        assert_eq!(count(SyntaxKind::GREATER), 1);
        assert_eq!(count(SyntaxKind::NEWLINE), 2);
        assert_eq!(count(SyntaxKind::INDENT), 2);
        assert!(count(SyntaxKind::STRING) >= 1, "expected STRING tokens");

        assert_eq!(
            tokens
                .iter()
                .filter(|(kind, text)| *kind == SyntaxKind::STRING && *text == "-")
                .count(),
            1
        );
    }
1299
    // One dash → DASH, two → DASH DASH, three → DOC_START,
    // four → DOC_START plus a trailing "-" scalar.
    #[test]
    fn test_dash_handling() {
        let input = "---\nkey: value";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::DOC_START, "---"));

        let input = "---";
        let tokens = lex(input);
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], (SyntaxKind::DOC_START, "---"));

        let input = "--";
        let tokens = lex(input);
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[1], (SyntaxKind::DASH, "-"));

        let input = "----";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::DOC_START, "---"));
        assert_eq!(tokens[1], (SyntaxKind::STRING, "-"));
    }

    // Dashes inside identifiers, UUIDs, CLI flags, negative numbers and
    // ranges all classify as expected.
    #[test]
    fn test_dash_in_different_scalar_contexts() {
        let input = "package-name: my-awesome-package-v2";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "package-name"));
        assert_eq!(tokens[1], (SyntaxKind::COLON, ":"));
        assert_eq!(tokens[2], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[3], (SyntaxKind::STRING, "my-awesome-package-v2"));

        let input = "id: 123e4567-e89b-12d3-a456-426614174000";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "id"));
        assert_eq!(
            tokens[3],
            (SyntaxKind::STRING, "123e4567-e89b-12d3-a456-426614174000")
        );

        let input = "args: --verbose --log-level=debug";
        let tokens = lex(input);
        assert_eq!(
            tokens
                .windows(3)
                .filter(|w| {
                    w[0] == (SyntaxKind::DASH, "-")
                        && w[1] == (SyntaxKind::DASH, "-")
                        && w[2] == (SyntaxKind::STRING, "verbose")
                })
                .count(),
            1
        );

        let input = "temperature: -40";
        let tokens = lex(input);
        assert_eq!(
            tokens
                .iter()
                .filter(|(kind, text)| *kind == SyntaxKind::INT && *text == "-40")
                .count(),
            1
        );

        let input = "range: 1-10";
        let tokens = lex(input);
        assert_eq!(
            tokens
                .iter()
                .filter(|(kind, text)| *kind == SyntaxKind::STRING && *text == "1-10")
                .count(),
            1
        );
    }

    // Sequence markers still recognized at any indentation depth.
    #[test]
    fn test_sequence_markers_with_indentation() {
        let input = "- item1\n- item2";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[1], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[2], (SyntaxKind::STRING, "item1"));

        let input = "  - item1\n  - item2";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::INDENT, "  "));
        assert_eq!(tokens[1], (SyntaxKind::DASH, "-"));

        let input = "- item1\n  - nested1\n  - nested2\n- item2";
        let tokens = lex(input);
        let dash_tokens: Vec<_> = tokens
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::DASH)
            .collect();
        assert_eq!(dash_tokens.len(), 4);
        let input = "- first-item\n- second-item";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[2], (SyntaxKind::STRING, "first-item"));
        assert_eq!(tokens[4], (SyntaxKind::DASH, "-"));
        assert_eq!(tokens[6], (SyntaxKind::STRING, "second-item"));
    }

    // "key:-value" is one scalar; "key: -value" splits after the colon.
    #[test]
    fn test_dash_after_colon() {
        let input = "key:-value";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "key:-value"));

        let input = "key: -value";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "key"));
        assert_eq!(tokens[1], (SyntaxKind::COLON, ":"));
        assert_eq!(tokens[2], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[3], (SyntaxKind::STRING, "-value"));
    }

    // Colons not followed by whitespace stay inside plain scalars
    // (URLs, timestamps), per YAML's plain-scalar rules.
    #[test]
    fn test_yaml_spec_compliant_colon_handling() {
        let input = "http://example.com:8080/path";
        let tokens = lex(input);
        assert_eq!(tokens.len(), 1);
        assert_eq!(
            tokens[0],
            (SyntaxKind::STRING, "http://example.com:8080/path")
        );

        let input = "2024:12:31:23:59:59";
        let tokens = lex(input);
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "2024:12:31:23:59:59"));

        let input = "key: value";
        let tokens = lex(input);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "key"));
        assert_eq!(tokens[1], (SyntaxKind::COLON, ":"));
        assert_eq!(tokens[2], (SyntaxKind::WHITESPACE, " "));
        assert_eq!(tokens[3], (SyntaxKind::STRING, "value"));

        let input = "key:value";
        let tokens = lex(input);
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "key:value"));

        let input = "a:b:c:d";
        let tokens = lex(input);
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], (SyntaxKind::STRING, "a:b:c:d"));
    }

    // Chomping indicators after block-scalar headers tokenize separately.
    #[test]
    fn test_block_scalar_with_chomping() {
        let count_kind = |toks: &[(SyntaxKind, &str)], k: SyntaxKind| {
            toks.iter().filter(|(kind, _)| *kind == k).count()
        };

        let input = "text: |-\n  content";
        let tokens = lex(input);
        assert_eq!(count_kind(&tokens, SyntaxKind::PIPE), 1);
        assert_eq!(
            tokens
                .iter()
                .filter(|(kind, text)| *kind == SyntaxKind::STRING && *text == "-")
                .count(),
            1
        );

        let input = "text: |+\n  content";
        let tokens = lex(input);
        assert_eq!(count_kind(&tokens, SyntaxKind::PIPE), 1);
        assert_eq!(count_kind(&tokens, SyntaxKind::PLUS), 1);

        let input = "text: >-\n  content";
        let tokens = lex(input);
        assert_eq!(count_kind(&tokens, SyntaxKind::GREATER), 1);
        assert_eq!(
            tokens
                .iter()
                .filter(|(kind, text)| *kind == SyntaxKind::STRING && *text == "-")
                .count(),
            1
        );

        let input = "text: |2-\n  content";
        let tokens = lex(input);
        assert_eq!(count_kind(&tokens, SyntaxKind::PIPE), 1);
        let has_2_token = tokens.iter().any(|(kind, text)| {
            (*kind == SyntaxKind::STRING || *kind == SyntaxKind::INT) && text.contains("2")
        });
        assert!(has_2_token, "expected a token containing '2'");
    }
1526
1527 #[test]
1528 fn test_dash_edge_cases() {
1529 let input = "value-";
1531 let tokens = lex(input);
1532 assert_eq!(tokens[0], (SyntaxKind::STRING, "value-"));
1533
1534 let input = "-value";
1536 let tokens = lex(input);
1537 assert_eq!(tokens[0], (SyntaxKind::STRING, "-value"));
1538
1539 let input = "key: a---b";
1541 let tokens = lex(input);
1542 assert_eq!(
1543 tokens
1544 .iter()
1545 .filter(|(kind, text)| *kind == SyntaxKind::STRING && *text == "a---b")
1546 .count(),
1547 1
1548 );
1549
1550 let input = "key: value-\nnext: item";
1552 let tokens = lex(input);
1553 assert!(tokens
1554 .iter()
1555 .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "value-"));
1556
1557 let input = "snake_case-with-dash_mix";
1559 let tokens = lex(input);
1560 assert_eq!(tokens[0], (SyntaxKind::STRING, "snake_case-with-dash_mix"));
1561 }
1562
1563 #[test]
1564 fn test_whitespace_validation_tab_indentation() {
1565 let input_with_tabs = "key: value\n\tindented_key: indented_value";
1567 let (tokens, errors) = lex_with_validation(input_with_tabs);
1568
1569 assert_eq!(errors.len(), 1);
1571 assert_eq!(errors[0].category, WhitespaceErrorCategory::TabIndentation);
1572 assert_eq!(
1573 errors[0].message,
1574 "Tab character used for indentation (forbidden in YAML)"
1575 );
1576
1577 assert!(tokens
1579 .iter()
1580 .any(|(kind, text)| *kind == SyntaxKind::INDENT && text.contains('\t')));
1581 }
1582
1583 #[test]
1584 fn test_whitespace_validation_line_endings() {
1585 let input_mixed = "line1\nline2\r\nline3\rline4";
1587 let config = ValidationConfig {
1588 enforce_consistent_line_endings: true,
1589 max_line_length: None,
1590 };
1591 let (tokens, errors) = lex_with_validation_config(input_mixed, &config);
1592
1593 assert!(errors
1595 .iter()
1596 .any(|e| e.category == WhitespaceErrorCategory::MixedLineEndings));
1597
1598 let newlines: Vec<_> = tokens
1600 .iter()
1601 .filter(|(kind, _)| *kind == SyntaxKind::NEWLINE)
1602 .collect();
1603 assert_eq!(newlines.len(), 3); assert_eq!(newlines[0].1, "\n");
1605 assert_eq!(newlines[1].1, "\r\n");
1606 assert_eq!(newlines[2].1, "\r");
1607 }
1608
1609 #[test]
1610 fn test_whitespace_validation_line_length() {
1611 let long_line = format!("key: {}", "a".repeat(150));
1613 let config = ValidationConfig {
1614 enforce_consistent_line_endings: false,
1615 max_line_length: Some(120),
1616 };
1617 let (_, errors) = lex_with_validation_config(&long_line, &config);
1618
1619 assert_eq!(errors.len(), 1);
1621 assert_eq!(errors[0].category, WhitespaceErrorCategory::LineTooLong);
1622 assert_eq!(errors[0].message, "Line too long (155 > 120 characters)");
1623 }
1624
1625 #[test]
1626 fn test_whitespace_validation_disabled() {
1627 let input_with_issues = "key: value\n\tindented: with_tabs\n";
1629 let config = ValidationConfig {
1630 enforce_consistent_line_endings: false,
1631 max_line_length: None,
1632 };
1633 let (tokens, errors) = lex_with_validation_config(input_with_issues, &config);
1634
1635 assert_eq!(errors.len(), 1);
1637 assert_eq!(errors[0].category, WhitespaceErrorCategory::TabIndentation);
1638
1639 assert!(!tokens.is_empty());
1641 }
1642
1643 #[test]
1644 fn test_dash_in_flow_collections() {
1645 let input = "[item-one, item-two]";
1647 let tokens = lex(input);
1648 assert_eq!(tokens[0], (SyntaxKind::LEFT_BRACKET, "["));
1649 assert_eq!(tokens[1], (SyntaxKind::STRING, "item-one"));
1650 assert_eq!(tokens[2], (SyntaxKind::COMMA, ","));
1651 assert_eq!(tokens[4], (SyntaxKind::STRING, "item-two"));
1652 assert_eq!(tokens[5], (SyntaxKind::RIGHT_BRACKET, "]"));
1653
1654 let input = "{kebab-key: kebab-value}";
1656 let tokens = lex(input);
1657 assert_eq!(tokens[0], (SyntaxKind::LEFT_BRACE, "{"));
1658 assert_eq!(tokens[1], (SyntaxKind::STRING, "kebab-key"));
1659 assert_eq!(tokens[2], (SyntaxKind::COLON, ":"));
1660 assert_eq!(tokens[4], (SyntaxKind::STRING, "kebab-value"));
1661 assert_eq!(tokens[5], (SyntaxKind::RIGHT_BRACE, "}"));
1662 }
1663
1664 #[test]
1665 fn test_dash_with_quotes() {
1666 let input = r#"key: "- not a sequence marker""#;
1668 let tokens = lex(input);
1669 assert_eq!(
1670 tokens
1671 .iter()
1672 .filter(|(kind, text)| {
1673 *kind == SyntaxKind::STRING && *text == "\"- not a sequence marker\""
1674 })
1675 .count(),
1676 1
1677 );
1678
1679 let input = r#"key: '- also not a sequence marker'"#;
1680 let tokens = lex(input);
1681 assert_eq!(
1682 tokens
1683 .iter()
1684 .filter(|(kind, text)| {
1685 *kind == SyntaxKind::STRING && *text == "'- also not a sequence marker'"
1686 })
1687 .count(),
1688 1
1689 );
1690 }
1691
1692 #[test]
1693 fn test_dash_in_multiline_values() {
1694 let input = "description: This is a multi-\n line value with dashes";
1696 let tokens = lex(input);
1697 assert!(tokens
1698 .iter()
1699 .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "multi-"));
1700
1701 let input = "text: value\n - but this is not a sequence";
1703 let tokens = lex(input);
1704 let indent_dash: Vec<_> = tokens
1706 .windows(2)
1707 .filter(|w| w[0].0 == SyntaxKind::INDENT && w[1].0 == SyntaxKind::DASH)
1708 .collect();
1709 assert_eq!(indent_dash.len(), 1);
1710 }
1711
1712 #[test]
1713 fn test_dash_special_yaml_values() {
1714 let input = "date: 2024-01-15";
1716 let tokens = lex(input);
1717 assert!(tokens
1718 .iter()
1719 .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "2024-01-15"));
1720
1721 let input = "timestamp: 2024-01-15T10:30:00-05:00";
1723 let tokens = lex(input);
1724 assert!(tokens.iter().any(
1726 |(kind, text)| *kind == SyntaxKind::STRING && *text == "2024-01-15T10:30:00-05:00"
1727 ));
1728
1729 let input = "version: 1.0.0-beta.1";
1731 let tokens = lex(input);
1732 assert!(tokens
1733 .iter()
1734 .any(|(kind, text)| *kind == SyntaxKind::STRING && *text == "1.0.0-beta.1"));
1735 }
1736
1737 #[test]
1738 fn test_flow_indicators_in_block_scalar() {
1739 let input = "key: unix:///Users/${metadata.username}/path";
1742 let tokens = lex(input);
1743 assert_eq!(tokens.len(), 4);
1744 assert_eq!(tokens[0], (SyntaxKind::STRING, "key"));
1745 assert_eq!(tokens[1], (SyntaxKind::COLON, ":"));
1746 assert_eq!(tokens[2], (SyntaxKind::WHITESPACE, " "));
1747 assert_eq!(
1748 tokens[3],
1749 (
1750 SyntaxKind::STRING,
1751 "unix:///Users/${metadata.username}/path"
1752 )
1753 );
1754 }
1755}