1use crate::yaml::{Document, SyntaxNode};
26use rowan::ast::AstNode;
27use std::fmt;
28
/// A single problem reported by the [`Validator`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Violation {
    /// Human-readable description of the problem.
    pub message: String,
    /// Optional location label; when present, `Display` prints it before the message.
    pub location: Option<String>,
    /// Optional source span of the offending node or token.
    pub text_range: Option<crate::TextPosition>,
    /// Whether this is reported as an error or a warning.
    pub severity: Severity,
    /// The rule category the problem falls under.
    pub rule: Rule,
}
43
/// How serious a [`Violation`] is.
///
/// The derived ordering follows declaration order, so `Error` compares less than `Warning`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Printed as `ERROR` by `Violation`'s `Display` impl.
    Error,
    /// Printed as `WARN` by `Violation`'s `Display` impl.
    Warning,
}
52
/// Category attached to every [`Violation`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rule {
    /// Used by the sequence-item and multiline quoted-scalar indentation checks.
    InvalidIndentation,
    /// Used by the checks on `---` / `...` marker placement.
    InvalidDocumentMarker,
    /// Used by the tab-character check.
    InvalidTabUsage,
    /// Used when a flow collection has fewer commas than its entries require.
    MissingSyntax,
    /// Used for an unknown backslash escape in a double-quoted scalar.
    InvalidEscape,
    /// Used when two keys of one mapping compare equal.
    DuplicateKeys,
    /// Used when a node carries more than one anchor.
    InvalidAnchor,
    /// Used for forbidden characters in a tag or a comma right after a tag.
    InvalidTag,
    /// Used by every check that has no dedicated category.
    Other,
}
75
76impl fmt::Display for Violation {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 match &self.location {
79 Some(loc) => write!(
80 f,
81 "[{}] {}: {} ({:?})",
82 match self.severity {
83 Severity::Error => "ERROR",
84 Severity::Warning => "WARN",
85 },
86 loc,
87 self.message,
88 self.rule
89 ),
90 None => write!(
91 f,
92 "[{}] {} ({:?})",
93 match self.severity {
94 Severity::Error => "ERROR",
95 Severity::Warning => "WARN",
96 },
97 self.message,
98 self.rule
99 ),
100 }
101 }
102}
103
/// Walks a YAML syntax tree and collects [`Violation`]s.
pub struct Validator {
    /// Switches for the optional checks.
    config: ValidatorConfig,
}
113
/// Switches for the optional checks performed by the validator.
#[derive(Debug, Clone)]
pub struct ValidatorConfig {
    /// Report mapping keys that compare equal to an earlier key.
    pub check_duplicate_keys: bool,
    /// Whether indentation checks are requested.
    pub check_indentation: bool,
    /// Report tokens that contain tab characters.
    pub check_tabs: bool,
    /// Report document markers that appear inside scalar content.
    pub check_document_markers: bool,
    /// Report nodes that carry more than one anchor.
    pub check_anchors: bool,
}

impl Default for ValidatorConfig {
    /// Returns a configuration with every check switched on.
    fn default() -> Self {
        let enabled = true;
        ValidatorConfig {
            check_duplicate_keys: enabled,
            check_indentation: enabled,
            check_tabs: enabled,
            check_document_markers: enabled,
            check_anchors: enabled,
        }
    }
}
140
141fn find_root(node: &SyntaxNode) -> SyntaxNode {
143 if node.kind() == crate::SyntaxKind::ROOT {
144 return node.clone();
145 }
146 node.ancestors()
147 .find(|n| n.kind() == crate::SyntaxKind::ROOT)
148 .unwrap_or_else(|| node.clone())
149}
150
151fn range_to_text_position(range: rowan::TextRange) -> crate::TextPosition {
153 crate::TextPosition::new(u32::from(range.start()), u32::from(range.end()))
154}
155
156impl Validator {
157 pub fn new() -> Self {
159 Self {
160 config: ValidatorConfig::default(),
161 }
162 }
163
164 pub fn with_config(config: ValidatorConfig) -> Self {
166 Self { config }
167 }
168
169 pub fn validate(&self, doc: &Document) -> Vec<Violation> {
173 let mut violations = Vec::new();
174
175 self.check_duplicate_directives(doc.syntax(), &mut violations);
177
178 self.check_directive_without_document(doc.syntax(), &mut violations);
180
181 self.validate_node(doc.syntax(), &mut violations);
184
185 violations
186 }
187
188 pub fn validate_syntax(&self, node: &SyntaxNode) -> Vec<Violation> {
193 let mut violations = Vec::new();
194
195 self.check_duplicate_directives(node, &mut violations);
197
198 self.check_directives_at_root(node, &mut violations);
200
201 self.check_directive_after_document(node, &mut violations);
203
204 self.validate_node(node, &mut violations);
206
207 violations
208 }
209
210 fn validate_node(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
211 use crate::SyntaxKind;
212
213 if self.config.check_tabs {
215 self.check_tab_usage(node, violations);
216 }
217
218 if self.config.check_anchors {
221 self.check_multiple_anchors(node, violations);
222 }
223
224 match node.kind() {
225 SyntaxKind::ERROR => {
226 let content = node.text().to_string();
229 let preview = if content.len() > 50 {
230 format!("{}...", &content[..50])
231 } else {
232 content
233 };
234 violations.push(Violation {
235 message: format!("Invalid content in document: {:?}", preview),
236 location: None,
237 text_range: Some(range_to_text_position(node.text_range())),
238 severity: Severity::Error,
239 rule: Rule::Other,
240 });
241 }
242 SyntaxKind::MAPPING_ENTRY => {
243 self.check_implicit_key_multiline(node, violations);
245 self.check_sequence_on_same_line_as_key(node, violations);
247 }
248 SyntaxKind::SCALAR => {
249 self.check_escape_sequences(node, violations);
251 self.check_block_scalar_indicator(node, violations);
253 self.check_trailing_content_after_quoted(node, violations);
255 self.check_colon_in_plain_scalar(node, violations);
257 self.check_document_marker_in_string(node, violations);
259 self.check_directive_in_content(node, violations);
261 }
262 SyntaxKind::DOC_START | SyntaxKind::DOC_END if self.config.check_document_markers => {
263 self.check_document_marker_placement(node, violations);
264 }
265 SyntaxKind::MAPPING => {
266 self.check_flow_collection_commas(node, violations);
267 self.check_block_mapping_entries_on_same_line(node, violations);
268 if self.config.check_duplicate_keys {
269 self.check_duplicate_keys(node, violations);
270 }
271 }
272 SyntaxKind::SEQUENCE => {
273 self.check_flow_collection_commas(node, violations);
274 self.check_sequence_entry_in_flow(node, violations);
275 }
276 SyntaxKind::VALUE => {
277 self.check_anchor_and_alias(node, violations);
278 }
279 SyntaxKind::DOCUMENT => {
280 self.check_document_level_anchors(node, violations);
281 }
282 _ => {}
283 }
284
285 for element in node.children_with_tokens() {
287 if let Some(token) = element.as_token() {
288 if token.kind() == crate::SyntaxKind::COMMENT {
290 self.check_comment_token_whitespace(token, violations);
291 }
292 if token.kind() == crate::SyntaxKind::DOC_START {
294 self.check_doc_start_token_content(token, violations);
295 }
296 if token.kind() == crate::SyntaxKind::TAG {
298 self.check_tag_characters(token, violations);
299 self.check_tag_followed_by_comma(token, violations);
300 }
301 }
302 }
303
304 self.check_sequence_indentation(node, violations);
306 self.check_quoted_string_indentation(node, violations);
307
308 for child in node.children() {
310 self.validate_node(&child, violations);
311 }
312 }
313
314 fn check_directive_without_document(
316 &self,
317 doc_node: &SyntaxNode,
318 violations: &mut Vec<Violation>,
319 ) {
320 let root = find_root(doc_node);
321
322 let has_directives = root
324 .descendants()
325 .any(|n| n.kind() == crate::SyntaxKind::DIRECTIVE);
326
327 if !has_directives {
328 return;
329 }
330
331 let has_content = doc_node.descendants().any(|n| {
334 matches!(
335 n.kind(),
336 crate::SyntaxKind::MAPPING
337 | crate::SyntaxKind::SEQUENCE
338 | crate::SyntaxKind::SCALAR
339 | crate::SyntaxKind::STRING
340 | crate::SyntaxKind::TAGGED_NODE
341 )
342 });
343
344 if !has_content {
345 violations.push(Violation {
346 message: "Directive requires a document with content".to_string(),
347 location: None,
348 text_range: None,
349 severity: Severity::Error,
350 rule: Rule::Other,
351 });
352 }
353 }
354
355 fn check_directives_at_root(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
359 use crate::SyntaxKind;
360
361 let check_node = find_root(node);
362
363 let has_directives = check_node
365 .children()
366 .any(|child| child.kind() == SyntaxKind::DIRECTIVE);
367
368 if !has_directives {
369 return;
370 }
371
372 let has_document_with_content = check_node.children().any(|child| {
374 if child.kind() == SyntaxKind::DOCUMENT {
375 child.descendants().any(|n| {
377 matches!(
378 n.kind(),
379 SyntaxKind::MAPPING
380 | SyntaxKind::SEQUENCE
381 | SyntaxKind::SCALAR
382 | SyntaxKind::STRING
383 | SyntaxKind::TAGGED_NODE
384 )
385 })
386 } else {
387 false
388 }
389 });
390
391 if !has_document_with_content {
392 violations.push(Violation {
393 message: "Directive without document content".to_string(),
394 location: None,
395 text_range: None,
396 severity: Severity::Error,
397 rule: Rule::Other,
398 });
399 }
400 }
401
402 fn check_directive_after_document(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
407 use crate::SyntaxKind;
408
409 let check_node = find_root(node);
410
411 let mut seen_document_with_content = false;
413
414 for child in check_node.children() {
415 match child.kind() {
416 SyntaxKind::DOCUMENT => {
417 let has_content = child.descendants().any(|n| {
419 matches!(
420 n.kind(),
421 SyntaxKind::MAPPING
422 | SyntaxKind::SEQUENCE
423 | SyntaxKind::SCALAR
424 | SyntaxKind::STRING
425 | SyntaxKind::TAGGED_NODE
426 )
427 });
428
429 let has_doc_end = child
431 .children_with_tokens()
432 .any(|t| t.kind() == SyntaxKind::DOC_END);
433
434 if has_content {
435 seen_document_with_content = true;
436
437 if !has_doc_end {
440 }
443 }
444 }
445 SyntaxKind::DIRECTIVE if seen_document_with_content => {
446 let mut prev_sibling = child.prev_sibling();
449 let mut found_doc_with_end = false;
450
451 while let Some(prev) = prev_sibling {
452 if prev.kind() == SyntaxKind::DOCUMENT {
453 let has_doc_end = prev
455 .children_with_tokens()
456 .any(|t| t.kind() == SyntaxKind::DOC_END);
457
458 if has_doc_end {
459 found_doc_with_end = true;
460 }
461 break;
462 }
463 prev_sibling = prev.prev_sibling();
464 }
465
466 if !found_doc_with_end {
467 violations.push(Violation {
468 message: "Directive after document requires document end marker (...)"
469 .to_string(),
470 location: None,
471 text_range: None,
472 severity: Severity::Error,
473 rule: Rule::Other,
474 });
475 }
476 }
477 _ => {}
478 }
479 }
480 }
481
482 fn check_directive_in_content(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
487 use crate::SyntaxKind;
488
489 let has_directive = node
490 .children_with_tokens()
491 .any(|c| c.kind() == SyntaxKind::DIRECTIVE);
492 if has_directive {
493 violations.push(Violation {
494 message: "Directive in document content (missing document end marker `...` before directive)".to_string(),
495 location: None,
496 text_range: Some(range_to_text_position(node.text_range())),
497 severity: Severity::Error,
498 rule: Rule::Other,
499 });
500 }
501 }
502
503 fn check_duplicate_directives(&self, doc_node: &SyntaxNode, violations: &mut Vec<Violation>) {
505 use std::collections::HashMap;
506
507 let root = find_root(doc_node);
508
509 let mut directive_counts: HashMap<String, usize> = HashMap::new();
511
512 for node in root.descendants() {
513 if node.kind() == crate::SyntaxKind::DIRECTIVE {
514 let text = node.text().to_string();
516
517 if let Some(directive_type) = text.split_whitespace().next() {
519 *directive_counts
520 .entry(directive_type.to_string())
521 .or_insert(0) += 1;
522 }
523 }
524 }
525
526 for (directive_type, count) in directive_counts {
528 if count > 1 {
529 violations.push(Violation {
530 message: format!("Duplicate {} directive", directive_type),
531 location: None,
532 text_range: None,
533 severity: Severity::Error,
534 rule: Rule::Other,
535 });
536 }
537 }
538 }
539
540 fn check_multiple_anchors(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
542 let anchor_count = node
544 .children_with_tokens()
545 .filter(|child| {
546 child
547 .as_token()
548 .is_some_and(|t| t.kind() == crate::SyntaxKind::ANCHOR)
549 })
550 .count();
551
552 if anchor_count > 1 {
553 violations.push(Violation {
554 message: "Multiple anchors on the same node".to_string(),
555 location: None,
556 text_range: Some(range_to_text_position(node.text_range())),
557 severity: Severity::Error,
558 rule: Rule::InvalidAnchor,
559 });
560 }
561 }
562
563 fn check_escape_sequences(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
565 let first_char = node.first_token().and_then(|t| t.text().chars().next());
567 if first_char != Some('"') {
568 return;
569 }
570
571 let text = node.text().to_string();
573 let mut chars = text.chars().peekable();
574
575 while let Some(ch) = chars.next() {
576 if ch == '\\' {
577 if let Some(&next) = chars.peek() {
578 let valid_escapes = [
580 '0', 'a', 'b', 't', 'n', 'v', 'f', 'r', 'e', ' ', '"', '/', '\\', 'N', '_',
581 'L', 'P', 'x', 'u', 'U',
582 ];
583
584 if !valid_escapes.contains(&next) {
585 violations.push(Violation {
586 message: format!("Invalid escape sequence: \\{}", next),
587 location: None,
588 text_range: Some(range_to_text_position(node.text_range())),
589 severity: Severity::Error,
590 rule: Rule::InvalidEscape,
591 });
592 return; }
594 }
595 }
596 }
597 }
598
599 fn check_block_scalar_indicator(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
605 let has_block_indicator = node.children_with_tokens().any(|child| {
607 if let rowan::NodeOrToken::Token(token) = child {
608 matches!(
609 token.kind(),
610 crate::SyntaxKind::GREATER | crate::SyntaxKind::PIPE
611 )
612 } else {
613 false
614 }
615 });
616
617 if !has_block_indicator {
618 return;
619 }
620
621 let mut found_indicator = false;
623 let mut found_newline = false;
624
625 for child in node.children_with_tokens() {
626 if let rowan::NodeOrToken::Token(token) = child {
627 if matches!(
629 token.kind(),
630 crate::SyntaxKind::GREATER | crate::SyntaxKind::PIPE
631 ) {
632 found_indicator = true;
633 continue;
634 }
635
636 if found_indicator && !found_newline {
638 match token.kind() {
639 crate::SyntaxKind::NEWLINE => {
640 found_newline = true;
641 }
642 crate::SyntaxKind::STRING => {
643 violations.push(Violation {
645 message:
646 "Block scalar content cannot appear on same line as indicator"
647 .to_string(),
648 location: None,
649 text_range: None,
650 severity: Severity::Error,
651 rule: Rule::Other,
652 });
653 return;
654 }
655 _ => {}
657 }
658 }
659 }
660 }
661 }
662
    /// Reports a STRING token that follows the closing quote of a quoted string within
    /// the same scalar node.
    ///
    /// Scans the node's direct tokens as a small state machine:
    /// * `found_quoted` - a STRING token starting with `"` or `'` has been seen;
    /// * `found_quote_end` - a STRING token ending with `"` or `'` has closed it;
    /// * `found_newline` - a NEWLINE token has been seen anywhere so far.
    ///
    /// A further STRING token after the closing quote, with no NEWLINE seen yet, produces
    /// one violation and ends the scan.
    fn check_trailing_content_after_quoted(
        &self,
        node: &SyntaxNode,
        violations: &mut Vec<Violation>,
    ) {
        let mut found_quoted = false;
        let mut found_quote_end = false;
        let mut found_newline = false;

        for child in node.children_with_tokens() {
            if let rowan::NodeOrToken::Token(token) = child {
                match token.kind() {
                    crate::SyntaxKind::STRING => {
                        let text = token.text();

                        if !found_quoted && (text.starts_with('"') || text.starts_with('\'')) {
                            // Opening token of the quoted string.
                            found_quoted = true;

                            // The length guard keeps a lone quote character from counting
                            // as both the opening and the closing quote.
                            if text.len() > 1 && (text.ends_with('"') || text.ends_with('\'')) {
                                found_quote_end = true;
                            }
                        } else if found_quoted && !found_quote_end {
                            // Still inside the quoted string: look for the closing token.
                            if text.ends_with('"') || text.ends_with('\'') {
                                found_quote_end = true;
                            }
                        } else if found_quote_end && !found_newline {
                            // More string content after the closing quote.
                            violations.push(Violation {
                                message: "Trailing content after quoted string".to_string(),
                                location: None,
                                text_range: None,
                                severity: Severity::Error,
                                rule: Rule::Other,
                            });
                            return;
                        }
                    }
                    crate::SyntaxKind::NEWLINE => {
                        found_newline = true;
                    }
                    crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::COMMENT => {
                        // Whitespace and comments never count as trailing content.
                    }
                    _ => {}
                }
            }
        }
    }
718
719 fn check_colon_in_plain_scalar(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
725 let has_colon = node.children_with_tokens().any(|child| {
727 if let rowan::NodeOrToken::Token(token) = child {
728 token.kind() == crate::SyntaxKind::COLON
729 } else {
730 false
731 }
732 });
733
734 if !has_colon {
735 return;
736 }
737
738 let is_quoted = node.first_token().is_some_and(|t| {
740 let text = t.text();
741 text.starts_with('"') || text.starts_with('\'')
742 });
743
744 if is_quoted {
745 return;
746 }
747
748 let parent_is_value = node
751 .parent()
752 .is_some_and(|p| p.kind() == crate::SyntaxKind::VALUE);
753
754 if parent_is_value {
755 violations.push(Violation {
756 message: "Plain scalar value cannot contain mapping syntax (colon)".to_string(),
757 location: None,
758 text_range: None,
759 severity: Severity::Error,
760 rule: Rule::Other,
761 });
762 }
763 }
764
765 fn check_document_marker_in_string(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
771 let text = node.text().to_string();
773
774 if !text.starts_with('"') && !text.starts_with('\'') {
776 return;
777 }
778
779 if text.contains("\n---\n")
782 || text.contains("\n---\"")
783 || text.contains("\n---'")
784 || text.contains("\n...\n")
785 || text.contains("\n...\"")
786 || text.contains("\n...'")
787 {
788 violations.push(Violation {
789 message: "Document marker on its own line inside quoted string".to_string(),
790 location: None,
791 text_range: Some(range_to_text_position(node.text_range())),
792 severity: Severity::Error,
793 rule: Rule::InvalidDocumentMarker,
794 });
795 }
796 }
797
798 fn check_tab_usage(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
800 for token in node.children_with_tokens() {
803 if let rowan::NodeOrToken::Token(token) = token {
804 if token.text().contains('\t') {
806 violations.push(Violation {
807 message: "Tabs are not allowed for indentation in YAML".to_string(),
808 location: None,
809 text_range: Some(range_to_text_position(token.text_range())),
810 severity: Severity::Error,
811 rule: Rule::InvalidTabUsage,
812 });
813 return; }
815 }
816 }
817 }
818
819 fn check_document_marker_placement(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
821 if let Some(parent) = node.parent() {
824 if matches!(
825 parent.kind(),
826 crate::SyntaxKind::STRING | crate::SyntaxKind::SCALAR
827 ) {
828 violations.push(Violation {
829 message: "Document marker inside string is invalid".to_string(),
830 location: None,
831 text_range: Some(range_to_text_position(node.text_range())),
832 severity: Severity::Error,
833 rule: Rule::InvalidDocumentMarker,
834 });
835 }
836 }
837 }
838
839 fn check_flow_collection_commas(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
841 let first_token = node.first_token();
843 let is_flow_mapping = first_token.as_ref().is_some_and(|t| t.text() == "{");
844 let is_flow_sequence = first_token.as_ref().is_some_and(|t| t.text() == "[");
845
846 if !is_flow_mapping && !is_flow_sequence {
847 return;
848 }
849
850 let entry_kind = if is_flow_mapping {
852 crate::SyntaxKind::MAPPING_ENTRY
853 } else {
854 crate::SyntaxKind::SEQUENCE_ENTRY
855 };
856
857 let mut entry_count = 0;
858 let mut comma_count = 0;
859 let mut prev_was_comma = false;
860
861 for child in node.children() {
862 match child.kind() {
863 k if k == entry_kind => entry_count += 1,
864 crate::SyntaxKind::COMMA => {
865 comma_count += 1;
866 if prev_was_comma {
867 violations.push(Violation {
868 message: "Double comma in flow collection".to_string(),
869 location: None,
870 text_range: Some(range_to_text_position(node.text_range())),
871 severity: Severity::Error,
872 rule: Rule::Other,
873 });
874 }
875 prev_was_comma = true;
876 }
877 crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::NEWLINE => {}
878 _ => prev_was_comma = false,
879 }
880 }
881
882 if entry_count > 1 && comma_count < entry_count - 1 {
884 violations.push(Violation {
885 message: format!(
886 "Flow collection missing commas: {} entries but only {} commas",
887 entry_count, comma_count
888 ),
889 location: None,
890 text_range: None,
891 severity: Severity::Error,
892 rule: Rule::MissingSyntax,
893 });
894 }
895 }
896
    /// Reports block-mapping entries that are not separated by a line break.
    ///
    /// Flow mappings (first token `{`) are skipped. For each pair of consecutive
    /// MAPPING_ENTRY children a NEWLINE token must exist, either as the last token of the
    /// earlier entry or as a sibling token between the two entries. Only the first
    /// offending pair is reported.
    fn check_block_mapping_entries_on_same_line(
        &self,
        node: &SyntaxNode,
        violations: &mut Vec<Violation>,
    ) {
        let first_token = node.first_token();
        let is_flow_mapping = first_token.as_ref().is_some_and(|t| t.text() == "{");

        if is_flow_mapping {
            return;
        }

        // The most recent MAPPING_ENTRY seen, to be compared with the next one.
        let mut prev_entry: Option<SyntaxNode> = None;

        for child in node.children() {
            if child.kind() == crate::SyntaxKind::MAPPING_ENTRY {
                if let Some(prev) = prev_entry {
                    let has_newline_between = {
                        // Case 1: the earlier entry itself ends with a NEWLINE token.
                        let prev_ends_with_newline = prev
                            .last_token()
                            .is_some_and(|t| t.kind() == crate::SyntaxKind::NEWLINE);

                        if prev_ends_with_newline {
                            true
                        } else {
                            // Case 2: walk the siblings after the earlier entry until the
                            // current entry is reached, looking for a NEWLINE token.
                            let mut current_sibling = prev.next_sibling_or_token();
                            let mut found_newline = false;

                            while let Some(sibling) = current_sibling {
                                if let rowan::NodeOrToken::Node(n) = &sibling {
                                    if n == &child {
                                        break;
                                    }
                                }

                                if let rowan::NodeOrToken::Token(t) = &sibling {
                                    if t.kind() == crate::SyntaxKind::NEWLINE {
                                        found_newline = true;
                                        break;
                                    }
                                }

                                current_sibling = sibling.next_sibling_or_token();
                            }

                            found_newline
                        }
                    };

                    if !has_newline_between {
                        violations.push(Violation {
                            message: "Block mapping entries must be on separate lines".to_string(),
                            location: None,
                            text_range: None,
                            severity: Severity::Error,
                            rule: Rule::Other,
                        });
                        return;
                    }
                }

                prev_entry = Some(child);
            }
        }
    }
973
974 fn check_sequence_entry_in_flow(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
980 let first_token = node.first_token();
982 let is_flow_sequence = first_token.as_ref().is_some_and(|t| t.text() == "[");
983
984 if !is_flow_sequence {
985 return;
986 }
987
988 for child in node.children() {
990 if child.kind() == crate::SyntaxKind::SEQUENCE_ENTRY {
991 violations.push(Violation {
992 message: "Flow sequence cannot use block sequence syntax (-)".to_string(),
993 location: None,
994 text_range: Some(range_to_text_position(node.text_range())),
995 severity: Severity::Error,
996 rule: Rule::Other,
997 });
998 return; }
1000 }
1001 }
1002
1003 fn check_document_level_anchors(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
1008 for child in node.children_with_tokens() {
1010 if let rowan::NodeOrToken::Token(token) = child {
1011 if token.kind() == crate::SyntaxKind::ANCHOR {
1012 violations.push(Violation {
1013 message: "Anchor must be attached to a node, not at document level"
1014 .to_string(),
1015 location: None,
1016 text_range: Some(range_to_text_position(token.text_range())),
1017 severity: Severity::Error,
1018 rule: Rule::Other,
1019 });
1020 }
1021 }
1022 }
1023 }
1024
1025 fn check_anchor_and_alias(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
1030 let mut has_anchor = false;
1031 let mut has_alias = false;
1032
1033 for child in node.children_with_tokens() {
1035 if let rowan::NodeOrToken::Token(token) = child {
1036 if token.kind() == crate::SyntaxKind::ANCHOR {
1037 has_anchor = true;
1038 }
1039 }
1040 }
1041
1042 for desc in node.descendants() {
1044 if desc.kind() == crate::SyntaxKind::SCALAR {
1045 for token in desc.children_with_tokens() {
1046 if let rowan::NodeOrToken::Token(t) = token {
1047 if t.kind() == crate::SyntaxKind::REFERENCE {
1048 has_alias = true;
1049 break;
1050 }
1051 }
1052 }
1053 }
1054 }
1055
1056 if has_anchor && has_alias {
1057 violations.push(Violation {
1058 message: "Node cannot have both an anchor and be an alias".to_string(),
1059 location: None,
1060 text_range: None,
1061 severity: Severity::Error,
1062 rule: Rule::Other,
1063 });
1064 }
1065 }
1066
1067 fn check_comment_token_whitespace(
1072 &self,
1073 token: &rowan::SyntaxToken<crate::Lang>,
1074 violations: &mut Vec<Violation>,
1075 ) {
1076 if let Some(prev) = token.prev_sibling_or_token() {
1078 match prev {
1079 rowan::NodeOrToken::Token(prev_token) => {
1080 if prev_token.kind() != crate::SyntaxKind::WHITESPACE
1082 && prev_token.kind() != crate::SyntaxKind::NEWLINE
1083 {
1084 violations.push(Violation {
1085 message: "Comment without whitespace separation".to_string(),
1086 location: None,
1087 text_range: Some(range_to_text_position(token.text_range())),
1088 severity: Severity::Error,
1089 rule: Rule::Other,
1090 });
1091 }
1092 }
1093 rowan::NodeOrToken::Node(_prev_node) => {
1094 violations.push(Violation {
1096 message: "Comment without whitespace separation".to_string(),
1097 location: None,
1098 text_range: Some(range_to_text_position(token.text_range())),
1099 severity: Severity::Error,
1100 rule: Rule::Other,
1101 });
1102 }
1103 }
1104 }
1105 }
1106
1107 fn check_doc_start_token_content(
1112 &self,
1113 token: &rowan::SyntaxToken<crate::Lang>,
1114 violations: &mut Vec<Violation>,
1115 ) {
1116 let mut found_newline = false;
1118 let mut found_content = false;
1119
1120 let mut current = token.next_sibling_or_token();
1122 while let Some(sibling) = current {
1123 let next = match &sibling {
1124 rowan::NodeOrToken::Token(t) => {
1125 match t.kind() {
1126 crate::SyntaxKind::NEWLINE => {
1127 found_newline = true;
1128 break;
1129 }
1130 crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::COMMENT => {
1131 }
1133 _ => {}
1134 }
1135 t.next_sibling_or_token()
1136 }
1137 rowan::NodeOrToken::Node(n) => {
1138 match n.kind() {
1140 crate::SyntaxKind::MAPPING
1141 | crate::SyntaxKind::SEQUENCE
1142 | crate::SyntaxKind::SCALAR
1143 | crate::SyntaxKind::TAGGED_NODE => {
1144 found_content = true;
1145 break;
1146 }
1147 _ => {}
1148 }
1149 n.next_sibling_or_token()
1150 }
1151 };
1152 current = next;
1153 }
1154
1155 if found_content && !found_newline {
1156 violations.push(Violation {
1157 message: "Content on same line as document start marker".to_string(),
1158 location: None,
1159 text_range: None,
1160 severity: Severity::Error,
1161 rule: Rule::InvalidDocumentMarker,
1162 });
1163 }
1164 }
1165
1166 fn check_tag_characters(
1171 &self,
1172 token: &rowan::SyntaxToken<crate::Lang>,
1173 violations: &mut Vec<Violation>,
1174 ) {
1175 let tag_text = token.text();
1176
1177 let invalid_chars = ['{', '}', '[', ']', ','];
1179 for ch in invalid_chars {
1180 if tag_text.contains(ch) {
1181 violations.push(Violation {
1182 message: format!("Invalid character '{}' in tag", ch),
1183 location: None,
1184 text_range: Some(range_to_text_position(token.text_range())),
1185 severity: Severity::Error,
1186 rule: Rule::InvalidTag,
1187 });
1188 return; }
1190 }
1191 }
1192
    /// Reports a tag whose next significant element is a comma.
    ///
    /// Starting after `token`, WHITESPACE and NEWLINE tokens are skipped. The first other
    /// element decides the outcome:
    /// * a COMMA token - violation spanning the tag;
    /// * any other token - no violation;
    /// * a SCALAR node - its direct tokens are scanned the same way (a leading COMMA is a
    ///   violation, reported without a text range);
    /// * any other node - no violation.
    fn check_tag_followed_by_comma(
        &self,
        token: &rowan::SyntaxToken<crate::Lang>,
        violations: &mut Vec<Violation>,
    ) {
        let mut current = token.next_sibling_or_token();

        while let Some(sibling) = current {
            match &sibling {
                rowan::NodeOrToken::Token(t) => {
                    match t.kind() {
                        crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::NEWLINE => {
                            // Not significant: keep looking at the next sibling.
                            current = t.next_sibling_or_token();
                            continue;
                        }
                        crate::SyntaxKind::COMMA => {
                            violations.push(Violation {
                                message: "Invalid comma after tag".to_string(),
                                location: None,
                                text_range: Some(range_to_text_position(token.text_range())),
                                severity: Severity::Error,
                                rule: Rule::InvalidTag,
                            });
                            return;
                        }
                        _ => {
                            // Some other token follows the tag: nothing to report.
                            return;
                        }
                    }
                }
                rowan::NodeOrToken::Node(n) => {
                    if n.kind() == crate::SyntaxKind::SCALAR {
                        // The comma may be the first significant token of the scalar node.
                        for child in n.children_with_tokens() {
                            if let rowan::NodeOrToken::Token(t) = child {
                                if t.kind() == crate::SyntaxKind::COMMA {
                                    violations.push(Violation {
                                        message: "Invalid comma after tag".to_string(),
                                        location: None,
                                        text_range: None,
                                        severity: Severity::Error,
                                        rule: Rule::InvalidTag,
                                    });
                                    return;
                                } else if t.kind() != crate::SyntaxKind::WHITESPACE
                                    && t.kind() != crate::SyntaxKind::NEWLINE
                                {
                                    // Real content comes first: the tag is fine.
                                    return;
                                }
                            }
                        }
                    }
                    // Any node ends the search, whether or not it was a scalar.
                    return;
                }
            }
        }
    }
1264
1265 fn check_implicit_key_multiline(
1270 &self,
1271 entry_node: &SyntaxNode,
1272 violations: &mut Vec<Violation>,
1273 ) {
1274 for child in entry_node.children() {
1276 if child.kind() == crate::SyntaxKind::KEY {
1277 let key_text = child.text().to_string();
1279 if key_text.contains('\n') {
1280 violations.push(Violation {
1281 message: "Implicit key cannot span multiple lines".to_string(),
1282 location: None,
1283 text_range: Some(range_to_text_position(child.text_range())),
1284 severity: Severity::Error,
1285 rule: Rule::Other,
1286 });
1287 return; }
1289 }
1290 }
1291 }
1292
    /// Reports a block sequence that starts on the same line as its mapping key.
    ///
    /// Applies only when the entry's VALUE has a SEQUENCE child that is not a flow
    /// sequence (first token `[`). Starting after the KEY node, the entry's siblings are
    /// scanned for a COLON token followed later by a NEWLINE token. If no such NEWLINE is
    /// found - including when the entry has no KEY child at all - a violation is recorded.
    fn check_sequence_on_same_line_as_key(
        &self,
        entry_node: &SyntaxNode,
        violations: &mut Vec<Violation>,
    ) {
        use crate::SyntaxKind;

        // If an entry has several KEY / VALUE children, the last of each kind wins.
        let mut key_node: Option<SyntaxNode> = None;
        let mut value_node: Option<SyntaxNode> = None;

        for child in entry_node.children() {
            match child.kind() {
                SyntaxKind::KEY => key_node = Some(child),
                SyntaxKind::VALUE => value_node = Some(child),
                _ => {}
            }
        }

        let Some(value) = value_node else { return };

        // First SEQUENCE node directly under the value, if any.
        let mut sequence_node: Option<SyntaxNode> = None;
        for child in value.children() {
            if child.kind() == SyntaxKind::SEQUENCE {
                sequence_node = Some(child);
                break;
            }
        }

        let Some(sequence) = sequence_node else {
            return;
        };

        let first_token = sequence.first_token();
        let is_flow_sequence = first_token.as_ref().is_some_and(|t| t.text() == "[");

        if is_flow_sequence {
            return;
        }

        let mut found_colon = false;
        let mut has_newline = false;

        if let Some(key) = key_node {
            let mut current = key.next_sibling_or_token();

            while let Some(element) = current {
                if let rowan::NodeOrToken::Token(t) = &element {
                    if t.kind() == SyntaxKind::COLON {
                        found_colon = true;
                    } else if found_colon && t.kind() == SyntaxKind::NEWLINE {
                        // A line break after the colon: the sequence starts on a new line.
                        has_newline = true;
                        break;
                    }
                }

                if let rowan::NodeOrToken::Node(n) = &element {
                    // Stop if the sequence node itself is reached among these siblings.
                    if n == &sequence {
                        break;
                    }
                }

                current = element.next_sibling_or_token();
            }
        }

        if !has_newline {
            violations.push(Violation {
                message: "Block sequence cannot start on same line as mapping key".to_string(),
                location: None,
                text_range: None,
                severity: Severity::Error,
                rule: Rule::Other,
            });
        }
    }
1386
1387 fn get_column(&self, text: &str, offset: usize) -> usize {
1389 let mut col = 0;
1390 for (i, ch) in text.char_indices() {
1391 if i >= offset {
1392 break;
1393 }
1394 if ch == '\n' {
1395 col = 0;
1396 } else {
1397 col += 1;
1398 }
1399 }
1400 col
1401 }
1402
1403 fn check_sequence_indentation(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
1405 use crate::SyntaxKind;
1406
1407 if node.kind() != SyntaxKind::SEQUENCE {
1409 return;
1410 }
1411
1412 let root = find_root(node);
1414 let full_text = root.text().to_string();
1415 let mut dash_columns: Vec<usize> = Vec::new();
1416
1417 fn collect_dashes(
1419 node: &rowan::SyntaxNode<crate::Lang>,
1420 dashes: &mut Vec<rowan::SyntaxToken<crate::Lang>>,
1421 ) {
1422 for child in node.children_with_tokens() {
1423 match child {
1424 rowan::NodeOrToken::Token(token) if token.kind() == crate::SyntaxKind::DASH => {
1425 dashes.push(token);
1426 }
1427 rowan::NodeOrToken::Node(n)
1428 if n.kind() == crate::SyntaxKind::SEQUENCE_ENTRY =>
1429 {
1430 collect_dashes(&n, dashes);
1432 }
1433 _ => {}
1434 }
1435 }
1436 }
1437
1438 let mut dashes = Vec::new();
1439 collect_dashes(node, &mut dashes);
1440
1441 for token in dashes {
1442 let offset: usize = token.text_range().start().into();
1443 let col = self.get_column(&full_text, offset);
1444 dash_columns.push(col);
1445 }
1446
1447 if let Some(&first_col) = dash_columns.first() {
1449 for &col in &dash_columns[1..] {
1450 if col != first_col {
1451 violations.push(Violation {
1452 message: "Inconsistent sequence item indentation".to_string(),
1453 location: None,
1454 text_range: None,
1455 severity: Severity::Error,
1456 rule: Rule::InvalidIndentation,
1457 });
1458 return; }
1460 }
1461 }
1462 }
1463
1464 fn check_quoted_string_indentation(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
1466 use crate::SyntaxKind;
1467
1468 if node.kind() != SyntaxKind::SCALAR {
1470 return;
1471 }
1472
1473 let text = node.text().to_string();
1475 if !text.starts_with('"') && !text.starts_with('\'') {
1476 return; }
1478
1479 if !text.contains('\n') {
1480 return; }
1482
1483 let lines: Vec<&str> = text.split('\n').collect();
1487 if lines.len() > 1 {
1488 for (i, line) in lines.iter().enumerate().skip(1) {
1491 if i == lines.len() - 1 && line.trim().is_empty() {
1492 continue;
1494 }
1495
1496 let leading_spaces = line.len() - line.trim_start().len();
1498
1499 if leading_spaces == 0 && !line.trim().is_empty() {
1502 violations.push(Violation {
1503 message: "Wrong indented multiline quoted scalar".to_string(),
1504 location: None,
1505 text_range: None,
1506 severity: Severity::Error,
1507 rule: Rule::InvalidIndentation,
1508 });
1509 return;
1510 }
1511 }
1512 }
1513 }
1514
    /// Reports keys of a mapping that compare equal to an earlier key.
    ///
    /// For each MAPPING_ENTRY child, the first KEY child is collected together with its
    /// trimmed text and the entry's text range. Keys are compared pairwise through the
    /// first child node of each KEY: two scalars, two sequences or two mappings are
    /// compared with `crate::yaml_eq`; keys of different kinds are never equal.
    /// NOTE(review): `yaml_eq` is presumably semantic YAML equality - confirm at its
    /// definition.
    ///
    /// For each key, only its first later duplicate is reported; the violation spans the
    /// later entry.
    fn check_duplicate_keys(&self, node: &SyntaxNode, violations: &mut Vec<Violation>) {
        use crate::yaml_eq;
        use crate::SyntaxKind;

        // (key node, trimmed key text for the message, range of the whole entry)
        let keys: Vec<(SyntaxNode, String, rowan::TextRange)> = node
            .children()
            .filter(|child| child.kind() == SyntaxKind::MAPPING_ENTRY)
            .filter_map(|child| {
                let entry_range = child.text_range();
                child
                    .children()
                    .find(|n| n.kind() == SyntaxKind::KEY)
                    .map(|key_node| {
                        let key_text = key_node.text().to_string();
                        let key_text = key_text.trim().to_string();
                        (key_node, key_text, entry_range)
                    })
            })
            .collect();

        for i in 0..keys.len() {
            for j in (i + 1)..keys.len() {
                // The comparison is made on the first child node of each KEY.
                let key1_child = keys[i].0.children().next();
                let key2_child = keys[j].0.children().next();

                if let (Some(v1), Some(v2)) = (key1_child, key2_child) {
                    use crate::nodes::{Mapping, Scalar, Sequence};

                    let are_equal = match (v1.kind(), v2.kind()) {
                        (SyntaxKind::SCALAR, SyntaxKind::SCALAR) => Scalar::cast(v1)
                            .zip(Scalar::cast(v2))
                            .is_some_and(|(s1, s2)| yaml_eq(&s1, &s2)),
                        (SyntaxKind::SEQUENCE, SyntaxKind::SEQUENCE) => Sequence::cast(v1)
                            .zip(Sequence::cast(v2))
                            .is_some_and(|(s1, s2)| yaml_eq(&s1, &s2)),
                        (SyntaxKind::MAPPING, SyntaxKind::MAPPING) => Mapping::cast(v1)
                            .zip(Mapping::cast(v2))
                            .is_some_and(|(m1, m2)| yaml_eq(&m1, &m2)),
                        // Different kinds, or kinds not handled above, never match.
                        _ => false,
                    };

                    if are_equal {
                        let first_text = &keys[i].1;
                        let dup_text = &keys[j].1;

                        // An empty key is shown as `""`; anything else in Debug form.
                        let format_key = |s: &str| {
                            if s.is_empty() {
                                "\"\"".to_string()
                            } else {
                                format!("{:?}", s)
                            }
                        };

                        violations.push(Violation {
                            message: format!(
                                "Duplicate key: {} (semantically equal to {})",
                                format_key(dup_text),
                                format_key(first_text)
                            ),
                            location: None,
                            text_range: Some(range_to_text_position(keys[j].2)),
                            severity: Severity::Error,
                            rule: Rule::DuplicateKeys,
                        });
                        // Stop at the first duplicate of key `i`; later copies are found
                        // when the outer loop reaches key `j`.
                        break;
                    }
                }
            }
        }
    }
1601}
1602
1603impl Default for Validator {
1604 fn default() -> Self {
1605 Self::new()
1606 }
1607}
1608
1609#[cfg(test)]
1610mod tests {
1611 use super::*;
1612 use std::str::FromStr;
1613
1614 #[test]
1615 fn test_validator_basic() {
1616 let doc = Document::from_str("key: value").unwrap();
1617 let validator = Validator::new();
1618 let violations = validator.validate(&doc);
1619
1620 assert_eq!(violations.len(), 0);
1622 }
1623
1624 #[test]
1625 fn test_validator_tabs_debug() {
1626 let yaml = "---\na:\n\tb:\n\t\tc: value";
1627 let doc = Document::from_str(yaml).unwrap();
1628 let validator = Validator::new();
1629
1630 let mut found_tab = false;
1632 for child in doc.syntax().descendants_with_tokens() {
1633 if let rowan::NodeOrToken::Token(token) = child {
1634 if token.text().contains('\t') {
1635 println!(
1636 "Found tab in token: {:?} = {:?}",
1637 token.kind(),
1638 token.text()
1639 );
1640 found_tab = true;
1641 }
1642 }
1643 }
1644
1645 println!("Found tab in tree: {}", found_tab);
1646
1647 let violations = validator.validate(&doc);
1648 println!("Violations: {}", violations.len());
1649 for v in &violations {
1650 println!(" {}", v);
1651 }
1652
1653 assert!(found_tab, "Tabs should be in the syntax tree");
1654 }
1655
1656 #[test]
1657 fn test_validator_missing_comma() {
1658 let doc = Document::from_str("{foo: 1 bar: 2}").unwrap();
1659 let validator = Validator::new();
1660 let violations = validator.validate(&doc);
1661
1662 println!("Found {} violations:", violations.len());
1664 for v in &violations {
1665 println!(" {}", v);
1666 }
1667
1668 assert!(
1669 !violations.is_empty(),
1670 "Expected violations for missing comma, got none"
1671 );
1672 }
1673
1674 #[test]
1675 fn test_validator_invalid_escape() {
1676 let doc = Document::from_str("\"\\.\"\n").unwrap();
1677 let validator = Validator::new();
1678 let violations = validator.validate(&doc);
1679
1680 assert!(
1682 !violations.is_empty(),
1683 "Expected violations for invalid escape \\., got none"
1684 );
1685 assert_eq!(violations[0].rule, Rule::InvalidEscape);
1686 }
1687
1688 #[test]
1689 fn test_validator_multiple_anchors() {
1690 let doc = Document::from_str("&a &b key: value").unwrap();
1692 let validator = Validator::new();
1693 let violations = validator.validate(&doc);
1694
1695 assert!(
1696 !violations.is_empty(),
1697 "Expected violations for multiple anchors, got none"
1698 );
1699 assert_eq!(violations[0].rule, Rule::InvalidAnchor);
1700
1701 let yaml = "top1: &node1\n &k1 key1: val1\ntop2: &node2\n &v2 val2\n";
1703 let doc2 = Document::from_str(yaml).unwrap();
1704 let violations2 = validator.validate(&doc2);
1705
1706 assert!(
1708 violations2.len() >= 2,
1709 "Expected at least 2 violations for 4JVG"
1710 );
1711 }
1712
1713 #[test]
1714 fn test_validator_duplicate_directive() {
1715 let yaml = "%YAML 1.2\n%YAML 1.2\n---\nkey: value\n";
1716 let doc = Document::from_str(yaml).unwrap();
1717 let validator = Validator::new();
1718 let violations = validator.validate(&doc);
1719
1720 assert_eq!(
1721 violations.len(),
1722 1,
1723 "Expected exactly one violation for duplicate YAML directive"
1724 );
1725 assert_eq!(violations[0].message, "Duplicate %YAML directive");
1726 }
1727
1728 #[test]
1729 fn test_validator_duplicate_keys() {
1730 let yaml = "a: 1\nb: 2\na: 3\n";
1731 let doc = Document::from_str(yaml).unwrap();
1732 let validator = Validator::new();
1733 let violations = validator.validate(&doc);
1734
1735 let dup_violations: Vec<_> = violations
1736 .iter()
1737 .filter(|v| v.rule == Rule::DuplicateKeys)
1738 .collect();
1739 assert_eq!(
1740 dup_violations.len(),
1741 1,
1742 "Expected exactly one DuplicateKeys violation, got: {:?}",
1743 dup_violations
1744 );
1745 assert_eq!(
1746 dup_violations[0].message,
1747 "Duplicate key: \"a\" (semantically equal to \"a\")"
1748 );
1749 }
1750
1751 #[test]
1752 fn test_validator_no_duplicate_keys() {
1753 let yaml = "a: 1\nb: 2\nc: 3\n";
1754 let doc = Document::from_str(yaml).unwrap();
1755 let validator = Validator::new();
1756 let violations = validator.validate(&doc);
1757
1758 let dup_violations: Vec<_> = violations
1759 .iter()
1760 .filter(|v| v.rule == Rule::DuplicateKeys)
1761 .collect();
1762 assert_eq!(
1763 dup_violations.len(),
1764 0,
1765 "Expected no DuplicateKeys violations"
1766 );
1767 }
1768
1769 #[test]
1770 fn test_validator_duplicate_keys_disabled() {
1771 let yaml = "a: 1\nb: 2\na: 3\n";
1772 let doc = Document::from_str(yaml).unwrap();
1773 let validator = Validator::with_config(ValidatorConfig {
1774 check_duplicate_keys: false,
1775 ..ValidatorConfig::default()
1776 });
1777 let violations = validator.validate(&doc);
1778
1779 let dup_violations: Vec<_> = violations
1780 .iter()
1781 .filter(|v| v.rule == Rule::DuplicateKeys)
1782 .collect();
1783 assert_eq!(
1784 dup_violations.len(),
1785 0,
1786 "Expected no violations when duplicate key check is disabled"
1787 );
1788 }
1789
1790 #[test]
1791 fn test_validator_semantic_duplicate_keys() {
1792 let validator = Validator::new();
1793
1794 let yaml1 = "'a': 1\na: 2";
1796 let doc1 = Document::from_str(yaml1).unwrap();
1797 let violations1 = validator.validate(&doc1);
1798 assert_eq!(
1799 violations1
1800 .iter()
1801 .filter(|v| v.rule == Rule::DuplicateKeys)
1802 .count(),
1803 1,
1804 "Quoted 'a' and unquoted a should be duplicates"
1805 );
1806
1807 let yaml2 = "true: 1\nTrue: 2";
1809 let doc2 = Document::from_str(yaml2).unwrap();
1810 let violations2 = validator.validate(&doc2);
1811 assert_eq!(
1812 violations2
1813 .iter()
1814 .filter(|v| v.rule == Rule::DuplicateKeys)
1815 .count(),
1816 1,
1817 "true and True should be duplicates"
1818 );
1819
1820 let yaml3 = "1: one\n0x1: hex";
1822 let doc3 = Document::from_str(yaml3).unwrap();
1823 let violations3 = validator.validate(&doc3);
1824 assert_eq!(
1825 violations3
1826 .iter()
1827 .filter(|v| v.rule == Rule::DuplicateKeys)
1828 .count(),
1829 1,
1830 "1 and 0x1 should be duplicates"
1831 );
1832
1833 let yaml4 = "null: 1\n~: 2";
1835 let doc4 = Document::from_str(yaml4).unwrap();
1836 let violations4 = validator.validate(&doc4);
1837 assert_eq!(
1838 violations4
1839 .iter()
1840 .filter(|v| v.rule == Rule::DuplicateKeys)
1841 .count(),
1842 1,
1843 "null and ~ should be duplicates"
1844 );
1845
1846 let yaml5 = "\"1\": string\n1: int";
1848 let doc5 = Document::from_str(yaml5).unwrap();
1849 let violations5 = validator.validate(&doc5);
1850 assert_eq!(
1851 violations5
1852 .iter()
1853 .filter(|v| v.rule == Rule::DuplicateKeys)
1854 .count(),
1855 0,
1856 "String '1' and int 1 should not be duplicates"
1857 );
1858
1859 let yaml6 = "1.0: float\n1: int";
1861 let doc6 = Document::from_str(yaml6).unwrap();
1862 let violations6 = validator.validate(&doc6);
1863 assert_eq!(
1864 violations6
1865 .iter()
1866 .filter(|v| v.rule == Rule::DuplicateKeys)
1867 .count(),
1868 0,
1869 "Float 1.0 and int 1 should not be duplicates"
1870 );
1871 }
1872
1873 #[test]
1874 fn test_validator_directive_without_document() {
1875 let yaml = "%YAML 1.2\n";
1877 let doc = Document::from_str(yaml).unwrap();
1878
1879 let root = doc
1881 .syntax()
1882 .parent()
1883 .unwrap_or_else(|| doc.syntax().clone());
1884 let directive_count = root
1885 .descendants()
1886 .filter(|n| n.kind() == crate::SyntaxKind::DIRECTIVE)
1887 .count();
1888 let content_count = doc
1889 .syntax()
1890 .descendants()
1891 .filter(|n| {
1892 matches!(
1893 n.kind(),
1894 crate::SyntaxKind::MAPPING
1895 | crate::SyntaxKind::SEQUENCE
1896 | crate::SyntaxKind::SCALAR
1897 | crate::SyntaxKind::TAGGED_NODE
1898 )
1899 })
1900 .count();
1901
1902 let validator = Validator::new();
1903 let violations = validator.validate(&doc);
1904
1905 if directive_count > 0 && content_count == 0 {
1907 assert!(
1908 !violations.is_empty(),
1909 "Expected violation for directive without document (directives={}, content={})",
1910 directive_count,
1911 content_count
1912 );
1913 }
1914 }
1915
1916 #[test]
1917 fn test_validator_content_after_doc_end() {
1918 let yaml = "---\nkey: value\n... invalid\n";
1921 let doc = Document::from_str(yaml).unwrap();
1922
1923 let validator = Validator::new();
1924 let violations = validator.validate(&doc);
1925
1926 let invalid_content_violations: Vec<_> = violations
1927 .iter()
1928 .filter(|v| v.message.starts_with("Invalid content in document:"))
1929 .collect();
1930 assert_eq!(
1931 invalid_content_violations.len(),
1932 1,
1933 "Expected exactly one 'Invalid content' violation for content after document end marker"
1934 );
1935 }
1936
1937 #[test]
1938 fn test_validator_directive_with_tagged_node_content() {
1939 let yaml = "%YAML 1.2\n---\n!custom foo\n";
1942 let doc = Document::from_str(yaml).unwrap();
1943 let validator = Validator::new();
1944 let violations = validator.validate(&doc);
1945
1946 assert_eq!(
1947 violations.len(),
1948 0,
1949 "Tagged scalar is real content; valid document should have no violations"
1950 );
1951 }
1952
1953 #[test]
1954 fn test_validator_with_config() {
1955 let config = ValidatorConfig {
1956 check_duplicate_keys: false,
1957 ..Default::default()
1958 };
1959 let validator = Validator::with_config(config);
1960
1961 let doc = Document::from_str("key: value").unwrap();
1962 let violations = validator.validate(&doc);
1963
1964 assert_eq!(violations.len(), 0);
1965 }
1966
1967 #[test]
1968 fn test_violation_display() {
1969 let violation = Violation {
1970 message: "Test violation".to_string(),
1971 location: Some("1:5".to_string()),
1972 text_range: None,
1973 severity: Severity::Error,
1974 rule: Rule::InvalidIndentation,
1975 };
1976
1977 assert_eq!(
1978 format!("{}", violation),
1979 "[ERROR] 1:5: Test violation (InvalidIndentation)"
1980 );
1981 }
1982
1983 #[test]
1984 fn test_u99r_invalid_comma_in_tag() {
1985 let yaml = "- !!str, xxx\n";
1987 use crate::YamlFile;
1988 let file = YamlFile::from_str(yaml).unwrap();
1989 let validator = Validator::new();
1990
1991 println!("\n=== Syntax tree ===");
1993 crate::debug::print_tree(file.syntax());
1994
1995 let violations = validator.validate_syntax(file.syntax());
1996 println!("\n=== Violations ({}) ===", violations.len());
1997 for v in &violations {
1998 println!(" {}", v);
1999 }
2000
2001 assert!(
2002 !violations.is_empty(),
2003 "Expected violation for invalid comma after tag"
2004 );
2005 assert_eq!(violations.len(), 1);
2006 assert_eq!(violations[0].message, "Invalid comma after tag");
2007 assert_eq!(violations[0].rule, Rule::InvalidTag);
2008 }
2009
2010 #[test]
2011 fn test_comment_whitespace() {
2012 use crate::YamlFile;
2013
2014 let yaml = "key: \"value\"# invalid comment\n";
2016 let parsed = YamlFile::from_str(yaml).expect("Should parse");
2017
2018 let validator = Validator::new();
2019 let violations = validator.validate_syntax(parsed.syntax());
2020
2021 assert!(
2022 !violations.is_empty(),
2023 "Should catch comment without whitespace"
2024 );
2025 assert_eq!(
2026 violations[0].message,
2027 "Comment without whitespace separation"
2028 );
2029 }
2030
2031 #[test]
2032 fn test_doc_start_content() {
2033 use crate::YamlFile;
2034
2035 let yaml = "--- key1: value1\n key2: value2\n";
2037 let parsed = YamlFile::from_str(yaml).expect("Should parse");
2038
2039 let validator = Validator::new();
2040 let violations = validator.validate_syntax(parsed.syntax());
2041
2042 assert!(
2043 !violations.is_empty(),
2044 "Should catch content on doc start line"
2045 );
2046 assert_eq!(
2047 violations[0].message,
2048 "Content on same line as document start marker"
2049 );
2050 }
2051
2052 #[test]
2053 fn test_directive_in_document_content() {
2054 let input = "%YAML 1.2\n---\n%YAML 1.2\n---\n";
2056 let file = crate::YamlFile::from_str(input).unwrap();
2057 let validator = Validator::new();
2058 use rowan::ast::AstNode;
2059 let violations = validator.validate_syntax(file.syntax());
2060
2061 assert_eq!(
2062 violations.len(),
2063 1,
2064 "Expected one violation for directive in content, got: {:?}",
2065 violations
2066 );
2067 assert_eq!(
2068 violations[0].message,
2069 "Directive in document content (missing document end marker `...` before directive)"
2070 );
2071 }
2072}