1#![allow(dead_code)]
33
34use crate::syntax::{SyntaxKind, SyntaxNode};
35use rowan::GreenNodeBuilder;
36
37use super::model::{
38 ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlInputKind, YamlParseReport,
39};
40use super::scanner::{Scanner, TokenKind, TriviaKind};
41
42pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
47 let line_count = input.lines().count().max(1);
48
49 if !options.enabled {
50 return ShadowYamlReport {
51 outcome: ShadowYamlOutcome::SkippedDisabled,
52 shadow_reason: "shadow-disabled",
53 input_kind: options.input_kind,
54 input_len_bytes: input.len(),
55 line_count,
56 normalized_input: None,
57 };
58 }
59
60 let normalized = match options.input_kind {
61 YamlInputKind::Plain => input.to_owned(),
62 YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
63 };
64
65 let parsed = parse_yaml_tree(&normalized).is_some();
66
67 ShadowYamlReport {
68 outcome: if parsed {
69 ShadowYamlOutcome::PrototypeParsed
70 } else {
71 ShadowYamlOutcome::PrototypeRejected
72 },
73 shadow_reason: if parsed {
74 "prototype-basic-mapping-parsed"
75 } else {
76 "prototype-basic-mapping-rejected"
77 },
78 input_kind: options.input_kind,
79 input_len_bytes: input.len(),
80 line_count,
81 normalized_input: Some(normalized),
82 }
83}
84
85fn normalize_hashpipe_input(input: &str) -> String {
86 input
87 .lines()
88 .map(strip_hashpipe_prefix)
89 .collect::<Vec<_>>()
90 .join("\n")
91}
92
/// Removes a leading `#|` marker, plus at most one space right after it, from `line`.
///
/// Lines that do not start with `#|` are returned unchanged. Only a single space
/// is consumed after the marker; any further spaces stay in the result.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        Some(rest) => rest.strip_prefix(' ').unwrap_or(rest),
        None => line,
    }
}
99
100pub fn parse_yaml_tree(input: &str) -> Option<SyntaxNode> {
102 parse_yaml_report(input).tree
103}
104
105pub fn parse_yaml_report(input: &str) -> YamlParseReport {
117 if let Some(err) = super::validator::validate_yaml(input) {
118 return YamlParseReport {
119 tree: None,
120 diagnostics: vec![err],
121 };
122 }
123
124 let stream = parse_stream(input);
125 let mut builder = GreenNodeBuilder::new();
126 builder.start_node(SyntaxKind::DOCUMENT.into());
127 builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
128 let stream_green = stream.green().into_owned();
129 builder.start_node(SyntaxKind::YAML_STREAM.into());
130 for child in stream_green.children() {
131 match child {
132 rowan::NodeOrToken::Node(n) => {
133 push_green_node(&mut builder, n);
134 }
135 rowan::NodeOrToken::Token(t) => {
136 builder.token(t.kind(), t.text());
137 }
138 }
139 }
140 builder.finish_node(); builder.finish_node(); builder.finish_node(); YamlParseReport {
144 tree: Some(SyntaxNode::new_root(builder.finish())),
145 diagnostics: Vec::new(),
146 }
147}
148
149fn push_green_node(builder: &mut GreenNodeBuilder<'_>, node: &rowan::GreenNodeData) {
150 builder.start_node(node.kind());
151 for child in node.children() {
152 match child {
153 rowan::NodeOrToken::Node(n) => push_green_node(builder, n),
154 rowan::NodeOrToken::Token(t) => builder.token(t.kind(), t.text()),
155 }
156 }
157 builder.finish_node();
158}
159
160pub fn parse_stream(input: &str) -> SyntaxNode {
164 let mut builder = GreenNodeBuilder::new();
165 builder.start_node(SyntaxKind::YAML_STREAM.into());
166 let mut scanner = Scanner::new(input);
167 let mut doc_open = false;
168 let mut doc_only_has_directives = false;
175 let mut block_stack: Vec<BlockFrame> = Vec::new();
180 let mut prev_significant: Option<TokenKind> = None;
192 let mut decoration_col_floor: Option<usize> = None;
198 while let Some(tok) = scanner.next_token() {
199 let last_significant = prev_significant;
200 let decorations_so_far = decoration_col_floor;
201 let is_decoration = matches!(
202 tok.kind,
203 TokenKind::Anchor | TokenKind::Tag | TokenKind::Alias
204 );
205 if !matches!(
206 tok.kind,
207 TokenKind::Trivia(_) | TokenKind::StreamStart | TokenKind::StreamEnd
208 ) {
209 if is_decoration {
210 decoration_col_floor = Some(
211 decoration_col_floor.map_or(tok.start.column, |c| c.min(tok.start.column)),
212 );
213 } else {
214 prev_significant = Some(tok.kind);
215 decoration_col_floor = None;
216 }
217 }
218 match tok.kind {
219 TokenKind::StreamStart | TokenKind::StreamEnd => continue,
220 TokenKind::BlockMappingStart => {
221 ensure_doc_open(&mut builder, &mut doc_open);
222 doc_only_has_directives = false;
223 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
224 builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
225 block_stack.push(BlockFrame::BlockMap {
226 entry_open: false,
227 in_value: false,
228 });
229 continue;
230 }
231 TokenKind::BlockSequenceStart => {
232 ensure_doc_open(&mut builder, &mut doc_open);
233 doc_only_has_directives = false;
234 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
235 builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
236 block_stack.push(BlockFrame::BlockSequence {
237 item_open: false,
238 indentless: false,
239 });
240 continue;
241 }
242 TokenKind::BlockEnd => {
243 close_indentless_sequences(&mut builder, &mut block_stack);
248 close_open_sub_wrapper(&mut builder, &mut block_stack);
249 if block_stack.pop().is_some() {
253 builder.finish_node();
254 }
255 continue;
256 }
257 TokenKind::FlowSequenceStart => {
258 ensure_doc_open(&mut builder, &mut doc_open);
259 doc_only_has_directives = false;
260 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
261 builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
264 block_stack.push(BlockFrame::FlowSequence { item_open: false });
265 let text = &input[tok.start.index..tok.end.index];
266 builder.token(SyntaxKind::YAML_SCALAR.into(), text);
267 continue;
268 }
269 TokenKind::FlowSequenceEnd => {
270 close_open_sub_wrapper(&mut builder, &mut block_stack);
271 let text = &input[tok.start.index..tok.end.index];
272 builder.token(SyntaxKind::YAML_SCALAR.into(), text);
273 if matches!(
274 block_stack.last(),
275 Some(BlockFrame::FlowSequence { .. } | BlockFrame::FlowMap { .. })
276 ) {
277 block_stack.pop();
278 builder.finish_node();
279 }
280 continue;
281 }
282 TokenKind::FlowMappingStart => {
283 ensure_doc_open(&mut builder, &mut doc_open);
284 doc_only_has_directives = false;
285 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
286 builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
287 block_stack.push(BlockFrame::FlowMap {
288 entry_open: false,
289 in_value: false,
290 });
291 let text = &input[tok.start.index..tok.end.index];
292 builder.token(SyntaxKind::YAML_SCALAR.into(), text);
293 continue;
294 }
295 TokenKind::FlowMappingEnd => {
296 close_open_sub_wrapper(&mut builder, &mut block_stack);
297 let text = &input[tok.start.index..tok.end.index];
298 builder.token(SyntaxKind::YAML_SCALAR.into(), text);
299 if matches!(
300 block_stack.last(),
301 Some(BlockFrame::FlowMap { .. } | BlockFrame::FlowSequence { .. })
302 ) {
303 block_stack.pop();
304 builder.finish_node();
305 }
306 continue;
307 }
308 TokenKind::FlowEntry => {
309 close_open_sub_wrapper(&mut builder, &mut block_stack);
312 let text = &input[tok.start.index..tok.end.index];
313 builder.token(SyntaxKind::YAML_SCALAR.into(), text);
314 continue;
315 }
316 TokenKind::Key => {
317 close_indentless_sequences(&mut builder, &mut block_stack);
321 if matches!(
326 block_stack.last(),
327 Some(BlockFrame::BlockMap { .. } | BlockFrame::FlowMap { .. })
328 ) {
329 open_map_entry_with_key(&mut builder, &mut block_stack);
330 }
331 if tok.start.index == tok.end.index {
332 continue;
334 }
335 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
337 }
340 TokenKind::Value => {
341 close_indentless_sequences(&mut builder, &mut block_stack);
344 let map_state = match block_stack.last().copied() {
345 Some(BlockFrame::BlockMap {
346 entry_open,
347 in_value,
348 }) => Some((false, entry_open, in_value)),
349 Some(BlockFrame::FlowMap {
350 entry_open,
351 in_value,
352 }) => Some((true, entry_open, in_value)),
353 _ => None,
354 };
355 if let Some((is_flow, mut entry_open, mut in_value)) = map_state {
356 if !is_flow && entry_open && in_value {
368 close_open_sub_wrapper(&mut builder, &mut block_stack);
369 entry_open = false;
370 in_value = false;
371 }
372 if !entry_open {
375 open_map_entry_with_key(&mut builder, &mut block_stack);
376 }
377 if !in_value {
378 let text = &input[tok.start.index..tok.end.index];
381 if !text.is_empty() {
382 builder.token(SyntaxKind::YAML_COLON.into(), text);
383 }
384 builder.finish_node(); let value_kind = if is_flow {
386 SyntaxKind::YAML_FLOW_MAP_VALUE
387 } else {
388 SyntaxKind::YAML_BLOCK_MAP_VALUE
389 };
390 builder.start_node(value_kind.into());
391 if let Some(
392 BlockFrame::BlockMap { in_value, .. }
393 | BlockFrame::FlowMap { in_value, .. },
394 ) = block_stack.last_mut()
395 {
396 *in_value = true;
397 }
398 continue;
399 }
400 }
404 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
407 }
408 TokenKind::BlockEntry => {
409 let decorations_inside_value =
425 decorations_so_far.is_none_or(|c| c > tok.start.column);
426 let indentless_value = last_significant == Some(TokenKind::Value)
427 && matches!(
428 block_stack.last(),
429 Some(BlockFrame::BlockMap { in_value: true, .. })
430 )
431 && decorations_inside_value;
432 let indentless_key = last_significant == Some(TokenKind::Key)
439 && matches!(
440 block_stack.last(),
441 Some(BlockFrame::BlockMap {
442 entry_open: true,
443 in_value: false,
444 })
445 )
446 && decorations_inside_value;
447 if indentless_value || indentless_key {
448 builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
449 block_stack.push(BlockFrame::BlockSequence {
450 item_open: false,
451 indentless: true,
452 });
453 }
454 if matches!(block_stack.last(), Some(BlockFrame::BlockSequence { .. })) {
455 close_open_sub_wrapper(&mut builder, &mut block_stack);
456 builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
457 if let Some(BlockFrame::BlockSequence { item_open, .. }) =
458 block_stack.last_mut()
459 {
460 *item_open = true;
461 }
462 }
463 }
466 TokenKind::Trivia(_) => {
467 }
470 _ => {
471 if !matches!(tok.kind, TokenKind::DocumentStart | TokenKind::DocumentEnd) {
476 ensure_flow_seq_item_open(&mut builder, &mut block_stack);
477 }
478 }
479 }
480 let text = &input[tok.start.index..tok.end.index];
481 if text.is_empty() {
482 continue;
484 }
485 let kind = map_token_to_syntax_kind(tok.kind);
486 match tok.kind {
487 TokenKind::DocumentStart => {
488 if doc_open && doc_only_has_directives {
499 builder.token(kind.into(), text);
500 doc_only_has_directives = false;
501 } else {
502 close_block_containers(&mut builder, &mut block_stack);
503 if doc_open {
504 builder.finish_node();
505 }
506 builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
507 doc_open = true;
508 doc_only_has_directives = false;
509 builder.token(kind.into(), text);
510 }
511 }
512 TokenKind::DocumentEnd => {
513 close_block_containers(&mut builder, &mut block_stack);
517 if !doc_open {
518 builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
519 }
520 builder.token(kind.into(), text);
521 builder.finish_node();
522 doc_open = false;
523 doc_only_has_directives = false;
524 }
525 TokenKind::Trivia(_) => {
526 builder.token(kind.into(), text);
531 }
532 TokenKind::Directive => {
533 let was_open = doc_open;
537 ensure_doc_open(&mut builder, &mut doc_open);
538 if !was_open {
539 doc_only_has_directives = true;
540 }
541 builder.token(kind.into(), text);
542 }
543 _ => {
544 ensure_doc_open(&mut builder, &mut doc_open);
548 doc_only_has_directives = false;
549 builder.token(kind.into(), text);
550 }
551 }
552 }
553 close_block_containers(&mut builder, &mut block_stack);
559 if doc_open {
560 builder.finish_node();
561 }
562 builder.finish_node();
563 SyntaxNode::new_root(builder.finish())
564}
565
/// One open container on the parser's nesting stack.
///
/// Each frame corresponds to a container node currently open on the builder and
/// records which of its sub-wrapper nodes (entry/key/value or item) is open, so
/// the right number of nodes can be closed later.
#[derive(Debug, Clone, Copy)]
enum BlockFrame {
    /// An open `YAML_BLOCK_MAP`.
    BlockMap {
        /// An entry node is open inside the map.
        entry_open: bool,
        /// The open entry has moved from its key wrapper to its value wrapper.
        in_value: bool,
    },
    /// An open `YAML_BLOCK_SEQUENCE`.
    BlockSequence {
        /// An item node is open inside the sequence.
        item_open: bool,
        /// The sequence was opened on a `-` token rather than by a
        /// `BlockSequenceStart` token.
        indentless: bool,
    },
    /// An open `YAML_FLOW_MAP`.
    FlowMap {
        /// An entry node is open inside the map.
        entry_open: bool,
        /// The open entry has moved from its key wrapper to its value wrapper.
        in_value: bool,
    },
    /// An open `YAML_FLOW_SEQUENCE`.
    FlowSequence {
        /// An item node is open inside the sequence.
        item_open: bool,
    },
}
599
600fn ensure_doc_open(builder: &mut GreenNodeBuilder<'_>, doc_open: &mut bool) {
601 if !*doc_open {
602 builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
603 *doc_open = true;
604 }
605}
606
607fn ensure_flow_seq_item_open(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
612 if let Some(BlockFrame::FlowSequence { item_open }) = stack.last_mut()
613 && !*item_open
614 {
615 builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
616 *item_open = true;
617 }
618}
619
620fn open_map_entry_with_key(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
624 close_open_sub_wrapper(builder, stack);
625 let (entry_kind, key_kind) = match stack.last() {
626 Some(BlockFrame::BlockMap { .. }) => (
627 SyntaxKind::YAML_BLOCK_MAP_ENTRY,
628 SyntaxKind::YAML_BLOCK_MAP_KEY,
629 ),
630 Some(BlockFrame::FlowMap { .. }) => (
631 SyntaxKind::YAML_FLOW_MAP_ENTRY,
632 SyntaxKind::YAML_FLOW_MAP_KEY,
633 ),
634 _ => return,
635 };
636 builder.start_node(entry_kind.into());
637 builder.start_node(key_kind.into());
638 if let Some(
639 BlockFrame::BlockMap {
640 entry_open,
641 in_value,
642 }
643 | BlockFrame::FlowMap {
644 entry_open,
645 in_value,
646 },
647 ) = stack.last_mut()
648 {
649 *entry_open = true;
650 *in_value = false;
651 }
652}
653
654fn close_indentless_sequences(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
663 while let Some(BlockFrame::BlockSequence {
664 indentless: true, ..
665 }) = stack.last()
666 {
667 close_open_sub_wrapper(builder, stack);
668 stack.pop();
669 builder.finish_node(); }
671}
672
673fn close_open_sub_wrapper(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
683 let Some(frame) = stack.last_mut() else {
684 return;
685 };
686 match frame {
687 BlockFrame::BlockMap {
688 entry_open: true,
689 in_value,
690 } => {
691 if *in_value {
692 builder.finish_node(); } else {
694 builder.finish_node(); builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
696 builder.finish_node(); }
698 builder.finish_node(); *frame = BlockFrame::BlockMap {
700 entry_open: false,
701 in_value: false,
702 };
703 }
704 BlockFrame::FlowMap {
705 entry_open: true,
706 in_value,
707 } => {
708 if *in_value {
709 builder.finish_node();
710 } else {
711 builder.finish_node();
712 builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
713 builder.finish_node();
714 }
715 builder.finish_node();
716 *frame = BlockFrame::FlowMap {
717 entry_open: false,
718 in_value: false,
719 };
720 }
721 BlockFrame::BlockSequence {
722 item_open: true,
723 indentless,
724 } => {
725 let indentless = *indentless;
726 builder.finish_node();
727 *frame = BlockFrame::BlockSequence {
728 item_open: false,
729 indentless,
730 };
731 }
732 BlockFrame::FlowSequence { item_open: true } => {
733 builder.finish_node();
734 *frame = BlockFrame::FlowSequence { item_open: false };
735 }
736 _ => {}
737 }
738}
739
740fn close_block_containers(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
741 while let Some(frame) = stack.pop() {
742 match frame {
743 BlockFrame::BlockMap {
744 entry_open: true,
745 in_value,
746 } => {
747 if in_value {
748 builder.finish_node(); } else {
750 builder.finish_node(); builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
752 builder.finish_node();
753 }
754 builder.finish_node(); }
756 BlockFrame::FlowMap {
757 entry_open: true,
758 in_value,
759 } => {
760 if in_value {
761 builder.finish_node();
762 } else {
763 builder.finish_node();
764 builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
765 builder.finish_node();
766 }
767 builder.finish_node();
768 }
769 BlockFrame::BlockSequence {
770 item_open: true, ..
771 }
772 | BlockFrame::FlowSequence { item_open: true } => {
773 builder.finish_node();
774 }
775 _ => {}
776 }
777 builder.finish_node();
778 }
779}
780
781fn map_token_to_syntax_kind(kind: TokenKind) -> SyntaxKind {
782 match kind {
783 TokenKind::Trivia(TriviaKind::Whitespace) => SyntaxKind::WHITESPACE,
784 TokenKind::Trivia(TriviaKind::Newline) => SyntaxKind::NEWLINE,
785 TokenKind::Trivia(TriviaKind::Comment) => SyntaxKind::YAML_COMMENT,
786 TokenKind::DocumentStart => SyntaxKind::YAML_DOCUMENT_START,
787 TokenKind::DocumentEnd => SyntaxKind::YAML_DOCUMENT_END,
788 TokenKind::Directive => SyntaxKind::YAML_SCALAR,
789 TokenKind::BlockEntry => SyntaxKind::YAML_BLOCK_SEQ_ENTRY,
790 TokenKind::FlowEntry => SyntaxKind::YAML_SCALAR,
791 TokenKind::FlowSequenceStart | TokenKind::FlowSequenceEnd => SyntaxKind::YAML_SCALAR,
792 TokenKind::FlowMappingStart | TokenKind::FlowMappingEnd => SyntaxKind::YAML_SCALAR,
793 TokenKind::Value => SyntaxKind::YAML_COLON,
794 TokenKind::Anchor => SyntaxKind::YAML_ANCHOR,
795 TokenKind::Alias => SyntaxKind::YAML_ALIAS,
796 TokenKind::Tag => SyntaxKind::YAML_TAG,
797 TokenKind::Scalar(_) => SyntaxKind::YAML_SCALAR,
798 TokenKind::Key => SyntaxKind::YAML_KEY,
801 TokenKind::StreamStart
804 | TokenKind::StreamEnd
805 | TokenKind::BlockSequenceStart
806 | TokenKind::BlockMappingStart
807 | TokenKind::BlockEnd => SyntaxKind::YAML_SCALAR,
808 }
809}
810
/// Summary produced by [`shadow_parser_check`] for one input.
#[derive(Debug, Clone)]
pub struct ShadowParserReport {
    /// `true` when the text of the parsed tree equals the input exactly.
    pub text_lossless: bool,
    /// Number of child *nodes* directly under the stream root (tokens are not counted).
    pub stream_child_count: usize,
}
823
824pub fn shadow_parser_check(input: &str) -> ShadowParserReport {
828 let tree = parse_stream(input);
829 let text = tree.text().to_string();
830 ShadowParserReport {
831 text_lossless: text == input,
832 stream_child_count: tree.children().count(),
833 }
834}
835
836#[cfg(test)]
837mod tests {
838 use super::*;
839 use crate::syntax::SyntaxKind;
840
841 #[test]
842 fn returns_byte_lossless_cst_for_empty_input() {
843 let report = shadow_parser_check("");
844 assert!(report.text_lossless);
845 }
846
847 #[test]
848 fn returns_byte_lossless_cst_for_simple_mapping() {
849 let report = shadow_parser_check("key: value\n");
850 assert!(report.text_lossless);
851 }
852
853 #[test]
854 fn returns_byte_lossless_cst_for_block_sequence() {
855 let report = shadow_parser_check("- a\n- b\n");
856 assert!(report.text_lossless);
857 }
858
859 #[test]
860 fn returns_byte_lossless_cst_for_flow_mapping() {
861 let report = shadow_parser_check("{a: b, c: d}\n");
862 assert!(report.text_lossless);
863 }
864
865 #[test]
866 fn returns_byte_lossless_cst_for_block_scalar() {
867 let report = shadow_parser_check("key: |\n hello\n world\n");
868 assert!(report.text_lossless);
869 }
870
871 #[test]
872 fn returns_byte_lossless_cst_for_quoted_scalar() {
873 let report = shadow_parser_check("\"key\": \"value\"\n");
874 assert!(report.text_lossless);
875 }
876
877 #[test]
878 fn returns_byte_lossless_cst_for_multi_line_plain_scalar() {
879 let report = shadow_parser_check("key: hello\n world\n");
880 assert!(report.text_lossless);
881 }
882
883 #[test]
884 fn preserves_explicit_key_indicator_byte_in_flow_context() {
885 let input = "{ ?foo: bar }\n";
890 let report = shadow_parser_check(input);
891 assert!(report.text_lossless, "input {input:?} not preserved");
892 }
893
894 #[test]
895 fn does_not_absorb_terminator_line_break_into_flow_scalar() {
896 let input = "{a: 42\n}\n";
902 let report = shadow_parser_check(input);
903 assert!(report.text_lossless, "input {input:?} not preserved");
904 }
905
906 fn document_count(tree: &SyntaxNode) -> usize {
907 tree.children()
908 .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
909 .count()
910 }
911
912 #[test]
913 fn implicit_document_wraps_body_with_no_markers() {
914 let input = "key: value\n";
917 let tree = parse_stream(input);
918 assert_eq!(document_count(&tree), 1);
919 assert_eq!(tree.text().to_string(), input);
920 }
921
922 #[test]
923 fn explicit_doc_start_opens_document_marker_lives_inside() {
924 let input = "---\nkey: value\n";
925 let tree = parse_stream(input);
926 assert_eq!(document_count(&tree), 1);
927 let doc = tree
928 .children()
929 .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
930 .expect("document node");
931 assert!(
932 doc.children_with_tokens().any(|el| el
933 .as_token()
934 .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_START)),
935 "`---` token should live inside YAML_DOCUMENT"
936 );
937 assert_eq!(tree.text().to_string(), input);
938 }
939
940 #[test]
941 fn explicit_doc_end_closes_document_marker_lives_inside() {
942 let input = "key: value\n...\n";
943 let tree = parse_stream(input);
944 assert_eq!(document_count(&tree), 1);
945 let doc = tree
946 .children()
947 .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
948 .expect("document node");
949 assert!(
950 doc.children_with_tokens().any(|el| el
951 .as_token()
952 .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_END)),
953 "`...` token should live inside YAML_DOCUMENT"
954 );
955 assert_eq!(tree.text().to_string(), input);
956 }
957
958 #[test]
959 fn consecutive_doc_starts_emit_two_documents() {
960 let input = "---\na\n---\nb\n";
961 let tree = parse_stream(input);
962 assert_eq!(document_count(&tree), 2);
963 assert_eq!(tree.text().to_string(), input);
964 }
965
966 #[test]
967 fn pre_document_trivia_stays_at_stream_level() {
968 let input = "\n---\nkey: value\n";
972 let tree = parse_stream(input);
973 let stream_token_kinds: Vec<SyntaxKind> = tree
974 .children_with_tokens()
975 .filter_map(|el| el.into_token())
976 .map(|t| t.kind())
977 .collect();
978 assert!(
979 stream_token_kinds.contains(&SyntaxKind::NEWLINE),
980 "leading newline should be a direct child of YAML_STREAM, got {stream_token_kinds:?}"
981 );
982 assert_eq!(tree.text().to_string(), input);
983 }
984
985 #[test]
986 fn bare_doc_end_at_stream_start_opens_synthetic_empty_document() {
987 let input = "...\n";
991 let tree = parse_stream(input);
992 assert_eq!(document_count(&tree), 1);
993 assert_eq!(tree.text().to_string(), input);
994 }
995
996 fn first_document(tree: &SyntaxNode) -> SyntaxNode {
997 tree.children()
998 .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
999 .expect("at least one document")
1000 }
1001
1002 fn block_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1003 parent
1004 .children()
1005 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1006 }
1007
1008 fn block_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1009 parent
1010 .children()
1011 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
1012 }
1013
1014 fn block_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
1015 map.children()
1016 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
1017 .collect()
1018 }
1019
1020 fn block_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
1021 seq.children()
1022 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
1023 .collect()
1024 }
1025
1026 fn entry_key(entry: &SyntaxNode) -> SyntaxNode {
1027 entry
1028 .children()
1029 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
1030 .expect("entry should have a YAML_BLOCK_MAP_KEY child")
1031 }
1032
1033 fn entry_value(entry: &SyntaxNode) -> SyntaxNode {
1034 entry
1035 .children()
1036 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
1037 .expect("entry should have a YAML_BLOCK_MAP_VALUE child")
1038 }
1039
1040 #[test]
1041 fn consecutive_empty_key_colons_open_separate_entries() {
1042 let input = ": a\n: b\n";
1049 let tree = parse_stream(input);
1050 let doc = first_document(&tree);
1051 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1052 let entries = block_map_entries(&map);
1053 assert_eq!(entries.len(), 2, "expected two empty-key ENTRY nodes");
1054 for (entry, scalar) in entries.iter().zip(["a", "b"]) {
1055 let key = entry_key(entry);
1056 assert!(
1058 !key.children_with_tokens().any(|el| el
1059 .as_token()
1060 .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
1061 "empty key should carry no scalar, got {key:?}",
1062 );
1063 let value = entry_value(entry);
1064 assert!(
1065 value.children_with_tokens().any(|el| el
1066 .as_token()
1067 .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR && t.text() == scalar)),
1068 "value should be {scalar:?}, got {value:?}",
1069 );
1070 }
1071 assert_eq!(tree.text().to_string(), input);
1072 }
1073
1074 #[test]
1075 fn block_mapping_wraps_key_value_with_key_and_value_sub_wrappers() {
1076 let input = "key: value\n";
1077 let tree = parse_stream(input);
1078 let doc = first_document(&tree);
1079 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1080 let entries = block_map_entries(&map);
1081 assert_eq!(entries.len(), 1, "expected one ENTRY for `key: value`");
1082 let key = entry_key(&entries[0]);
1083 let value = entry_value(&entries[0]);
1084 assert!(
1086 key.children_with_tokens().any(|el| el
1087 .as_token()
1088 .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
1089 "colon should be the trailing token of YAML_BLOCK_MAP_KEY",
1090 );
1091 assert!(
1092 value.children_with_tokens().any(|el| el
1093 .as_token()
1094 .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
1095 "scalar `value` should live inside YAML_BLOCK_MAP_VALUE",
1096 );
1097 assert_eq!(tree.text().to_string(), input);
1098 }
1099
1100 #[test]
1101 fn block_sequence_wraps_entries_in_yaml_block_sequence() {
1102 let input = "- a\n- b\n";
1103 let tree = parse_stream(input);
1104 let doc = first_document(&tree);
1105 let seq = block_seq_under(&doc).expect("YAML_BLOCK_SEQUENCE child");
1106 let items = block_seq_items(&seq);
1107 assert_eq!(items.len(), 2, "expected 2 YAML_BLOCK_SEQUENCE_ITEM");
1108 for item in &items {
1110 let dash_count = item
1111 .children_with_tokens()
1112 .filter(|el| {
1113 el.as_token()
1114 .is_some_and(|t| t.kind() == SyntaxKind::YAML_BLOCK_SEQ_ENTRY)
1115 })
1116 .count();
1117 assert_eq!(dash_count, 1, "each item owns exactly one `-` token");
1118 }
1119 assert_eq!(tree.text().to_string(), input);
1120 }
1121
1122 #[test]
1123 fn nested_block_mapping_nests_inner_block_map_inside_outer_value() {
1124 let input = "outer:\n inner: x\n";
1125 let tree = parse_stream(input);
1126 let doc = first_document(&tree);
1127 let outer = block_map_under(&doc).expect("outer YAML_BLOCK_MAP");
1128 let outer_entries = block_map_entries(&outer);
1129 assert_eq!(outer_entries.len(), 1);
1130 let outer_value = entry_value(&outer_entries[0]);
1131 let inner = outer_value
1132 .children()
1133 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1134 .expect("inner YAML_BLOCK_MAP nested under outer VALUE");
1135 let inner_entries = block_map_entries(&inner);
1136 assert_eq!(inner_entries.len(), 1);
1137 let inner_key = entry_key(&inner_entries[0]);
1138 assert!(
1139 inner_key.children_with_tokens().any(|el| el
1140 .as_token()
1141 .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
1142 "inner key should own its colon",
1143 );
1144 assert_eq!(tree.text().to_string(), input);
1145 }
1146
1147 #[test]
1148 fn block_sequence_inside_mapping_nests_under_outer_map_value() {
1149 let input = "items:\n - a\n - b\n";
1150 let tree = parse_stream(input);
1151 let doc = first_document(&tree);
1152 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1153 let entries = block_map_entries(&map);
1154 assert_eq!(entries.len(), 1, "one entry: `items: <seq>`");
1155 let value = entry_value(&entries[0]);
1156 let seq = value
1157 .children()
1158 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
1159 .expect("YAML_BLOCK_SEQUENCE nested under map VALUE");
1160 let items = block_seq_items(&seq);
1161 assert_eq!(items.len(), 2);
1162 assert_eq!(tree.text().to_string(), input);
1163 }
1164
1165 #[test]
1166 fn dedent_closes_inner_block_map_before_next_outer_key() {
1167 let input = "outer:\n inner: x\nsibling: y\n";
1174 let tree = parse_stream(input);
1175 let doc = first_document(&tree);
1176 let outer = block_map_under(&doc).expect("outer YAML_BLOCK_MAP");
1177 let entries = block_map_entries(&outer);
1178 assert_eq!(
1179 entries.len(),
1180 2,
1181 "outer map should have two entries (`outer:` and `sibling:`)",
1182 );
1183 let first_value = entry_value(&entries[0]);
1185 let nested_in_first = first_value
1186 .children()
1187 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1188 .count();
1189 assert_eq!(nested_in_first, 1);
1190 let second_value = entry_value(&entries[1]);
1191 let nested_in_second = second_value
1192 .children()
1193 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1194 .count();
1195 assert_eq!(nested_in_second, 0);
1196 assert_eq!(tree.text().to_string(), input);
1197 }
1198
1199 #[test]
1200 fn block_map_with_two_top_level_entries_emits_two_entry_wrappers() {
1201 let input = "a: 1\nb: 2\n";
1202 let tree = parse_stream(input);
1203 let doc = first_document(&tree);
1204 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1205 assert_eq!(block_map_entries(&map).len(), 2);
1206 assert_eq!(tree.text().to_string(), input);
1207 }
1208
1209 #[test]
1210 fn explicit_key_indicator_question_mark_lives_inside_key() {
1211 let input = "? a\n: b\n";
1215 let tree = parse_stream(input);
1216 let doc = first_document(&tree);
1217 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1218 let entries = block_map_entries(&map);
1219 assert_eq!(entries.len(), 1);
1220 let key = entry_key(&entries[0]);
1221 let has_question = key.children_with_tokens().any(|el| {
1222 el.as_token()
1223 .is_some_and(|t| t.kind() == SyntaxKind::YAML_KEY)
1224 });
1225 assert!(has_question, "`?` should live inside YAML_BLOCK_MAP_KEY");
1226 assert_eq!(tree.text().to_string(), input);
1227 }
1228
1229 #[test]
1230 fn explicit_key_indentless_sequence_wraps_inside_key() {
1231 let input = "?\n- a\n- b\n:\n- c\n- d\n";
1238 let tree = parse_stream(input);
1239 let doc = first_document(&tree);
1240 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1241 let entries = block_map_entries(&map);
1242 assert_eq!(entries.len(), 1);
1243 let key = entry_key(&entries[0]);
1244 assert!(
1245 key.children()
1246 .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE),
1247 "explicit-key block sequence should be wrapped in YAML_BLOCK_SEQUENCE inside KEY",
1248 );
1249 let value = entry_value(&entries[0]);
1250 assert!(
1251 value
1252 .children()
1253 .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE),
1254 "value-side block sequence should remain wrapped",
1255 );
1256 assert_eq!(tree.text().to_string(), input);
1257 }
1258
1259 #[test]
1260 fn empty_key_shorthand_opens_entry_with_empty_key() {
1261 let input = ": value\n";
1265 let tree = parse_stream(input);
1266 let doc = first_document(&tree);
1267 let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1268 let entries = block_map_entries(&map);
1269 assert_eq!(entries.len(), 1);
1270 let key = entry_key(&entries[0]);
1271 assert!(
1273 !key.children_with_tokens().any(|el| el
1274 .as_token()
1275 .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
1276 "empty-key shorthand has no scalar in KEY",
1277 );
1278 assert!(
1279 key.children_with_tokens().any(|el| el
1280 .as_token()
1281 .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
1282 "empty-key KEY still owns the `:` token",
1283 );
1284 let value = entry_value(&entries[0]);
1285 assert!(
1286 value.children_with_tokens().any(|el| el
1287 .as_token()
1288 .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
1289 "VALUE owns the `value` scalar",
1290 );
1291 assert_eq!(tree.text().to_string(), input);
1292 }
1293
1294 #[test]
1295 fn document_end_marker_lives_at_document_level_not_inside_block_map() {
1296 let input = "key: value\n...\n";
1300 let tree = parse_stream(input);
1301 let doc = first_document(&tree);
1302 let has_doc_end = doc.children_with_tokens().any(|el| {
1303 el.as_token()
1304 .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_END)
1305 });
1306 assert!(
1307 has_doc_end,
1308 "DOCUMENT_END should be a direct child of YAML_DOCUMENT"
1309 );
1310 assert_eq!(tree.text().to_string(), input);
1311 }
1312
1313 fn flow_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1314 parent
1315 .children()
1316 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1317 }
1318
1319 fn flow_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1320 parent
1321 .children()
1322 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1323 }
1324
1325 fn flow_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
1326 map.children()
1327 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
1328 .collect()
1329 }
1330
1331 fn flow_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
1332 seq.children()
1333 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
1334 .collect()
1335 }
1336
/// `[a, b, c]` must produce a flow sequence with three item nodes, with the
/// two bracket tokens as direct children of the sequence node, and the tree
/// text must equal the input.
#[test]
fn flow_sequence_wraps_each_item_in_flow_sequence_item() {
    let input = "[a, b, c]\n";
    let tree = parse_stream(input);
    let doc = first_document(&tree);
    let seq = flow_seq_under(&doc).expect("YAML_FLOW_SEQUENCE child");

    let items = flow_seq_items(&seq);
    assert_eq!(items.len(), 3);

    // Count tokens directly under the sequence whose text is `[` or `]`.
    let bracket_count = seq
        .children_with_tokens()
        .filter(|el| {
            el.as_token()
                .is_some_and(|tok| matches!(tok.text(), "[" | "]"))
        })
        .count();
    assert_eq!(bracket_count, 2, "`[` and `]` at SEQUENCE level");

    assert_eq!(tree.text().to_string(), input);
}
1357
/// `{a: 1, b: 2}` must produce a flow map with two entries; each entry has a
/// KEY node containing the `:` token and a VALUE node containing a scalar
/// token. The tree text must equal the input.
#[test]
fn flow_mapping_wraps_each_entry_with_key_and_value() {
    let input = "{a: 1, b: 2}\n";
    let tree = parse_stream(input);
    let doc = first_document(&tree);
    let map = flow_map_under(&doc).expect("YAML_FLOW_MAP child");
    let entries = flow_map_entries(&map);
    assert_eq!(entries.len(), 2);

    for entry in entries.iter() {
        // KEY wrapper and its colon token.
        let key = entry
            .children()
            .find(|child| child.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
            .expect("entry has YAML_FLOW_MAP_KEY");
        let key_owns_colon = key.children_with_tokens().any(|el| {
            matches!(el.as_token(), Some(tok) if tok.kind() == SyntaxKind::YAML_COLON)
        });
        assert!(key_owns_colon, "flow KEY owns trailing `:`");

        // VALUE wrapper and its scalar token.
        let value = entry
            .children()
            .find(|child| child.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
            .expect("entry has YAML_FLOW_MAP_VALUE");
        let value_owns_scalar = value.children_with_tokens().any(|el| {
            matches!(el.as_token(), Some(tok) if tok.kind() == SyntaxKind::YAML_SCALAR)
        });
        assert!(value_owns_scalar, "flow VALUE owns its scalar");
    }

    assert_eq!(tree.text().to_string(), input);
}
1390
/// `[[1, 2], [3, 4]]` must produce an outer flow sequence with two items,
/// each of which has a flow sequence node as a direct child. The tree text
/// must equal the input.
#[test]
fn flow_sequence_inside_flow_sequence_nests_under_outer_item() {
    let input = "[[1, 2], [3, 4]]\n";
    let tree = parse_stream(input);
    let doc = first_document(&tree);
    let outer = flow_seq_under(&doc).expect("outer YAML_FLOW_SEQUENCE");
    let outer_items = flow_seq_items(&outer);
    assert_eq!(outer_items.len(), 2);

    // Stops at the first item lacking a nested sequence, like a per-item assert.
    let every_item_nests = outer_items.iter().all(|item| {
        item.children()
            .any(|child| child.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
    });
    assert!(
        every_item_nests,
        "outer item should contain a nested YAML_FLOW_SEQUENCE"
    );

    assert_eq!(tree.text().to_string(), input);
}
1408
/// `[{a: 1}, {b: 2}]` must produce a flow sequence with two items, each of
/// which has a flow map node as a direct child. The tree text must equal the
/// input.
#[test]
fn flow_mapping_inside_flow_sequence_nests_under_item() {
    let input = "[{a: 1}, {b: 2}]\n";
    let tree = parse_stream(input);
    let doc = first_document(&tree);
    let seq = flow_seq_under(&doc).expect("YAML_FLOW_SEQUENCE child");
    let items = flow_seq_items(&seq);
    assert_eq!(items.len(), 2);

    for item in items.iter() {
        let holds_flow_map = item
            .children()
            .any(|child| child.kind() == SyntaxKind::YAML_FLOW_MAP);
        assert!(
            holds_flow_map,
            "each item should contain a nested YAML_FLOW_MAP"
        );
    }

    assert_eq!(tree.text().to_string(), input);
}
1426
/// For `key: {a: 1, b: 2}` the document has a block map with one entry whose
/// VALUE node has a flow map node as a direct child. The tree text must equal
/// the input.
#[test]
fn flow_mapping_at_block_map_value_nests_under_block_map_value() {
    let input = "key: {a: 1, b: 2}\n";
    let tree = parse_stream(input);
    let doc = first_document(&tree);
    let block_map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&block_map);
    assert_eq!(entries.len(), 1);

    let value = entry_value(&entries[0]);
    let value_holds_flow_map = value
        .children()
        .any(|child| child.kind() == SyntaxKind::YAML_FLOW_MAP);
    assert!(
        value_holds_flow_map,
        "flow map should be nested under outer block map's VALUE"
    );

    assert_eq!(tree.text().to_string(), input);
}
1444
/// A `%TAG` directive followed by `---` and content must yield exactly one
/// document, and that document must have the document-start token as a direct
/// child. The tree text must equal the input.
#[test]
fn directive_prelude_stays_inside_document_opened_by_marker() {
    let input = "%TAG !e! tag:example.com,2000:app/\n---\n!e!foo \"bar\"\n";
    let tree = parse_stream(input);
    assert_eq!(document_count(&tree), 1);

    let doc = first_document(&tree);
    let has_doc_start = doc.children_with_tokens().any(|el| {
        matches!(el.as_token(), Some(tok) if tok.kind() == SyntaxKind::YAML_DOCUMENT_START)
    });
    assert!(has_doc_start, "the `---` should live inside the same doc");

    assert_eq!(tree.text().to_string(), input);
}
1462
/// Two explicit keys with no value (`? a` / `? b`) must produce a block map
/// with two entries, each having both a KEY node and a VALUE node as direct
/// children, and the tree text must equal the input.
#[test]
fn explicit_key_without_value_emits_empty_value_for_shape_parity() {
    let input = "? a\n? b\n";
    let tree = parse_stream(input);
    let doc = first_document(&tree);
    let map = block_map_under(&doc).expect("YAML_BLOCK_MAP");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 2);

    for entry in &entries {
        let has_key = entry
            .children()
            .any(|child| child.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY);
        assert!(has_key, "ENTRY missing KEY child");

        let has_value = entry
            .children()
            .any(|child| child.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);
        assert!(has_value, "ENTRY missing VALUE child");
    }

    // Round-trip check, matching the other structural tests in this module:
    // emitting a VALUE node for a key that has no value must not add, drop,
    // or reorder any input bytes.
    assert_eq!(tree.text().to_string(), input);
}
1489}