Skip to main content

panache_parser/parser/
yaml.rs

//! YAML parser groundwork for long-term Panache integration.
//!
//! This module is intentionally minimal and currently acts as a placeholder for a
//! future in-tree YAML parser that can produce Panache-compatible CST structures.
//! Initial goals:
//! - support plain YAML and hashpipe-prefixed YAML from shared parsing primitives,
//! - preserve lossless syntax/trivia needed for exact host document ranges,
//! - enable shadow-mode comparison against the existing YAML engine before rollout,
//! - prepare for first-class YAML formatting support once parser parity is proven.
10
11#[path = "yaml/events.rs"]
12mod events;
13#[path = "yaml/lexer.rs"]
14mod lexer;
15#[path = "yaml/model.rs"]
16mod model;
17#[path = "yaml/parser.rs"]
18mod parser;
19
20pub use events::project_events;
21pub use lexer::lex_mapping_tokens;
22pub use model::{
23    ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
24    YamlParseReport, YamlToken, YamlTokenSpan, diagnostic_codes,
25};
26pub use parser::{parse_shadow, parse_yaml_report, parse_yaml_tree};
27
28#[cfg(test)]
29mod tests {
30    use super::*;
31    use crate::syntax::SyntaxKind;
32
33    #[test]
34    fn builds_basic_rowan_tree_for_multiline_mapping() {
35        let tree = parse_yaml_tree("title: My Title\nauthor: Me\n").expect("tree");
36        assert_eq!(tree.kind(), SyntaxKind::DOCUMENT);
37        assert_eq!(tree.text().to_string(), "title: My Title\nauthor: Me\n");
38
39        let mapping = tree
40            .descendants()
41            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
42            .expect("yaml block map");
43        let entries: Vec<_> = mapping
44            .children()
45            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
46            .collect();
47        assert_eq!(entries.len(), 2);
48
49        let token_kinds: Vec<_> = mapping
50            .descendants_with_tokens()
51            .filter_map(|el| el.into_token())
52            .map(|tok| tok.kind())
53            .collect();
54        assert_eq!(
55            token_kinds,
56            vec![
57                SyntaxKind::YAML_KEY,
58                SyntaxKind::YAML_COLON,
59                SyntaxKind::WHITESPACE,
60                SyntaxKind::YAML_SCALAR,
61                SyntaxKind::NEWLINE,
62                SyntaxKind::YAML_KEY,
63                SyntaxKind::YAML_COLON,
64                SyntaxKind::WHITESPACE,
65                SyntaxKind::YAML_SCALAR,
66                SyntaxKind::NEWLINE,
67            ]
68        );
69    }
70
71    #[test]
72    fn mapping_nodes_preserve_entry_text_boundaries() {
73        let tree = parse_yaml_tree("title: A\nauthor: B\n").expect("tree");
74        let mapping = tree
75            .descendants()
76            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
77            .expect("yaml block map");
78
79        let entry_texts: Vec<_> = mapping
80            .children()
81            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
82            .map(|n| n.text().to_string())
83            .collect();
84        assert_eq!(
85            entry_texts,
86            vec!["title: A\n".to_string(), "author: B\n".to_string(),]
87        );
88    }
89
90    #[test]
91    fn splits_mapping_on_colon_outside_quoted_key() {
92        let input = "\"foo:bar\": 23\n'x:y': 24\n";
93        let tree = parse_yaml_tree(input).expect("tree");
94        assert_eq!(tree.text().to_string(), input);
95
96        let keys: Vec<String> = tree
97            .descendants_with_tokens()
98            .filter_map(|el| el.into_token())
99            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
100            .map(|tok| tok.text().to_string())
101            .collect();
102        assert_eq!(keys, vec!["\"foo:bar\"".to_string(), "'x:y'".to_string()]);
103    }
104
105    #[test]
106    fn splits_mapping_on_colon_outside_flow_key() {
107        let input = "{a: b}: 23\n";
108        let tree = parse_yaml_tree(input).expect("tree");
109        assert_eq!(tree.text().to_string(), input);
110
111        let keys: Vec<String> = tree
112            .descendants_with_tokens()
113            .filter_map(|el| el.into_token())
114            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
115            .map(|tok| tok.text().to_string())
116            .collect();
117        assert_eq!(keys, vec!["{a: b}".to_string()]);
118    }
119
120    #[test]
121    fn keeps_colon_inside_escaped_double_quoted_key() {
122        let input = "\"foo\\\":bar\": 23\n";
123        let tree = parse_yaml_tree(input).expect("tree");
124        assert_eq!(tree.text().to_string(), input);
125
126        let keys: Vec<String> = tree
127            .descendants_with_tokens()
128            .filter_map(|el| el.into_token())
129            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
130            .map(|tok| tok.text().to_string())
131            .collect();
132        assert_eq!(keys, vec!["\"foo\\\":bar\"".to_string()]);
133    }
134
135    #[test]
136    fn keeps_hash_in_double_quoted_scalar_value() {
137        let input = "foo: \"a#b\"\n";
138        let tree = parse_yaml_tree(input).expect("tree");
139
140        let comment_count = tree
141            .descendants_with_tokens()
142            .filter_map(|el| el.into_token())
143            .filter(|tok| tok.kind() == SyntaxKind::YAML_COMMENT)
144            .count();
145        assert_eq!(comment_count, 0);
146
147        let scalar_values: Vec<String> = tree
148            .descendants_with_tokens()
149            .filter_map(|el| el.into_token())
150            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
151            .map(|tok| tok.text().to_string())
152            .collect();
153        assert_eq!(scalar_values, vec!["\"a#b\"".to_string()]);
154    }
155
156    #[test]
157    fn keeps_colon_inside_single_quoted_key_with_escaped_quote() {
158        let input = "'foo'':bar': 23\n";
159        let tree = parse_yaml_tree(input).expect("tree");
160        assert_eq!(tree.text().to_string(), input);
161
162        let keys: Vec<String> = tree
163            .descendants_with_tokens()
164            .filter_map(|el| el.into_token())
165            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
166            .map(|tok| tok.text().to_string())
167            .collect();
168        assert_eq!(keys, vec!["'foo'':bar'".to_string()]);
169    }
170
171    #[test]
172    fn preserves_explicit_tag_tokens_in_key_and_value() {
173        let input = "!!str a: !!int 42\n";
174        let tree = parse_yaml_tree(input).expect("tree");
175        assert_eq!(tree.text().to_string(), input);
176
177        let tag_tokens: Vec<_> = tree
178            .descendants_with_tokens()
179            .filter_map(|el| el.into_token())
180            .filter(|tok| tok.kind() == SyntaxKind::YAML_TAG)
181            .map(|tok| tok.text().to_string())
182            .collect();
183        assert_eq!(tag_tokens, vec!["!!str".to_string(), "!!int".to_string()]);
184    }
185
186    #[test]
187    fn lexer_emits_tokens_for_quoted_keys_and_inline_comments() {
188        let input = "\"foo:bar\": 23 # note\n'x:y': 'z' # ok\n";
189        let tokens = lex_mapping_tokens(input).expect("tokens");
190        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
191        assert_eq!(
192            kinds,
193            vec![
194                YamlToken::Key,
195                YamlToken::Colon,
196                YamlToken::Whitespace,
197                YamlToken::Scalar,
198                YamlToken::Whitespace,
199                YamlToken::Comment,
200                YamlToken::Newline,
201                YamlToken::Key,
202                YamlToken::Colon,
203                YamlToken::Whitespace,
204                YamlToken::Scalar,
205                YamlToken::Whitespace,
206                YamlToken::Comment,
207                YamlToken::Newline,
208            ]
209        );
210        let comments: Vec<_> = tokens
211            .iter()
212            .filter(|t| t.kind == YamlToken::Comment)
213            .map(|t| t.text)
214            .collect();
215        assert_eq!(comments, vec!["# note", "# ok"]);
216    }
217
218    #[test]
219    fn lexer_emits_indent_and_dedent_for_indented_entries() {
220        let input = "root: 1\n  child: 2\n";
221        let tokens = lex_mapping_tokens(input).expect("tokens");
222        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
223        assert!(kinds.contains(&YamlToken::Indent));
224        assert!(kinds.contains(&YamlToken::Dedent));
225    }
226
227    #[test]
228    fn lexer_emits_document_start_marker_token() {
229        let input = "---\n";
230        let tokens = lex_mapping_tokens(input).expect("tokens");
231        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
232        assert_eq!(kinds, vec![YamlToken::DocumentStart, YamlToken::Newline,]);
233    }
234
235    #[test]
236    fn lexer_emits_flow_tokens_for_standalone_flow_mapping() {
237        let input = "{foo: bar}\n";
238        let tokens = lex_mapping_tokens(input).expect("tokens");
239        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
240        assert_eq!(
241            kinds,
242            vec![
243                YamlToken::FlowMapStart,
244                YamlToken::Scalar,
245                YamlToken::FlowMapEnd,
246                YamlToken::Newline,
247            ]
248        );
249    }
250
251    #[test]
252    fn lexer_emits_flow_sequence_tokens_in_mapping_value() {
253        let input = "a: [b, c]\n";
254        let tokens = lex_mapping_tokens(input).expect("tokens");
255        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
256        assert_eq!(
257            kinds,
258            vec![
259                YamlToken::Key,
260                YamlToken::Colon,
261                YamlToken::Whitespace,
262                YamlToken::FlowSeqStart,
263                YamlToken::Scalar,
264                YamlToken::Comma,
265                YamlToken::Scalar,
266                YamlToken::FlowSeqEnd,
267                YamlToken::Newline,
268            ]
269        );
270    }
271
272    #[test]
273    fn lexer_tokens_round_trip_input_bytes_for_supported_cases() {
274        let cases = [
275            "foo: bar\n",
276            "a: [b, c]\n",
277            "---\nfoo: bar\n...\n",
278            "%YAML 1.2\nfoo: \"a#b\"\n",
279        ];
280
281        for input in cases {
282            let tokens = lex_mapping_tokens(input).expect("tokens");
283            let rebuilt = tokens.iter().map(|t| t.text).collect::<String>();
284            assert_eq!(rebuilt, input);
285        }
286    }
287
288    #[test]
289    fn lexer_emits_monotonic_byte_ranges() {
290        let input = "root: 1\n  child: 2\n";
291        let tokens = lex_mapping_tokens(input).expect("tokens");
292
293        let mut offset = 0usize;
294        for token in tokens {
295            if token.text.is_empty() {
296                assert_eq!(token.byte_start, offset);
297                assert_eq!(token.byte_end, offset);
298                continue;
299            }
300
301            assert_eq!(token.byte_start, offset);
302            assert_eq!(&input[token.byte_start..token.byte_end], token.text);
303            offset = token.byte_end;
304        }
305
306        assert_eq!(offset, input.len());
307    }
308
309    #[test]
310    fn parser_preserves_document_markers_and_directives() {
311        let input = "%YAML 1.2\n---\nfoo: bar\n...\n";
312        let tree = parse_yaml_tree(input).expect("tree");
313        assert_eq!(tree.text().to_string(), input);
314
315        let scalar_tokens: Vec<String> = tree
316            .descendants_with_tokens()
317            .filter_map(|el| el.into_token())
318            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
319            .map(|tok| tok.text().to_string())
320            .collect();
321
322        assert!(scalar_tokens.contains(&"%YAML 1.2".to_string()));
323        assert!(scalar_tokens.contains(&"bar".to_string()));
324
325        let has_doc_start = tree
326            .descendants_with_tokens()
327            .filter_map(|el| el.into_token())
328            .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START && tok.text() == "---");
329        assert!(has_doc_start, "--- should be a YAML_DOCUMENT_START token");
330
331        let has_doc_end = tree
332            .descendants_with_tokens()
333            .filter_map(|el| el.into_token())
334            .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END && tok.text() == "...");
335        assert!(has_doc_end, "... should be a YAML_DOCUMENT_END token");
336    }
337
338    #[test]
339    fn parser_preserves_standalone_flow_mapping_lines() {
340        let input = "{foo: bar}\n";
341        let tree = parse_yaml_tree(input).expect("tree");
342        assert_eq!(tree.text().to_string(), input);
343
344        let flow_entry_count = tree
345            .descendants()
346            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
347            .count();
348        assert_eq!(flow_entry_count, 1);
349
350        let flow_values: Vec<String> = tree
351            .descendants()
352            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
353            .map(|n| n.text().to_string())
354            .collect();
355        assert_eq!(flow_values, vec![" bar".to_string()]);
356    }
357
358    #[test]
359    fn parser_preserves_top_level_quoted_scalar_document() {
360        let input = "\"foo: bar\\\": baz\"\n";
361        let tree = parse_yaml_tree(input).expect("tree");
362        assert_eq!(tree.text().to_string(), input);
363    }
364
365    #[test]
366    fn parse_yaml_report_emits_error_code_for_invalid_yaml() {
367        let report = parse_yaml_report("this\n is\n  invalid: x\n");
368        assert!(report.tree.is_none());
369        assert_eq!(report.diagnostics.len(), 1);
370        assert_eq!(
371            report.diagnostics[0].code,
372            diagnostic_codes::PARSE_UNEXPECTED_INDENT
373        );
374    }
375
376    #[test]
377    fn parse_yaml_report_detects_trailing_content_after_document_end() {
378        let report = parse_yaml_report("---\nkey: value\n... invalid\n");
379        assert!(report.tree.is_none());
380        assert_eq!(report.diagnostics.len(), 1);
381        assert_eq!(
382            report.diagnostics[0].code,
383            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END
384        );
385    }
386
387    #[test]
388    fn parse_yaml_report_detects_unexpected_flow_closer() {
389        let report = parse_yaml_report("---\n[ a, b, c ] ]\n");
390        assert!(report.tree.is_none());
391        assert_eq!(report.diagnostics.len(), 1);
392        assert_eq!(
393            report.diagnostics[0].code,
394            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
395        );
396    }
397
398    #[test]
399    fn parse_yaml_report_detects_unterminated_nested_flow_sequence() {
400        let report = parse_yaml_report("---\n[ [ a, b, c ]\n");
401        assert!(report.tree.is_none());
402        assert_eq!(report.diagnostics.len(), 1);
403        assert_eq!(
404            report.diagnostics[0].code,
405            diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE
406        );
407    }
408
409    #[test]
410    fn parse_yaml_report_detects_invalid_leading_flow_sequence_comma() {
411        let report = parse_yaml_report("---\n[ , a, b, c ]\n");
412        assert!(report.tree.is_none());
413        assert_eq!(report.diagnostics.len(), 1);
414        assert_eq!(
415            report.diagnostics[0].code,
416            diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
417        );
418    }
419
420    #[test]
421    fn parse_yaml_report_detects_trailing_content_after_flow_end() {
422        let report = parse_yaml_report("---\n[ a, b, c, ]#invalid\n");
423        assert!(report.tree.is_none());
424        assert_eq!(report.diagnostics.len(), 1);
425        assert_eq!(
426            report.diagnostics[0].code,
427            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
428        );
429    }
430
431    #[test]
432    fn parse_yaml_report_detects_invalid_double_quoted_escape() {
433        let report = parse_yaml_report("---\n\"\\.\"\n");
434        assert!(report.tree.is_none());
435        assert_eq!(report.diagnostics.len(), 1);
436        assert_eq!(
437            report.diagnostics[0].code,
438            diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE
439        );
440    }
441
442    #[test]
443    fn parse_yaml_report_detects_trailing_content_after_document_start() {
444        let report = parse_yaml_report("--- key1: value1\n    key2: value2\n");
445        assert!(report.tree.is_none());
446        assert_eq!(report.diagnostics.len(), 1);
447        assert_eq!(
448            report.diagnostics[0].code,
449            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START
450        );
451    }
452
453    #[test]
454    fn parse_yaml_report_detects_directive_without_document_start() {
455        let report = parse_yaml_report("%YAML 1.2\n");
456        assert!(report.tree.is_none());
457        assert_eq!(report.diagnostics.len(), 1);
458        assert_eq!(
459            report.diagnostics[0].code,
460            diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
461        );
462    }
463
464    #[test]
465    fn parse_yaml_report_detects_directive_after_content() {
466        let report = parse_yaml_report("!foo \"bar\"\n%TAG ! tag:example.com,2000:app/\n---\n");
467        assert!(report.tree.is_none());
468        assert_eq!(report.diagnostics.len(), 1);
469        assert_eq!(
470            report.diagnostics[0].code,
471            diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT
472        );
473    }
474
475    #[test]
476    fn parse_yaml_report_detects_wrong_indented_flow_continuation() {
477        let report = parse_yaml_report("---\nflow: [a,\nb,\nc]\n");
478        assert!(report.tree.is_none());
479        assert_eq!(report.diagnostics.len(), 1);
480        assert_eq!(
481            report.diagnostics[0].code,
482            diagnostic_codes::LEX_WRONG_INDENTED_FLOW
483        );
484    }
485
486    #[test]
487    fn parser_builds_flow_sequence_nodes_in_mapping_value() {
488        let input = "a: [b, c]\n";
489        let tree = parse_yaml_tree(input).expect("tree");
490        assert_eq!(tree.text().to_string(), input);
491
492        let seq = tree
493            .descendants()
494            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
495            .expect("flow sequence node");
496        let item_count = seq
497            .children()
498            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
499            .count();
500        assert_eq!(item_count, 2);
501    }
502
503    #[test]
504    fn parser_absorbs_literal_block_scalar_into_map_value() {
505        let input = "a: |\n  line1\n  line2\n";
506        let tree = parse_yaml_tree(input).expect("tree");
507        assert_eq!(tree.text().to_string(), input);
508
509        let map = tree
510            .descendants()
511            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
512            .expect("block map");
513        let entry = map
514            .children()
515            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
516            .expect("entry");
517        let value = entry
518            .children()
519            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
520            .expect("value");
521        let value_text = value.text().to_string();
522        assert!(
523            value_text.starts_with('|') || value_text.starts_with(" |"),
524            "value should contain the `|` header, got {value_text:?}"
525        );
526        assert!(
527            value_text.contains("line1") && value_text.contains("line2"),
528            "value should absorb block scalar content, got {value_text:?}"
529        );
530    }
531
532    #[test]
533    fn lexer_emits_literal_block_scalar_header_and_content() {
534        let input = "a: |\n  line1\n  line2\n";
535        let tokens = lex_mapping_tokens(input).expect("tokens");
536        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
537        assert_eq!(
538            kinds,
539            vec![
540                YamlToken::Key,
541                YamlToken::Colon,
542                YamlToken::Whitespace,
543                YamlToken::BlockScalarHeader,
544                YamlToken::Newline,
545                YamlToken::BlockScalarContent,
546                YamlToken::Newline,
547                YamlToken::BlockScalarContent,
548                YamlToken::Newline,
549            ]
550        );
551        let texts: Vec<_> = tokens.iter().map(|t| t.text).collect();
552        assert_eq!(
553            texts,
554            vec!["a", ":", " ", "|", "\n", "  line1", "\n", "  line2", "\n"]
555        );
556    }
557
558    #[test]
559    fn parser_builds_nested_block_sequence_on_same_line() {
560        let input = "- - a\n  - b\n- c\n";
561        let tree = parse_yaml_tree(input).expect("tree");
562        assert_eq!(tree.text().to_string(), input);
563
564        let outer = tree
565            .descendants()
566            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
567            .expect("outer block sequence");
568        let outer_items: Vec<_> = outer
569            .children()
570            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
571            .collect();
572        assert_eq!(outer_items.len(), 2);
573
574        let nested = outer_items[0]
575            .children()
576            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
577            .expect("nested block sequence inside first item");
578        let nested_items = nested
579            .children()
580            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
581            .count();
582        assert_eq!(nested_items, 2);
583    }
584
585    #[test]
586    fn parser_builds_multiline_flow_map_inside_block_sequence_item() {
587        let input = "- { multi\n  line, a: b}\n";
588        let tree = parse_yaml_tree(input).expect("tree");
589        assert_eq!(tree.text().to_string(), input);
590
591        let seq = tree
592            .descendants()
593            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
594            .expect("block sequence");
595        let item = seq
596            .children()
597            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
598            .expect("sequence item");
599        let flow_map = item
600            .children()
601            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
602            .expect("flow map inside sequence item");
603        let entry_count = flow_map
604            .children()
605            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
606            .count();
607        assert_eq!(entry_count, 2);
608    }
609
610    #[test]
611    fn parser_builds_flow_sequence_inside_block_sequence_item() {
612        let input = "- [a, b]\n- [c, d]\n";
613        let tree = parse_yaml_tree(input).expect("tree");
614        assert_eq!(tree.text().to_string(), input);
615
616        let seq = tree
617            .descendants()
618            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
619            .expect("block sequence");
620        let items: Vec<_> = seq
621            .children()
622            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
623            .collect();
624        assert_eq!(items.len(), 2);
625
626        for item in &items {
627            let flow = item
628                .children()
629                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
630                .expect("flow sequence inside item");
631            let flow_items = flow
632                .children()
633                .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
634                .count();
635            assert_eq!(flow_items, 2);
636        }
637    }
638
639    #[test]
640    fn lexer_recognizes_single_bang_tag_in_top_level_scalar() {
641        let tokens = lex_mapping_tokens("! a\n").expect("tokens");
642        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
643        assert_eq!(
644            kinds,
645            vec![
646                YamlToken::Tag,
647                YamlToken::Whitespace,
648                YamlToken::Scalar,
649                YamlToken::Newline,
650            ]
651        );
652        let texts: Vec<_> = tokens.iter().map(|t| t.text).collect();
653        assert_eq!(texts, vec!["!", " ", "a", "\n"]);
654    }
655
656    #[test]
657    fn parser_emits_scalar_document_for_tag_without_colon() {
658        let input = "! a\n";
659        let tree = parse_yaml_tree(input).expect("tree");
660        assert_eq!(tree.text().to_string(), input);
661
662        let has_block_map = tree
663            .descendants()
664            .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP);
665        assert!(
666            !has_block_map,
667            "scalar document should not be wrapped in YAML_BLOCK_MAP"
668        );
669
670        let has_tag = tree
671            .descendants_with_tokens()
672            .filter_map(|el| el.into_token())
673            .any(|tok| tok.kind() == SyntaxKind::YAML_TAG && tok.text() == "!");
674        assert!(has_tag, "tree should contain YAML_TAG '!'");
675    }
676
677    #[test]
678    fn lexer_extracts_explicit_tag_before_block_sequence_scalar() {
679        let tokens = lex_mapping_tokens("- !!int 1\n").expect("tokens");
680        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
681        assert_eq!(
682            kinds,
683            vec![
684                YamlToken::BlockSeqEntry,
685                YamlToken::Whitespace,
686                YamlToken::Tag,
687                YamlToken::Whitespace,
688                YamlToken::Scalar,
689                YamlToken::Newline,
690            ]
691        );
692        let texts: Vec<_> = tokens.iter().map(|t| t.text).collect();
693        assert_eq!(texts, vec!["-", " ", "!!int", " ", "1", "\n"]);
694    }
695
696    #[test]
697    fn parser_builds_nested_block_map_inside_block_sequence() {
698        let input = "-\n  name: Mark\n  hr: 65\n";
699        let tree = parse_yaml_tree(input).expect("tree");
700        assert_eq!(tree.text().to_string(), input);
701
702        let seq = tree
703            .descendants()
704            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
705            .expect("block sequence");
706        let items: Vec<_> = seq
707            .children()
708            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
709            .collect();
710        assert_eq!(items.len(), 1);
711
712        let nested_map = items[0]
713            .children()
714            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
715            .expect("nested block map inside sequence item");
716        let entry_count = nested_map
717            .children()
718            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
719            .count();
720        assert_eq!(entry_count, 2);
721    }
722
723    #[test]
724    fn parser_builds_nested_block_map_from_indent_tokens() {
725        let input = "root: 1\n  child: 2\n";
726        let tree = parse_yaml_tree(input).expect("tree");
727
728        let outer_map = tree
729            .descendants()
730            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
731            .expect("outer map");
732        let outer_entry = outer_map
733            .children()
734            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
735            .expect("outer entry");
736        let outer_value = outer_entry
737            .children()
738            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
739            .expect("outer value");
740
741        let nested_map = outer_value
742            .children()
743            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
744            .expect("nested map");
745        let nested_entry_count = nested_map
746            .children()
747            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
748            .count();
749        assert_eq!(nested_entry_count, 1);
750    }
751
752    #[test]
753    fn shadow_parse_is_disabled_by_default() {
754        let report = parse_shadow("title: My Title", ShadowYamlOptions::default());
755        assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
756        assert_eq!(report.shadow_reason, "shadow-disabled");
757        assert_eq!(report.normalized_input, None);
758    }
759
760    #[test]
761    fn shadow_parse_skips_when_disabled_even_for_valid_input() {
762        let report = parse_shadow(
763            "title: My Title",
764            ShadowYamlOptions {
765                enabled: false,
766                input_kind: YamlInputKind::Plain,
767            },
768        );
769        assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
770        assert_eq!(report.shadow_reason, "shadow-disabled");
771    }
772
773    #[test]
774    fn shadow_parse_reports_prototype_parsed_when_enabled() {
775        let report = parse_shadow(
776            "title: My Title",
777            ShadowYamlOptions {
778                enabled: true,
779                input_kind: YamlInputKind::Plain,
780            },
781        );
782        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
783        assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
784        assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
785    }
786
787    #[test]
788    fn shadow_parse_reports_prototype_rejected_when_enabled() {
789        // Tab indentation is prohibited by YAML spec for block structures
790        let report = parse_shadow(
791            "\ttitle: value",
792            ShadowYamlOptions {
793                enabled: true,
794                input_kind: YamlInputKind::Plain,
795            },
796        );
797        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeRejected);
798        assert_eq!(report.shadow_reason, "prototype-basic-mapping-rejected");
799    }
800
801    #[test]
802    fn shadow_parse_accepts_hashpipe_mode_but_remains_prototype_scoped() {
803        let report = parse_shadow(
804            "#| title: My Title",
805            ShadowYamlOptions {
806                enabled: true,
807                input_kind: YamlInputKind::Hashpipe,
808            },
809        );
810        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
811        assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
812        assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
813    }
814}