Skip to main content

panache_parser/parser/
yaml.rs

//! YAML parser groundwork for long-term Panache integration.
//!
//! This module is intentionally minimal and currently acts as a placeholder for a
//! future in-tree YAML parser that can produce Panache-compatible CST structures.
//! Initial goals:
//! - support plain YAML and hashpipe-prefixed YAML from shared parsing primitives,
//! - preserve lossless syntax/trivia needed for exact host document ranges,
//! - enable shadow-mode comparison against the existing YAML engine before rollout,
//! - prepare for first-class YAML formatting support once parser parity is proven.
10
11#[path = "yaml/events.rs"]
12mod events;
13#[path = "yaml/lexer.rs"]
14mod lexer;
15#[path = "yaml/model.rs"]
16mod model;
17#[path = "yaml/parser.rs"]
18mod parser;
19
20pub use events::project_events;
21pub use lexer::lex_mapping_tokens;
22pub use model::{
23    ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
24    YamlParseReport, YamlToken, YamlTokenSpan, diagnostic_codes,
25};
26pub use parser::{parse_shadow, parse_yaml_report, parse_yaml_tree};
27
28#[cfg(test)]
29mod tests {
30    use super::*;
31    use crate::syntax::SyntaxKind;
32
33    #[test]
34    fn builds_basic_rowan_tree_for_multiline_mapping() {
35        let tree = parse_yaml_tree("title: My Title\nauthor: Me\n").expect("tree");
36        assert_eq!(tree.kind(), SyntaxKind::DOCUMENT);
37        assert_eq!(tree.text().to_string(), "title: My Title\nauthor: Me\n");
38
39        let content = tree
40            .children()
41            .find(|n| n.kind() == SyntaxKind::YAML_METADATA_CONTENT)
42            .expect("yaml metadata content");
43        let mapping = content
44            .children()
45            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
46            .expect("yaml block map");
47        let entries: Vec<_> = mapping
48            .children()
49            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
50            .collect();
51        assert_eq!(entries.len(), 2);
52
53        let token_kinds: Vec<_> = mapping
54            .descendants_with_tokens()
55            .filter_map(|el| el.into_token())
56            .map(|tok| tok.kind())
57            .collect();
58        assert_eq!(
59            token_kinds,
60            vec![
61                SyntaxKind::YAML_KEY,
62                SyntaxKind::YAML_COLON,
63                SyntaxKind::WHITESPACE,
64                SyntaxKind::YAML_SCALAR,
65                SyntaxKind::NEWLINE,
66                SyntaxKind::YAML_KEY,
67                SyntaxKind::YAML_COLON,
68                SyntaxKind::WHITESPACE,
69                SyntaxKind::YAML_SCALAR,
70                SyntaxKind::NEWLINE,
71            ]
72        );
73    }
74
75    #[test]
76    fn mapping_nodes_preserve_entry_text_boundaries() {
77        let tree = parse_yaml_tree("title: A\nauthor: B\n").expect("tree");
78        let content = tree
79            .children()
80            .find(|n| n.kind() == SyntaxKind::YAML_METADATA_CONTENT)
81            .expect("yaml metadata content");
82        let mapping = content
83            .children()
84            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
85            .expect("yaml block map");
86
87        let entry_texts: Vec<_> = mapping
88            .children()
89            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
90            .map(|n| n.text().to_string())
91            .collect();
92        assert_eq!(
93            entry_texts,
94            vec!["title: A\n".to_string(), "author: B\n".to_string(),]
95        );
96    }
97
98    #[test]
99    fn splits_mapping_on_colon_outside_quoted_key() {
100        let input = "\"foo:bar\": 23\n'x:y': 24\n";
101        let tree = parse_yaml_tree(input).expect("tree");
102        assert_eq!(tree.text().to_string(), input);
103
104        let keys: Vec<String> = tree
105            .descendants_with_tokens()
106            .filter_map(|el| el.into_token())
107            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
108            .map(|tok| tok.text().to_string())
109            .collect();
110        assert_eq!(keys, vec!["\"foo:bar\"".to_string(), "'x:y'".to_string()]);
111    }
112
113    #[test]
114    fn splits_mapping_on_colon_outside_flow_key() {
115        let input = "{a: b}: 23\n";
116        let tree = parse_yaml_tree(input).expect("tree");
117        assert_eq!(tree.text().to_string(), input);
118
119        let keys: Vec<String> = tree
120            .descendants_with_tokens()
121            .filter_map(|el| el.into_token())
122            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
123            .map(|tok| tok.text().to_string())
124            .collect();
125        assert_eq!(keys, vec!["{a: b}".to_string()]);
126    }
127
128    #[test]
129    fn keeps_colon_inside_escaped_double_quoted_key() {
130        let input = "\"foo\\\":bar\": 23\n";
131        let tree = parse_yaml_tree(input).expect("tree");
132        assert_eq!(tree.text().to_string(), input);
133
134        let keys: Vec<String> = tree
135            .descendants_with_tokens()
136            .filter_map(|el| el.into_token())
137            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
138            .map(|tok| tok.text().to_string())
139            .collect();
140        assert_eq!(keys, vec!["\"foo\\\":bar\"".to_string()]);
141    }
142
143    #[test]
144    fn keeps_hash_in_double_quoted_scalar_value() {
145        let input = "foo: \"a#b\"\n";
146        let tree = parse_yaml_tree(input).expect("tree");
147
148        let comment_count = tree
149            .descendants_with_tokens()
150            .filter_map(|el| el.into_token())
151            .filter(|tok| tok.kind() == SyntaxKind::YAML_COMMENT)
152            .count();
153        assert_eq!(comment_count, 0);
154
155        let scalar_values: Vec<String> = tree
156            .descendants_with_tokens()
157            .filter_map(|el| el.into_token())
158            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
159            .map(|tok| tok.text().to_string())
160            .collect();
161        assert_eq!(scalar_values, vec!["\"a#b\"".to_string()]);
162    }
163
164    #[test]
165    fn keeps_colon_inside_single_quoted_key_with_escaped_quote() {
166        let input = "'foo'':bar': 23\n";
167        let tree = parse_yaml_tree(input).expect("tree");
168        assert_eq!(tree.text().to_string(), input);
169
170        let keys: Vec<String> = tree
171            .descendants_with_tokens()
172            .filter_map(|el| el.into_token())
173            .filter(|tok| tok.kind() == SyntaxKind::YAML_KEY)
174            .map(|tok| tok.text().to_string())
175            .collect();
176        assert_eq!(keys, vec!["'foo'':bar'".to_string()]);
177    }
178
179    #[test]
180    fn preserves_explicit_tag_tokens_in_key_and_value() {
181        let input = "!!str a: !!int 42\n";
182        let tree = parse_yaml_tree(input).expect("tree");
183        assert_eq!(tree.text().to_string(), input);
184
185        let tag_tokens: Vec<_> = tree
186            .descendants_with_tokens()
187            .filter_map(|el| el.into_token())
188            .filter(|tok| tok.kind() == SyntaxKind::YAML_TAG)
189            .map(|tok| tok.text().to_string())
190            .collect();
191        assert_eq!(tag_tokens, vec!["!!str".to_string(), "!!int".to_string()]);
192    }
193
194    #[test]
195    fn lexer_emits_tokens_for_quoted_keys_and_inline_comments() {
196        let input = "\"foo:bar\": 23 # note\n'x:y': 'z' # ok\n";
197        let tokens = lex_mapping_tokens(input).expect("tokens");
198        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
199        assert_eq!(
200            kinds,
201            vec![
202                YamlToken::Key,
203                YamlToken::Colon,
204                YamlToken::Whitespace,
205                YamlToken::Scalar,
206                YamlToken::Whitespace,
207                YamlToken::Comment,
208                YamlToken::Newline,
209                YamlToken::Key,
210                YamlToken::Colon,
211                YamlToken::Whitespace,
212                YamlToken::Scalar,
213                YamlToken::Whitespace,
214                YamlToken::Comment,
215                YamlToken::Newline,
216            ]
217        );
218        let comments: Vec<_> = tokens
219            .iter()
220            .filter(|t| t.kind == YamlToken::Comment)
221            .map(|t| t.text)
222            .collect();
223        assert_eq!(comments, vec!["# note", "# ok"]);
224    }
225
226    #[test]
227    fn lexer_emits_indent_and_dedent_for_indented_entries() {
228        let input = "root: 1\n  child: 2\n";
229        let tokens = lex_mapping_tokens(input).expect("tokens");
230        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
231        assert!(kinds.contains(&YamlToken::Indent));
232        assert!(kinds.contains(&YamlToken::Dedent));
233    }
234
235    #[test]
236    fn lexer_emits_document_start_marker_token() {
237        let input = "---\n";
238        let tokens = lex_mapping_tokens(input).expect("tokens");
239        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
240        assert_eq!(kinds, vec![YamlToken::DocumentStart, YamlToken::Newline,]);
241    }
242
243    #[test]
244    fn lexer_emits_flow_tokens_for_standalone_flow_mapping() {
245        let input = "{foo: bar}\n";
246        let tokens = lex_mapping_tokens(input).expect("tokens");
247        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
248        assert_eq!(
249            kinds,
250            vec![
251                YamlToken::FlowMapStart,
252                YamlToken::Scalar,
253                YamlToken::FlowMapEnd,
254                YamlToken::Newline,
255            ]
256        );
257    }
258
259    #[test]
260    fn lexer_emits_flow_sequence_tokens_in_mapping_value() {
261        let input = "a: [b, c]\n";
262        let tokens = lex_mapping_tokens(input).expect("tokens");
263        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
264        assert_eq!(
265            kinds,
266            vec![
267                YamlToken::Key,
268                YamlToken::Colon,
269                YamlToken::Whitespace,
270                YamlToken::FlowSeqStart,
271                YamlToken::Scalar,
272                YamlToken::Comma,
273                YamlToken::Scalar,
274                YamlToken::FlowSeqEnd,
275                YamlToken::Newline,
276            ]
277        );
278    }
279
280    #[test]
281    fn lexer_tokens_round_trip_input_bytes_for_supported_cases() {
282        let cases = [
283            "foo: bar\n",
284            "a: [b, c]\n",
285            "---\nfoo: bar\n...\n",
286            "%YAML 1.2\nfoo: \"a#b\"\n",
287        ];
288
289        for input in cases {
290            let tokens = lex_mapping_tokens(input).expect("tokens");
291            let rebuilt = tokens.iter().map(|t| t.text).collect::<String>();
292            assert_eq!(rebuilt, input);
293        }
294    }
295
296    #[test]
297    fn lexer_emits_monotonic_byte_ranges() {
298        let input = "root: 1\n  child: 2\n";
299        let tokens = lex_mapping_tokens(input).expect("tokens");
300
301        let mut offset = 0usize;
302        for token in tokens {
303            if token.text.is_empty() {
304                assert_eq!(token.byte_start, offset);
305                assert_eq!(token.byte_end, offset);
306                continue;
307            }
308
309            assert_eq!(token.byte_start, offset);
310            assert_eq!(&input[token.byte_start..token.byte_end], token.text);
311            offset = token.byte_end;
312        }
313
314        assert_eq!(offset, input.len());
315    }
316
317    #[test]
318    fn parser_preserves_document_markers_and_directives() {
319        let input = "%YAML 1.2\n---\nfoo: bar\n...\n";
320        let tree = parse_yaml_tree(input).expect("tree");
321        assert_eq!(tree.text().to_string(), input);
322
323        let scalar_tokens: Vec<String> = tree
324            .descendants_with_tokens()
325            .filter_map(|el| el.into_token())
326            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
327            .map(|tok| tok.text().to_string())
328            .collect();
329
330        assert!(scalar_tokens.contains(&"%YAML 1.2".to_string()));
331        assert!(scalar_tokens.contains(&"bar".to_string()));
332
333        let has_doc_start = tree
334            .descendants_with_tokens()
335            .filter_map(|el| el.into_token())
336            .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START && tok.text() == "---");
337        assert!(has_doc_start, "--- should be a YAML_DOCUMENT_START token");
338
339        let has_doc_end = tree
340            .descendants_with_tokens()
341            .filter_map(|el| el.into_token())
342            .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END && tok.text() == "...");
343        assert!(has_doc_end, "... should be a YAML_DOCUMENT_END token");
344    }
345
346    #[test]
347    fn parser_preserves_standalone_flow_mapping_lines() {
348        let input = "{foo: bar}\n";
349        let tree = parse_yaml_tree(input).expect("tree");
350        assert_eq!(tree.text().to_string(), input);
351
352        let flow_entry_count = tree
353            .descendants()
354            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
355            .count();
356        assert_eq!(flow_entry_count, 1);
357
358        let flow_values: Vec<String> = tree
359            .descendants()
360            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
361            .map(|n| n.text().to_string())
362            .collect();
363        assert_eq!(flow_values, vec![" bar".to_string()]);
364    }
365
366    #[test]
367    fn parser_preserves_top_level_quoted_scalar_document() {
368        let input = "\"foo: bar\\\": baz\"\n";
369        let tree = parse_yaml_tree(input).expect("tree");
370        assert_eq!(tree.text().to_string(), input);
371    }
372
373    #[test]
374    fn parse_yaml_report_emits_error_code_for_invalid_yaml() {
375        let report = parse_yaml_report("this\n is\n  invalid: x\n");
376        assert!(report.tree.is_none());
377        assert_eq!(report.diagnostics.len(), 1);
378        assert_eq!(
379            report.diagnostics[0].code,
380            diagnostic_codes::PARSE_UNEXPECTED_INDENT
381        );
382    }
383
384    #[test]
385    fn parse_yaml_report_detects_trailing_content_after_document_end() {
386        let report = parse_yaml_report("---\nkey: value\n... invalid\n");
387        assert!(report.tree.is_none());
388        assert_eq!(report.diagnostics.len(), 1);
389        assert_eq!(
390            report.diagnostics[0].code,
391            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END
392        );
393    }
394
395    #[test]
396    fn parse_yaml_report_detects_unexpected_flow_closer() {
397        let report = parse_yaml_report("---\n[ a, b, c ] ]\n");
398        assert!(report.tree.is_none());
399        assert_eq!(report.diagnostics.len(), 1);
400        assert_eq!(
401            report.diagnostics[0].code,
402            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
403        );
404    }
405
406    #[test]
407    fn parse_yaml_report_detects_unterminated_nested_flow_sequence() {
408        let report = parse_yaml_report("---\n[ [ a, b, c ]\n");
409        assert!(report.tree.is_none());
410        assert_eq!(report.diagnostics.len(), 1);
411        assert_eq!(
412            report.diagnostics[0].code,
413            diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE
414        );
415    }
416
417    #[test]
418    fn parse_yaml_report_detects_invalid_leading_flow_sequence_comma() {
419        let report = parse_yaml_report("---\n[ , a, b, c ]\n");
420        assert!(report.tree.is_none());
421        assert_eq!(report.diagnostics.len(), 1);
422        assert_eq!(
423            report.diagnostics[0].code,
424            diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
425        );
426    }
427
428    #[test]
429    fn parse_yaml_report_detects_trailing_content_after_flow_end() {
430        let report = parse_yaml_report("---\n[ a, b, c, ]#invalid\n");
431        assert!(report.tree.is_none());
432        assert_eq!(report.diagnostics.len(), 1);
433        assert_eq!(
434            report.diagnostics[0].code,
435            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
436        );
437    }
438
439    #[test]
440    fn parse_yaml_report_detects_invalid_double_quoted_escape() {
441        let report = parse_yaml_report("---\n\"\\.\"\n");
442        assert!(report.tree.is_none());
443        assert_eq!(report.diagnostics.len(), 1);
444        assert_eq!(
445            report.diagnostics[0].code,
446            diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE
447        );
448    }
449
450    #[test]
451    fn parse_yaml_report_detects_trailing_content_after_document_start() {
452        let report = parse_yaml_report("--- key1: value1\n    key2: value2\n");
453        assert!(report.tree.is_none());
454        assert_eq!(report.diagnostics.len(), 1);
455        assert_eq!(
456            report.diagnostics[0].code,
457            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START
458        );
459    }
460
461    #[test]
462    fn parse_yaml_report_detects_directive_without_document_start() {
463        let report = parse_yaml_report("%YAML 1.2\n");
464        assert!(report.tree.is_none());
465        assert_eq!(report.diagnostics.len(), 1);
466        assert_eq!(
467            report.diagnostics[0].code,
468            diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
469        );
470    }
471
472    #[test]
473    fn parse_yaml_report_detects_directive_after_content() {
474        let report = parse_yaml_report("!foo \"bar\"\n%TAG ! tag:example.com,2000:app/\n---\n");
475        assert!(report.tree.is_none());
476        assert_eq!(report.diagnostics.len(), 1);
477        assert_eq!(
478            report.diagnostics[0].code,
479            diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT
480        );
481    }
482
483    #[test]
484    fn parse_yaml_report_detects_wrong_indented_flow_continuation() {
485        let report = parse_yaml_report("---\nflow: [a,\nb,\nc]\n");
486        assert!(report.tree.is_none());
487        assert_eq!(report.diagnostics.len(), 1);
488        assert_eq!(
489            report.diagnostics[0].code,
490            diagnostic_codes::LEX_WRONG_INDENTED_FLOW
491        );
492    }
493
494    #[test]
495    fn parser_builds_flow_sequence_nodes_in_mapping_value() {
496        let input = "a: [b, c]\n";
497        let tree = parse_yaml_tree(input).expect("tree");
498        assert_eq!(tree.text().to_string(), input);
499
500        let seq = tree
501            .descendants()
502            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
503            .expect("flow sequence node");
504        let item_count = seq
505            .children()
506            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
507            .count();
508        assert_eq!(item_count, 2);
509    }
510
511    #[test]
512    fn parser_absorbs_literal_block_scalar_into_map_value() {
513        let input = "a: |\n  line1\n  line2\n";
514        let tree = parse_yaml_tree(input).expect("tree");
515        assert_eq!(tree.text().to_string(), input);
516
517        let map = tree
518            .descendants()
519            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
520            .expect("block map");
521        let entry = map
522            .children()
523            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
524            .expect("entry");
525        let value = entry
526            .children()
527            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
528            .expect("value");
529        let value_text = value.text().to_string();
530        assert!(
531            value_text.starts_with('|') || value_text.starts_with(" |"),
532            "value should contain the `|` header, got {value_text:?}"
533        );
534        assert!(
535            value_text.contains("line1") && value_text.contains("line2"),
536            "value should absorb block scalar content, got {value_text:?}"
537        );
538    }
539
540    #[test]
541    fn lexer_emits_literal_block_scalar_header_and_content() {
542        let input = "a: |\n  line1\n  line2\n";
543        let tokens = lex_mapping_tokens(input).expect("tokens");
544        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
545        assert_eq!(
546            kinds,
547            vec![
548                YamlToken::Key,
549                YamlToken::Colon,
550                YamlToken::Whitespace,
551                YamlToken::BlockScalarHeader,
552                YamlToken::Newline,
553                YamlToken::BlockScalarContent,
554                YamlToken::Newline,
555                YamlToken::BlockScalarContent,
556                YamlToken::Newline,
557            ]
558        );
559        let texts: Vec<_> = tokens.iter().map(|t| t.text).collect();
560        assert_eq!(
561            texts,
562            vec!["a", ":", " ", "|", "\n", "  line1", "\n", "  line2", "\n"]
563        );
564    }
565
566    #[test]
567    fn parser_builds_nested_block_sequence_on_same_line() {
568        let input = "- - a\n  - b\n- c\n";
569        let tree = parse_yaml_tree(input).expect("tree");
570        assert_eq!(tree.text().to_string(), input);
571
572        let outer = tree
573            .descendants()
574            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
575            .expect("outer block sequence");
576        let outer_items: Vec<_> = outer
577            .children()
578            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
579            .collect();
580        assert_eq!(outer_items.len(), 2);
581
582        let nested = outer_items[0]
583            .children()
584            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
585            .expect("nested block sequence inside first item");
586        let nested_items = nested
587            .children()
588            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
589            .count();
590        assert_eq!(nested_items, 2);
591    }
592
593    #[test]
594    fn parser_builds_multiline_flow_map_inside_block_sequence_item() {
595        let input = "- { multi\n  line, a: b}\n";
596        let tree = parse_yaml_tree(input).expect("tree");
597        assert_eq!(tree.text().to_string(), input);
598
599        let seq = tree
600            .descendants()
601            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
602            .expect("block sequence");
603        let item = seq
604            .children()
605            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
606            .expect("sequence item");
607        let flow_map = item
608            .children()
609            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
610            .expect("flow map inside sequence item");
611        let entry_count = flow_map
612            .children()
613            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
614            .count();
615        assert_eq!(entry_count, 2);
616    }
617
618    #[test]
619    fn parser_builds_flow_sequence_inside_block_sequence_item() {
620        let input = "- [a, b]\n- [c, d]\n";
621        let tree = parse_yaml_tree(input).expect("tree");
622        assert_eq!(tree.text().to_string(), input);
623
624        let seq = tree
625            .descendants()
626            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
627            .expect("block sequence");
628        let items: Vec<_> = seq
629            .children()
630            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
631            .collect();
632        assert_eq!(items.len(), 2);
633
634        for item in &items {
635            let flow = item
636                .children()
637                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
638                .expect("flow sequence inside item");
639            let flow_items = flow
640                .children()
641                .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
642                .count();
643            assert_eq!(flow_items, 2);
644        }
645    }
646
647    #[test]
648    fn lexer_recognizes_single_bang_tag_in_top_level_scalar() {
649        let tokens = lex_mapping_tokens("! a\n").expect("tokens");
650        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
651        assert_eq!(
652            kinds,
653            vec![
654                YamlToken::Tag,
655                YamlToken::Whitespace,
656                YamlToken::Scalar,
657                YamlToken::Newline,
658            ]
659        );
660        let texts: Vec<_> = tokens.iter().map(|t| t.text).collect();
661        assert_eq!(texts, vec!["!", " ", "a", "\n"]);
662    }
663
664    #[test]
665    fn parser_emits_scalar_document_for_tag_without_colon() {
666        let input = "! a\n";
667        let tree = parse_yaml_tree(input).expect("tree");
668        assert_eq!(tree.text().to_string(), input);
669
670        let has_block_map = tree
671            .descendants()
672            .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP);
673        assert!(
674            !has_block_map,
675            "scalar document should not be wrapped in YAML_BLOCK_MAP"
676        );
677
678        let has_tag = tree
679            .descendants_with_tokens()
680            .filter_map(|el| el.into_token())
681            .any(|tok| tok.kind() == SyntaxKind::YAML_TAG && tok.text() == "!");
682        assert!(has_tag, "tree should contain YAML_TAG '!'");
683    }
684
685    #[test]
686    fn lexer_extracts_explicit_tag_before_block_sequence_scalar() {
687        let tokens = lex_mapping_tokens("- !!int 1\n").expect("tokens");
688        let kinds: Vec<_> = tokens.iter().map(|t| t.kind).collect();
689        assert_eq!(
690            kinds,
691            vec![
692                YamlToken::BlockSeqEntry,
693                YamlToken::Whitespace,
694                YamlToken::Tag,
695                YamlToken::Whitespace,
696                YamlToken::Scalar,
697                YamlToken::Newline,
698            ]
699        );
700        let texts: Vec<_> = tokens.iter().map(|t| t.text).collect();
701        assert_eq!(texts, vec!["-", " ", "!!int", " ", "1", "\n"]);
702    }
703
704    #[test]
705    fn parser_builds_nested_block_map_inside_block_sequence() {
706        let input = "-\n  name: Mark\n  hr: 65\n";
707        let tree = parse_yaml_tree(input).expect("tree");
708        assert_eq!(tree.text().to_string(), input);
709
710        let seq = tree
711            .descendants()
712            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
713            .expect("block sequence");
714        let items: Vec<_> = seq
715            .children()
716            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
717            .collect();
718        assert_eq!(items.len(), 1);
719
720        let nested_map = items[0]
721            .children()
722            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
723            .expect("nested block map inside sequence item");
724        let entry_count = nested_map
725            .children()
726            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
727            .count();
728        assert_eq!(entry_count, 2);
729    }
730
731    #[test]
732    fn parser_builds_nested_block_map_from_indent_tokens() {
733        let input = "root: 1\n  child: 2\n";
734        let tree = parse_yaml_tree(input).expect("tree");
735
736        let outer_map = tree
737            .descendants()
738            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
739            .expect("outer map");
740        let outer_entry = outer_map
741            .children()
742            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
743            .expect("outer entry");
744        let outer_value = outer_entry
745            .children()
746            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
747            .expect("outer value");
748
749        let nested_map = outer_value
750            .children()
751            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
752            .expect("nested map");
753        let nested_entry_count = nested_map
754            .children()
755            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
756            .count();
757        assert_eq!(nested_entry_count, 1);
758    }
759
760    #[test]
761    fn shadow_parse_is_disabled_by_default() {
762        let report = parse_shadow("title: My Title", ShadowYamlOptions::default());
763        assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
764        assert_eq!(report.shadow_reason, "shadow-disabled");
765        assert_eq!(report.normalized_input, None);
766    }
767
768    #[test]
769    fn shadow_parse_skips_when_disabled_even_for_valid_input() {
770        let report = parse_shadow(
771            "title: My Title",
772            ShadowYamlOptions {
773                enabled: false,
774                input_kind: YamlInputKind::Plain,
775            },
776        );
777        assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
778        assert_eq!(report.shadow_reason, "shadow-disabled");
779    }
780
781    #[test]
782    fn shadow_parse_reports_prototype_parsed_when_enabled() {
783        let report = parse_shadow(
784            "title: My Title",
785            ShadowYamlOptions {
786                enabled: true,
787                input_kind: YamlInputKind::Plain,
788            },
789        );
790        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
791        assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
792        assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
793    }
794
795    #[test]
796    fn shadow_parse_reports_prototype_rejected_when_enabled() {
797        // Tab indentation is prohibited by YAML spec for block structures
798        let report = parse_shadow(
799            "\ttitle: value",
800            ShadowYamlOptions {
801                enabled: true,
802                input_kind: YamlInputKind::Plain,
803            },
804        );
805        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeRejected);
806        assert_eq!(report.shadow_reason, "prototype-basic-mapping-rejected");
807    }
808
809    #[test]
810    fn shadow_parse_accepts_hashpipe_mode_but_remains_prototype_scoped() {
811        let report = parse_shadow(
812            "#| title: My Title",
813            ShadowYamlOptions {
814                enabled: true,
815                input_kind: YamlInputKind::Hashpipe,
816            },
817        );
818        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
819        assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
820        assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
821    }
822}