Skip to main content

panache_parser/parser/
yaml.rs

//! YAML parser groundwork for long-term Panache integration.
//!
//! This module is intentionally minimal and currently acts as a placeholder for a
//! future in-tree YAML parser that can produce Panache-compatible CST structures.
//! Initial goals:
//! - support plain YAML and hashpipe-prefixed YAML from shared parsing primitives,
//! - preserve lossless syntax/trivia needed for exact host document ranges,
//! - enable shadow-mode comparison against the existing YAML engine before rollout,
//! - prepare for first-class YAML formatting support once parser parity is proven.
10
11#[path = "yaml/events.rs"]
12mod events;
13#[path = "yaml/model.rs"]
14mod model;
15#[path = "yaml/parser.rs"]
16mod parser;
17#[path = "yaml/parser_v2.rs"]
18mod parser_v2;
19#[path = "yaml/scanner.rs"]
20mod scanner;
21#[path = "yaml/validator.rs"]
22mod validator;
23
24pub use events::{project_events, project_events_from_tree};
25pub use model::{
26    ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
27    YamlParseReport, diagnostic_codes,
28};
29pub use parser::{parse_shadow, parse_yaml_report, parse_yaml_tree};
30pub use parser_v2::{ShadowParserV2Report, parse_v2, shadow_parser_v2_check};
31pub use scanner::{ShadowScannerReport, shadow_scanner_check};
32
/// Forwards `input` to `validator::validate_yaml` and returns its result unchanged.
///
/// The `validator` module is private to this file, so this `pub` wrapper is the
/// only path to it from outside; `#[doc(hidden)]` keeps it out of the rendered
/// API docs.
///
/// NOTE(review): presumably `None` means the validator found nothing to
/// report and `Some` carries the diagnostic it raised — confirm against
/// `validator::validate_yaml`.
#[doc(hidden)]
pub fn validate_yaml_for_test(input: &str) -> Option<YamlDiagnostic> {
    validator::validate_yaml(input)
}
37
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};

    // Asserts that `parse_yaml_report($input)` produces no tree and exactly one
    // diagnostic whose `code` equals `$code`.
    //
    // Written as a macro so the three assertions are expanded in the calling
    // test and the type of the diagnostic code never has to be spelled out.
    macro_rules! assert_rejected_with {
        ($input:expr, $code:expr $(,)?) => {{
            let report = parse_yaml_report($input);
            assert!(report.tree.is_none());
            assert_eq!(report.diagnostics.len(), 1);
            assert_eq!(report.diagnostics[0].code, $code);
        }};
    }

    /// Parses `input` (panicking with "tree" on failure) and asserts that the
    /// resulting tree's text equals `input` before handing the tree back.
    ///
    /// `#[track_caller]` makes a failure point at the calling test.
    #[track_caller]
    fn parse_lossless(input: &str) -> SyntaxNode {
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);
        tree
    }

    /// Every node yielded by `root.descendants()` whose kind is `kind`.
    fn descendants_of_kind(root: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        root.descendants().filter(|n| n.kind() == kind).collect()
    }

    /// Every direct child node of `parent` whose kind is `kind`.
    fn children_of_kind(parent: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        parent.children().filter(|n| n.kind() == kind).collect()
    }

    /// First node from `root.descendants()` with the given kind; panics with
    /// `what` when there is none.
    #[track_caller]
    fn find_node(root: &SyntaxNode, kind: SyntaxKind, what: &str) -> SyntaxNode {
        root.descendants().find(|n| n.kind() == kind).expect(what)
    }

    /// First direct child node of `parent` with the given kind; panics with
    /// `what` when there is none.
    #[track_caller]
    fn find_child(parent: &SyntaxNode, kind: SyntaxKind, what: &str) -> SyntaxNode {
        parent.children().find(|n| n.kind() == kind).expect(what)
    }

    /// Texts of all tokens of `kind` reached via `root.descendants_with_tokens()`.
    fn descendant_token_texts(root: &SyntaxNode, kind: SyntaxKind) -> Vec<String> {
        root.descendants_with_tokens()
            .filter_map(|el| el.into_token())
            .filter(|tok| tok.kind() == kind)
            .map(|tok| tok.text().to_string())
            .collect()
    }

    /// Texts of the tokens of `kind` that are direct children of `node`.
    fn direct_token_texts(node: &SyntaxNode, kind: SyntaxKind) -> Vec<String> {
        node.children_with_tokens()
            .filter_map(|el| el.into_token())
            .filter(|tok| tok.kind() == kind)
            .map(|tok| tok.text().to_string())
            .collect()
    }

    /// For each `YAML_BLOCK_MAP_KEY` node in `tree`, concatenates the texts of
    /// its direct `YAML_SCALAR` tokens; keys that yield an empty string are
    /// dropped.
    fn block_map_key_texts(tree: &SyntaxNode) -> Vec<String> {
        descendants_of_kind(tree, SyntaxKind::YAML_BLOCK_MAP_KEY)
            .iter()
            .map(|key| direct_token_texts(key, SyntaxKind::YAML_SCALAR).concat())
            .filter(|text| !text.is_empty())
            .collect()
    }

    /// Builds the options struct passed to `parse_shadow`.
    fn shadow_options(enabled: bool, input_kind: YamlInputKind) -> ShadowYamlOptions {
        ShadowYamlOptions {
            enabled,
            input_kind,
        }
    }

    #[test]
    fn builds_basic_rowan_tree_for_multiline_mapping() {
        let input = "title: My Title\nauthor: Me\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.kind(), SyntaxKind::DOCUMENT);
        assert_eq!(tree.text().to_string(), input);

        let mapping = find_node(&tree, SyntaxKind::YAML_BLOCK_MAP, "yaml block map");
        let entries = children_of_kind(&mapping, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(entries.len(), 2);

        let token_kinds: Vec<_> = mapping
            .descendants_with_tokens()
            .filter_map(|el| el.into_token())
            .map(|tok| tok.kind())
            .collect();
        assert_eq!(
            token_kinds,
            vec![
                SyntaxKind::YAML_SCALAR,
                SyntaxKind::YAML_COLON,
                SyntaxKind::WHITESPACE,
                SyntaxKind::YAML_SCALAR,
                SyntaxKind::NEWLINE,
                SyntaxKind::YAML_SCALAR,
                SyntaxKind::YAML_COLON,
                SyntaxKind::WHITESPACE,
                SyntaxKind::YAML_SCALAR,
                SyntaxKind::NEWLINE,
            ]
        );
    }

    #[test]
    fn mapping_nodes_preserve_entry_text_boundaries() {
        let tree = parse_yaml_tree("title: A\nauthor: B\n").expect("tree");
        let mapping = find_node(&tree, SyntaxKind::YAML_BLOCK_MAP, "yaml block map");

        let entry_texts: Vec<String> =
            children_of_kind(&mapping, SyntaxKind::YAML_BLOCK_MAP_ENTRY)
                .iter()
                .map(|entry| entry.text().to_string())
                .collect();
        assert_eq!(entry_texts, ["title: A\n", "author: B\n"]);
    }

    #[test]
    fn splits_mapping_on_colon_outside_quoted_key() {
        let tree = parse_lossless("\"foo:bar\": 23\n'x:y': 24\n");
        assert_eq!(block_map_key_texts(&tree), ["\"foo:bar\"", "'x:y'"]);
    }

    #[test]
    fn keeps_colon_inside_escaped_double_quoted_key() {
        let tree = parse_lossless("\"foo\\\":bar\": 23\n");
        assert_eq!(block_map_key_texts(&tree), ["\"foo\\\":bar\""]);
    }

    #[test]
    fn keeps_hash_in_double_quoted_scalar_value() {
        let tree = parse_yaml_tree("foo: \"a#b\"\n").expect("tree");

        // The `#` sits inside the quotes, so no comment token may appear.
        let comment_count = descendant_token_texts(&tree, SyntaxKind::YAML_COMMENT).len();
        assert_eq!(comment_count, 0);

        let value_scalars: Vec<String> =
            descendants_of_kind(&tree, SyntaxKind::YAML_BLOCK_MAP_VALUE)
                .iter()
                .flat_map(|value| direct_token_texts(value, SyntaxKind::YAML_SCALAR))
                .collect();
        assert_eq!(value_scalars, ["\"a#b\""]);
    }

    #[test]
    fn keeps_colon_inside_single_quoted_key_with_escaped_quote() {
        let tree = parse_lossless("'foo'':bar': 23\n");
        assert_eq!(block_map_key_texts(&tree), ["'foo'':bar'"]);
    }

    #[test]
    fn parser_preserves_document_markers_and_directives() {
        let tree = parse_lossless("%YAML 1.2\n---\nfoo: bar\n...\n");

        let scalar_tokens = descendant_token_texts(&tree, SyntaxKind::YAML_SCALAR);
        assert!(scalar_tokens.iter().any(|text| text == "%YAML 1.2"));
        assert!(scalar_tokens.iter().any(|text| text == "bar"));

        let has_doc_start = descendant_token_texts(&tree, SyntaxKind::YAML_DOCUMENT_START)
            .iter()
            .any(|text| text == "---");
        assert!(has_doc_start, "--- should be a YAML_DOCUMENT_START token");

        let has_doc_end = descendant_token_texts(&tree, SyntaxKind::YAML_DOCUMENT_END)
            .iter()
            .any(|text| text == "...");
        assert!(has_doc_end, "... should be a YAML_DOCUMENT_END token");
    }

    #[test]
    fn parser_preserves_standalone_flow_mapping_lines() {
        let tree = parse_lossless("{foo: bar}\n");

        let flow_entry_count = descendants_of_kind(&tree, SyntaxKind::YAML_FLOW_MAP_ENTRY).len();
        assert_eq!(flow_entry_count, 1);

        let flow_values: Vec<String> = descendants_of_kind(&tree, SyntaxKind::YAML_FLOW_MAP_VALUE)
            .iter()
            .map(|value| value.text().to_string())
            .collect();
        assert_eq!(flow_values, [" bar"]);
    }

    #[test]
    fn parser_preserves_top_level_quoted_scalar_document() {
        parse_lossless("\"foo: bar\\\": baz\"\n");
    }

    #[test]
    fn parse_yaml_report_emits_error_code_for_invalid_yaml() {
        // `this` at the top of a block-map context is a stray scalar with no
        // following colon — flagged at the leading scalar rather than at the
        // later indent that surfaced as a side-effect.
        assert_rejected_with!(
            "this\n is\n  invalid: x\n",
            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_document_end() {
        assert_rejected_with!(
            "---\nkey: value\n... invalid\n",
            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_unexpected_flow_closer() {
        assert_rejected_with!(
            "---\n[ a, b, c ] ]\n",
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_unterminated_nested_flow_sequence() {
        assert_rejected_with!(
            "---\n[ [ a, b, c ]\n",
            diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
        );
    }

    #[test]
    fn parse_yaml_report_detects_invalid_leading_flow_sequence_comma() {
        assert_rejected_with!(
            "---\n[ , a, b, c ]\n",
            diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_flow_end() {
        assert_rejected_with!(
            "---\n[ a, b, c, ]#invalid\n",
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_invalid_double_quoted_escape() {
        assert_rejected_with!(
            "---\n\"\\.\"\n",
            diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_document_start() {
        assert_rejected_with!(
            "--- key1: value1\n    key2: value2\n",
            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START,
        );
    }

    #[test]
    fn parse_yaml_report_detects_directive_without_document_start() {
        assert_rejected_with!(
            "%YAML 1.2\n",
            diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
        );
    }

    #[test]
    fn parse_yaml_report_detects_directive_after_content() {
        // EB22-shape: a comment terminates the plain scalar, leaving
        // `%YAML 1.2` at column 0 in directive position after content.
        assert_rejected_with!(
            "---\nscalar1 # comment\n%YAML 1.2\n---\nscalar2\n",
            diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
        );
    }

    #[test]
    fn parse_yaml_report_detects_wrong_indented_flow_continuation() {
        assert_rejected_with!(
            "---\nflow: [a,\nb,\nc]\n",
            diagnostic_codes::LEX_WRONG_INDENTED_FLOW,
        );
    }

    #[test]
    fn parser_builds_flow_sequence_nodes_in_mapping_value() {
        let tree = parse_lossless("a: [b, c]\n");

        let seq = find_node(&tree, SyntaxKind::YAML_FLOW_SEQUENCE, "flow sequence node");
        let item_count = children_of_kind(&seq, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM).len();
        assert_eq!(item_count, 2);
    }

    #[test]
    fn parser_absorbs_literal_block_scalar_into_map_value() {
        let tree = parse_lossless("a: |\n  line1\n  line2\n");

        let map = find_node(&tree, SyntaxKind::YAML_BLOCK_MAP, "block map");
        let entry = find_child(&map, SyntaxKind::YAML_BLOCK_MAP_ENTRY, "entry");
        let value = find_child(&entry, SyntaxKind::YAML_BLOCK_MAP_VALUE, "value");

        let value_text = value.text().to_string();
        assert!(
            value_text.starts_with('|') || value_text.starts_with(" |"),
            "value should contain the `|` header, got {value_text:?}"
        );
        assert!(
            value_text.contains("line1") && value_text.contains("line2"),
            "value should absorb block scalar content, got {value_text:?}"
        );
    }

    #[test]
    fn parser_builds_nested_block_sequence_on_same_line() {
        let tree = parse_lossless("- - a\n  - b\n- c\n");

        let outer = find_node(
            &tree,
            SyntaxKind::YAML_BLOCK_SEQUENCE,
            "outer block sequence",
        );
        let outer_items = children_of_kind(&outer, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(outer_items.len(), 2);

        let nested = find_child(
            &outer_items[0],
            SyntaxKind::YAML_BLOCK_SEQUENCE,
            "nested block sequence inside first item",
        );
        let nested_items = children_of_kind(&nested, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM).len();
        assert_eq!(nested_items, 2);
    }

    #[test]
    fn parser_builds_multiline_flow_map_inside_block_sequence_item() {
        let tree = parse_lossless("- { multi\n  line, a: b}\n");

        let seq = find_node(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let item = find_child(
            &seq,
            SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM,
            "sequence item",
        );
        find_child(
            &item,
            SyntaxKind::YAML_FLOW_MAP,
            "flow map inside sequence item",
        );
    }

    #[test]
    fn parser_builds_flow_sequence_inside_block_sequence_item() {
        let tree = parse_lossless("- [a, b]\n- [c, d]\n");

        let seq = find_node(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let items = children_of_kind(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(items.len(), 2);

        for item in &items {
            let flow = find_child(
                item,
                SyntaxKind::YAML_FLOW_SEQUENCE,
                "flow sequence inside item",
            );
            let flow_items = children_of_kind(&flow, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM).len();
            assert_eq!(flow_items, 2);
        }
    }

    #[test]
    fn parser_emits_scalar_document_for_tag_without_colon() {
        let tree = parse_lossless("! a\n");

        let has_block_map = !descendants_of_kind(&tree, SyntaxKind::YAML_BLOCK_MAP).is_empty();
        assert!(
            !has_block_map,
            "scalar document should not be wrapped in YAML_BLOCK_MAP"
        );

        // v2 keeps the leading `!` inside the scalar's source bytes; the
        // projection layer resolves tags from the scalar text.
        let has_tagged_scalar = descendant_token_texts(&tree, SyntaxKind::YAML_SCALAR)
            .iter()
            .any(|text| text.starts_with('!'));
        assert!(has_tagged_scalar, "tree should contain tag bytes in scalar");
    }

    #[test]
    fn parser_builds_nested_block_map_inside_block_sequence() {
        let tree = parse_lossless("-\n  name: Mark\n  hr: 65\n");

        let seq = find_node(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let items = children_of_kind(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(items.len(), 1);

        let nested_map = find_child(
            &items[0],
            SyntaxKind::YAML_BLOCK_MAP,
            "nested block map inside sequence item",
        );
        let entry_count = children_of_kind(&nested_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY).len();
        assert_eq!(entry_count, 2);
    }

    #[test]
    fn parser_builds_nested_block_map_from_indent_tokens() {
        let tree = parse_yaml_tree("root:\n  child: 2\n").expect("tree");

        let outer_map = find_node(&tree, SyntaxKind::YAML_BLOCK_MAP, "outer map");
        let outer_entry = find_child(&outer_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY, "outer entry");
        let outer_value = find_child(
            &outer_entry,
            SyntaxKind::YAML_BLOCK_MAP_VALUE,
            "outer value",
        );

        let nested_map = find_child(&outer_value, SyntaxKind::YAML_BLOCK_MAP, "nested map");
        let nested_entry_count =
            children_of_kind(&nested_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY).len();
        assert_eq!(nested_entry_count, 1);
    }

    #[test]
    fn shadow_parse_is_disabled_by_default() {
        let report = parse_shadow("title: My Title", ShadowYamlOptions::default());
        assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
        assert_eq!(report.shadow_reason, "shadow-disabled");
        assert_eq!(report.normalized_input, None);
    }

    #[test]
    fn shadow_parse_skips_when_disabled_even_for_valid_input() {
        let report = parse_shadow(
            "title: My Title",
            shadow_options(false, YamlInputKind::Plain),
        );
        assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
        assert_eq!(report.shadow_reason, "shadow-disabled");
    }

    #[test]
    fn shadow_parse_reports_prototype_parsed_when_enabled() {
        let report = parse_shadow(
            "title: My Title",
            shadow_options(true, YamlInputKind::Plain),
        );
        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
        assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
        assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
    }

    #[test]
    fn shadow_parse_reports_prototype_rejected_when_enabled() {
        // An unterminated flow sequence is rejected by the v2-aware
        // structural validator, which is the rejection signal exercised
        // by the shadow parse plumbing.
        let report = parse_shadow("[ a, b", shadow_options(true, YamlInputKind::Plain));
        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeRejected);
        assert_eq!(report.shadow_reason, "prototype-basic-mapping-rejected");
    }

    #[test]
    fn shadow_parse_accepts_hashpipe_mode_but_remains_prototype_scoped() {
        let report = parse_shadow(
            "#| title: My Title",
            shadow_options(true, YamlInputKind::Hashpipe),
        );
        assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
        assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
        assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
    }
}