Skip to main content

panache_parser/parser/
yaml.rs

1//! In-tree YAML parser. Produces a lossless, Panache-compatible CST that
2//! the host parser embeds directly into the document tree (frontmatter and
3//! hashpipe option bodies) and that the in-tree YAML formatter consumes.
4//!
5//! - supports plain YAML and hashpipe-prefixed YAML from shared parsing
6//!   primitives (see [`parse_stream`] / [`parse_stream_with_prefix`]),
7//! - preserves lossless syntax/trivia needed for exact host document ranges,
8//! - backs first-class YAML formatting via `crate::syntax::yaml_ast`.
9
10#[path = "yaml/cooking.rs"]
11mod cooking;
12#[path = "yaml/events.rs"]
13mod events;
14#[path = "yaml/model.rs"]
15mod model;
16#[path = "yaml/parser.rs"]
17mod parser;
18#[path = "yaml/profile.rs"]
19mod profile;
20#[path = "yaml/scanner.rs"]
21mod scanner;
22#[path = "yaml/validator.rs"]
23mod validator;
24
25pub use events::{project_events, project_events_from_tree};
26// Re-exported crate-internally so the typed YAML AST wrappers in
27// `crate::syntax::yaml_ast` can cook scalar tokens without re-implementing
28// the quote/escape/fold rules. The modules themselves stay private.
29pub(crate) use cooking::cook;
30pub use model::{YamlDiagnostic, YamlParseReport, diagnostic_codes};
31pub use parser::{
32    locate_yaml_diagnostic, locate_yaml_diagnostic_ctx, parse_stream, parse_stream_with_prefix,
33    parse_yaml_report, parse_yaml_tree, validate_yaml_with_prefix,
34};
35pub use profile::{ConsumerSet, YamlConsumer, YamlLocation, YamlValidationContext};
36pub(crate) use scanner::ScalarStyle;
37
38#[doc(hidden)]
39pub fn validate_yaml_for_test(input: &str) -> Option<YamlDiagnostic> {
40    validator::validate_yaml(input)
41}
42
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};

    /// Asserts that `parse_yaml_report($input)` produces no tree and exactly
    /// one diagnostic whose `code` equals `$code`.
    macro_rules! assert_single_diagnostic {
        ($input:expr, $code:expr $(,)?) => {{
            let report = parse_yaml_report($input);
            assert!(report.tree.is_none());
            assert_eq!(report.diagnostics.len(), 1);
            assert_eq!(report.diagnostics[0].code, $code);
        }};
    }

    /// Returns the first descendant of `root` with the given `kind`,
    /// panicking with `missing` when there is none.
    fn find_descendant(root: &SyntaxNode, kind: SyntaxKind, missing: &str) -> SyntaxNode {
        root.descendants()
            .find(|node| node.kind() == kind)
            .expect(missing)
    }

    /// Returns the first direct child of `parent` with the given `kind`,
    /// panicking with `missing` when there is none.
    fn find_child(parent: &SyntaxNode, kind: SyntaxKind, missing: &str) -> SyntaxNode {
        parent
            .children()
            .find(|node| node.kind() == kind)
            .expect(missing)
    }

    /// Collects the direct children of `parent` that have the given `kind`.
    fn children_of_kind(parent: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        parent
            .children()
            .filter(|node| node.kind() == kind)
            .collect()
    }

    /// Collects every descendant of `root` that has the given `kind`.
    fn descendants_of_kind(root: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        root.descendants()
            .filter(|node| node.kind() == kind)
            .collect()
    }

    /// Lists the kinds of all tokens below `root`, in traversal order.
    fn token_kinds(root: &SyntaxNode) -> Vec<SyntaxKind> {
        root.descendants_with_tokens()
            .filter_map(|element| element.into_token())
            .map(|token| token.kind())
            .collect()
    }

    /// Lists the text of all tokens below `root` whose kind is one of `kinds`.
    fn token_texts(root: &SyntaxNode, kinds: &[SyntaxKind]) -> Vec<String> {
        root.descendants_with_tokens()
            .filter_map(|element| element.into_token())
            .filter(|token| kinds.contains(&token.kind()))
            .map(|token| token.text().to_string())
            .collect()
    }

    /// Reports whether any token below `root` has both the given `kind` and
    /// exactly the given `text`.
    fn has_token(root: &SyntaxNode, kind: SyntaxKind, text: &str) -> bool {
        root.descendants_with_tokens()
            .filter_map(|element| element.into_token())
            .any(|token| token.kind() == kind && token.text() == text)
    }

    /// For every `YAML_BLOCK_MAP_KEY` node, concatenates the text of its
    /// `YAML_SCALAR_TEXT` tokens; keys that yield no text are skipped.
    fn block_map_key_texts(tree: &SyntaxNode) -> Vec<String> {
        let mut key_texts = Vec::new();
        for key in descendants_of_kind(tree, SyntaxKind::YAML_BLOCK_MAP_KEY) {
            let mut joined = String::new();
            for token in key
                .descendants_with_tokens()
                .filter_map(|element| element.into_token())
            {
                if token.kind() == SyntaxKind::YAML_SCALAR_TEXT {
                    joined.push_str(token.text());
                }
            }
            if !joined.is_empty() {
                key_texts.push(joined);
            }
        }
        key_texts
    }

    #[test]
    fn builds_basic_rowan_tree_for_multiline_mapping() {
        let source = "title: My Title\nauthor: Me\n";
        let tree = parse_yaml_tree(source).expect("tree");
        assert_eq!(tree.kind(), SyntaxKind::YAML_STREAM);
        assert_eq!(tree.text().to_string(), source);

        let mapping = find_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "yaml block map");
        let entries = children_of_kind(&mapping, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(entries.len(), 2);

        // Two `key: value` lines, each ending in a newline.
        let expected_kinds = [
            SyntaxKind::YAML_SCALAR_TEXT,
            SyntaxKind::YAML_COLON,
            SyntaxKind::WHITESPACE,
            SyntaxKind::YAML_SCALAR_TEXT,
            SyntaxKind::NEWLINE,
            SyntaxKind::YAML_SCALAR_TEXT,
            SyntaxKind::YAML_COLON,
            SyntaxKind::WHITESPACE,
            SyntaxKind::YAML_SCALAR_TEXT,
            SyntaxKind::NEWLINE,
        ];
        assert_eq!(token_kinds(&mapping), expected_kinds);
    }

    #[test]
    fn mapping_nodes_preserve_entry_text_boundaries() {
        let tree = parse_yaml_tree("title: A\nauthor: B\n").expect("tree");
        let mapping = find_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "yaml block map");

        let entry_texts: Vec<String> =
            children_of_kind(&mapping, SyntaxKind::YAML_BLOCK_MAP_ENTRY)
                .iter()
                .map(|entry| entry.text().to_string())
                .collect();
        assert_eq!(entry_texts, ["title: A\n", "author: B\n"]);
    }

    #[test]
    fn splits_mapping_on_colon_outside_quoted_key() {
        let input = "\"foo:bar\": 23\n'x:y': 24\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let keys = block_map_key_texts(&tree);
        assert_eq!(keys, ["\"foo:bar\"", "'x:y'"]);
    }

    #[test]
    fn keeps_colon_inside_escaped_double_quoted_key() {
        let input = "\"foo\\\":bar\": 23\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let keys = block_map_key_texts(&tree);
        assert_eq!(keys, ["\"foo\\\":bar\""]);
    }

    #[test]
    fn keeps_hash_in_double_quoted_scalar_value() {
        let input = "foo: \"a#b\"\n";
        let tree = parse_yaml_tree(input).expect("tree");

        let comment_count = token_kinds(&tree)
            .into_iter()
            .filter(|kind| *kind == SyntaxKind::YAML_COMMENT)
            .count();
        assert_eq!(comment_count, 0);

        let mut value_scalars: Vec<String> = Vec::new();
        for value in descendants_of_kind(&tree, SyntaxKind::YAML_BLOCK_MAP_VALUE) {
            for scalar in children_of_kind(&value, SyntaxKind::YAML_SCALAR) {
                value_scalars.push(scalar.text().to_string());
            }
        }
        assert_eq!(value_scalars, ["\"a#b\""]);
    }

    #[test]
    fn keeps_colon_inside_single_quoted_key_with_escaped_quote() {
        let input = "'foo'':bar': 23\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let keys = block_map_key_texts(&tree);
        assert_eq!(keys, ["'foo'':bar'"]);
    }

    #[test]
    fn parser_preserves_document_markers_and_directives() {
        let input = "%YAML 1.2\n---\nfoo: bar\n...\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let scalar_tokens = token_texts(
            &tree,
            &[SyntaxKind::YAML_SCALAR_TEXT, SyntaxKind::YAML_DIRECTIVE],
        );
        assert!(scalar_tokens.iter().any(|text| text == "%YAML 1.2"));
        assert!(scalar_tokens.iter().any(|text| text == "bar"));

        assert!(
            has_token(&tree, SyntaxKind::YAML_DOCUMENT_START, "---"),
            "--- should be a YAML_DOCUMENT_START token"
        );
        assert!(
            has_token(&tree, SyntaxKind::YAML_DOCUMENT_END, "..."),
            "... should be a YAML_DOCUMENT_END token"
        );
    }

    #[test]
    fn parser_preserves_standalone_flow_mapping_lines() {
        let input = "{foo: bar}\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let flow_entries = descendants_of_kind(&tree, SyntaxKind::YAML_FLOW_MAP_ENTRY);
        assert_eq!(flow_entries.len(), 1);

        let flow_values: Vec<String> =
            descendants_of_kind(&tree, SyntaxKind::YAML_FLOW_MAP_VALUE)
                .iter()
                .map(|value| value.text().to_string())
                .collect();
        assert_eq!(flow_values, [" bar"]);
    }

    #[test]
    fn parser_preserves_top_level_quoted_scalar_document() {
        let input = "\"foo: bar\\\": baz\"\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn parse_yaml_report_emits_error_code_for_invalid_yaml() {
        // `this` at the top of a block-map context is a stray scalar with no
        // following colon, so it is flagged at the leading scalar rather than
        // at the later indent that surfaced as a side-effect.
        assert_single_diagnostic!(
            "this\n is\n  invalid: x\n",
            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_document_end() {
        assert_single_diagnostic!(
            "---\nkey: value\n... invalid\n",
            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_unexpected_flow_closer() {
        assert_single_diagnostic!(
            "---\n[ a, b, c ] ]\n",
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_unterminated_nested_flow_sequence() {
        assert_single_diagnostic!(
            "---\n[ [ a, b, c ]\n",
            diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
        );
    }

    #[test]
    fn parse_yaml_report_detects_invalid_leading_flow_sequence_comma() {
        assert_single_diagnostic!(
            "---\n[ , a, b, c ]\n",
            diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_flow_end() {
        assert_single_diagnostic!(
            "---\n[ a, b, c, ]#invalid\n",
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_invalid_double_quoted_escape() {
        assert_single_diagnostic!(
            "---\n\"\\.\"\n",
            diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_document_start() {
        assert_single_diagnostic!(
            "--- key1: value1\n    key2: value2\n",
            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START,
        );
    }

    #[test]
    fn parse_yaml_report_detects_directive_without_document_start() {
        assert_single_diagnostic!(
            "%YAML 1.2\n",
            diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
        );
    }

    #[test]
    fn parse_yaml_report_detects_directive_after_content() {
        // Tag-shape: tag dispatch terminates the scalar before `%TAG`
        // hits column 0, so the directive lands in its real position
        // after content.
        assert_single_diagnostic!(
            "!foo \"bar\"\n%TAG !x! tag:example.com,2014:\n---\n",
            diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
        );
    }

    #[test]
    fn parse_yaml_report_detects_wrong_indented_flow_continuation() {
        assert_single_diagnostic!(
            "---\nflow: [a,\nb,\nc]\n",
            diagnostic_codes::LEX_WRONG_INDENTED_FLOW,
        );
    }

    #[test]
    fn parser_builds_flow_sequence_nodes_in_mapping_value() {
        let input = "a: [b, c]\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = find_descendant(&tree, SyntaxKind::YAML_FLOW_SEQUENCE, "flow sequence node");
        let items = children_of_kind(&seq, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM);
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn parser_absorbs_literal_block_scalar_into_map_value() {
        let input = "a: |\n  line1\n  line2\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let map = find_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "block map");
        let entry = find_child(&map, SyntaxKind::YAML_BLOCK_MAP_ENTRY, "entry");
        let value = find_child(&entry, SyntaxKind::YAML_BLOCK_MAP_VALUE, "value");
        let value_text = value.text().to_string();

        // The header may sit at the very start or right after one space.
        let header_first = value_text.starts_with('|');
        let header_after_space = value_text.starts_with(" |");
        assert!(
            header_first || header_after_space,
            "value should contain the `|` header, got {value_text:?}"
        );

        let has_both_lines = ["line1", "line2"]
            .iter()
            .all(|line| value_text.contains(*line));
        assert!(
            has_both_lines,
            "value should absorb block scalar content, got {value_text:?}"
        );
    }

    #[test]
    fn parser_builds_nested_block_sequence_on_same_line() {
        let input = "- - a\n  - b\n- c\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let outer = find_descendant(
            &tree,
            SyntaxKind::YAML_BLOCK_SEQUENCE,
            "outer block sequence",
        );
        let outer_items = children_of_kind(&outer, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(outer_items.len(), 2);

        let nested = find_child(
            &outer_items[0],
            SyntaxKind::YAML_BLOCK_SEQUENCE,
            "nested block sequence inside first item",
        );
        let nested_items = children_of_kind(&nested, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(nested_items.len(), 2);
    }

    #[test]
    fn parser_builds_multiline_flow_map_inside_block_sequence_item() {
        let input = "- { multi\n  line, a: b}\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = find_descendant(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let item = find_child(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM, "sequence item");
        // Only the presence of the flow map matters here.
        let _flow_map = find_child(
            &item,
            SyntaxKind::YAML_FLOW_MAP,
            "flow map inside sequence item",
        );
    }

    #[test]
    fn parser_builds_flow_sequence_inside_block_sequence_item() {
        let input = "- [a, b]\n- [c, d]\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = find_descendant(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let items = children_of_kind(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(items.len(), 2);

        for item in &items {
            let flow = find_child(
                item,
                SyntaxKind::YAML_FLOW_SEQUENCE,
                "flow sequence inside item",
            );
            let flow_items = children_of_kind(&flow, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM);
            assert_eq!(flow_items.len(), 2);
        }
    }

    #[test]
    fn parser_emits_scalar_document_for_tag_without_colon() {
        let input = "! a\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let has_block_map = tree
            .descendants()
            .map(|node| node.kind())
            .any(|kind| kind == SyntaxKind::YAML_BLOCK_MAP);
        assert!(
            !has_block_map,
            "scalar document should not be wrapped in YAML_BLOCK_MAP"
        );

        // The scanner emits the leading `!` as a dedicated YAML_TAG
        // token; the projection layer reads the tag from that token.
        assert!(
            has_token(&tree, SyntaxKind::YAML_TAG, "!"),
            "tree should contain a YAML_TAG token for the leading `!`"
        );
    }

    #[test]
    fn parser_builds_nested_block_map_inside_block_sequence() {
        let input = "-\n  name: Mark\n  hr: 65\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = find_descendant(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let items = children_of_kind(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(items.len(), 1);

        let nested_map = find_child(
            &items[0],
            SyntaxKind::YAML_BLOCK_MAP,
            "nested block map inside sequence item",
        );
        let nested_entries = children_of_kind(&nested_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(nested_entries.len(), 2);
    }

    #[test]
    fn parser_builds_nested_block_map_from_indent_tokens() {
        let input = "root:\n  child: 2\n";
        let tree = parse_yaml_tree(input).expect("tree");

        let outer_map = find_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "outer map");
        let outer_entry = find_child(
            &outer_map,
            SyntaxKind::YAML_BLOCK_MAP_ENTRY,
            "outer entry",
        );
        let outer_value = find_child(
            &outer_entry,
            SyntaxKind::YAML_BLOCK_MAP_VALUE,
            "outer value",
        );

        let nested_map = find_child(&outer_value, SyntaxKind::YAML_BLOCK_MAP, "nested map");
        let nested_entries = children_of_kind(&nested_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(nested_entries.len(), 1);
    }
}