Skip to main content

panache_parser/parser/
yaml.rs

//! In-tree YAML parser. Produces a lossless, Panache-compatible CST that
//! the host parser embeds directly into the document tree (frontmatter and
//! hashpipe option bodies) and that the in-tree YAML formatter consumes.
//!
//! - supports plain YAML and hashpipe-prefixed YAML from shared parsing
//!   primitives (see [`parse_stream`] / [`parse_stream_with_prefix`]),
//! - preserves lossless syntax/trivia needed for exact host document ranges,
//! - backs first-class YAML formatting via `crate::syntax::yaml_ast`.
9
10#[path = "yaml/cooking.rs"]
11mod cooking;
12#[path = "yaml/events.rs"]
13mod events;
14#[path = "yaml/model.rs"]
15mod model;
16#[path = "yaml/parser.rs"]
17mod parser;
18#[path = "yaml/scanner.rs"]
19mod scanner;
20#[path = "yaml/validator.rs"]
21mod validator;
22
23pub use events::{project_events, project_events_from_tree};
24// Re-exported crate-internally so the typed YAML AST wrappers in
25// `crate::syntax::yaml_ast` can cook scalar tokens without re-implementing
26// the quote/escape/fold rules. The modules themselves stay private.
27pub(crate) use cooking::cook;
28pub use model::{YamlDiagnostic, YamlParseReport, diagnostic_codes};
29pub use parser::{
30    locate_yaml_diagnostic, parse_stream, parse_stream_with_prefix, parse_yaml_report,
31    parse_yaml_tree, validate_yaml_with_prefix,
32};
33pub(crate) use scanner::ScalarStyle;
34
35#[doc(hidden)]
36pub fn validate_yaml_for_test(input: &str) -> Option<YamlDiagnostic> {
37    validator::validate_yaml(input)
38}
39
#[cfg(test)]
mod tests {
    //! Tree-shape and diagnostic tests for the public YAML entry points
    //! (`parse_yaml_tree` and `parse_yaml_report`).

    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};

    /// Asserts that `parse_yaml_report($input)` yields no tree and exactly
    /// one diagnostic whose `code` equals `$code`.
    ///
    /// Written as a macro so the comparison works for whatever type the
    /// diagnostic code happens to be.
    macro_rules! assert_single_diagnostic {
        ($input:expr, $code:expr $(,)?) => {{
            let report = parse_yaml_report($input);
            assert!(report.tree.is_none());
            assert_eq!(report.diagnostics.len(), 1);
            assert_eq!(report.diagnostics[0].code, $code);
        }};
    }

    /// Every token reachable from `root`, in traversal order, as
    /// `(kind, text)` pairs.
    fn tokens(root: &SyntaxNode) -> Vec<(SyntaxKind, String)> {
        root.descendants_with_tokens()
            .filter_map(|element| element.into_token())
            .map(|token| (token.kind(), token.text().to_string()))
            .collect()
    }

    /// All nodes yielded by `root.descendants()` whose kind is `kind`.
    fn descendants_of_kind(root: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        root.descendants()
            .filter(|node| node.kind() == kind)
            .collect()
    }

    /// Direct children of `parent` whose kind is `kind`.
    fn children_of_kind(parent: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        parent
            .children()
            .filter(|child| child.kind() == kind)
            .collect()
    }

    /// First node yielded by `root.descendants()` with the given kind;
    /// panics with `what` when there is none.
    fn first_descendant(root: &SyntaxNode, kind: SyntaxKind, what: &str) -> SyntaxNode {
        root.descendants()
            .find(|node| node.kind() == kind)
            .expect(what)
    }

    /// First direct child of `parent` with the given kind; panics with
    /// `what` when there is none.
    fn first_child(parent: &SyntaxNode, kind: SyntaxKind, what: &str) -> SyntaxNode {
        parent
            .children()
            .find(|child| child.kind() == kind)
            .expect(what)
    }

    /// Whether any token beneath `root` has exactly this kind and text.
    fn contains_token(root: &SyntaxNode, kind: SyntaxKind, text: &str) -> bool {
        tokens(root)
            .iter()
            .any(|(found_kind, found_text)| *found_kind == kind && found_text == text)
    }

    /// Concatenated `YAML_SCALAR_TEXT` token text of every
    /// `YAML_BLOCK_MAP_KEY` node in `tree`; keys that contribute no scalar
    /// text are left out.
    fn block_map_key_texts(tree: &SyntaxNode) -> Vec<String> {
        descendants_of_kind(tree, SyntaxKind::YAML_BLOCK_MAP_KEY)
            .iter()
            .map(|key| {
                tokens(key)
                    .into_iter()
                    .filter(|(kind, _)| *kind == SyntaxKind::YAML_SCALAR_TEXT)
                    .map(|(_, text)| text)
                    .collect::<String>()
            })
            .filter(|joined| !joined.is_empty())
            .collect()
    }

    #[test]
    fn builds_basic_rowan_tree_for_multiline_mapping() {
        let input = "title: My Title\nauthor: Me\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.kind(), SyntaxKind::YAML_STREAM);
        assert_eq!(tree.text().to_string(), input);

        let mapping = first_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "yaml block map");
        let entries = children_of_kind(&mapping, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(entries.len(), 2);

        // Two `key: value\n` lines, five tokens each.
        let token_kinds: Vec<SyntaxKind> = tokens(&mapping)
            .into_iter()
            .map(|(kind, _)| kind)
            .collect();
        assert_eq!(
            token_kinds,
            vec![
                SyntaxKind::YAML_SCALAR_TEXT,
                SyntaxKind::YAML_COLON,
                SyntaxKind::WHITESPACE,
                SyntaxKind::YAML_SCALAR_TEXT,
                SyntaxKind::NEWLINE,
                SyntaxKind::YAML_SCALAR_TEXT,
                SyntaxKind::YAML_COLON,
                SyntaxKind::WHITESPACE,
                SyntaxKind::YAML_SCALAR_TEXT,
                SyntaxKind::NEWLINE,
            ]
        );
    }

    #[test]
    fn mapping_nodes_preserve_entry_text_boundaries() {
        let tree = parse_yaml_tree("title: A\nauthor: B\n").expect("tree");
        let mapping = first_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "yaml block map");

        let entry_texts: Vec<String> =
            children_of_kind(&mapping, SyntaxKind::YAML_BLOCK_MAP_ENTRY)
                .iter()
                .map(|entry| entry.text().to_string())
                .collect();
        assert_eq!(entry_texts, ["title: A\n", "author: B\n"]);
    }

    #[test]
    fn splits_mapping_on_colon_outside_quoted_key() {
        let input = "\"foo:bar\": 23\n'x:y': 24\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);
        assert_eq!(block_map_key_texts(&tree), ["\"foo:bar\"", "'x:y'"]);
    }

    #[test]
    fn keeps_colon_inside_escaped_double_quoted_key() {
        let input = "\"foo\\\":bar\": 23\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);
        assert_eq!(block_map_key_texts(&tree), ["\"foo\\\":bar\""]);
    }

    #[test]
    fn keeps_hash_in_double_quoted_scalar_value() {
        let tree = parse_yaml_tree("foo: \"a#b\"\n").expect("tree");

        // The `#` sits inside quotes, so it must not start a comment token.
        let comment_count = tokens(&tree)
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::YAML_COMMENT)
            .count();
        assert_eq!(comment_count, 0);

        let value_scalars: Vec<String> =
            descendants_of_kind(&tree, SyntaxKind::YAML_BLOCK_MAP_VALUE)
                .iter()
                .flat_map(|value| children_of_kind(value, SyntaxKind::YAML_SCALAR))
                .map(|scalar| scalar.text().to_string())
                .collect();
        assert_eq!(value_scalars, ["\"a#b\""]);
    }

    #[test]
    fn keeps_colon_inside_single_quoted_key_with_escaped_quote() {
        let input = "'foo'':bar': 23\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);
        assert_eq!(block_map_key_texts(&tree), ["'foo'':bar'"]);
    }

    #[test]
    fn parser_preserves_document_markers_and_directives() {
        let input = "%YAML 1.2\n---\nfoo: bar\n...\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let all_tokens = tokens(&tree);
        let scalar_or_directive_texts: Vec<&str> = all_tokens
            .iter()
            .filter(|(kind, _)| {
                matches!(
                    *kind,
                    SyntaxKind::YAML_SCALAR_TEXT | SyntaxKind::YAML_DIRECTIVE
                )
            })
            .map(|(_, text)| text.as_str())
            .collect();

        assert!(scalar_or_directive_texts.contains(&"%YAML 1.2"));
        assert!(scalar_or_directive_texts.contains(&"bar"));

        assert!(
            contains_token(&tree, SyntaxKind::YAML_DOCUMENT_START, "---"),
            "--- should be a YAML_DOCUMENT_START token"
        );
        assert!(
            contains_token(&tree, SyntaxKind::YAML_DOCUMENT_END, "..."),
            "... should be a YAML_DOCUMENT_END token"
        );
    }

    #[test]
    fn parser_preserves_standalone_flow_mapping_lines() {
        let input = "{foo: bar}\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let flow_entries = descendants_of_kind(&tree, SyntaxKind::YAML_FLOW_MAP_ENTRY);
        assert_eq!(flow_entries.len(), 1);

        let flow_values: Vec<String> =
            descendants_of_kind(&tree, SyntaxKind::YAML_FLOW_MAP_VALUE)
                .iter()
                .map(|value| value.text().to_string())
                .collect();
        assert_eq!(flow_values, [" bar"]);
    }

    #[test]
    fn parser_preserves_top_level_quoted_scalar_document() {
        let input = "\"foo: bar\\\": baz\"\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn parse_yaml_report_emits_error_code_for_invalid_yaml() {
        // `this` at the top of a block-map context is a stray scalar with no
        // following colon — flagged at the leading scalar rather than at the
        // later indent that surfaced as a side-effect.
        assert_single_diagnostic!(
            "this\n is\n  invalid: x\n",
            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_document_end() {
        assert_single_diagnostic!(
            "---\nkey: value\n... invalid\n",
            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_unexpected_flow_closer() {
        assert_single_diagnostic!(
            "---\n[ a, b, c ] ]\n",
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_unterminated_nested_flow_sequence() {
        assert_single_diagnostic!(
            "---\n[ [ a, b, c ]\n",
            diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
        );
    }

    #[test]
    fn parse_yaml_report_detects_invalid_leading_flow_sequence_comma() {
        assert_single_diagnostic!(
            "---\n[ , a, b, c ]\n",
            diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_flow_end() {
        assert_single_diagnostic!(
            "---\n[ a, b, c, ]#invalid\n",
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
        );
    }

    #[test]
    fn parse_yaml_report_detects_invalid_double_quoted_escape() {
        assert_single_diagnostic!(
            "---\n\"\\.\"\n",
            diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
        );
    }

    #[test]
    fn parse_yaml_report_detects_trailing_content_after_document_start() {
        assert_single_diagnostic!(
            "--- key1: value1\n    key2: value2\n",
            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START,
        );
    }

    #[test]
    fn parse_yaml_report_detects_directive_without_document_start() {
        assert_single_diagnostic!(
            "%YAML 1.2\n",
            diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
        );
    }

    #[test]
    fn parse_yaml_report_detects_directive_after_content() {
        // Tag-shape: tag dispatch terminates the scalar before `%TAG`
        // hits column 0, so the directive lands in its real position
        // after content.
        assert_single_diagnostic!(
            "!foo \"bar\"\n%TAG !x! tag:example.com,2014:\n---\n",
            diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
        );
    }

    #[test]
    fn parse_yaml_report_detects_wrong_indented_flow_continuation() {
        assert_single_diagnostic!(
            "---\nflow: [a,\nb,\nc]\n",
            diagnostic_codes::LEX_WRONG_INDENTED_FLOW,
        );
    }

    #[test]
    fn parser_builds_flow_sequence_nodes_in_mapping_value() {
        let input = "a: [b, c]\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = first_descendant(
            &tree,
            SyntaxKind::YAML_FLOW_SEQUENCE,
            "flow sequence node",
        );
        let items = children_of_kind(&seq, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM);
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn parser_absorbs_literal_block_scalar_into_map_value() {
        let input = "a: |\n  line1\n  line2\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let map = first_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "block map");
        let entry = first_child(&map, SyntaxKind::YAML_BLOCK_MAP_ENTRY, "entry");
        let value = first_child(&entry, SyntaxKind::YAML_BLOCK_MAP_VALUE, "value");
        let value_text = value.text().to_string();

        // The header may or may not be preceded by the separating space.
        let has_header = value_text.starts_with('|') || value_text.starts_with(" |");
        assert!(
            has_header,
            "value should contain the `|` header, got {value_text:?}"
        );

        let has_both_lines = value_text.contains("line1") && value_text.contains("line2");
        assert!(
            has_both_lines,
            "value should absorb block scalar content, got {value_text:?}"
        );
    }

    #[test]
    fn parser_builds_nested_block_sequence_on_same_line() {
        let input = "- - a\n  - b\n- c\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let outer = first_descendant(
            &tree,
            SyntaxKind::YAML_BLOCK_SEQUENCE,
            "outer block sequence",
        );
        let outer_items = children_of_kind(&outer, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(outer_items.len(), 2);

        let nested = first_child(
            &outer_items[0],
            SyntaxKind::YAML_BLOCK_SEQUENCE,
            "nested block sequence inside first item",
        );
        let nested_items = children_of_kind(&nested, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(nested_items.len(), 2);
    }

    #[test]
    fn parser_builds_multiline_flow_map_inside_block_sequence_item() {
        let input = "- { multi\n  line, a: b}\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = first_descendant(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let item = first_child(
            &seq,
            SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM,
            "sequence item",
        );
        // Only the presence of the flow map matters here.
        let _flow_map = first_child(
            &item,
            SyntaxKind::YAML_FLOW_MAP,
            "flow map inside sequence item",
        );
    }

    #[test]
    fn parser_builds_flow_sequence_inside_block_sequence_item() {
        let input = "- [a, b]\n- [c, d]\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = first_descendant(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let items = children_of_kind(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(items.len(), 2);

        for item in &items {
            let flow = first_child(
                item,
                SyntaxKind::YAML_FLOW_SEQUENCE,
                "flow sequence inside item",
            );
            let flow_items = children_of_kind(&flow, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM);
            assert_eq!(flow_items.len(), 2);
        }
    }

    #[test]
    fn parser_emits_scalar_document_for_tag_without_colon() {
        let input = "! a\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let block_maps = descendants_of_kind(&tree, SyntaxKind::YAML_BLOCK_MAP);
        assert!(
            block_maps.is_empty(),
            "scalar document should not be wrapped in YAML_BLOCK_MAP"
        );

        // The scanner emits the leading `!` as a dedicated YAML_TAG
        // token; the projection layer reads the tag from that token.
        assert!(
            contains_token(&tree, SyntaxKind::YAML_TAG, "!"),
            "tree should contain a YAML_TAG token for the leading `!`"
        );
    }

    #[test]
    fn parser_builds_nested_block_map_inside_block_sequence() {
        let input = "-\n  name: Mark\n  hr: 65\n";
        let tree = parse_yaml_tree(input).expect("tree");
        assert_eq!(tree.text().to_string(), input);

        let seq = first_descendant(&tree, SyntaxKind::YAML_BLOCK_SEQUENCE, "block sequence");
        let items = children_of_kind(&seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
        assert_eq!(items.len(), 1);

        let nested_map = first_child(
            &items[0],
            SyntaxKind::YAML_BLOCK_MAP,
            "nested block map inside sequence item",
        );
        let entries = children_of_kind(&nested_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(entries.len(), 2);
    }

    #[test]
    fn parser_builds_nested_block_map_from_indent_tokens() {
        let input = "root:\n  child: 2\n";
        let tree = parse_yaml_tree(input).expect("tree");

        // Walk stream -> outer map -> entry -> value -> nested map.
        let outer_map = first_descendant(&tree, SyntaxKind::YAML_BLOCK_MAP, "outer map");
        let outer_entry = first_child(
            &outer_map,
            SyntaxKind::YAML_BLOCK_MAP_ENTRY,
            "outer entry",
        );
        let outer_value = first_child(
            &outer_entry,
            SyntaxKind::YAML_BLOCK_MAP_VALUE,
            "outer value",
        );
        let nested_map = first_child(&outer_value, SyntaxKind::YAML_BLOCK_MAP, "nested map");

        let nested_entries = children_of_kind(&nested_map, SyntaxKind::YAML_BLOCK_MAP_ENTRY);
        assert_eq!(nested_entries.len(), 1);
    }
}