1#[path = "yaml/events.rs"]
12mod events;
13#[path = "yaml/model.rs"]
14mod model;
15#[path = "yaml/parser.rs"]
16mod parser;
17#[path = "yaml/parser_v2.rs"]
18mod parser_v2;
19#[path = "yaml/scanner.rs"]
20mod scanner;
21#[path = "yaml/validator.rs"]
22mod validator;
23
24pub use events::{project_events, project_events_from_tree};
25pub use model::{
26 ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
27 YamlParseReport, diagnostic_codes,
28};
29pub use parser::{parse_shadow, parse_yaml_report, parse_yaml_tree};
30pub use parser_v2::{ShadowParserV2Report, parse_v2, shadow_parser_v2_check};
31pub use scanner::{ShadowScannerReport, shadow_scanner_check};
32
33#[doc(hidden)]
34pub fn validate_yaml_for_test(input: &str) -> Option<YamlDiagnostic> {
35 validator::validate_yaml(input)
36}
37
38#[cfg(test)]
39mod tests {
40 use super::*;
41 use crate::syntax::SyntaxKind;
42
43 #[test]
44 fn builds_basic_rowan_tree_for_multiline_mapping() {
45 let tree = parse_yaml_tree("title: My Title\nauthor: Me\n").expect("tree");
46 assert_eq!(tree.kind(), SyntaxKind::DOCUMENT);
47 assert_eq!(tree.text().to_string(), "title: My Title\nauthor: Me\n");
48
49 let mapping = tree
50 .descendants()
51 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
52 .expect("yaml block map");
53 let entries: Vec<_> = mapping
54 .children()
55 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
56 .collect();
57 assert_eq!(entries.len(), 2);
58
59 let token_kinds: Vec<_> = mapping
60 .descendants_with_tokens()
61 .filter_map(|el| el.into_token())
62 .map(|tok| tok.kind())
63 .collect();
64 assert_eq!(
65 token_kinds,
66 vec![
67 SyntaxKind::YAML_SCALAR,
68 SyntaxKind::YAML_COLON,
69 SyntaxKind::WHITESPACE,
70 SyntaxKind::YAML_SCALAR,
71 SyntaxKind::NEWLINE,
72 SyntaxKind::YAML_SCALAR,
73 SyntaxKind::YAML_COLON,
74 SyntaxKind::WHITESPACE,
75 SyntaxKind::YAML_SCALAR,
76 SyntaxKind::NEWLINE,
77 ]
78 );
79 }
80
81 fn block_map_key_texts(tree: &crate::syntax::SyntaxNode) -> Vec<String> {
82 tree.descendants()
83 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
84 .map(|key| {
85 key.children_with_tokens()
86 .filter_map(|el| el.into_token())
87 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
88 .map(|tok| tok.text().to_string())
89 .collect::<Vec<_>>()
90 .join("")
91 })
92 .filter(|s| !s.is_empty())
93 .collect()
94 }
95
96 #[test]
97 fn mapping_nodes_preserve_entry_text_boundaries() {
98 let tree = parse_yaml_tree("title: A\nauthor: B\n").expect("tree");
99 let mapping = tree
100 .descendants()
101 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
102 .expect("yaml block map");
103
104 let entry_texts: Vec<_> = mapping
105 .children()
106 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
107 .map(|n| n.text().to_string())
108 .collect();
109 assert_eq!(
110 entry_texts,
111 vec!["title: A\n".to_string(), "author: B\n".to_string(),]
112 );
113 }
114
115 #[test]
116 fn splits_mapping_on_colon_outside_quoted_key() {
117 let input = "\"foo:bar\": 23\n'x:y': 24\n";
118 let tree = parse_yaml_tree(input).expect("tree");
119 assert_eq!(tree.text().to_string(), input);
120 assert_eq!(
121 block_map_key_texts(&tree),
122 vec!["\"foo:bar\"".to_string(), "'x:y'".to_string()]
123 );
124 }
125
126 #[test]
127 fn keeps_colon_inside_escaped_double_quoted_key() {
128 let input = "\"foo\\\":bar\": 23\n";
129 let tree = parse_yaml_tree(input).expect("tree");
130 assert_eq!(tree.text().to_string(), input);
131 assert_eq!(
132 block_map_key_texts(&tree),
133 vec!["\"foo\\\":bar\"".to_string()]
134 );
135 }
136
137 #[test]
138 fn keeps_hash_in_double_quoted_scalar_value() {
139 let input = "foo: \"a#b\"\n";
140 let tree = parse_yaml_tree(input).expect("tree");
141
142 let comment_count = tree
143 .descendants_with_tokens()
144 .filter_map(|el| el.into_token())
145 .filter(|tok| tok.kind() == SyntaxKind::YAML_COMMENT)
146 .count();
147 assert_eq!(comment_count, 0);
148
149 let value_scalars: Vec<String> = tree
150 .descendants()
151 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
152 .flat_map(|value| {
153 value
154 .children_with_tokens()
155 .filter_map(|el| el.into_token())
156 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
157 .map(|tok| tok.text().to_string())
158 .collect::<Vec<_>>()
159 })
160 .collect();
161 assert_eq!(value_scalars, vec!["\"a#b\"".to_string()]);
162 }
163
164 #[test]
165 fn keeps_colon_inside_single_quoted_key_with_escaped_quote() {
166 let input = "'foo'':bar': 23\n";
167 let tree = parse_yaml_tree(input).expect("tree");
168 assert_eq!(tree.text().to_string(), input);
169 assert_eq!(block_map_key_texts(&tree), vec!["'foo'':bar'".to_string()]);
170 }
171
172 #[test]
173 fn parser_preserves_document_markers_and_directives() {
174 let input = "%YAML 1.2\n---\nfoo: bar\n...\n";
175 let tree = parse_yaml_tree(input).expect("tree");
176 assert_eq!(tree.text().to_string(), input);
177
178 let scalar_tokens: Vec<String> = tree
179 .descendants_with_tokens()
180 .filter_map(|el| el.into_token())
181 .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
182 .map(|tok| tok.text().to_string())
183 .collect();
184
185 assert!(scalar_tokens.contains(&"%YAML 1.2".to_string()));
186 assert!(scalar_tokens.contains(&"bar".to_string()));
187
188 let has_doc_start = tree
189 .descendants_with_tokens()
190 .filter_map(|el| el.into_token())
191 .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START && tok.text() == "---");
192 assert!(has_doc_start, "--- should be a YAML_DOCUMENT_START token");
193
194 let has_doc_end = tree
195 .descendants_with_tokens()
196 .filter_map(|el| el.into_token())
197 .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END && tok.text() == "...");
198 assert!(has_doc_end, "... should be a YAML_DOCUMENT_END token");
199 }
200
201 #[test]
202 fn parser_preserves_standalone_flow_mapping_lines() {
203 let input = "{foo: bar}\n";
204 let tree = parse_yaml_tree(input).expect("tree");
205 assert_eq!(tree.text().to_string(), input);
206
207 let flow_entry_count = tree
208 .descendants()
209 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
210 .count();
211 assert_eq!(flow_entry_count, 1);
212
213 let flow_values: Vec<String> = tree
214 .descendants()
215 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
216 .map(|n| n.text().to_string())
217 .collect();
218 assert_eq!(flow_values, vec![" bar".to_string()]);
219 }
220
221 #[test]
222 fn parser_preserves_top_level_quoted_scalar_document() {
223 let input = "\"foo: bar\\\": baz\"\n";
224 let tree = parse_yaml_tree(input).expect("tree");
225 assert_eq!(tree.text().to_string(), input);
226 }
227
228 #[test]
229 fn parse_yaml_report_emits_error_code_for_invalid_yaml() {
230 let report = parse_yaml_report("this\n is\n invalid: x\n");
234 assert!(report.tree.is_none());
235 assert_eq!(report.diagnostics.len(), 1);
236 assert_eq!(
237 report.diagnostics[0].code,
238 diagnostic_codes::PARSE_INVALID_KEY_TOKEN
239 );
240 }
241
242 #[test]
243 fn parse_yaml_report_detects_trailing_content_after_document_end() {
244 let report = parse_yaml_report("---\nkey: value\n... invalid\n");
245 assert!(report.tree.is_none());
246 assert_eq!(report.diagnostics.len(), 1);
247 assert_eq!(
248 report.diagnostics[0].code,
249 diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END
250 );
251 }
252
253 #[test]
254 fn parse_yaml_report_detects_unexpected_flow_closer() {
255 let report = parse_yaml_report("---\n[ a, b, c ] ]\n");
256 assert!(report.tree.is_none());
257 assert_eq!(report.diagnostics.len(), 1);
258 assert_eq!(
259 report.diagnostics[0].code,
260 diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
261 );
262 }
263
264 #[test]
265 fn parse_yaml_report_detects_unterminated_nested_flow_sequence() {
266 let report = parse_yaml_report("---\n[ [ a, b, c ]\n");
267 assert!(report.tree.is_none());
268 assert_eq!(report.diagnostics.len(), 1);
269 assert_eq!(
270 report.diagnostics[0].code,
271 diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE
272 );
273 }
274
275 #[test]
276 fn parse_yaml_report_detects_invalid_leading_flow_sequence_comma() {
277 let report = parse_yaml_report("---\n[ , a, b, c ]\n");
278 assert!(report.tree.is_none());
279 assert_eq!(report.diagnostics.len(), 1);
280 assert_eq!(
281 report.diagnostics[0].code,
282 diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
283 );
284 }
285
286 #[test]
287 fn parse_yaml_report_detects_trailing_content_after_flow_end() {
288 let report = parse_yaml_report("---\n[ a, b, c, ]#invalid\n");
289 assert!(report.tree.is_none());
290 assert_eq!(report.diagnostics.len(), 1);
291 assert_eq!(
292 report.diagnostics[0].code,
293 diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
294 );
295 }
296
297 #[test]
298 fn parse_yaml_report_detects_invalid_double_quoted_escape() {
299 let report = parse_yaml_report("---\n\"\\.\"\n");
300 assert!(report.tree.is_none());
301 assert_eq!(report.diagnostics.len(), 1);
302 assert_eq!(
303 report.diagnostics[0].code,
304 diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE
305 );
306 }
307
308 #[test]
309 fn parse_yaml_report_detects_trailing_content_after_document_start() {
310 let report = parse_yaml_report("--- key1: value1\n key2: value2\n");
311 assert!(report.tree.is_none());
312 assert_eq!(report.diagnostics.len(), 1);
313 assert_eq!(
314 report.diagnostics[0].code,
315 diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START
316 );
317 }
318
319 #[test]
320 fn parse_yaml_report_detects_directive_without_document_start() {
321 let report = parse_yaml_report("%YAML 1.2\n");
322 assert!(report.tree.is_none());
323 assert_eq!(report.diagnostics.len(), 1);
324 assert_eq!(
325 report.diagnostics[0].code,
326 diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
327 );
328 }
329
330 #[test]
331 fn parse_yaml_report_detects_directive_after_content() {
332 let report = parse_yaml_report("---\nscalar1 # comment\n%YAML 1.2\n---\nscalar2\n");
335 assert!(report.tree.is_none());
336 assert_eq!(report.diagnostics.len(), 1);
337 assert_eq!(
338 report.diagnostics[0].code,
339 diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT
340 );
341 }
342
343 #[test]
344 fn parse_yaml_report_detects_wrong_indented_flow_continuation() {
345 let report = parse_yaml_report("---\nflow: [a,\nb,\nc]\n");
346 assert!(report.tree.is_none());
347 assert_eq!(report.diagnostics.len(), 1);
348 assert_eq!(
349 report.diagnostics[0].code,
350 diagnostic_codes::LEX_WRONG_INDENTED_FLOW
351 );
352 }
353
354 #[test]
355 fn parser_builds_flow_sequence_nodes_in_mapping_value() {
356 let input = "a: [b, c]\n";
357 let tree = parse_yaml_tree(input).expect("tree");
358 assert_eq!(tree.text().to_string(), input);
359
360 let seq = tree
361 .descendants()
362 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
363 .expect("flow sequence node");
364 let item_count = seq
365 .children()
366 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
367 .count();
368 assert_eq!(item_count, 2);
369 }
370
371 #[test]
372 fn parser_absorbs_literal_block_scalar_into_map_value() {
373 let input = "a: |\n line1\n line2\n";
374 let tree = parse_yaml_tree(input).expect("tree");
375 assert_eq!(tree.text().to_string(), input);
376
377 let map = tree
378 .descendants()
379 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
380 .expect("block map");
381 let entry = map
382 .children()
383 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
384 .expect("entry");
385 let value = entry
386 .children()
387 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
388 .expect("value");
389 let value_text = value.text().to_string();
390 assert!(
391 value_text.starts_with('|') || value_text.starts_with(" |"),
392 "value should contain the `|` header, got {value_text:?}"
393 );
394 assert!(
395 value_text.contains("line1") && value_text.contains("line2"),
396 "value should absorb block scalar content, got {value_text:?}"
397 );
398 }
399
400 #[test]
401 fn parser_builds_nested_block_sequence_on_same_line() {
402 let input = "- - a\n - b\n- c\n";
403 let tree = parse_yaml_tree(input).expect("tree");
404 assert_eq!(tree.text().to_string(), input);
405
406 let outer = tree
407 .descendants()
408 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
409 .expect("outer block sequence");
410 let outer_items: Vec<_> = outer
411 .children()
412 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
413 .collect();
414 assert_eq!(outer_items.len(), 2);
415
416 let nested = outer_items[0]
417 .children()
418 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
419 .expect("nested block sequence inside first item");
420 let nested_items = nested
421 .children()
422 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
423 .count();
424 assert_eq!(nested_items, 2);
425 }
426
427 #[test]
428 fn parser_builds_multiline_flow_map_inside_block_sequence_item() {
429 let input = "- { multi\n line, a: b}\n";
430 let tree = parse_yaml_tree(input).expect("tree");
431 assert_eq!(tree.text().to_string(), input);
432
433 let seq = tree
434 .descendants()
435 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
436 .expect("block sequence");
437 let item = seq
438 .children()
439 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
440 .expect("sequence item");
441 item.children()
442 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
443 .expect("flow map inside sequence item");
444 }
445
446 #[test]
447 fn parser_builds_flow_sequence_inside_block_sequence_item() {
448 let input = "- [a, b]\n- [c, d]\n";
449 let tree = parse_yaml_tree(input).expect("tree");
450 assert_eq!(tree.text().to_string(), input);
451
452 let seq = tree
453 .descendants()
454 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
455 .expect("block sequence");
456 let items: Vec<_> = seq
457 .children()
458 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
459 .collect();
460 assert_eq!(items.len(), 2);
461
462 for item in &items {
463 let flow = item
464 .children()
465 .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
466 .expect("flow sequence inside item");
467 let flow_items = flow
468 .children()
469 .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
470 .count();
471 assert_eq!(flow_items, 2);
472 }
473 }
474
475 #[test]
476 fn parser_emits_scalar_document_for_tag_without_colon() {
477 let input = "! a\n";
478 let tree = parse_yaml_tree(input).expect("tree");
479 assert_eq!(tree.text().to_string(), input);
480
481 let has_block_map = tree
482 .descendants()
483 .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP);
484 assert!(
485 !has_block_map,
486 "scalar document should not be wrapped in YAML_BLOCK_MAP"
487 );
488
489 let has_tagged_scalar = tree
492 .descendants_with_tokens()
493 .filter_map(|el| el.into_token())
494 .any(|tok| tok.kind() == SyntaxKind::YAML_SCALAR && tok.text().starts_with('!'));
495 assert!(has_tagged_scalar, "tree should contain tag bytes in scalar");
496 }
497
498 #[test]
499 fn parser_builds_nested_block_map_inside_block_sequence() {
500 let input = "-\n name: Mark\n hr: 65\n";
501 let tree = parse_yaml_tree(input).expect("tree");
502 assert_eq!(tree.text().to_string(), input);
503
504 let seq = tree
505 .descendants()
506 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
507 .expect("block sequence");
508 let items: Vec<_> = seq
509 .children()
510 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
511 .collect();
512 assert_eq!(items.len(), 1);
513
514 let nested_map = items[0]
515 .children()
516 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
517 .expect("nested block map inside sequence item");
518 let entry_count = nested_map
519 .children()
520 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
521 .count();
522 assert_eq!(entry_count, 2);
523 }
524
525 #[test]
526 fn parser_builds_nested_block_map_from_indent_tokens() {
527 let input = "root:\n child: 2\n";
528 let tree = parse_yaml_tree(input).expect("tree");
529
530 let outer_map = tree
531 .descendants()
532 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
533 .expect("outer map");
534 let outer_entry = outer_map
535 .children()
536 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
537 .expect("outer entry");
538 let outer_value = outer_entry
539 .children()
540 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
541 .expect("outer value");
542
543 let nested_map = outer_value
544 .children()
545 .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
546 .expect("nested map");
547 let nested_entry_count = nested_map
548 .children()
549 .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
550 .count();
551 assert_eq!(nested_entry_count, 1);
552 }
553
554 #[test]
555 fn shadow_parse_is_disabled_by_default() {
556 let report = parse_shadow("title: My Title", ShadowYamlOptions::default());
557 assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
558 assert_eq!(report.shadow_reason, "shadow-disabled");
559 assert_eq!(report.normalized_input, None);
560 }
561
562 #[test]
563 fn shadow_parse_skips_when_disabled_even_for_valid_input() {
564 let report = parse_shadow(
565 "title: My Title",
566 ShadowYamlOptions {
567 enabled: false,
568 input_kind: YamlInputKind::Plain,
569 },
570 );
571 assert_eq!(report.outcome, ShadowYamlOutcome::SkippedDisabled);
572 assert_eq!(report.shadow_reason, "shadow-disabled");
573 }
574
575 #[test]
576 fn shadow_parse_reports_prototype_parsed_when_enabled() {
577 let report = parse_shadow(
578 "title: My Title",
579 ShadowYamlOptions {
580 enabled: true,
581 input_kind: YamlInputKind::Plain,
582 },
583 );
584 assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
585 assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
586 assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
587 }
588
589 #[test]
590 fn shadow_parse_reports_prototype_rejected_when_enabled() {
591 let report = parse_shadow(
595 "[ a, b",
596 ShadowYamlOptions {
597 enabled: true,
598 input_kind: YamlInputKind::Plain,
599 },
600 );
601 assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeRejected);
602 assert_eq!(report.shadow_reason, "prototype-basic-mapping-rejected");
603 }
604
605 #[test]
606 fn shadow_parse_accepts_hashpipe_mode_but_remains_prototype_scoped() {
607 let report = parse_shadow(
608 "#| title: My Title",
609 ShadowYamlOptions {
610 enabled: true,
611 input_kind: YamlInputKind::Hashpipe,
612 },
613 );
614 assert_eq!(report.outcome, ShadowYamlOutcome::PrototypeParsed);
615 assert_eq!(report.shadow_reason, "prototype-basic-mapping-parsed");
616 assert_eq!(report.normalized_input.as_deref(), Some("title: My Title"));
617 }
618}