1use crate::syntax::{SyntaxKind, SyntaxNode};
2use rowan::GreenNodeBuilder;
3
4use super::lexer::{lex_mapping_tokens_with_diagnostic, split_once_unquoted_key_colon};
5use super::model::{
6 ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
7 YamlParseReport, YamlToken, YamlTokenSpan, diagnostic_codes,
8};
9
10pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
15 let line_count = input.lines().count().max(1);
16
17 if !options.enabled {
18 return ShadowYamlReport {
19 outcome: ShadowYamlOutcome::SkippedDisabled,
20 shadow_reason: "shadow-disabled",
21 input_kind: options.input_kind,
22 input_len_bytes: input.len(),
23 line_count,
24 normalized_input: None,
25 };
26 }
27
28 let normalized = match options.input_kind {
29 YamlInputKind::Plain => input.to_owned(),
30 YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
31 };
32
33 let parsed = parse_yaml_tree(&normalized).is_some();
34
35 ShadowYamlReport {
36 outcome: if parsed {
37 ShadowYamlOutcome::PrototypeParsed
38 } else {
39 ShadowYamlOutcome::PrototypeRejected
40 },
41 shadow_reason: if parsed {
42 "prototype-basic-mapping-parsed"
43 } else {
44 "prototype-basic-mapping-rejected"
45 },
46 input_kind: options.input_kind,
47 input_len_bytes: input.len(),
48 line_count,
49 normalized_input: Some(normalized),
50 }
51}
52
53fn normalize_hashpipe_input(input: &str) -> String {
54 input
55 .lines()
56 .map(strip_hashpipe_prefix)
57 .collect::<Vec<_>>()
58 .join("\n")
59}
60
/// Removes a leading `#|` marker, plus at most one following space, from `line`.
///
/// Lines that do not start with `#|` are returned unchanged.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        Some(after_marker) => after_marker.strip_prefix(' ').unwrap_or(after_marker),
        None => line,
    }
}
67
68fn emit_token_as_yaml(builder: &mut GreenNodeBuilder<'_>, token: &YamlTokenSpan<'_>) {
69 let kind = match token.kind {
70 YamlToken::Whitespace => SyntaxKind::WHITESPACE,
71 YamlToken::Comment => SyntaxKind::YAML_COMMENT,
72 YamlToken::Tag => SyntaxKind::YAML_TAG,
73 YamlToken::Colon => SyntaxKind::YAML_COLON,
74 _ => SyntaxKind::YAML_SCALAR,
75 };
76 builder.token(kind.into(), token.text);
77}
78
79fn diag_at_token(
80 token: &YamlTokenSpan<'_>,
81 code: &'static str,
82 message: &'static str,
83) -> YamlDiagnostic {
84 YamlDiagnostic {
85 code,
86 message,
87 byte_start: token.byte_start,
88 byte_end: token.byte_end,
89 }
90}
91
92fn emit_flow_sequence<'a>(
93 builder: &mut GreenNodeBuilder<'_>,
94 tokens: &[YamlTokenSpan<'a>],
95 i: &mut usize,
96) -> Result<(), YamlDiagnostic> {
97 if *i >= tokens.len() || tokens[*i].kind != YamlToken::FlowSeqStart {
98 return Err(YamlDiagnostic {
99 code: diagnostic_codes::PARSE_EXPECTED_FLOW_SEQUENCE_START,
100 message: "expected flow sequence start token",
101 byte_start: tokens.get(*i).map(|t| t.byte_start).unwrap_or(0),
102 byte_end: tokens.get(*i).map(|t| t.byte_end).unwrap_or(0),
103 });
104 }
105
106 builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
107 emit_token_as_yaml(builder, &tokens[*i]); *i += 1;
109
110 let mut open_item = false;
111 while *i < tokens.len() {
112 match tokens[*i].kind {
113 YamlToken::FlowSeqEnd => {
114 if open_item {
115 builder.finish_node(); }
117 emit_token_as_yaml(builder, &tokens[*i]); *i += 1;
119 if *i < tokens.len() {
120 match tokens[*i].kind {
121 YamlToken::Newline | YamlToken::Comment => {}
122 YamlToken::Whitespace if tokens[*i].text.trim().is_empty() => {}
123 _ => {
124 return Err(diag_at_token(
125 &tokens[*i],
126 diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
127 "trailing content after flow sequence end",
128 ));
129 }
130 }
131 }
132 builder.finish_node(); return Ok(());
134 }
135 YamlToken::Comma => {
136 if !open_item {
137 return Err(diag_at_token(
138 &tokens[*i],
139 diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
140 "invalid comma position in flow sequence",
141 ));
142 }
143 builder.finish_node(); open_item = false;
145 emit_token_as_yaml(builder, &tokens[*i]);
146 *i += 1;
147 }
148 YamlToken::Whitespace | YamlToken::Newline | YamlToken::Indent | YamlToken::Dedent
149 if !open_item =>
150 {
151 emit_token_as_yaml(builder, &tokens[*i]);
152 *i += 1;
153 }
154 YamlToken::Scalar if !open_item && tokens[*i].text.trim().is_empty() => {
155 emit_token_as_yaml(builder, &tokens[*i]);
156 *i += 1;
157 }
158 YamlToken::FlowSeqStart => {
159 if !open_item {
160 builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
161 open_item = true;
162 }
163 emit_flow_sequence(builder, tokens, i)?;
164 }
165 YamlToken::FlowMapStart => {
166 if !open_item {
167 builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
168 open_item = true;
169 }
170 emit_flow_map(builder, tokens, i)?;
171 }
172 _ => {
173 if !open_item {
174 builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
175 open_item = true;
176 }
177 emit_token_as_yaml(builder, &tokens[*i]);
178 *i += 1;
179 }
180 }
181 }
182
183 let (byte_start, byte_end) =
184 if let Some(start) = tokens.iter().find(|t| t.kind == YamlToken::FlowSeqStart) {
185 (
186 start.byte_start,
187 tokens.last().map(|t| t.byte_end).unwrap_or(start.byte_end),
188 )
189 } else {
190 tokens
191 .last()
192 .map(|t| (t.byte_start, t.byte_end))
193 .unwrap_or((0, 0))
194 };
195 Err(YamlDiagnostic {
196 code: diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
197 message: "unterminated flow sequence",
198 byte_start,
199 byte_end,
200 })
201}
202
203fn emit_flow_map<'a>(
204 builder: &mut GreenNodeBuilder<'_>,
205 tokens: &[YamlTokenSpan<'a>],
206 i: &mut usize,
207) -> Result<(), YamlDiagnostic> {
208 if *i >= tokens.len() || tokens[*i].kind != YamlToken::FlowMapStart {
209 return Err(YamlDiagnostic {
210 code: diagnostic_codes::PARSE_EXPECTED_FLOW_MAP_START,
211 message: "expected flow map start token",
212 byte_start: tokens.get(*i).map(|t| t.byte_start).unwrap_or(0),
213 byte_end: tokens.get(*i).map(|t| t.byte_end).unwrap_or(0),
214 });
215 }
216
217 builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
218 emit_token_as_yaml(builder, &tokens[*i]); *i += 1;
220
221 loop {
222 while *i < tokens.len()
229 && (matches!(
230 tokens[*i].kind,
231 YamlToken::Whitespace | YamlToken::Newline | YamlToken::Indent | YamlToken::Dedent
232 ) || (tokens[*i].kind == YamlToken::Scalar && tokens[*i].text.trim().is_empty()))
233 {
234 emit_token_as_yaml(builder, &tokens[*i]);
235 *i += 1;
236 }
237
238 if *i >= tokens.len() {
239 let (byte_start, byte_end) = tokens
240 .last()
241 .map(|t| (t.byte_start, t.byte_end))
242 .unwrap_or((0, 0));
243 return Err(YamlDiagnostic {
244 code: diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP,
245 message: "unterminated flow map",
246 byte_start,
247 byte_end,
248 });
249 }
250
251 match tokens[*i].kind {
252 YamlToken::FlowMapEnd => {
253 emit_token_as_yaml(builder, &tokens[*i]);
254 *i += 1;
255 if *i < tokens.len() {
256 match tokens[*i].kind {
257 YamlToken::Newline
258 | YamlToken::Comment
259 | YamlToken::Whitespace
260 | YamlToken::FlowMapEnd
261 | YamlToken::FlowSeqEnd
262 | YamlToken::Comma => {}
263 _ => {
264 return Err(diag_at_token(
265 &tokens[*i],
266 diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
267 "trailing content after flow map end",
268 ));
269 }
270 }
271 }
272 builder.finish_node(); return Ok(());
274 }
275 YamlToken::Comma => {
276 emit_token_as_yaml(builder, &tokens[*i]);
277 *i += 1;
278 }
279 _ => {
280 emit_flow_map_entry(builder, tokens, i)?;
281 }
282 }
283 }
284}
285
286fn emit_flow_map_entry<'a>(
287 builder: &mut GreenNodeBuilder<'_>,
288 tokens: &[YamlTokenSpan<'a>],
289 i: &mut usize,
290) -> Result<(), YamlDiagnostic> {
291 builder.start_node(SyntaxKind::YAML_FLOW_MAP_ENTRY.into());
292 builder.start_node(SyntaxKind::YAML_FLOW_MAP_KEY.into());
293
294 while *i < tokens.len()
298 && matches!(
299 tokens[*i].kind,
300 YamlToken::Whitespace | YamlToken::Indent | YamlToken::Dedent
301 )
302 {
303 emit_token_as_yaml(builder, &tokens[*i]);
304 *i += 1;
305 }
306
307 let colon_at: Option<usize> = {
314 let mut j = *i;
315 let mut found = None;
316 while j < tokens.len() {
317 match tokens[j].kind {
318 YamlToken::Comma
319 | YamlToken::FlowMapEnd
320 | YamlToken::FlowSeqEnd
321 | YamlToken::FlowMapStart
322 | YamlToken::FlowSeqStart
323 | YamlToken::Tag
324 | YamlToken::Key
325 | YamlToken::Anchor
326 | YamlToken::Alias => break,
327 YamlToken::Scalar => {
328 if split_once_unquoted_key_colon(tokens[j].text).is_some() {
329 found = Some(j);
330 break;
331 }
332 }
333 _ => {}
334 }
335 j += 1;
336 }
337 found
338 };
339
340 let value_prefix: Option<&'a str> = if let Some(target) = colon_at {
341 while *i < target {
345 emit_token_as_yaml(builder, &tokens[*i]);
346 *i += 1;
347 }
348 let scalar = tokens[target];
349 *i += 1;
350 let (key_text, rest_text) = split_once_unquoted_key_colon(scalar.text)
351 .expect("implicit-key scan promised a colon in this scalar");
352 if !key_text.is_empty() {
353 builder.token(SyntaxKind::YAML_KEY.into(), key_text);
354 }
355 builder.token(
356 SyntaxKind::YAML_COLON.into(),
357 &scalar.text[key_text.len()..key_text.len() + 1],
358 );
359 Some(rest_text)
360 } else {
361 match tokens.get(*i).map(|t| t.kind) {
362 Some(YamlToken::Scalar) => {
363 let scalar = tokens[*i];
364 *i += 1;
365 builder.token(SyntaxKind::YAML_SCALAR.into(), scalar.text);
366 None
367 }
368 Some(YamlToken::Key) => {
369 builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
370 *i += 1;
371 while *i < tokens.len() && tokens[*i].kind == YamlToken::Whitespace {
372 emit_token_as_yaml(builder, &tokens[*i]);
373 *i += 1;
374 }
375 if *i < tokens.len() && tokens[*i].kind == YamlToken::Colon {
376 builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
377 *i += 1;
378 }
379 None
380 }
381 Some(YamlToken::Tag) => {
382 emit_token_as_yaml(builder, &tokens[*i]);
383 *i += 1;
384 None
385 }
386 _ => None,
387 }
388 };
389
390 builder.finish_node(); builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
393 if let Some(prefix) = value_prefix
394 && !prefix.is_empty()
395 {
396 builder.token(SyntaxKind::YAML_SCALAR.into(), prefix);
397 }
398 emit_flow_value_tokens(builder, tokens, i)?;
399 builder.finish_node(); builder.finish_node(); Ok(())
403}
404
405fn emit_flow_value_tokens<'a>(
406 builder: &mut GreenNodeBuilder<'_>,
407 tokens: &[YamlTokenSpan<'a>],
408 i: &mut usize,
409) -> Result<(), YamlDiagnostic> {
410 while *i < tokens.len() {
411 match tokens[*i].kind {
412 YamlToken::Comma | YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd => break,
413 YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
414 YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
415 _ => {
416 emit_token_as_yaml(builder, &tokens[*i]);
417 *i += 1;
418 }
419 }
420 }
421 Ok(())
422}
423
424fn emit_scalar_document<'a>(
425 builder: &mut GreenNodeBuilder<'_>,
426 tokens: &[YamlTokenSpan<'a>],
427 i: &mut usize,
428) -> Result<(), YamlDiagnostic> {
429 while *i < tokens.len() {
430 let kind = match tokens[*i].kind {
431 YamlToken::Newline => SyntaxKind::NEWLINE,
432 YamlToken::DocumentStart | YamlToken::DocumentEnd => break,
435 YamlToken::Tag => SyntaxKind::YAML_TAG,
436 YamlToken::Comment => SyntaxKind::YAML_COMMENT,
437 YamlToken::Whitespace => SyntaxKind::WHITESPACE,
438 YamlToken::Colon => SyntaxKind::YAML_COLON,
439 YamlToken::FlowMapStart
440 | YamlToken::FlowMapEnd
441 | YamlToken::FlowSeqStart
442 | YamlToken::FlowSeqEnd
443 | YamlToken::Comma => {
444 return Err(diag_at_token(
445 &tokens[*i],
446 diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
447 "unexpected flow indicator in plain scalar document",
448 ));
449 }
450 _ => SyntaxKind::YAML_SCALAR,
451 };
452 builder.token(kind.into(), tokens[*i].text);
453 *i += 1;
454 }
455 Ok(())
456}
457
458fn emit_block_seq<'a>(
459 builder: &mut GreenNodeBuilder<'_>,
460 tokens: &[YamlTokenSpan<'a>],
461 i: &mut usize,
462 stop_on_dedent: bool,
463) -> Result<(), YamlDiagnostic> {
464 let mut header_done = false;
471 while !header_done && *i < tokens.len() {
472 match tokens[*i].kind {
473 YamlToken::Tag => {
474 builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
475 *i += 1;
476 }
477 YamlToken::Scalar if tokens[*i].text.trim_start().starts_with('&') => {
478 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
479 *i += 1;
480 }
481 YamlToken::Whitespace => {
482 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
483 *i += 1;
484 }
485 YamlToken::Newline => {
486 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
487 *i += 1;
488 }
489 YamlToken::Comment => {
490 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
491 *i += 1;
492 }
493 _ => header_done = true,
494 }
495 }
496 while *i < tokens.len() {
497 match tokens[*i].kind {
498 YamlToken::Newline => {
499 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
500 *i += 1;
501 }
502 YamlToken::DocumentStart | YamlToken::DocumentEnd => break,
505 YamlToken::Whitespace => {
506 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
508 *i += 1;
509 }
510 YamlToken::Comment => {
518 let mut peek = *i + 1;
519 while peek < tokens.len()
520 && matches!(
521 tokens[peek].kind,
522 YamlToken::Newline | YamlToken::Whitespace | YamlToken::Comment
523 )
524 {
525 peek += 1;
526 }
527 if peek < tokens.len() && tokens[peek].kind == YamlToken::BlockSeqEntry {
528 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
529 *i += 1;
530 } else {
531 break;
532 }
533 }
534 YamlToken::Dedent => {
535 if stop_on_dedent {
536 *i += 1;
537 break;
538 }
539 break;
540 }
541 YamlToken::BlockSeqEntry => emit_block_seq_item(builder, tokens, i)?,
542 _ => break,
543 }
544 }
545 Ok(())
546}
547
548fn emit_block_seq_item<'a>(
549 builder: &mut GreenNodeBuilder<'_>,
550 tokens: &[YamlTokenSpan<'a>],
551 i: &mut usize,
552) -> Result<(), YamlDiagnostic> {
553 builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
554 builder.token(SyntaxKind::YAML_BLOCK_SEQ_ENTRY.into(), tokens[*i].text);
555 *i += 1;
556 let mut closed_via_nested_seq = false;
557 while *i < tokens.len() && tokens[*i].kind != YamlToken::Newline {
558 match tokens[*i].kind {
559 YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
560 YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
561 YamlToken::Indent => {
562 *i += 1;
568 builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
569 emit_block_seq(builder, tokens, i, true)?;
570 builder.finish_node(); closed_via_nested_seq = true;
572 break;
573 }
574 _ => {
575 emit_token_as_yaml(builder, &tokens[*i]);
576 *i += 1;
577 }
578 }
579 }
580 if !closed_via_nested_seq && *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
581 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
582 *i += 1;
583 }
584 if !closed_via_nested_seq && *i < tokens.len() && tokens[*i].kind == YamlToken::Indent {
587 *i += 1;
588 builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
589 emit_block_map(builder, tokens, i, true)?;
590 builder.finish_node(); }
592 builder.finish_node(); Ok(())
594}
595
596fn emit_block_map<'a>(
597 builder: &mut GreenNodeBuilder<'_>,
598 tokens: &[YamlTokenSpan<'a>],
599 i: &mut usize,
600 stop_on_dedent: bool,
601) -> Result<(), YamlDiagnostic> {
602 let mut closed_by_dedent = false;
603 while *i < tokens.len() {
604 match tokens[*i].kind {
605 YamlToken::Newline => {
606 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
607 *i += 1;
608 }
609 YamlToken::DocumentStart | YamlToken::DocumentEnd => break,
612 YamlToken::Directive | YamlToken::Comma => {
613 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
614 *i += 1;
615 }
616 YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd => {
617 return Err(diag_at_token(
618 &tokens[*i],
619 diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
620 "unexpected flow closing token",
621 ));
622 }
623 YamlToken::FlowMapStart | YamlToken::FlowSeqStart => {
624 if tokens[*i].kind == YamlToken::FlowMapStart {
625 emit_flow_map(builder, tokens, i)?;
626 } else {
627 emit_flow_sequence(builder, tokens, i)?;
628 }
629 }
630 YamlToken::Anchor
631 | YamlToken::Alias
632 | YamlToken::BlockScalarHeader
633 | YamlToken::BlockScalarContent => {
634 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
635 *i += 1;
636 }
637 YamlToken::Scalar | YamlToken::Comment => {
638 while *i < tokens.len() && tokens[*i].kind != YamlToken::Newline {
639 if matches!(
640 tokens[*i].kind,
641 YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd
642 ) {
643 return Err(diag_at_token(
644 &tokens[*i],
645 diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
646 "unexpected flow closing token",
647 ));
648 }
649 emit_token_as_yaml(builder, &tokens[*i]);
650 *i += 1;
651 }
652 }
653 YamlToken::Indent => {
654 return Err(diag_at_token(
655 &tokens[*i],
656 diagnostic_codes::PARSE_UNEXPECTED_INDENT,
657 "unexpected indent token while parsing block map",
658 ));
659 }
660 YamlToken::Dedent => {
661 if stop_on_dedent {
662 *i += 1;
663 closed_by_dedent = true;
664 break;
665 }
666 return Err(diag_at_token(
667 &tokens[*i],
668 diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
669 "unexpected dedent token while parsing block map",
670 ));
671 }
672 _ => emit_block_map_entry(builder, tokens, i)?,
673 }
674 }
675
676 if stop_on_dedent && !closed_by_dedent {
677 let (byte_start, byte_end) = tokens
678 .last()
679 .map(|t| (t.byte_start, t.byte_end))
680 .unwrap_or((0, 0));
681 return Err(YamlDiagnostic {
682 code: diagnostic_codes::PARSE_UNTERMINATED_BLOCK_MAP,
683 message: "unterminated indented block map",
684 byte_start,
685 byte_end,
686 });
687 }
688
689 Ok(())
690}
691
692fn emit_block_map_entry<'a>(
693 builder: &mut GreenNodeBuilder<'_>,
694 tokens: &[YamlTokenSpan<'a>],
695 i: &mut usize,
696) -> Result<(), YamlDiagnostic> {
697 builder.start_node(SyntaxKind::YAML_BLOCK_MAP_ENTRY.into());
698 emit_block_map_key(builder, tokens, i)?;
699 let trailing_newline = emit_block_map_value(builder, tokens, i)?;
700 if let Some(newline) = trailing_newline {
701 builder.token(SyntaxKind::NEWLINE.into(), newline);
702 }
703 builder.finish_node(); Ok(())
705}
706
707fn emit_block_map_key<'a>(
708 builder: &mut GreenNodeBuilder<'_>,
709 tokens: &[YamlTokenSpan<'a>],
710 i: &mut usize,
711) -> Result<(), YamlDiagnostic> {
712 builder.start_node(SyntaxKind::YAML_BLOCK_MAP_KEY.into());
713
714 let mut saw_colon = false;
715 while *i < tokens.len() {
716 match tokens[*i].kind {
717 YamlToken::Key => {
718 builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
719 *i += 1;
720 }
721 YamlToken::Tag => {
722 builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
723 *i += 1;
724 }
725 YamlToken::Whitespace => {
726 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
727 *i += 1;
728 }
729 YamlToken::Colon => {
730 builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
731 *i += 1;
732 saw_colon = true;
733 break;
734 }
735 _ => {
736 return Err(diag_at_token(
737 &tokens[*i],
738 diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
739 "invalid token while parsing block map key",
740 ));
741 }
742 }
743 }
744 if !saw_colon {
745 return Err(diag_at_token(
746 &tokens[(*i).saturating_sub(1)],
747 diagnostic_codes::PARSE_MISSING_COLON,
748 "missing colon in block map entry",
749 ));
750 }
751 builder.finish_node(); Ok(())
753}
754
755fn emit_block_map_value<'a>(
760 builder: &mut GreenNodeBuilder<'_>,
761 tokens: &[YamlTokenSpan<'a>],
762 i: &mut usize,
763) -> Result<Option<&'a str>, YamlDiagnostic> {
764 builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
765 while *i < tokens.len() {
766 match tokens[*i].kind {
767 YamlToken::Scalar => {
768 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
769 *i += 1;
770 }
771 YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
772 YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
773 YamlToken::Anchor | YamlToken::Alias => {
774 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
775 *i += 1;
776 }
777 YamlToken::BlockScalarHeader => {
778 consume_block_scalar(builder, tokens, i);
779 }
780 YamlToken::BlockScalarContent => {
781 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
782 *i += 1;
783 }
784 YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd | YamlToken::Comma => break,
785 YamlToken::Tag => {
786 builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
787 *i += 1;
788 }
789 YamlToken::Comment => {
790 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
791 *i += 1;
792 }
793 YamlToken::Whitespace => {
794 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
795 *i += 1;
796 }
797 _ => break,
798 }
799 }
800
801 let mut trailing_newline: Option<&str> = None;
802 if *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
803 trailing_newline = Some(tokens[*i].text);
804 *i += 1;
805 }
806
807 if *i < tokens.len() && tokens[*i].kind == YamlToken::Indent {
808 *i += 1;
809 if let Some(newline) = trailing_newline.take() {
811 builder.token(SyntaxKind::NEWLINE.into(), newline);
812 }
813 builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
814 emit_block_map(builder, tokens, i, true)?;
815 builder.finish_node(); }
817
818 builder.finish_node(); Ok(trailing_newline)
820}
821
822fn consume_block_scalar<'a>(
827 builder: &mut GreenNodeBuilder<'_>,
828 tokens: &[YamlTokenSpan<'a>],
829 i: &mut usize,
830) {
831 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
832 *i += 1;
833 while *i < tokens.len() {
834 match tokens[*i].kind {
835 YamlToken::Newline => {
836 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
837 *i += 1;
838 if *i < tokens.len()
839 && matches!(
840 tokens[*i].kind,
841 YamlToken::BlockScalarContent | YamlToken::Newline
842 )
843 {
844 continue;
845 }
846 break;
847 }
848 YamlToken::BlockScalarContent => {
849 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
850 *i += 1;
851 }
852 _ => break,
853 }
854 }
855}
856
857pub fn parse_yaml_tree(input: &str) -> Option<SyntaxNode> {
859 parse_yaml_report(input).tree
860}
861
862pub fn parse_yaml_report(input: &str) -> YamlParseReport {
864 let tokens = match lex_mapping_tokens_with_diagnostic(input) {
865 Ok(tokens) => tokens,
866 Err(err) => {
867 return YamlParseReport {
868 tree: None,
869 diagnostics: vec![err],
870 };
871 }
872 };
873
874 let mut seen_content = false;
875 for token in &tokens {
876 match token.kind {
877 YamlToken::Directive if seen_content => {
878 return YamlParseReport {
879 tree: None,
880 diagnostics: vec![diag_at_token(
881 token,
882 diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
883 "directive requires document end before subsequent directives",
884 )],
885 };
886 }
887 YamlToken::Directive
888 | YamlToken::Newline
889 | YamlToken::Whitespace
890 | YamlToken::Comment => {}
891 YamlToken::DocumentEnd => seen_content = false,
892 _ => seen_content = true,
893 }
894 }
895
896 if let Some(directive) = tokens.iter().find(|t| t.kind == YamlToken::Directive)
897 && !tokens.iter().any(|t| t.kind == YamlToken::DocumentStart)
898 {
899 return YamlParseReport {
900 tree: None,
901 diagnostics: vec![diag_at_token(
902 directive,
903 diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
904 "directive requires an explicit document start marker",
905 )],
906 };
907 }
908
909 let mut builder = GreenNodeBuilder::new();
910 builder.start_node(SyntaxKind::DOCUMENT.into());
911 builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
912 builder.start_node(SyntaxKind::YAML_STREAM.into());
913 if let Err(err) = parse_stream(&mut builder, &tokens) {
914 return YamlParseReport {
915 tree: None,
916 diagnostics: vec![err],
917 };
918 }
919 builder.finish_node(); builder.finish_node(); builder.finish_node(); YamlParseReport {
923 tree: Some(SyntaxNode::new_root(builder.finish())),
924 diagnostics: Vec::new(),
925 }
926}
927
928fn parse_stream<'a>(
932 builder: &mut GreenNodeBuilder<'_>,
933 tokens: &[YamlTokenSpan<'a>],
934) -> Result<(), YamlDiagnostic> {
935 let mut i = 0usize;
936 while i < tokens.len() {
937 match tokens[i].kind {
938 YamlToken::Newline => {
939 builder.token(SyntaxKind::NEWLINE.into(), tokens[i].text);
940 i += 1;
941 }
942 YamlToken::Whitespace => {
943 builder.token(SyntaxKind::WHITESPACE.into(), tokens[i].text);
944 i += 1;
945 }
946 YamlToken::Comment => {
947 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[i].text);
948 i += 1;
949 }
950 YamlToken::Indent | YamlToken::Dedent => {
954 i += 1;
955 }
956 YamlToken::DocumentEnd if !document_follows(tokens, i + 1) => {
960 builder.token(SyntaxKind::YAML_DOCUMENT_END.into(), tokens[i].text);
961 i += 1;
962 }
963 _ => {
964 builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
965 emit_document(builder, tokens, &mut i)?;
966 builder.finish_node(); }
968 }
969 }
970 Ok(())
971}
972
973fn document_follows(tokens: &[YamlTokenSpan<'_>], start: usize) -> bool {
979 tokens[start..].iter().any(|t| {
980 !matches!(
981 t.kind,
982 YamlToken::Newline
983 | YamlToken::Whitespace
984 | YamlToken::Comment
985 | YamlToken::DocumentEnd
986 )
987 })
988}
989
990fn emit_document<'a>(
995 builder: &mut GreenNodeBuilder<'_>,
996 tokens: &[YamlTokenSpan<'a>],
997 i: &mut usize,
998) -> Result<(), YamlDiagnostic> {
999 let mut saw_marker = false;
1001 while *i < tokens.len() {
1002 match tokens[*i].kind {
1003 YamlToken::Directive => {
1004 builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
1005 *i += 1;
1006 }
1007 YamlToken::Newline => {
1008 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
1009 *i += 1;
1010 }
1011 YamlToken::Whitespace => {
1012 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
1013 *i += 1;
1014 }
1015 YamlToken::Comment => {
1016 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
1017 *i += 1;
1018 }
1019 YamlToken::DocumentStart => {
1020 builder.token(SyntaxKind::YAML_DOCUMENT_START.into(), tokens[*i].text);
1021 *i += 1;
1022 saw_marker = true;
1023 if *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
1024 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
1025 *i += 1;
1026 }
1027 break;
1028 }
1029 _ => break,
1030 }
1031 }
1032 let _ = saw_marker;
1033
1034 let next_significant = tokens[*i..].iter().find(|t| {
1036 !matches!(
1037 t.kind,
1038 YamlToken::Newline | YamlToken::Whitespace | YamlToken::Comment
1039 )
1040 });
1041
1042 let body_kind = match next_significant.map(|t| t.kind) {
1043 Some(YamlToken::DocumentStart) | Some(YamlToken::DocumentEnd) | None => DocumentBody::Empty,
1044 Some(YamlToken::BlockSeqEntry) => DocumentBody::BlockSequence,
1045 _ => {
1046 let mut has_colon = false;
1056 let mut has_tag = false;
1057 let mut has_scalar = false;
1058 let mut has_flow = false;
1059 let mut has_block_seq = false;
1060 let mut pre_seq_only_properties = true;
1067 let mut seen_block_seq = false;
1068 for tok in &tokens[*i..] {
1069 match tok.kind {
1070 YamlToken::DocumentStart | YamlToken::DocumentEnd => break,
1071 YamlToken::Colon => has_colon = true,
1072 YamlToken::Tag => has_tag = true,
1073 YamlToken::Scalar
1074 | YamlToken::BlockScalarHeader
1075 | YamlToken::BlockScalarContent => {
1076 has_scalar = true;
1077 if !seen_block_seq && !tok.text.trim_start().starts_with('&') {
1078 pre_seq_only_properties = false;
1079 }
1080 }
1081 YamlToken::FlowMapStart
1082 | YamlToken::FlowMapEnd
1083 | YamlToken::FlowSeqStart
1084 | YamlToken::FlowSeqEnd
1085 | YamlToken::Comma => has_flow = true,
1086 YamlToken::BlockSeqEntry => {
1087 has_block_seq = true;
1088 seen_block_seq = true;
1089 }
1090 _ => {}
1091 }
1092 }
1093 if has_colon || has_flow {
1094 DocumentBody::BlockMap
1095 } else if has_block_seq && pre_seq_only_properties {
1096 DocumentBody::BlockSequence
1097 } else if has_tag || has_scalar {
1098 DocumentBody::Scalar
1099 } else {
1100 DocumentBody::BlockMap
1101 }
1102 }
1103 };
1104
1105 match body_kind {
1106 DocumentBody::Empty => {}
1107 DocumentBody::BlockSequence => {
1108 builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
1109 emit_block_seq(builder, tokens, i, false)?;
1110 builder.finish_node(); }
1112 DocumentBody::Scalar => emit_scalar_document(builder, tokens, i)?,
1113 DocumentBody::BlockMap => {
1114 builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
1115 emit_block_map(builder, tokens, i, false)?;
1116 builder.finish_node(); }
1118 }
1119
1120 if matches!(body_kind, DocumentBody::Empty) {
1127 let mut peek = *i;
1128 while peek < tokens.len() {
1129 match tokens[peek].kind {
1130 YamlToken::Newline | YamlToken::Whitespace | YamlToken::Comment => peek += 1,
1131 _ => break,
1132 }
1133 }
1134 if peek < tokens.len() && tokens[peek].kind == YamlToken::DocumentEnd {
1135 while *i < peek {
1136 match tokens[*i].kind {
1137 YamlToken::Newline => {
1138 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text)
1139 }
1140 YamlToken::Whitespace => {
1141 builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text)
1142 }
1143 YamlToken::Comment => {
1144 builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text)
1145 }
1146 _ => unreachable!("only trivia in this range"),
1147 }
1148 *i += 1;
1149 }
1150 }
1151 }
1152 if *i < tokens.len() && tokens[*i].kind == YamlToken::DocumentEnd {
1153 builder.token(SyntaxKind::YAML_DOCUMENT_END.into(), tokens[*i].text);
1154 *i += 1;
1155 if *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
1156 builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
1157 *i += 1;
1158 }
1159 }
1160
1161 Ok(())
1162}
1163
/// Shape of a document body, chosen by `emit_document` to decide which
/// emitter handles the tokens after the document prologue.
#[derive(Clone, Copy)]
enum DocumentBody {
    /// No body tokens before the next document marker or end of input.
    Empty,
    /// Body is emitted as a `YAML_BLOCK_SEQUENCE` node.
    BlockSequence,
    /// Body is emitted as a `YAML_BLOCK_MAP` node.
    BlockMap,
    /// Body is emitted flat by `emit_scalar_document`.
    Scalar,
}