Skip to main content

panache_parser/parser/yaml/
parser.rs

1use crate::syntax::{SyntaxKind, SyntaxNode};
2use rowan::GreenNodeBuilder;
3
4use super::lexer::{lex_mapping_tokens_with_diagnostic, split_once_unquoted};
5use super::model::{
6    ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
7    YamlParseReport, YamlToken, YamlTokenSpan, diagnostic_codes,
8};
9
10/// Parse YAML in shadow mode using prototype groundwork only.
11///
12/// This API is intentionally read-only and does not replace production YAML
13/// parsing. By default it is disabled and reports `SkippedDisabled`.
14pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
15    let line_count = input.lines().count().max(1);
16
17    if !options.enabled {
18        return ShadowYamlReport {
19            outcome: ShadowYamlOutcome::SkippedDisabled,
20            shadow_reason: "shadow-disabled",
21            input_kind: options.input_kind,
22            input_len_bytes: input.len(),
23            line_count,
24            normalized_input: None,
25        };
26    }
27
28    let normalized = match options.input_kind {
29        YamlInputKind::Plain => input.to_owned(),
30        YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
31    };
32
33    let parsed = parse_yaml_tree(&normalized).is_some();
34
35    ShadowYamlReport {
36        outcome: if parsed {
37            ShadowYamlOutcome::PrototypeParsed
38        } else {
39            ShadowYamlOutcome::PrototypeRejected
40        },
41        shadow_reason: if parsed {
42            "prototype-basic-mapping-parsed"
43        } else {
44            "prototype-basic-mapping-rejected"
45        },
46        input_kind: options.input_kind,
47        input_len_bytes: input.len(),
48        line_count,
49        normalized_input: Some(normalized),
50    }
51}
52
53fn normalize_hashpipe_input(input: &str) -> String {
54    input
55        .lines()
56        .map(strip_hashpipe_prefix)
57        .collect::<Vec<_>>()
58        .join("\n")
59}
60
/// Remove a leading `#|` marker, plus at most one space directly after it,
/// from `line`. Lines that do not start with `#|` are returned unchanged.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        // Drop a single separating space if present; keep any further ones.
        Some(after_marker) => after_marker.strip_prefix(' ').unwrap_or(after_marker),
        None => line,
    }
}
67
68fn emit_token_as_yaml(builder: &mut GreenNodeBuilder<'_>, token: &YamlTokenSpan<'_>) {
69    let kind = match token.kind {
70        YamlToken::Whitespace => SyntaxKind::WHITESPACE,
71        YamlToken::Comment => SyntaxKind::YAML_COMMENT,
72        YamlToken::Tag => SyntaxKind::YAML_TAG,
73        YamlToken::Colon => SyntaxKind::YAML_COLON,
74        _ => SyntaxKind::YAML_SCALAR,
75    };
76    builder.token(kind.into(), token.text);
77}
78
79fn diag_at_token(
80    token: &YamlTokenSpan<'_>,
81    code: &'static str,
82    message: &'static str,
83) -> YamlDiagnostic {
84    YamlDiagnostic {
85        code,
86        message,
87        byte_start: token.byte_start,
88        byte_end: token.byte_end,
89    }
90}
91
92fn emit_flow_sequence<'a>(
93    builder: &mut GreenNodeBuilder<'_>,
94    tokens: &[YamlTokenSpan<'a>],
95    i: &mut usize,
96) -> Result<(), YamlDiagnostic> {
97    if *i >= tokens.len() || tokens[*i].kind != YamlToken::FlowSeqStart {
98        return Err(YamlDiagnostic {
99            code: diagnostic_codes::PARSE_EXPECTED_FLOW_SEQUENCE_START,
100            message: "expected flow sequence start token",
101            byte_start: tokens.get(*i).map(|t| t.byte_start).unwrap_or(0),
102            byte_end: tokens.get(*i).map(|t| t.byte_end).unwrap_or(0),
103        });
104    }
105
106    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
107    emit_token_as_yaml(builder, &tokens[*i]); // [
108    *i += 1;
109
110    let mut open_item = false;
111    while *i < tokens.len() {
112        match tokens[*i].kind {
113            YamlToken::FlowSeqEnd => {
114                if open_item {
115                    builder.finish_node(); // YAML_FLOW_SEQUENCE_ITEM
116                }
117                emit_token_as_yaml(builder, &tokens[*i]); // ]
118                *i += 1;
119                if *i < tokens.len() {
120                    match tokens[*i].kind {
121                        YamlToken::Newline | YamlToken::Comment => {}
122                        YamlToken::Whitespace if tokens[*i].text.trim().is_empty() => {}
123                        _ => {
124                            return Err(diag_at_token(
125                                &tokens[*i],
126                                diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
127                                "trailing content after flow sequence end",
128                            ));
129                        }
130                    }
131                }
132                builder.finish_node(); // YAML_FLOW_SEQUENCE
133                return Ok(());
134            }
135            YamlToken::Comma => {
136                if !open_item {
137                    return Err(diag_at_token(
138                        &tokens[*i],
139                        diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
140                        "invalid comma position in flow sequence",
141                    ));
142                }
143                builder.finish_node(); // YAML_FLOW_SEQUENCE_ITEM
144                open_item = false;
145                emit_token_as_yaml(builder, &tokens[*i]);
146                *i += 1;
147            }
148            YamlToken::Whitespace if !open_item => {
149                emit_token_as_yaml(builder, &tokens[*i]);
150                *i += 1;
151            }
152            YamlToken::Scalar if !open_item && tokens[*i].text.trim().is_empty() => {
153                emit_token_as_yaml(builder, &tokens[*i]);
154                *i += 1;
155            }
156            YamlToken::FlowSeqStart => {
157                if !open_item {
158                    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
159                    open_item = true;
160                }
161                emit_flow_sequence(builder, tokens, i)?;
162            }
163            YamlToken::FlowMapStart => {
164                if !open_item {
165                    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
166                    open_item = true;
167                }
168                emit_flow_map(builder, tokens, i)?;
169            }
170            _ => {
171                if !open_item {
172                    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
173                    open_item = true;
174                }
175                emit_token_as_yaml(builder, &tokens[*i]);
176                *i += 1;
177            }
178        }
179    }
180
181    let (byte_start, byte_end) =
182        if let Some(start) = tokens.iter().find(|t| t.kind == YamlToken::FlowSeqStart) {
183            (
184                start.byte_start,
185                tokens.last().map(|t| t.byte_end).unwrap_or(start.byte_end),
186            )
187        } else {
188            tokens
189                .last()
190                .map(|t| (t.byte_start, t.byte_end))
191                .unwrap_or((0, 0))
192        };
193    Err(YamlDiagnostic {
194        code: diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
195        message: "unterminated flow sequence",
196        byte_start,
197        byte_end,
198    })
199}
200
201fn emit_flow_map<'a>(
202    builder: &mut GreenNodeBuilder<'_>,
203    tokens: &[YamlTokenSpan<'a>],
204    i: &mut usize,
205) -> Result<(), YamlDiagnostic> {
206    if *i >= tokens.len() || tokens[*i].kind != YamlToken::FlowMapStart {
207        return Err(YamlDiagnostic {
208            code: diagnostic_codes::PARSE_EXPECTED_FLOW_MAP_START,
209            message: "expected flow map start token",
210            byte_start: tokens.get(*i).map(|t| t.byte_start).unwrap_or(0),
211            byte_end: tokens.get(*i).map(|t| t.byte_end).unwrap_or(0),
212        });
213    }
214
215    builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
216    emit_token_as_yaml(builder, &tokens[*i]); // {
217    *i += 1;
218
219    loop {
220        // Skip inter-entry whitespace and newlines
221        while *i < tokens.len()
222            && matches!(tokens[*i].kind, YamlToken::Whitespace | YamlToken::Newline)
223        {
224            emit_token_as_yaml(builder, &tokens[*i]);
225            *i += 1;
226        }
227
228        if *i >= tokens.len() {
229            let (byte_start, byte_end) = tokens
230                .last()
231                .map(|t| (t.byte_start, t.byte_end))
232                .unwrap_or((0, 0));
233            return Err(YamlDiagnostic {
234                code: diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP,
235                message: "unterminated flow map",
236                byte_start,
237                byte_end,
238            });
239        }
240
241        match tokens[*i].kind {
242            YamlToken::FlowMapEnd => {
243                emit_token_as_yaml(builder, &tokens[*i]);
244                *i += 1;
245                if *i < tokens.len() {
246                    match tokens[*i].kind {
247                        YamlToken::Newline
248                        | YamlToken::Comment
249                        | YamlToken::Whitespace
250                        | YamlToken::FlowMapEnd
251                        | YamlToken::FlowSeqEnd
252                        | YamlToken::Comma => {}
253                        _ => {
254                            return Err(diag_at_token(
255                                &tokens[*i],
256                                diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
257                                "trailing content after flow map end",
258                            ));
259                        }
260                    }
261                }
262                builder.finish_node(); // YAML_FLOW_MAP
263                return Ok(());
264            }
265            YamlToken::Comma => {
266                emit_token_as_yaml(builder, &tokens[*i]);
267                *i += 1;
268            }
269            _ => {
270                emit_flow_map_entry(builder, tokens, i)?;
271            }
272        }
273    }
274}
275
276fn emit_flow_map_entry<'a>(
277    builder: &mut GreenNodeBuilder<'_>,
278    tokens: &[YamlTokenSpan<'a>],
279    i: &mut usize,
280) -> Result<(), YamlDiagnostic> {
281    builder.start_node(SyntaxKind::YAML_FLOW_MAP_ENTRY.into());
282    builder.start_node(SyntaxKind::YAML_FLOW_MAP_KEY.into());
283
284    // Emit leading whitespace inside key node
285    while *i < tokens.len() && tokens[*i].kind == YamlToken::Whitespace {
286        emit_token_as_yaml(builder, &tokens[*i]);
287        *i += 1;
288    }
289
290    // Determine key/value split point
291    let value_prefix: Option<&'a str> = match tokens.get(*i).map(|t| t.kind) {
292        Some(YamlToken::Scalar) => {
293            let scalar = tokens[*i];
294            *i += 1;
295            if let Some((key_text, rest_text)) = split_once_unquoted(scalar.text, ':') {
296                builder.token(SyntaxKind::YAML_KEY.into(), key_text);
297                builder.token(
298                    SyntaxKind::YAML_COLON.into(),
299                    &scalar.text[key_text.len()..key_text.len() + 1],
300                );
301                Some(rest_text)
302            } else {
303                // No colon — standalone scalar (implicit key or value-only)
304                builder.token(SyntaxKind::YAML_SCALAR.into(), scalar.text);
305                None
306            }
307        }
308        Some(YamlToken::Key) => {
309            // Already-tokenized key (from multi-line block lexing inside a flow map)
310            builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
311            *i += 1;
312            while *i < tokens.len() && tokens[*i].kind == YamlToken::Whitespace {
313                emit_token_as_yaml(builder, &tokens[*i]);
314                *i += 1;
315            }
316            if *i < tokens.len() && tokens[*i].kind == YamlToken::Colon {
317                builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
318                *i += 1;
319            }
320            None
321        }
322        Some(YamlToken::Tag) => {
323            emit_token_as_yaml(builder, &tokens[*i]);
324            *i += 1;
325            None
326        }
327        _ => None,
328    };
329
330    builder.finish_node(); // YAML_FLOW_MAP_KEY
331
332    builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
333    if let Some(prefix) = value_prefix
334        && !prefix.is_empty()
335    {
336        builder.token(SyntaxKind::YAML_SCALAR.into(), prefix);
337    }
338    emit_flow_value_tokens(builder, tokens, i)?;
339    builder.finish_node(); // YAML_FLOW_MAP_VALUE
340
341    builder.finish_node(); // YAML_FLOW_MAP_ENTRY
342    Ok(())
343}
344
345fn emit_flow_value_tokens<'a>(
346    builder: &mut GreenNodeBuilder<'_>,
347    tokens: &[YamlTokenSpan<'a>],
348    i: &mut usize,
349) -> Result<(), YamlDiagnostic> {
350    while *i < tokens.len() {
351        match tokens[*i].kind {
352            YamlToken::Comma | YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd => break,
353            YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
354            YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
355            _ => {
356                emit_token_as_yaml(builder, &tokens[*i]);
357                *i += 1;
358            }
359        }
360    }
361    Ok(())
362}
363
364fn emit_scalar_document<'a>(
365    builder: &mut GreenNodeBuilder<'_>,
366    tokens: &[YamlTokenSpan<'a>],
367    i: &mut usize,
368) -> Result<(), YamlDiagnostic> {
369    while *i < tokens.len() {
370        let kind = match tokens[*i].kind {
371            YamlToken::Newline => SyntaxKind::NEWLINE,
372            YamlToken::DocumentStart => SyntaxKind::YAML_DOCUMENT_START,
373            YamlToken::DocumentEnd => SyntaxKind::YAML_DOCUMENT_END,
374            YamlToken::Tag => SyntaxKind::YAML_TAG,
375            YamlToken::Comment => SyntaxKind::YAML_COMMENT,
376            YamlToken::Whitespace => SyntaxKind::WHITESPACE,
377            YamlToken::Colon => SyntaxKind::YAML_COLON,
378            YamlToken::FlowMapStart
379            | YamlToken::FlowMapEnd
380            | YamlToken::FlowSeqStart
381            | YamlToken::FlowSeqEnd
382            | YamlToken::Comma => {
383                return Err(diag_at_token(
384                    &tokens[*i],
385                    diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
386                    "unexpected flow indicator in plain scalar document",
387                ));
388            }
389            _ => SyntaxKind::YAML_SCALAR,
390        };
391        builder.token(kind.into(), tokens[*i].text);
392        *i += 1;
393    }
394    Ok(())
395}
396
397fn emit_block_seq<'a>(
398    builder: &mut GreenNodeBuilder<'_>,
399    tokens: &[YamlTokenSpan<'a>],
400    i: &mut usize,
401) -> Result<(), YamlDiagnostic> {
402    while *i < tokens.len() {
403        match tokens[*i].kind {
404            YamlToken::Newline => {
405                builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
406                *i += 1;
407            }
408            YamlToken::DocumentStart => {
409                builder.token(SyntaxKind::YAML_DOCUMENT_START.into(), tokens[*i].text);
410                *i += 1;
411            }
412            YamlToken::DocumentEnd => {
413                builder.token(SyntaxKind::YAML_DOCUMENT_END.into(), tokens[*i].text);
414                *i += 1;
415            }
416            YamlToken::BlockSeqEntry => {
417                builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
418                builder.token(SyntaxKind::YAML_BLOCK_SEQ_ENTRY.into(), tokens[*i].text);
419                *i += 1;
420                while *i < tokens.len() && tokens[*i].kind != YamlToken::Newline {
421                    match tokens[*i].kind {
422                        YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
423                        YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
424                        _ => {
425                            emit_token_as_yaml(builder, &tokens[*i]);
426                            *i += 1;
427                        }
428                    }
429                }
430                if *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
431                    builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
432                    *i += 1;
433                }
434                // Nested block map following a bare `-` entry: lexer has emitted
435                // an Indent, then the nested mapping tokens, terminated by a
436                // Dedent. Mirror the nested-map path that emit_block_map uses
437                // after a map value (see the Indent handling below).
438                if *i < tokens.len() && tokens[*i].kind == YamlToken::Indent {
439                    *i += 1;
440                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
441                    emit_block_map(builder, tokens, i, true)?;
442                    builder.finish_node(); // YAML_BLOCK_MAP
443                }
444                builder.finish_node(); // YAML_BLOCK_SEQUENCE_ITEM
445            }
446            _ => break,
447        }
448    }
449    Ok(())
450}
451
/// Emit the contents of a block map starting at `tokens[*i]`.
///
/// The caller is responsible for opening and closing the surrounding
/// `YAML_BLOCK_MAP` node; this function only emits its children.
///
/// * `stop_on_dedent` - when true the map is a nested one: a `Dedent` token
///   is consumed and ends the map, and running out of tokens without seeing
///   one is reported as `PARSE_UNTERMINATED_BLOCK_MAP`. When false a `Dedent`
///   is reported as `PARSE_UNEXPECTED_DEDENT`.
///
/// Returns `Ok(())` once the map has ended, or the first diagnostic
/// encountered. On error the builder is left with unfinished nodes.
fn emit_block_map<'a>(
    builder: &mut GreenNodeBuilder<'_>,
    tokens: &[YamlTokenSpan<'a>],
    i: &mut usize,
    stop_on_dedent: bool,
) -> Result<(), YamlDiagnostic> {
    // Set only when the loop ends because a Dedent was consumed.
    let mut closed_by_dedent = false;
    while *i < tokens.len() {
        match tokens[*i].kind {
            YamlToken::Newline => {
                builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
                *i += 1;
            }
            YamlToken::DocumentStart => {
                builder.token(SyntaxKind::YAML_DOCUMENT_START.into(), tokens[*i].text);
                *i += 1;
            }
            YamlToken::DocumentEnd => {
                builder.token(SyntaxKind::YAML_DOCUMENT_END.into(), tokens[*i].text);
                *i += 1;
            }
            // Directives and stray commas are kept as opaque scalar leaves.
            YamlToken::Directive | YamlToken::Comma => {
                builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
                *i += 1;
            }
            // A closer with no matching opener at this level.
            YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd => {
                return Err(diag_at_token(
                    &tokens[*i],
                    diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
                    "unexpected flow closing token",
                ));
            }
            // Flow collections appearing directly in the map (not as a value).
            YamlToken::FlowMapStart | YamlToken::FlowSeqStart => {
                if tokens[*i].kind == YamlToken::FlowMapStart {
                    emit_flow_map(builder, tokens, i)?;
                } else {
                    emit_flow_sequence(builder, tokens, i)?;
                }
            }
            YamlToken::Anchor
            | YamlToken::Alias
            | YamlToken::BlockScalarHeader
            | YamlToken::BlockScalarContent => {
                builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
                *i += 1;
            }
            // A line that starts with a scalar or comment: emit the rest of
            // the line token by token, stopping before the newline.
            YamlToken::Scalar | YamlToken::Comment => {
                while *i < tokens.len() && tokens[*i].kind != YamlToken::Newline {
                    if matches!(
                        tokens[*i].kind,
                        YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd
                    ) {
                        return Err(diag_at_token(
                            &tokens[*i],
                            diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
                            "unexpected flow closing token",
                        ));
                    }
                    emit_token_as_yaml(builder, &tokens[*i]);
                    *i += 1;
                }
            }
            // Indents are only accepted right after a map value (handled in
            // the entry arm below); anywhere else they are an error.
            YamlToken::Indent => {
                return Err(diag_at_token(
                    &tokens[*i],
                    diagnostic_codes::PARSE_UNEXPECTED_INDENT,
                    "unexpected indent token while parsing block map",
                ));
            }
            YamlToken::Dedent => {
                if stop_on_dedent {
                    // Consume the Dedent (it is not emitted) and end this map.
                    *i += 1;
                    closed_by_dedent = true;
                    break;
                }
                return Err(diag_at_token(
                    &tokens[*i],
                    diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
                    "unexpected dedent token while parsing block map",
                ));
            }
            // Anything else starts a `key: value` entry.
            _ => {
                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_ENTRY.into());
                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_KEY.into());

                // Key part: keys, tags and whitespace up to and including the
                // colon. Any other token kind is rejected.
                let mut saw_colon = false;
                while *i < tokens.len() {
                    match tokens[*i].kind {
                        YamlToken::Key => {
                            builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::Tag => {
                            builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::Whitespace => {
                            builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::Colon => {
                            builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
                            *i += 1;
                            saw_colon = true;
                            break;
                        }
                        _ => {
                            return Err(diag_at_token(
                                &tokens[*i],
                                diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                                "invalid token while parsing block map key",
                            ));
                        }
                    }
                }
                // Only reachable without a colon when the tokens ran out, so
                // the diagnostic is anchored on the last token consumed.
                if !saw_colon {
                    return Err(diag_at_token(
                        &tokens[(*i).saturating_sub(1)],
                        diagnostic_codes::PARSE_MISSING_COLON,
                        "missing colon in block map entry",
                    ));
                }
                builder.finish_node(); // YAML_BLOCK_MAP_KEY

                // Value part: same-line tokens after the colon. The loop
                // stops (without consuming) at a newline, a comma, a flow
                // closer, or any token kind not listed here.
                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
                while *i < tokens.len() {
                    match tokens[*i].kind {
                        YamlToken::Scalar => {
                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::FlowMapStart => {
                            emit_flow_map(builder, tokens, i)?;
                        }
                        YamlToken::FlowSeqStart => {
                            emit_flow_sequence(builder, tokens, i)?;
                        }
                        YamlToken::Anchor
                        | YamlToken::Alias
                        | YamlToken::BlockScalarHeader
                        | YamlToken::BlockScalarContent => {
                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd | YamlToken::Comma => {
                            break;
                        }
                        YamlToken::Tag => {
                            builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::Comment => {
                            builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
                            *i += 1;
                        }
                        YamlToken::Whitespace => {
                            builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
                            *i += 1;
                        }
                        _ => break,
                    }
                }

                // Hold the line's newline back: where it is emitted depends
                // on whether a nested map follows.
                let mut trailing_newline: Option<&str> = None;
                if *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
                    trailing_newline = Some(tokens[*i].text);
                    *i += 1;
                }

                // An Indent here means the value is a nested block map; the
                // Indent token is consumed without being emitted.
                if *i < tokens.len() && tokens[*i].kind == YamlToken::Indent {
                    *i += 1;
                    // Emit trailing newline before nested content to preserve byte order
                    if let Some(newline) = trailing_newline.take() {
                        builder.token(SyntaxKind::NEWLINE.into(), newline);
                    }
                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
                    emit_block_map(builder, tokens, i, true)?;
                    builder.finish_node(); // YAML_BLOCK_MAP
                }

                builder.finish_node(); // YAML_BLOCK_MAP_VALUE
                // No nested map: the newline sits after the value node, still
                // inside the entry.
                if let Some(newline) = trailing_newline {
                    builder.token(SyntaxKind::NEWLINE.into(), newline);
                }
                builder.finish_node(); // YAML_BLOCK_MAP_ENTRY
            }
        }
    }

    // A nested map must be closed by a Dedent; hitting the end of the token
    // stream first is reported at the last token.
    if stop_on_dedent && !closed_by_dedent {
        let (byte_start, byte_end) = tokens
            .last()
            .map(|t| (t.byte_start, t.byte_end))
            .unwrap_or((0, 0));
        return Err(YamlDiagnostic {
            code: diagnostic_codes::PARSE_UNTERMINATED_BLOCK_MAP,
            message: "unterminated indented block map",
            byte_start,
            byte_end,
        });
    }

    Ok(())
}
656
657/// Parse prototype YAML tree structure from input
658pub fn parse_yaml_tree(input: &str) -> Option<SyntaxNode> {
659    parse_yaml_report(input).tree
660}
661
662/// Parse prototype YAML tree structure and include diagnostics on failure.
663pub fn parse_yaml_report(input: &str) -> YamlParseReport {
664    let tokens = match lex_mapping_tokens_with_diagnostic(input) {
665        Ok(tokens) => tokens,
666        Err(err) => {
667            return YamlParseReport {
668                tree: None,
669                diagnostics: vec![err],
670            };
671        }
672    };
673
674    let mut seen_content = false;
675    for token in &tokens {
676        match token.kind {
677            YamlToken::Directive if seen_content => {
678                return YamlParseReport {
679                    tree: None,
680                    diagnostics: vec![diag_at_token(
681                        token,
682                        diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
683                        "directive requires document end before subsequent directives",
684                    )],
685                };
686            }
687            YamlToken::Directive
688            | YamlToken::Newline
689            | YamlToken::Whitespace
690            | YamlToken::Comment => {}
691            YamlToken::DocumentEnd => seen_content = false,
692            _ => seen_content = true,
693        }
694    }
695
696    if let Some(directive) = tokens.iter().find(|t| t.kind == YamlToken::Directive)
697        && !tokens.iter().any(|t| t.kind == YamlToken::DocumentStart)
698    {
699        return YamlParseReport {
700            tree: None,
701            diagnostics: vec![diag_at_token(
702                directive,
703                diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
704                "directive requires an explicit document start marker",
705            )],
706        };
707    }
708
709    let is_sequence = tokens
710        .iter()
711        .find(|t| {
712            !matches!(
713                t.kind,
714                YamlToken::Newline
715                    | YamlToken::Whitespace
716                    | YamlToken::Comment
717                    | YamlToken::DocumentStart
718                    | YamlToken::DocumentEnd
719                    | YamlToken::Directive
720            )
721        })
722        .is_some_and(|t| t.kind == YamlToken::BlockSeqEntry);
723
724    // Scalar document with an explicit tag (e.g. `! a`, `!!str foo`): the token
725    // stream has a Tag but no Colon and no BlockSeqEntry, so emit_block_map's
726    // key/colon expectation would reject it. Tagless scalar documents still go
727    // through emit_block_map to keep existing CST shapes unchanged.
728    let has_colon = tokens.iter().any(|t| t.kind == YamlToken::Colon);
729    let has_tag = tokens.iter().any(|t| t.kind == YamlToken::Tag);
730    let is_scalar_document = !is_sequence && !has_colon && has_tag;
731
732    let mut builder = GreenNodeBuilder::new();
733    builder.start_node(SyntaxKind::DOCUMENT.into());
734    builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
735    let mut i = 0usize;
736    if is_sequence {
737        builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
738        if let Err(err) = emit_block_seq(&mut builder, &tokens, &mut i) {
739            return YamlParseReport {
740                tree: None,
741                diagnostics: vec![err],
742            };
743        }
744        builder.finish_node(); // YAML_BLOCK_SEQUENCE
745    } else if is_scalar_document {
746        if let Err(err) = emit_scalar_document(&mut builder, &tokens, &mut i) {
747            return YamlParseReport {
748                tree: None,
749                diagnostics: vec![err],
750            };
751        }
752    } else {
753        builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
754        if let Err(err) = emit_block_map(&mut builder, &tokens, &mut i, false) {
755            return YamlParseReport {
756                tree: None,
757                diagnostics: vec![err],
758            };
759        }
760        builder.finish_node(); // YAML_BLOCK_MAP
761    }
762    builder.finish_node(); // YAML_METADATA_CONTENT
763    builder.finish_node(); // DOCUMENT
764    YamlParseReport {
765        tree: Some(SyntaxNode::new_root(builder.finish())),
766        diagnostics: Vec::new(),
767    }
768}