Skip to main content

panache_parser/parser/yaml/
parser.rs

1use crate::syntax::{SyntaxKind, SyntaxNode};
2use rowan::GreenNodeBuilder;
3
4use super::lexer::{lex_mapping_tokens_with_diagnostic, split_once_unquoted};
5use super::model::{
6    ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlDiagnostic, YamlInputKind,
7    YamlParseReport, YamlToken, YamlTokenSpan, diagnostic_codes,
8};
9
10/// Parse YAML in shadow mode using prototype groundwork only.
11///
12/// This API is intentionally read-only and does not replace production YAML
13/// parsing. By default it is disabled and reports `SkippedDisabled`.
14pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
15    let line_count = input.lines().count().max(1);
16
17    if !options.enabled {
18        return ShadowYamlReport {
19            outcome: ShadowYamlOutcome::SkippedDisabled,
20            shadow_reason: "shadow-disabled",
21            input_kind: options.input_kind,
22            input_len_bytes: input.len(),
23            line_count,
24            normalized_input: None,
25        };
26    }
27
28    let normalized = match options.input_kind {
29        YamlInputKind::Plain => input.to_owned(),
30        YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
31    };
32
33    let parsed = parse_yaml_tree(&normalized).is_some();
34
35    ShadowYamlReport {
36        outcome: if parsed {
37            ShadowYamlOutcome::PrototypeParsed
38        } else {
39            ShadowYamlOutcome::PrototypeRejected
40        },
41        shadow_reason: if parsed {
42            "prototype-basic-mapping-parsed"
43        } else {
44            "prototype-basic-mapping-rejected"
45        },
46        input_kind: options.input_kind,
47        input_len_bytes: input.len(),
48        line_count,
49        normalized_input: Some(normalized),
50    }
51}
52
53fn normalize_hashpipe_input(input: &str) -> String {
54    input
55        .lines()
56        .map(strip_hashpipe_prefix)
57        .collect::<Vec<_>>()
58        .join("\n")
59}
60
/// Drop a leading `#|` marker, plus at most one space right after it.
///
/// Lines that do not start with `#|` are returned unchanged.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        Some(after_marker) => after_marker.strip_prefix(' ').unwrap_or(after_marker),
        None => line,
    }
}
67
68fn emit_token_as_yaml(builder: &mut GreenNodeBuilder<'_>, token: &YamlTokenSpan<'_>) {
69    let kind = match token.kind {
70        YamlToken::Whitespace => SyntaxKind::WHITESPACE,
71        YamlToken::Comment => SyntaxKind::YAML_COMMENT,
72        YamlToken::Tag => SyntaxKind::YAML_TAG,
73        YamlToken::Colon => SyntaxKind::YAML_COLON,
74        _ => SyntaxKind::YAML_SCALAR,
75    };
76    builder.token(kind.into(), token.text);
77}
78
79fn diag_at_token(
80    token: &YamlTokenSpan<'_>,
81    code: &'static str,
82    message: &'static str,
83) -> YamlDiagnostic {
84    YamlDiagnostic {
85        code,
86        message,
87        byte_start: token.byte_start,
88        byte_end: token.byte_end,
89    }
90}
91
92fn emit_flow_sequence<'a>(
93    builder: &mut GreenNodeBuilder<'_>,
94    tokens: &[YamlTokenSpan<'a>],
95    i: &mut usize,
96) -> Result<(), YamlDiagnostic> {
97    if *i >= tokens.len() || tokens[*i].kind != YamlToken::FlowSeqStart {
98        return Err(YamlDiagnostic {
99            code: diagnostic_codes::PARSE_EXPECTED_FLOW_SEQUENCE_START,
100            message: "expected flow sequence start token",
101            byte_start: tokens.get(*i).map(|t| t.byte_start).unwrap_or(0),
102            byte_end: tokens.get(*i).map(|t| t.byte_end).unwrap_or(0),
103        });
104    }
105
106    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
107    emit_token_as_yaml(builder, &tokens[*i]); // [
108    *i += 1;
109
110    let mut open_item = false;
111    while *i < tokens.len() {
112        match tokens[*i].kind {
113            YamlToken::FlowSeqEnd => {
114                if open_item {
115                    builder.finish_node(); // YAML_FLOW_SEQUENCE_ITEM
116                }
117                emit_token_as_yaml(builder, &tokens[*i]); // ]
118                *i += 1;
119                if *i < tokens.len() {
120                    match tokens[*i].kind {
121                        YamlToken::Newline | YamlToken::Comment => {}
122                        YamlToken::Whitespace if tokens[*i].text.trim().is_empty() => {}
123                        _ => {
124                            return Err(diag_at_token(
125                                &tokens[*i],
126                                diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
127                                "trailing content after flow sequence end",
128                            ));
129                        }
130                    }
131                }
132                builder.finish_node(); // YAML_FLOW_SEQUENCE
133                return Ok(());
134            }
135            YamlToken::Comma => {
136                if !open_item {
137                    return Err(diag_at_token(
138                        &tokens[*i],
139                        diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
140                        "invalid comma position in flow sequence",
141                    ));
142                }
143                builder.finish_node(); // YAML_FLOW_SEQUENCE_ITEM
144                open_item = false;
145                emit_token_as_yaml(builder, &tokens[*i]);
146                *i += 1;
147            }
148            YamlToken::Whitespace if !open_item => {
149                emit_token_as_yaml(builder, &tokens[*i]);
150                *i += 1;
151            }
152            YamlToken::Scalar if !open_item && tokens[*i].text.trim().is_empty() => {
153                emit_token_as_yaml(builder, &tokens[*i]);
154                *i += 1;
155            }
156            YamlToken::FlowSeqStart => {
157                if !open_item {
158                    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
159                    open_item = true;
160                }
161                emit_flow_sequence(builder, tokens, i)?;
162            }
163            YamlToken::FlowMapStart => {
164                if !open_item {
165                    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
166                    open_item = true;
167                }
168                emit_flow_map(builder, tokens, i)?;
169            }
170            _ => {
171                if !open_item {
172                    builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
173                    open_item = true;
174                }
175                emit_token_as_yaml(builder, &tokens[*i]);
176                *i += 1;
177            }
178        }
179    }
180
181    let (byte_start, byte_end) =
182        if let Some(start) = tokens.iter().find(|t| t.kind == YamlToken::FlowSeqStart) {
183            (
184                start.byte_start,
185                tokens.last().map(|t| t.byte_end).unwrap_or(start.byte_end),
186            )
187        } else {
188            tokens
189                .last()
190                .map(|t| (t.byte_start, t.byte_end))
191                .unwrap_or((0, 0))
192        };
193    Err(YamlDiagnostic {
194        code: diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
195        message: "unterminated flow sequence",
196        byte_start,
197        byte_end,
198    })
199}
200
201fn emit_flow_map<'a>(
202    builder: &mut GreenNodeBuilder<'_>,
203    tokens: &[YamlTokenSpan<'a>],
204    i: &mut usize,
205) -> Result<(), YamlDiagnostic> {
206    if *i >= tokens.len() || tokens[*i].kind != YamlToken::FlowMapStart {
207        return Err(YamlDiagnostic {
208            code: diagnostic_codes::PARSE_EXPECTED_FLOW_MAP_START,
209            message: "expected flow map start token",
210            byte_start: tokens.get(*i).map(|t| t.byte_start).unwrap_or(0),
211            byte_end: tokens.get(*i).map(|t| t.byte_end).unwrap_or(0),
212        });
213    }
214
215    builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
216    emit_token_as_yaml(builder, &tokens[*i]); // {
217    *i += 1;
218
219    loop {
220        // Skip inter-entry whitespace and newlines
221        while *i < tokens.len()
222            && matches!(tokens[*i].kind, YamlToken::Whitespace | YamlToken::Newline)
223        {
224            emit_token_as_yaml(builder, &tokens[*i]);
225            *i += 1;
226        }
227
228        if *i >= tokens.len() {
229            let (byte_start, byte_end) = tokens
230                .last()
231                .map(|t| (t.byte_start, t.byte_end))
232                .unwrap_or((0, 0));
233            return Err(YamlDiagnostic {
234                code: diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP,
235                message: "unterminated flow map",
236                byte_start,
237                byte_end,
238            });
239        }
240
241        match tokens[*i].kind {
242            YamlToken::FlowMapEnd => {
243                emit_token_as_yaml(builder, &tokens[*i]);
244                *i += 1;
245                if *i < tokens.len() {
246                    match tokens[*i].kind {
247                        YamlToken::Newline
248                        | YamlToken::Comment
249                        | YamlToken::Whitespace
250                        | YamlToken::FlowMapEnd
251                        | YamlToken::FlowSeqEnd
252                        | YamlToken::Comma => {}
253                        _ => {
254                            return Err(diag_at_token(
255                                &tokens[*i],
256                                diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
257                                "trailing content after flow map end",
258                            ));
259                        }
260                    }
261                }
262                builder.finish_node(); // YAML_FLOW_MAP
263                return Ok(());
264            }
265            YamlToken::Comma => {
266                emit_token_as_yaml(builder, &tokens[*i]);
267                *i += 1;
268            }
269            _ => {
270                emit_flow_map_entry(builder, tokens, i)?;
271            }
272        }
273    }
274}
275
276fn emit_flow_map_entry<'a>(
277    builder: &mut GreenNodeBuilder<'_>,
278    tokens: &[YamlTokenSpan<'a>],
279    i: &mut usize,
280) -> Result<(), YamlDiagnostic> {
281    builder.start_node(SyntaxKind::YAML_FLOW_MAP_ENTRY.into());
282    builder.start_node(SyntaxKind::YAML_FLOW_MAP_KEY.into());
283
284    // Emit leading whitespace inside key node
285    while *i < tokens.len() && tokens[*i].kind == YamlToken::Whitespace {
286        emit_token_as_yaml(builder, &tokens[*i]);
287        *i += 1;
288    }
289
290    // Determine key/value split point
291    let value_prefix: Option<&'a str> = match tokens.get(*i).map(|t| t.kind) {
292        Some(YamlToken::Scalar) => {
293            let scalar = tokens[*i];
294            *i += 1;
295            if let Some((key_text, rest_text)) = split_once_unquoted(scalar.text, ':') {
296                builder.token(SyntaxKind::YAML_KEY.into(), key_text);
297                builder.token(
298                    SyntaxKind::YAML_COLON.into(),
299                    &scalar.text[key_text.len()..key_text.len() + 1],
300                );
301                Some(rest_text)
302            } else {
303                // No colon — standalone scalar (implicit key or value-only)
304                builder.token(SyntaxKind::YAML_SCALAR.into(), scalar.text);
305                None
306            }
307        }
308        Some(YamlToken::Key) => {
309            // Already-tokenized key (from multi-line block lexing inside a flow map)
310            builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
311            *i += 1;
312            while *i < tokens.len() && tokens[*i].kind == YamlToken::Whitespace {
313                emit_token_as_yaml(builder, &tokens[*i]);
314                *i += 1;
315            }
316            if *i < tokens.len() && tokens[*i].kind == YamlToken::Colon {
317                builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
318                *i += 1;
319            }
320            None
321        }
322        Some(YamlToken::Tag) => {
323            emit_token_as_yaml(builder, &tokens[*i]);
324            *i += 1;
325            None
326        }
327        _ => None,
328    };
329
330    builder.finish_node(); // YAML_FLOW_MAP_KEY
331
332    builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
333    if let Some(prefix) = value_prefix
334        && !prefix.is_empty()
335    {
336        builder.token(SyntaxKind::YAML_SCALAR.into(), prefix);
337    }
338    emit_flow_value_tokens(builder, tokens, i)?;
339    builder.finish_node(); // YAML_FLOW_MAP_VALUE
340
341    builder.finish_node(); // YAML_FLOW_MAP_ENTRY
342    Ok(())
343}
344
345fn emit_flow_value_tokens<'a>(
346    builder: &mut GreenNodeBuilder<'_>,
347    tokens: &[YamlTokenSpan<'a>],
348    i: &mut usize,
349) -> Result<(), YamlDiagnostic> {
350    while *i < tokens.len() {
351        match tokens[*i].kind {
352            YamlToken::Comma | YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd => break,
353            YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
354            YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
355            _ => {
356                emit_token_as_yaml(builder, &tokens[*i]);
357                *i += 1;
358            }
359        }
360    }
361    Ok(())
362}
363
364fn emit_scalar_document<'a>(
365    builder: &mut GreenNodeBuilder<'_>,
366    tokens: &[YamlTokenSpan<'a>],
367    i: &mut usize,
368) -> Result<(), YamlDiagnostic> {
369    while *i < tokens.len() {
370        let kind = match tokens[*i].kind {
371            YamlToken::Newline => SyntaxKind::NEWLINE,
372            YamlToken::DocumentStart => SyntaxKind::YAML_DOCUMENT_START,
373            YamlToken::DocumentEnd => SyntaxKind::YAML_DOCUMENT_END,
374            YamlToken::Tag => SyntaxKind::YAML_TAG,
375            YamlToken::Comment => SyntaxKind::YAML_COMMENT,
376            YamlToken::Whitespace => SyntaxKind::WHITESPACE,
377            YamlToken::Colon => SyntaxKind::YAML_COLON,
378            YamlToken::FlowMapStart
379            | YamlToken::FlowMapEnd
380            | YamlToken::FlowSeqStart
381            | YamlToken::FlowSeqEnd
382            | YamlToken::Comma => {
383                return Err(diag_at_token(
384                    &tokens[*i],
385                    diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
386                    "unexpected flow indicator in plain scalar document",
387                ));
388            }
389            _ => SyntaxKind::YAML_SCALAR,
390        };
391        builder.token(kind.into(), tokens[*i].text);
392        *i += 1;
393    }
394    Ok(())
395}
396
397fn emit_block_seq<'a>(
398    builder: &mut GreenNodeBuilder<'_>,
399    tokens: &[YamlTokenSpan<'a>],
400    i: &mut usize,
401    stop_on_dedent: bool,
402) -> Result<(), YamlDiagnostic> {
403    while *i < tokens.len() {
404        match tokens[*i].kind {
405            YamlToken::Newline => {
406                builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
407                *i += 1;
408            }
409            YamlToken::DocumentStart => {
410                builder.token(SyntaxKind::YAML_DOCUMENT_START.into(), tokens[*i].text);
411                *i += 1;
412            }
413            YamlToken::DocumentEnd => {
414                builder.token(SyntaxKind::YAML_DOCUMENT_END.into(), tokens[*i].text);
415                *i += 1;
416            }
417            YamlToken::Whitespace => {
418                // Between-item indentation in a nested sequence.
419                builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
420                *i += 1;
421            }
422            YamlToken::Dedent => {
423                if stop_on_dedent {
424                    *i += 1;
425                    break;
426                }
427                break;
428            }
429            YamlToken::BlockSeqEntry => {
430                builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
431                builder.token(SyntaxKind::YAML_BLOCK_SEQ_ENTRY.into(), tokens[*i].text);
432                *i += 1;
433                let mut closed_via_nested_seq = false;
434                while *i < tokens.len() && tokens[*i].kind != YamlToken::Newline {
435                    match tokens[*i].kind {
436                        YamlToken::FlowSeqStart => emit_flow_sequence(builder, tokens, i)?,
437                        YamlToken::FlowMapStart => emit_flow_map(builder, tokens, i)?,
438                        YamlToken::Indent => {
439                            // Nested block sequence triggered by `- - ...`: the
440                            // lexer emitted an Indent between the outer `- ` and
441                            // the inner `-`. Recurse; the nested emitter
442                            // consumes through the matching Dedent (including
443                            // any intervening Newlines), so the outer item has
444                            // no trailing Newline to emit.
445                            *i += 1;
446                            builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
447                            emit_block_seq(builder, tokens, i, true)?;
448                            builder.finish_node(); // YAML_BLOCK_SEQUENCE
449                            closed_via_nested_seq = true;
450                            break;
451                        }
452                        _ => {
453                            emit_token_as_yaml(builder, &tokens[*i]);
454                            *i += 1;
455                        }
456                    }
457                }
458                if !closed_via_nested_seq
459                    && *i < tokens.len()
460                    && tokens[*i].kind == YamlToken::Newline
461                {
462                    builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
463                    *i += 1;
464                }
465                // Nested block map following a bare `-\n` entry: lexer has
466                // emitted an Indent after the Newline, terminated by a Dedent.
467                if !closed_via_nested_seq
468                    && *i < tokens.len()
469                    && tokens[*i].kind == YamlToken::Indent
470                {
471                    *i += 1;
472                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
473                    emit_block_map(builder, tokens, i, true)?;
474                    builder.finish_node(); // YAML_BLOCK_MAP
475                }
476                builder.finish_node(); // YAML_BLOCK_SEQUENCE_ITEM
477            }
478            _ => break,
479        }
480    }
481    Ok(())
482}
483
484fn emit_block_map<'a>(
485    builder: &mut GreenNodeBuilder<'_>,
486    tokens: &[YamlTokenSpan<'a>],
487    i: &mut usize,
488    stop_on_dedent: bool,
489) -> Result<(), YamlDiagnostic> {
490    let mut closed_by_dedent = false;
491    while *i < tokens.len() {
492        match tokens[*i].kind {
493            YamlToken::Newline => {
494                builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
495                *i += 1;
496            }
497            YamlToken::DocumentStart => {
498                builder.token(SyntaxKind::YAML_DOCUMENT_START.into(), tokens[*i].text);
499                *i += 1;
500            }
501            YamlToken::DocumentEnd => {
502                builder.token(SyntaxKind::YAML_DOCUMENT_END.into(), tokens[*i].text);
503                *i += 1;
504            }
505            YamlToken::Directive | YamlToken::Comma => {
506                builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
507                *i += 1;
508            }
509            YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd => {
510                return Err(diag_at_token(
511                    &tokens[*i],
512                    diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
513                    "unexpected flow closing token",
514                ));
515            }
516            YamlToken::FlowMapStart | YamlToken::FlowSeqStart => {
517                if tokens[*i].kind == YamlToken::FlowMapStart {
518                    emit_flow_map(builder, tokens, i)?;
519                } else {
520                    emit_flow_sequence(builder, tokens, i)?;
521                }
522            }
523            YamlToken::Anchor
524            | YamlToken::Alias
525            | YamlToken::BlockScalarHeader
526            | YamlToken::BlockScalarContent => {
527                builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
528                *i += 1;
529            }
530            YamlToken::Scalar | YamlToken::Comment => {
531                while *i < tokens.len() && tokens[*i].kind != YamlToken::Newline {
532                    if matches!(
533                        tokens[*i].kind,
534                        YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd
535                    ) {
536                        return Err(diag_at_token(
537                            &tokens[*i],
538                            diagnostic_codes::PARSE_UNEXPECTED_FLOW_CLOSER,
539                            "unexpected flow closing token",
540                        ));
541                    }
542                    emit_token_as_yaml(builder, &tokens[*i]);
543                    *i += 1;
544                }
545            }
546            YamlToken::Indent => {
547                return Err(diag_at_token(
548                    &tokens[*i],
549                    diagnostic_codes::PARSE_UNEXPECTED_INDENT,
550                    "unexpected indent token while parsing block map",
551                ));
552            }
553            YamlToken::Dedent => {
554                if stop_on_dedent {
555                    *i += 1;
556                    closed_by_dedent = true;
557                    break;
558                }
559                return Err(diag_at_token(
560                    &tokens[*i],
561                    diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
562                    "unexpected dedent token while parsing block map",
563                ));
564            }
565            _ => {
566                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_ENTRY.into());
567                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_KEY.into());
568
569                let mut saw_colon = false;
570                while *i < tokens.len() {
571                    match tokens[*i].kind {
572                        YamlToken::Key => {
573                            builder.token(SyntaxKind::YAML_KEY.into(), tokens[*i].text);
574                            *i += 1;
575                        }
576                        YamlToken::Tag => {
577                            builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
578                            *i += 1;
579                        }
580                        YamlToken::Whitespace => {
581                            builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
582                            *i += 1;
583                        }
584                        YamlToken::Colon => {
585                            builder.token(SyntaxKind::YAML_COLON.into(), tokens[*i].text);
586                            *i += 1;
587                            saw_colon = true;
588                            break;
589                        }
590                        _ => {
591                            return Err(diag_at_token(
592                                &tokens[*i],
593                                diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
594                                "invalid token while parsing block map key",
595                            ));
596                        }
597                    }
598                }
599                if !saw_colon {
600                    return Err(diag_at_token(
601                        &tokens[(*i).saturating_sub(1)],
602                        diagnostic_codes::PARSE_MISSING_COLON,
603                        "missing colon in block map entry",
604                    ));
605                }
606                builder.finish_node(); // YAML_BLOCK_MAP_KEY
607
608                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
609                while *i < tokens.len() {
610                    match tokens[*i].kind {
611                        YamlToken::Scalar => {
612                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
613                            *i += 1;
614                        }
615                        YamlToken::FlowMapStart => {
616                            emit_flow_map(builder, tokens, i)?;
617                        }
618                        YamlToken::FlowSeqStart => {
619                            emit_flow_sequence(builder, tokens, i)?;
620                        }
621                        YamlToken::Anchor | YamlToken::Alias => {
622                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
623                            *i += 1;
624                        }
625                        YamlToken::BlockScalarHeader => {
626                            // Emit the header, then consume the header line's
627                            // trailing newline plus all following content
628                            // lines (BlockScalarContent + Newline) into this
629                            // value so the block scalar body is structurally
630                            // part of YAML_BLOCK_MAP_VALUE.
631                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
632                            *i += 1;
633                            while *i < tokens.len() {
634                                match tokens[*i].kind {
635                                    YamlToken::Newline => {
636                                        builder.token(SyntaxKind::NEWLINE.into(), tokens[*i].text);
637                                        *i += 1;
638                                        // Continue while subsequent tokens
639                                        // belong to the scalar (more content
640                                        // or another blank-line newline).
641                                        if *i < tokens.len()
642                                            && matches!(
643                                                tokens[*i].kind,
644                                                YamlToken::BlockScalarContent | YamlToken::Newline
645                                            )
646                                        {
647                                            continue;
648                                        }
649                                        break;
650                                    }
651                                    YamlToken::BlockScalarContent => {
652                                        builder
653                                            .token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
654                                        *i += 1;
655                                    }
656                                    _ => break,
657                                }
658                            }
659                        }
660                        YamlToken::BlockScalarContent => {
661                            builder.token(SyntaxKind::YAML_SCALAR.into(), tokens[*i].text);
662                            *i += 1;
663                        }
664                        YamlToken::FlowMapEnd | YamlToken::FlowSeqEnd | YamlToken::Comma => {
665                            break;
666                        }
667                        YamlToken::Tag => {
668                            builder.token(SyntaxKind::YAML_TAG.into(), tokens[*i].text);
669                            *i += 1;
670                        }
671                        YamlToken::Comment => {
672                            builder.token(SyntaxKind::YAML_COMMENT.into(), tokens[*i].text);
673                            *i += 1;
674                        }
675                        YamlToken::Whitespace => {
676                            builder.token(SyntaxKind::WHITESPACE.into(), tokens[*i].text);
677                            *i += 1;
678                        }
679                        _ => break,
680                    }
681                }
682
683                let mut trailing_newline: Option<&str> = None;
684                if *i < tokens.len() && tokens[*i].kind == YamlToken::Newline {
685                    trailing_newline = Some(tokens[*i].text);
686                    *i += 1;
687                }
688
689                if *i < tokens.len() && tokens[*i].kind == YamlToken::Indent {
690                    *i += 1;
691                    // Emit trailing newline before nested content to preserve byte order
692                    if let Some(newline) = trailing_newline.take() {
693                        builder.token(SyntaxKind::NEWLINE.into(), newline);
694                    }
695                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
696                    emit_block_map(builder, tokens, i, true)?;
697                    builder.finish_node(); // YAML_BLOCK_MAP
698                }
699
700                builder.finish_node(); // YAML_BLOCK_MAP_VALUE
701                if let Some(newline) = trailing_newline {
702                    builder.token(SyntaxKind::NEWLINE.into(), newline);
703                }
704                builder.finish_node(); // YAML_BLOCK_MAP_ENTRY
705            }
706        }
707    }
708
709    if stop_on_dedent && !closed_by_dedent {
710        let (byte_start, byte_end) = tokens
711            .last()
712            .map(|t| (t.byte_start, t.byte_end))
713            .unwrap_or((0, 0));
714        return Err(YamlDiagnostic {
715            code: diagnostic_codes::PARSE_UNTERMINATED_BLOCK_MAP,
716            message: "unterminated indented block map",
717            byte_start,
718            byte_end,
719        });
720    }
721
722    Ok(())
723}
724
725/// Parse prototype YAML tree structure from input
726pub fn parse_yaml_tree(input: &str) -> Option<SyntaxNode> {
727    parse_yaml_report(input).tree
728}
729
730/// Parse prototype YAML tree structure and include diagnostics on failure.
731pub fn parse_yaml_report(input: &str) -> YamlParseReport {
732    let tokens = match lex_mapping_tokens_with_diagnostic(input) {
733        Ok(tokens) => tokens,
734        Err(err) => {
735            return YamlParseReport {
736                tree: None,
737                diagnostics: vec![err],
738            };
739        }
740    };
741
742    let mut seen_content = false;
743    for token in &tokens {
744        match token.kind {
745            YamlToken::Directive if seen_content => {
746                return YamlParseReport {
747                    tree: None,
748                    diagnostics: vec![diag_at_token(
749                        token,
750                        diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
751                        "directive requires document end before subsequent directives",
752                    )],
753                };
754            }
755            YamlToken::Directive
756            | YamlToken::Newline
757            | YamlToken::Whitespace
758            | YamlToken::Comment => {}
759            YamlToken::DocumentEnd => seen_content = false,
760            _ => seen_content = true,
761        }
762    }
763
764    if let Some(directive) = tokens.iter().find(|t| t.kind == YamlToken::Directive)
765        && !tokens.iter().any(|t| t.kind == YamlToken::DocumentStart)
766    {
767        return YamlParseReport {
768            tree: None,
769            diagnostics: vec![diag_at_token(
770                directive,
771                diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
772                "directive requires an explicit document start marker",
773            )],
774        };
775    }
776
777    let is_sequence = tokens
778        .iter()
779        .find(|t| {
780            !matches!(
781                t.kind,
782                YamlToken::Newline
783                    | YamlToken::Whitespace
784                    | YamlToken::Comment
785                    | YamlToken::DocumentStart
786                    | YamlToken::DocumentEnd
787                    | YamlToken::Directive
788            )
789        })
790        .is_some_and(|t| t.kind == YamlToken::BlockSeqEntry);
791
792    // Scalar document with an explicit tag (e.g. `! a`, `!!str foo`): the token
793    // stream has a Tag but no Colon and no BlockSeqEntry, so emit_block_map's
794    // key/colon expectation would reject it. Tagless scalar documents still go
795    // through emit_block_map to keep existing CST shapes unchanged.
796    let has_colon = tokens.iter().any(|t| t.kind == YamlToken::Colon);
797    let has_tag = tokens.iter().any(|t| t.kind == YamlToken::Tag);
798    let is_scalar_document = !is_sequence && !has_colon && has_tag;
799
800    let mut builder = GreenNodeBuilder::new();
801    builder.start_node(SyntaxKind::DOCUMENT.into());
802    builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
803    let mut i = 0usize;
804    if is_sequence {
805        builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
806        if let Err(err) = emit_block_seq(&mut builder, &tokens, &mut i, false) {
807            return YamlParseReport {
808                tree: None,
809                diagnostics: vec![err],
810            };
811        }
812        builder.finish_node(); // YAML_BLOCK_SEQUENCE
813    } else if is_scalar_document {
814        if let Err(err) = emit_scalar_document(&mut builder, &tokens, &mut i) {
815            return YamlParseReport {
816                tree: None,
817                diagnostics: vec![err],
818            };
819        }
820    } else {
821        builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
822        if let Err(err) = emit_block_map(&mut builder, &tokens, &mut i, false) {
823            return YamlParseReport {
824                tree: None,
825                diagnostics: vec![err],
826            };
827        }
828        builder.finish_node(); // YAML_BLOCK_MAP
829    }
830    builder.finish_node(); // YAML_METADATA_CONTENT
831    builder.finish_node(); // DOCUMENT
832    YamlParseReport {
833        tree: Some(SyntaxNode::new_root(builder.finish())),
834        diagnostics: Vec::new(),
835    }
836}