Skip to main content

panache_parser/parser/yaml/
parser.rs

1//! YAML parser core: orchestrator + streaming-token-to-CST builder.
2//!
3//! Two layers live in this module:
4//!
5//! 1. **Orchestrator** — [`parse_shadow`], [`parse_yaml_tree`], and
6//!    [`parse_yaml_report`]. These wrap [`parse_stream`] in the
7//!    `DOCUMENT > YAML_METADATA_CONTENT > YAML_STREAM` envelope
8//!    expected by the host CST, run the structural
9//!    [`super::validator::validate_yaml`] pass, and surface
10//!    diagnostics. Shadow-mode (`parse_shadow`) keeps a probe path the
11//!    integration harness can flip on for prototype reporting.
12//!
13//! 2. **Streaming parser** — [`parse_stream`] drives
14//!    [`super::scanner::Scanner`] and emits the rowan green tree. Each
15//!    contiguous run of body content is wrapped in a `YAML_DOCUMENT`
16//!    node (with `---` / `...` markers consumed inside the document
17//!    they delimit); block-context content nests under `YAML_BLOCK_MAP`
18//!    / `YAML_BLOCK_SEQUENCE` containers driven by the scanner's
19//!    synthetic `BlockMappingStart` / `BlockSequenceStart` / `BlockEnd`
20//!    markers; each key-value pair is wrapped in
21//!    `YAML_BLOCK_MAP_ENTRY`, each `-` entry in
22//!    `YAML_BLOCK_SEQUENCE_ITEM`, and each map entry splits into
23//!    `YAML_BLOCK_MAP_KEY` (everything up to and including the `:`) and
24//!    `YAML_BLOCK_MAP_VALUE` (everything after). Flow contexts mirror
25//!    the same shape: `YAML_FLOW_MAP` / `YAML_FLOW_MAP_ENTRY` /
26//!    `YAML_FLOW_MAP_KEY` / `YAML_FLOW_MAP_VALUE` and
27//!    `YAML_FLOW_SEQUENCE` / `YAML_FLOW_SEQUENCE_ITEM`. Source-backed
28//!    `[` / `]` / `{` / `}` / `,` are emitted at the container level
29//!    (siblings of items), with item/entry sub-wrappers closing on `,`
30//!    and the matching closer.
31
32#![allow(dead_code)]
33
34use crate::syntax::{SyntaxKind, SyntaxNode};
35use rowan::GreenNodeBuilder;
36
37use super::model::{
38    ShadowYamlOptions, ShadowYamlOutcome, ShadowYamlReport, YamlInputKind, YamlParseReport,
39};
40use super::scanner::{Scanner, TokenKind, TriviaKind};
41
42/// Parse YAML in shadow mode using prototype groundwork only.
43///
44/// This API is intentionally read-only and does not replace production YAML
45/// parsing. By default it is disabled and reports `SkippedDisabled`.
46pub fn parse_shadow(input: &str, options: ShadowYamlOptions) -> ShadowYamlReport {
47    let line_count = input.lines().count().max(1);
48
49    if !options.enabled {
50        return ShadowYamlReport {
51            outcome: ShadowYamlOutcome::SkippedDisabled,
52            shadow_reason: "shadow-disabled",
53            input_kind: options.input_kind,
54            input_len_bytes: input.len(),
55            line_count,
56            normalized_input: None,
57        };
58    }
59
60    let normalized = match options.input_kind {
61        YamlInputKind::Plain => input.to_owned(),
62        YamlInputKind::Hashpipe => normalize_hashpipe_input(input),
63    };
64
65    let parsed = parse_yaml_tree(&normalized).is_some();
66
67    ShadowYamlReport {
68        outcome: if parsed {
69            ShadowYamlOutcome::PrototypeParsed
70        } else {
71            ShadowYamlOutcome::PrototypeRejected
72        },
73        shadow_reason: if parsed {
74            "prototype-basic-mapping-parsed"
75        } else {
76            "prototype-basic-mapping-rejected"
77        },
78        input_kind: options.input_kind,
79        input_len_bytes: input.len(),
80        line_count,
81        normalized_input: Some(normalized),
82    }
83}
84
85fn normalize_hashpipe_input(input: &str) -> String {
86    input
87        .lines()
88        .map(strip_hashpipe_prefix)
89        .collect::<Vec<_>>()
90        .join("\n")
91}
92
/// Remove a leading `#|` marker from `line`, plus at most one space
/// directly after it. Lines that do not start with `#|` are returned
/// unchanged.
fn strip_hashpipe_prefix(line: &str) -> &str {
    match line.strip_prefix("#|") {
        // Only a single separating space is dropped; any further
        // indentation after the marker is kept.
        Some(after_marker) => after_marker.strip_prefix(' ').unwrap_or(after_marker),
        None => line,
    }
}
99
100/// Parse prototype YAML tree structure from input
101pub fn parse_yaml_tree(input: &str) -> Option<SyntaxNode> {
102    parse_yaml_report(input).tree
103}
104
105/// Parse prototype YAML tree structure and include diagnostics on failure.
106///
107/// Diagnostics flow through the structural
108/// [`super::validator::validate_yaml`] pass, which composes per-cluster
109/// `check_*` functions covering directive ordering, structural shape
110/// (unterminated flow, trailing content, invalid keys, indent
111/// anomalies, block-scalar header, etc.), and lex-level checks like
112/// `LEX_INVALID_DOUBLE_QUOTED_ESCAPE`.
113///
114/// The returned tree, when present, comes from the streaming scanner
115/// and builder.
116pub fn parse_yaml_report(input: &str) -> YamlParseReport {
117    if let Some(err) = super::validator::validate_yaml(input) {
118        return YamlParseReport {
119            tree: None,
120            diagnostics: vec![err],
121        };
122    }
123
124    let stream = parse_stream(input);
125    let mut builder = GreenNodeBuilder::new();
126    builder.start_node(SyntaxKind::DOCUMENT.into());
127    builder.start_node(SyntaxKind::YAML_METADATA_CONTENT.into());
128    let stream_green = stream.green().into_owned();
129    builder.start_node(SyntaxKind::YAML_STREAM.into());
130    for child in stream_green.children() {
131        match child {
132            rowan::NodeOrToken::Node(n) => {
133                push_green_node(&mut builder, n);
134            }
135            rowan::NodeOrToken::Token(t) => {
136                builder.token(t.kind(), t.text());
137            }
138        }
139    }
140    builder.finish_node(); // YAML_STREAM
141    builder.finish_node(); // YAML_METADATA_CONTENT
142    builder.finish_node(); // DOCUMENT
143    YamlParseReport {
144        tree: Some(SyntaxNode::new_root(builder.finish())),
145        diagnostics: Vec::new(),
146    }
147}
148
149fn push_green_node(builder: &mut GreenNodeBuilder<'_>, node: &rowan::GreenNodeData) {
150    builder.start_node(node.kind());
151    for child in node.children() {
152        match child {
153            rowan::NodeOrToken::Node(n) => push_green_node(builder, n),
154            rowan::NodeOrToken::Token(t) => builder.token(t.kind(), t.text()),
155        }
156    }
157    builder.finish_node();
158}
159
/// Drive the scanner over `input` and build a CST. Always returns a
/// `SyntaxNode` — the scanner is permissive and the builder preserves
/// bytes regardless of well-formedness.
///
/// The root of the returned tree is a `YAML_STREAM` node. Each loop
/// iteration handles one scanner token in two phases:
///
/// 1. A structural `match` that opens and closes container nodes and
///    their ENTRY / ITEM / KEY / VALUE sub-wrappers. Arms that fully
///    handle their token (stream markers, synthetic block markers, flow
///    indicators, the key-closing `:`) `continue`; the other arms fall
///    through to phase 2.
/// 2. An emission `match` that does the `YAML_DOCUMENT` bookkeeping
///    (implicit open, `---` / `...` handling, directives) and writes the
///    token's source text into whichever node is currently open.
///
/// After the scanner is exhausted, any still-open block containers, the
/// open document (if any), and the stream node are closed.
pub fn parse_stream(input: &str) -> SyntaxNode {
    let mut builder = GreenNodeBuilder::new();
    builder.start_node(SyntaxKind::YAML_STREAM.into());
    let mut scanner = Scanner::new(input);
    // Whether a YAML_DOCUMENT node is currently open in the builder.
    let mut doc_open = false;
    // True when the open YAML_DOCUMENT has only seen directives + trivia
    // (no body content yet, no `---`). YAML 1.2 says directives belong to
    // the document the following `---` opens, so when DocumentStart
    // arrives in this state the marker stays inside the same document
    // rather than splitting it. Cleared as soon as any non-directive
    // body content lands.
    let mut doc_only_has_directives = false;
    // Stack of currently-open block containers. Each frame tracks
    // whether its current `YAML_BLOCK_MAP_ENTRY` / `YAML_BLOCK_SEQUENCE_ITEM`
    // sub-wrapper is still open and waiting to be closed (by the next
    // `Key` / `BlockEntry` peer or by `BlockEnd`).
    // Flow containers (`FlowSequence` / `FlowMap`) are pushed onto this
    // same stack by the flow-start arms below.
    let mut block_stack: Vec<BlockFrame> = Vec::new();
    // Kind of the last non-trivia, non-stream-marker, non-decoration
    // token emitted. An indentless block sequence is only valid when
    // its `-` directly follows the map entry's `:` (the value is
    // otherwise empty), so the `BlockEntry` handler consults this to
    // tell RLU9 (`foo:\n- 42`, value is purely the sequence) apart from
    // G9HC (`seq:\n&anchor\n- a` with the anchor at column 0 — an
    // error the validator must still catch on the unwrapped shape).
    // Anchor / Tag / Alias tokens are *decorations* of the next node
    // and don't fill the empty-value slot; they're skipped here so a
    // value-leading decoration still permits an indentless sequence
    // (SKE5: `seq:\n &anchor\n- a`).
    let mut prev_significant: Option<TokenKind> = None;
    // Smallest column among Anchor/Tag/Alias decorations seen since the
    // last value-filling token. The indentless detector uses this to
    // distinguish SKE5 (decoration indented past parent → wrap) from
    // G9HC (decoration at parent indent → leave unwrapped for the
    // validator). `None` when no decoration is pending.
    let mut decoration_col_floor: Option<usize> = None;
    while let Some(tok) = scanner.next_token() {
        // Snapshot the tracking state *before* it is updated for this
        // token: the `BlockEntry` arm needs to know what preceded the
        // `-`, not the `-` itself.
        let last_significant = prev_significant;
        let decorations_so_far = decoration_col_floor;
        let is_decoration = matches!(
            tok.kind,
            TokenKind::Anchor | TokenKind::Tag | TokenKind::Alias
        );
        // Trivia and stream markers leave both trackers untouched.
        if !matches!(
            tok.kind,
            TokenKind::Trivia(_) | TokenKind::StreamStart | TokenKind::StreamEnd
        ) {
            if is_decoration {
                // Keep the minimum column over all pending decorations.
                decoration_col_floor = Some(
                    decoration_col_floor.map_or(tok.start.column, |c| c.min(tok.start.column)),
                );
            } else {
                prev_significant = Some(tok.kind);
                decoration_col_floor = None;
            }
        }
        // Phase 1: structural bookkeeping.
        match tok.kind {
            // Stream markers produce no tree output at all.
            TokenKind::StreamStart | TokenKind::StreamEnd => continue,
            TokenKind::BlockMappingStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
                block_stack.push(BlockFrame::BlockMap {
                    entry_open: false,
                    in_value: false,
                });
                continue;
            }
            TokenKind::BlockSequenceStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
                block_stack.push(BlockFrame::BlockSequence {
                    item_open: false,
                    indentless: false,
                });
                continue;
            }
            TokenKind::BlockEnd => {
                // Indentless sequences have no scanner BlockEnd of their
                // own, so a BlockEnd arriving while one is on top is meant
                // for the real container beneath it. Close the indentless
                // frame(s) first, then consume the BlockEnd normally.
                close_indentless_sequences(&mut builder, &mut block_stack);
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                // Defensive: only close if the scanner gave us an open
                // container. A stray BlockEnd would otherwise pop the
                // YAML_DOCUMENT or YAML_STREAM frame.
                if block_stack.pop().is_some() {
                    builder.finish_node();
                }
                continue;
            }
            TokenKind::FlowSequenceStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                // If nested inside a Map's open KEY/VALUE wrapper, the
                // current open scope is the appropriate parent.
                builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
                block_stack.push(BlockFrame::FlowSequence { item_open: false });
                let text = &input[tok.start.index..tok.end.index];
                // NOTE(review): the `[` indicator is emitted with the
                // `YAML_SCALAR` kind here (as are `]`, `{`, `}` and `,`
                // in the arms below) instead of going through
                // `map_token_to_syntax_kind` — confirm this is intended.
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                continue;
            }
            TokenKind::FlowSequenceEnd => {
                // Close the open ITEM/ENTRY first so `]` lands at the
                // container level, then close the container itself.
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                // Pops whichever flow frame is on top, including a
                // mismatched FlowMap — presumably so a wrong closer
                // still unwinds the stack; block frames are never
                // popped here.
                if matches!(
                    block_stack.last(),
                    Some(BlockFrame::FlowSequence { .. } | BlockFrame::FlowMap { .. })
                ) {
                    block_stack.pop();
                    builder.finish_node();
                }
                continue;
            }
            TokenKind::FlowMappingStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
                block_stack.push(BlockFrame::FlowMap {
                    entry_open: false,
                    in_value: false,
                });
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                continue;
            }
            TokenKind::FlowMappingEnd => {
                // Mirror of the `FlowSequenceEnd` arm for `}`.
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                if matches!(
                    block_stack.last(),
                    Some(BlockFrame::FlowMap { .. } | BlockFrame::FlowSequence { .. })
                ) {
                    block_stack.pop();
                    builder.finish_node();
                }
                continue;
            }
            TokenKind::FlowEntry => {
                // `,` closes the current entry/item and lives at the
                // container level (between peer entries/items).
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                continue;
            }
            TokenKind::Key => {
                // A `Key` at the parent map's level terminates any
                // open indentless sequence value first, revealing the
                // map frame below.
                close_indentless_sequences(&mut builder, &mut block_stack);
                // Both the synthetic 0-width splice and the source-backed
                // `?` indicator open a new map entry. Close the previous
                // entry first if still open. After this, the current
                // open scope is the new key wrapper.
                if matches!(
                    block_stack.last(),
                    Some(BlockFrame::BlockMap { .. } | BlockFrame::FlowMap { .. })
                ) {
                    open_map_entry_with_key(&mut builder, &mut block_stack);
                }
                if tok.start.index == tok.end.index {
                    // Synthetic Key splice carries no bytes.
                    continue;
                }
                // Source-backed `?`: ensure we have somewhere to put it.
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                // Fall through to emit `?` inside the open KEY (or
                // current scope if not in a Map frame).
            }
            TokenKind::Value => {
                // An empty-key `:` at the parent map's level likewise
                // terminates an open indentless sequence value first.
                close_indentless_sequences(&mut builder, &mut block_stack);
                // Copy out the top map frame's state as
                // `(is_flow, entry_open, in_value)`; `None` when the top
                // frame is not a map (or the stack is empty).
                let map_state = match block_stack.last().copied() {
                    Some(BlockFrame::BlockMap {
                        entry_open,
                        in_value,
                    }) => Some((false, entry_open, in_value)),
                    Some(BlockFrame::FlowMap {
                        entry_open,
                        in_value,
                    }) => Some((true, entry_open, in_value)),
                    _ => None,
                };
                if let Some((is_flow, mut entry_open, mut in_value)) = map_state {
                    // A bare `:` arriving while the current block-map
                    // entry is already in its VALUE phase starts a NEW
                    // entry whose key is empty (`: a\n: b`, 2JQS/S3PD) —
                    // not a double-colon inside that value. The scanner's
                    // indent machinery guarantees we only reach here for a
                    // peer at the map's column (a deeper colon rolls a
                    // fresh BlockMappingStart; a shallower one unwinds with
                    // BlockEnd first), so close the current entry and fall
                    // through to open the new one. Flow maps separate
                    // entries with `,`, which already closes the entry, so
                    // their in_value is false here — leave them alone.
                    if !is_flow && entry_open && in_value {
                        close_open_sub_wrapper(&mut builder, &mut block_stack);
                        // Keep the local copies in sync with the frame
                        // that `close_open_sub_wrapper` just reset.
                        entry_open = false;
                        in_value = false;
                    }
                    // Empty-key shorthand: `:` arriving without a prior
                    // Key opens an ENTRY+KEY before consuming the colon.
                    if !entry_open {
                        open_map_entry_with_key(&mut builder, &mut block_stack);
                    }
                    if !in_value {
                        // The colon is the last token of KEY. After it
                        // we close KEY and open VALUE.
                        let text = &input[tok.start.index..tok.end.index];
                        if !text.is_empty() {
                            builder.token(SyntaxKind::YAML_COLON.into(), text);
                        }
                        builder.finish_node(); // close KEY
                        let value_kind = if is_flow {
                            SyntaxKind::YAML_FLOW_MAP_VALUE
                        } else {
                            SyntaxKind::YAML_BLOCK_MAP_VALUE
                        };
                        builder.start_node(value_kind.into());
                        // Record on the frame that the entry is now in
                        // its VALUE phase.
                        if let Some(
                            BlockFrame::BlockMap { in_value, .. }
                            | BlockFrame::FlowMap { in_value, .. },
                        ) = block_stack.last_mut()
                        {
                            *in_value = true;
                        }
                        continue;
                    }
                    // Already in_value: pathological double-colon. Fall
                    // through and emit at the current scope (inside
                    // VALUE) for losslessness.
                }
                // Not a Map frame: ensure flow-seq ITEM is open, then
                // fall through to emit `:` at current scope.
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
            }
            TokenKind::BlockEntry => {
                // An indentless sequence opens when a `-` lands directly
                // in a block-map VALUE: the scanner pushed no indent
                // level (the `-` is at the parent key's column), so no
                // `BlockSequenceStart` arrived. Synthesize the
                // `YAML_BLOCK_SEQUENCE` frame inside the open VALUE so the
                // tree matches the indented form (spec 8.2.1). Only when
                // the `:` is the last significant token — i.e. the value
                // is otherwise empty; a `-` after scalar content in the
                // value is a structural error left unwrapped for the
                // validator to reject.
                // Decorations between `:` and `-` are allowed only when
                // they sit inside the value scope — strictly indented
                // past the indentless `-`. Otherwise the anchor is at
                // the parent mapping's level (G9HC) and the sequence
                // shouldn't wrap.
                let decorations_inside_value =
                    decorations_so_far.is_none_or(|c| c > tok.start.column);
                let indentless_value = last_significant == Some(TokenKind::Value)
                    && matches!(
                        block_stack.last(),
                        Some(BlockFrame::BlockMap { in_value: true, .. })
                    )
                    && decorations_inside_value;
                // The mirror case: a `-` landing directly after the `?`
                // explicit-key indicator opens an indentless sequence as
                // the KEY's content (6PBE). The scanner likewise pushes no
                // indent level, so synthesize the `YAML_BLOCK_SEQUENCE`
                // inside the open KEY. `close_indentless_sequences` later
                // pops it when the entry's `:` (`Value`) arrives.
                let indentless_key = last_significant == Some(TokenKind::Key)
                    && matches!(
                        block_stack.last(),
                        Some(BlockFrame::BlockMap {
                            entry_open: true,
                            in_value: false,
                        })
                    )
                    && decorations_inside_value;
                if indentless_value || indentless_key {
                    builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
                    block_stack.push(BlockFrame::BlockSequence {
                        item_open: false,
                        indentless: true,
                    });
                }
                // Each `-` starts a new ITEM: close the previous item (if
                // one is open) and open a fresh wrapper for this entry.
                if matches!(block_stack.last(), Some(BlockFrame::BlockSequence { .. })) {
                    close_open_sub_wrapper(&mut builder, &mut block_stack);
                    builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
                    if let Some(BlockFrame::BlockSequence { item_open, .. }) =
                        block_stack.last_mut()
                    {
                        *item_open = true;
                    }
                }
                // Fall through to emit the `-` byte inside the new ITEM
                // (or at current scope if not in a Sequence frame).
            }
            TokenKind::Trivia(_) => {
                // Trivia bypasses item-opening: pre-content trivia in a
                // flow sequence stays at SEQUENCE level.
            }
            _ => {
                // Any other source-backed content (Scalar, Anchor, Tag,
                // Alias, Directive, doc markers): if we're inside a
                // FlowSequence with no open ITEM, open one before
                // emitting. Doc markers are handled below.
                if !matches!(tok.kind, TokenKind::DocumentStart | TokenKind::DocumentEnd) {
                    ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                }
            }
        }
        // Phase 2: document bookkeeping and emission of the token text.
        let text = &input[tok.start.index..tok.end.index];
        if text.is_empty() {
            // Defensive: never emit zero-width tokens (rowan rejects).
            continue;
        }
        let kind = map_token_to_syntax_kind(tok.kind);
        match tok.kind {
            TokenKind::DocumentStart => {
                // `---` begins a fresh document. Two cases:
                //  - The currently-open document only has directives so
                //    far: per YAML 1.2 the directives belong to the doc
                //    that this `---` opens. Stay inside, just emit the
                //    marker.
                //  - Otherwise: close the previous doc (and any open
                //    block containers) and open a new YAML_DOCUMENT.
                //    The scanner unwinds the indent stack at column 0,
                //    but a same-indent map at indent==0 leaves them
                //    open, so close them defensively.
                if doc_open && doc_only_has_directives {
                    builder.token(kind.into(), text);
                    doc_only_has_directives = false;
                } else {
                    close_block_containers(&mut builder, &mut block_stack);
                    if doc_open {
                        builder.finish_node();
                    }
                    builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
                    doc_open = true;
                    doc_only_has_directives = false;
                    builder.token(kind.into(), text);
                }
            }
            TokenKind::DocumentEnd => {
                // `...` closes the current document. Close any open
                // block containers first so the marker is a child of
                // the document, not buried in a block container.
                close_block_containers(&mut builder, &mut block_stack);
                // A `...` with no document in progress gets a document
                // of its own so the marker is never a direct child of
                // the stream.
                if !doc_open {
                    builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
                }
                builder.token(kind.into(), text);
                builder.finish_node();
                doc_open = false;
                doc_only_has_directives = false;
            }
            TokenKind::Trivia(_) => {
                // Trivia goes to whichever level is currently open;
                // pre-document trivia stays at YAML_STREAM, in-document
                // trivia stays inside the YAML_DOCUMENT, the open
                // block container, or the open ENTRY/ITEM sub-wrapper.
                builder.token(kind.into(), text);
            }
            TokenKind::Directive => {
                // Directives belong inside a YAML_DOCUMENT but don't by
                // themselves count as body content — a following `---`
                // should not split into a separate doc.
                // Only a directive that *opens* the document sets the
                // flag; a directive arriving in an already-open document
                // leaves it as it was.
                let was_open = doc_open;
                ensure_doc_open(&mut builder, &mut doc_open);
                if !was_open {
                    doc_only_has_directives = true;
                }
                builder.token(kind.into(), text);
            }
            _ => {
                // Any non-trivia content opens an implicit document
                // when one isn't already in progress and counts as
                // body content (clears the directives-only flag).
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                builder.token(kind.into(), text);
            }
        }
    }
    // Close any open block containers (and their open ENTRY/ITEM
    // sub-wrappers) and the open document. The scanner emits BlockEnd
    // on stream end via `unwind_indent(-1)`, so this is normally a
    // no-op for `block_stack`; kept for safety against truncated
    // inputs and future scanner quirks.
    close_block_containers(&mut builder, &mut block_stack);
    if doc_open {
        builder.finish_node(); // YAML_DOCUMENT
    }
    builder.finish_node(); // YAML_STREAM
    SyntaxNode::new_root(builder.finish())
}
565
/// Tracks an open container in the streaming builder's stack. Block and
/// flow contexts share state shape, but their containers and
/// sub-wrappers use different `SyntaxKind` variants and they close on
/// different tokens (`BlockEnd` / dedent vs. `]` / `}` / `,`).
///
/// For maps, `entry_open` records whether the entry sub-wrapper is
/// still open, and `in_value` selects between the KEY and VALUE
/// sub-sub-wrapper. For sequences, `item_open` records whether the
/// item sub-wrapper is still open.
///
/// Despite the name, the enum also covers flow containers: the same
/// stack holds both block and flow frames.
#[derive(Debug, Clone, Copy)]
enum BlockFrame {
    /// An open `YAML_BLOCK_MAP`, pushed on the scanner's
    /// `BlockMappingStart`.
    BlockMap {
        // True while a `YAML_BLOCK_MAP_ENTRY` is open on this frame.
        entry_open: bool,
        // False while the open entry is in its KEY wrapper, true once
        // the `:` has switched it to the VALUE wrapper.
        in_value: bool,
    },
    /// `indentless` marks a sequence opened as a block-map value whose
    /// `-` entries sit at the same column as the parent key (YAML's
    /// "indentless sequence", spec 8.2.1). The scanner never pushes an
    /// indent level for it, so it emits no matching `BlockEnd`; the
    /// builder must close the frame itself when the parent map's next
    /// `Key` / `Value` / `BlockEnd` arrives.
    BlockSequence {
        // True while a `YAML_BLOCK_SEQUENCE_ITEM` is open on this frame.
        item_open: bool,
        // True when the builder synthesized this frame itself rather
        // than opening it on a scanner `BlockSequenceStart`.
        indentless: bool,
    },
    /// An open `YAML_FLOW_MAP`, pushed on `{`.
    FlowMap {
        // True while a `YAML_FLOW_MAP_ENTRY` is open on this frame.
        entry_open: bool,
        // False while the open entry is in its KEY wrapper, true once
        // the `:` has switched it to the VALUE wrapper.
        in_value: bool,
    },
    /// An open `YAML_FLOW_SEQUENCE`, pushed on `[`.
    FlowSequence {
        // True while a `YAML_FLOW_SEQUENCE_ITEM` is open on this frame;
        // items are opened lazily by the first content token.
        item_open: bool,
    },
}
599
600fn ensure_doc_open(builder: &mut GreenNodeBuilder<'_>, doc_open: &mut bool) {
601    if !*doc_open {
602        builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
603        *doc_open = true;
604    }
605}
606
607/// In a flow sequence, source-backed content opens a new
608/// `YAML_FLOW_SEQUENCE_ITEM` lazily — there is no `-` token to drive
609/// the boundary the way `BlockEntry` drives block sequences. Trivia
610/// arriving before the first item stays at the container level.
611fn ensure_flow_seq_item_open(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
612    if let Some(BlockFrame::FlowSequence { item_open }) = stack.last_mut()
613        && !*item_open
614    {
615        builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
616        *item_open = true;
617    }
618}
619
620/// Open `<MAP>_ENTRY` > `<MAP>_KEY` for the next entry, closing any
621/// previously-open entry on the same Map frame. Caller must have
622/// verified the top frame is a Map (Block or Flow).
623fn open_map_entry_with_key(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
624    close_open_sub_wrapper(builder, stack);
625    let (entry_kind, key_kind) = match stack.last() {
626        Some(BlockFrame::BlockMap { .. }) => (
627            SyntaxKind::YAML_BLOCK_MAP_ENTRY,
628            SyntaxKind::YAML_BLOCK_MAP_KEY,
629        ),
630        Some(BlockFrame::FlowMap { .. }) => (
631            SyntaxKind::YAML_FLOW_MAP_ENTRY,
632            SyntaxKind::YAML_FLOW_MAP_KEY,
633        ),
634        _ => return,
635    };
636    builder.start_node(entry_kind.into());
637    builder.start_node(key_kind.into());
638    if let Some(
639        BlockFrame::BlockMap {
640            entry_open,
641            in_value,
642        }
643        | BlockFrame::FlowMap {
644            entry_open,
645            in_value,
646        },
647    ) = stack.last_mut()
648    {
649        *entry_open = true;
650        *in_value = false;
651    }
652}
653
654/// Close any indentless `YAML_BLOCK_SEQUENCE` frames on top of the
655/// stack. These have no matching scanner `BlockEnd`, so they're closed
656/// here when the parent map's next `Key` / `Value` / `BlockEnd` arrives.
657/// Closing the open ITEM, finishing the SEQUENCE node, and popping the
658/// frame reveals the parent map for the incoming token. Loops because
659/// the next token may close several levels, though in practice
660/// indentless frames never stack directly (they're always separated by
661/// a map frame).
662fn close_indentless_sequences(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
663    while let Some(BlockFrame::BlockSequence {
664        indentless: true, ..
665    }) = stack.last()
666    {
667        close_open_sub_wrapper(builder, stack);
668        stack.pop();
669        builder.finish_node(); // close YAML_BLOCK_SEQUENCE
670    }
671}
672
673/// Close the top-of-stack frame's entry/item sub-wrapper if still open
674/// and clear the flag. For maps, this closes the inner KEY/VALUE
675/// node and the surrounding ENTRY. If we're closing while the entry
676/// is still in its KEY phase (i.e. the entry never received a `:`,
677/// e.g. a `?`-only explicit-key entry), an empty VALUE wrapper is
678/// inserted before the ENTRY closes so every ENTRY has the same
679/// `KEY + VALUE` child shape — the projection layer relies on that
680/// invariant. For sequences it closes the ITEM. Caller decides whether
681/// to also pop the frame itself.
682fn close_open_sub_wrapper(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
683    let Some(frame) = stack.last_mut() else {
684        return;
685    };
686    match frame {
687        BlockFrame::BlockMap {
688            entry_open: true,
689            in_value,
690        } => {
691            if *in_value {
692                builder.finish_node(); // close VALUE
693            } else {
694                builder.finish_node(); // close KEY
695                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
696                builder.finish_node(); // empty VALUE for shape parity
697            }
698            builder.finish_node(); // close ENTRY
699            *frame = BlockFrame::BlockMap {
700                entry_open: false,
701                in_value: false,
702            };
703        }
704        BlockFrame::FlowMap {
705            entry_open: true,
706            in_value,
707        } => {
708            if *in_value {
709                builder.finish_node();
710            } else {
711                builder.finish_node();
712                builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
713                builder.finish_node();
714            }
715            builder.finish_node();
716            *frame = BlockFrame::FlowMap {
717                entry_open: false,
718                in_value: false,
719            };
720        }
721        BlockFrame::BlockSequence {
722            item_open: true,
723            indentless,
724        } => {
725            let indentless = *indentless;
726            builder.finish_node();
727            *frame = BlockFrame::BlockSequence {
728                item_open: false,
729                indentless,
730            };
731        }
732        BlockFrame::FlowSequence { item_open: true } => {
733            builder.finish_node();
734            *frame = BlockFrame::FlowSequence { item_open: false };
735        }
736        _ => {}
737    }
738}
739
740fn close_block_containers(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
741    while let Some(frame) = stack.pop() {
742        match frame {
743            BlockFrame::BlockMap {
744                entry_open: true,
745                in_value,
746            } => {
747                if in_value {
748                    builder.finish_node(); // close VALUE
749                } else {
750                    builder.finish_node(); // close KEY
751                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
752                    builder.finish_node();
753                }
754                builder.finish_node(); // close ENTRY
755            }
756            BlockFrame::FlowMap {
757                entry_open: true,
758                in_value,
759            } => {
760                if in_value {
761                    builder.finish_node();
762                } else {
763                    builder.finish_node();
764                    builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
765                    builder.finish_node();
766                }
767                builder.finish_node();
768            }
769            BlockFrame::BlockSequence {
770                item_open: true, ..
771            }
772            | BlockFrame::FlowSequence { item_open: true } => {
773                builder.finish_node();
774            }
775            _ => {}
776        }
777        builder.finish_node();
778    }
779}
780
781fn map_token_to_syntax_kind(kind: TokenKind) -> SyntaxKind {
782    match kind {
783        TokenKind::Trivia(TriviaKind::Whitespace) => SyntaxKind::WHITESPACE,
784        TokenKind::Trivia(TriviaKind::Newline) => SyntaxKind::NEWLINE,
785        TokenKind::Trivia(TriviaKind::Comment) => SyntaxKind::YAML_COMMENT,
786        TokenKind::DocumentStart => SyntaxKind::YAML_DOCUMENT_START,
787        TokenKind::DocumentEnd => SyntaxKind::YAML_DOCUMENT_END,
788        TokenKind::Directive => SyntaxKind::YAML_SCALAR,
789        TokenKind::BlockEntry => SyntaxKind::YAML_BLOCK_SEQ_ENTRY,
790        TokenKind::FlowEntry => SyntaxKind::YAML_SCALAR,
791        TokenKind::FlowSequenceStart | TokenKind::FlowSequenceEnd => SyntaxKind::YAML_SCALAR,
792        TokenKind::FlowMappingStart | TokenKind::FlowMappingEnd => SyntaxKind::YAML_SCALAR,
793        TokenKind::Value => SyntaxKind::YAML_COLON,
794        TokenKind::Anchor => SyntaxKind::YAML_ANCHOR,
795        TokenKind::Alias => SyntaxKind::YAML_ALIAS,
796        TokenKind::Tag => SyntaxKind::YAML_TAG,
797        TokenKind::Scalar(_) => SyntaxKind::YAML_SCALAR,
798        // Source-backed `Key` (the explicit `?` indicator) — there is
799        // no dedicated SyntaxKind yet, route to YAML_KEY for now.
800        TokenKind::Key => SyntaxKind::YAML_KEY,
801        // Synthetic markers handled before this map; defensive
802        // fallback.
803        TokenKind::StreamStart
804        | TokenKind::StreamEnd
805        | TokenKind::BlockSequenceStart
806        | TokenKind::BlockMappingStart
807        | TokenKind::BlockEnd => SyntaxKind::YAML_SCALAR,
808    }
809}
810
/// Byte-completeness summary produced by running the parser over one
/// input. The harness in `tests/yaml.rs` gates each sub-commit on the
/// losslessness it reports.
#[derive(Clone, Debug)]
pub struct ShadowParserReport {
    /// Whether the tree's text reproduces the input exactly
    /// (`tree.text() == input`).
    pub text_lossless: bool,
    /// How many child nodes sit directly under YAML_STREAM — a coarse
    /// proxy for "did we emit any nesting yet", handy for tracking
    /// structural progression across sub-commits.
    pub stream_child_count: usize,
}
823
824/// Run the parser and return a losslessness report. Exposed so the
825/// integration harness can run over allowlisted fixtures without
826/// depending on private types.
827pub fn shadow_parser_check(input: &str) -> ShadowParserReport {
828    let tree = parse_stream(input);
829    let text = tree.text().to_string();
830    ShadowParserReport {
831        text_lossless: text == input,
832        stream_child_count: tree.children().count(),
833    }
834}
835
836#[cfg(test)]
837mod tests {
838    use super::*;
839    use crate::syntax::SyntaxKind;
840
#[test]
fn returns_byte_lossless_cst_for_empty_input() {
    // The empty string must round-trip through the CST unchanged.
    let input = "";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
846
#[test]
fn returns_byte_lossless_cst_for_simple_mapping() {
    // A single `key: value` block-mapping entry.
    let input = "key: value\n";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
852
#[test]
fn returns_byte_lossless_cst_for_block_sequence() {
    // Two `-` entries in a block sequence.
    let input = "- a\n- b\n";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
858
#[test]
fn returns_byte_lossless_cst_for_flow_mapping() {
    // A flow mapping with two comma-separated entries.
    let input = "{a: b, c: d}\n";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
864
#[test]
fn returns_byte_lossless_cst_for_block_scalar() {
    // A literal (`|`) block scalar spanning two content lines.
    let input = "key: |\n  hello\n  world\n";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
870
#[test]
fn returns_byte_lossless_cst_for_quoted_scalar() {
    // Double-quoted scalars on both the key and the value side.
    let input = "\"key\": \"value\"\n";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
876
#[test]
fn returns_byte_lossless_cst_for_multi_line_plain_scalar() {
    // A plain scalar continued onto an indented second line.
    let input = "key: hello\n  world\n";
    let report = shadow_parser_check(input);
    assert!(report.text_lossless);
}
882
#[test]
fn preserves_explicit_key_indicator_byte_in_flow_context() {
    // Even in flow context the explicit-key `?` occupies one source
    // byte, so the builder has to keep it. Only the zero-width `Key`
    // splices coming from `fetch_value` may be dropped. Regression
    // guard: an earlier draft filtered out every Key token.
    let input = "{ ?foo: bar }\n";
    let lossless = shadow_parser_check(input).text_lossless;
    assert!(lossless, "input {input:?} not preserved");
}
893
#[test]
fn does_not_absorb_terminator_line_break_into_flow_scalar() {
    // Regression guard: inside a flow collection, multi-line plain
    // scalar continuation has to stop when the next non-blank char is
    // a flow terminator (`}`/`]`/`,`). Previously the line break was
    // pulled into the scalar (`42\n` rather than `42`), shifting the
    // byte position of the closer.
    let input = "{a: 42\n}\n";
    let lossless = shadow_parser_check(input).text_lossless;
    assert!(lossless, "input {input:?} not preserved");
}
905
906    fn document_count(tree: &SyntaxNode) -> usize {
907        tree.children()
908            .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
909            .count()
910    }
911
#[test]
fn implicit_document_wraps_body_with_no_markers() {
    // Without any `---` / `...` marker the body must still be wrapped
    // in exactly one YAML_DOCUMENT, giving projection a node to walk.
    let input = "key: value\n";
    let tree = parse_stream(input);
    let documents = document_count(&tree);
    assert_eq!(documents, 1);
    assert_eq!(tree.text().to_string(), input);
}
921
#[test]
fn explicit_doc_start_opens_document_marker_lives_inside() {
    let input = "---\nkey: value\n";
    let tree = parse_stream(input);
    assert_eq!(document_count(&tree), 1);
    let doc = tree
        .children()
        .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
        .expect("document node");
    // The `---` marker must be a direct token child of the document.
    let has_doc_start = doc
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_START);
    assert!(
        has_doc_start,
        "`---` token should live inside YAML_DOCUMENT"
    );
    assert_eq!(tree.text().to_string(), input);
}
939
#[test]
fn explicit_doc_end_closes_document_marker_lives_inside() {
    let input = "key: value\n...\n";
    let tree = parse_stream(input);
    assert_eq!(document_count(&tree), 1);
    let doc = tree
        .children()
        .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
        .expect("document node");
    // The `...` marker must be a direct token child of the document.
    let has_doc_end = doc
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_END);
    assert!(
        has_doc_end,
        "`...` token should live inside YAML_DOCUMENT"
    );
    assert_eq!(tree.text().to_string(), input);
}
957
#[test]
fn consecutive_doc_starts_emit_two_documents() {
    // Each `---` begins its own YAML_DOCUMENT.
    let input = "---\na\n---\nb\n";
    let tree = parse_stream(input);
    let documents = document_count(&tree);
    assert_eq!(documents, 2);
    assert_eq!(tree.text().to_string(), input);
}
965
#[test]
fn pre_document_trivia_stays_at_stream_level() {
    // No document exists yet when the leading newline is seen, so it
    // belongs directly under YAML_STREAM rather than inside a
    // YAML_DOCUMENT.
    let input = "\n---\nkey: value\n";
    let tree = parse_stream(input);
    let mut stream_token_kinds: Vec<SyntaxKind> = Vec::new();
    for el in tree.children_with_tokens() {
        if let Some(token) = el.into_token() {
            stream_token_kinds.push(token.kind());
        }
    }
    let has_newline = stream_token_kinds
        .iter()
        .any(|kind| *kind == SyntaxKind::NEWLINE);
    assert!(
        has_newline,
        "leading newline should be a direct child of YAML_STREAM, got {stream_token_kinds:?}"
    );
    assert_eq!(tree.text().to_string(), input);
}
984
#[test]
fn bare_doc_end_at_stream_start_opens_synthetic_empty_document() {
    // Odd but still lossless: when the stream opens with `...`, the
    // marker is wrapped in an otherwise empty YAML_DOCUMENT so that no
    // source bytes end up at YAML_STREAM level detached from a doc.
    let input = "...\n";
    let tree = parse_stream(input);
    let documents = document_count(&tree);
    assert_eq!(documents, 1);
    assert_eq!(tree.text().to_string(), input);
}
995
996    fn first_document(tree: &SyntaxNode) -> SyntaxNode {
997        tree.children()
998            .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
999            .expect("at least one document")
1000    }
1001
1002    fn block_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1003        parent
1004            .children()
1005            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1006    }
1007
1008    fn block_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1009        parent
1010            .children()
1011            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
1012    }
1013
1014    fn block_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
1015        map.children()
1016            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
1017            .collect()
1018    }
1019
1020    fn block_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
1021        seq.children()
1022            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
1023            .collect()
1024    }
1025
1026    fn entry_key(entry: &SyntaxNode) -> SyntaxNode {
1027        entry
1028            .children()
1029            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
1030            .expect("entry should have a YAML_BLOCK_MAP_KEY child")
1031    }
1032
1033    fn entry_value(entry: &SyntaxNode) -> SyntaxNode {
1034        entry
1035            .children()
1036            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
1037            .expect("entry should have a YAML_BLOCK_MAP_VALUE child")
1038    }
1039
#[test]
fn consecutive_empty_key_colons_open_separate_entries() {
    // `: a\n: b` (2JQS) holds two block-map entries, each pairing an
    // empty (null) key with a value. The scanner hands over two bare
    // `Value` tokens with no Key/BlockEnd in between, so when the
    // second `:` shows up at the map's column the builder has to close
    // the first entry instead of folding the `:` into its value.
    let input = ": a\n: b\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 2, "expected two empty-key ENTRY nodes");
    for (entry, scalar) in entries.iter().zip(["a", "b"]) {
        // An empty key carries the `:` indicator but never a scalar.
        let key = entry_key(entry);
        let key_has_scalar = key
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|t| t.kind() == SyntaxKind::YAML_SCALAR);
        assert!(
            !key_has_scalar,
            "empty key should carry no scalar, got {key:?}",
        );
        let value = entry_value(entry);
        let value_has_expected_scalar = value
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|t| t.kind() == SyntaxKind::YAML_SCALAR && t.text() == scalar);
        assert!(
            value_has_expected_scalar,
            "value should be {scalar:?}, got {value:?}",
        );
    }
    assert_eq!(tree.text().to_string(), input);
}
1073
#[test]
fn block_mapping_wraps_key_value_with_key_and_value_sub_wrappers() {
    let input = "key: value\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 1, "expected one ENTRY for `key: value`");
    let entry = &entries[0];
    let key = entry_key(entry);
    let value = entry_value(entry);
    // The KEY wrapper owns the `:`; the VALUE wrapper owns the scalar.
    let key_has_colon = key
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_COLON);
    assert!(
        key_has_colon,
        "colon should be the trailing token of YAML_BLOCK_MAP_KEY",
    );
    let value_has_scalar = value
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_SCALAR);
    assert!(
        value_has_scalar,
        "scalar `value` should live inside YAML_BLOCK_MAP_VALUE",
    );
    assert_eq!(tree.text().to_string(), input);
}
1099
#[test]
fn block_sequence_wraps_entries_in_yaml_block_sequence() {
    let input = "- a\n- b\n";
    let tree = parse_stream(input);
    let seq = block_seq_under(&first_document(&tree)).expect("YAML_BLOCK_SEQUENCE child");
    let items = block_seq_items(&seq);
    assert_eq!(items.len(), 2, "expected 2 YAML_BLOCK_SEQUENCE_ITEM");
    // The `-` indicator belongs to the item it introduces.
    for item in &items {
        let dash_count = item
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .filter(|t| t.kind() == SyntaxKind::YAML_BLOCK_SEQ_ENTRY)
            .count();
        assert_eq!(dash_count, 1, "each item owns exactly one `-` token");
    }
    assert_eq!(tree.text().to_string(), input);
}
1121
#[test]
fn nested_block_mapping_nests_inner_block_map_inside_outer_value() {
    let input = "outer:\n  inner: x\n";
    let tree = parse_stream(input);
    let outer = block_map_under(&first_document(&tree)).expect("outer YAML_BLOCK_MAP");
    let outer_entries = block_map_entries(&outer);
    assert_eq!(outer_entries.len(), 1);
    // The inner map must hang off the outer entry's VALUE wrapper.
    let outer_value = entry_value(&outer_entries[0]);
    let inner =
        block_map_under(&outer_value).expect("inner YAML_BLOCK_MAP nested under outer VALUE");
    let inner_entries = block_map_entries(&inner);
    assert_eq!(inner_entries.len(), 1);
    let inner_key = entry_key(&inner_entries[0]);
    let inner_key_has_colon = inner_key
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_COLON);
    assert!(inner_key_has_colon, "inner key should own its colon",);
    assert_eq!(tree.text().to_string(), input);
}
1146
#[test]
fn block_sequence_inside_mapping_nests_under_outer_map_value() {
    let input = "items:\n  - a\n  - b\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 1, "one entry: `items: <seq>`");
    // The sequence is the content of the single entry's VALUE.
    let value = entry_value(&entries[0]);
    let seq = block_seq_under(&value).expect("YAML_BLOCK_SEQUENCE nested under map VALUE");
    let item_count = block_seq_items(&seq).len();
    assert_eq!(item_count, 2);
    assert_eq!(tree.text().to_string(), input);
}
1164
#[test]
fn dedent_closes_inner_block_map_before_next_outer_key() {
    // Input under test:
    //
    //   outer:
    //     inner: x
    //   sibling: y
    //
    // Dedenting back to `sibling` has to close both the inner map and
    // the outer ENTRY that holds it, so that `sibling: y` becomes a
    // second ENTRY of the outer map.
    let input = "outer:\n  inner: x\nsibling: y\n";
    let tree = parse_stream(input);
    let outer = block_map_under(&first_document(&tree)).expect("outer YAML_BLOCK_MAP");
    let entries = block_map_entries(&outer);
    assert_eq!(
        entries.len(),
        2,
        "outer map should have two entries (`outer:` and `sibling:`)",
    );
    // Counts the YAML_BLOCK_MAP nodes directly under a VALUE wrapper.
    let nested_maps = |value: &SyntaxNode| {
        value
            .children()
            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
            .count()
    };
    // The first VALUE holds the nested map; the second VALUE is flat.
    let first_value = entry_value(&entries[0]);
    let nested_in_first = nested_maps(&first_value);
    assert_eq!(nested_in_first, 1);
    let second_value = entry_value(&entries[1]);
    let nested_in_second = nested_maps(&second_value);
    assert_eq!(nested_in_second, 0);
    assert_eq!(tree.text().to_string(), input);
}
1198
#[test]
fn block_map_with_two_top_level_entries_emits_two_entry_wrappers() {
    // Two sibling keys at column 0 produce two ENTRY wrappers.
    let input = "a: 1\nb: 2\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entry_count = block_map_entries(&map).len();
    assert_eq!(entry_count, 2);
    assert_eq!(tree.text().to_string(), input);
}
1208
#[test]
fn explicit_key_indicator_question_mark_lives_inside_key() {
    // In `? a\n: b\n` the `?` is a source-backed Key token: it opens
    // the ENTRY and ends up inside the KEY node, next to the scalar
    // `a` and the trailing `:`.
    let input = "? a\n: b\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 1);
    let has_question = entry_key(&entries[0])
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_KEY);
    assert!(has_question, "`?` should live inside YAML_BLOCK_MAP_KEY");
    assert_eq!(tree.text().to_string(), input);
}
1228
#[test]
fn explicit_key_indentless_sequence_wraps_inside_key() {
    // `?\n- a\n- b\n:\n- c\n- d\n` (6PBE): the content of the explicit
    // `?` key is a block sequence at zero indent. Just like an
    // indentless sequence on the VALUE side, the scanner pushes no
    // indent level and emits no BlockSequenceStart, so the builder has
    // to synthesize the YAML_BLOCK_SEQUENCE inside the KEY itself
    // instead of leaving `- a` / `- b` flat.
    let input = "?\n- a\n- b\n:\n- c\n- d\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 1);
    let key = entry_key(&entries[0]);
    let key_has_sequence = block_seq_under(&key).is_some();
    assert!(
        key_has_sequence,
        "explicit-key block sequence should be wrapped in YAML_BLOCK_SEQUENCE inside KEY",
    );
    let value = entry_value(&entries[0]);
    let value_has_sequence = block_seq_under(&value).is_some();
    assert!(
        value_has_sequence,
        "value-side block sequence should remain wrapped",
    );
    assert_eq!(tree.text().to_string(), input);
}
1258
#[test]
fn empty_key_shorthand_opens_entry_with_empty_key() {
    // `: value\n`: a bare `:` at column 0 is shorthand for an empty
    // implicit key. ENTRY+KEY have to be opened before the colon is
    // emitted, leaving the colon as the KEY's only child.
    let input = ": value\n";
    let tree = parse_stream(input);
    let map = block_map_under(&first_document(&tree)).expect("YAML_BLOCK_MAP child");
    let entries = block_map_entries(&map);
    assert_eq!(entries.len(), 1);
    let entry = &entries[0];
    // True when `node` has a direct token child of the given kind.
    let has_token = |node: &SyntaxNode, kind: SyntaxKind| {
        node.children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|t| t.kind() == kind)
    };
    let key = entry_key(entry);
    // No scalar in the KEY, just the colon.
    assert!(
        !has_token(&key, SyntaxKind::YAML_SCALAR),
        "empty-key shorthand has no scalar in KEY",
    );
    assert!(
        has_token(&key, SyntaxKind::YAML_COLON),
        "empty-key KEY still owns the `:` token",
    );
    let value = entry_value(entry);
    assert!(
        has_token(&value, SyntaxKind::YAML_SCALAR),
        "VALUE owns the `value` scalar",
    );
    assert_eq!(tree.text().to_string(), input);
}
1293
#[test]
fn document_end_marker_lives_at_document_level_not_inside_block_map() {
    // `...` is a document-level marker and must not end up inside the
    // block map: open block containers are closed before the builder
    // consumes `DocumentEnd`.
    let input = "key: value\n...\n";
    let tree = parse_stream(input);
    let has_doc_end = first_document(&tree)
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_END);
    assert!(
        has_doc_end,
        "DOCUMENT_END should be a direct child of YAML_DOCUMENT"
    );
    assert_eq!(tree.text().to_string(), input);
}
1312
1313    fn flow_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1314        parent
1315            .children()
1316            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1317    }
1318
1319    fn flow_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
1320        parent
1321            .children()
1322            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1323    }
1324
1325    fn flow_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
1326        map.children()
1327            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
1328            .collect()
1329    }
1330
1331    fn flow_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
1332        seq.children()
1333            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
1334            .collect()
1335    }
1336
#[test]
fn flow_sequence_wraps_each_item_in_flow_sequence_item() {
    let input = "[a, b, c]\n";
    let tree = parse_stream(input);
    let seq = flow_seq_under(&first_document(&tree)).expect("YAML_FLOW_SEQUENCE child");
    let item_count = flow_seq_items(&seq).len();
    assert_eq!(item_count, 3);
    // `[` and `]` are direct token children of the SEQUENCE node,
    // i.e. siblings of the items rather than part of them.
    let bracket_count = seq
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .filter(|t| matches!(t.text(), "[" | "]"))
        .count();
    assert_eq!(bracket_count, 2, "`[` and `]` at SEQUENCE level");
    assert_eq!(tree.text().to_string(), input);
}
1357
#[test]
fn flow_mapping_wraps_each_entry_with_key_and_value() {
    let input = "{a: 1, b: 2}\n";
    let tree = parse_stream(input);
    let map = flow_map_under(&first_document(&tree)).expect("YAML_FLOW_MAP child");
    let entries = flow_map_entries(&map);
    assert_eq!(entries.len(), 2);
    // True when `node` has a direct token child of the given kind.
    let has_token = |node: &SyntaxNode, kind: SyntaxKind| {
        node.children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|t| t.kind() == kind)
    };
    for entry in &entries {
        // Every entry splits into a KEY (with the `:`) and a VALUE
        // (with the scalar).
        let key = entry
            .children()
            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
            .expect("entry has YAML_FLOW_MAP_KEY");
        assert!(
            has_token(&key, SyntaxKind::YAML_COLON),
            "flow KEY owns trailing `:`",
        );
        let value = entry
            .children()
            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
            .expect("entry has YAML_FLOW_MAP_VALUE");
        assert!(
            has_token(&value, SyntaxKind::YAML_SCALAR),
            "flow VALUE owns its scalar",
        );
    }
    assert_eq!(tree.text().to_string(), input);
}
1390
#[test]
fn flow_sequence_inside_flow_sequence_nests_under_outer_item() {
    let input = "[[1, 2], [3, 4]]\n";
    let tree = parse_stream(input);
    let outer = flow_seq_under(&first_document(&tree)).expect("outer YAML_FLOW_SEQUENCE");
    let outer_items = flow_seq_items(&outer);
    assert_eq!(outer_items.len(), 2);
    // Each outer item wraps one of the inner `[...]` sequences.
    for item in &outer_items {
        let has_nested_seq = flow_seq_under(item).is_some();
        assert!(
            has_nested_seq,
            "outer item should contain a nested YAML_FLOW_SEQUENCE",
        );
    }
    assert_eq!(tree.text().to_string(), input);
}
1408
#[test]
fn flow_mapping_inside_flow_sequence_nests_under_item() {
    let input = "[{a: 1}, {b: 2}]\n";
    let tree = parse_stream(input);
    let seq = flow_seq_under(&first_document(&tree)).expect("YAML_FLOW_SEQUENCE child");
    let items = flow_seq_items(&seq);
    assert_eq!(items.len(), 2);
    // Each sequence item wraps one of the `{...}` mappings.
    for item in &items {
        let has_nested_map = flow_map_under(item).is_some();
        assert!(
            has_nested_map,
            "each item should contain a nested YAML_FLOW_MAP",
        );
    }
    assert_eq!(tree.text().to_string(), input);
}
1426
1427    #[test]
1428    fn flow_mapping_at_block_map_value_nests_under_block_map_value() {
1429        let input = "key: {a: 1, b: 2}\n";
1430        let tree = parse_stream(input);
1431        let doc = first_document(&tree);
1432        let block_map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1433        let entries = block_map_entries(&block_map);
1434        assert_eq!(entries.len(), 1);
1435        let value = entry_value(&entries[0]);
1436        assert!(
1437            value
1438                .children()
1439                .any(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP),
1440            "flow map should be nested under outer block map's VALUE",
1441        );
1442        assert_eq!(tree.text().to_string(), input);
1443    }
1444
1445    #[test]
1446    fn directive_prelude_stays_inside_document_opened_by_marker() {
1447        // YAML 1.2 §6.8.1: directives belong to the document the
1448        // following `---` opens. The builder must not split the
1449        // directive line into a separate doc — the entire input is one
1450        // YAML_DOCUMENT.
1451        let input = "%TAG !e! tag:example.com,2000:app/\n---\n!e!foo \"bar\"\n";
1452        let tree = parse_stream(input);
1453        assert_eq!(document_count(&tree), 1);
1454        let doc = first_document(&tree);
1455        let has_doc_start = doc.children_with_tokens().any(|el| {
1456            el.as_token()
1457                .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_START)
1458        });
1459        assert!(has_doc_start, "the `---` should live inside the same doc");
1460        assert_eq!(tree.text().to_string(), input);
1461    }
1462
1463    #[test]
1464    fn explicit_key_without_value_emits_empty_value_for_shape_parity() {
1465        // `? a\n? b\n` — neither entry has a `:`. Each ENTRY must still
1466        // hold both KEY and VALUE children (VALUE empty) so projection
1467        // walkers don't have to special-case missing children.
1468        let input = "? a\n? b\n";
1469        let tree = parse_stream(input);
1470        let doc = first_document(&tree);
1471        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP");
1472        let entries = block_map_entries(&map);
1473        assert_eq!(entries.len(), 2);
1474        for entry in &entries {
1475            assert!(
1476                entry
1477                    .children()
1478                    .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY),
1479                "ENTRY missing KEY child",
1480            );
1481            assert!(
1482                entry
1483                    .children()
1484                    .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE),
1485                "ENTRY missing VALUE child",
1486            );
1487        }
1488    }
1489}