Skip to main content

panache_parser/parser/yaml/
parser_v2.rs

1//! Step-11 parser scaffold — a CST builder that consumes the streaming
2//! scanner. Wraps each contiguous run of body content in a
3//! `YAML_DOCUMENT` node (with `---` / `...` markers consumed inside the
4//! document they delimit), nests block-context content under
5//! `YAML_BLOCK_MAP` / `YAML_BLOCK_SEQUENCE` containers driven by the
6//! scanner's synthetic `BlockMappingStart` / `BlockSequenceStart` /
7//! `BlockEnd` markers, wraps each key-value pair in
8//! `YAML_BLOCK_MAP_ENTRY` / each `-` entry in
9//! `YAML_BLOCK_SEQUENCE_ITEM`, splits each map entry into
10//! `YAML_BLOCK_MAP_KEY` (everything up to and including the `:`) and
11//! `YAML_BLOCK_MAP_VALUE` (everything after), and mirrors the same
12//! shape for flow contexts: `YAML_FLOW_MAP` / `YAML_FLOW_MAP_ENTRY` /
13//! `YAML_FLOW_MAP_KEY` / `YAML_FLOW_MAP_VALUE` and
14//! `YAML_FLOW_SEQUENCE` / `YAML_FLOW_SEQUENCE_ITEM`. Source-backed
15//! `[` / `]` / `{` / `}` / `,` are emitted at the container level
16//! (matching v1's emission), with item/entry sub-wrappers closing on
17//! `,` and the matching closer.
18//!
19//! Per-feature event-parity work (matching each fixture's `test.event`
20//! exactly) lands incrementally on top of this shape.
21
22#![allow(dead_code)]
23
24use rowan::GreenNodeBuilder;
25
26use crate::syntax::{SyntaxKind, SyntaxNode};
27
28use super::scanner::{Scanner, TokenKind, TriviaKind};
29
/// Drive the scanner over `input` and build a CST. Always returns a
/// `SyntaxNode` — the scanner is permissive and the v2 builder
/// preserves bytes regardless of well-formedness.
///
/// The returned root is a `YAML_STREAM` node. Each scanner token is
/// handled in two phases inside the main loop:
///
/// 1. a *structural* phase (first `match`) that opens/closes container
///    and sub-wrapper nodes and, for tokens it fully consumes, emits
///    their bytes and `continue`s;
/// 2. an *emission* phase (second `match`) that writes the bytes of
///    every remaining source-backed token at the currently open scope,
///    handling document markers and directives specially.
///
/// Token text is obtained by slicing `input` with the scanner's
/// `start.index..end.index` offsets.
/// NOTE(review): this assumes those offsets are byte offsets on UTF-8
/// character boundaries; a misaligned offset would panic on the slice —
/// confirm against the scanner.
pub fn parse_v2(input: &str) -> SyntaxNode {
    let mut builder = GreenNodeBuilder::new();
    builder.start_node(SyntaxKind::YAML_STREAM.into());
    let mut scanner = Scanner::new(input);
    let mut doc_open = false;
    // True when the open YAML_DOCUMENT has only seen directives + trivia
    // (no body content yet, no `---`). YAML 1.2 says directives belong to
    // the document the following `---` opens, so when DocumentStart
    // arrives in this state the marker stays inside the same document
    // rather than splitting it. Cleared as soon as any non-directive
    // body content lands.
    let mut doc_only_has_directives = false;
    // Stack of currently-open block containers. Each frame tracks
    // whether its current `YAML_BLOCK_MAP_ENTRY` / `YAML_BLOCK_SEQUENCE_ITEM`
    // sub-wrapper is still open and waiting to be closed (by the next
    // `Key` / `BlockEntry` peer or by `BlockEnd`).
    let mut block_stack: Vec<BlockFrame> = Vec::new();
    // Kind of the last non-trivia, non-stream-marker, non-decoration
    // token emitted. An indentless block sequence is only valid when
    // its `-` directly follows the map entry's `:` (the value is
    // otherwise empty), so the `BlockEntry` handler consults this to
    // tell RLU9 (`foo:\n- 42`, value is purely the sequence) apart from
    // G9HC (`seq:\n&anchor\n- a` with the anchor at column 0 — an
    // error the validator must still catch on the unwrapped shape).
    // Anchor / Tag / Alias tokens are *decorations* of the next node
    // and don't fill the empty-value slot; they're skipped here so a
    // value-leading decoration still permits an indentless sequence
    // (SKE5: `seq:\n &anchor\n- a`).
    let mut prev_significant: Option<TokenKind> = None;
    // Smallest column among Anchor/Tag/Alias decorations seen since the
    // last value-filling token. The indentless detector uses this to
    // distinguish SKE5 (decoration indented past parent → wrap) from
    // G9HC (decoration at parent indent → leave unwrapped for the
    // validator). `None` when no decoration is pending.
    let mut decoration_col_floor: Option<usize> = None;
    while let Some(tok) = scanner.next_token() {
        // Snapshot the tracking state *before* it is updated for this
        // token: the handlers below (notably `BlockEntry`) need to know
        // what preceded the current token, not the token itself.
        let last_significant = prev_significant;
        let decorations_so_far = decoration_col_floor;
        let is_decoration = matches!(
            tok.kind,
            TokenKind::Anchor | TokenKind::Tag | TokenKind::Alias
        );
        // Only trivia and the stream markers are excluded here, so the
        // synthetic block markers (`BlockMappingStart`, `BlockEnd`, …)
        // and zero-width `Key` splices also update `prev_significant`.
        if !matches!(
            tok.kind,
            TokenKind::Trivia(_) | TokenKind::StreamStart | TokenKind::StreamEnd
        ) {
            if is_decoration {
                decoration_col_floor = Some(
                    decoration_col_floor.map_or(tok.start.column, |c| c.min(tok.start.column)),
                );
            } else {
                prev_significant = Some(tok.kind);
                decoration_col_floor = None;
            }
        }
        // Phase 1: structural handling. Arms that end in `continue`
        // have fully consumed the token; arms that fall out of the
        // `match` leave the token's bytes to phase 2 below.
        match tok.kind {
            TokenKind::StreamStart | TokenKind::StreamEnd => continue,
            TokenKind::BlockMappingStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
                block_stack.push(BlockFrame::BlockMap {
                    entry_open: false,
                    in_value: false,
                });
                continue;
            }
            TokenKind::BlockSequenceStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
                block_stack.push(BlockFrame::BlockSequence {
                    item_open: false,
                    indentless: false,
                });
                continue;
            }
            TokenKind::BlockEnd => {
                // Indentless sequences have no scanner BlockEnd of their
                // own, so a BlockEnd arriving while one is on top is meant
                // for the real container beneath it. Close the indentless
                // frame(s) first, then consume the BlockEnd normally.
                close_indentless_sequences(&mut builder, &mut block_stack);
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                // Defensive: only close if the scanner gave us an open
                // container. A stray BlockEnd would otherwise pop the
                // YAML_DOCUMENT or YAML_STREAM frame.
                if block_stack.pop().is_some() {
                    builder.finish_node();
                }
                continue;
            }
            TokenKind::FlowSequenceStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                // If nested inside a Map's open KEY/VALUE wrapper, the
                // current open scope is the appropriate parent.
                builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
                block_stack.push(BlockFrame::FlowSequence { item_open: false });
                // The `[` is emitted as a YAML_SCALAR token directly
                // under the new container, before any ITEM is opened.
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                continue;
            }
            TokenKind::FlowSequenceEnd => {
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                // Permissive about mismatched closers: either kind of
                // flow frame on top is popped; a non-flow top frame is
                // left in place.
                if matches!(
                    block_stack.last(),
                    Some(BlockFrame::FlowSequence { .. } | BlockFrame::FlowMap { .. })
                ) {
                    block_stack.pop();
                    builder.finish_node();
                }
                continue;
            }
            TokenKind::FlowMappingStart => {
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
                block_stack.push(BlockFrame::FlowMap {
                    entry_open: false,
                    in_value: false,
                });
                // The `{` is emitted as a YAML_SCALAR token directly
                // under the new container, before any ENTRY is opened.
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                continue;
            }
            TokenKind::FlowMappingEnd => {
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                // Same permissive pop as `FlowSequenceEnd`: either flow
                // frame kind on top is closed by this `}`.
                if matches!(
                    block_stack.last(),
                    Some(BlockFrame::FlowMap { .. } | BlockFrame::FlowSequence { .. })
                ) {
                    block_stack.pop();
                    builder.finish_node();
                }
                continue;
            }
            TokenKind::FlowEntry => {
                // `,` closes the current entry/item and lives at the
                // container level (between peer entries/items).
                close_open_sub_wrapper(&mut builder, &mut block_stack);
                let text = &input[tok.start.index..tok.end.index];
                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
                continue;
            }
            TokenKind::Key => {
                // A `Key` at the parent map's level terminates any
                // open indentless sequence value first, revealing the
                // map frame below.
                close_indentless_sequences(&mut builder, &mut block_stack);
                // Both the synthetic 0-width splice and the source-backed
                // `?` indicator open a new map entry. Close the previous
                // entry first if still open. After this, the current
                // open scope is the new key wrapper.
                if matches!(
                    block_stack.last(),
                    Some(BlockFrame::BlockMap { .. } | BlockFrame::FlowMap { .. })
                ) {
                    open_map_entry_with_key(&mut builder, &mut block_stack);
                }
                if tok.start.index == tok.end.index {
                    // Synthetic Key splice carries no bytes.
                    continue;
                }
                // Source-backed `?`: ensure we have somewhere to put it.
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                // Fall through to emit `?` inside the open KEY (or
                // current scope if not in a Map frame).
            }
            TokenKind::Value => {
                // An empty-key `:` at the parent map's level likewise
                // terminates an open indentless sequence value first.
                close_indentless_sequences(&mut builder, &mut block_stack);
                // Copy the top map frame's flags out as
                // `(is_flow, entry_open, in_value)`; `None` when the top
                // frame is not a map (or the stack is empty).
                let map_state = match block_stack.last().copied() {
                    Some(BlockFrame::BlockMap {
                        entry_open,
                        in_value,
                    }) => Some((false, entry_open, in_value)),
                    Some(BlockFrame::FlowMap {
                        entry_open,
                        in_value,
                    }) => Some((true, entry_open, in_value)),
                    _ => None,
                };
                if let Some((is_flow, mut entry_open, mut in_value)) = map_state {
                    // A bare `:` arriving while the current block-map
                    // entry is already in its VALUE phase starts a NEW
                    // entry whose key is empty (`: a\n: b`, 2JQS/S3PD) —
                    // not a double-colon inside that value. The scanner's
                    // indent machinery guarantees we only reach here for a
                    // peer at the map's column (a deeper colon rolls a
                    // fresh BlockMappingStart; a shallower one unwinds with
                    // BlockEnd first), so close the current entry and fall
                    // through to open the new one. Flow maps separate
                    // entries with `,`, which already closes the entry, so
                    // their in_value is false here — leave them alone.
                    if !is_flow && entry_open && in_value {
                        close_open_sub_wrapper(&mut builder, &mut block_stack);
                        entry_open = false;
                        in_value = false;
                    }
                    // Empty-key shorthand: `:` arriving without a prior
                    // Key opens an ENTRY+KEY before consuming the colon.
                    if !entry_open {
                        open_map_entry_with_key(&mut builder, &mut block_stack);
                    }
                    if !in_value {
                        // The colon is the last token of KEY. After it
                        // we close KEY and open VALUE.
                        let text = &input[tok.start.index..tok.end.index];
                        if !text.is_empty() {
                            builder.token(SyntaxKind::YAML_COLON.into(), text);
                        }
                        builder.finish_node(); // close KEY
                        let value_kind = if is_flow {
                            SyntaxKind::YAML_FLOW_MAP_VALUE
                        } else {
                            SyntaxKind::YAML_BLOCK_MAP_VALUE
                        };
                        builder.start_node(value_kind.into());
                        // Record on the live frame (not the copied
                        // locals) that the entry is now in its VALUE.
                        if let Some(
                            BlockFrame::BlockMap { in_value, .. }
                            | BlockFrame::FlowMap { in_value, .. },
                        ) = block_stack.last_mut()
                        {
                            *in_value = true;
                        }
                        continue;
                    }
                    // Already in_value: pathological double-colon. Fall
                    // through and emit at the current scope (inside
                    // VALUE) for losslessness.
                }
                // Not a Map frame: ensure flow-seq ITEM is open, then
                // fall through to emit `:` at current scope.
                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
            }
            TokenKind::BlockEntry => {
                // An indentless sequence opens when a `-` lands directly
                // in a block-map VALUE: the scanner pushed no indent
                // level (the `-` is at the parent key's column), so no
                // `BlockSequenceStart` arrived. Synthesize the
                // `YAML_BLOCK_SEQUENCE` frame inside the open VALUE so the
                // tree matches the indented form (spec 8.2.1). Only when
                // the `:` is the last significant token — i.e. the value
                // is otherwise empty; a `-` after scalar content in the
                // value is a structural error left unwrapped for the
                // validator to reject.
                // Decorations between `:` and `-` are allowed only when
                // they sit inside the value scope — strictly indented
                // past the indentless `-`. Otherwise the anchor is at
                // the parent mapping's level (G9HC) and the sequence
                // shouldn't wrap.
                let decorations_inside_value =
                    decorations_so_far.is_none_or(|c| c > tok.start.column);
                let indentless_value = last_significant == Some(TokenKind::Value)
                    && matches!(
                        block_stack.last(),
                        Some(BlockFrame::BlockMap { in_value: true, .. })
                    )
                    && decorations_inside_value;
                // The mirror case: a `-` landing directly after the `?`
                // explicit-key indicator opens an indentless sequence as
                // the KEY's content (6PBE). The scanner likewise pushes no
                // indent level, so synthesize the `YAML_BLOCK_SEQUENCE`
                // inside the open KEY. `close_indentless_sequences` later
                // pops it when the entry's `:` (`Value`) arrives.
                let indentless_key = last_significant == Some(TokenKind::Key)
                    && matches!(
                        block_stack.last(),
                        Some(BlockFrame::BlockMap {
                            entry_open: true,
                            in_value: false,
                        })
                    )
                    && decorations_inside_value;
                if indentless_value || indentless_key {
                    builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
                    block_stack.push(BlockFrame::BlockSequence {
                        item_open: false,
                        indentless: true,
                    });
                }
                // Each `-` closes the previous ITEM (if any) on the
                // enclosing sequence frame and opens the next one.
                if matches!(block_stack.last(), Some(BlockFrame::BlockSequence { .. })) {
                    close_open_sub_wrapper(&mut builder, &mut block_stack);
                    builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
                    if let Some(BlockFrame::BlockSequence { item_open, .. }) =
                        block_stack.last_mut()
                    {
                        *item_open = true;
                    }
                }
                // Fall through to emit the `-` byte inside the new ITEM
                // (or at current scope if not in a Sequence frame).
            }
            TokenKind::Trivia(_) => {
                // Trivia bypasses item-opening: pre-content trivia in a
                // flow sequence stays at SEQUENCE level (matching v1's
                // emission shape).
            }
            _ => {
                // Any other source-backed content (Scalar, Anchor, Tag,
                // Alias, Directive, doc markers): if we're inside a
                // FlowSequence with no open ITEM, open one before
                // emitting. Doc markers are handled below.
                if !matches!(tok.kind, TokenKind::DocumentStart | TokenKind::DocumentEnd) {
                    ensure_flow_seq_item_open(&mut builder, &mut block_stack);
                }
            }
        }
        // Phase 2: emit the bytes of every token that fell through the
        // structural `match` above.
        let text = &input[tok.start.index..tok.end.index];
        if text.is_empty() {
            // Defensive: never emit zero-width tokens (rowan rejects).
            continue;
        }
        let kind = map_token_to_syntax_kind(tok.kind);
        match tok.kind {
            TokenKind::DocumentStart => {
                // `---` begins a fresh document. Two cases:
                //  - The currently-open document only has directives so
                //    far: per YAML 1.2 the directives belong to the doc
                //    that this `---` opens. Stay inside, just emit the
                //    marker.
                //  - Otherwise: close the previous doc (and any open
                //    block containers) and open a new YAML_DOCUMENT.
                //    The scanner unwinds the indent stack at column 0,
                //    but a same-indent map at indent==0 leaves them
                //    open, so close them defensively.
                if doc_open && doc_only_has_directives {
                    builder.token(kind.into(), text);
                    doc_only_has_directives = false;
                } else {
                    close_block_containers(&mut builder, &mut block_stack);
                    if doc_open {
                        builder.finish_node();
                    }
                    builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
                    doc_open = true;
                    doc_only_has_directives = false;
                    builder.token(kind.into(), text);
                }
            }
            TokenKind::DocumentEnd => {
                // `...` closes the current document. Close any open
                // block containers first so the marker is a child of
                // the document, not buried in a block container.
                close_block_containers(&mut builder, &mut block_stack);
                // A `...` with no document in progress still gets its
                // own YAML_DOCUMENT wrapper so the marker is never a
                // direct child of YAML_STREAM.
                if !doc_open {
                    builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
                }
                builder.token(kind.into(), text);
                builder.finish_node();
                doc_open = false;
                doc_only_has_directives = false;
            }
            TokenKind::Trivia(_) => {
                // Trivia goes to whichever level is currently open;
                // pre-document trivia stays at YAML_STREAM, in-document
                // trivia stays inside the YAML_DOCUMENT, the open
                // block container, or the open ENTRY/ITEM sub-wrapper.
                builder.token(kind.into(), text);
            }
            TokenKind::Directive => {
                // Directives belong inside a YAML_DOCUMENT but don't by
                // themselves count as body content — a following `---`
                // should not split into a separate doc.
                let was_open = doc_open;
                ensure_doc_open(&mut builder, &mut doc_open);
                // Only a directive that *opened* the document marks it
                // directives-only; a directive landing in an already
                // open document leaves the flag as it was.
                if !was_open {
                    doc_only_has_directives = true;
                }
                builder.token(kind.into(), text);
            }
            _ => {
                // Any non-trivia content opens an implicit document
                // when one isn't already in progress and counts as
                // body content (clears the directives-only flag).
                ensure_doc_open(&mut builder, &mut doc_open);
                doc_only_has_directives = false;
                builder.token(kind.into(), text);
            }
        }
    }
    // Close any open block containers (and their open ENTRY/ITEM
    // sub-wrappers) and the open document. The scanner emits BlockEnd
    // on stream end via `unwind_indent(-1)`, so this is normally a
    // no-op for `block_stack`; kept for safety against truncated
    // inputs and future scanner quirks.
    close_block_containers(&mut builder, &mut block_stack);
    if doc_open {
        builder.finish_node();
    }
    // Close the root YAML_STREAM node opened at the top.
    builder.finish_node();
    SyntaxNode::new_root(builder.finish())
}
436
/// Tracks an open container in the v2 builder's stack. Block and
/// flow contexts share state shape, but their containers and
/// sub-wrappers use different `SyntaxKind` variants and they close on
/// different tokens (`BlockEnd` / dedent vs. `]` / `}` / `,`).
///
/// For maps, `entry_open` records whether the entry sub-wrapper is
/// still open, and `in_value` selects between the KEY and VALUE
/// sub-sub-wrapper. For sequences, `item_open` records whether the
/// item sub-wrapper is still open.
///
/// The type is `Copy`, so handlers can snapshot the top frame's flags
/// and must write back through `last_mut()` to change the live frame.
#[derive(Debug, Clone, Copy)]
enum BlockFrame {
    /// An open `YAML_BLOCK_MAP` container.
    BlockMap {
        /// True while a `YAML_BLOCK_MAP_ENTRY` node is open on this
        /// frame.
        entry_open: bool,
        /// True once the open entry's KEY has been closed and its
        /// `YAML_BLOCK_MAP_VALUE` node opened; false while still in KEY.
        in_value: bool,
    },
    /// `indentless` marks a sequence opened as a block-map value whose
    /// `-` entries sit at the same column as the parent key (YAML's
    /// "indentless sequence", spec 8.2.1). The scanner never pushes an
    /// indent level for it, so it emits no matching `BlockEnd`; v2 must
    /// close the frame itself when the parent map's next `Key` / `Value`
    /// / `BlockEnd` arrives.
    BlockSequence {
        /// True while a `YAML_BLOCK_SEQUENCE_ITEM` node is open on this
        /// frame.
        item_open: bool,
        /// True when the builder synthesized this frame itself rather
        /// than opening it for a scanner `BlockSequenceStart`.
        indentless: bool,
    },
    /// An open `YAML_FLOW_MAP` container (`{ … }`).
    FlowMap {
        /// True while a `YAML_FLOW_MAP_ENTRY` node is open on this
        /// frame.
        entry_open: bool,
        /// True once the open entry's KEY has been closed and its
        /// `YAML_FLOW_MAP_VALUE` node opened; false while still in KEY.
        in_value: bool,
    },
    /// An open `YAML_FLOW_SEQUENCE` container (`[ … ]`).
    FlowSequence {
        /// True while a `YAML_FLOW_SEQUENCE_ITEM` node is open on this
        /// frame; items are opened lazily by the first content token.
        item_open: bool,
    },
}
470
471fn ensure_doc_open(builder: &mut GreenNodeBuilder<'_>, doc_open: &mut bool) {
472    if !*doc_open {
473        builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
474        *doc_open = true;
475    }
476}
477
478/// In a flow sequence, source-backed content opens a new
479/// `YAML_FLOW_SEQUENCE_ITEM` lazily — there is no `-` token to drive
480/// the boundary the way `BlockEntry` drives block sequences. Trivia
481/// arriving before the first item stays at the container level.
482fn ensure_flow_seq_item_open(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
483    if let Some(BlockFrame::FlowSequence { item_open }) = stack.last_mut()
484        && !*item_open
485    {
486        builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
487        *item_open = true;
488    }
489}
490
491/// Open `<MAP>_ENTRY` > `<MAP>_KEY` for the next entry, closing any
492/// previously-open entry on the same Map frame. Caller must have
493/// verified the top frame is a Map (Block or Flow).
494fn open_map_entry_with_key(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
495    close_open_sub_wrapper(builder, stack);
496    let (entry_kind, key_kind) = match stack.last() {
497        Some(BlockFrame::BlockMap { .. }) => (
498            SyntaxKind::YAML_BLOCK_MAP_ENTRY,
499            SyntaxKind::YAML_BLOCK_MAP_KEY,
500        ),
501        Some(BlockFrame::FlowMap { .. }) => (
502            SyntaxKind::YAML_FLOW_MAP_ENTRY,
503            SyntaxKind::YAML_FLOW_MAP_KEY,
504        ),
505        _ => return,
506    };
507    builder.start_node(entry_kind.into());
508    builder.start_node(key_kind.into());
509    if let Some(
510        BlockFrame::BlockMap {
511            entry_open,
512            in_value,
513        }
514        | BlockFrame::FlowMap {
515            entry_open,
516            in_value,
517        },
518    ) = stack.last_mut()
519    {
520        *entry_open = true;
521        *in_value = false;
522    }
523}
524
525/// Close any indentless `YAML_BLOCK_SEQUENCE` frames on top of the
526/// stack. These have no matching scanner `BlockEnd`, so they're closed
527/// here when the parent map's next `Key` / `Value` / `BlockEnd` arrives.
528/// Closing the open ITEM, finishing the SEQUENCE node, and popping the
529/// frame reveals the parent map for the incoming token. Loops because
530/// the next token may close several levels, though in practice
531/// indentless frames never stack directly (they're always separated by
532/// a map frame).
533fn close_indentless_sequences(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
534    while let Some(BlockFrame::BlockSequence {
535        indentless: true, ..
536    }) = stack.last()
537    {
538        close_open_sub_wrapper(builder, stack);
539        stack.pop();
540        builder.finish_node(); // close YAML_BLOCK_SEQUENCE
541    }
542}
543
544/// Close the top-of-stack frame's entry/item sub-wrapper if still open
545/// and clear the flag. For maps, this closes the inner KEY/VALUE
546/// node and the surrounding ENTRY. If we're closing while the entry
547/// is still in its KEY phase (i.e. the entry never received a `:`,
548/// e.g. a `?`-only explicit-key entry), an empty VALUE wrapper is
549/// inserted before the ENTRY closes so every ENTRY has the same
550/// `KEY + VALUE` child shape — the projection layer relies on that
551/// invariant. For sequences it closes the ITEM. Caller decides whether
552/// to also pop the frame itself.
553fn close_open_sub_wrapper(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
554    let Some(frame) = stack.last_mut() else {
555        return;
556    };
557    match frame {
558        BlockFrame::BlockMap {
559            entry_open: true,
560            in_value,
561        } => {
562            if *in_value {
563                builder.finish_node(); // close VALUE
564            } else {
565                builder.finish_node(); // close KEY
566                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
567                builder.finish_node(); // empty VALUE for shape parity
568            }
569            builder.finish_node(); // close ENTRY
570            *frame = BlockFrame::BlockMap {
571                entry_open: false,
572                in_value: false,
573            };
574        }
575        BlockFrame::FlowMap {
576            entry_open: true,
577            in_value,
578        } => {
579            if *in_value {
580                builder.finish_node();
581            } else {
582                builder.finish_node();
583                builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
584                builder.finish_node();
585            }
586            builder.finish_node();
587            *frame = BlockFrame::FlowMap {
588                entry_open: false,
589                in_value: false,
590            };
591        }
592        BlockFrame::BlockSequence {
593            item_open: true,
594            indentless,
595        } => {
596            let indentless = *indentless;
597            builder.finish_node();
598            *frame = BlockFrame::BlockSequence {
599                item_open: false,
600                indentless,
601            };
602        }
603        BlockFrame::FlowSequence { item_open: true } => {
604            builder.finish_node();
605            *frame = BlockFrame::FlowSequence { item_open: false };
606        }
607        _ => {}
608    }
609}
610
611fn close_block_containers(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
612    while let Some(frame) = stack.pop() {
613        match frame {
614            BlockFrame::BlockMap {
615                entry_open: true,
616                in_value,
617            } => {
618                if in_value {
619                    builder.finish_node(); // close VALUE
620                } else {
621                    builder.finish_node(); // close KEY
622                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
623                    builder.finish_node();
624                }
625                builder.finish_node(); // close ENTRY
626            }
627            BlockFrame::FlowMap {
628                entry_open: true,
629                in_value,
630            } => {
631                if in_value {
632                    builder.finish_node();
633                } else {
634                    builder.finish_node();
635                    builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
636                    builder.finish_node();
637                }
638                builder.finish_node();
639            }
640            BlockFrame::BlockSequence {
641                item_open: true, ..
642            }
643            | BlockFrame::FlowSequence { item_open: true } => {
644                builder.finish_node();
645            }
646            _ => {}
647        }
648        builder.finish_node();
649    }
650}
651
652fn map_token_to_syntax_kind(kind: TokenKind) -> SyntaxKind {
653    match kind {
654        TokenKind::Trivia(TriviaKind::Whitespace) => SyntaxKind::WHITESPACE,
655        TokenKind::Trivia(TriviaKind::Newline) => SyntaxKind::NEWLINE,
656        TokenKind::Trivia(TriviaKind::Comment) => SyntaxKind::YAML_COMMENT,
657        TokenKind::DocumentStart => SyntaxKind::YAML_DOCUMENT_START,
658        TokenKind::DocumentEnd => SyntaxKind::YAML_DOCUMENT_END,
659        TokenKind::Directive => SyntaxKind::YAML_SCALAR,
660        TokenKind::BlockEntry => SyntaxKind::YAML_BLOCK_SEQ_ENTRY,
661        TokenKind::FlowEntry => SyntaxKind::YAML_SCALAR,
662        TokenKind::FlowSequenceStart | TokenKind::FlowSequenceEnd => SyntaxKind::YAML_SCALAR,
663        TokenKind::FlowMappingStart | TokenKind::FlowMappingEnd => SyntaxKind::YAML_SCALAR,
664        TokenKind::Value => SyntaxKind::YAML_COLON,
665        TokenKind::Anchor => SyntaxKind::YAML_ANCHOR,
666        TokenKind::Alias => SyntaxKind::YAML_ALIAS,
667        TokenKind::Tag => SyntaxKind::YAML_TAG,
668        TokenKind::Scalar(_) => SyntaxKind::YAML_SCALAR,
669        // Source-backed `Key` (the explicit `?` indicator) — there is
670        // no dedicated SyntaxKind yet, route to YAML_KEY for now.
671        TokenKind::Key => SyntaxKind::YAML_KEY,
672        // Synthetic markers handled before this map; defensive
673        // fallback.
674        TokenKind::StreamStart
675        | TokenKind::StreamEnd
676        | TokenKind::BlockSequenceStart
677        | TokenKind::BlockMappingStart
678        | TokenKind::BlockEnd => SyntaxKind::YAML_SCALAR,
679    }
680}
681
/// Byte-completeness summary produced by running the v2 parser scaffold
/// over one input.
///
/// The harness in `tests/yaml.rs` gates each step-11 sub-commit on the
/// losslessness recorded here.
#[derive(Debug, Clone)]
pub struct ShadowParserV2Report {
    /// `true` when the text of the built tree equals the input exactly
    /// (`tree.text() == input`).
    pub text_lossless: bool,
    /// How many children sit directly under YAML_STREAM. This is only a
    /// coarse proxy for "did we emit any nesting yet", kept to track
    /// structural progression across sub-commits.
    pub stream_child_count: usize,
}
694
695/// Run the v2 parser and return a losslessness report. Exposed so the
696/// integration harness can run over allowlisted fixtures without
697/// depending on private types.
698pub fn shadow_parser_v2_check(input: &str) -> ShadowParserV2Report {
699    let tree = parse_v2(input);
700    let text = tree.text().to_string();
701    ShadowParserV2Report {
702        text_lossless: text == input,
703        stream_child_count: tree.children().count(),
704    }
705}
706
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // Generic tree-inspection helpers shared by the tests below.
    // ------------------------------------------------------------------

    /// All direct child *nodes* of `parent` with the given kind, in order.
    fn child_nodes(parent: &SyntaxNode, kind: SyntaxKind) -> Vec<SyntaxNode> {
        parent.children().filter(|n| n.kind() == kind).collect()
    }

    /// The first direct child *node* of `parent` with the given kind.
    fn child_node(parent: &SyntaxNode, kind: SyntaxKind) -> Option<SyntaxNode> {
        parent.children().find(|n| n.kind() == kind)
    }

    /// Whether `parent` has a direct child *node* of the given kind.
    fn has_node(parent: &SyntaxNode, kind: SyntaxKind) -> bool {
        parent.children().any(|n| n.kind() == kind)
    }

    /// Whether `parent` has a direct child *token* of the given kind.
    fn has_token(parent: &SyntaxNode, kind: SyntaxKind) -> bool {
        parent
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|t| t.kind() == kind)
    }

    /// Asserts that the text of `tree` reproduces `input` byte for byte.
    #[track_caller]
    fn assert_round_trips(tree: &SyntaxNode, input: &str) {
        assert_eq!(tree.text().to_string(), input);
    }

    // ------------------------------------------------------------------
    // Byte-losslessness smoke tests via the public report.
    // ------------------------------------------------------------------

    #[test]
    fn v2_returns_byte_lossless_cst_for_empty_input() {
        let input = "";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_returns_byte_lossless_cst_for_simple_mapping() {
        let input = "key: value\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_returns_byte_lossless_cst_for_block_sequence() {
        let input = "- a\n- b\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_returns_byte_lossless_cst_for_flow_mapping() {
        let input = "{a: b, c: d}\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_returns_byte_lossless_cst_for_block_scalar() {
        let input = "key: |\n  hello\n  world\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_returns_byte_lossless_cst_for_quoted_scalar() {
        let input = "\"key\": \"value\"\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_returns_byte_lossless_cst_for_multi_line_plain_scalar() {
        let input = "key: hello\n  world\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless);
    }

    #[test]
    fn v2_preserves_explicit_key_indicator_byte_in_flow_context() {
        // Even in flow context the explicit-key indicator `?` spans one
        // source byte, so the builder has to keep it. Only the zero-width
        // `Key` splices coming from `fetch_value` may be dropped.
        // Regression: an earlier draft filtered out every Key.
        let input = "{ ?foo: bar }\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless, "input {input:?} not preserved");
    }

    #[test]
    fn v2_does_not_absorb_terminator_line_break_into_flow_scalar() {
        // Regression: inside a flow collection, multi-line plain scalar
        // continuation has to stop when the next non-blank character is
        // a flow terminator (`}`/`]`/`,`). Previously the trailing
        // newline was swallowed into the scalar (`42\n` rather than
        // `42`), shifting the closer's byte position.
        let input = "{a: 42\n}\n";
        let report = shadow_parser_v2_check(input);
        assert!(report.text_lossless, "input {input:?} not preserved");
    }

    // ------------------------------------------------------------------
    // Document wrapping.
    // ------------------------------------------------------------------

    /// Number of YAML_DOCUMENT nodes directly under `tree`.
    fn document_count(tree: &SyntaxNode) -> usize {
        child_nodes(tree, SyntaxKind::YAML_DOCUMENT).len()
    }

    #[test]
    fn implicit_document_wraps_body_with_no_markers() {
        // Without any `---` / `...` the body is still wrapped in a
        // single YAML_DOCUMENT, giving projection a node to walk.
        let input = "key: value\n";
        let tree = parse_v2(input);
        assert_eq!(document_count(&tree), 1);
        assert_round_trips(&tree, input);
    }

    #[test]
    fn explicit_doc_start_opens_document_marker_lives_inside() {
        let input = "---\nkey: value\n";
        let tree = parse_v2(input);
        assert_eq!(document_count(&tree), 1);
        let doc = child_node(&tree, SyntaxKind::YAML_DOCUMENT).expect("document node");
        assert!(
            has_token(&doc, SyntaxKind::YAML_DOCUMENT_START),
            "`---` token should live inside YAML_DOCUMENT"
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn explicit_doc_end_closes_document_marker_lives_inside() {
        let input = "key: value\n...\n";
        let tree = parse_v2(input);
        assert_eq!(document_count(&tree), 1);
        let doc = child_node(&tree, SyntaxKind::YAML_DOCUMENT).expect("document node");
        assert!(
            has_token(&doc, SyntaxKind::YAML_DOCUMENT_END),
            "`...` token should live inside YAML_DOCUMENT"
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn consecutive_doc_starts_emit_two_documents() {
        let input = "---\na\n---\nb\n";
        let tree = parse_v2(input);
        assert_eq!(document_count(&tree), 2);
        assert_round_trips(&tree, input);
    }

    #[test]
    fn pre_document_trivia_stays_at_stream_level() {
        // A newline that precedes the first document content belongs
        // directly to YAML_STREAM: no document exists yet at that point,
        // so it must not end up inside a YAML_DOCUMENT.
        let input = "\n---\nkey: value\n";
        let tree = parse_v2(input);
        let stream_token_kinds: Vec<SyntaxKind> = tree
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .map(|t| t.kind())
            .collect();
        let newline_at_stream_level = stream_token_kinds
            .iter()
            .any(|k| *k == SyntaxKind::NEWLINE);
        assert!(
            newline_at_stream_level,
            "leading newline should be a direct child of YAML_STREAM, got {stream_token_kinds:?}"
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn bare_doc_end_at_stream_start_opens_synthetic_empty_document() {
        // Pathological but still lossless: when the stream starts with
        // `...`, the marker is wrapped in an empty YAML_DOCUMENT so that
        // no source bytes sit at YAML_STREAM level detached from a doc.
        let input = "...\n";
        let tree = parse_v2(input);
        assert_eq!(document_count(&tree), 1);
        assert_round_trips(&tree, input);
    }

    // ------------------------------------------------------------------
    // Block-context helpers.
    // ------------------------------------------------------------------

    /// First YAML_DOCUMENT under `tree`; panics when there is none.
    fn first_document(tree: &SyntaxNode) -> SyntaxNode {
        child_node(tree, SyntaxKind::YAML_DOCUMENT).expect("at least one document")
    }

    /// First YAML_BLOCK_MAP directly under `parent`, if any.
    fn block_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
        child_node(parent, SyntaxKind::YAML_BLOCK_MAP)
    }

    /// First YAML_BLOCK_SEQUENCE directly under `parent`, if any.
    fn block_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
        child_node(parent, SyntaxKind::YAML_BLOCK_SEQUENCE)
    }

    /// Every YAML_BLOCK_MAP_ENTRY directly under `map`.
    fn block_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
        child_nodes(map, SyntaxKind::YAML_BLOCK_MAP_ENTRY)
    }

    /// Every YAML_BLOCK_SEQUENCE_ITEM directly under `seq`.
    fn block_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
        child_nodes(seq, SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
    }

    /// The YAML_BLOCK_MAP_KEY child of `entry`; panics when missing.
    fn entry_key(entry: &SyntaxNode) -> SyntaxNode {
        child_node(entry, SyntaxKind::YAML_BLOCK_MAP_KEY)
            .expect("entry should have a YAML_BLOCK_MAP_KEY child")
    }

    /// The YAML_BLOCK_MAP_VALUE child of `entry`; panics when missing.
    fn entry_value(entry: &SyntaxNode) -> SyntaxNode {
        child_node(entry, SyntaxKind::YAML_BLOCK_MAP_VALUE)
            .expect("entry should have a YAML_BLOCK_MAP_VALUE child")
    }

    // ------------------------------------------------------------------
    // Block mappings and sequences.
    // ------------------------------------------------------------------

    #[test]
    fn consecutive_empty_key_colons_open_separate_entries() {
        // `: a\n: b` forms two block-map entries, each with an empty
        // (null) key plus a value (2JQS). The scanner produces two bare
        // `Value` tokens with neither Key nor BlockEnd in between, so
        // when the second `:` shows up at the map's column v2 has to
        // close the first entry instead of folding the colon into the
        // first value.
        let input = ": a\n: b\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 2, "expected two empty-key ENTRY nodes");
        for (entry, scalar) in entries.iter().zip(["a", "b"]) {
            // An empty key holds nothing but the `:` value indicator.
            let key = entry_key(entry);
            assert!(
                !has_token(&key, SyntaxKind::YAML_SCALAR),
                "empty key should carry no scalar, got {key:?}",
            );
            let value = entry_value(entry);
            let holds_expected_scalar = value
                .children_with_tokens()
                .filter_map(|el| el.into_token())
                .any(|t| t.kind() == SyntaxKind::YAML_SCALAR && t.text() == scalar);
            assert!(
                holds_expected_scalar,
                "value should be {scalar:?}, got {value:?}",
            );
        }
        assert_round_trips(&tree, input);
    }

    #[test]
    fn block_mapping_wraps_key_value_with_key_and_value_sub_wrappers() {
        let input = "key: value\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 1, "expected one ENTRY for `key: value`");
        let entry = &entries[0];
        let key = entry_key(entry);
        let value = entry_value(entry);
        // The colon belongs to the KEY; the scalar belongs to the VALUE.
        assert!(
            has_token(&key, SyntaxKind::YAML_COLON),
            "colon should be the trailing token of YAML_BLOCK_MAP_KEY",
        );
        assert!(
            has_token(&value, SyntaxKind::YAML_SCALAR),
            "scalar `value` should live inside YAML_BLOCK_MAP_VALUE",
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn block_sequence_wraps_entries_in_yaml_block_sequence() {
        let input = "- a\n- b\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let seq = block_seq_under(&doc).expect("YAML_BLOCK_SEQUENCE child");
        let items = block_seq_items(&seq);
        assert_eq!(items.len(), 2, "expected 2 YAML_BLOCK_SEQUENCE_ITEM");
        // Every item carries its own `-` entry token.
        for item in &items {
            let dash_count = item
                .children_with_tokens()
                .filter_map(|el| el.into_token())
                .filter(|t| t.kind() == SyntaxKind::YAML_BLOCK_SEQ_ENTRY)
                .count();
            assert_eq!(dash_count, 1, "each item owns exactly one `-` token");
        }
        assert_round_trips(&tree, input);
    }

    #[test]
    fn nested_block_mapping_nests_inner_block_map_inside_outer_value() {
        let input = "outer:\n  inner: x\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let outer = block_map_under(&doc).expect("outer YAML_BLOCK_MAP");
        let outer_entries = block_map_entries(&outer);
        assert_eq!(outer_entries.len(), 1);
        let outer_value = entry_value(&outer_entries[0]);
        let inner = block_map_under(&outer_value)
            .expect("inner YAML_BLOCK_MAP nested under outer VALUE");
        let inner_entries = block_map_entries(&inner);
        assert_eq!(inner_entries.len(), 1);
        let inner_key = entry_key(&inner_entries[0]);
        assert!(
            has_token(&inner_key, SyntaxKind::YAML_COLON),
            "inner key should own its colon",
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn block_sequence_inside_mapping_nests_under_outer_map_value() {
        let input = "items:\n  - a\n  - b\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 1, "one entry: `items: <seq>`");
        let value = entry_value(&entries[0]);
        let seq =
            block_seq_under(&value).expect("YAML_BLOCK_SEQUENCE nested under map VALUE");
        let items = block_seq_items(&seq);
        assert_eq!(items.len(), 2);
        assert_round_trips(&tree, input);
    }

    #[test]
    fn dedent_closes_inner_block_map_before_next_outer_key() {
        // outer:
        //   inner: x
        // sibling: y
        // Dedenting before `sibling` has to close both the inner map and
        // the outer ENTRY that holds it, so that `sibling: y` becomes a
        // sibling ENTRY of the outer map.
        let input = "outer:\n  inner: x\nsibling: y\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let outer = block_map_under(&doc).expect("outer YAML_BLOCK_MAP");
        let entries = block_map_entries(&outer);
        assert_eq!(
            entries.len(),
            2,
            "outer map should have two entries (`outer:` and `sibling:`)",
        );
        // A nested map appears in the first entry's VALUE only; the
        // second entry's VALUE is flat.
        let nested_in_first =
            child_nodes(&entry_value(&entries[0]), SyntaxKind::YAML_BLOCK_MAP).len();
        assert_eq!(nested_in_first, 1);
        let nested_in_second =
            child_nodes(&entry_value(&entries[1]), SyntaxKind::YAML_BLOCK_MAP).len();
        assert_eq!(nested_in_second, 0);
        assert_round_trips(&tree, input);
    }

    #[test]
    fn block_map_with_two_top_level_entries_emits_two_entry_wrappers() {
        let input = "a: 1\nb: 2\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entry_total = block_map_entries(&map).len();
        assert_eq!(entry_total, 2);
        assert_round_trips(&tree, input);
    }

    #[test]
    fn explicit_key_indicator_question_mark_lives_inside_key() {
        // `? a\n: b\n`: here `?` is a source-backed Key token. It opens
        // the ENTRY and sits inside the resulting KEY node, next to the
        // scalar `a` and the trailing `:`.
        let input = "? a\n: b\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 1);
        let key = entry_key(&entries[0]);
        let has_question = has_token(&key, SyntaxKind::YAML_KEY);
        assert!(has_question, "`?` should live inside YAML_BLOCK_MAP_KEY");
        assert_round_trips(&tree, input);
    }

    #[test]
    fn explicit_key_indentless_sequence_wraps_inside_key() {
        // `?\n- a\n- b\n:\n- c\n- d\n` (6PBE): the content of the explicit
        // `?` key is a block sequence at zero indent. Just like an
        // indentless sequence in a VALUE, the scanner pushes no indent
        // level and emits no BlockSequenceStart, so the builder has to
        // synthesize a YAML_BLOCK_SEQUENCE inside the KEY (mirroring the
        // VALUE side) instead of leaving `- a` / `- b` flat.
        let input = "?\n- a\n- b\n:\n- c\n- d\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 1);
        let key = entry_key(&entries[0]);
        assert!(
            has_node(&key, SyntaxKind::YAML_BLOCK_SEQUENCE),
            "explicit-key block sequence should be wrapped in YAML_BLOCK_SEQUENCE inside KEY",
        );
        let value = entry_value(&entries[0]);
        assert!(
            has_node(&value, SyntaxKind::YAML_BLOCK_SEQUENCE),
            "value-side block sequence should remain wrapped",
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn empty_key_shorthand_opens_entry_with_empty_key() {
        // `: value\n`: a bare `:` at column 0 is the shorthand for an
        // empty implicit key. ENTRY and KEY must already be open when the
        // colon arrives, so the colon becomes the sole child of KEY.
        let input = ": value\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 1);
        let key = entry_key(&entries[0]);
        // The KEY carries the colon but no scalar.
        assert!(
            !has_token(&key, SyntaxKind::YAML_SCALAR),
            "empty-key shorthand has no scalar in KEY",
        );
        assert!(
            has_token(&key, SyntaxKind::YAML_COLON),
            "empty-key KEY still owns the `:` token",
        );
        let value = entry_value(&entries[0]);
        assert!(
            has_token(&value, SyntaxKind::YAML_SCALAR),
            "VALUE owns the `value` scalar",
        );
        assert_round_trips(&tree, input);
    }

    #[test]
    fn document_end_marker_lives_at_document_level_not_inside_block_map() {
        // `...` is a document-level marker and must not be buried in the
        // block map: the builder closes all open block containers before
        // it consumes `DocumentEnd`.
        let input = "key: value\n...\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let has_doc_end = has_token(&doc, SyntaxKind::YAML_DOCUMENT_END);
        assert!(
            has_doc_end,
            "DOCUMENT_END should be a direct child of YAML_DOCUMENT"
        );
        assert_round_trips(&tree, input);
    }

    // ------------------------------------------------------------------
    // Flow-context helpers.
    // ------------------------------------------------------------------

    /// First YAML_FLOW_MAP directly under `parent`, if any.
    fn flow_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
        child_node(parent, SyntaxKind::YAML_FLOW_MAP)
    }

    /// First YAML_FLOW_SEQUENCE directly under `parent`, if any.
    fn flow_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
        child_node(parent, SyntaxKind::YAML_FLOW_SEQUENCE)
    }

    /// Every YAML_FLOW_MAP_ENTRY directly under `map`.
    fn flow_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
        child_nodes(map, SyntaxKind::YAML_FLOW_MAP_ENTRY)
    }

    /// Every YAML_FLOW_SEQUENCE_ITEM directly under `seq`.
    fn flow_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
        child_nodes(seq, SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
    }

    // ------------------------------------------------------------------
    // Flow mappings and sequences.
    // ------------------------------------------------------------------

    #[test]
    fn flow_sequence_wraps_each_item_in_flow_sequence_item() {
        let input = "[a, b, c]\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let seq = flow_seq_under(&doc).expect("YAML_FLOW_SEQUENCE child");
        let items = flow_seq_items(&seq);
        assert_eq!(items.len(), 3);
        // `[` and `]` are emitted at SEQUENCE level, as siblings of the
        // items, which matches v1's emission.
        let bracket_count = seq
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .filter(|t| matches!(t.text(), "[" | "]"))
            .count();
        assert_eq!(bracket_count, 2, "`[` and `]` at SEQUENCE level");
        assert_round_trips(&tree, input);
    }

    #[test]
    fn flow_mapping_wraps_each_entry_with_key_and_value() {
        let input = "{a: 1, b: 2}\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = flow_map_under(&doc).expect("YAML_FLOW_MAP child");
        let entries = flow_map_entries(&map);
        assert_eq!(entries.len(), 2);
        for entry in &entries {
            let key = child_node(entry, SyntaxKind::YAML_FLOW_MAP_KEY)
                .expect("entry has YAML_FLOW_MAP_KEY");
            assert!(
                has_token(&key, SyntaxKind::YAML_COLON),
                "flow KEY owns trailing `:`",
            );
            let value = child_node(entry, SyntaxKind::YAML_FLOW_MAP_VALUE)
                .expect("entry has YAML_FLOW_MAP_VALUE");
            assert!(
                has_token(&value, SyntaxKind::YAML_SCALAR),
                "flow VALUE owns its scalar",
            );
        }
        assert_round_trips(&tree, input);
    }

    #[test]
    fn flow_sequence_inside_flow_sequence_nests_under_outer_item() {
        let input = "[[1, 2], [3, 4]]\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let outer = flow_seq_under(&doc).expect("outer YAML_FLOW_SEQUENCE");
        let outer_items = flow_seq_items(&outer);
        assert_eq!(outer_items.len(), 2);
        for item in &outer_items {
            assert!(
                has_node(item, SyntaxKind::YAML_FLOW_SEQUENCE),
                "outer item should contain a nested YAML_FLOW_SEQUENCE",
            );
        }
        assert_round_trips(&tree, input);
    }

    #[test]
    fn flow_mapping_inside_flow_sequence_nests_under_item() {
        let input = "[{a: 1}, {b: 2}]\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let seq = flow_seq_under(&doc).expect("YAML_FLOW_SEQUENCE child");
        let items = flow_seq_items(&seq);
        assert_eq!(items.len(), 2);
        for item in &items {
            assert!(
                has_node(item, SyntaxKind::YAML_FLOW_MAP),
                "each item should contain a nested YAML_FLOW_MAP",
            );
        }
        assert_round_trips(&tree, input);
    }

    #[test]
    fn flow_mapping_at_block_map_value_nests_under_block_map_value() {
        let input = "key: {a: 1, b: 2}\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let block_map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
        let entries = block_map_entries(&block_map);
        assert_eq!(entries.len(), 1);
        let value = entry_value(&entries[0]);
        assert!(
            has_node(&value, SyntaxKind::YAML_FLOW_MAP),
            "flow map should be nested under outer block map's VALUE",
        );
        assert_round_trips(&tree, input);
    }

    // ------------------------------------------------------------------
    // Directives and value-less explicit keys.
    // ------------------------------------------------------------------

    #[test]
    fn directive_prelude_stays_inside_document_opened_by_marker() {
        // YAML 1.2 §6.8.1: a directive belongs to the document opened by
        // the `---` that follows it. The builder therefore must not put
        // the directive line into a document of its own; the whole input
        // is a single YAML_DOCUMENT.
        let input = "%TAG !e! tag:example.com,2000:app/\n---\n!e!foo \"bar\"\n";
        let tree = parse_v2(input);
        assert_eq!(document_count(&tree), 1);
        let doc = first_document(&tree);
        let has_doc_start = has_token(&doc, SyntaxKind::YAML_DOCUMENT_START);
        assert!(has_doc_start, "the `---` should live inside the same doc");
        assert_round_trips(&tree, input);
    }

    #[test]
    fn explicit_key_without_value_emits_empty_value_for_shape_parity() {
        // `? a\n? b\n`: neither entry contains a `:`. Each ENTRY still
        // has to carry a KEY and an (empty) VALUE child, so projection
        // walkers never need to special-case a missing child.
        let input = "? a\n? b\n";
        let tree = parse_v2(input);
        let doc = first_document(&tree);
        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP");
        let entries = block_map_entries(&map);
        assert_eq!(entries.len(), 2);
        for entry in &entries {
            assert!(
                has_node(entry, SyntaxKind::YAML_BLOCK_MAP_KEY),
                "ENTRY missing KEY child",
            );
            assert!(
                has_node(entry, SyntaxKind::YAML_BLOCK_MAP_VALUE),
                "ENTRY missing VALUE child",
            );
        }
        assert_round_trips(&tree, input);
    }
}