Skip to main content

panache_parser/parser/yaml/
parser_v2.rs

1//! Step-11 parser scaffold — a CST builder that consumes the streaming
2//! scanner. Wraps each contiguous run of body content in a
3//! `YAML_DOCUMENT` node (with `---` / `...` markers consumed inside the
4//! document they delimit), nests block-context content under
5//! `YAML_BLOCK_MAP` / `YAML_BLOCK_SEQUENCE` containers driven by the
6//! scanner's synthetic `BlockMappingStart` / `BlockSequenceStart` /
7//! `BlockEnd` markers, wraps each key-value pair in
8//! `YAML_BLOCK_MAP_ENTRY` / each `-` entry in
9//! `YAML_BLOCK_SEQUENCE_ITEM`, splits each map entry into
10//! `YAML_BLOCK_MAP_KEY` (everything up to and including the `:`) and
11//! `YAML_BLOCK_MAP_VALUE` (everything after), and mirrors the same
12//! shape for flow contexts: `YAML_FLOW_MAP` / `YAML_FLOW_MAP_ENTRY` /
13//! `YAML_FLOW_MAP_KEY` / `YAML_FLOW_MAP_VALUE` and
14//! `YAML_FLOW_SEQUENCE` / `YAML_FLOW_SEQUENCE_ITEM`. Source-backed
15//! `[` / `]` / `{` / `}` / `,` are emitted at the container level
16//! (matching v1's emission), with item/entry sub-wrappers closing on
17//! `,` and the matching closer.
18//!
19//! Per-feature event-parity work (matching each fixture's `test.event`
20//! exactly) lands incrementally on top of this shape.
21
22#![allow(dead_code)]
23
24use rowan::GreenNodeBuilder;
25
26use crate::syntax::{SyntaxKind, SyntaxNode};
27
28use super::scanner::{Scanner, TokenKind, TriviaKind};
29
30/// Drive the scanner over `input` and build a CST. Always returns a
31/// `SyntaxNode` — the scanner is permissive and the v2 builder
32/// preserves bytes regardless of well-formedness.
33pub fn parse_v2(input: &str) -> SyntaxNode {
34    let mut builder = GreenNodeBuilder::new();
35    builder.start_node(SyntaxKind::YAML_STREAM.into());
36    let mut scanner = Scanner::new(input);
37    let mut doc_open = false;
38    // True when the open YAML_DOCUMENT has only seen directives + trivia
39    // (no body content yet, no `---`). YAML 1.2 says directives belong to
40    // the document the following `---` opens, so when DocumentStart
41    // arrives in this state the marker stays inside the same document
42    // rather than splitting it. Cleared as soon as any non-directive
43    // body content lands.
44    let mut doc_only_has_directives = false;
45    // Stack of currently-open block containers. Each frame tracks
46    // whether its current `YAML_BLOCK_MAP_ENTRY` / `YAML_BLOCK_SEQUENCE_ITEM`
47    // sub-wrapper is still open and waiting to be closed (by the next
48    // `Key` / `BlockEntry` peer or by `BlockEnd`).
49    let mut block_stack: Vec<BlockFrame> = Vec::new();
50    while let Some(tok) = scanner.next_token() {
51        match tok.kind {
52            TokenKind::StreamStart | TokenKind::StreamEnd => continue,
53            TokenKind::BlockMappingStart => {
54                ensure_doc_open(&mut builder, &mut doc_open);
55                doc_only_has_directives = false;
56                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
57                builder.start_node(SyntaxKind::YAML_BLOCK_MAP.into());
58                block_stack.push(BlockFrame::BlockMap {
59                    entry_open: false,
60                    in_value: false,
61                });
62                continue;
63            }
64            TokenKind::BlockSequenceStart => {
65                ensure_doc_open(&mut builder, &mut doc_open);
66                doc_only_has_directives = false;
67                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
68                builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE.into());
69                block_stack.push(BlockFrame::BlockSequence { item_open: false });
70                continue;
71            }
72            TokenKind::BlockEnd => {
73                close_open_sub_wrapper(&mut builder, &mut block_stack);
74                // Defensive: only close if the scanner gave us an open
75                // container. A stray BlockEnd would otherwise pop the
76                // YAML_DOCUMENT or YAML_STREAM frame.
77                if block_stack.pop().is_some() {
78                    builder.finish_node();
79                }
80                continue;
81            }
82            TokenKind::FlowSequenceStart => {
83                ensure_doc_open(&mut builder, &mut doc_open);
84                doc_only_has_directives = false;
85                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
86                // If nested inside a Map's open KEY/VALUE wrapper, the
87                // current open scope is the appropriate parent.
88                builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE.into());
89                block_stack.push(BlockFrame::FlowSequence { item_open: false });
90                let text = &input[tok.start.index..tok.end.index];
91                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
92                continue;
93            }
94            TokenKind::FlowSequenceEnd => {
95                close_open_sub_wrapper(&mut builder, &mut block_stack);
96                let text = &input[tok.start.index..tok.end.index];
97                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
98                if matches!(
99                    block_stack.last(),
100                    Some(BlockFrame::FlowSequence { .. } | BlockFrame::FlowMap { .. })
101                ) {
102                    block_stack.pop();
103                    builder.finish_node();
104                }
105                continue;
106            }
107            TokenKind::FlowMappingStart => {
108                ensure_doc_open(&mut builder, &mut doc_open);
109                doc_only_has_directives = false;
110                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
111                builder.start_node(SyntaxKind::YAML_FLOW_MAP.into());
112                block_stack.push(BlockFrame::FlowMap {
113                    entry_open: false,
114                    in_value: false,
115                });
116                let text = &input[tok.start.index..tok.end.index];
117                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
118                continue;
119            }
120            TokenKind::FlowMappingEnd => {
121                close_open_sub_wrapper(&mut builder, &mut block_stack);
122                let text = &input[tok.start.index..tok.end.index];
123                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
124                if matches!(
125                    block_stack.last(),
126                    Some(BlockFrame::FlowMap { .. } | BlockFrame::FlowSequence { .. })
127                ) {
128                    block_stack.pop();
129                    builder.finish_node();
130                }
131                continue;
132            }
133            TokenKind::FlowEntry => {
134                // `,` closes the current entry/item and lives at the
135                // container level (between peer entries/items).
136                close_open_sub_wrapper(&mut builder, &mut block_stack);
137                let text = &input[tok.start.index..tok.end.index];
138                builder.token(SyntaxKind::YAML_SCALAR.into(), text);
139                continue;
140            }
141            TokenKind::Key => {
142                // Both the synthetic 0-width splice and the source-backed
143                // `?` indicator open a new map entry. Close the previous
144                // entry first if still open. After this, the current
145                // open scope is the new key wrapper.
146                if matches!(
147                    block_stack.last(),
148                    Some(BlockFrame::BlockMap { .. } | BlockFrame::FlowMap { .. })
149                ) {
150                    open_map_entry_with_key(&mut builder, &mut block_stack);
151                }
152                if tok.start.index == tok.end.index {
153                    // Synthetic Key splice carries no bytes.
154                    continue;
155                }
156                // Source-backed `?`: ensure we have somewhere to put it.
157                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
158                // Fall through to emit `?` inside the open KEY (or
159                // current scope if not in a Map frame).
160            }
161            TokenKind::Value => {
162                let map_state = match block_stack.last().copied() {
163                    Some(BlockFrame::BlockMap {
164                        entry_open,
165                        in_value,
166                    }) => Some((false, entry_open, in_value)),
167                    Some(BlockFrame::FlowMap {
168                        entry_open,
169                        in_value,
170                    }) => Some((true, entry_open, in_value)),
171                    _ => None,
172                };
173                if let Some((is_flow, entry_open, in_value)) = map_state {
174                    // Empty-key shorthand: `:` arriving without a prior
175                    // Key opens an ENTRY+KEY before consuming the colon.
176                    if !entry_open {
177                        open_map_entry_with_key(&mut builder, &mut block_stack);
178                    }
179                    if !in_value {
180                        // The colon is the last token of KEY. After it
181                        // we close KEY and open VALUE.
182                        let text = &input[tok.start.index..tok.end.index];
183                        if !text.is_empty() {
184                            builder.token(SyntaxKind::YAML_COLON.into(), text);
185                        }
186                        builder.finish_node(); // close KEY
187                        let value_kind = if is_flow {
188                            SyntaxKind::YAML_FLOW_MAP_VALUE
189                        } else {
190                            SyntaxKind::YAML_BLOCK_MAP_VALUE
191                        };
192                        builder.start_node(value_kind.into());
193                        if let Some(
194                            BlockFrame::BlockMap { in_value, .. }
195                            | BlockFrame::FlowMap { in_value, .. },
196                        ) = block_stack.last_mut()
197                        {
198                            *in_value = true;
199                        }
200                        continue;
201                    }
202                    // Already in_value: pathological double-colon. Fall
203                    // through and emit at the current scope (inside
204                    // VALUE) for losslessness.
205                }
206                // Not a Map frame: ensure flow-seq ITEM is open, then
207                // fall through to emit `:` at current scope.
208                ensure_flow_seq_item_open(&mut builder, &mut block_stack);
209            }
210            TokenKind::BlockEntry => {
211                if matches!(block_stack.last(), Some(BlockFrame::BlockSequence { .. })) {
212                    close_open_sub_wrapper(&mut builder, &mut block_stack);
213                    builder.start_node(SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM.into());
214                    if let Some(BlockFrame::BlockSequence { item_open }) = block_stack.last_mut() {
215                        *item_open = true;
216                    }
217                }
218                // Fall through to emit the `-` byte inside the new ITEM
219                // (or at current scope if not in a Sequence frame).
220            }
221            TokenKind::Trivia(_) => {
222                // Trivia bypasses item-opening: pre-content trivia in a
223                // flow sequence stays at SEQUENCE level (matching v1's
224                // emission shape).
225            }
226            _ => {
227                // Any other source-backed content (Scalar, Anchor, Tag,
228                // Alias, Directive, doc markers): if we're inside a
229                // FlowSequence with no open ITEM, open one before
230                // emitting. Doc markers are handled below.
231                if !matches!(tok.kind, TokenKind::DocumentStart | TokenKind::DocumentEnd) {
232                    ensure_flow_seq_item_open(&mut builder, &mut block_stack);
233                }
234            }
235        }
236        let text = &input[tok.start.index..tok.end.index];
237        if text.is_empty() {
238            // Defensive: never emit zero-width tokens (rowan rejects).
239            continue;
240        }
241        let kind = map_token_to_syntax_kind(tok.kind);
242        match tok.kind {
243            TokenKind::DocumentStart => {
244                // `---` begins a fresh document. Two cases:
245                //  - The currently-open document only has directives so
246                //    far: per YAML 1.2 the directives belong to the doc
247                //    that this `---` opens. Stay inside, just emit the
248                //    marker.
249                //  - Otherwise: close the previous doc (and any open
250                //    block containers) and open a new YAML_DOCUMENT.
251                //    The scanner unwinds the indent stack at column 0,
252                //    but a same-indent map at indent==0 leaves them
253                //    open, so close them defensively.
254                if doc_open && doc_only_has_directives {
255                    builder.token(kind.into(), text);
256                    doc_only_has_directives = false;
257                } else {
258                    close_block_containers(&mut builder, &mut block_stack);
259                    if doc_open {
260                        builder.finish_node();
261                    }
262                    builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
263                    doc_open = true;
264                    doc_only_has_directives = false;
265                    builder.token(kind.into(), text);
266                }
267            }
268            TokenKind::DocumentEnd => {
269                // `...` closes the current document. Close any open
270                // block containers first so the marker is a child of
271                // the document, not buried in a block container.
272                close_block_containers(&mut builder, &mut block_stack);
273                if !doc_open {
274                    builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
275                }
276                builder.token(kind.into(), text);
277                builder.finish_node();
278                doc_open = false;
279                doc_only_has_directives = false;
280            }
281            TokenKind::Trivia(_) => {
282                // Trivia goes to whichever level is currently open;
283                // pre-document trivia stays at YAML_STREAM, in-document
284                // trivia stays inside the YAML_DOCUMENT, the open
285                // block container, or the open ENTRY/ITEM sub-wrapper.
286                builder.token(kind.into(), text);
287            }
288            TokenKind::Directive => {
289                // Directives belong inside a YAML_DOCUMENT but don't by
290                // themselves count as body content — a following `---`
291                // should not split into a separate doc.
292                let was_open = doc_open;
293                ensure_doc_open(&mut builder, &mut doc_open);
294                if !was_open {
295                    doc_only_has_directives = true;
296                }
297                builder.token(kind.into(), text);
298            }
299            _ => {
300                // Any non-trivia content opens an implicit document
301                // when one isn't already in progress and counts as
302                // body content (clears the directives-only flag).
303                ensure_doc_open(&mut builder, &mut doc_open);
304                doc_only_has_directives = false;
305                builder.token(kind.into(), text);
306            }
307        }
308    }
309    // Close any open block containers (and their open ENTRY/ITEM
310    // sub-wrappers) and the open document. The scanner emits BlockEnd
311    // on stream end via `unwind_indent(-1)`, so this is normally a
312    // no-op for `block_stack`; kept for safety against truncated
313    // inputs and future scanner quirks.
314    close_block_containers(&mut builder, &mut block_stack);
315    if doc_open {
316        builder.finish_node();
317    }
318    builder.finish_node();
319    SyntaxNode::new_root(builder.finish())
320}
321
/// One open container on the v2 builder's stack.
///
/// Block and flow containers carry the same state, but they map to
/// different `SyntaxKind` variants and are closed by different tokens
/// (`BlockEnd` for block containers; `]` / `}` / `,` for flow ones).
///
/// For the map variants, `entry_open` says whether an ENTRY
/// sub-wrapper is currently open and `in_value` says whether that
/// entry is in its VALUE part (as opposed to its KEY part). For the
/// sequence variants, `item_open` says whether an ITEM sub-wrapper is
/// currently open.
///
/// `PartialEq`/`Eq` are derived so frames can be compared directly,
/// e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockFrame {
    /// An open `YAML_BLOCK_MAP`.
    BlockMap { entry_open: bool, in_value: bool },
    /// An open `YAML_BLOCK_SEQUENCE`.
    BlockSequence { item_open: bool },
    /// An open `YAML_FLOW_MAP`.
    FlowMap { entry_open: bool, in_value: bool },
    /// An open `YAML_FLOW_SEQUENCE`.
    FlowSequence { item_open: bool },
}
338
339fn ensure_doc_open(builder: &mut GreenNodeBuilder<'_>, doc_open: &mut bool) {
340    if !*doc_open {
341        builder.start_node(SyntaxKind::YAML_DOCUMENT.into());
342        *doc_open = true;
343    }
344}
345
346/// In a flow sequence, source-backed content opens a new
347/// `YAML_FLOW_SEQUENCE_ITEM` lazily — there is no `-` token to drive
348/// the boundary the way `BlockEntry` drives block sequences. Trivia
349/// arriving before the first item stays at the container level.
350fn ensure_flow_seq_item_open(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
351    if let Some(BlockFrame::FlowSequence { item_open }) = stack.last_mut()
352        && !*item_open
353    {
354        builder.start_node(SyntaxKind::YAML_FLOW_SEQUENCE_ITEM.into());
355        *item_open = true;
356    }
357}
358
359/// Open `<MAP>_ENTRY` > `<MAP>_KEY` for the next entry, closing any
360/// previously-open entry on the same Map frame. Caller must have
361/// verified the top frame is a Map (Block or Flow).
362fn open_map_entry_with_key(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
363    close_open_sub_wrapper(builder, stack);
364    let (entry_kind, key_kind) = match stack.last() {
365        Some(BlockFrame::BlockMap { .. }) => (
366            SyntaxKind::YAML_BLOCK_MAP_ENTRY,
367            SyntaxKind::YAML_BLOCK_MAP_KEY,
368        ),
369        Some(BlockFrame::FlowMap { .. }) => (
370            SyntaxKind::YAML_FLOW_MAP_ENTRY,
371            SyntaxKind::YAML_FLOW_MAP_KEY,
372        ),
373        _ => return,
374    };
375    builder.start_node(entry_kind.into());
376    builder.start_node(key_kind.into());
377    if let Some(
378        BlockFrame::BlockMap {
379            entry_open,
380            in_value,
381        }
382        | BlockFrame::FlowMap {
383            entry_open,
384            in_value,
385        },
386    ) = stack.last_mut()
387    {
388        *entry_open = true;
389        *in_value = false;
390    }
391}
392
393/// Close the top-of-stack frame's entry/item sub-wrapper if still open
394/// and clear the flag. For maps, this closes the inner KEY/VALUE
395/// node and the surrounding ENTRY. If we're closing while the entry
396/// is still in its KEY phase (i.e. the entry never received a `:`,
397/// e.g. a `?`-only explicit-key entry), an empty VALUE wrapper is
398/// inserted before the ENTRY closes so every ENTRY has the same
399/// `KEY + VALUE` child shape — the projection layer relies on that
400/// invariant. For sequences it closes the ITEM. Caller decides whether
401/// to also pop the frame itself.
402fn close_open_sub_wrapper(builder: &mut GreenNodeBuilder<'_>, stack: &mut [BlockFrame]) {
403    let Some(frame) = stack.last_mut() else {
404        return;
405    };
406    match frame {
407        BlockFrame::BlockMap {
408            entry_open: true,
409            in_value,
410        } => {
411            if *in_value {
412                builder.finish_node(); // close VALUE
413            } else {
414                builder.finish_node(); // close KEY
415                builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
416                builder.finish_node(); // empty VALUE for shape parity
417            }
418            builder.finish_node(); // close ENTRY
419            *frame = BlockFrame::BlockMap {
420                entry_open: false,
421                in_value: false,
422            };
423        }
424        BlockFrame::FlowMap {
425            entry_open: true,
426            in_value,
427        } => {
428            if *in_value {
429                builder.finish_node();
430            } else {
431                builder.finish_node();
432                builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
433                builder.finish_node();
434            }
435            builder.finish_node();
436            *frame = BlockFrame::FlowMap {
437                entry_open: false,
438                in_value: false,
439            };
440        }
441        BlockFrame::BlockSequence { item_open: true } => {
442            builder.finish_node();
443            *frame = BlockFrame::BlockSequence { item_open: false };
444        }
445        BlockFrame::FlowSequence { item_open: true } => {
446            builder.finish_node();
447            *frame = BlockFrame::FlowSequence { item_open: false };
448        }
449        _ => {}
450    }
451}
452
453fn close_block_containers(builder: &mut GreenNodeBuilder<'_>, stack: &mut Vec<BlockFrame>) {
454    while let Some(frame) = stack.pop() {
455        match frame {
456            BlockFrame::BlockMap {
457                entry_open: true,
458                in_value,
459            } => {
460                if in_value {
461                    builder.finish_node(); // close VALUE
462                } else {
463                    builder.finish_node(); // close KEY
464                    builder.start_node(SyntaxKind::YAML_BLOCK_MAP_VALUE.into());
465                    builder.finish_node();
466                }
467                builder.finish_node(); // close ENTRY
468            }
469            BlockFrame::FlowMap {
470                entry_open: true,
471                in_value,
472            } => {
473                if in_value {
474                    builder.finish_node();
475                } else {
476                    builder.finish_node();
477                    builder.start_node(SyntaxKind::YAML_FLOW_MAP_VALUE.into());
478                    builder.finish_node();
479                }
480                builder.finish_node();
481            }
482            BlockFrame::BlockSequence { item_open: true }
483            | BlockFrame::FlowSequence { item_open: true } => {
484                builder.finish_node();
485            }
486            _ => {}
487        }
488        builder.finish_node();
489    }
490}
491
492fn map_token_to_syntax_kind(kind: TokenKind) -> SyntaxKind {
493    match kind {
494        TokenKind::Trivia(TriviaKind::Whitespace) => SyntaxKind::WHITESPACE,
495        TokenKind::Trivia(TriviaKind::Newline) => SyntaxKind::NEWLINE,
496        TokenKind::Trivia(TriviaKind::Comment) => SyntaxKind::YAML_COMMENT,
497        TokenKind::DocumentStart => SyntaxKind::YAML_DOCUMENT_START,
498        TokenKind::DocumentEnd => SyntaxKind::YAML_DOCUMENT_END,
499        TokenKind::Directive => SyntaxKind::YAML_SCALAR,
500        TokenKind::BlockEntry => SyntaxKind::YAML_BLOCK_SEQ_ENTRY,
501        TokenKind::FlowEntry => SyntaxKind::YAML_SCALAR,
502        TokenKind::FlowSequenceStart | TokenKind::FlowSequenceEnd => SyntaxKind::YAML_SCALAR,
503        TokenKind::FlowMappingStart | TokenKind::FlowMappingEnd => SyntaxKind::YAML_SCALAR,
504        TokenKind::Value => SyntaxKind::YAML_COLON,
505        TokenKind::Anchor | TokenKind::Alias | TokenKind::Tag => SyntaxKind::YAML_TAG,
506        TokenKind::Scalar(_) => SyntaxKind::YAML_SCALAR,
507        // Source-backed `Key` (the explicit `?` indicator) — there is
508        // no dedicated SyntaxKind yet, route to YAML_KEY for now.
509        TokenKind::Key => SyntaxKind::YAML_KEY,
510        // Synthetic markers handled before this map; defensive
511        // fallback.
512        TokenKind::StreamStart
513        | TokenKind::StreamEnd
514        | TokenKind::BlockSequenceStart
515        | TokenKind::BlockMappingStart
516        | TokenKind::BlockEnd => SyntaxKind::YAML_SCALAR,
517    }
518}
519
/// Byte-completeness report produced by running the v2 parser scaffold
/// over one input. The harness in `tests/yaml.rs` uses it to gate each
/// step-11 sub-commit on losslessness.
///
/// `PartialEq`/`Eq` are derived so reports can be compared directly
/// (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShadowParserV2Report {
    /// True if the tree's text equals the input exactly.
    pub text_lossless: bool,
    /// Number of child nodes directly under `YAML_STREAM` — a coarse
    /// proxy for "did we emit any nesting yet", useful for tracking
    /// structural progression across sub-commits.
    pub stream_child_count: usize,
}
532
533/// Run the v2 parser and return a losslessness report. Exposed so the
534/// integration harness can run over allowlisted fixtures without
535/// depending on private types.
536pub fn shadow_parser_v2_check(input: &str) -> ShadowParserV2Report {
537    let tree = parse_v2(input);
538    let text = tree.text().to_string();
539    ShadowParserV2Report {
540        text_lossless: text == input,
541        stream_child_count: tree.children().count(),
542    }
543}
544
545#[cfg(test)]
546mod tests {
547    use super::*;
548
549    #[test]
550    fn v2_returns_byte_lossless_cst_for_empty_input() {
551        let report = shadow_parser_v2_check("");
552        assert!(report.text_lossless);
553    }
554
555    #[test]
556    fn v2_returns_byte_lossless_cst_for_simple_mapping() {
557        let report = shadow_parser_v2_check("key: value\n");
558        assert!(report.text_lossless);
559    }
560
561    #[test]
562    fn v2_returns_byte_lossless_cst_for_block_sequence() {
563        let report = shadow_parser_v2_check("- a\n- b\n");
564        assert!(report.text_lossless);
565    }
566
567    #[test]
568    fn v2_returns_byte_lossless_cst_for_flow_mapping() {
569        let report = shadow_parser_v2_check("{a: b, c: d}\n");
570        assert!(report.text_lossless);
571    }
572
573    #[test]
574    fn v2_returns_byte_lossless_cst_for_block_scalar() {
575        let report = shadow_parser_v2_check("key: |\n  hello\n  world\n");
576        assert!(report.text_lossless);
577    }
578
579    #[test]
580    fn v2_returns_byte_lossless_cst_for_quoted_scalar() {
581        let report = shadow_parser_v2_check("\"key\": \"value\"\n");
582        assert!(report.text_lossless);
583    }
584
585    #[test]
586    fn v2_returns_byte_lossless_cst_for_multi_line_plain_scalar() {
587        let report = shadow_parser_v2_check("key: hello\n  world\n");
588        assert!(report.text_lossless);
589    }
590
591    #[test]
592    fn v2_preserves_explicit_key_indicator_byte_in_flow_context() {
593        // The `?` explicit-key indicator carries a 1-byte source span
594        // even in flow context, so the v2 builder must NOT drop it
595        // (only zero-width `Key` splices from `fetch_value` should be
596        // dropped). Regression: an earlier draft filtered every Key.
597        let input = "{ ?foo: bar }\n";
598        let report = shadow_parser_v2_check(input);
599        assert!(report.text_lossless, "input {input:?} not preserved");
600    }
601
602    #[test]
603    fn v2_does_not_absorb_terminator_line_break_into_flow_scalar() {
604        // Regression: in flow context the multi-line plain
605        // continuation must abort if the next non-blank char is a
606        // flow terminator (`}`/`]`/`,`). Otherwise the trailing
607        // newline got swallowed into the scalar (`42\n` instead of
608        // `42`) and the closer's byte position drifted.
609        let input = "{a: 42\n}\n";
610        let report = shadow_parser_v2_check(input);
611        assert!(report.text_lossless, "input {input:?} not preserved");
612    }
613
614    fn document_count(tree: &SyntaxNode) -> usize {
615        tree.children()
616            .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
617            .count()
618    }
619
620    #[test]
621    fn implicit_document_wraps_body_with_no_markers() {
622        // No explicit `---` or `...` — the body still belongs to one
623        // YAML_DOCUMENT so projection has a node to walk.
624        let input = "key: value\n";
625        let tree = parse_v2(input);
626        assert_eq!(document_count(&tree), 1);
627        assert_eq!(tree.text().to_string(), input);
628    }
629
630    #[test]
631    fn explicit_doc_start_opens_document_marker_lives_inside() {
632        let input = "---\nkey: value\n";
633        let tree = parse_v2(input);
634        assert_eq!(document_count(&tree), 1);
635        let doc = tree
636            .children()
637            .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
638            .expect("document node");
639        assert!(
640            doc.children_with_tokens().any(|el| el
641                .as_token()
642                .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_START)),
643            "`---` token should live inside YAML_DOCUMENT"
644        );
645        assert_eq!(tree.text().to_string(), input);
646    }
647
648    #[test]
649    fn explicit_doc_end_closes_document_marker_lives_inside() {
650        let input = "key: value\n...\n";
651        let tree = parse_v2(input);
652        assert_eq!(document_count(&tree), 1);
653        let doc = tree
654            .children()
655            .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
656            .expect("document node");
657        assert!(
658            doc.children_with_tokens().any(|el| el
659                .as_token()
660                .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_END)),
661            "`...` token should live inside YAML_DOCUMENT"
662        );
663        assert_eq!(tree.text().to_string(), input);
664    }
665
666    #[test]
667    fn consecutive_doc_starts_emit_two_documents() {
668        let input = "---\na\n---\nb\n";
669        let tree = parse_v2(input);
670        assert_eq!(document_count(&tree), 2);
671        assert_eq!(tree.text().to_string(), input);
672    }
673
674    #[test]
675    fn pre_document_trivia_stays_at_stream_level() {
676        // A leading newline before the first document content should
677        // sit under YAML_STREAM, not inside a YAML_DOCUMENT — there is
678        // no document yet at that point.
679        let input = "\n---\nkey: value\n";
680        let tree = parse_v2(input);
681        let stream_token_kinds: Vec<SyntaxKind> = tree
682            .children_with_tokens()
683            .filter_map(|el| el.into_token())
684            .map(|t| t.kind())
685            .collect();
686        assert!(
687            stream_token_kinds.contains(&SyntaxKind::NEWLINE),
688            "leading newline should be a direct child of YAML_STREAM, got {stream_token_kinds:?}"
689        );
690        assert_eq!(tree.text().to_string(), input);
691    }
692
693    #[test]
694    fn bare_doc_end_at_stream_start_opens_synthetic_empty_document() {
695        // Pathological but lossless: a stream that begins with `...`
696        // wraps the marker in an empty YAML_DOCUMENT so no source
697        // bytes leak out at YAML_STREAM level uncoupled from a doc.
698        let input = "...\n";
699        let tree = parse_v2(input);
700        assert_eq!(document_count(&tree), 1);
701        assert_eq!(tree.text().to_string(), input);
702    }
703
704    fn first_document(tree: &SyntaxNode) -> SyntaxNode {
705        tree.children()
706            .find(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
707            .expect("at least one document")
708    }
709
710    fn block_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
711        parent
712            .children()
713            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
714    }
715
716    fn block_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
717        parent
718            .children()
719            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
720    }
721
722    fn block_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
723        map.children()
724            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
725            .collect()
726    }
727
728    fn block_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
729        seq.children()
730            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
731            .collect()
732    }
733
734    fn entry_key(entry: &SyntaxNode) -> SyntaxNode {
735        entry
736            .children()
737            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
738            .expect("entry should have a YAML_BLOCK_MAP_KEY child")
739    }
740
741    fn entry_value(entry: &SyntaxNode) -> SyntaxNode {
742        entry
743            .children()
744            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
745            .expect("entry should have a YAML_BLOCK_MAP_VALUE child")
746    }
747
748    #[test]
749    fn block_mapping_wraps_key_value_with_key_and_value_sub_wrappers() {
750        let input = "key: value\n";
751        let tree = parse_v2(input);
752        let doc = first_document(&tree);
753        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
754        let entries = block_map_entries(&map);
755        assert_eq!(entries.len(), 1, "expected one ENTRY for `key: value`");
756        let key = entry_key(&entries[0]);
757        let value = entry_value(&entries[0]);
758        // Colon ends the KEY (last token); VALUE has the scalar.
759        assert!(
760            key.children_with_tokens().any(|el| el
761                .as_token()
762                .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
763            "colon should be the trailing token of YAML_BLOCK_MAP_KEY",
764        );
765        assert!(
766            value.children_with_tokens().any(|el| el
767                .as_token()
768                .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
769            "scalar `value` should live inside YAML_BLOCK_MAP_VALUE",
770        );
771        assert_eq!(tree.text().to_string(), input);
772    }
773
774    #[test]
775    fn block_sequence_wraps_entries_in_yaml_block_sequence() {
776        let input = "- a\n- b\n";
777        let tree = parse_v2(input);
778        let doc = first_document(&tree);
779        let seq = block_seq_under(&doc).expect("YAML_BLOCK_SEQUENCE child");
780        let items = block_seq_items(&seq);
781        assert_eq!(items.len(), 2, "expected 2 YAML_BLOCK_SEQUENCE_ITEM");
782        // Each item must own its own `-` entry token.
783        for item in &items {
784            let dash_count = item
785                .children_with_tokens()
786                .filter(|el| {
787                    el.as_token()
788                        .is_some_and(|t| t.kind() == SyntaxKind::YAML_BLOCK_SEQ_ENTRY)
789                })
790                .count();
791            assert_eq!(dash_count, 1, "each item owns exactly one `-` token");
792        }
793        assert_eq!(tree.text().to_string(), input);
794    }
795
796    #[test]
797    fn nested_block_mapping_nests_inner_block_map_inside_outer_value() {
798        let input = "outer:\n  inner: x\n";
799        let tree = parse_v2(input);
800        let doc = first_document(&tree);
801        let outer = block_map_under(&doc).expect("outer YAML_BLOCK_MAP");
802        let outer_entries = block_map_entries(&outer);
803        assert_eq!(outer_entries.len(), 1);
804        let outer_value = entry_value(&outer_entries[0]);
805        let inner = outer_value
806            .children()
807            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
808            .expect("inner YAML_BLOCK_MAP nested under outer VALUE");
809        let inner_entries = block_map_entries(&inner);
810        assert_eq!(inner_entries.len(), 1);
811        let inner_key = entry_key(&inner_entries[0]);
812        assert!(
813            inner_key.children_with_tokens().any(|el| el
814                .as_token()
815                .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
816            "inner key should own its colon",
817        );
818        assert_eq!(tree.text().to_string(), input);
819    }
820
821    #[test]
822    fn block_sequence_inside_mapping_nests_under_outer_map_value() {
823        let input = "items:\n  - a\n  - b\n";
824        let tree = parse_v2(input);
825        let doc = first_document(&tree);
826        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
827        let entries = block_map_entries(&map);
828        assert_eq!(entries.len(), 1, "one entry: `items: <seq>`");
829        let value = entry_value(&entries[0]);
830        let seq = value
831            .children()
832            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
833            .expect("YAML_BLOCK_SEQUENCE nested under map VALUE");
834        let items = block_seq_items(&seq);
835        assert_eq!(items.len(), 2);
836        assert_eq!(tree.text().to_string(), input);
837    }
838
839    #[test]
840    fn dedent_closes_inner_block_map_before_next_outer_key() {
841        // outer:
842        //   inner: x
843        // sibling: y
844        // The dedent before `sibling` must close the inner map and
845        // its outer ENTRY so `sibling: y` lands as a sibling ENTRY
846        // under the outer map.
847        let input = "outer:\n  inner: x\nsibling: y\n";
848        let tree = parse_v2(input);
849        let doc = first_document(&tree);
850        let outer = block_map_under(&doc).expect("outer YAML_BLOCK_MAP");
851        let entries = block_map_entries(&outer);
852        assert_eq!(
853            entries.len(),
854            2,
855            "outer map should have two entries (`outer:` and `sibling:`)",
856        );
857        // Only the first entry's VALUE has a nested map; the second is flat.
858        let first_value = entry_value(&entries[0]);
859        let nested_in_first = first_value
860            .children()
861            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
862            .count();
863        assert_eq!(nested_in_first, 1);
864        let second_value = entry_value(&entries[1]);
865        let nested_in_second = second_value
866            .children()
867            .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
868            .count();
869        assert_eq!(nested_in_second, 0);
870        assert_eq!(tree.text().to_string(), input);
871    }
872
873    #[test]
874    fn block_map_with_two_top_level_entries_emits_two_entry_wrappers() {
875        let input = "a: 1\nb: 2\n";
876        let tree = parse_v2(input);
877        let doc = first_document(&tree);
878        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
879        assert_eq!(block_map_entries(&map).len(), 2);
880        assert_eq!(tree.text().to_string(), input);
881    }
882
883    #[test]
884    fn explicit_key_indicator_question_mark_lives_inside_key() {
885        // `? a\n: b\n` — the `?` is a source-backed Key token. It
886        // opens the ENTRY and lives inside the resulting KEY node
887        // (alongside the scalar `a` and the trailing `:`).
888        let input = "? a\n: b\n";
889        let tree = parse_v2(input);
890        let doc = first_document(&tree);
891        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
892        let entries = block_map_entries(&map);
893        assert_eq!(entries.len(), 1);
894        let key = entry_key(&entries[0]);
895        let has_question = key.children_with_tokens().any(|el| {
896            el.as_token()
897                .is_some_and(|t| t.kind() == SyntaxKind::YAML_KEY)
898        });
899        assert!(has_question, "`?` should live inside YAML_BLOCK_MAP_KEY");
900        assert_eq!(tree.text().to_string(), input);
901    }
902
903    #[test]
904    fn empty_key_shorthand_opens_entry_with_empty_key() {
905        // `: value\n` — bare `:` at column 0 is the empty-implicit-key
906        // shorthand. The v2 builder must open ENTRY+KEY before the
907        // colon arrives so the colon ends up as the only KEY child.
908        let input = ": value\n";
909        let tree = parse_v2(input);
910        let doc = first_document(&tree);
911        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
912        let entries = block_map_entries(&map);
913        assert_eq!(entries.len(), 1);
914        let key = entry_key(&entries[0]);
915        // KEY has no scalar; only the colon.
916        assert!(
917            !key.children_with_tokens().any(|el| el
918                .as_token()
919                .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
920            "empty-key shorthand has no scalar in KEY",
921        );
922        assert!(
923            key.children_with_tokens().any(|el| el
924                .as_token()
925                .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
926            "empty-key KEY still owns the `:` token",
927        );
928        let value = entry_value(&entries[0]);
929        assert!(
930            value.children_with_tokens().any(|el| el
931                .as_token()
932                .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
933            "VALUE owns the `value` scalar",
934        );
935        assert_eq!(tree.text().to_string(), input);
936    }
937
938    #[test]
939    fn document_end_marker_lives_at_document_level_not_inside_block_map() {
940        // `...` must not be buried inside the block map; it is a
941        // document-level marker. The v2 builder closes any open block
942        // containers before consuming `DocumentEnd`.
943        let input = "key: value\n...\n";
944        let tree = parse_v2(input);
945        let doc = first_document(&tree);
946        let has_doc_end = doc.children_with_tokens().any(|el| {
947            el.as_token()
948                .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_END)
949        });
950        assert!(
951            has_doc_end,
952            "DOCUMENT_END should be a direct child of YAML_DOCUMENT"
953        );
954        assert_eq!(tree.text().to_string(), input);
955    }
956
957    fn flow_map_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
958        parent
959            .children()
960            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
961    }
962
963    fn flow_seq_under(parent: &SyntaxNode) -> Option<SyntaxNode> {
964        parent
965            .children()
966            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
967    }
968
969    fn flow_map_entries(map: &SyntaxNode) -> Vec<SyntaxNode> {
970        map.children()
971            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
972            .collect()
973    }
974
975    fn flow_seq_items(seq: &SyntaxNode) -> Vec<SyntaxNode> {
976        seq.children()
977            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
978            .collect()
979    }
980
981    #[test]
982    fn flow_sequence_wraps_each_item_in_flow_sequence_item() {
983        let input = "[a, b, c]\n";
984        let tree = parse_v2(input);
985        let doc = first_document(&tree);
986        let seq = flow_seq_under(&doc).expect("YAML_FLOW_SEQUENCE child");
987        let items = flow_seq_items(&seq);
988        assert_eq!(items.len(), 3);
989        // The opening `[` and closing `]` live at SEQUENCE level
990        // (siblings of items), matching v1's emission.
991        let bracket_count = seq
992            .children_with_tokens()
993            .filter(|el| {
994                el.as_token().map(|t| t.text()) == Some("[")
995                    || el.as_token().map(|t| t.text()) == Some("]")
996            })
997            .count();
998        assert_eq!(bracket_count, 2, "`[` and `]` at SEQUENCE level");
999        assert_eq!(tree.text().to_string(), input);
1000    }
1001
1002    #[test]
1003    fn flow_mapping_wraps_each_entry_with_key_and_value() {
1004        let input = "{a: 1, b: 2}\n";
1005        let tree = parse_v2(input);
1006        let doc = first_document(&tree);
1007        let map = flow_map_under(&doc).expect("YAML_FLOW_MAP child");
1008        let entries = flow_map_entries(&map);
1009        assert_eq!(entries.len(), 2);
1010        for entry in &entries {
1011            let key = entry
1012                .children()
1013                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
1014                .expect("entry has YAML_FLOW_MAP_KEY");
1015            assert!(
1016                key.children_with_tokens().any(|el| el
1017                    .as_token()
1018                    .is_some_and(|t| t.kind() == SyntaxKind::YAML_COLON)),
1019                "flow KEY owns trailing `:`",
1020            );
1021            let value = entry
1022                .children()
1023                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
1024                .expect("entry has YAML_FLOW_MAP_VALUE");
1025            assert!(
1026                value.children_with_tokens().any(|el| el
1027                    .as_token()
1028                    .is_some_and(|t| t.kind() == SyntaxKind::YAML_SCALAR)),
1029                "flow VALUE owns its scalar",
1030            );
1031        }
1032        assert_eq!(tree.text().to_string(), input);
1033    }
1034
1035    #[test]
1036    fn flow_sequence_inside_flow_sequence_nests_under_outer_item() {
1037        let input = "[[1, 2], [3, 4]]\n";
1038        let tree = parse_v2(input);
1039        let doc = first_document(&tree);
1040        let outer = flow_seq_under(&doc).expect("outer YAML_FLOW_SEQUENCE");
1041        let outer_items = flow_seq_items(&outer);
1042        assert_eq!(outer_items.len(), 2);
1043        for item in &outer_items {
1044            assert!(
1045                item.children()
1046                    .any(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE),
1047                "outer item should contain a nested YAML_FLOW_SEQUENCE",
1048            );
1049        }
1050        assert_eq!(tree.text().to_string(), input);
1051    }
1052
1053    #[test]
1054    fn flow_mapping_inside_flow_sequence_nests_under_item() {
1055        let input = "[{a: 1}, {b: 2}]\n";
1056        let tree = parse_v2(input);
1057        let doc = first_document(&tree);
1058        let seq = flow_seq_under(&doc).expect("YAML_FLOW_SEQUENCE child");
1059        let items = flow_seq_items(&seq);
1060        assert_eq!(items.len(), 2);
1061        for item in &items {
1062            assert!(
1063                item.children()
1064                    .any(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP),
1065                "each item should contain a nested YAML_FLOW_MAP",
1066            );
1067        }
1068        assert_eq!(tree.text().to_string(), input);
1069    }
1070
1071    #[test]
1072    fn flow_mapping_at_block_map_value_nests_under_block_map_value() {
1073        let input = "key: {a: 1, b: 2}\n";
1074        let tree = parse_v2(input);
1075        let doc = first_document(&tree);
1076        let block_map = block_map_under(&doc).expect("YAML_BLOCK_MAP child");
1077        let entries = block_map_entries(&block_map);
1078        assert_eq!(entries.len(), 1);
1079        let value = entry_value(&entries[0]);
1080        assert!(
1081            value
1082                .children()
1083                .any(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP),
1084            "flow map should be nested under outer block map's VALUE",
1085        );
1086        assert_eq!(tree.text().to_string(), input);
1087    }
1088
1089    #[test]
1090    fn directive_prelude_stays_inside_document_opened_by_marker() {
1091        // YAML 1.2 §6.8.1: directives belong to the document the
1092        // following `---` opens. The v2 builder must not split the
1093        // directive line into a separate doc — the entire input is one
1094        // YAML_DOCUMENT.
1095        let input = "%TAG !e! tag:example.com,2000:app/\n---\n!e!foo \"bar\"\n";
1096        let tree = parse_v2(input);
1097        assert_eq!(document_count(&tree), 1);
1098        let doc = first_document(&tree);
1099        let has_doc_start = doc.children_with_tokens().any(|el| {
1100            el.as_token()
1101                .is_some_and(|t| t.kind() == SyntaxKind::YAML_DOCUMENT_START)
1102        });
1103        assert!(has_doc_start, "the `---` should live inside the same doc");
1104        assert_eq!(tree.text().to_string(), input);
1105    }
1106
1107    #[test]
1108    fn explicit_key_without_value_emits_empty_value_for_shape_parity() {
1109        // `? a\n? b\n` — neither entry has a `:`. Each ENTRY must still
1110        // hold both KEY and VALUE children (VALUE empty) so projection
1111        // walkers don't have to special-case missing children.
1112        let input = "? a\n? b\n";
1113        let tree = parse_v2(input);
1114        let doc = first_document(&tree);
1115        let map = block_map_under(&doc).expect("YAML_BLOCK_MAP");
1116        let entries = block_map_entries(&map);
1117        assert_eq!(entries.len(), 2);
1118        for entry in &entries {
1119            assert!(
1120                entry
1121                    .children()
1122                    .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY),
1123                "ENTRY missing KEY child",
1124            );
1125            assert!(
1126                entry
1127                    .children()
1128                    .any(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE),
1129                "ENTRY missing VALUE child",
1130            );
1131        }
1132        assert_eq!(tree.text().to_string(), input);
1133    }
1134}