Skip to main content

panache_parser/parser/yaml/
events.rs

1//! YAML event projection: walk a YAML parser CST and produce a
2//! yaml-test-suite style event stream (`+STR`, `+DOC`, `+MAP`, `=VAL :foo`,
3//! ...).
4//!
5//! This module is parser-crate scoped and used only by the test harness in
6//! `crates/panache-parser/tests/yaml.rs` for fixture parity. It reads the
7//! green tree built by [`crate::parser::yaml::parse_yaml_tree`] and re-derives
8//! event-stream semantics (tag resolution, anchor stripping, flow-seq
9//! splitting). The intent is to keep the projection adjacent to the parser so
10//! CST shape is the single source of truth for events.
11
12use std::collections::HashMap;
13
14use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken};
15
16use super::cooking;
17use super::parser::parse_yaml_tree;
18
/// Tag handle table for a single document, keyed by handle (`!!`, `!yaml!`,
/// `!e!`) with the URI prefix it expands to as the value. Every document
/// starts with the secondary handle `!!` bound to `tag:yaml.org,2002:` (the
/// YAML 1.2 default); `%TAG` directives in the document add or replace
/// entries.
type TagHandles = HashMap<String, String>;

/// Build the handle table a document starts from: only the secondary handle
/// `!!`, mapped to `tag:yaml.org,2002:`.
fn default_tag_handles() -> TagHandles {
    TagHandles::from([("!!".to_string(), "tag:yaml.org,2002:".to_string())])
}
29
30/// Scan a `YAML_DOCUMENT` for `%TAG` directive lines and merge them into
31/// the default handle map.
32fn collect_tag_handles(doc: &SyntaxNode) -> TagHandles {
33    let mut handles = default_tag_handles();
34    for tok in doc
35        .descendants_with_tokens()
36        .filter_map(|el| el.into_token())
37    {
38        if tok.kind() != SyntaxKind::YAML_DIRECTIVE {
39            continue;
40        }
41        let line = tok.text().trim_start();
42        let Some(rest) = line.strip_prefix("%TAG") else {
43            continue;
44        };
45        let mut parts = rest.split_whitespace();
46        let Some(handle) = parts.next() else { continue };
47        let Some(prefix) = parts.next() else { continue };
48        handles.insert(handle.to_string(), prefix.to_string());
49    }
50    handles
51}
52
53/// Resolve a tag shorthand (e.g. `!!str`, `!yaml!str`, `!e!foo`, `!local`) to
54/// the long-form `<tag:...>` event token, consulting the per-document handle
55/// map. Handles are checked first (so a `%TAG !` directive can override the
56/// primary handle); we fall back to the built-in handling for unknown handles.
57fn resolve_long_tag(tag: &str, handles: &TagHandles) -> Option<String> {
58    // Verbatim tag `!<URI>` (YAML 1.2 §6.8.1): the URI between the angle
59    // brackets is used as-is, bypassing handle resolution. Local verbatim
60    // tags keep their leading `!` (`!<!bar>` → `<!bar>`). Checked before the
61    // handle loop so a registered `!` primary handle can't claim it.
62    if let Some(inner) = tag.strip_prefix("!<").and_then(|t| t.strip_suffix('>')) {
63        return Some(format!("<{}>", percent_decode_tag(inner)));
64    }
65    let mut best: Option<(&str, &String)> = None;
66    for (h, p) in handles {
67        if tag.starts_with(h)
68            && best.is_none_or(|(b_handle, _): (&str, _)| h.len() > b_handle.len())
69        {
70            best = Some((h.as_str(), p));
71        }
72    }
73    if let Some((handle, prefix)) = best {
74        let suffix = &tag[handle.len()..];
75        let resolved = format!("{prefix}{suffix}");
76        return Some(format!("<{}>", percent_decode_tag(&resolved)));
77    }
78    long_tag_builtin(tag)
79}
80
/// Replace every `%xx` escape (two hex digits) in a resolved tag URI with
/// the byte it encodes. A `%` that is not followed by two hex digits is
/// copied through unchanged. If the decoded bytes are not valid UTF-8 the
/// original `tag` text is returned untouched.
fn percent_decode_tag(tag: &str) -> String {
    let src = tag.as_bytes();
    let mut decoded = Vec::with_capacity(src.len());
    let mut pos = 0;
    while let Some(&byte) = src.get(pos) {
        // An escape needs the `%` plus two in-bounds hex digits.
        let escape = match (byte, src.get(pos + 1), src.get(pos + 2)) {
            (b'%', Some(&hi), Some(&lo)) => hex_digit_value(hi).zip(hex_digit_value(lo)),
            _ => None,
        };
        if let Some((hi, lo)) = escape {
            decoded.push(hi * 16 + lo);
            pos += 3;
        } else {
            decoded.push(byte);
            pos += 1;
        }
    }
    String::from_utf8(decoded).unwrap_or_else(|_| tag.to_string())
}

/// Numeric value (0–15) of an ASCII hex digit in either case, or `None` for
/// any other byte.
fn hex_digit_value(byte: u8) -> Option<u8> {
    let base = match byte {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(byte - base)
}
112
113/// Walk the YAML CST for `input` and return the projected yaml-test-suite
114/// event stream. Returns an empty vector if the input fails to parse.
115pub fn project_events(input: &str) -> Vec<String> {
116    let Some(tree) = parse_yaml_tree(input) else {
117        return Vec::new();
118    };
119    project_events_from_tree(&tree)
120}
121
122/// Walk a YAML parser CST and return the projected yaml-test-suite event
123/// stream. Decoupled from `parse_yaml_tree` so callers that already hold a
124/// tree (e.g. yaml-test-suite parity checks) can reuse the same projection.
125pub fn project_events_from_tree(tree: &SyntaxNode) -> Vec<String> {
126    let mut events = vec!["+STR".to_string()];
127    let stream = tree
128        .descendants()
129        .find(|n| n.kind() == SyntaxKind::YAML_STREAM);
130    if let Some(stream) = stream {
131        for doc in stream
132            .children()
133            .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
134        {
135            project_document(&doc, &mut events);
136        }
137    }
138    events.push("-STR".to_string());
139    events
140}
141
142/// True when the document holds no content beyond a `DocumentEnd`
143/// marker and surrounding trivia (whitespace, newlines, comments).
144/// Used to distinguish a real (possibly empty) document from a
145/// synthetic doc the v2 builder wrapped around a bare `...`.
146fn doc_is_marker_only(doc: &SyntaxNode) -> bool {
147    for el in doc.descendants_with_tokens() {
148        if let Some(tok) = el.as_token() {
149            match tok.kind() {
150                SyntaxKind::WHITESPACE
151                | SyntaxKind::NEWLINE
152                | SyntaxKind::YAML_COMMENT
153                | SyntaxKind::YAML_DOCUMENT_END
154                | SyntaxKind::YAML_DOCUMENT_START => {}
155                _ => return false,
156            }
157        }
158    }
159    true
160}
161
162/// LX3P: a `[flow]` sequence written as a block-map key lands in the v2 CST
163/// as a YAML_FLOW_SEQUENCE that's a direct child of the YAML_DOCUMENT,
164/// preceding the YAML_BLOCK_MAP that the trailing `:` opens. Returns that
165/// flow-sequence when this shape is present.
166fn flow_seq_preceding_block_map_at_doc_level(
167    doc: &SyntaxNode,
168    block_map: &SyntaxNode,
169) -> Option<SyntaxNode> {
170    let block_map_offset = block_map.text_range().start();
171    doc.children()
172        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
173        .find(|n| n.text_range().end() <= block_map_offset)
174}
175
176/// True when a YAML_BLOCK_MAP_ENTRY's KEY wrapper carries no key text —
177/// only structural trivia and the `:` indicator. Used to detect the
178/// implicit-empty-key shape (`: value`) and the LX3P pattern where the
179/// real key lives in a sibling node preceding the map.
180fn block_map_entry_key_is_empty(entry: &SyntaxNode) -> bool {
181    let Some(key_node) = entry
182        .children()
183        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
184    else {
185        return false;
186    };
187    // The key text lives in a `YAML_SCALAR` node child or, for the
188    // explicit `?`/tag shapes, a `YAML_KEY`/`YAML_TAG` token — all before
189    // the trailing `:`.
190    !key_node
191        .children_with_tokens()
192        .take_while(|el| el.as_token().map(|t| t.kind()) != Some(SyntaxKind::YAML_COLON))
193        .any(|el| match el {
194            rowan::NodeOrToken::Node(n) => {
195                n.kind() == SyntaxKind::YAML_SCALAR && !n.text().to_string().trim().is_empty()
196            }
197            rowan::NodeOrToken::Token(t) => {
198                matches!(t.kind(), SyntaxKind::YAML_KEY | SyntaxKind::YAML_TAG)
199                    && !t.text().trim().is_empty()
200            }
201        })
202}
203
204fn project_document(doc: &SyntaxNode, out: &mut Vec<String>) {
205    let has_doc_start = doc
206        .children_with_tokens()
207        .filter_map(|el| el.into_token())
208        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START);
209    let has_doc_end = doc
210        .children_with_tokens()
211        .filter_map(|el| el.into_token())
212        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END);
213    // A v2 builder synthesizes a `YAML_DOCUMENT` around a bare `...`
214    // (or comments preceding it) to keep the marker inside a document
215    // for losslessness. v1 / yaml-test-suite considers such input an
216    // empty stream — no `+DOC`/`-DOC` events. Skip the projection when
217    // the only structural content is a `DocumentEnd` marker (HWV9,
218    // QT73).
219    if !has_doc_start && doc_is_marker_only(doc) {
220        return;
221    }
222    out.push(if has_doc_start {
223        "+DOC ---".to_string()
224    } else {
225        "+DOC".to_string()
226    });
227    let handles = collect_tag_handles(doc);
228
229    // Top-level container detection must look at direct children, not
230    // arbitrary descendants. A `descendants()` walk surfaces the first
231    // BLOCK_SEQUENCE/BLOCK_MAP it finds in document order — which for a
232    // block-map whose values contain nested block-sequences would be the
233    // inner sequence, collapsing the entire map into a bare `+SEQ`.
234    if let Some(seq_node) = doc
235        .children()
236        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
237    {
238        out.push(seq_open_event(&seq_node, &handles));
239        project_block_sequence_items(&seq_node, &handles, out);
240        out.push("-SEQ".to_string());
241    } else if let Some(root_map) = doc
242        .children()
243        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
244    {
245        // Flow-sequence used as a block-map key (LX3P: `[flow]: block`).
246        // v2 lands the `[flow]` flow-sequence as a sibling preceding the
247        // YAML_BLOCK_MAP (the colon opens an empty-key entry inside the
248        // map), but yaml-test-suite expects `+MAP +SEQ []…-SEQ value -MAP`.
249        // Splice the flow-seq in as the first entry's key when this shape
250        // is present.
251        if let Some(flow_seq) = flow_seq_preceding_block_map_at_doc_level(doc, &root_map)
252            && let Some(first_entry) = root_map
253                .children()
254                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
255            && block_map_entry_key_is_empty(&first_entry)
256        {
257            out.push(map_open_event_for_block_map(&root_map, &handles));
258            out.push("+SEQ []".to_string());
259            project_flow_sequence_items_cst(&flow_seq, &handles, out);
260            out.push("-SEQ".to_string());
261            if let Some(value_node) = first_entry
262                .children()
263                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
264            {
265                project_block_map_entry_value(&value_node, &handles, out);
266            } else {
267                out.push("=VAL :".to_string());
268            }
269            for entry in root_map
270                .children()
271                .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
272                .skip(1)
273            {
274                project_block_map_entry(&entry, &handles, out);
275            }
276            out.push("-MAP".to_string());
277        } else {
278            let mut values = Vec::new();
279            project_block_map_entries(&root_map, &handles, &mut values);
280            if !values.is_empty() {
281                out.push(map_open_event_for_block_map(&root_map, &handles));
282                out.append(&mut values);
283                out.push("-MAP".to_string());
284            } else if let Some(flow_map) = doc
285                .descendants()
286                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
287            {
288                let mut flow_values = Vec::new();
289                project_flow_map_entries(&flow_map, &handles, &mut flow_values);
290                out.push("+MAP {}".to_string());
291                out.append(&mut flow_values);
292                out.push("-MAP".to_string());
293            } else if let Some(scalar) = scalar_document_value(doc, &handles) {
294                out.push(scalar);
295            } else {
296                out.push("=VAL :".to_string());
297            }
298        }
299    } else if let Some(flow_collection) = doc.children().find(|n| {
300        matches!(
301            n.kind(),
302            SyntaxKind::YAML_FLOW_MAP | SyntaxKind::YAML_FLOW_SEQUENCE
303        )
304    }) {
305        // A doc-direct flow collection may be preceded by a doc-level
306        // anchor token (`&flowseq [ ... ]`, CN3R). Carry the anchor
307        // onto the open event so `+SEQ [] &flowseq` matches the
308        // expected projection. Looking at `descendants()` (the prior
309        // implementation) is wrong here because it surfaces the
310        // first nested flow_map encountered in document order — for a
311        // `&flowseq [ ... { e: f } ... ]` shape that collapses the
312        // whole document into a bare flow-map projection.
313        let anchor = anchor_preceding_node(doc, &flow_collection);
314        project_flow_collection_node_with_anchor(
315            &flow_collection,
316            anchor.as_deref(),
317            &handles,
318            out,
319        );
320    } else if let Some(flow_map) = doc
321        .descendants()
322        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
323    {
324        out.push("+MAP {}".to_string());
325        project_flow_map_entries(&flow_map, &handles, out);
326        out.push("-MAP".to_string());
327    } else if let Some(flow_seq) = doc
328        .descendants()
329        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
330    {
331        out.push("+SEQ []".to_string());
332        project_flow_sequence_items_cst(&flow_seq, &handles, out);
333        out.push("-SEQ".to_string());
334    } else if let Some(scalar) = scalar_document_value(doc, &handles) {
335        out.push(scalar);
336    } else {
337        out.push("=VAL :".to_string());
338    }
339
340    out.push(if has_doc_end {
341        "-DOC ...".to_string()
342    } else {
343        "-DOC".to_string()
344    });
345}
346
347fn scalar_document_value(doc: &SyntaxNode, handles: &TagHandles) -> Option<String> {
348    // `--- |` / `--- >` packs a block-scalar header onto the directive-end
349    // marker line. Detect that pattern first so the folded body (with proper
350    // chomping) is emitted instead of a single-line plain scalar.
351    if let Some((indicator, body)) = extract_scalar_doc_block_body(doc) {
352        let escaped = escape_block_scalar_text(&body);
353        return Some(format!("=VAL {indicator}{escaped}"));
354    }
355    // Bare top-level block scalar (no `---` marker) — e.g. a doc that begins
356    // with `>\n …` or `|\n …`. Reuse the same folder; the only difference vs
357    // the directive-end-packed form is the absence of a `YAML_DOCUMENT_START`
358    // sentinel separating the header from the body.
359    if let Some((indicator, body)) = extract_top_level_block_body(doc) {
360        let escaped = escape_block_scalar_text(&body);
361        return Some(format!("=VAL {indicator}{escaped}"));
362    }
363    // Skip `%TAG`/`%YAML` directive lines: those are document-level metadata,
364    // not part of the scalar body.
365    // Include WHITESPACE between tokens so a top-level `&anchor body`
366    // joins as `&anchor body`, letting `decompose_scalar` find the
367    // whitespace terminator on the anchor name.
368    let text = doc
369        .descendants_with_tokens()
370        .filter_map(|el| el.into_token())
371        .filter(|tok| {
372            matches!(
373                tok.kind(),
374                SyntaxKind::YAML_SCALAR_TEXT
375                    | SyntaxKind::YAML_ANCHOR
376                    | SyntaxKind::YAML_ALIAS
377                    | SyntaxKind::WHITESPACE
378            )
379        })
380        .filter(|tok| !tok.text().trim_start().starts_with('%'))
381        .map(|tok| tok.text().to_string())
382        .collect::<Vec<_>>()
383        .join("");
384    let trimmed_text = text.trim();
385    if trimmed_text.is_empty() {
386        // Tagged-but-empty scalar document still emits a `=VAL <tag> :` event.
387        let tag_only = doc
388            .descendants_with_tokens()
389            .filter_map(|el| el.into_token())
390            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
391            .map(|tok| tok.text().to_string());
392        if let Some(tag) = tag_only
393            && let Some(long) = resolve_long_tag(&tag, handles)
394        {
395            return Some(format!("=VAL {long} :"));
396        }
397        return None;
398    }
399    let tag_text = doc
400        .descendants_with_tokens()
401        .filter_map(|el| el.into_token())
402        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
403        .map(|tok| tok.text().to_string());
404    let multi_line_text = collect_scalar_source(doc);
405    let is_multi_line_quoted = multi_line_text.contains('\n')
406        && (trimmed_text.starts_with('"') || trimmed_text.starts_with('\''));
407    let event = if let Some(tag) = tag_text
408        && let Some(long) = resolve_long_tag(&tag, handles)
409    {
410        if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
411            let quoted = if is_multi_line_quoted {
412                quoted_val_event_multi_line(&multi_line_text)
413            } else {
414                quoted_val_event(trimmed_text)
415            };
416            // quoted_val_event returns `=VAL "body` — splice the tag in.
417            quoted.replacen("=VAL ", &format!("=VAL {long} "), 1)
418        } else {
419            // Plain scalar: fold multi-line continuations the same way the
420            // untagged path does so `!!str\nd\ne` projects as `:d e`. The
421            // folded text may still carry a leading anchor token
422            // (`&a1\nscalar1`, 9KAX) since `fold_plain_document_lines`
423            // keeps YAML_ANCHOR tokens — peel it off so the event renders
424            // as `=VAL &anchor <tag> :body` rather than burying `&anchor`
425            // in the scalar body.
426            let folded = fold_plain_document_lines(doc);
427            let (anchor, _, body) = decompose_scalar(folded.trim_start(), handles);
428            scalar_event(anchor, Some(&long), &escape_block_scalar_text(body))
429        }
430    } else if is_multi_line_quoted {
431        quoted_val_event_multi_line(&multi_line_text)
432    } else if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
433        quoted_val_event(&text)
434    } else {
435        let folded = fold_plain_document_lines(doc);
436        // Plain top-level scalars may carry node properties (`&anchor`,
437        // `!tag`) before the actual scalar body; decompose so events project
438        // them in canonical `&anchor <tag> :body` order.
439        let (anchor, body_tag, body) = decompose_scalar(folded.trim_start(), handles);
440        if anchor.is_some() || body_tag.is_some() {
441            scalar_event(anchor, body_tag.as_deref(), &escape_block_scalar_text(body))
442        } else {
443            format!("=VAL :{}", escape_block_scalar_text(&folded))
444        }
445    };
446    Some(event)
447}
448
449/// Collect a node's scalar source bytes — concatenate every
450/// `YAML_SCALAR` / `YAML_ANCHOR` / `YAML_ALIAS` / `NEWLINE` descendant
451/// token in order. Real directive lines are emitted as `YAML_DIRECTIVE`
452/// tokens and so are already excluded by the kind filter; a scalar
453/// fragment that merely *looks* like a directive (e.g. a `%YAML 1.2`
454/// continuation line of a plain scalar, XLQ9) is genuine content and
455/// kept. The result is the raw multi-line text the [`super::cooking`]
456/// helpers expect when folding a multi-line quoted scalar — usable at the
457/// document level or scoped to a single `YAML_BLOCK_MAP_VALUE`.
458fn collect_scalar_source(node: &SyntaxNode) -> String {
459    node.descendants_with_tokens()
460        .filter_map(|el| el.into_token())
461        .filter(|tok| {
462            matches!(
463                tok.kind(),
464                SyntaxKind::YAML_SCALAR_TEXT
465                    | SyntaxKind::YAML_ANCHOR
466                    | SyntaxKind::YAML_ALIAS
467                    | SyntaxKind::NEWLINE
468            )
469        })
470        .map(|tok| tok.text().to_string())
471        .collect()
472}
473
/// Build the `=VAL :<text>` event for a plain scalar, doubling every
/// backslash in `text` so it survives the event format's escape syntax.
fn plain_val_event(text: &str) -> String {
    const PREFIX: &str = "=VAL :";
    let mut event = String::with_capacity(PREFIX.len() + text.len());
    event.push_str(PREFIX);
    for ch in text.chars() {
        if ch == '\\' {
            event.push_str("\\\\");
        } else {
            event.push(ch);
        }
    }
    event
}
477
478/// Fold the YAML-1.2 plain-scalar body of a top-level scalar `YAML_DOCUMENT`
479/// into its canonical value: walk `YAML_SCALAR` and `NEWLINE` tokens in order,
480/// then apply plain-scalar folding — non-empty-line breaks fold to a single
481/// space, runs of `n` empty lines fold to `n` line feeds. Leading/trailing
482/// empty lines are stripped. Real directives are `YAML_DIRECTIVE` tokens
483/// (excluded by the kind filter); a `%`-leading plain-scalar continuation
484/// line (XLQ9) is content and is kept.
485fn fold_plain_document_lines(doc: &SyntaxNode) -> String {
486    let raw: String = doc
487        .descendants_with_tokens()
488        .filter_map(|el| el.into_token())
489        .filter(|tok| {
490            matches!(
491                tok.kind(),
492                SyntaxKind::YAML_SCALAR_TEXT
493                    | SyntaxKind::YAML_ANCHOR
494                    | SyntaxKind::YAML_ALIAS
495                    | SyntaxKind::WHITESPACE
496                    | SyntaxKind::NEWLINE
497            )
498        })
499        .map(|tok| tok.text().to_string())
500        .collect();
501
502    let mut out = String::with_capacity(raw.len());
503    let mut empty_run: usize = 0;
504    let mut have_content = false;
505    for line in raw.split('\n') {
506        let trimmed = line.trim();
507        if trimmed.is_empty() {
508            if have_content {
509                empty_run += 1;
510            }
511            continue;
512        }
513        if !have_content {
514            out.push_str(trimmed);
515            have_content = true;
516        } else if empty_run == 0 {
517            out.push(' ');
518            out.push_str(trimmed);
519        } else {
520            for _ in 0..empty_run {
521                out.push('\n');
522            }
523            out.push_str(trimmed);
524        }
525        empty_run = 0;
526    }
527    out
528}
529
530/// Project a flow-collection scalar token, preserving quoted-scalar
531/// classification when the source uses `"..."` or `'...'`. Plain scalars are
532/// folded just like outside flow context. A leading tag shorthand (`!!str`,
533/// `!handle!suffix`, `!local`) is resolved through `handles`.
534fn flow_scalar_event(text: &str, handles: &TagHandles) -> String {
535    let trimmed = text.trim();
536    if trimmed.starts_with('"') || trimmed.starts_with('\'') {
537        if trimmed.contains('\n') {
538            return quoted_val_event_multi_line(trimmed);
539        }
540        return quoted_val_event(trimmed);
541    }
542    // Alias indicator (`*name`). YAML plain scalars cannot begin with `*`,
543    // so a leading `*` is always an alias reference. The trimmed body
544    // (`*name`) is the alias's serialized form.
545    if trimmed.starts_with('*') {
546        return format!("=ALI {trimmed}");
547    }
548    let (anchor, long_tag, body) = decompose_scalar(trimmed, handles);
549    if anchor.is_some() || long_tag.is_some() {
550        return scalar_event(anchor, long_tag.as_deref(), body);
551    }
552    plain_val_event(&cooking::cook_plain(text))
553}
554
/// Split a leading tag off `text`, returning `(tag, remainder)`.
///
/// * `None` when `text` does not start with `!`.
/// * Verbatim tag `!<URI>`: the tag runs through the closing `>`, so `,`
///   and `:` inside the URI do not cut it short. `None` when the `>` is
///   missing.
/// * Shorthand (`!local`, `!!str`, `!handle!suffix`): the tag runs up to the
///   first space, tab, newline, `,`, `}` or `]`, or to the end of `text`.
///   `None` when more than one `!` follows the leading one inside that
///   span.
///
/// The remainder starts at the terminating character (it is not consumed).
fn split_leading_tag(text: &str) -> Option<(&str, &str)> {
    let rest = text.strip_prefix('!')?;

    if let Some(uri) = rest.strip_prefix('<') {
        let close = uri.find('>')?;
        return Some(text.split_at("!<".len() + close + ">".len()));
    }

    let end = rest
        .find([' ', '\t', '\n', ',', '}', ']'])
        .unwrap_or(rest.len());
    // At most one more `!` may appear: the one that closes a named handle.
    if rest[..end].matches('!').count() > 1 {
        return None;
    }
    Some(text.split_at(1 + end))
}
589
/// Locate the key/value `:` indicator inside the text of one flow-sequence
/// item.
///
/// Returns `Some((colon, after))` where `colon` is the byte offset of the
/// `:` and `after` the offset just past it, or `None` when the item holds
/// no indicator. Per YAML 1.2 a `:` is an indicator only when followed by
/// whitespace or by the end of the item; otherwise it belongs to a plain
/// scalar (`http://foo.com`). The exception is a JSON-like (quoted) key,
/// which may have its `:` directly adjacent to the value.
///
/// Quoted regions are skipped. A quote character opens a quoted scalar only
/// where a node can begin: at the start of the item, after whitespace, or
/// right after a `[`, `{` or `,`. A `'` or `"` in the middle of a plain
/// scalar (`it's: value`) is ordinary content and must not hide the `:`
/// that follows it.
fn flow_kv_split(item: &str) -> Option<(usize, usize)> {
    let bytes = item.as_bytes();
    let mut in_single = false;
    let mut in_double = false;
    // Set when the next character is escaped (`\x` in double quotes, the
    // second `'` of a `''` pair in single quotes) and must be passed over.
    let mut skip_next = false;
    // True while the next character could be the first character of a node.
    let mut at_node_start = true;

    for (idx, ch) in item.char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }
        if in_double {
            match ch {
                '\\' => skip_next = true,
                '"' => in_double = false,
                _ => {}
            }
            continue;
        }
        if in_single {
            if ch == '\'' {
                // `''` is an escaped quote and keeps the scalar open.
                if bytes.get(idx + 1) == Some(&b'\'') {
                    skip_next = true;
                } else {
                    in_single = false;
                }
            }
            continue;
        }

        let opens_node = at_node_start;
        at_node_start = matches!(ch, ' ' | '\t' | '\n' | '\r' | '[' | '{' | ',');
        match ch {
            '\'' if opens_node => in_single = true,
            '"' if opens_node => in_double = true,
            ':' => {
                let next_off = idx + ch.len_utf8();
                let after_is_break = next_off >= bytes.len()
                    || matches!(bytes[next_off], b' ' | b'\t' | b'\n' | b'\r');
                // YAML 1.2 §7.4.2: a JSON-like key (here, a quoted scalar)
                // permits an adjacent value colon with no following space
                // (`"JSON like":adjacent`, 9MMW). Flow-collection keys are
                // projected structurally before reaching this text path.
                let key_is_json_like = item[..idx].trim_end().ends_with(['"', '\'']);
                if after_is_break || key_is_json_like {
                    return Some((idx, next_off));
                }
            }
            _ => {}
        }
    }
    None
}
639
640/// Emit events for a single flow-sequence item: either `+MAP {} key val -MAP`
641/// when the item is a flow-map entry (`key: value`, possibly with empty key
642/// or value), or a single `=VAL` for a bare scalar.
643fn project_flow_seq_item(item: &str, handles: &TagHandles, out: &mut Vec<String>) {
644    if let Some((colon, after)) = flow_kv_split(item) {
645        let raw_key_full = item[..colon].trim();
646        // Strip the explicit-key `?` indicator (followed by whitespace or
647        // end-of-key) when present.
648        let raw_key = strip_explicit_key_indicator(raw_key_full);
649        let raw_value = item[after..].trim();
650        out.push("+MAP {}".to_string());
651        if raw_key.is_empty() {
652            out.push("=VAL :".to_string());
653        } else {
654            out.push(flow_scalar_event(raw_key, handles));
655        }
656        if raw_value.is_empty() {
657            out.push("=VAL :".to_string());
658        } else {
659            out.push(flow_scalar_event(raw_value, handles));
660        }
661        out.push("-MAP".to_string());
662    } else if item.trim_start().starts_with('"') || item.trim_start().starts_with('\'') {
663        let trimmed = item.trim();
664        // Multi-line quoted scalar inside a flow sequence: apply YAML
665        // 1.2 §7.3 line-folding rules so embedded newlines fold to a
666        // space (or `\n` for blank-line runs) before the event's escape
667        // pass. Without this, joining tokens directly leaves the literal
668        // newline inside the body.
669        if trimmed.contains('\n') {
670            out.push(quoted_val_event_multi_line(trimmed));
671        } else {
672            out.push(quoted_val_event(trimmed));
673        }
674    } else {
675        // Route through `flow_scalar_event` so node properties on a
676        // flow-seq item (`[&item a, b, c]`, 6BFJ) project as
677        // `=VAL &item :a` and alias items (`[*b]`, X38W) project as
678        // `=ALI *b`.
679        out.push(flow_scalar_event(&cooking::cook_plain(item), handles));
680    }
681}
682
/// Remove a leading explicit-key indicator from `key`.
///
/// When `key`, ignoring leading whitespace, starts with `?` followed by a
/// space, tab, newline or nothing at all, the text after the `?` is
/// returned with its leading whitespace trimmed. In every other case
/// (including `?` glued to other text, as in `?foo`) `key` is returned
/// unchanged.
fn strip_explicit_key_indicator(key: &str) -> &str {
    match key.trim_start().strip_prefix('?') {
        Some(rest) if rest.is_empty() || rest.starts_with([' ', '\t', '\n']) => {
            rest.trim_start()
        }
        _ => key,
    }
}
692
693fn quoted_val_event(text: &str) -> String {
694    if text.starts_with('\'') {
695        let inner = cooking::cook_single_quoted_single_line(text);
696        format!("=VAL '{}", escape_for_event(&inner))
697    } else {
698        let inner = cooking::cook_double_quoted_single_line(text);
699        format!("=VAL \"{}", escape_for_event(&inner))
700    }
701}
702
703/// Multi-line quoted scalar projection: applies YAML 1.2 §7.3.2 / §7.3.3 line
704/// folding (single line break → space, blank-line run of `n` blanks → `n`
705/// `\n`s) before escape decoding. Required when a top-level quoted document
706/// spans more than one source line — the single-line `quoted_val_event`
707/// concatenates `YAML_SCALAR` tokens directly and would lose all line
708/// structure.
709fn quoted_val_event_multi_line(raw: &str) -> String {
710    let trimmed = raw.trim_start_matches([' ', '\t', '\n']);
711    if trimmed.starts_with('\'') {
712        let decoded = cooking::cook_single_quoted_multi_line(trimmed);
713        format!("=VAL '{}", escape_for_event(&decoded))
714    } else {
715        let decoded = cooking::cook_double_quoted_multi_line(trimmed);
716        format!("=VAL \"{}", escape_for_event(&decoded))
717    }
718}
719
/// Render decoded scalar text in yaml-test-suite event notation: a backslash
/// is doubled, and newline, tab, carriage return, bell, backspace, vertical
/// tab, form feed, escape and NUL are written as `\n`, `\t`, `\r`, `\a`,
/// `\b`, `\v`, `\f`, `\e` and `\0`. All other characters pass through.
fn escape_for_event(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\t' => "\\t",
            '\r' => "\\r",
            '\u{07}' => "\\a",
            '\u{08}' => "\\b",
            '\u{0B}' => "\\v",
            '\u{0C}' => "\\f",
            '\u{1B}' => "\\e",
            '\0' => "\\0",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
742
/// Resolve a tag that needs no handle lookup into its `<...>` long form.
///
/// Handles the non-specific tag `!` (→ `<!>`) and bare local tags such as
/// `!local` (→ `<!local>`): anything that starts with a single `!` and has no
/// further `!` in it. Returns `None` for every other shape (no leading `!`,
/// or a handle form such as `!!str` / `!e!tag`).
fn long_tag_builtin(tag: &str) -> Option<String> {
    // `!` alone leaves an empty suffix, which also contains no second `!`,
    // so it falls out of the same rule as `!local`.
    let suffix = tag.strip_prefix('!')?;
    if suffix.contains('!') {
        None
    } else {
        Some(format!("<!{suffix}>"))
    }
}
755
/// Escape block-scalar body text for the event stream.
///
/// Only backslash, newline, tab and carriage return are rewritten (as `\\`,
/// `\n`, `\t`, `\r`); all other characters are copied verbatim.
fn escape_block_scalar_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        let seq = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\t' => "\\t",
            '\r' => "\\r",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(seq);
    }
    escaped
}
769
770/// If `value_node` encodes a literal (`|`) or folded (`>`) block scalar,
771/// return the folded scalar body. Headers with explicit chomping (`-` strip,
772/// `+` keep) or indent indicators are recognized; chomping is applied to the
773/// final body. Default chomping is "clip" (single trailing newline).
774fn extract_block_scalar_body(value_node: &SyntaxNode) -> Option<(char, String)> {
775    let tokens: Vec<_> = value_node
776        .descendants_with_tokens()
777        .filter_map(|el| el.into_token())
778        .filter(|tok| {
779            matches!(
780                tok.kind(),
781                SyntaxKind::YAML_SCALAR_TEXT
782                    | SyntaxKind::NEWLINE
783                    | SyntaxKind::WHITESPACE
784                    | SyntaxKind::YAML_COMMENT,
785            )
786        })
787        .collect();
788    fold_block_scalar_tokens(&tokens, block_scalar_parent_indent(value_node))
789}
790
791/// Compute the column of the start-of-line for the parent scope of a
792/// block-scalar value, used to anchor explicit indent indicators per
793/// YAML 1.2 §8.1.1.1: when a block-scalar header carries an indentation
794/// indicator `m`, the absolute content indent is `parent_indent + m`.
795///
796/// Walks up to the YAML_BLOCK_MAP_ENTRY (for map values) or treats a
797/// passed YAML_BLOCK_SEQUENCE_ITEM as its own parent. Other shapes
798/// (e.g. top-level YAML_DOCUMENT) fall back to the node's own column,
799/// which is 0 at the document level.
800fn block_scalar_parent_indent(value_node: &SyntaxNode) -> usize {
801    let target = match value_node.kind() {
802        SyntaxKind::YAML_BLOCK_MAP_VALUE => value_node
803            .parent()
804            .filter(|p| p.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
805            .unwrap_or_else(|| value_node.clone()),
806        _ => value_node.clone(),
807    };
808    column_of_node_start(&target)
809}
810
811fn column_of_node_start(node: &SyntaxNode) -> usize {
812    let offset: usize = node.text_range().start().into();
813    let root = node.ancestors().last().unwrap_or_else(|| node.clone());
814    let text = root.text().to_string();
815    let cap = offset.min(text.len());
816    let prefix = &text[..cap];
817    match prefix.rfind('\n') {
818        Some(nl) => offset.saturating_sub(nl + 1),
819        None => offset,
820    }
821}
822
823/// Variant of [`extract_block_scalar_body`] that walks a full `YAML_DOCUMENT`
824/// node and applies block-scalar folding to the tokens *after* a
825/// `YAML_DOCUMENT_START` marker. Used for the directive-end-with-payload
826/// pattern (`--- |\n  ab\n  cd\n`) where the block-scalar header is packed
827/// onto the marker line itself rather than being a block-map value.
828fn extract_scalar_doc_block_body(doc: &SyntaxNode) -> Option<(char, String)> {
829    let mut started = false;
830    let mut tokens = Vec::new();
831    for el in doc.descendants_with_tokens() {
832        let Some(tok) = el.into_token() else { continue };
833        if !started {
834            if tok.kind() == SyntaxKind::YAML_DOCUMENT_START {
835                started = true;
836            }
837            continue;
838        }
839        match tok.kind() {
840            SyntaxKind::YAML_DOCUMENT_END => break,
841            SyntaxKind::YAML_SCALAR_TEXT
842            | SyntaxKind::NEWLINE
843            | SyntaxKind::WHITESPACE
844            | SyntaxKind::YAML_COMMENT => tokens.push(tok),
845            _ => {}
846        }
847    }
848    fold_block_scalar_tokens(&tokens, 0)
849}
850
851/// Detect a top-level (no `YAML_DOCUMENT_START` marker) block-scalar document
852/// of the form `>\n …` or `|\n …`. Walks the document's content tokens and
853/// applies block-scalar folding when the first scalar token is a bare
854/// block-scalar header. Returns `None` otherwise so plain / quoted scalar
855/// handling can proceed.
856fn extract_top_level_block_body(doc: &SyntaxNode) -> Option<(char, String)> {
857    if doc
858        .descendants_with_tokens()
859        .filter_map(|el| el.into_token())
860        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START)
861    {
862        return None;
863    }
864    let tokens: Vec<_> = doc
865        .descendants_with_tokens()
866        .filter_map(|el| el.into_token())
867        .filter(|tok| {
868            matches!(
869                tok.kind(),
870                SyntaxKind::YAML_SCALAR_TEXT
871                    | SyntaxKind::NEWLINE
872                    | SyntaxKind::WHITESPACE
873                    | SyntaxKind::YAML_COMMENT,
874            )
875        })
876        .collect();
877    // Same shape tolerance as `fold_block_scalar_tokens`: v1 emits the
878    // header as a standalone scalar, v2 emits the whole block scalar
879    // (header + newline + body) as a single token. Detect the header by
880    // inspecting up to the first newline.
881    let first = tokens.iter().find(|tok| {
882        if tok.kind() != SyntaxKind::YAML_SCALAR_TEXT {
883            return false;
884        }
885        let header_part = tok.text().split('\n').next().unwrap_or("");
886        parse_block_scalar_indicator(header_part).is_some()
887    })?;
888    let _ = first;
889    fold_block_scalar_tokens(&tokens, 0)
890}
891
892fn fold_block_scalar_tokens(
893    tokens: &[SyntaxToken],
894    parent_indent: usize,
895) -> Option<(char, String)> {
896    // Locate the header. v1 emits the header (`|`, `|+`, `>1` …) as a
897    // standalone YAML_SCALAR token and the body as separate per-line
898    // tokens. v2 emits the entire block scalar (header + newline + body)
899    // as a single YAML_SCALAR token. Detect either shape by inspecting
900    // the chars before the first `\n` of the candidate token.
901    let header_idx = tokens.iter().position(|t| {
902        if t.kind() != SyntaxKind::YAML_SCALAR_TEXT {
903            return false;
904        }
905        let header_part = t.text().split('\n').next().unwrap_or("");
906        parse_block_scalar_indicator(header_part).is_some()
907    })?;
908    let header_text = tokens[header_idx].text();
909    let header_part = header_text.split('\n').next().unwrap_or("");
910
911    // Reconstruct the body source. Including `WHITESPACE` and
912    // `YAML_COMMENT` tokens preserves the indentation needed for
913    // content-indent calculation and lets a `# ...` line at column 0
914    // (DK3J) land inside the body, while a less-indented `# Comment`
915    // after a fully-indented body region (7T8X) gets recognized as a
916    // body terminator.
917    let mut raw = String::new();
918    let unified_token = header_text.len() > header_part.len();
919    if unified_token {
920        // v2 shape: peel the header and its trailing newline out of the
921        // single token, keep the rest as the body prefix. Then append
922        // any later tokens verbatim.
923        raw.push_str(&header_text[header_part.len() + 1..]);
924        for tok in &tokens[header_idx + 1..] {
925            raw.push_str(tok.text());
926        }
927    } else {
928        // v1 shape: skip the standalone header's trailing NEWLINE and
929        // stitch every later token verbatim.
930        let mut skipped_header_newline = false;
931        for tok in &tokens[header_idx + 1..] {
932            if !skipped_header_newline && tok.kind() == SyntaxKind::NEWLINE {
933                skipped_header_newline = true;
934                continue;
935            }
936            raw.push_str(tok.text());
937        }
938    }
939
940    fold_block_scalar_raw(header_part, &raw, parent_indent)
941}
942
943/// Fold a block scalar from its header line (`|`, `>2-`, …) and the raw body
944/// source that follows the header's trailing newline. `parent_indent` anchors
945/// explicit indent indicators per YAML 1.2 §8.1.1.1.
946fn fold_block_scalar_raw(
947    header_part: &str,
948    raw: &str,
949    parent_indent: usize,
950) -> Option<(char, String)> {
951    let (indicator, chomp, explicit_indent) = parse_block_scalar_indicator(header_part)?;
952
953    let raw_trailing_newlines = raw.chars().rev().take_while(|c| *c == '\n').count();
954
955    let lines: Vec<&str> = raw.split('\n').collect();
956
957    // Per YAML 1.2 §8.1.1.1, the content indentation level is set by the
958    // first non-empty line of the contents — unless an explicit indent
959    // indicator is given in the header, in which case the absolute
960    // content indent is `parent_indent + m`. `parent_indent` is the
961    // column of the parent block (block-map-entry or block-sequence-item)
962    // that contains the block-scalar; nested map/seq values pick up
963    // the right anchor (e.g. `- aaa: |2` → parent col 2 + 2 → 4).
964    //
965    // §6.1: indentation only counts as spaces. A tab (or other non-space
966    // char) past the leading spaces is content, so a line like ` \t`
967    // counts as non-empty with leading-space count 1 (Y79Y/001).
968    // If every line is space-only, fall back to the max leading-space
969    // count among all lines per §8.1.1.1 paragraph 2 (JEF9/01-02).
970    let leading_spaces = |l: &str| l.chars().take_while(|c| *c == ' ').count();
971    let content_indent = match explicit_indent {
972        Some(m) => parent_indent + m,
973        None => lines
974            .iter()
975            .find(|l| l.chars().any(|c| c != ' '))
976            .map(|l| leading_spaces(l))
977            .unwrap_or_else(|| lines.iter().map(|l| leading_spaces(l)).max().unwrap_or(0)),
978    };
979
980    // Truncate at the first non-empty line whose indentation drops below the
981    // content indent — that's where the block scalar's body ends per spec.
982    // Trailing blanks coming from the source are kept; only the synthetic
983    // final empty produced by `split('\n')` over a trailing newline is
984    // dropped (and only when we walked off the end of the input — when we
985    // broke out early on a dedented line, the trailing blank is real).
986    let mut body_lines: Vec<&str> = Vec::new();
987    let mut seen_content = false;
988    let mut broke_out = false;
989    for line in lines.iter() {
990        let is_blank = line.trim().is_empty();
991        let indent = line.chars().take_while(|c| *c == ' ').count();
992        if !is_blank && seen_content && indent < content_indent {
993            broke_out = true;
994            break;
995        }
996        body_lines.push(line);
997        if !is_blank {
998            seen_content = true;
999        }
1000    }
1001    if !broke_out && body_lines.last().is_some_and(|s| s.is_empty()) {
1002        body_lines.pop();
1003    }
1004
1005    let stripped: Vec<BlockBodyLine> = body_lines
1006        .iter()
1007        .map(|l| {
1008            // Always strip up to `content_indent` columns; for `|` style this
1009            // preserves trailing spaces past the content indent (T26H).
1010            let text = if l.len() >= content_indent {
1011                l[content_indent..].to_string()
1012            } else {
1013                String::new()
1014            };
1015            // "Blank" for folding is decided on the stripped text, not the
1016            // raw line: a line of pure whitespace less-indented than content
1017            // (e.g. ` ` with content_indent=2) strips to empty and is blank,
1018            // while a stripped tab (` \t` with content_indent=1 → `\t`) is
1019            // content, not blank. More-indented lines (per §8.1.3) preserve
1020            // literal line breaks; the spec defines them as content lines
1021            // beginning with extra whitespace, so we test the stripped text's
1022            // first character rather than counting only leading spaces (which
1023            // would miss tab-prefixed content like R4YG/MJS9).
1024            let is_blank = text.is_empty();
1025            let is_mi = !is_blank && text.starts_with([' ', '\t']);
1026            BlockBodyLine {
1027                text,
1028                is_blank,
1029                is_mi,
1030            }
1031        })
1032        .collect();
1033
1034    let folded = match indicator {
1035        '|' => stripped
1036            .iter()
1037            .map(|l| l.text.as_str())
1038            .collect::<Vec<_>>()
1039            .join("\n"),
1040        '>' => fold_greater_lines(&stripped),
1041        _ => unreachable!(),
1042    };
1043
1044    let trimmed = folded.trim_end_matches('\n');
1045    let body = match chomp {
1046        BlockScalarChomp::Strip => trimmed.to_string(),
1047        BlockScalarChomp::Clip => {
1048            if trimmed.is_empty() {
1049                String::new()
1050            } else {
1051                format!("{trimmed}\n")
1052            }
1053        }
1054        BlockScalarChomp::Keep => {
1055            // Keep chomping preserves the line break after the last
1056            // content line plus one line break per trailing empty line.
1057            // "Empty" is checked on the stripped text (so a raw `  `
1058            // line stripped to ` ` is content, not empty).
1059            //
1060            // When there are no content lines (`seen_content == false`),
1061            // each whitespace-only body line still contributes one `\n`
1062            // (JEF9/02 produces `\n` even with no trailing source newline,
1063            // because the line break after the header is implicit). Fall
1064            // back to `raw_trailing_newlines` only when no body line was
1065            // captured at all (`|+\n` with no body source).
1066            let body_trailing_empty = stripped
1067                .iter()
1068                .rev()
1069                .take_while(|l| l.text.is_empty())
1070                .count();
1071            let count = if seen_content {
1072                body_trailing_empty + 1
1073            } else if !stripped.is_empty() {
1074                body_trailing_empty
1075            } else {
1076                raw_trailing_newlines
1077            };
1078            format!("{trimmed}{}", "\n".repeat(count))
1079        }
1080    };
1081    Some((indicator, body))
1082}
1083
/// One body line of a block scalar after the content indent was removed.
struct BlockBodyLine {
    /// Line text with the content indent stripped.
    text: String,
    /// Whether the line counts as blank for folding purposes.
    is_blank: bool,
    /// Whether the line is "more indented" (starts with extra whitespace).
    is_mi: bool,
}

/// Apply the YAML 1.2 §8.1.3 folded-scalar rules to a sequence of
/// content-indent-stripped body lines:
/// - Each blank line before the first content line yields one `\n`.
/// - Between two adjacent content lines that are both not more-indented, a
///   single line break becomes ` ` and a run of `n` blank lines becomes `n`
///   `\n` characters.
/// - When either neighbour is more-indented, every line break between the
///   two content lines is kept (`n + 1` `\n` characters for `n` blanks).
/// - Blank lines after the last content line produce no output; if there is
///   no content line at all, each blank line yields one `\n`.
fn fold_greater_lines(lines: &[BlockBodyLine]) -> String {
    let mut folded = String::new();
    // Blank lines seen since the previous content line (or since the start).
    let mut pending_blanks = 0usize;
    // `None` until the first content line; afterwards that line's MI flag.
    let mut prev_mi: Option<bool> = None;

    for line in lines {
        if line.is_blank {
            pending_blanks += 1;
            continue;
        }
        let separator = match prev_mi {
            None => "\n".repeat(pending_blanks),
            Some(prev) if prev || line.is_mi => "\n".repeat(pending_blanks + 1),
            Some(_) if pending_blanks == 0 => " ".to_string(),
            Some(_) => "\n".repeat(pending_blanks),
        };
        folded.push_str(&separator);
        folded.push_str(&line.text);
        prev_mi = Some(line.is_mi);
        pending_blanks = 0;
    }

    if prev_mi.is_none() {
        // Only blank lines were present: each one still contributes a `\n`.
        folded.push_str(&"\n".repeat(pending_blanks));
    }
    folded
}
1141
/// Chomping mode selected by a block-scalar header.
#[derive(Clone, Copy)]
enum BlockScalarChomp {
    /// No chomping indicator in the header.
    Clip,
    /// `-` indicator.
    Strip,
    /// `+` indicator.
    Keep,
}

/// Parse a block-scalar header line such as `|`, `>-`, `|2+` or `> # note`.
///
/// Returns the style indicator (`|` or `>`), the chomping mode (defaulting to
/// [`BlockScalarChomp::Clip`]) and the explicit indentation digit, if any.
/// The chomping indicator and the indent digit (`1`–`9`) may appear in either
/// order but at most once each. After them, only spaces/tabs optionally
/// followed by a `#` comment are accepted. Anything else yields `None`.
fn parse_block_scalar_indicator(text: &str) -> Option<(char, BlockScalarChomp, Option<usize>)> {
    let (indicator, after) = if let Some(rest) = text.strip_prefix('|') {
        ('|', rest)
    } else if let Some(rest) = text.strip_prefix('>') {
        ('>', rest)
    } else {
        return None;
    };

    // Split into the indicator flags and whatever follows the first blank.
    let flags_end = after.find([' ', '\t']).unwrap_or(after.len());
    let (flags, trailer) = after.split_at(flags_end);

    let mut chomp: Option<BlockScalarChomp> = None;
    let mut indent: Option<usize> = None;
    for flag in flags.chars() {
        match flag {
            '+' if chomp.is_none() => chomp = Some(BlockScalarChomp::Keep),
            '-' if chomp.is_none() => chomp = Some(BlockScalarChomp::Strip),
            '1'..='9' if indent.is_none() => indent = flag.to_digit(10).map(|d| d as usize),
            _ => return None,
        }
    }

    // Trailing whitespace + optional comment is allowed after the
    // indicators per YAML 1.2 §8.1.1 (the header line can carry a
    // comment, e.g. `| # description`).
    let tail = trailer.trim_start_matches([' ', '\t']);
    if tail.is_empty() || tail.starts_with('#') {
        Some((indicator, chomp.unwrap_or(BlockScalarChomp::Clip), indent))
    } else {
        None
    }
}
1195
1196fn project_flow_map_entries(flow_map: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
1197    // Walk the flow_map's children left-to-right, tracking any orphan
1198    // scalar text (`pending`) that sits between entries. A scalar that
1199    // isn't enclosed in a `YAML_FLOW_MAP_ENTRY` reaches us in two
1200    // shapes:
1201    //
1202    //   1. A multi-line plain scalar that the v2 scanner couldn't
1203    //      register as a simple-key candidate before the `:` arrived
1204    //      (NJ66, ZF4X, UDR7's `sky`, 8KB6, ...). In that case the
1205    //      following entry has an empty `KEY` (just the `:`), and the
1206    //      orphan IS the key — we merge them.
1207    //
1208    //   2. A standalone scalar with no `:` at all (`{a, b: c}` shape;
1209    //      8KB6's `single line, ...`). YAML 1.2 says this is a key with
1210    //      an implicit empty value, projecting as `=VAL :a` then
1211    //      `=VAL :`.
1212    //
1213    // Both shapes resolve to flushing `pending` either as the key of
1214    // the next empty-key entry or as a value-less standalone entry
1215    // (when we hit a `,` or `}` before a matching empty-key entry).
1216    let mut pending = String::new();
1217    let mut pending_has_content = false;
1218    // A flow-sequence/flow-map node sitting *between* entries is an
1219    // orphan collection key: `{[d, e]: f}` lands `[d, e]` as a sibling
1220    // node, then a separate empty-key entry carries the `:` and value
1221    // (SBG9). Hold it until the following entry so we project it as
1222    // that entry's key instead of dropping it on the `_ => {}` arm.
1223    let mut pending_key_collection: Option<SyntaxNode> = None;
1224    for child in flow_map.children_with_tokens() {
1225        match child {
1226            rowan::NodeOrToken::Token(tok) => match tok.kind() {
1227                SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT => {
1228                    if pending_has_content {
1229                        pending.push_str(tok.text());
1230                    }
1231                }
1232                SyntaxKind::YAML_FLOW_INDICATOR => match tok.text() {
1233                    "{" | "}" => {}
1234                    "," => {
1235                        if pending_has_content {
1236                            flush_pending_orphan(&pending, handles, out);
1237                            pending.clear();
1238                            pending_has_content = false;
1239                        }
1240                    }
1241                    _ => {}
1242                },
1243                SyntaxKind::YAML_KEY => {
1244                    pending.push_str(tok.text());
1245                    pending_has_content = true;
1246                }
1247                _ => {}
1248            },
1249            // An orphan scalar (not wrapped in a `YAML_FLOW_MAP_ENTRY`)
1250            // accumulates into `pending` as the next entry's key.
1251            rowan::NodeOrToken::Node(node) if node.kind() == SyntaxKind::YAML_SCALAR => {
1252                pending.push_str(&node.text().to_string());
1253                pending_has_content = true;
1254            }
1255            rowan::NodeOrToken::Node(node) if node.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY => {
1256                if let Some(key_collection) = pending_key_collection.take() {
1257                    // The orphan collection is this entry's key; the
1258                    // entry itself contributes only the `:` and value.
1259                    project_flow_collection_node(&key_collection, handles, out);
1260                    if let Some(value_node) = node
1261                        .children()
1262                        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
1263                    {
1264                        project_flow_map_value(&value_node, handles, out);
1265                    } else {
1266                        out.push("=VAL :".to_string());
1267                    }
1268                } else {
1269                    project_flow_map_entry(
1270                        &node,
1271                        if pending_has_content {
1272                            Some(pending.as_str())
1273                        } else {
1274                            None
1275                        },
1276                        handles,
1277                        out,
1278                    );
1279                }
1280                pending.clear();
1281                pending_has_content = false;
1282            }
1283            rowan::NodeOrToken::Node(node)
1284                if matches!(
1285                    node.kind(),
1286                    SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
1287                ) =>
1288            {
1289                pending_key_collection = Some(node);
1290            }
1291            _ => {}
1292        }
1293    }
1294    // A trailing orphan collection with no following entry is a key
1295    // with an implicit empty value: `{[a, b]}` ≡ `{[a, b]: ~}`.
1296    if let Some(key_collection) = pending_key_collection.take() {
1297        project_flow_collection_node(&key_collection, handles, out);
1298        out.push("=VAL :".to_string());
1299    }
1300    if pending_has_content {
1301        flush_pending_orphan(&pending, handles, out);
1302    }
1303}
1304
1305/// Flush an orphan scalar that wasn't followed by a matching
1306/// empty-key entry. YAML 1.2 treats this as an implicit-value entry
1307/// (`{a, b: c}` ≡ `{a: ~, b: c}`), so the projection emits the key
1308/// then an empty value.
1309fn flush_pending_orphan(pending: &str, handles: &TagHandles, out: &mut Vec<String>) {
1310    let trimmed = pending.trim();
1311    if trimmed.is_empty() {
1312        return;
1313    }
1314    if trimmed.starts_with('"') || trimmed.starts_with('\'') {
1315        if trimmed.contains('\n') {
1316            out.push(quoted_val_event_multi_line(trimmed));
1317        } else {
1318            out.push(quoted_val_event(trimmed));
1319        }
1320    } else {
1321        let folded = cooking::cook_plain(trimmed);
1322        let stripped = strip_explicit_key_indicator(&folded);
1323        if stripped.is_empty() {
1324            out.push("=VAL :".to_string());
1325        } else {
1326            // Resolve a leading anchor/tag/handle on the orphan key the
1327            // same way `flow_scalar_event` does for in-entry scalars.
1328            out.push(flow_scalar_event(stripped, handles));
1329        }
1330    }
1331    out.push("=VAL :".to_string());
1332}
1333
1334fn project_flow_map_entry(
1335    entry: &SyntaxNode,
1336    external_key: Option<&str>,
1337    handles: &TagHandles,
1338    out: &mut Vec<String>,
1339) {
1340    let key_node = entry
1341        .children()
1342        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
1343        .expect("flow map key");
1344    let value_node = entry
1345        .children()
1346        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
1347        .expect("flow map value");
1348
1349    let has_explicit_colon = key_node
1350        .children_with_tokens()
1351        .filter_map(|el| el.into_token())
1352        .any(|tok| tok.kind() == SyntaxKind::YAML_COLON);
1353    let key_has_content = key_node
1354        .descendants_with_tokens()
1355        .filter_map(|el| el.into_token())
1356        .any(|tok| {
1357            matches!(
1358                tok.kind(),
1359                SyntaxKind::YAML_SCALAR_TEXT | SyntaxKind::YAML_KEY
1360            )
1361        });
1362
1363    // A flow collection (`[...]` / `{...}`) nested directly inside the
1364    // KEY wrapper is a complex key (SBG9 `{[d, e]: f}`) and must project
1365    // structurally (`+SEQ [] ... -SEQ`) rather than as slurped scalar
1366    // text. With the scanner registering flow-collection-start as a
1367    // simple-key candidate, the resulting CST places the collection
1368    // node directly under `YAML_FLOW_MAP_KEY` instead of leaving it as
1369    // an orphan sibling.
1370    let key_collection = key_node.children().find(|n| {
1371        matches!(
1372            n.kind(),
1373            SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
1374        )
1375    });
1376    if let Some(collection) = key_collection {
1377        if let Some(ext) = external_key {
1378            flush_pending_orphan(ext, handles, out);
1379        }
1380        // Pick up an anchor sitting in the KEY wrapper before the
1381        // collection (`{ &a [a, &b b]: *b }`, X38W) so the structural
1382        // projection carries `&a` on the open event.
1383        let anchor = anchor_preceding_node(&key_node, &collection);
1384        project_flow_collection_node_with_anchor(&collection, anchor.as_deref(), handles, out);
1385        project_flow_map_value(&value_node, handles, out);
1386        return;
1387    }
1388
1389    // Include WHITESPACE / NEWLINE so v2's separately-emitted `?`
1390    // (`YAML_KEY`) and key scalar (`YAML_SCALAR`) keep the original
1391    // trivia between them, letting `strip_explicit_key_indicator`
1392    // recognize the `?<sp>` pattern. v1 emitted both as a single
1393    // `YAML_KEY` token so the join was already a no-op there.
1394    // Include `YAML_ANCHOR`/`YAML_ALIAS` so node properties on a flow
1395    // map key (`{ &c c: d }`, CN3R) and an alias-as-key (`{ *a: v }`,
1396    // X38W) survive into the key text — `flow_scalar_event` then
1397    // peels the leading `&anchor` or projects the `*alias`.
1398    let mut raw_key = key_node
1399        .descendants_with_tokens()
1400        .filter_map(|el| el.into_token())
1401        .filter(|tok| {
1402            matches!(
1403                tok.kind(),
1404                SyntaxKind::YAML_SCALAR_TEXT
1405                    | SyntaxKind::YAML_KEY
1406                    | SyntaxKind::YAML_ANCHOR
1407                    | SyntaxKind::YAML_ALIAS
1408                    | SyntaxKind::WHITESPACE
1409                    | SyntaxKind::NEWLINE
1410            )
1411        })
1412        .map(|tok| tok.text().to_string())
1413        .collect::<Vec<_>>()
1414        .join("");
1415
1416    // External key prepends only when the entry's own key is empty
1417    // (the v2-scanner orphan-merge case): the orphan provides the key
1418    // bytes, the entry just contributes the `:` and the value.
1419    if let Some(ext) = external_key
1420        && !key_has_content
1421    {
1422        raw_key = format!("{ext}{raw_key}");
1423    } else if let Some(ext) = external_key {
1424        // Pending was non-empty but this entry already has a real
1425        // key — flush pending as a standalone implicit-value entry
1426        // first so neither side gets dropped.
1427        flush_pending_orphan(ext, handles, out);
1428    }
1429
1430    if has_explicit_colon {
1431        // Strip the explicit-key `?` indicator (`{ ? foo : v }`) from
1432        // the projected key text. A bare `? :` entry (key reduces to
1433        // empty after stripping) projects to an empty `=VAL :`.
1434        let key_for_classify = raw_key.trim();
1435        let stripped_key = strip_explicit_key_indicator(key_for_classify);
1436        if stripped_key.is_empty() {
1437            // Tag-only key (`!!str : bar` in WZ62) — `raw_key` skips
1438            // YAML_TAG, so an entry whose key is only a tag arrives
1439            // here empty. Pick the YAML_TAG sibling off the KEY node.
1440            let key_tag = key_node
1441                .children_with_tokens()
1442                .filter_map(|el| el.into_token())
1443                .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1444                .map(|tok| tok.text().to_string());
1445            if let Some(t) = key_tag
1446                && let Some(long) = resolve_long_tag(&t, handles)
1447            {
1448                out.push(format!("=VAL {long} :"));
1449            } else {
1450                out.push("=VAL :".to_string());
1451            }
1452        } else if stripped_key.starts_with('"') || stripped_key.starts_with('\'') {
1453            if stripped_key.contains('\n') {
1454                out.push(quoted_val_event_multi_line(stripped_key));
1455            } else {
1456                out.push(quoted_val_event(stripped_key));
1457            }
1458        } else {
1459            // Multi-line plain key text needs folding before
1460            // resolution; flow_scalar_event does it for plain text but
1461            // bypasses folding when the input contains explicit tag
1462            // bytes — handle the plain branch here so multi-line
1463            // orphans collapse to a single line.
1464            let folded = cooking::cook_plain(stripped_key);
1465            out.push(flow_scalar_event(&folded, handles));
1466        }
1467        project_flow_map_value(&value_node, handles, out);
1468    } else {
1469        let raw_value = value_node
1470            .descendants_with_tokens()
1471            .filter_map(|el| el.into_token())
1472            .filter(|tok| {
1473                matches!(
1474                    tok.kind(),
1475                    SyntaxKind::YAML_SCALAR_TEXT | SyntaxKind::YAML_ANCHOR | SyntaxKind::YAML_ALIAS
1476                )
1477            })
1478            .map(|tok| tok.text().to_string())
1479            .collect::<Vec<_>>()
1480            .join("");
1481        let combined = format!("{raw_key}{raw_value}");
1482        let folded = cooking::cook_plain(&combined);
1483        let stripped = strip_explicit_key_indicator(&folded);
1484        if stripped.is_empty() {
1485            out.push("=VAL :".to_string());
1486        } else {
1487            out.push(plain_val_event(stripped));
1488        }
1489        out.push("=VAL :".to_string());
1490    }
1491}
1492
1493/// Project a `YAML_FLOW_MAP_VALUE` node, recursing into nested flow
1494/// collections (`+SEQ [] ... -SEQ`, `+MAP {} ... -MAP`) when present so that
1495/// multi-line nested flow values like `{ a: [ b, c, { d: [e, f] } ] }`
1496/// produce structured event streams instead of one slurped scalar.
1497fn project_flow_map_value(value_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
1498    // A YAML_TAG sibling decorates the nested flow collection or scalar
1499    // that follows (EHF6 `k: !!seq [ a, !!str b]`).
1500    let decoration_tag = value_node
1501        .children_with_tokens()
1502        .filter_map(|el| el.into_token())
1503        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1504        .and_then(|tok| resolve_long_tag(tok.text(), handles));
1505    if let Some(flow_seq) = value_node
1506        .children()
1507        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1508    {
1509        out.push(match decoration_tag {
1510            Some(t) => format!("+SEQ [] {t}"),
1511            None => "+SEQ []".to_string(),
1512        });
1513        project_flow_sequence_items_cst(&flow_seq, handles, out);
1514        out.push("-SEQ".to_string());
1515        return;
1516    }
1517    if let Some(nested_map) = value_node
1518        .children()
1519        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1520    {
1521        out.push(match decoration_tag {
1522            Some(t) => format!("+MAP {{}} {t}"),
1523            None => "+MAP {}".to_string(),
1524        });
1525        project_flow_map_entries(&nested_map, handles, out);
1526        out.push("-MAP".to_string());
1527        return;
1528    }
1529
1530    // Include `YAML_COLON` tokens alongside `YAML_SCALAR` so a
1531    // plain-scalar value that begins with `:` (e.g. 5T43's
1532    // `{ "key"::value }` and 58MP's `{x: :x}` — leading `:` after
1533    // the entry's key indicator) carries its colon into the event
1534    // body. The scanner emits the leading `:` as a stray Value token
1535    // that the v2 builder lands inside the VALUE wrapper; without
1536    // collecting `YAML_COLON` here the projection drops it and the
1537    // event becomes `=VAL :value` instead of `=VAL ::value`.
1538    let raw_value = value_node
1539        .descendants_with_tokens()
1540        .filter_map(|el| el.into_token())
1541        .filter(|tok| {
1542            matches!(
1543                tok.kind(),
1544                SyntaxKind::YAML_SCALAR_TEXT
1545                    | SyntaxKind::YAML_ANCHOR
1546                    | SyntaxKind::YAML_ALIAS
1547                    | SyntaxKind::YAML_COLON
1548            )
1549        })
1550        .map(|tok| tok.text().to_string())
1551        .collect::<Vec<_>>()
1552        .join("");
1553    if raw_value.trim().is_empty() {
1554        // Tag-only value (`!!str,` in WZ62) — no scalar content but a
1555        // YAML_TAG sibling annotates the empty value.
1556        let tag = value_node
1557            .children_with_tokens()
1558            .filter_map(|el| el.into_token())
1559            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1560            .map(|tok| tok.text().to_string());
1561        if let Some(t) = tag
1562            && let Some(long) = resolve_long_tag(&t, handles)
1563        {
1564            out.push(format!("=VAL {long} :"));
1565            return;
1566        }
1567    }
1568    out.push(flow_scalar_event(&raw_value, handles));
1569}
1570
1571/// Emit the events for a flow collection node (`+SEQ [] ... -SEQ` or
1572/// `+MAP {} ... -MAP`). Shared by flow-map orphan-key projection and
1573/// flow-sequence single-pair-map projection so a collection sitting in
1574/// key position is projected structurally, not slurped as scalar text.
1575fn project_flow_collection_node(node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
1576    project_flow_collection_node_with_anchor(node, None, handles, out);
1577}
1578
1579/// Variant of [`project_flow_collection_node`] that propagates a
1580/// caller-extracted anchor (e.g. `&a [a, &b b]`) into the collection's
1581/// open event (`+SEQ [] &a`, `+MAP {} &a`). The anchor name is passed
1582/// without its leading `&`. A `tag` (already resolved to the long form
1583/// `<tag:...>`) is appended after the anchor when the parent decorates
1584/// the flow collection (`--- !!map { ... }`, EHF6).
1585fn project_flow_collection_node_with_anchor(
1586    node: &SyntaxNode,
1587    anchor: Option<&str>,
1588    handles: &TagHandles,
1589    out: &mut Vec<String>,
1590) {
1591    let parent_tag = node
1592        .parent()
1593        .and_then(|p| {
1594            p.children_with_tokens()
1595                .filter_map(|el| el.into_token())
1596                .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1597        })
1598        .and_then(|tok| resolve_long_tag(tok.text(), handles));
1599    let decoration_suffix = match (anchor, parent_tag) {
1600        (Some(a), Some(t)) => format!(" &{a} {t}"),
1601        (Some(a), None) => format!(" &{a}"),
1602        (None, Some(t)) => format!(" {t}"),
1603        (None, None) => String::new(),
1604    };
1605    match node.kind() {
1606        SyntaxKind::YAML_FLOW_SEQUENCE => {
1607            out.push(format!("+SEQ []{decoration_suffix}"));
1608            project_flow_sequence_items_cst(node, handles, out);
1609            out.push("-SEQ".to_string());
1610        }
1611        SyntaxKind::YAML_FLOW_MAP => {
1612            out.push(format!("+MAP {{}}{decoration_suffix}"));
1613            project_flow_map_entries(node, handles, out);
1614            out.push("-MAP".to_string());
1615        }
1616        _ => {}
1617    }
1618}
1619
1620/// Walk `container`'s children-with-tokens from the start; return the
1621/// anchor name (sans `&`) of any `YAML_ANCHOR` token that sits before
1622/// `target` (and is not separated from it by a non-trivia token). Used
1623/// to splice a key/value anchor onto a structural projection of a
1624/// flow collection (`&a [...]`, `&a { ... }`).
1625fn anchor_preceding_node(container: &SyntaxNode, target: &SyntaxNode) -> Option<String> {
1626    let mut anchor: Option<String> = None;
1627    for el in container.children_with_tokens() {
1628        match el {
1629            rowan::NodeOrToken::Token(tok) => match tok.kind() {
1630                SyntaxKind::YAML_ANCHOR => {
1631                    anchor = tok.text().strip_prefix('&').map(|s| s.to_string());
1632                }
1633                SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT => {}
1634                _ => anchor = None,
1635            },
1636            rowan::NodeOrToken::Node(node) => {
1637                if node == *target {
1638                    return anchor;
1639                }
1640                anchor = None;
1641            }
1642        }
1643    }
1644    None
1645}
1646
1647/// Project the value side of a flow-sequence single-pair map item:
1648/// everything after the item's first direct-child colon. A trailing
1649/// flow collection projects structurally; otherwise the scalar text
1650/// (possibly empty → `=VAL :`) is emitted inline.
1651fn project_flow_seq_item_pair_value(
1652    item: &SyntaxNode,
1653    handles: &TagHandles,
1654    out: &mut Vec<String>,
1655) {
1656    let mut seen_colon = false;
1657    let mut value_text = String::new();
1658    for el in item.children_with_tokens() {
1659        match el {
1660            rowan::NodeOrToken::Token(tok) => {
1661                if !seen_colon {
1662                    if tok.kind() == SyntaxKind::YAML_COLON {
1663                        seen_colon = true;
1664                    }
1665                    continue;
1666                }
1667                if matches!(
1668                    tok.kind(),
1669                    SyntaxKind::YAML_KEY | SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE
1670                ) {
1671                    value_text.push_str(tok.text());
1672                }
1673            }
1674            rowan::NodeOrToken::Node(node)
1675                if seen_colon
1676                    && matches!(
1677                        node.kind(),
1678                        SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
1679                    ) =>
1680            {
1681                project_flow_collection_node(&node, handles, out);
1682                return;
1683            }
1684            // The value scalar after the colon is a `YAML_SCALAR` node.
1685            rowan::NodeOrToken::Node(node)
1686                if seen_colon && node.kind() == SyntaxKind::YAML_SCALAR =>
1687            {
1688                value_text.push_str(&node.text().to_string());
1689            }
1690            _ => {}
1691        }
1692    }
1693    project_inline_scalar(&value_text, handles, out);
1694}
1695
1696/// CST-walking variant of flow-sequence projection. Each
1697/// `YAML_FLOW_SEQUENCE_ITEM` may contain a nested `YAML_FLOW_SEQUENCE` /
1698/// `YAML_FLOW_MAP`; if neither is present we fall back to the text-based
1699/// `project_flow_seq_item` for plain/quoted scalar items.
1700fn project_flow_sequence_items_cst(
1701    flow_seq: &SyntaxNode,
1702    handles: &TagHandles,
1703    out: &mut Vec<String>,
1704) {
1705    for item in flow_seq
1706        .children()
1707        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
1708    {
1709        // A flow-sequence item shaped `<collection>: <value>` is an
1710        // implicit single-pair map keyed by the collection
1711        // (`[ [[b,c]]: d ]`, `[ {JSON: like}: adjacent ]`). Detect a
1712        // leading collection node followed by a direct-child colon and
1713        // wrap it in `+MAP {} ... -MAP`; scalar-keyed pairs keep the
1714        // proven text path (`flow_kv_split`) below.
1715        if let Some(key_collection) = item.children().next().filter(|n| {
1716            matches!(
1717                n.kind(),
1718                SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
1719            )
1720        }) && item
1721            .children_with_tokens()
1722            .filter_map(|el| el.into_token())
1723            .any(|tok| tok.kind() == SyntaxKind::YAML_COLON)
1724        {
1725            out.push("+MAP {}".to_string());
1726            project_flow_collection_node(&key_collection, handles, out);
1727            project_flow_seq_item_pair_value(&item, handles, out);
1728            out.push("-MAP".to_string());
1729            continue;
1730        }
1731        if let Some(nested_seq) = item
1732            .children()
1733            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1734        {
1735            // Propagate an item-level anchor (`[ &g [...] ]`, CN3R-shape)
1736            // onto the nested collection's open event.
1737            let anchor = anchor_preceding_node(&item, &nested_seq);
1738            project_flow_collection_node_with_anchor(&nested_seq, anchor.as_deref(), handles, out);
1739            continue;
1740        }
1741        if let Some(nested_map) = item
1742            .children()
1743            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1744        {
1745            let anchor = anchor_preceding_node(&item, &nested_map);
1746            project_flow_collection_node_with_anchor(&nested_map, anchor.as_deref(), handles, out);
1747            continue;
1748        }
1749        // Build the item text from scalar/key/colon tokens plus
1750        // structural whitespace so an embedded `:` (e.g. an implicit
1751        // flow-map entry like `'k' : v` written inside `[...]`, see
1752        // 87E4 / L9U5 / LQZ7) survives into `flow_kv_split`. Skipping
1753        // colons collapsed the entry into a single `=VAL :scalar` and
1754        // hid the `+MAP {} ... -MAP` wrap; preserving them lets
1755        // `project_flow_seq_item` recognize the kv pattern.
1756        // `YAML_COMMENT` tokens stay excluded so leading/trailing
1757        // comments inside multi-line items don't leak into the value.
1758        // Include `YAML_ANCHOR`/`YAML_ALIAS` so node properties on a
1759        // plain item (`[&item a, b]`, 6BFJ) and bare aliases (`[*b]`,
1760        // X38W) survive into the item text — `flow_scalar_event`
1761        // (called from `project_flow_seq_item`) then peels them.
1762        let item_text: String = item
1763            .descendants_with_tokens()
1764            .filter_map(|el| el.into_token())
1765            .filter(|tok| {
1766                matches!(
1767                    tok.kind(),
1768                    SyntaxKind::YAML_SCALAR_TEXT
1769                        | SyntaxKind::YAML_KEY
1770                        | SyntaxKind::YAML_COLON
1771                        | SyntaxKind::YAML_ANCHOR
1772                        | SyntaxKind::YAML_ALIAS
1773                        | SyntaxKind::YAML_TAG
1774                        | SyntaxKind::WHITESPACE
1775                        | SyntaxKind::NEWLINE
1776                )
1777            })
1778            .map(|tok| tok.text().to_string())
1779            .collect();
1780        project_flow_seq_item(&item_text, handles, out);
1781    }
1782}
1783
1784/// Project a single scalar (without surrounding `+MAP`/`-MAP`) for an inline
1785/// map key or value position. Anchors/tags are decomposed in canonical order;
1786/// alias references (`*name`) emit `=ALI`. An empty body emits `=VAL :`.
1787fn project_inline_scalar(text: &str, handles: &TagHandles, out: &mut Vec<String>) {
1788    let trimmed = text.trim();
1789    if trimmed.is_empty() {
1790        out.push("=VAL :".to_string());
1791        return;
1792    }
1793    if trimmed.starts_with('*') {
1794        out.push(format!("=ALI {trimmed}"));
1795        return;
1796    }
1797    let (anchor, body_tag, body) = decompose_scalar(trimmed, handles);
1798    out.push(scalar_event(anchor, body_tag.as_deref(), body));
1799}
1800
1801fn project_block_sequence_items(
1802    seq_node: &SyntaxNode,
1803    handles: &TagHandles,
1804    out: &mut Vec<String>,
1805) {
1806    for item in seq_node
1807        .children()
1808        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
1809    {
1810        if let Some(nested_seq) = item
1811            .children()
1812            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
1813        {
1814            // A YAML_TAG / YAML_ANCHOR sibling decorates the nested
1815            // sequence (`- !!seq\n - nested`, 57H4).
1816            let mut suffix = String::new();
1817            let anchor = item
1818                .children_with_tokens()
1819                .filter_map(|el| el.into_token())
1820                .find(|tok| tok.kind() == SyntaxKind::YAML_ANCHOR)
1821                .and_then(|tok| tok.text().strip_prefix('&').map(str::to_owned));
1822            if let Some(a) = anchor {
1823                suffix.push_str(&format!(" &{a}"));
1824            }
1825            let tag = item
1826                .children_with_tokens()
1827                .filter_map(|el| el.into_token())
1828                .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1829                .and_then(|tok| resolve_long_tag(tok.text(), handles));
1830            if let Some(t) = tag {
1831                suffix.push(' ');
1832                suffix.push_str(&t);
1833            }
1834            out.push(format!("+SEQ{suffix}"));
1835            project_block_sequence_items(&nested_seq, handles, out);
1836            out.push("-SEQ".to_string());
1837            continue;
1838        }
1839        // Inline-map sequence item: `- key: value` (with optional continuation
1840        // lines that the parser captures as a nested YAML_BLOCK_MAP). The full
1841        // entry chain lives in the nested map node.
1842        if let Some(nested_map) = item
1843            .children()
1844            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1845        {
1846            out.push(map_open_event_for_block_map(&nested_map, handles));
1847            project_block_map_entries(&nested_map, handles, out);
1848            out.push("-MAP".to_string());
1849            continue;
1850        }
1851        if let Some(flow_seq) = item
1852            .children()
1853            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1854        {
1855            // Walk the CST rather than re-splitting the flow text: only the
1856            // CST walker structurally projects items whose key is itself a
1857            // flow collection (`[ {JSON: like}:adjacent ]`, 9MMW) or a nested
1858            // flow sequence; the text splitter mis-folds those into scalars.
1859            out.push("+SEQ []".to_string());
1860            project_flow_sequence_items_cst(&flow_seq, handles, out);
1861            out.push("-SEQ".to_string());
1862            continue;
1863        }
1864        if let Some(flow_map) = item
1865            .children()
1866            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1867        {
1868            out.push("+MAP {}".to_string());
1869            project_flow_map_entries(&flow_map, handles, out);
1870            out.push("-MAP".to_string());
1871            continue;
1872        }
1873        if let Some((indicator, body)) = extract_block_scalar_body(&item) {
1874            let escaped = escape_block_scalar_text(&body);
1875            out.push(format!("=VAL {indicator}{escaped}"));
1876            continue;
1877        }
1878        let item_tag = item
1879            .descendants_with_tokens()
1880            .filter_map(|el| el.into_token())
1881            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1882            .map(|tok| tok.text().to_string());
1883        // Include WHITESPACE so `&anchor body` joins as `&anchor body`,
1884        // letting `decompose_scalar` find the whitespace terminator on
1885        // the anchor name, and NEWLINE so a multi-line plain item folds
1886        // its line breaks (`x\n  \tx` → `x x`, UV7Q) instead of
1887        // concatenating the continuation indentation. See
1888        // `project_block_map_entry_value` for the matching rationale.
1889        let scalar_text = item
1890            .descendants_with_tokens()
1891            .filter_map(|el| el.into_token())
1892            .filter(|tok| {
1893                matches!(
1894                    tok.kind(),
1895                    SyntaxKind::YAML_SCALAR_TEXT
1896                        | SyntaxKind::YAML_ANCHOR
1897                        | SyntaxKind::YAML_ALIAS
1898                        | SyntaxKind::WHITESPACE
1899                        | SyntaxKind::NEWLINE
1900                )
1901            })
1902            .map(|tok| tok.text().to_string())
1903            .collect::<Vec<_>>()
1904            .join("");
1905        let scalar_trimmed = scalar_text.trim();
1906        let event = if scalar_trimmed.starts_with('*') {
1907            format!("=ALI {scalar_trimmed}")
1908        } else {
1909            // Combine the optional `YAML_TAG` token (already separated from
1910            // the scalar text by the parser) with anchors/tags found in the
1911            // scalar body, and render the YAML event in canonical
1912            // `&anchor <tag> :body` order.
1913            let item_long_tag = item_tag
1914                .as_deref()
1915                .and_then(|t| resolve_long_tag(t, handles));
1916            let (anchor, body_tag, body) = decompose_scalar(scalar_trimmed, handles);
1917            let long_tag = item_long_tag.or(body_tag);
1918            let folded;
1919            let body_for_event: &str = if body.contains('\n') {
1920                folded = cooking::cook_plain(body);
1921                &folded
1922            } else {
1923                body
1924            };
1925            scalar_event(anchor, long_tag.as_deref(), body_for_event)
1926        };
1927        out.push(event);
1928    }
1929}
1930
1931/// Decompose a node-property + scalar string into `(anchor, long_tag, body)`,
1932/// peeling off any leading `&anchor` and tag shorthand in either order
1933/// (`&a !!str foo` or `!!str &a foo`). Returns the raw body trimmed.
1934/// Build the `+SEQ` open event for a YAML_BLOCK_SEQUENCE, attaching any
1935/// document-level node properties (a tag, or a `&anchor` carried by the
1936/// block-sequence header line) that precede the first sequence item. The
1937/// parser stores those properties as YAML_TAG / YAML_SCALAR siblings of
1938/// the YAML_BLOCK_SEQUENCE_ITEM children, in source order.
1939fn seq_open_event(seq_node: &SyntaxNode, handles: &TagHandles) -> String {
1940    let mut anchor: Option<String> = None;
1941    let mut long_tag: Option<String> = None;
1942    // v2 emits anchors/tags as siblings of the YAML_BLOCK_SEQUENCE within
1943    // the parent container (e.g. directly under a YAML_DOCUMENT for the
1944    // top-level `&anchor\n- a` shape) — not as inner-prefix tokens like
1945    // v1. Scan parent siblings preceding the SEQ first.
1946    absorb_preceding_anchor_and_tag(seq_node, handles, &mut anchor, &mut long_tag);
1947    // v1 emits anchors/tags as inner-prefix tokens of the SEQ before the
1948    // first BLOCK_SEQUENCE_ITEM. Also walk those for backward compat.
1949    for child in seq_node.children_with_tokens() {
1950        if let Some(node) = child.as_node()
1951            && node.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
1952        {
1953            break;
1954        }
1955        let Some(tok) = child.as_token() else {
1956            continue;
1957        };
1958        absorb_anchor_or_tag(tok, handles, &mut anchor, &mut long_tag);
1959    }
1960    let mut event = String::from("+SEQ");
1961    if let Some(a) = anchor {
1962        event.push_str(" &");
1963        event.push_str(&a);
1964    }
1965    if let Some(t) = long_tag {
1966        event.push(' ');
1967        event.push_str(&t);
1968    }
1969    event
1970}
1971
1972/// Walk the parent's children and absorb `YAML_TAG`/`YAML_SCALAR` tokens
1973/// (carrying a `&...` anchor or `!...` tag) that appear *before* the
1974/// `child` node, stopping at `child`. Used by `seq_open_event` /
1975/// `map_open_event_for_block_map` to capture v2's emission of leading
1976/// anchor/tag tokens at the parent level rather than inside the
1977/// container.
1978fn absorb_preceding_anchor_and_tag(
1979    child: &SyntaxNode,
1980    handles: &TagHandles,
1981    anchor: &mut Option<String>,
1982    long_tag: &mut Option<String>,
1983) {
1984    let Some(parent) = child.parent() else {
1985        return;
1986    };
1987    let target_range = child.text_range();
1988    for el in parent.children_with_tokens() {
1989        if let Some(node) = el.as_node() {
1990            if node.text_range() == target_range {
1991                break;
1992            }
1993            continue;
1994        }
1995        if let Some(tok) = el.as_token() {
1996            absorb_anchor_or_tag(tok, handles, anchor, long_tag);
1997        }
1998    }
1999}
2000
2001/// Inspect a single token for an anchor or tag and update the
2002/// respective slot. Recognizes both v1's and v2's emission shape:
2003/// - v1 emits anchors as `YAML_SCALAR` tokens whose text starts with `&`.
2004/// - v2 emits anchors as `YAML_TAG` tokens (the synthesis of anchor and
2005///   tag into a single SyntaxKind), distinguishable by the leading byte.
2006fn absorb_anchor_or_tag(
2007    tok: &SyntaxToken,
2008    handles: &TagHandles,
2009    anchor: &mut Option<String>,
2010    long_tag: &mut Option<String>,
2011) {
2012    match tok.kind() {
2013        SyntaxKind::YAML_ANCHOR => {
2014            if anchor.is_none() {
2015                *anchor = Some(tok.text().trim_start_matches('&').to_string());
2016            }
2017        }
2018        SyntaxKind::YAML_TAG => {
2019            let trimmed = tok.text().trim();
2020            if let Some(name) = trimmed.strip_prefix('&') {
2021                if anchor.is_none() {
2022                    *anchor = Some(name.to_string());
2023                }
2024            } else if trimmed.starts_with('!')
2025                && long_tag.is_none()
2026                && let Some(long) = resolve_long_tag(trimmed, handles)
2027            {
2028                *long_tag = Some(long);
2029            }
2030        }
2031        _ => {}
2032    }
2033}
2034
2035/// Build the `+MAP` open event for a nested YAML_BLOCK_MAP that lives inside
2036/// a YAML_BLOCK_MAP_VALUE. Captures any anchor (`&name`) or tag (`!!str`,
2037/// `!shorthand`, etc.) tokens that precede the inner block map so that
2038/// projected events match patterns like `+MAP &node3` from yaml-test-suite
2039/// case 26DV (`top3: &node3` followed by an indented nested block map).
2040fn map_open_event_for_value(value_node: &SyntaxNode, handles: &TagHandles) -> String {
2041    let (anchor, long_tag, _residual) = extract_leading_node_properties(value_node, handles);
2042    map_open_event_from_props(anchor.as_deref(), long_tag.as_deref())
2043}
2044
/// Render a `+MAP` open event from already-extracted node properties.
///
/// Output is `+MAP`, followed by ` &anchor` when `anchor` is present,
/// followed by ` <tag>` when `long_tag` is present — the canonical
/// yaml-test-suite order (anchor before tag), matching [`scalar_event`]
/// and `+MAP &a4 <tag:…>` fixtures. Both inputs are used verbatim: the
/// anchor without its `&`, the tag already in long form.
fn map_open_event_from_props(anchor: Option<&str>, long_tag: Option<&str>) -> String {
    let anchor_part = anchor.map(|a| format!(" &{a}")).unwrap_or_default();
    let tag_part = long_tag.map(|t| format!(" {t}")).unwrap_or_default();
    format!("+MAP{anchor_part}{tag_part}")
}
2060
2061/// Walk the leading children of a node that precedes a nested collection — a
2062/// YAML_BLOCK_MAP_VALUE (`key: &a !!map\n …`, BU8L) or a YAML_BLOCK_SEQUENCE_ITEM
2063/// (`- !!map\n …`, 6JWB) — stopping at any nested YAML_BLOCK_MAP / YAML_FLOW_MAP
2064/// / YAML_FLOW_SEQUENCE. Pulls out the optional anchor (`&name`, ending at
2065/// whitespace, comma, or flow-collection closer), the optional resolved tag,
2066/// and any residual scalar text that follows the node properties (e.g. the
2067/// `*alias1` in 26DV's `&node3 \n  *alias1` scalar, or the fused first key in
2068/// `&a !!map\n  a`). Both anchor and tag are peeled from the embedded scalar
2069/// text in either order, since the scanner fuses node properties and the first
2070/// key into one YAML_SCALAR token rather than emitting a separate YAML_TAG.
2071fn extract_leading_node_properties(
2072    node: &SyntaxNode,
2073    handles: &TagHandles,
2074) -> (Option<String>, Option<String>, String) {
2075    let mut anchor: Option<String> = None;
2076    let mut long_tag: Option<String> = None;
2077    let mut residual = String::new();
2078    for child in node.children_with_tokens() {
2079        if let Some(node) = child.as_node()
2080            && matches!(
2081                node.kind(),
2082                SyntaxKind::YAML_BLOCK_MAP
2083                    | SyntaxKind::YAML_FLOW_MAP
2084                    | SyntaxKind::YAML_FLOW_SEQUENCE
2085            )
2086        {
2087            break;
2088        }
2089        // The fused node-property + first-key scalar is a `YAML_SCALAR`
2090        // node; peel anchor/tag out of its text and keep the residual.
2091        if let Some(scalar) = child
2092            .as_node()
2093            .filter(|n| n.kind() == SyntaxKind::YAML_SCALAR)
2094        {
2095            let scalar_text = scalar.text().to_string();
2096            let mut rest = scalar_text.trim();
2097            loop {
2098                if anchor.is_none()
2099                    && let Some(after) = rest.strip_prefix('&')
2100                {
2101                    let end = after
2102                        .find(|c: char| c.is_whitespace() || matches!(c, ',' | '}' | ']'))
2103                        .unwrap_or(after.len());
2104                    anchor = Some(after[..end].to_string());
2105                    rest = after[end..].trim_start();
2106                    continue;
2107                }
2108                if long_tag.is_none()
2109                    && let Some((tag, tail)) = split_leading_tag(rest)
2110                    && let Some(long) = resolve_long_tag(tag, handles)
2111                {
2112                    long_tag = Some(long);
2113                    rest = tail.trim_start();
2114                    continue;
2115                }
2116                break;
2117            }
2118            let extra = rest.trim();
2119            if !extra.is_empty() {
2120                if !residual.is_empty() {
2121                    residual.push(' ');
2122                }
2123                residual.push_str(extra);
2124            }
2125            continue;
2126        }
2127        let Some(tok) = child.as_token() else {
2128            continue;
2129        };
2130        match tok.kind() {
2131            SyntaxKind::YAML_ANCHOR => {
2132                if anchor.is_none() {
2133                    anchor = Some(tok.text().trim_start_matches('&').to_string());
2134                }
2135            }
2136            SyntaxKind::YAML_TAG => {
2137                if long_tag.is_none()
2138                    && let Some(long) = resolve_long_tag(tok.text(), handles)
2139                {
2140                    long_tag = Some(long);
2141                }
2142            }
2143            _ => {}
2144        }
2145    }
2146    (anchor, long_tag, residual)
2147}
2148
2149/// Build the `+MAP` open event for a YAML_BLOCK_MAP rooted directly under
2150/// a YAML_DOCUMENT. Captures any anchor (`&name`) or tag (`!!str`,
2151/// `!shorthand`, etc.) tokens that the parser absorbed at the top of the
2152/// block map so that documents like `--- !!set\n? a\n? b` project as
2153/// `+MAP <tag:yaml.org,2002:set>`.
2154fn map_open_event_for_block_map(map_node: &SyntaxNode, handles: &TagHandles) -> String {
2155    let mut anchor: Option<String> = None;
2156    let mut long_tag: Option<String> = None;
2157    // Mirror `seq_open_event`: scan parent siblings preceding this MAP
2158    // first (v2 emission), then the MAP's inner-prefix tokens (v1).
2159    absorb_preceding_anchor_and_tag(map_node, handles, &mut anchor, &mut long_tag);
2160    for child in map_node.children_with_tokens() {
2161        if let Some(node) = child.as_node() {
2162            if node.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY {
2163                break;
2164            }
2165            // A `? `-prefixed scalar (now a `YAML_SCALAR` node) is the
2166            // first key of the map; stop scanning header tokens there so
2167            // we don't pick up entry-level data as document-level
2168            // node properties.
2169            if node.kind() == SyntaxKind::YAML_SCALAR {
2170                let text = node.text().to_string();
2171                let trimmed = text.trim();
2172                if trimmed.starts_with("? ") || trimmed == "?" {
2173                    break;
2174                }
2175            }
2176            continue;
2177        }
2178        let Some(tok) = child.as_token() else {
2179            continue;
2180        };
2181        absorb_anchor_or_tag(tok, handles, &mut anchor, &mut long_tag);
2182    }
2183    map_open_event_from_props(anchor.as_deref(), long_tag.as_deref())
2184}
2185
2186fn decompose_scalar<'a>(
2187    text: &'a str,
2188    handles: &TagHandles,
2189) -> (Option<&'a str>, Option<String>, &'a str) {
2190    let mut anchor: Option<&str> = None;
2191    let mut long_tag: Option<String> = None;
2192    let mut rest = text.trim();
2193    loop {
2194        if anchor.is_none()
2195            && let Some(after) = rest.strip_prefix('&')
2196        {
2197            let end = after
2198                .find(|c: char| c.is_whitespace() || matches!(c, ',' | '}' | ']'))
2199                .unwrap_or(after.len());
2200            let (name, tail) = after.split_at(end);
2201            anchor = Some(name);
2202            rest = tail.trim_start();
2203            continue;
2204        }
2205        if long_tag.is_none()
2206            && let Some((tag, tail)) = split_leading_tag(rest)
2207            && let Some(long) = resolve_long_tag(tag, handles)
2208        {
2209            long_tag = Some(long);
2210            rest = tail.trim_start();
2211            continue;
2212        }
2213        break;
2214    }
2215    (anchor, long_tag, rest)
2216}
2217
2218/// Render a scalar event from its decomposed parts: optional anchor,
2219/// optional long-form tag (already in `<...>` form), and the scalar body.
2220/// Handles plain, double-quoted, and single-quoted bodies; quoted bodies
2221/// share the same escape normalization as [`quoted_val_event`].
2222fn scalar_event(anchor: Option<&str>, long_tag: Option<&str>, body: &str) -> String {
2223    let mut prefix = String::new();
2224    if let Some(a) = anchor {
2225        prefix.push_str(&format!("&{a} "));
2226    }
2227    if let Some(t) = long_tag {
2228        prefix.push_str(t);
2229        prefix.push(' ');
2230    }
2231    let body = body.trim();
2232    if body.is_empty() {
2233        return format!("=VAL {prefix}:");
2234    }
2235    if body.starts_with('"') || body.starts_with('\'') {
2236        // Reuse the shared escape/normalization rules; splice the prefix in
2237        // place of the leading `=VAL ` token.
2238        let quoted = quoted_val_event(body);
2239        return quoted.replacen("=VAL ", &format!("=VAL {prefix}"), 1);
2240    }
2241    // yaml-test-suite events escape `\`, control characters, and embedded
2242    // newlines in plain-scalar bodies. Apply that here so callers can pass
2243    // raw (or fold-only) text and not pre-escape.
2244    format!("=VAL {prefix}:{}", escape_for_event(body))
2245}
2246
2247fn project_block_map_entries(map_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
2248    let children: Vec<_> = map_node.children_with_tokens().collect();
2249    let mut idx = 0;
2250    while idx < children.len() {
2251        match &children[idx] {
2252            rowan::NodeOrToken::Node(scalar)
2253                if scalar.kind() == SyntaxKind::YAML_SCALAR && {
2254                    let t = scalar.text().to_string();
2255                    let ts = t.trim_start();
2256                    ts.starts_with("? ") || ts == "?"
2257                } =>
2258            {
2259                let scalar_text = scalar.text().to_string();
2260                let body = scalar_text.trim_start().trim_start_matches('?').trim();
2261                if body.is_empty() {
2262                    out.push("=VAL :".to_string());
2263                } else {
2264                    let (anchor, body_tag, rest) = decompose_scalar(body, handles);
2265                    out.push(scalar_event(anchor, body_tag.as_deref(), rest));
2266                }
2267                idx += 1;
2268                // Look ahead for the matching `:value` line. Skip
2269                // intervening newlines, whitespace, and comments. Stop at
2270                // anything else — that means the value is implicitly null.
2271                let mut peek = idx;
2272                while peek < children.len() {
2273                    if let rowan::NodeOrToken::Token(t) = &children[peek] {
2274                        if matches!(
2275                            t.kind(),
2276                            SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE | SyntaxKind::YAML_COMMENT
2277                        ) {
2278                            peek += 1;
2279                            continue;
2280                        }
2281                        if t.kind() == SyntaxKind::YAML_COLON {
2282                            // Colon found: collect value tokens up to the
2283                            // next NEWLINE.
2284                            let mut value_tag: Option<String> = None;
2285                            let mut value_text = String::new();
2286                            let mut value_end = peek + 1;
2287                            while value_end < children.len() {
2288                                match &children[value_end] {
2289                                    rowan::NodeOrToken::Token(vt) => {
2290                                        if vt.kind() == SyntaxKind::NEWLINE {
2291                                            break;
2292                                        }
2293                                        if vt.kind() == SyntaxKind::YAML_TAG && value_tag.is_none()
2294                                        {
2295                                            value_tag = Some(vt.text().to_string());
2296                                        } else if matches!(
2297                                            vt.kind(),
2298                                            SyntaxKind::YAML_ANCHOR
2299                                                | SyntaxKind::YAML_ALIAS
2300                                                | SyntaxKind::WHITESPACE
2301                                        ) {
2302                                            value_text.push_str(vt.text());
2303                                        }
2304                                        value_end += 1;
2305                                    }
2306                                    // The value scalar is a `YAML_SCALAR` node.
2307                                    rowan::NodeOrToken::Node(vn)
2308                                        if vn.kind() == SyntaxKind::YAML_SCALAR =>
2309                                    {
2310                                        value_text.push_str(&vn.text().to_string());
2311                                        value_end += 1;
2312                                    }
2313                                    _ => break,
2314                                }
2315                            }
2316                            let trimmed = value_text.trim();
2317                            let value_long_tag = value_tag
2318                                .as_deref()
2319                                .and_then(|t| resolve_long_tag(t, handles));
2320                            if trimmed.is_empty() {
2321                                if let Some(long) = value_long_tag {
2322                                    out.push(format!("=VAL {long} :"));
2323                                } else {
2324                                    out.push("=VAL :".to_string());
2325                                }
2326                            } else if trimmed.starts_with('"') || trimmed.starts_with('\'') {
2327                                let quoted = quoted_val_event(trimmed);
2328                                if let Some(long) = value_long_tag {
2329                                    out.push(quoted.replacen("=VAL ", &format!("=VAL {long} "), 1));
2330                                } else {
2331                                    out.push(quoted);
2332                                }
2333                            } else {
2334                                let (anchor, body_tag, body) = decompose_scalar(trimmed, handles);
2335                                let long_tag = value_long_tag.or(body_tag);
2336                                out.push(scalar_event(anchor, long_tag.as_deref(), body));
2337                            }
2338                            idx = value_end;
2339                            break;
2340                        }
2341                    }
2342                    // Non-trivia, non-colon: implicit null value.
2343                    out.push("=VAL :".to_string());
2344                    break;
2345                }
2346                if peek >= children.len() {
2347                    out.push("=VAL :".to_string());
2348                }
2349            }
2350            rowan::NodeOrToken::Node(entry) if entry.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY => {
2351                project_block_map_entry(entry, handles, out);
2352                idx += 1;
2353            }
2354            _ => {
2355                idx += 1;
2356            }
2357        }
2358    }
2359}
2360
2361/// Project a YAML_BLOCK_MAP_KEY whose content is a nested collection — the
2362/// explicit-key `? <seq-or-map>` shape — into the key position. Mirrors the
2363/// nested-collection branches of [`project_block_map_entry_value`]. Returns
2364/// `true` when a collection child was found and projected, `false` when the
2365/// key is a plain scalar the caller should handle with its token-join logic.
2366fn project_block_map_key_collection(
2367    key_node: &SyntaxNode,
2368    handles: &TagHandles,
2369    out: &mut Vec<String>,
2370) -> bool {
2371    for child in key_node.children() {
2372        match child.kind() {
2373            SyntaxKind::YAML_BLOCK_SEQUENCE => {
2374                out.push(seq_open_event(&child, handles));
2375                project_block_sequence_items(&child, handles, out);
2376                out.push("-SEQ".to_string());
2377                return true;
2378            }
2379            SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP => {
2380                // A flow collection in key position may carry an anchor
2381                // sitting as a sibling token inside the KEY wrapper
2382                // (`&key [a, b]: value`, 6BFJ). Surface it on the open
2383                // event so the projection matches `+SEQ [] &key …`.
2384                let anchor = anchor_preceding_node(key_node, &child);
2385                project_flow_collection_node_with_anchor(&child, anchor.as_deref(), handles, out);
2386                return true;
2387            }
2388            SyntaxKind::YAML_BLOCK_MAP => {
2389                out.push("+MAP".to_string());
2390                project_block_map_entries(&child, handles, out);
2391                out.push("-MAP".to_string());
2392                return true;
2393            }
2394            _ => {}
2395        }
2396    }
2397    false
2398}
2399
2400fn project_block_map_entry(entry: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
2401    let key_node = entry
2402        .children()
2403        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
2404        .expect("key node");
2405    let value_node = entry
2406        .children()
2407        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
2408        .expect("value node");
2409
2410    // Explicit-key (`?`) entry whose key content is a nested collection (block
2411    // or flow sequence/map) rather than a scalar. The collection lives as a
2412    // child NODE of YAML_BLOCK_MAP_KEY, so the token-join key-text logic below
2413    // sees only the `?` indicator and would emit an empty `=VAL :`. Project the
2414    // collection in the key position instead. M5DY: block/flow seq keys; V9D5:
2415    // nested block-map key.
2416    if project_block_map_key_collection(&key_node, handles, out) {
2417        project_block_map_entry_value(&value_node, handles, out);
2418        return;
2419    }
2420
2421    let key_tag = key_node
2422        .children_with_tokens()
2423        .filter_map(|el| el.into_token())
2424        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
2425        .map(|tok| tok.text().to_string());
2426    // The key text lives in either a `YAML_KEY` token (v1's emission, used
2427    // both for the explicit `?` indicator and for implicit key text) or
2428    // a `YAML_SCALAR` token (v2's emission, where wrapper position
2429    // carries the role and the explicit `?` is the only `YAML_KEY`).
2430    // Concatenate matching tokens — interleave WHITESPACE / NEWLINE so the
2431    // explicit `?` and any subsequent key scalar are separated by their
2432    // original trivia, letting `strip_explicit_key_indicator` recognize
2433    // the `?<sp>` pattern. Stops at the trailing `:` (`YAML_COLON`).
2434    // Falls back to empty for the empty-implicit-key shorthand
2435    // (`: value` — KEY wrapper holds only the colon).
2436    let key_text = key_node
2437        .children_with_tokens()
2438        .take_while(|el| el.as_token().map(|t| t.kind()) != Some(SyntaxKind::YAML_COLON))
2439        .filter_map(|el| match el {
2440            // The key scalar is a `YAML_SCALAR` node.
2441            rowan::NodeOrToken::Node(n) if n.kind() == SyntaxKind::YAML_SCALAR => {
2442                Some(n.text().to_string())
2443            }
2444            rowan::NodeOrToken::Token(t)
2445                if matches!(
2446                    t.kind(),
2447                    SyntaxKind::YAML_KEY
2448                        | SyntaxKind::YAML_ANCHOR
2449                        | SyntaxKind::YAML_ALIAS
2450                        | SyntaxKind::WHITESPACE
2451                        | SyntaxKind::NEWLINE
2452                ) =>
2453            {
2454                Some(t.text().to_string())
2455            }
2456            _ => None,
2457        })
2458        .collect::<Vec<_>>()
2459        .join("");
2460    let key_text = key_text.trim_end().to_string();
2461
2462    // Strip an explicit-key `?` indicator that precedes the actual key
2463    // text. v2 emits the `?` as a `YAML_KEY` token sibling of the
2464    // `YAML_SCALAR`, so it ends up in `key_text` after the join above.
2465    // v1 wouldn't reach this strip because its v1-shape `YAML_KEY`
2466    // token carried only the implicit key body.
2467    let key_trimmed = strip_explicit_key_indicator(key_text.trim());
2468    if key_trimmed.starts_with('*') {
2469        out.push(format!("=ALI {key_trimmed}"));
2470    } else if key_tag.is_none()
2471        && let Some((indicator, body)) = extract_block_scalar_body(&key_node)
2472    {
2473        // Explicit-key whose key is itself a literal (`|`) or folded
2474        // (`>`) block scalar (5WE3, KK5P complex4).
2475        // `extract_block_scalar_body` ignores the `?` indicator (a
2476        // `YAML_KEY` token) and the trailing `:` (`YAML_COLON`), folding
2477        // only the scalar body — the same path as a block-scalar value.
2478        out.push(format!(
2479            "=VAL {indicator}{}",
2480            escape_block_scalar_text(&body)
2481        ));
2482    } else {
2483        let key_long_tag = key_tag
2484            .as_deref()
2485            .and_then(|t| resolve_long_tag(t, handles));
2486        let (anchor, body_tag, body) = decompose_scalar(key_trimmed, handles);
2487        let long_tag = key_long_tag.or(body_tag);
2488        let folded;
2489        let body_for_event: &str = if body.contains('\n') {
2490            folded = cooking::fold_quoted_inner(body, false);
2491            &folded
2492        } else {
2493            body
2494        };
2495        out.push(scalar_event(anchor, long_tag.as_deref(), body_for_event));
2496    }
2497
2498    project_block_map_entry_value(&value_node, handles, out);
2499}
2500
2501fn project_block_map_entry_value(
2502    value_node: &SyntaxNode,
2503    handles: &TagHandles,
2504    out: &mut Vec<String>,
2505) {
2506    if let Some(nested_map) = value_node
2507        .children()
2508        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
2509    {
2510        out.push(map_open_event_for_value(value_node, handles));
2511        project_block_map_entries(&nested_map, handles, out);
2512        out.push("-MAP".to_string());
2513        return;
2514    }
2515
2516    if let Some(nested_seq) = value_node
2517        .children()
2518        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
2519    {
2520        out.push(seq_open_event(&nested_seq, handles));
2521        project_block_sequence_items(&nested_seq, handles, out);
2522        out.push("-SEQ".to_string());
2523        return;
2524    }
2525
2526    if let Some(flow_map) = value_node
2527        .children()
2528        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
2529    {
2530        let anchor = anchor_preceding_node(value_node, &flow_map);
2531        project_flow_collection_node_with_anchor(&flow_map, anchor.as_deref(), handles, out);
2532        return;
2533    }
2534
2535    // A flow-sequence value with embedded `:` (an implicit flow-map
2536    // entry inside `[...]`, e.g. 87E4 / L9U5 / LQZ7) needs the
2537    // CST-walking item projector — the text-based fallback below
2538    // strips colons during `value_text` assembly so `flow_kv_split`
2539    // never sees them and the entry collapses into one bare scalar.
2540    if let Some(flow_seq) = value_node
2541        .children()
2542        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
2543    {
2544        let anchor = anchor_preceding_node(value_node, &flow_seq);
2545        project_flow_collection_node_with_anchor(&flow_seq, anchor.as_deref(), handles, out);
2546        return;
2547    }
2548
2549    if let Some((indicator, body)) = extract_block_scalar_body(value_node) {
2550        // Tag/anchor siblings of the block scalar (e.g. `!foo >1\n value`,
2551        // `!!binary | ...`) decorate the scalar — splice them into the
2552        // event in canonical `&anchor <tag> <indicator>body` order.
2553        let mut prefix = String::new();
2554        let anchor_text = value_node
2555            .children_with_tokens()
2556            .filter_map(|el| el.into_token())
2557            .find(|tok| tok.kind() == SyntaxKind::YAML_ANCHOR)
2558            .map(|tok| tok.text().to_string());
2559        if let Some(anchor) = anchor_text.as_deref().and_then(|t| t.strip_prefix('&')) {
2560            prefix.push_str(&format!("&{anchor} "));
2561        }
2562        let tag_text = value_node
2563            .children_with_tokens()
2564            .filter_map(|el| el.into_token())
2565            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
2566            .map(|tok| tok.text().to_string());
2567        if let Some(tag) = tag_text
2568            && let Some(long) = resolve_long_tag(&tag, handles)
2569        {
2570            prefix.push_str(&long);
2571            prefix.push(' ');
2572        }
2573        let escaped = escape_block_scalar_text(&body);
2574        out.push(format!("=VAL {prefix}{indicator}{escaped}"));
2575        return;
2576    }
2577
2578    let value_tag = value_node
2579        .children_with_tokens()
2580        .filter_map(|el| el.into_token())
2581        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
2582        .map(|tok| tok.text().to_string());
2583    // Include WHITESPACE between scalar-ish tokens so a value like
2584    // `&anchor body` joins as `&anchor body` (not `&anchorbody`),
2585    // letting `decompose_scalar` find the whitespace terminator on the
2586    // anchor name. NEWLINE must be kept too: the scanner splits a
2587    // multi-line plain scalar into per-line `YAML_SCALAR_TEXT` leaves
2588    // interleaved with `NEWLINE` leaves, and the downstream plain fold
2589    // needs those breaks to collapse `e\n  f` to `e f` (A984) rather than
2590    // concatenating the continuation indentation into `e  f`.
2591    let value_text = value_node
2592        .descendants_with_tokens()
2593        .filter_map(|el| el.into_token())
2594        .filter(|tok| {
2595            matches!(
2596                tok.kind(),
2597                SyntaxKind::YAML_SCALAR_TEXT
2598                    | SyntaxKind::YAML_ANCHOR
2599                    | SyntaxKind::YAML_ALIAS
2600                    | SyntaxKind::WHITESPACE
2601                    | SyntaxKind::NEWLINE
2602            )
2603        })
2604        .map(|tok| tok.text().to_string())
2605        .collect::<Vec<_>>()
2606        .join("");
2607
2608    if value_text.trim().is_empty() {
2609        if let Some(tag) = value_tag
2610            && let Some(long) = resolve_long_tag(&tag, handles)
2611        {
2612            out.push(format!("=VAL {long} :"));
2613        } else {
2614            out.push("=VAL :".to_string());
2615        }
2616    } else if value_text.trim_start().starts_with('*') {
2617        out.push(format!("=ALI {}", value_text.trim()));
2618    } else {
2619        let value_long_tag = value_tag
2620            .as_deref()
2621            .and_then(|t| resolve_long_tag(t, handles));
2622        let trimmed = value_text.trim();
2623        if trimmed.starts_with('"') || trimmed.starts_with('\'') {
2624            // Multi-line quoted scalar value: rebuild the source text with
2625            // newlines intact (parser splits each physical line into its own
2626            // YAML_SCALAR token), then run the YAML 1.2 §7.3 line-folding
2627            // rules so blank lines fold to `\n` and single breaks fold to
2628            // space. Without this, joining YAML_SCALAR tokens directly drops
2629            // line structure (yaml-test-suite case XV9V).
2630            let multi_line_text = collect_scalar_source(value_node);
2631            // Strip trailing whitespace/newlines that come AFTER the
2632            // closing quote. v2 keeps a single quoted-scalar token so
2633            // those bytes are post-value trivia (NEWLINE) — they don't
2634            // make the scalar body multi-line. Without this trim, a
2635            // single-line quoted with trailing significant whitespace
2636            // (J3BT's `"Quoted \t"`) hits the multi-line folder which
2637            // strips trailing tabs/spaces from the scalar body.
2638            let is_multi_line = multi_line_text
2639                .trim_end_matches(['\n', '\r', ' ', '\t'])
2640                .contains('\n');
2641            let quoted = if is_multi_line {
2642                quoted_val_event_multi_line(&multi_line_text)
2643            } else {
2644                quoted_val_event(trimmed)
2645            };
2646            if let Some(long) = value_long_tag {
2647                out.push(quoted.replacen("=VAL ", &format!("=VAL {long} "), 1));
2648            } else {
2649                out.push(quoted);
2650            }
2651        } else {
2652            let (anchor, body_tag, body) = decompose_scalar(trimmed, handles);
2653            let long_tag = value_long_tag.or(body_tag);
2654            let folded;
2655            let body_for_event: &str = if body.contains('\n') {
2656                // A tag/anchor can precede a multi-line double-quoted value
2657                // (`!!binary "\\\n …"`, 565N), so the quoted branch above is
2658                // skipped. Enable §7.5 escaped line breaks when the decomposed
2659                // body is itself double-quoted; the later `decode_double_quoted`
2660                // in `scalar_event` strips the quotes and remaining escapes.
2661                let escaped_breaks = body.trim_start().starts_with('"');
2662                folded = cooking::fold_quoted_inner(body, escaped_breaks);
2663                &folded
2664            } else {
2665                body
2666            };
2667            out.push(scalar_event(anchor, long_tag.as_deref(), body_for_event));
2668        }
2669    }
2670}