Skip to main content

panache_parser/parser/yaml/
events.rs

1//! YAML event projection: walk a shadow-parser CST and produce a
2//! yaml-test-suite style event stream (`+STR`, `+DOC`, `+MAP`, `=VAL :foo`,
3//! ...).
4//!
5//! This module is parser-crate scoped and used only by the test harness in
6//! `crates/panache-parser/tests/yaml.rs` for fixture parity. It reads the
7//! green tree built by [`crate::parser::yaml::parse_yaml_tree`] and re-derives
8//! event-stream semantics (tag resolution, anchor stripping, flow-seq
9//! splitting). The intent is to keep the projection adjacent to the parser so
10//! CST shape is the single source of truth for events.
11
12use std::collections::HashMap;
13
14use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken};
15
16use super::parser::parse_yaml_tree;
17
/// Tag-handle table for a single document, mapping a handle such as `!!`,
/// `!yaml!` or `!e!` to the URI prefix it expands to. `%TAG` directives found
/// in a document are layered on top of the defaults.
type TagHandles = HashMap<String, String>;

/// Build the handle table every document starts from: only the secondary
/// handle `!!`, bound to `tag:yaml.org,2002:`.
fn default_tag_handles() -> TagHandles {
    TagHandles::from([("!!".to_owned(), "tag:yaml.org,2002:".to_owned())])
}
28
29/// Scan a `YAML_DOCUMENT` for `%TAG` directive lines and merge them into
30/// the default handle map.
31fn collect_tag_handles(doc: &SyntaxNode) -> TagHandles {
32    let mut handles = default_tag_handles();
33    for tok in doc
34        .descendants_with_tokens()
35        .filter_map(|el| el.into_token())
36    {
37        if tok.kind() != SyntaxKind::YAML_SCALAR {
38            continue;
39        }
40        let line = tok.text().trim_start();
41        let Some(rest) = line.strip_prefix("%TAG") else {
42            continue;
43        };
44        let mut parts = rest.split_whitespace();
45        let Some(handle) = parts.next() else { continue };
46        let Some(prefix) = parts.next() else { continue };
47        handles.insert(handle.to_string(), prefix.to_string());
48    }
49    handles
50}
51
52/// Resolve a tag shorthand (e.g. `!!str`, `!yaml!str`, `!e!foo`, `!local`) to
53/// the long-form `<tag:...>` event token, consulting the per-document handle
54/// map. Handles are checked first (so a `%TAG !` directive can override the
55/// primary handle); we fall back to the built-in handling for unknown handles.
56fn resolve_long_tag(tag: &str, handles: &TagHandles) -> Option<String> {
57    let mut best: Option<(&str, &String)> = None;
58    for (h, p) in handles {
59        if tag.starts_with(h)
60            && best.is_none_or(|(b_handle, _): (&str, _)| h.len() > b_handle.len())
61        {
62            best = Some((h.as_str(), p));
63        }
64    }
65    if let Some((handle, prefix)) = best {
66        let suffix = &tag[handle.len()..];
67        let resolved = format!("{prefix}{suffix}");
68        return Some(format!("<{}>", percent_decode_tag(&resolved)));
69    }
70    long_tag_builtin(tag)
71}
72
/// Replace every `%xx` escape (two hex digits) in a resolved tag URI with the
/// byte it encodes. A `%` that is not followed by two hex digits is copied
/// through unchanged. If the decoded bytes are not valid UTF-8 the original
/// `tag` text is returned instead.
fn percent_decode_tag(tag: &str) -> String {
    let mut decoded = Vec::with_capacity(tag.len());
    let mut rest = tag.as_bytes();
    while let Some((&byte, tail)) = rest.split_first() {
        let escape = match (byte, tail) {
            (b'%', [hi, lo, ..]) => hex_digit_value(*hi).zip(hex_digit_value(*lo)),
            _ => None,
        };
        match escape {
            Some((hi, lo)) => {
                decoded.push(hi * 16 + lo);
                // Skip the two hex digits that were just consumed.
                rest = &tail[2..];
            }
            None => {
                decoded.push(byte);
                rest = tail;
            }
        }
    }
    String::from_utf8(decoded).unwrap_or_else(|_| tag.to_string())
}

/// Numeric value (0–15) of an ASCII hex digit in either case, or `None` for
/// any other byte.
fn hex_digit_value(byte: u8) -> Option<u8> {
    // Subtracting `base` maps the first byte of each range onto its value.
    let base = match byte {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(byte - base)
}
104
105/// Walk the shadow CST for `input` and return the projected yaml-test-suite
106/// event stream. Returns an empty vector if the input fails to parse.
107pub fn project_events(input: &str) -> Vec<String> {
108    let Some(tree) = parse_yaml_tree(input) else {
109        return Vec::new();
110    };
111    project_events_from_tree(&tree)
112}
113
114/// Walk a shadow-parser CST and return the projected yaml-test-suite event
115/// stream. Decoupled from `parse_yaml_tree` so the v2 parser can reuse the
116/// same projection for parity comparisons.
117pub fn project_events_from_tree(tree: &SyntaxNode) -> Vec<String> {
118    let mut events = vec!["+STR".to_string()];
119    let stream = tree
120        .descendants()
121        .find(|n| n.kind() == SyntaxKind::YAML_STREAM);
122    if let Some(stream) = stream {
123        for doc in stream
124            .children()
125            .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
126        {
127            project_document(&doc, &mut events);
128        }
129    }
130    events.push("-STR".to_string());
131    events
132}
133
134/// True when the document holds no content beyond a `DocumentEnd`
135/// marker and surrounding trivia (whitespace, newlines, comments).
136/// Used to distinguish a real (possibly empty) document from a
137/// synthetic doc the v2 builder wrapped around a bare `...`.
138fn doc_is_marker_only(doc: &SyntaxNode) -> bool {
139    for el in doc.descendants_with_tokens() {
140        if let Some(tok) = el.as_token() {
141            match tok.kind() {
142                SyntaxKind::WHITESPACE
143                | SyntaxKind::NEWLINE
144                | SyntaxKind::YAML_COMMENT
145                | SyntaxKind::YAML_DOCUMENT_END
146                | SyntaxKind::YAML_DOCUMENT_START => {}
147                _ => return false,
148            }
149        }
150    }
151    true
152}
153
154/// LX3P: a `[flow]` sequence written as a block-map key lands in the v2 CST
155/// as a YAML_FLOW_SEQUENCE that's a direct child of the YAML_DOCUMENT,
156/// preceding the YAML_BLOCK_MAP that the trailing `:` opens. Returns that
157/// flow-sequence when this shape is present.
158fn flow_seq_preceding_block_map_at_doc_level(
159    doc: &SyntaxNode,
160    block_map: &SyntaxNode,
161) -> Option<SyntaxNode> {
162    let block_map_offset = block_map.text_range().start();
163    doc.children()
164        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
165        .find(|n| n.text_range().end() <= block_map_offset)
166}
167
168/// True when a YAML_BLOCK_MAP_ENTRY's KEY wrapper carries no key text —
169/// only structural trivia and the `:` indicator. Used to detect the
170/// implicit-empty-key shape (`: value`) and the LX3P pattern where the
171/// real key lives in a sibling node preceding the map.
172fn block_map_entry_key_is_empty(entry: &SyntaxNode) -> bool {
173    let Some(key_node) = entry
174        .children()
175        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
176    else {
177        return false;
178    };
179    !key_node
180        .children_with_tokens()
181        .filter_map(|el| el.into_token())
182        .take_while(|tok| tok.kind() != SyntaxKind::YAML_COLON)
183        .any(|tok| {
184            matches!(
185                tok.kind(),
186                SyntaxKind::YAML_KEY | SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_TAG
187            ) && !tok.text().trim().is_empty()
188        })
189}
190
191fn project_document(doc: &SyntaxNode, out: &mut Vec<String>) {
192    let has_doc_start = doc
193        .children_with_tokens()
194        .filter_map(|el| el.into_token())
195        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START);
196    let has_doc_end = doc
197        .children_with_tokens()
198        .filter_map(|el| el.into_token())
199        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END);
200    // A v2 builder synthesizes a `YAML_DOCUMENT` around a bare `...`
201    // (or comments preceding it) to keep the marker inside a document
202    // for losslessness. v1 / yaml-test-suite considers such input an
203    // empty stream — no `+DOC`/`-DOC` events. Skip the projection when
204    // the only structural content is a `DocumentEnd` marker (HWV9,
205    // QT73).
206    if !has_doc_start && doc_is_marker_only(doc) {
207        return;
208    }
209    out.push(if has_doc_start {
210        "+DOC ---".to_string()
211    } else {
212        "+DOC".to_string()
213    });
214    let handles = collect_tag_handles(doc);
215
216    // Top-level container detection must look at direct children, not
217    // arbitrary descendants. A `descendants()` walk surfaces the first
218    // BLOCK_SEQUENCE/BLOCK_MAP it finds in document order — which for a
219    // block-map whose values contain nested block-sequences would be the
220    // inner sequence, collapsing the entire map into a bare `+SEQ`.
221    if let Some(seq_node) = doc
222        .children()
223        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
224    {
225        out.push(seq_open_event(&seq_node, &handles));
226        project_block_sequence_items(&seq_node, &handles, out);
227        out.push("-SEQ".to_string());
228    } else if let Some(root_map) = doc
229        .children()
230        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
231    {
232        // Flow-sequence used as a block-map key (LX3P: `[flow]: block`).
233        // v2 lands the `[flow]` flow-sequence as a sibling preceding the
234        // YAML_BLOCK_MAP (the colon opens an empty-key entry inside the
235        // map), but yaml-test-suite expects `+MAP +SEQ []…-SEQ value -MAP`.
236        // Splice the flow-seq in as the first entry's key when this shape
237        // is present.
238        if let Some(flow_seq) = flow_seq_preceding_block_map_at_doc_level(doc, &root_map)
239            && let Some(first_entry) = root_map
240                .children()
241                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
242            && block_map_entry_key_is_empty(&first_entry)
243        {
244            out.push(map_open_event_for_block_map(&root_map, &handles));
245            out.push("+SEQ []".to_string());
246            project_flow_sequence_items_cst(&flow_seq, &handles, out);
247            out.push("-SEQ".to_string());
248            if let Some(value_node) = first_entry
249                .children()
250                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
251            {
252                project_block_map_entry_value(&value_node, &handles, out);
253            } else {
254                out.push("=VAL :".to_string());
255            }
256            for entry in root_map
257                .children()
258                .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
259                .skip(1)
260            {
261                project_block_map_entry(&entry, &handles, out);
262            }
263            out.push("-MAP".to_string());
264        } else {
265            let mut values = Vec::new();
266            project_block_map_entries(&root_map, &handles, &mut values);
267            if !values.is_empty() {
268                out.push(map_open_event_for_block_map(&root_map, &handles));
269                out.append(&mut values);
270                out.push("-MAP".to_string());
271            } else if let Some(flow_map) = doc
272                .descendants()
273                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
274            {
275                let mut flow_values = Vec::new();
276                project_flow_map_entries(&flow_map, &handles, &mut flow_values);
277                out.push("+MAP {}".to_string());
278                out.append(&mut flow_values);
279                out.push("-MAP".to_string());
280            } else if let Some(flow_seq) = doc
281                .descendants()
282                .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
283                && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
284            {
285                out.push("+SEQ []".to_string());
286                for item in items {
287                    project_flow_seq_item(&item, &handles, out);
288                }
289                out.push("-SEQ".to_string());
290            } else if let Some(scalar) = scalar_document_value(doc, &handles) {
291                out.push(scalar);
292            } else {
293                out.push("=VAL :".to_string());
294            }
295        }
296    } else if let Some(flow_map) = doc
297        .descendants()
298        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
299    {
300        out.push("+MAP {}".to_string());
301        project_flow_map_entries(&flow_map, &handles, out);
302        out.push("-MAP".to_string());
303    } else if let Some(flow_seq) = doc
304        .descendants()
305        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
306    {
307        out.push("+SEQ []".to_string());
308        project_flow_sequence_items_cst(&flow_seq, &handles, out);
309        out.push("-SEQ".to_string());
310    } else if let Some(scalar) = scalar_document_value(doc, &handles) {
311        out.push(scalar);
312    } else {
313        out.push("=VAL :".to_string());
314    }
315
316    out.push(if has_doc_end {
317        "-DOC ...".to_string()
318    } else {
319        "-DOC".to_string()
320    });
321}
322
323fn scalar_document_value(doc: &SyntaxNode, handles: &TagHandles) -> Option<String> {
324    // `--- |` / `--- >` packs a block-scalar header onto the directive-end
325    // marker line. Detect that pattern first so the folded body (with proper
326    // chomping) is emitted instead of a single-line plain scalar.
327    if let Some((indicator, body)) = extract_scalar_doc_block_body(doc) {
328        let escaped = escape_block_scalar_text(&body);
329        return Some(format!("=VAL {indicator}{escaped}"));
330    }
331    // Bare top-level block scalar (no `---` marker) — e.g. a doc that begins
332    // with `>\n …` or `|\n …`. Reuse the same folder; the only difference vs
333    // the directive-end-packed form is the absence of a `YAML_DOCUMENT_START`
334    // sentinel separating the header from the body.
335    if let Some((indicator, body)) = extract_top_level_block_body(doc) {
336        let escaped = escape_block_scalar_text(&body);
337        return Some(format!("=VAL {indicator}{escaped}"));
338    }
339    // Skip `%TAG`/`%YAML` directive lines: those are document-level metadata,
340    // not part of the scalar body.
341    let text = doc
342        .descendants_with_tokens()
343        .filter_map(|el| el.into_token())
344        .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
345        .filter(|tok| !tok.text().trim_start().starts_with('%'))
346        .map(|tok| tok.text().to_string())
347        .collect::<Vec<_>>()
348        .join("");
349    let trimmed_text = text.trim();
350    if trimmed_text.is_empty() {
351        // Tagged-but-empty scalar document still emits a `=VAL <tag> :` event.
352        let tag_only = doc
353            .descendants_with_tokens()
354            .filter_map(|el| el.into_token())
355            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
356            .map(|tok| tok.text().to_string());
357        if let Some(tag) = tag_only
358            && let Some(long) = resolve_long_tag(&tag, handles)
359        {
360            return Some(format!("=VAL {long} :"));
361        }
362        return None;
363    }
364    let tag_text = doc
365        .descendants_with_tokens()
366        .filter_map(|el| el.into_token())
367        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
368        .map(|tok| tok.text().to_string());
369    let multi_line_text = collect_doc_scalar_text_with_newlines(doc);
370    let is_multi_line_quoted = multi_line_text.contains('\n')
371        && (trimmed_text.starts_with('"') || trimmed_text.starts_with('\''));
372    let event = if let Some(tag) = tag_text
373        && let Some(long) = resolve_long_tag(&tag, handles)
374    {
375        if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
376            let quoted = if is_multi_line_quoted {
377                quoted_val_event_multi_line(&multi_line_text)
378            } else {
379                quoted_val_event(trimmed_text)
380            };
381            // quoted_val_event returns `=VAL "body` — splice the tag in.
382            quoted.replacen("=VAL ", &format!("=VAL {long} "), 1)
383        } else {
384            format!("=VAL {long} :{trimmed_text}")
385        }
386    } else if is_multi_line_quoted {
387        quoted_val_event_multi_line(&multi_line_text)
388    } else if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
389        quoted_val_event(&text)
390    } else {
391        let folded = fold_plain_document_lines(doc);
392        // Plain top-level scalars may carry node properties (`&anchor`,
393        // `!tag`) before the actual scalar body; decompose so events project
394        // them in canonical `&anchor <tag> :body` order.
395        let (anchor, body_tag, body) = decompose_scalar(folded.trim_start(), handles);
396        if anchor.is_some() || body_tag.is_some() {
397            scalar_event(anchor, body_tag.as_deref(), &escape_block_scalar_text(body))
398        } else {
399            format!("=VAL :{}", escape_block_scalar_text(&folded))
400        }
401    };
402    Some(event)
403}
404
405/// Reconstruct the doc's scalar text with line breaks intact: walk
406/// `YAML_SCALAR` + `NEWLINE` tokens in order (skipping directive lines).
407/// Required for multi-line quoted folding because `YAML_SCALAR`-only joins
408/// throw away the line structure that drives YAML 1.2 §7.3.2/§7.3.3 folding.
409fn collect_doc_scalar_text_with_newlines(doc: &SyntaxNode) -> String {
410    doc.descendants_with_tokens()
411        .filter_map(|el| el.into_token())
412        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
413        .filter(|tok| !tok.text().trim_start().starts_with('%'))
414        .map(|tok| tok.text().to_string())
415        .collect()
416}
417
/// Format a plain-scalar event: `=VAL :` followed by `text` with every
/// backslash doubled.
fn plain_val_event(text: &str) -> String {
    const PREFIX: &str = "=VAL :";
    let mut event = String::with_capacity(PREFIX.len() + text.len());
    event.push_str(PREFIX);
    for ch in text.chars() {
        if ch == '\\' {
            event.push_str("\\\\");
        } else {
            event.push(ch);
        }
    }
    event
}
421
422/// Fold the YAML-1.2 plain-scalar body of a top-level scalar `YAML_DOCUMENT`
423/// into its canonical value: walk `YAML_SCALAR` and `NEWLINE` tokens in order
424/// (skipping directive lines), then apply plain-scalar folding —
425/// non-empty-line breaks fold to a single space, runs of `n` empty lines fold
426/// to `n` line feeds. Leading/trailing empty lines are stripped.
427fn fold_plain_document_lines(doc: &SyntaxNode) -> String {
428    let raw: String = doc
429        .descendants_with_tokens()
430        .filter_map(|el| el.into_token())
431        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
432        .filter(|tok| !tok.text().trim_start().starts_with('%'))
433        .map(|tok| tok.text().to_string())
434        .collect();
435
436    let mut out = String::with_capacity(raw.len());
437    let mut empty_run: usize = 0;
438    let mut have_content = false;
439    for line in raw.split('\n') {
440        let trimmed = line.trim();
441        if trimmed.is_empty() {
442            if have_content {
443                empty_run += 1;
444            }
445            continue;
446        }
447        if !have_content {
448            out.push_str(trimmed);
449            have_content = true;
450        } else if empty_run == 0 {
451            out.push(' ');
452            out.push_str(trimmed);
453        } else {
454            for _ in 0..empty_run {
455                out.push('\n');
456            }
457            out.push_str(trimmed);
458        }
459        empty_run = 0;
460    }
461    out
462}
463
464/// Project a flow-collection scalar token, preserving quoted-scalar
465/// classification when the source uses `"..."` or `'...'`. Plain scalars are
466/// folded just like outside flow context. A leading tag shorthand (`!!str`,
467/// `!handle!suffix`, `!local`) is resolved through `handles`.
468fn flow_scalar_event(text: &str, handles: &TagHandles) -> String {
469    let trimmed = text.trim();
470    if trimmed.starts_with('"') || trimmed.starts_with('\'') {
471        if trimmed.contains('\n') {
472            return quoted_val_event_multi_line(trimmed);
473        }
474        return quoted_val_event(trimmed);
475    }
476    let (anchor, long_tag, body) = decompose_scalar(trimmed, handles);
477    if anchor.is_some() || long_tag.is_some() {
478        return scalar_event(anchor, long_tag.as_deref(), body);
479    }
480    plain_val_event(&fold_plain_scalar(text))
481}
482
/// Split a leading tag shorthand (`!handle!suffix`, `!!suffix` or `!local`)
/// off the front of `text`, returning `(tag, remainder)`.
///
/// The tag runs from the initial `!` up to — but not including — the first
/// space, tab, newline, `,`, `}` or `]`, or to the end of `text`. Returns
/// `None` when `text` does not start with `!`, or when more than one further
/// `!` appears inside the tag.
fn split_leading_tag(text: &str) -> Option<(&str, &str)> {
    let after_bang = text.strip_prefix('!')?;
    let is_terminator = |ch: char| matches!(ch, ' ' | '\t' | '\n' | ',' | '}' | ']');
    let span = after_bang.find(is_terminator).unwrap_or(after_bang.len());
    if after_bang[..span].matches('!').count() > 1 {
        return None;
    }
    // `+ 1` re-includes the leading `!` stripped above.
    Some(text.split_at(span + 1))
}
509
/// Find the key/value `:` indicator inside a flow-sequence item.
///
/// A `:` only counts when it lies outside quotes and is followed by a space,
/// tab, line feed, carriage return, or the end of the item; otherwise it is
/// part of a plain scalar (e.g. `http://foo.com`). Inside double quotes a
/// backslash shields the next character. Returns the byte offset of the `:`
/// and the byte offset just past it, or `None` when there is no indicator.
fn flow_kv_split(item: &str) -> Option<(usize, usize)> {
    #[derive(Clone, Copy)]
    enum Scan {
        Plain,
        Single,
        Double,
        DoubleEscaped,
    }

    let mut state = Scan::Plain;
    for (offset, ch) in item.char_indices() {
        state = match (state, ch) {
            (Scan::DoubleEscaped, _) => Scan::Double,
            (Scan::Double, '\\') => Scan::DoubleEscaped,
            (Scan::Double, '"') => Scan::Plain,
            (Scan::Double, _) => Scan::Double,
            (Scan::Single, '\'') => Scan::Plain,
            (Scan::Single, _) => Scan::Single,
            (Scan::Plain, '\'') => Scan::Single,
            (Scan::Plain, '"') => Scan::Double,
            (Scan::Plain, ':') => {
                let after = offset + ch.len_utf8();
                let follower = item.as_bytes().get(after).copied();
                if matches!(follower, None | Some(b' ' | b'\t' | b'\n' | b'\r')) {
                    return Some((offset, after));
                }
                Scan::Plain
            }
            (Scan::Plain, _) => Scan::Plain,
        };
    }
    None
}
554
555/// Emit events for a single flow-sequence item: either `+MAP {} key val -MAP`
556/// when the item is a flow-map entry (`key: value`, possibly with empty key
557/// or value), or a single `=VAL` for a bare scalar.
558fn project_flow_seq_item(item: &str, handles: &TagHandles, out: &mut Vec<String>) {
559    if let Some((colon, after)) = flow_kv_split(item) {
560        let raw_key_full = item[..colon].trim();
561        // Strip the explicit-key `?` indicator (followed by whitespace or
562        // end-of-key) when present.
563        let raw_key = strip_explicit_key_indicator(raw_key_full);
564        let raw_value = item[after..].trim();
565        out.push("+MAP {}".to_string());
566        if raw_key.is_empty() {
567            out.push("=VAL :".to_string());
568        } else {
569            out.push(flow_scalar_event(raw_key, handles));
570        }
571        if raw_value.is_empty() {
572            out.push("=VAL :".to_string());
573        } else {
574            out.push(flow_scalar_event(raw_value, handles));
575        }
576        out.push("-MAP".to_string());
577    } else if item.trim_start().starts_with('"') || item.trim_start().starts_with('\'') {
578        let trimmed = item.trim();
579        // Multi-line quoted scalar inside a flow sequence: apply YAML
580        // 1.2 §7.3 line-folding rules so embedded newlines fold to a
581        // space (or `\n` for blank-line runs) before the event's escape
582        // pass. Without this, joining tokens directly leaves the literal
583        // newline inside the body.
584        if trimmed.contains('\n') {
585            out.push(quoted_val_event_multi_line(trimmed));
586        } else {
587            out.push(quoted_val_event(trimmed));
588        }
589    } else {
590        out.push(plain_val_event(&fold_plain_scalar(item)));
591    }
592}
593
/// Remove a leading explicit-key indicator from `key`.
///
/// When `key`, ignoring leading whitespace, starts with `?` followed by a
/// space, tab, newline or nothing at all, the text after the `?` is returned
/// with its leading whitespace trimmed. In every other case `key` is returned
/// untouched, including its original leading whitespace.
fn strip_explicit_key_indicator(key: &str) -> &str {
    match key.trim_start().strip_prefix('?') {
        Some(rest) if rest.is_empty() || rest.starts_with([' ', '\t', '\n']) => rest.trim_start(),
        _ => key,
    }
}
603
604fn quoted_val_event(text: &str) -> String {
605    if text.starts_with('\'') {
606        let inner = decode_single_quoted(text);
607        format!("=VAL '{}", escape_for_event(&inner))
608    } else {
609        let inner = decode_double_quoted(text);
610        format!("=VAL \"{}", escape_for_event(&inner))
611    }
612}
613
614/// Multi-line quoted scalar projection: applies YAML 1.2 §7.3.2 / §7.3.3 line
615/// folding (single line break → space, blank-line run of `n` blanks → `n`
616/// `\n`s) before escape decoding. Required when a top-level quoted document
617/// spans more than one source line — the single-line `quoted_val_event`
618/// concatenates `YAML_SCALAR` tokens directly and would lose all line
619/// structure.
620fn quoted_val_event_multi_line(raw: &str) -> String {
621    let trimmed = raw.trim_start_matches([' ', '\t', '\n']);
622    if trimmed.starts_with('\'') {
623        let inner_with_breaks = strip_quoted_wrapper(trimmed, '\'');
624        let folded = fold_quoted_inner(&inner_with_breaks);
625        let decoded = folded.replace("''", "'");
626        format!("=VAL '{}", escape_for_event(&decoded))
627    } else {
628        let inner_with_breaks = strip_quoted_wrapper(trimmed, '"');
629        let folded = fold_quoted_inner(&inner_with_breaks);
630        let decoded = decode_double_quoted_inner(&folded);
631        format!("=VAL \"{}", escape_for_event(&decoded))
632    }
633}
634
/// Extract the raw inner text of a quoted scalar: drop the opening `quote`
/// (if present) and copy characters up to, but not including, the closing
/// quote.
///
/// For `"` a backslash and the character after it are copied verbatim, so
/// `\"` does not end the scalar. For any other `quote` the text is scanned in
/// single-quote mode, where a doubled `''` is copied verbatim and does not
/// end the scalar. If no closing quote is found, everything after the opening
/// quote is returned.
fn strip_quoted_wrapper(text: &str, quote: char) -> String {
    let body = text.strip_prefix(quote).unwrap_or(text);
    let double_quoted = quote == '"';
    let mut inner = String::with_capacity(body.len());
    let mut rest = body.chars().peekable();
    loop {
        match rest.next() {
            None => break,
            Some('\\') if double_quoted => {
                inner.push('\\');
                // Copy the escaped character too, when there is one.
                inner.extend(rest.next());
            }
            Some('"') if double_quoted => break,
            Some('\'') if !double_quoted => {
                if rest.next_if_eq(&'\'').is_none() {
                    break;
                }
                inner.push_str("''");
            }
            Some(ch) => inner.push(ch),
        }
    }
    inner
}
667
/// Fold the inner body of a multi-line quoted scalar per YAML §7.3:
/// - The first line is copied as-is, leading whitespace included.
/// - Continuation lines lose their leading spaces and tabs.
/// - Before a continuation line is joined, trailing spaces and tabs of the
///   text accumulated so far are dropped.
/// - A single line break (no blank line in between) folds to one space.
/// - A run of `n` blank lines before a non-blank line folds to `n` `\n` chars.
/// - Blank lines after the last non-blank line contribute nothing.
/// - Trailing spaces and tabs of the result are stripped (matching
///   yaml-test-suite event expectations for multi-line quoted scalars).
fn fold_quoted_inner(inner: &str) -> String {
    let mut lines = inner.split('\n');
    // `split` always yields at least one piece, so this is the first line.
    let mut out = String::from(lines.next().unwrap_or_default());
    let mut blanks = 0usize;
    for line in lines {
        let stripped = line.trim_start_matches([' ', '\t']);
        if stripped.is_empty() {
            blanks += 1;
            continue;
        }
        let kept = out.trim_end_matches([' ', '\t']).len();
        out.truncate(kept);
        if blanks == 0 {
            out.push(' ');
        } else {
            out.extend(std::iter::repeat('\n').take(blanks));
        }
        out.push_str(stripped);
        blanks = 0;
    }
    let kept = out.trim_end_matches([' ', '\t']).len();
    out.truncate(kept);
    out
}
711
712/// Inner-only variant of [`decode_double_quoted`]: the input has no
713/// surrounding quote characters and is consumed in full. Shares escape
714/// decoding semantics with the wrapped form.
715fn decode_double_quoted_inner(body: &str) -> String {
716    let mut out = String::with_capacity(body.len());
717    let mut chars = body.chars();
718    while let Some(ch) = chars.next() {
719        if ch != '\\' {
720            out.push(ch);
721            continue;
722        }
723        let Some(next) = chars.next() else {
724            out.push('\\');
725            break;
726        };
727        match next {
728            '0' => out.push('\0'),
729            'a' => out.push('\u{07}'),
730            'b' => out.push('\u{08}'),
731            't' | '\t' => out.push('\t'),
732            'n' => out.push('\n'),
733            'v' => out.push('\u{0B}'),
734            'f' => out.push('\u{0C}'),
735            'r' => out.push('\r'),
736            'e' => out.push('\u{1B}'),
737            ' ' => out.push(' '),
738            '"' => out.push('"'),
739            '/' => out.push('/'),
740            '\\' => out.push('\\'),
741            'N' => out.push('\u{85}'),
742            '_' => out.push('\u{A0}'),
743            'L' => out.push('\u{2028}'),
744            'P' => out.push('\u{2029}'),
745            'x' => {
746                if let Some(c) = take_hex_char(&mut chars, 2) {
747                    out.push(c);
748                }
749            }
750            'u' => {
751                if let Some(c) = take_hex_char(&mut chars, 4) {
752                    out.push(c);
753                }
754            }
755            'U' => {
756                if let Some(c) = take_hex_char(&mut chars, 8) {
757                    out.push(c);
758                }
759            }
760            other => {
761                out.push('\\');
762                out.push(other);
763            }
764        }
765    }
766    out
767}
768
/// Decode a single-quoted scalar: drop one leading and one trailing `'` when
/// present, then collapse each doubled `''` into a single `'`.
fn decode_single_quoted(text: &str) -> String {
    let without_open = text.strip_prefix('\'').unwrap_or(text);
    let inner = without_open.strip_suffix('\'').unwrap_or(without_open);
    inner.split("''").collect::<Vec<_>>().join("'")
}
774
775/// Decode YAML double-quoted scalar escape sequences into actual characters
776/// per YAML 1.2 §5.7. Unknown escapes are kept verbatim so the harness can
777/// surface them as bare backslash-prefixed text.
778fn decode_double_quoted(text: &str) -> String {
779    let body = text.strip_prefix('"').unwrap_or(text);
780    let mut out = String::with_capacity(body.len());
781    let mut chars = body.chars();
782    while let Some(ch) = chars.next() {
783        if ch == '"' {
784            break;
785        }
786        if ch != '\\' {
787            out.push(ch);
788            continue;
789        }
790        let Some(next) = chars.next() else {
791            out.push('\\');
792            break;
793        };
794        match next {
795            '0' => out.push('\0'),
796            'a' => out.push('\u{07}'),
797            'b' => out.push('\u{08}'),
798            't' | '\t' => out.push('\t'),
799            'n' => out.push('\n'),
800            'v' => out.push('\u{0B}'),
801            'f' => out.push('\u{0C}'),
802            'r' => out.push('\r'),
803            'e' => out.push('\u{1B}'),
804            ' ' => out.push(' '),
805            '"' => out.push('"'),
806            '/' => out.push('/'),
807            '\\' => out.push('\\'),
808            'N' => out.push('\u{85}'),
809            '_' => out.push('\u{A0}'),
810            'L' => out.push('\u{2028}'),
811            'P' => out.push('\u{2029}'),
812            'x' => {
813                if let Some(c) = take_hex_char(&mut chars, 2) {
814                    out.push(c);
815                }
816            }
817            'u' => {
818                if let Some(c) = take_hex_char(&mut chars, 4) {
819                    out.push(c);
820                }
821            }
822            'U' => {
823                if let Some(c) = take_hex_char(&mut chars, 8) {
824                    out.push(c);
825                }
826            }
827            other => {
828                out.push('\\');
829                out.push(other);
830            }
831        }
832    }
833    out
834}
835
/// Consume up to `n` characters from `chars` and interpret them as the hex
/// digits of a Unicode scalar value.
///
/// Returns `None` when fewer than `n` characters were available, when any of
/// them is not an ASCII hex digit, or when the number is not a valid `char`
/// (for example a surrogate). The characters are consumed from `chars` in
/// every case.
fn take_hex_char(chars: &mut std::str::Chars<'_>, n: usize) -> Option<char> {
    let hex: String = chars.by_ref().take(n).collect();
    // `from_str_radix` tolerates a leading `+`, so the digits are validated
    // explicitly; otherwise `\x+f` would decode as U+000F. Checking bytes
    // also rejects multi-byte characters, whose byte length differs from `n`.
    if hex.len() != n || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32)
}
843
/// Render decoded scalar text in the yaml-test-suite event notation:
/// backslashes and the control characters listed below are written as
/// two-character backslash escapes (`\\`, `\n`, `\t`, `\r`, `\a`, `\b`, `\v`,
/// `\f`, `\e`, `\0`); every other character is copied through unchanged.
fn escape_for_event(text: &str) -> String {
    let mut rendered = String::with_capacity(text.len());
    for ch in text.chars() {
        let escape = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\t' => "\\t",
            '\r' => "\\r",
            '\u{07}' => "\\a",
            '\u{08}' => "\\b",
            '\u{0B}' => "\\v",
            '\u{0C}' => "\\f",
            '\u{1B}' => "\\e",
            '\0' => "\\0",
            _ => {
                rendered.push(ch);
                continue;
            }
        };
        rendered.push_str(escape);
    }
    rendered
}
866
/// Expand a tag that consists of a single leading `!` followed by text with
/// no further `!` into its `<!…>` long form. The bare non-specific tag `!`
/// becomes `<!>`. Any other shape (no leading `!`, or a second `!` such as in
/// `!!str` / `!e!tag`) yields `None`.
fn long_tag_builtin(tag: &str) -> Option<String> {
    let local = tag.strip_prefix('!')?;
    if local.contains('!') {
        return None;
    }
    // An empty `local` (tag == "!") naturally renders as `<!>`.
    Some(format!("<!{local}>"))
}
879
/// Split the text of a flat flow sequence (`[a, "b, c", 'd']`) into its
/// comma-separated items.
///
/// Returns `None` when the trimmed text is not wrapped in `[` … `]` or when
/// an item before a comma is empty. Commas inside single- or double-quoted
/// regions do not split; inside double quotes a backslash protects the next
/// character. An empty final item (trailing comma) is ignored. Nested
/// brackets get no special treatment.
fn simple_flow_sequence_items(text: &str) -> Option<Vec<String>> {
    #[derive(Clone, Copy)]
    enum Quoting {
        Outside,
        Single,
        Double { escaped: bool },
    }

    let inner = text.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if inner.is_empty() {
        return Some(Vec::new());
    }

    let mut items = Vec::new();
    let mut item_start = 0usize;
    let mut quoting = Quoting::Outside;

    for (pos, ch) in inner.char_indices() {
        quoting = match (quoting, ch) {
            // The character after a backslash is consumed unconditionally.
            (Quoting::Double { escaped: true }, _) => Quoting::Double { escaped: false },
            (Quoting::Double { escaped: false }, '\\') => Quoting::Double { escaped: true },
            (Quoting::Double { escaped: false }, '"') => Quoting::Outside,
            (Quoting::Single, '\'') => Quoting::Outside,
            (Quoting::Outside, '\'') => Quoting::Single,
            (Quoting::Outside, '"') => Quoting::Double { escaped: false },
            (Quoting::Outside, ',') => {
                let item = inner[item_start..pos].trim();
                if item.is_empty() {
                    return None;
                }
                items.push(item.to_string());
                item_start = pos + 1;
                Quoting::Outside
            }
            (unchanged, _) => unchanged,
        };
    }

    let tail = inner[item_start..].trim();
    if !tail.is_empty() {
        items.push(tail.to_string());
    }
    Some(items)
}
936
/// Escape block-scalar body text for event output: backslash, newline, tab
/// and carriage return become `\\`, `\n`, `\t` and `\r`; all other characters
/// pass through untouched.
fn escape_block_scalar_text(text: &str) -> String {
    let mut rendered = String::with_capacity(text.len());
    for ch in text.chars() {
        let escape = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\t' => "\\t",
            '\r' => "\\r",
            _ => {
                rendered.push(ch);
                continue;
            }
        };
        rendered.push_str(escape);
    }
    rendered
}
950
951/// If `value_node` encodes a literal (`|`) or folded (`>`) block scalar,
952/// return the folded scalar body. Headers with explicit chomping (`-` strip,
953/// `+` keep) or indent indicators are recognized; chomping is applied to the
954/// final body. Default chomping is "clip" (single trailing newline).
955fn extract_block_scalar_body(value_node: &SyntaxNode) -> Option<(char, String)> {
956    let tokens: Vec<_> = value_node
957        .descendants_with_tokens()
958        .filter_map(|el| el.into_token())
959        .filter(|tok| {
960            matches!(
961                tok.kind(),
962                SyntaxKind::YAML_SCALAR
963                    | SyntaxKind::NEWLINE
964                    | SyntaxKind::WHITESPACE
965                    | SyntaxKind::YAML_COMMENT,
966            )
967        })
968        .collect();
969    fold_block_scalar_tokens(&tokens, block_scalar_parent_indent(value_node))
970}
971
972/// Compute the column of the start-of-line for the parent scope of a
973/// block-scalar value, used to anchor explicit indent indicators per
974/// YAML 1.2 §8.1.1.1: when a block-scalar header carries an indentation
975/// indicator `m`, the absolute content indent is `parent_indent + m`.
976///
977/// Walks up to the YAML_BLOCK_MAP_ENTRY (for map values) or treats a
978/// passed YAML_BLOCK_SEQUENCE_ITEM as its own parent. Other shapes
979/// (e.g. top-level YAML_DOCUMENT) fall back to the node's own column,
980/// which is 0 at the document level.
981fn block_scalar_parent_indent(value_node: &SyntaxNode) -> usize {
982    let target = match value_node.kind() {
983        SyntaxKind::YAML_BLOCK_MAP_VALUE => value_node
984            .parent()
985            .filter(|p| p.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
986            .unwrap_or_else(|| value_node.clone()),
987        _ => value_node.clone(),
988    };
989    column_of_node_start(&target)
990}
991
992fn column_of_node_start(node: &SyntaxNode) -> usize {
993    let offset: usize = node.text_range().start().into();
994    let root = node.ancestors().last().unwrap_or_else(|| node.clone());
995    let text = root.text().to_string();
996    let cap = offset.min(text.len());
997    let prefix = &text[..cap];
998    match prefix.rfind('\n') {
999        Some(nl) => offset.saturating_sub(nl + 1),
1000        None => offset,
1001    }
1002}
1003
1004/// Variant of [`extract_block_scalar_body`] that walks a full `YAML_DOCUMENT`
1005/// node and applies block-scalar folding to the tokens *after* a
1006/// `YAML_DOCUMENT_START` marker. Used for the directive-end-with-payload
1007/// pattern (`--- |\n  ab\n  cd\n`) where the block-scalar header is packed
1008/// onto the marker line itself rather than being a block-map value.
1009fn extract_scalar_doc_block_body(doc: &SyntaxNode) -> Option<(char, String)> {
1010    let mut started = false;
1011    let mut tokens = Vec::new();
1012    for el in doc.descendants_with_tokens() {
1013        let Some(tok) = el.into_token() else { continue };
1014        if !started {
1015            if tok.kind() == SyntaxKind::YAML_DOCUMENT_START {
1016                started = true;
1017            }
1018            continue;
1019        }
1020        match tok.kind() {
1021            SyntaxKind::YAML_DOCUMENT_END => break,
1022            SyntaxKind::YAML_SCALAR
1023            | SyntaxKind::NEWLINE
1024            | SyntaxKind::WHITESPACE
1025            | SyntaxKind::YAML_COMMENT => tokens.push(tok),
1026            _ => {}
1027        }
1028    }
1029    fold_block_scalar_tokens(&tokens, 0)
1030}
1031
1032/// Detect a top-level (no `YAML_DOCUMENT_START` marker) block-scalar document
1033/// of the form `>\n …` or `|\n …`. Walks the document's content tokens and
1034/// applies block-scalar folding when the first scalar token is a bare
1035/// block-scalar header. Returns `None` otherwise so plain / quoted scalar
1036/// handling can proceed.
1037fn extract_top_level_block_body(doc: &SyntaxNode) -> Option<(char, String)> {
1038    if doc
1039        .descendants_with_tokens()
1040        .filter_map(|el| el.into_token())
1041        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START)
1042    {
1043        return None;
1044    }
1045    let tokens: Vec<_> = doc
1046        .descendants_with_tokens()
1047        .filter_map(|el| el.into_token())
1048        .filter(|tok| {
1049            matches!(
1050                tok.kind(),
1051                SyntaxKind::YAML_SCALAR
1052                    | SyntaxKind::NEWLINE
1053                    | SyntaxKind::WHITESPACE
1054                    | SyntaxKind::YAML_COMMENT,
1055            )
1056        })
1057        .collect();
1058    // Same shape tolerance as `fold_block_scalar_tokens`: v1 emits the
1059    // header as a standalone scalar, v2 emits the whole block scalar
1060    // (header + newline + body) as a single token. Detect the header by
1061    // inspecting up to the first newline.
1062    let first = tokens.iter().find(|tok| {
1063        if tok.kind() != SyntaxKind::YAML_SCALAR {
1064            return false;
1065        }
1066        let header_part = tok.text().split('\n').next().unwrap_or("");
1067        parse_block_scalar_indicator(header_part).is_some()
1068    })?;
1069    let _ = first;
1070    fold_block_scalar_tokens(&tokens, 0)
1071}
1072
1073fn fold_block_scalar_tokens(
1074    tokens: &[SyntaxToken],
1075    parent_indent: usize,
1076) -> Option<(char, String)> {
1077    // Locate the header. v1 emits the header (`|`, `|+`, `>1` …) as a
1078    // standalone YAML_SCALAR token and the body as separate per-line
1079    // tokens. v2 emits the entire block scalar (header + newline + body)
1080    // as a single YAML_SCALAR token. Detect either shape by inspecting
1081    // the chars before the first `\n` of the candidate token.
1082    let header_idx = tokens.iter().position(|t| {
1083        if t.kind() != SyntaxKind::YAML_SCALAR {
1084            return false;
1085        }
1086        let header_part = t.text().split('\n').next().unwrap_or("");
1087        parse_block_scalar_indicator(header_part).is_some()
1088    })?;
1089    let header_text = tokens[header_idx].text();
1090    let header_part = header_text.split('\n').next().unwrap_or("");
1091    let (indicator, chomp, explicit_indent) = parse_block_scalar_indicator(header_part)?;
1092
1093    // Reconstruct the body source. Including `WHITESPACE` and
1094    // `YAML_COMMENT` tokens preserves the indentation needed for
1095    // content-indent calculation and lets a `# ...` line at column 0
1096    // (DK3J) land inside the body, while a less-indented `# Comment`
1097    // after a fully-indented body region (7T8X) gets recognized as a
1098    // body terminator.
1099    let mut raw = String::new();
1100    let unified_token = header_text.len() > header_part.len();
1101    if unified_token {
1102        // v2 shape: peel the header and its trailing newline out of the
1103        // single token, keep the rest as the body prefix. Then append
1104        // any later tokens verbatim.
1105        raw.push_str(&header_text[header_part.len() + 1..]);
1106        for tok in &tokens[header_idx + 1..] {
1107            raw.push_str(tok.text());
1108        }
1109    } else {
1110        // v1 shape: skip the standalone header's trailing NEWLINE and
1111        // stitch every later token verbatim.
1112        let mut skipped_header_newline = false;
1113        for tok in &tokens[header_idx + 1..] {
1114            if !skipped_header_newline && tok.kind() == SyntaxKind::NEWLINE {
1115                skipped_header_newline = true;
1116                continue;
1117            }
1118            raw.push_str(tok.text());
1119        }
1120    }
1121
1122    let raw_trailing_newlines = raw.chars().rev().take_while(|c| *c == '\n').count();
1123
1124    let lines: Vec<&str> = raw.split('\n').collect();
1125
1126    // Per YAML 1.2 §8.1.1.1, the content indentation level is set by the
1127    // first non-empty line of the contents — unless an explicit indent
1128    // indicator is given in the header, in which case the absolute
1129    // content indent is `parent_indent + m`. `parent_indent` is the
1130    // column of the parent block (block-map-entry or block-sequence-item)
1131    // that contains the block-scalar; nested map/seq values pick up
1132    // the right anchor (e.g. `- aaa: |2` → parent col 2 + 2 → 4).
1133    let content_indent = match explicit_indent {
1134        Some(m) => parent_indent + m,
1135        None => lines
1136            .iter()
1137            .find(|l| !l.trim().is_empty())
1138            .map(|l| l.chars().take_while(|c| *c == ' ').count())
1139            .unwrap_or(0),
1140    };
1141
1142    // Truncate at the first non-empty line whose indentation drops below the
1143    // content indent — that's where the block scalar's body ends per spec.
1144    // Trailing blanks coming from the source are kept; only the synthetic
1145    // final empty produced by `split('\n')` over a trailing newline is
1146    // dropped (and only when we walked off the end of the input — when we
1147    // broke out early on a dedented line, the trailing blank is real).
1148    let mut body_lines: Vec<&str> = Vec::new();
1149    let mut seen_content = false;
1150    let mut broke_out = false;
1151    for line in lines.iter() {
1152        let is_blank = line.trim().is_empty();
1153        let indent = line.chars().take_while(|c| *c == ' ').count();
1154        if !is_blank && seen_content && indent < content_indent {
1155            broke_out = true;
1156            break;
1157        }
1158        body_lines.push(line);
1159        if !is_blank {
1160            seen_content = true;
1161        }
1162    }
1163    if !broke_out && body_lines.last().is_some_and(|s| s.is_empty()) {
1164        body_lines.pop();
1165    }
1166
1167    let stripped: Vec<BlockBodyLine> = body_lines
1168        .iter()
1169        .map(|l| {
1170            // Always strip up to `content_indent` columns; for `|` style this
1171            // preserves trailing spaces past the content indent (T26H).
1172            let text = if l.len() >= content_indent {
1173                l[content_indent..].to_string()
1174            } else {
1175                String::new()
1176            };
1177            // "Blank" for folding is decided on the stripped text, not the
1178            // raw line: a line of pure whitespace less-indented than content
1179            // (e.g. ` ` with content_indent=2) strips to empty and is blank,
1180            // while a stripped tab (` \t` with content_indent=1 → `\t`) is
1181            // content, not blank. More-indented lines (per §8.1.3) preserve
1182            // literal line breaks; the spec defines them as content lines
1183            // beginning with extra whitespace, so we test the stripped text's
1184            // first character rather than counting only leading spaces (which
1185            // would miss tab-prefixed content like R4YG/MJS9).
1186            let is_blank = text.is_empty();
1187            let is_mi = !is_blank && text.starts_with([' ', '\t']);
1188            BlockBodyLine {
1189                text,
1190                is_blank,
1191                is_mi,
1192            }
1193        })
1194        .collect();
1195
1196    let folded = match indicator {
1197        '|' => stripped
1198            .iter()
1199            .map(|l| l.text.as_str())
1200            .collect::<Vec<_>>()
1201            .join("\n"),
1202        '>' => fold_greater_lines(&stripped),
1203        _ => unreachable!(),
1204    };
1205
1206    let trimmed = folded.trim_end_matches('\n');
1207    let body = match chomp {
1208        BlockScalarChomp::Strip => trimmed.to_string(),
1209        BlockScalarChomp::Clip => {
1210            if trimmed.is_empty() {
1211                String::new()
1212            } else {
1213                format!("{trimmed}\n")
1214            }
1215        }
1216        BlockScalarChomp::Keep => {
1217            // Keep chomping preserves the line break after the last
1218            // content line plus one line break per trailing empty line.
1219            // "Empty" is checked on the stripped text (so a raw `  `
1220            // line stripped to ` ` is content, not empty). Falling back
1221            // on `raw_trailing_newlines` for content-free bodies keeps
1222            // bare-blank-keep cases (`|+\n\n\n`) producing the right
1223            // count without a spurious extra newline.
1224            let body_trailing_empty = stripped
1225                .iter()
1226                .rev()
1227                .take_while(|l| l.text.is_empty())
1228                .count();
1229            let count = if seen_content {
1230                body_trailing_empty + 1
1231            } else {
1232                raw_trailing_newlines
1233            };
1234            format!("{trimmed}{}", "\n".repeat(count))
1235        }
1236    };
1237    Some((indicator, body))
1238}
1239
/// One line of a block-scalar body after the content indentation has been
/// removed, together with the classification used when folding.
struct BlockBodyLine {
    /// Line text with the content indent stripped off.
    text: String,
    /// `true` when the stripped text is empty.
    is_blank: bool,
    /// `true` for a "more-indented" line: non-blank and starting with a
    /// space or tab after stripping.
    is_mi: bool,
}
1245
1246/// Apply the YAML 1.2 §8.1.3 folded-scalar rules to a sequence of
1247/// content-indent-stripped body lines:
1248/// - Each leading blank line contributes a single `\n` to the output.
1249/// - Between two adjacent non-MI content lines, a single line break folds to
1250///   ` `; a run of `n` blank lines folds to `n` `\n` chars.
1251/// - When either side of the boundary is more-indented, *all* line breaks
1252///   between the two content lines are preserved literally.
1253fn fold_greater_lines(lines: &[BlockBodyLine]) -> String {
1254    let mut out = String::new();
1255    let mut idx = 0usize;
1256
1257    while idx < lines.len() && lines[idx].is_blank {
1258        out.push('\n');
1259        idx += 1;
1260    }
1261    if idx >= lines.len() {
1262        return out;
1263    }
1264
1265    out.push_str(&lines[idx].text);
1266    let mut prev_is_mi = lines[idx].is_mi;
1267    idx += 1;
1268
1269    while idx < lines.len() {
1270        let mut empty_count = 0usize;
1271        while idx < lines.len() && lines[idx].is_blank {
1272            empty_count += 1;
1273            idx += 1;
1274        }
1275        if idx >= lines.len() {
1276            break;
1277        }
1278        let line = &lines[idx];
1279        let mi_involved = prev_is_mi || line.is_mi;
1280        if mi_involved {
1281            for _ in 0..(empty_count + 1) {
1282                out.push('\n');
1283            }
1284        } else if empty_count == 0 {
1285            out.push(' ');
1286        } else {
1287            for _ in 0..empty_count {
1288                out.push('\n');
1289            }
1290        }
1291        out.push_str(&line.text);
1292        prev_is_mi = line.is_mi;
1293        idx += 1;
1294    }
1295    out
1296}
1297
/// Chomping mode selected by a block-scalar header; decides what happens to
/// the line breaks at the end of the body.
#[derive(Clone, Copy)]
enum BlockScalarChomp {
    /// Default (no indicator): a non-empty body ends with exactly one `\n`.
    Clip,
    /// `-` indicator: all trailing line breaks are removed.
    Strip,
    /// `+` indicator: trailing line breaks are preserved.
    Keep,
}
1304
1305fn parse_block_scalar_indicator(text: &str) -> Option<(char, BlockScalarChomp, Option<usize>)> {
1306    let mut chars = text.chars().peekable();
1307    let indicator = match chars.next()? {
1308        '|' => '|',
1309        '>' => '>',
1310        _ => return None,
1311    };
1312    let mut chomp = BlockScalarChomp::Clip;
1313    let mut seen_chomp = false;
1314    let mut indent: Option<usize> = None;
1315    while let Some(&ch) = chars.peek() {
1316        match ch {
1317            '+' if !seen_chomp => {
1318                chomp = BlockScalarChomp::Keep;
1319                seen_chomp = true;
1320                chars.next();
1321            }
1322            '-' if !seen_chomp => {
1323                chomp = BlockScalarChomp::Strip;
1324                seen_chomp = true;
1325                chars.next();
1326            }
1327            '1'..='9' if indent.is_none() => {
1328                indent = Some(ch.to_digit(10).unwrap() as usize);
1329                chars.next();
1330            }
1331            ' ' | '\t' => {
1332                // Trailing whitespace + optional comment is allowed after
1333                // the indicators per YAML 1.2 §8.1.1 (the header line
1334                // can carry a comment, e.g. `| # description`).
1335                for rest in chars.by_ref() {
1336                    if rest == '#' {
1337                        // Rest of the header line is a comment — ignore.
1338                        return Some((indicator, chomp, indent));
1339                    }
1340                    if rest != ' ' && rest != '\t' {
1341                        return None;
1342                    }
1343                }
1344                return Some((indicator, chomp, indent));
1345            }
1346            _ => return None,
1347        }
1348    }
1349    Some((indicator, chomp, indent))
1350}
1351
/// Fold a (possibly multi-line) plain scalar into one line: each line is
/// trimmed, empty lines are dropped, and the remaining pieces are joined
/// with single spaces.
///
/// Lines whose first non-blank character is `#` are treated as YAML comment
/// lines and skipped as well, because the lexer currently leaves them
/// embedded in scalar token text inside multi-line flow continuations.
fn fold_plain_scalar(text: &str) -> String {
    text.split('\n')
        .map(str::trim)
        .filter(|piece| !piece.is_empty() && !piece.starts_with('#'))
        .collect::<Vec<_>>()
        .join(" ")
}
1369
1370fn project_flow_map_entries(flow_map: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
1371    // Walk the flow_map's children left-to-right, tracking any orphan
1372    // scalar text (`pending`) that sits between entries. A scalar that
1373    // isn't enclosed in a `YAML_FLOW_MAP_ENTRY` reaches us in two
1374    // shapes:
1375    //
1376    //   1. A multi-line plain scalar that the v2 scanner couldn't
1377    //      register as a simple-key candidate before the `:` arrived
1378    //      (NJ66, ZF4X, UDR7's `sky`, 8KB6, ...). In that case the
1379    //      following entry has an empty `KEY` (just the `:`), and the
1380    //      orphan IS the key — we merge them.
1381    //
1382    //   2. A standalone scalar with no `:` at all (`{a, b: c}` shape;
1383    //      8KB6's `single line, ...`). YAML 1.2 says this is a key with
1384    //      an implicit empty value, projecting as `=VAL :a` then
1385    //      `=VAL :`.
1386    //
1387    // Both shapes resolve to flushing `pending` either as the key of
1388    // the next empty-key entry or as a value-less standalone entry
1389    // (when we hit a `,` or `}` before a matching empty-key entry).
1390    let mut pending = String::new();
1391    let mut pending_has_content = false;
1392    for child in flow_map.children_with_tokens() {
1393        match child {
1394            rowan::NodeOrToken::Token(tok) => match tok.kind() {
1395                SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT => {
1396                    if pending_has_content {
1397                        pending.push_str(tok.text());
1398                    }
1399                }
1400                SyntaxKind::YAML_SCALAR => {
1401                    let text = tok.text();
1402                    match text {
1403                        "{" | "}" => {}
1404                        "," => {
1405                            if pending_has_content {
1406                                flush_pending_orphan(&pending, handles, out);
1407                                pending.clear();
1408                                pending_has_content = false;
1409                            }
1410                        }
1411                        _ => {
1412                            pending.push_str(text);
1413                            pending_has_content = true;
1414                        }
1415                    }
1416                }
1417                SyntaxKind::YAML_KEY => {
1418                    pending.push_str(tok.text());
1419                    pending_has_content = true;
1420                }
1421                _ => {}
1422            },
1423            rowan::NodeOrToken::Node(entry) if entry.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY => {
1424                project_flow_map_entry(
1425                    &entry,
1426                    if pending_has_content {
1427                        Some(pending.as_str())
1428                    } else {
1429                        None
1430                    },
1431                    handles,
1432                    out,
1433                );
1434                pending.clear();
1435                pending_has_content = false;
1436            }
1437            _ => {}
1438        }
1439    }
1440    if pending_has_content {
1441        flush_pending_orphan(&pending, handles, out);
1442    }
1443}
1444
1445/// Flush an orphan scalar that wasn't followed by a matching
1446/// empty-key entry. YAML 1.2 treats this as an implicit-value entry
1447/// (`{a, b: c}` ≡ `{a: ~, b: c}`), so the projection emits the key
1448/// then an empty value.
1449fn flush_pending_orphan(pending: &str, handles: &TagHandles, out: &mut Vec<String>) {
1450    let trimmed = pending.trim();
1451    if trimmed.is_empty() {
1452        return;
1453    }
1454    if trimmed.starts_with('"') || trimmed.starts_with('\'') {
1455        if trimmed.contains('\n') {
1456            out.push(quoted_val_event_multi_line(trimmed));
1457        } else {
1458            out.push(quoted_val_event(trimmed));
1459        }
1460    } else {
1461        let folded = fold_plain_scalar(trimmed);
1462        let stripped = strip_explicit_key_indicator(&folded);
1463        if stripped.is_empty() {
1464            out.push("=VAL :".to_string());
1465        } else {
1466            // Resolve a leading anchor/tag/handle on the orphan key the
1467            // same way `flow_scalar_event` does for in-entry scalars.
1468            out.push(flow_scalar_event(stripped, handles));
1469        }
1470    }
1471    out.push("=VAL :".to_string());
1472}
1473
1474fn project_flow_map_entry(
1475    entry: &SyntaxNode,
1476    external_key: Option<&str>,
1477    handles: &TagHandles,
1478    out: &mut Vec<String>,
1479) {
1480    let key_node = entry
1481        .children()
1482        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
1483        .expect("flow map key");
1484    let value_node = entry
1485        .children()
1486        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
1487        .expect("flow map value");
1488
1489    let has_explicit_colon = key_node
1490        .children_with_tokens()
1491        .filter_map(|el| el.into_token())
1492        .any(|tok| tok.kind() == SyntaxKind::YAML_COLON);
1493    let key_has_content = key_node
1494        .descendants_with_tokens()
1495        .filter_map(|el| el.into_token())
1496        .any(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_KEY));
1497
1498    // Include WHITESPACE / NEWLINE so v2's separately-emitted `?`
1499    // (`YAML_KEY`) and key scalar (`YAML_SCALAR`) keep the original
1500    // trivia between them, letting `strip_explicit_key_indicator`
1501    // recognize the `?<sp>` pattern. v1 emitted both as a single
1502    // `YAML_KEY` token so the join was already a no-op there.
1503    let mut raw_key = key_node
1504        .descendants_with_tokens()
1505        .filter_map(|el| el.into_token())
1506        .filter(|tok| {
1507            matches!(
1508                tok.kind(),
1509                SyntaxKind::YAML_SCALAR
1510                    | SyntaxKind::YAML_KEY
1511                    | SyntaxKind::WHITESPACE
1512                    | SyntaxKind::NEWLINE
1513            )
1514        })
1515        .map(|tok| tok.text().to_string())
1516        .collect::<Vec<_>>()
1517        .join("");
1518
1519    // External key prepends only when the entry's own key is empty
1520    // (the v2-scanner orphan-merge case): the orphan provides the key
1521    // bytes, the entry just contributes the `:` and the value.
1522    if let Some(ext) = external_key
1523        && !key_has_content
1524    {
1525        raw_key = format!("{ext}{raw_key}");
1526    } else if let Some(ext) = external_key {
1527        // Pending was non-empty but this entry already has a real
1528        // key — flush pending as a standalone implicit-value entry
1529        // first so neither side gets dropped.
1530        flush_pending_orphan(ext, handles, out);
1531    }
1532
1533    if has_explicit_colon {
1534        // Strip the explicit-key `?` indicator (`{ ? foo : v }`) from
1535        // the projected key text. A bare `? :` entry (key reduces to
1536        // empty after stripping) projects to an empty `=VAL :`.
1537        let key_for_classify = raw_key.trim();
1538        let stripped_key = strip_explicit_key_indicator(key_for_classify);
1539        if stripped_key.is_empty() {
1540            out.push("=VAL :".to_string());
1541        } else if stripped_key.starts_with('"') || stripped_key.starts_with('\'') {
1542            if stripped_key.contains('\n') {
1543                out.push(quoted_val_event_multi_line(stripped_key));
1544            } else {
1545                out.push(quoted_val_event(stripped_key));
1546            }
1547        } else {
1548            // Multi-line plain key text needs folding before
1549            // resolution; flow_scalar_event does it for plain text but
1550            // bypasses folding when the input contains explicit tag
1551            // bytes — handle the plain branch here so multi-line
1552            // orphans collapse to a single line.
1553            let folded = fold_plain_scalar(stripped_key);
1554            out.push(flow_scalar_event(&folded, handles));
1555        }
1556        project_flow_map_value(&value_node, handles, out);
1557    } else {
1558        let raw_value = value_node
1559            .descendants_with_tokens()
1560            .filter_map(|el| el.into_token())
1561            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
1562            .map(|tok| tok.text().to_string())
1563            .collect::<Vec<_>>()
1564            .join("");
1565        let combined = format!("{raw_key}{raw_value}");
1566        let folded = fold_plain_scalar(&combined);
1567        let stripped = strip_explicit_key_indicator(&folded);
1568        if stripped.is_empty() {
1569            out.push("=VAL :".to_string());
1570        } else {
1571            out.push(plain_val_event(stripped));
1572        }
1573        out.push("=VAL :".to_string());
1574    }
1575}
1576
1577/// Project a `YAML_FLOW_MAP_VALUE` node, recursing into nested flow
1578/// collections (`+SEQ [] ... -SEQ`, `+MAP {} ... -MAP`) when present so that
1579/// multi-line nested flow values like `{ a: [ b, c, { d: [e, f] } ] }`
1580/// produce structured event streams instead of one slurped scalar.
1581fn project_flow_map_value(value_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
1582    if let Some(flow_seq) = value_node
1583        .children()
1584        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1585    {
1586        out.push("+SEQ []".to_string());
1587        project_flow_sequence_items_cst(&flow_seq, handles, out);
1588        out.push("-SEQ".to_string());
1589        return;
1590    }
1591    if let Some(nested_map) = value_node
1592        .children()
1593        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1594    {
1595        out.push("+MAP {}".to_string());
1596        project_flow_map_entries(&nested_map, handles, out);
1597        out.push("-MAP".to_string());
1598        return;
1599    }
1600
1601    // Include `YAML_COLON` tokens alongside `YAML_SCALAR` so a
1602    // plain-scalar value that begins with `:` (e.g. 5T43's
1603    // `{ "key"::value }` and 58MP's `{x: :x}` — leading `:` after
1604    // the entry's key indicator) carries its colon into the event
1605    // body. The scanner emits the leading `:` as a stray Value token
1606    // that the v2 builder lands inside the VALUE wrapper; without
1607    // collecting `YAML_COLON` here the projection drops it and the
1608    // event becomes `=VAL :value` instead of `=VAL ::value`.
1609    let raw_value = value_node
1610        .descendants_with_tokens()
1611        .filter_map(|el| el.into_token())
1612        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_COLON))
1613        .map(|tok| tok.text().to_string())
1614        .collect::<Vec<_>>()
1615        .join("");
1616    out.push(flow_scalar_event(&raw_value, handles));
1617}
1618
1619/// CST-walking variant of flow-sequence projection. Each
1620/// `YAML_FLOW_SEQUENCE_ITEM` may contain a nested `YAML_FLOW_SEQUENCE` /
1621/// `YAML_FLOW_MAP`; if neither is present we fall back to the text-based
1622/// `project_flow_seq_item` for plain/quoted scalar items.
1623fn project_flow_sequence_items_cst(
1624    flow_seq: &SyntaxNode,
1625    handles: &TagHandles,
1626    out: &mut Vec<String>,
1627) {
1628    for item in flow_seq
1629        .children()
1630        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
1631    {
1632        if let Some(nested_seq) = item
1633            .children()
1634            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1635        {
1636            out.push("+SEQ []".to_string());
1637            project_flow_sequence_items_cst(&nested_seq, handles, out);
1638            out.push("-SEQ".to_string());
1639            continue;
1640        }
1641        if let Some(nested_map) = item
1642            .children()
1643            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1644        {
1645            out.push("+MAP {}".to_string());
1646            project_flow_map_entries(&nested_map, handles, out);
1647            out.push("-MAP".to_string());
1648            continue;
1649        }
1650        // Build the item text from scalar/key/colon tokens plus
1651        // structural whitespace so an embedded `:` (e.g. an implicit
1652        // flow-map entry like `'k' : v` written inside `[...]`, see
1653        // 87E4 / L9U5 / LQZ7) survives into `flow_kv_split`. Skipping
1654        // colons collapsed the entry into a single `=VAL :scalar` and
1655        // hid the `+MAP {} ... -MAP` wrap; preserving them lets
1656        // `project_flow_seq_item` recognize the kv pattern.
1657        // `YAML_COMMENT` tokens stay excluded so leading/trailing
1658        // comments inside multi-line items don't leak into the value.
1659        let item_text: String = item
1660            .descendants_with_tokens()
1661            .filter_map(|el| el.into_token())
1662            .filter(|tok| {
1663                matches!(
1664                    tok.kind(),
1665                    SyntaxKind::YAML_SCALAR
1666                        | SyntaxKind::YAML_KEY
1667                        | SyntaxKind::YAML_COLON
1668                        | SyntaxKind::WHITESPACE
1669                        | SyntaxKind::NEWLINE
1670                )
1671            })
1672            .map(|tok| tok.text().to_string())
1673            .collect();
1674        project_flow_seq_item(&item_text, handles, out);
1675    }
1676}
1677
/// Find the byte offset of the key/value `:` in a block-context scalar.
///
/// Leading spaces and tabs are skipped. If the text then opens with a
/// double- or single-quoted body (`"key": value`, `'key': value`), that
/// whole body is stepped over first, honoring `\` escapes inside double
/// quotes and the `''` escape inside single quotes. After that, the first
/// `:` followed by a space, tab, line break, or end-of-input is returned.
/// A `:` followed by anything else (as in `http://...`, or the flow
/// indicators in `:,`) is literal text. Quotes appearing later in the text
/// are not special, matching the plain-scalar rule that embedded `"` / `'`
/// are literal.
///
/// Returns `None` when no such separator exists.
fn find_block_scalar_kv_split(text: &str) -> Option<usize> {
    let bytes = text.as_bytes();
    let mut cursor = bytes
        .iter()
        .take_while(|b| matches!(b, b' ' | b'\t'))
        .count();

    match bytes.get(cursor).copied() {
        Some(b'"') => {
            cursor += 1;
            let mut after_backslash = false;
            while let Some(&byte) = bytes.get(cursor) {
                cursor += 1;
                if after_backslash {
                    // The escaped byte is consumed without inspection.
                    after_backslash = false;
                } else if byte == b'\\' {
                    after_backslash = true;
                } else if byte == b'"' {
                    break;
                }
            }
        }
        Some(b'\'') => {
            cursor += 1;
            while let Some(&byte) = bytes.get(cursor) {
                cursor += 1;
                if byte != b'\'' {
                    continue;
                }
                // `''` is an escaped quote; a lone `'` closes the body.
                if bytes.get(cursor) == Some(&b'\'') {
                    cursor += 1;
                } else {
                    break;
                }
            }
        }
        _ => {}
    }

    // In block context only whitespace or end-of-input may follow the
    // key/value indicator. Flow terminators (`,`, `}`, `]`) are literal
    // here: `- :,` is the single scalar `:,`, not an empty-key map.
    bytes
        .iter()
        .enumerate()
        .skip(cursor)
        .find_map(|(pos, &byte)| {
            if byte != b':' {
                return None;
            }
            match bytes.get(pos + 1) {
                None | Some(b' ' | b'\t' | b'\n' | b'\r') => Some(pos),
                Some(_) => None,
            }
        })
}
1745
1746/// Project a single scalar (without surrounding `+MAP`/`-MAP`) for an inline
1747/// map key or value position. Anchors/tags are decomposed in canonical order;
1748/// alias references (`*name`) emit `=ALI`. An empty body emits `=VAL :`.
1749fn project_inline_scalar(text: &str, handles: &TagHandles, out: &mut Vec<String>) {
1750    let trimmed = text.trim();
1751    if trimmed.is_empty() {
1752        out.push("=VAL :".to_string());
1753        return;
1754    }
1755    if trimmed.starts_with('*') {
1756        out.push(format!("=ALI {trimmed}"));
1757        return;
1758    }
1759    let (anchor, body_tag, body) = decompose_scalar(trimmed, handles);
1760    out.push(scalar_event(anchor, body_tag.as_deref(), body));
1761}
1762
1763fn project_block_sequence_items(
1764    seq_node: &SyntaxNode,
1765    handles: &TagHandles,
1766    out: &mut Vec<String>,
1767) {
1768    for item in seq_node
1769        .children()
1770        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
1771    {
1772        if let Some(nested_seq) = item
1773            .children()
1774            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
1775        {
1776            out.push("+SEQ".to_string());
1777            project_block_sequence_items(&nested_seq, handles, out);
1778            out.push("-SEQ".to_string());
1779            continue;
1780        }
1781        // Inline-map sequence item: `- key: value` (with optional continuation
1782        // lines that the parser captures as a nested YAML_BLOCK_MAP). The
1783        // direct YAML_SCALAR/YAML_TAG/whitespace token chain encodes the first
1784        // entry; subsequent entries live in the nested map node. Including
1785        // YAML_TAG keeps tagged empty keys/values (`- !!str : !!null`) intact
1786        // so `decompose_scalar` can recover the tag.
1787        let direct_scalar: String = item
1788            .children_with_tokens()
1789            .filter_map(|el| el.into_token())
1790            .filter(|tok| {
1791                matches!(
1792                    tok.kind(),
1793                    SyntaxKind::YAML_SCALAR
1794                        | SyntaxKind::YAML_TAG
1795                        | SyntaxKind::YAML_KEY
1796                        | SyntaxKind::YAML_COLON
1797                        | SyntaxKind::WHITESPACE,
1798                )
1799            })
1800            .map(|tok| tok.text().to_string())
1801            .collect();
1802        if let Some(colon_idx) = find_block_scalar_kv_split(&direct_scalar) {
1803            let nested_map = item
1804                .children()
1805                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP);
1806            out.push("+MAP".to_string());
1807            project_inline_scalar(&direct_scalar[..colon_idx], handles, out);
1808            project_inline_scalar(&direct_scalar[colon_idx + 1..], handles, out);
1809            if let Some(nm) = nested_map {
1810                project_block_map_entries(&nm, handles, out);
1811            }
1812            out.push("-MAP".to_string());
1813            continue;
1814        }
1815        if let Some(nested_map) = item
1816            .children()
1817            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1818        {
1819            out.push(map_open_event_for_block_map(&nested_map, handles));
1820            project_block_map_entries(&nested_map, handles, out);
1821            out.push("-MAP".to_string());
1822            continue;
1823        }
1824        if let Some(flow_seq) = item
1825            .children()
1826            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
1827        {
1828            let flow_text = flow_seq.text().to_string();
1829            if let Some(flow_items) = simple_flow_sequence_items(&flow_text) {
1830                out.push("+SEQ []".to_string());
1831                for value in flow_items {
1832                    project_flow_seq_item(&value, handles, out);
1833                }
1834                out.push("-SEQ".to_string());
1835                continue;
1836            }
1837        }
1838        if let Some(flow_map) = item
1839            .children()
1840            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1841        {
1842            out.push("+MAP {}".to_string());
1843            project_flow_map_entries(&flow_map, handles, out);
1844            out.push("-MAP".to_string());
1845            continue;
1846        }
1847        if let Some((indicator, body)) = extract_block_scalar_body(&item) {
1848            let escaped = escape_block_scalar_text(&body);
1849            out.push(format!("=VAL {indicator}{escaped}"));
1850            continue;
1851        }
1852        let item_tag = item
1853            .descendants_with_tokens()
1854            .filter_map(|el| el.into_token())
1855            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1856            .map(|tok| tok.text().to_string());
1857        let scalar_text = item
1858            .descendants_with_tokens()
1859            .filter_map(|el| el.into_token())
1860            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
1861            .map(|tok| tok.text().to_string())
1862            .collect::<Vec<_>>()
1863            .join("");
1864        let scalar_trimmed = scalar_text.trim();
1865        let event = if scalar_trimmed.starts_with('*') {
1866            format!("=ALI {scalar_trimmed}")
1867        } else {
1868            // Combine the optional `YAML_TAG` token (already separated from
1869            // the scalar text by the parser) with anchors/tags found in the
1870            // scalar body, and render the YAML event in canonical
1871            // `&anchor <tag> :body` order.
1872            let item_long_tag = item_tag
1873                .as_deref()
1874                .and_then(|t| resolve_long_tag(t, handles));
1875            let (anchor, body_tag, body) = decompose_scalar(scalar_trimmed, handles);
1876            let long_tag = item_long_tag.or(body_tag);
1877            let folded;
1878            let body_for_event: &str = if body.contains('\n') {
1879                folded = fold_plain_scalar(body);
1880                &folded
1881            } else {
1882                body
1883            };
1884            scalar_event(anchor, long_tag.as_deref(), body_for_event)
1885        };
1886        out.push(event);
1887    }
1888}
1889
1890/// Decompose a node-property + scalar string into `(anchor, long_tag, body)`,
1891/// peeling off any leading `&anchor` and tag shorthand in either order
1892/// (`&a !!str foo` or `!!str &a foo`). Returns the raw body trimmed.
1893/// Build the `+SEQ` open event for a YAML_BLOCK_SEQUENCE, attaching any
1894/// document-level node properties (a tag, or a `&anchor` carried by the
1895/// block-sequence header line) that precede the first sequence item. The
1896/// parser stores those properties as YAML_TAG / YAML_SCALAR siblings of
1897/// the YAML_BLOCK_SEQUENCE_ITEM children, in source order.
1898fn seq_open_event(seq_node: &SyntaxNode, handles: &TagHandles) -> String {
1899    let mut anchor: Option<String> = None;
1900    let mut long_tag: Option<String> = None;
1901    // v2 emits anchors/tags as siblings of the YAML_BLOCK_SEQUENCE within
1902    // the parent container (e.g. directly under a YAML_DOCUMENT for the
1903    // top-level `&anchor\n- a` shape) — not as inner-prefix tokens like
1904    // v1. Scan parent siblings preceding the SEQ first.
1905    absorb_preceding_anchor_and_tag(seq_node, handles, &mut anchor, &mut long_tag);
1906    // v1 emits anchors/tags as inner-prefix tokens of the SEQ before the
1907    // first BLOCK_SEQUENCE_ITEM. Also walk those for backward compat.
1908    for child in seq_node.children_with_tokens() {
1909        if let Some(node) = child.as_node()
1910            && node.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
1911        {
1912            break;
1913        }
1914        let Some(tok) = child.as_token() else {
1915            continue;
1916        };
1917        absorb_anchor_or_tag(tok, handles, &mut anchor, &mut long_tag);
1918    }
1919    let mut event = String::from("+SEQ");
1920    if let Some(t) = long_tag {
1921        event.push(' ');
1922        event.push_str(&t);
1923    }
1924    if let Some(a) = anchor {
1925        event.push_str(" &");
1926        event.push_str(&a);
1927    }
1928    event
1929}
1930
1931/// Walk the parent's children and absorb `YAML_TAG`/`YAML_SCALAR` tokens
1932/// (carrying a `&...` anchor or `!...` tag) that appear *before* the
1933/// `child` node, stopping at `child`. Used by `seq_open_event` /
1934/// `map_open_event_for_block_map` to capture v2's emission of leading
1935/// anchor/tag tokens at the parent level rather than inside the
1936/// container.
1937fn absorb_preceding_anchor_and_tag(
1938    child: &SyntaxNode,
1939    handles: &TagHandles,
1940    anchor: &mut Option<String>,
1941    long_tag: &mut Option<String>,
1942) {
1943    let Some(parent) = child.parent() else {
1944        return;
1945    };
1946    let target_range = child.text_range();
1947    for el in parent.children_with_tokens() {
1948        if let Some(node) = el.as_node() {
1949            if node.text_range() == target_range {
1950                break;
1951            }
1952            continue;
1953        }
1954        if let Some(tok) = el.as_token() {
1955            absorb_anchor_or_tag(tok, handles, anchor, long_tag);
1956        }
1957    }
1958}
1959
1960/// Inspect a single token for an anchor or tag and update the
1961/// respective slot. Recognizes both v1's and v2's emission shape:
1962/// - v1 emits anchors as `YAML_SCALAR` tokens whose text starts with `&`.
1963/// - v2 emits anchors as `YAML_TAG` tokens (the synthesis of anchor and
1964///   tag into a single SyntaxKind), distinguishable by the leading byte.
1965fn absorb_anchor_or_tag(
1966    tok: &SyntaxToken,
1967    handles: &TagHandles,
1968    anchor: &mut Option<String>,
1969    long_tag: &mut Option<String>,
1970) {
1971    match tok.kind() {
1972        SyntaxKind::YAML_TAG => {
1973            let trimmed = tok.text().trim();
1974            if let Some(name) = trimmed.strip_prefix('&') {
1975                if anchor.is_none() {
1976                    *anchor = Some(name.to_string());
1977                }
1978            } else if trimmed.starts_with('!')
1979                && long_tag.is_none()
1980                && let Some(long) = resolve_long_tag(trimmed, handles)
1981            {
1982                *long_tag = Some(long);
1983            }
1984        }
1985        SyntaxKind::YAML_SCALAR => {
1986            let trimmed = tok.text().trim();
1987            if anchor.is_none()
1988                && let Some(name) = trimmed.strip_prefix('&')
1989            {
1990                *anchor = Some(name.to_string());
1991            } else if long_tag.is_none()
1992                && trimmed.starts_with('!')
1993                && let Some(long) = resolve_long_tag(trimmed, handles)
1994            {
1995                *long_tag = Some(long);
1996            }
1997        }
1998        _ => {}
1999    }
2000}
2001
2002/// Build the `+MAP` open event for a nested YAML_BLOCK_MAP that lives inside
2003/// a YAML_BLOCK_MAP_VALUE. Captures any anchor (`&name`) or tag (`!!str`,
2004/// `!shorthand`, etc.) tokens that precede the inner block map so that
2005/// projected events match patterns like `+MAP &node3` from yaml-test-suite
2006/// case 26DV (`top3: &node3` followed by an indented nested block map).
2007fn map_open_event_for_value(value_node: &SyntaxNode, handles: &TagHandles) -> String {
2008    let (anchor, long_tag, _residual) = extract_value_node_properties(value_node, handles);
2009    let mut event = String::from("+MAP");
2010    if let Some(t) = long_tag {
2011        event.push(' ');
2012        event.push_str(&t);
2013    }
2014    if let Some(a) = anchor {
2015        event.push_str(" &");
2016        event.push_str(&a);
2017    }
2018    event
2019}
2020
2021/// Walk the leading children of a YAML_BLOCK_MAP_VALUE — the tokens before
2022/// any nested YAML_BLOCK_MAP / YAML_FLOW_MAP / YAML_FLOW_SEQUENCE — and pull
2023/// out the optional anchor (`&name`, ending at whitespace, comma, or
2024/// flow-collection closer), the optional resolved tag, and any residual
2025/// scalar text that follows the anchor (e.g. the `*alias1` in 26DV's
2026/// `&node3 \n  *alias1` scalar that precedes a nested implicit map).
2027fn extract_value_node_properties(
2028    value_node: &SyntaxNode,
2029    handles: &TagHandles,
2030) -> (Option<String>, Option<String>, String) {
2031    let mut anchor: Option<String> = None;
2032    let mut long_tag: Option<String> = None;
2033    let mut residual = String::new();
2034    for child in value_node.children_with_tokens() {
2035        if let Some(node) = child.as_node()
2036            && matches!(
2037                node.kind(),
2038                SyntaxKind::YAML_BLOCK_MAP
2039                    | SyntaxKind::YAML_FLOW_MAP
2040                    | SyntaxKind::YAML_FLOW_SEQUENCE
2041            )
2042        {
2043            break;
2044        }
2045        let Some(tok) = child.as_token() else {
2046            continue;
2047        };
2048        match tok.kind() {
2049            SyntaxKind::YAML_TAG => {
2050                if long_tag.is_none()
2051                    && let Some(long) = resolve_long_tag(tok.text(), handles)
2052                {
2053                    long_tag = Some(long);
2054                }
2055            }
2056            SyntaxKind::YAML_SCALAR => {
2057                let text = tok.text();
2058                let trimmed = text.trim();
2059                if anchor.is_none()
2060                    && let Some(after) = trimmed.strip_prefix('&')
2061                {
2062                    let end = after
2063                        .find(|c: char| c.is_whitespace() || matches!(c, ',' | '}' | ']'))
2064                        .unwrap_or(after.len());
2065                    let (name, tail) = after.split_at(end);
2066                    anchor = Some(name.to_string());
2067                    let extra = tail.trim();
2068                    if !extra.is_empty() {
2069                        if !residual.is_empty() {
2070                            residual.push(' ');
2071                        }
2072                        residual.push_str(extra);
2073                    }
2074                } else {
2075                    let extra = trimmed;
2076                    if !extra.is_empty() {
2077                        if !residual.is_empty() {
2078                            residual.push(' ');
2079                        }
2080                        residual.push_str(extra);
2081                    }
2082                }
2083            }
2084            _ => {}
2085        }
2086    }
2087    (anchor, long_tag, residual)
2088}
2089
2090/// Build the `+MAP` open event for a YAML_BLOCK_MAP rooted directly under
2091/// a YAML_DOCUMENT. Captures any anchor (`&name`) or tag (`!!str`,
2092/// `!shorthand`, etc.) tokens that the parser absorbed at the top of the
2093/// block map so that documents like `--- !!set\n? a\n? b` project as
2094/// `+MAP <tag:yaml.org,2002:set>`.
2095fn map_open_event_for_block_map(map_node: &SyntaxNode, handles: &TagHandles) -> String {
2096    let mut anchor: Option<String> = None;
2097    let mut long_tag: Option<String> = None;
2098    // Mirror `seq_open_event`: scan parent siblings preceding this MAP
2099    // first (v2 emission), then the MAP's inner-prefix tokens (v1).
2100    absorb_preceding_anchor_and_tag(map_node, handles, &mut anchor, &mut long_tag);
2101    for child in map_node.children_with_tokens() {
2102        if let Some(node) = child.as_node()
2103            && node.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY
2104        {
2105            break;
2106        }
2107        let Some(tok) = child.as_token() else {
2108            continue;
2109        };
2110        if tok.kind() == SyntaxKind::YAML_SCALAR {
2111            let trimmed = tok.text().trim();
2112            // A `? `-prefixed scalar is the first key of the map; stop
2113            // scanning header tokens at that point so we don't pick up
2114            // entry-level data as document-level node properties.
2115            if trimmed.starts_with("? ") || trimmed == "?" {
2116                break;
2117            }
2118        }
2119        absorb_anchor_or_tag(tok, handles, &mut anchor, &mut long_tag);
2120    }
2121    let mut event = String::from("+MAP");
2122    if let Some(t) = long_tag {
2123        event.push(' ');
2124        event.push_str(&t);
2125    }
2126    if let Some(a) = anchor {
2127        event.push_str(" &");
2128        event.push_str(&a);
2129    }
2130    event
2131}
2132
2133fn decompose_scalar<'a>(
2134    text: &'a str,
2135    handles: &TagHandles,
2136) -> (Option<&'a str>, Option<String>, &'a str) {
2137    let mut anchor: Option<&str> = None;
2138    let mut long_tag: Option<String> = None;
2139    let mut rest = text.trim();
2140    loop {
2141        if anchor.is_none()
2142            && let Some(after) = rest.strip_prefix('&')
2143        {
2144            let end = after
2145                .find(|c: char| c.is_whitespace() || matches!(c, ',' | '}' | ']'))
2146                .unwrap_or(after.len());
2147            let (name, tail) = after.split_at(end);
2148            anchor = Some(name);
2149            rest = tail.trim_start();
2150            continue;
2151        }
2152        if long_tag.is_none()
2153            && let Some((tag, tail)) = split_leading_tag(rest)
2154            && let Some(long) = resolve_long_tag(tag, handles)
2155        {
2156            long_tag = Some(long);
2157            rest = tail.trim_start();
2158            continue;
2159        }
2160        break;
2161    }
2162    (anchor, long_tag, rest)
2163}
2164
2165/// Render a scalar event from its decomposed parts: optional anchor,
2166/// optional long-form tag (already in `<...>` form), and the scalar body.
2167/// Handles plain, double-quoted, and single-quoted bodies; quoted bodies
2168/// share the same escape normalization as [`quoted_val_event`].
2169fn scalar_event(anchor: Option<&str>, long_tag: Option<&str>, body: &str) -> String {
2170    let mut prefix = String::new();
2171    if let Some(a) = anchor {
2172        prefix.push_str(&format!("&{a} "));
2173    }
2174    if let Some(t) = long_tag {
2175        prefix.push_str(t);
2176        prefix.push(' ');
2177    }
2178    let body = body.trim();
2179    if body.is_empty() {
2180        return format!("=VAL {prefix}:");
2181    }
2182    if body.starts_with('"') || body.starts_with('\'') {
2183        // Reuse the shared escape/normalization rules; splice the prefix in
2184        // place of the leading `=VAL ` token.
2185        let quoted = quoted_val_event(body);
2186        return quoted.replacen("=VAL ", &format!("=VAL {prefix}"), 1);
2187    }
2188    // yaml-test-suite events escape `\`, control characters, and embedded
2189    // newlines in plain-scalar bodies. Apply that here so callers can pass
2190    // raw (or fold-only) text and not pre-escape.
2191    format!("=VAL {prefix}:{}", escape_for_event(body))
2192}
2193
/// Project the direct children of a block map into key/value events.
///
/// Two child shapes produce output:
/// - a `YAML_SCALAR` token whose text (after leading whitespace) is `?` or
///   starts with `? ` — an explicit key held as a bare token. Its key event
///   is emitted here, then the following siblings are scanned for the
///   matching `:` and value;
/// - a `YAML_BLOCK_MAP_ENTRY` node, delegated to `project_block_map_entry`.
///
/// Every other child is skipped. The enclosing `+MAP`/`-MAP` events are not
/// emitted here.
fn project_block_map_entries(map_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
    // Collected up front so the explicit-key branch can look ahead by index.
    let children: Vec<_> = map_node.children_with_tokens().collect();
    let mut idx = 0;
    while idx < children.len() {
        match &children[idx] {
            rowan::NodeOrToken::Token(tok)
                if tok.kind() == SyntaxKind::YAML_SCALAR
                    && (tok.text().trim_start().starts_with("? ")
                        || tok.text().trim_start() == "?") =>
            {
                // Key body: the text with leading `?` characters and
                // surrounding whitespace removed.
                let body = tok.text().trim_start().trim_start_matches('?').trim();
                if body.is_empty() {
                    out.push("=VAL :".to_string());
                } else {
                    let (anchor, body_tag, rest) = decompose_scalar(body, handles);
                    out.push(scalar_event(anchor, body_tag.as_deref(), rest));
                }
                idx += 1;
                // Look ahead for the matching `:value` line. Skip
                // intervening newlines, whitespace, and comments. Stop at
                // anything else — that means the value is implicitly null.
                let mut peek = idx;
                while peek < children.len() {
                    if let rowan::NodeOrToken::Token(t) = &children[peek] {
                        if matches!(
                            t.kind(),
                            SyntaxKind::NEWLINE | SyntaxKind::WHITESPACE | SyntaxKind::YAML_COMMENT
                        ) {
                            peek += 1;
                            continue;
                        }
                        if t.kind() == SyntaxKind::YAML_COLON {
                            // Colon found: collect value tokens up to the
                            // next NEWLINE.
                            let mut value_tag: Option<String> = None;
                            let mut value_text = String::new();
                            let mut value_end = peek + 1;
                            while value_end < children.len() {
                                if let rowan::NodeOrToken::Token(vt) = &children[value_end] {
                                    if vt.kind() == SyntaxKind::NEWLINE {
                                        break;
                                    }
                                    // Only the first tag token is kept;
                                    // scalar tokens are concatenated.
                                    if vt.kind() == SyntaxKind::YAML_TAG && value_tag.is_none() {
                                        value_tag = Some(vt.text().to_string());
                                    } else if vt.kind() == SyntaxKind::YAML_SCALAR {
                                        value_text.push_str(vt.text());
                                    }
                                    value_end += 1;
                                } else {
                                    // A child node also ends the inline value.
                                    break;
                                }
                            }
                            let trimmed = value_text.trim();
                            let value_long_tag = value_tag
                                .as_deref()
                                .and_then(|t| resolve_long_tag(t, handles));
                            if trimmed.is_empty() {
                                // Empty value: keep a resolved tag if present.
                                if let Some(long) = value_long_tag {
                                    out.push(format!("=VAL {long} :"));
                                } else {
                                    out.push("=VAL :".to_string());
                                }
                            } else if trimmed.starts_with('"') || trimmed.starts_with('\'') {
                                // Quoted value: splice the tag in after the
                                // leading `=VAL ` of the quoted event.
                                let quoted = quoted_val_event(trimmed);
                                if let Some(long) = value_long_tag {
                                    out.push(quoted.replacen("=VAL ", &format!("=VAL {long} "), 1));
                                } else {
                                    out.push(quoted);
                                }
                            } else {
                                // Plain value: the separate tag token takes
                                // precedence over a tag inside the text.
                                let (anchor, body_tag, body) = decompose_scalar(trimmed, handles);
                                let long_tag = value_long_tag.or(body_tag);
                                out.push(scalar_event(anchor, long_tag.as_deref(), body));
                            }
                            // Resume the outer walk after the consumed value.
                            idx = value_end;
                            break;
                        }
                    }
                    // Non-trivia, non-colon: implicit null value.
                    out.push("=VAL :".to_string());
                    break;
                }
                // Ran off the end while skipping trivia: no colon followed
                // the key, so the value is an implicit null as well.
                if peek >= children.len() {
                    out.push("=VAL :".to_string());
                }
            }
            rowan::NodeOrToken::Node(entry) if entry.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY => {
                project_block_map_entry(entry, handles, out);
                idx += 1;
            }
            _ => {
                idx += 1;
            }
        }
    }
}
2290
2291fn project_block_map_entry(entry: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
2292    let key_node = entry
2293        .children()
2294        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
2295        .expect("key node");
2296    let value_node = entry
2297        .children()
2298        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
2299        .expect("value node");
2300
2301    let key_tag = key_node
2302        .children_with_tokens()
2303        .filter_map(|el| el.into_token())
2304        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
2305        .map(|tok| tok.text().to_string());
2306    // The key text lives in either a `YAML_KEY` token (v1's emission, used
2307    // both for the explicit `?` indicator and for implicit key text) or
2308    // a `YAML_SCALAR` token (v2's emission, where wrapper position
2309    // carries the role and the explicit `?` is the only `YAML_KEY`).
2310    // Concatenate matching tokens — interleave WHITESPACE / NEWLINE so the
2311    // explicit `?` and any subsequent key scalar are separated by their
2312    // original trivia, letting `strip_explicit_key_indicator` recognize
2313    // the `?<sp>` pattern. Stops at the trailing `:` (`YAML_COLON`).
2314    // Falls back to empty for the empty-implicit-key shorthand
2315    // (`: value` — KEY wrapper holds only the colon).
2316    let key_text = key_node
2317        .children_with_tokens()
2318        .filter_map(|el| el.into_token())
2319        .take_while(|tok| tok.kind() != SyntaxKind::YAML_COLON)
2320        .filter(|tok| {
2321            matches!(
2322                tok.kind(),
2323                SyntaxKind::YAML_KEY
2324                    | SyntaxKind::YAML_SCALAR
2325                    | SyntaxKind::WHITESPACE
2326                    | SyntaxKind::NEWLINE
2327            )
2328        })
2329        .map(|tok| tok.text().to_string())
2330        .collect::<Vec<_>>()
2331        .join("");
2332    let key_text = key_text.trim_end().to_string();
2333
2334    // Strip an explicit-key `?` indicator that precedes the actual key
2335    // text. v2 emits the `?` as a `YAML_KEY` token sibling of the
2336    // `YAML_SCALAR`, so it ends up in `key_text` after the join above.
2337    // v1 wouldn't reach this strip because its v1-shape `YAML_KEY`
2338    // token carried only the implicit key body.
2339    let key_trimmed = strip_explicit_key_indicator(key_text.trim());
2340    if key_trimmed.starts_with('[')
2341        && key_trimmed.ends_with(']')
2342        && let Some(items) = simple_flow_sequence_items(key_trimmed)
2343    {
2344        out.push("+SEQ []".to_string());
2345        for item in items {
2346            project_flow_seq_item(&item, handles, out);
2347        }
2348        out.push("-SEQ".to_string());
2349    } else if key_trimmed.starts_with('*') {
2350        out.push(format!("=ALI {key_trimmed}"));
2351    } else {
2352        let key_long_tag = key_tag
2353            .as_deref()
2354            .and_then(|t| resolve_long_tag(t, handles));
2355        let (anchor, body_tag, body) = decompose_scalar(key_trimmed, handles);
2356        let long_tag = key_long_tag.or(body_tag);
2357        let folded;
2358        let body_for_event: &str = if body.contains('\n') {
2359            folded = fold_quoted_inner(body);
2360            &folded
2361        } else {
2362            body
2363        };
2364        out.push(scalar_event(anchor, long_tag.as_deref(), body_for_event));
2365    }
2366
2367    project_block_map_entry_value(&value_node, handles, out);
2368}
2369
2370/// W5VH support: detect the shape where a YAML_BLOCK_MAP_VALUE's leading
2371/// YAML_SCALAR begins with `&` (anchor) or `*` (alias) and is immediately
2372/// followed — with no whitespace — by a YAML_BLOCK_MAP whose first entry
2373/// has an empty key. The scanner consumed the boundary `:` as a value
2374/// indicator, but yaml-test-suite considers it part of the anchor/alias
2375/// name. Returns the projected events (`=VAL` / `=ALI`) when the shape
2376/// matches, or `None` otherwise.
2377fn rebuild_anchor_alias_with_trailing_colon(
2378    value_node: &SyntaxNode,
2379    nested_map: &SyntaxNode,
2380    _handles: &TagHandles,
2381) -> Option<Vec<String>> {
2382    let scalar = value_node
2383        .children_with_tokens()
2384        .filter_map(|el| el.into_token())
2385        .find(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)?;
2386    let text = scalar.text();
2387    if !(text.starts_with('&') || text.starts_with('*')) {
2388        return None;
2389    }
2390    if scalar.text_range().end() != nested_map.text_range().start() {
2391        return None;
2392    }
2393    // Anchor/alias names are bounded by whitespace. If the scalar text
2394    // after the leading `&`/`*` contains a whitespace boundary, the
2395    // scalar isn't a single anchor/alias-with-trailing-colon — it's the
2396    // 26DV shape (`&node3 \n  *alias1`) handled by the residual-splice
2397    // path above. Skip the rebuild here to keep that path active.
2398    let body = &text[1..];
2399    if body.is_empty() || body.chars().any(char::is_whitespace) {
2400        return None;
2401    }
2402    let mut entries = nested_map
2403        .children()
2404        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY);
2405    let first_entry = entries.next()?;
2406    if entries.next().is_some() {
2407        return None;
2408    }
2409    if !block_map_entry_key_is_empty(&first_entry) {
2410        return None;
2411    }
2412
2413    let inner_value_node = first_entry
2414        .children()
2415        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);
2416    let body_text = inner_value_node
2417        .as_ref()
2418        .map(|v| {
2419            v.descendants_with_tokens()
2420                .filter_map(|el| el.into_token())
2421                .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
2422                .map(|tok| tok.text().to_string())
2423                .collect::<Vec<_>>()
2424                .join("")
2425        })
2426        .unwrap_or_default();
2427    let body_trimmed = body_text.trim();
2428
2429    if let Some(rest) = text.strip_prefix('*') {
2430        if !body_trimmed.is_empty() {
2431            return None;
2432        }
2433        return Some(vec![format!("=ALI *{rest}:")]);
2434    }
2435
2436    let anchor_name = text.strip_prefix('&')?;
2437    let composed = format!("&{anchor_name}:");
2438    Some(vec![scalar_event(Some(&composed[1..]), None, body_trimmed)])
2439}
2440
2441fn project_block_map_entry_value(
2442    value_node: &SyntaxNode,
2443    handles: &TagHandles,
2444    out: &mut Vec<String>,
2445) {
2446    if let Some(nested_map) = value_node
2447        .children()
2448        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
2449    {
2450        // W5VH: an anchor or alias whose name contains a literal `:`
2451        // (`&:@*!$"<foo>:` / `*:@*!$"<foo>:`) lands in the v2 CST as a
2452        // YAML_SCALAR ending in `<foo>` followed by a YAML_BLOCK_MAP
2453        // starting at the colon that the scanner mistakenly read as a
2454        // value indicator. yaml-test-suite includes the trailing colon
2455        // in the anchor/alias name, so reconstruct that here when the
2456        // shape matches: scalar starts with `&`/`*`, the nested map
2457        // starts at the scalar's exact end byte, and the nested map's
2458        // single entry has an empty key.
2459        if let Some(rebuilt) =
2460            rebuild_anchor_alias_with_trailing_colon(value_node, &nested_map, handles)
2461        {
2462            out.extend(rebuilt);
2463            return;
2464        }
2465        // 26DV: a value scalar like `&node3 \n  *alias1 ` lands as a
2466        // single YAML_SCALAR before the nested YAML_BLOCK_MAP. Strip the
2467        // anchor name, then splice any residual text (e.g. `*alias1`) in
2468        // as the first entry's key when that entry has an empty key
2469        // (the v2 builder shape for an indented implicit map).
2470        let (_, _, residual) = extract_value_node_properties(value_node, handles);
2471        let first_entry = nested_map
2472            .children()
2473            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY);
2474        if !residual.is_empty()
2475            && let Some(first_entry) = first_entry.as_ref()
2476            && block_map_entry_key_is_empty(first_entry)
2477        {
2478            out.push(map_open_event_for_value(value_node, handles));
2479            if residual.starts_with('*') {
2480                out.push(format!("=ALI {residual}"));
2481            } else {
2482                let (anchor, body_tag, body) = decompose_scalar(&residual, handles);
2483                out.push(scalar_event(anchor, body_tag.as_deref(), body));
2484            }
2485            if let Some(value_node) = first_entry
2486                .children()
2487                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
2488            {
2489                project_block_map_entry_value(&value_node, handles, out);
2490            } else {
2491                out.push("=VAL :".to_string());
2492            }
2493            for entry in nested_map
2494                .children()
2495                .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
2496                .skip(1)
2497            {
2498                project_block_map_entry(&entry, handles, out);
2499            }
2500            out.push("-MAP".to_string());
2501            return;
2502        }
2503        out.push(map_open_event_for_value(value_node, handles));
2504        project_block_map_entries(&nested_map, handles, out);
2505        out.push("-MAP".to_string());
2506        return;
2507    }
2508
2509    if let Some(nested_seq) = value_node
2510        .children()
2511        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
2512    {
2513        out.push(seq_open_event(&nested_seq, handles));
2514        project_block_sequence_items(&nested_seq, handles, out);
2515        out.push("-SEQ".to_string());
2516        return;
2517    }
2518
2519    if let Some(flow_map) = value_node
2520        .children()
2521        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
2522    {
2523        out.push("+MAP {}".to_string());
2524        project_flow_map_entries(&flow_map, handles, out);
2525        out.push("-MAP".to_string());
2526        return;
2527    }
2528
2529    // A flow-sequence value with embedded `:` (an implicit flow-map
2530    // entry inside `[...]`, e.g. 87E4 / L9U5 / LQZ7) needs the
2531    // CST-walking item projector — the text-based fallback below
2532    // strips colons during `value_text` assembly so `flow_kv_split`
2533    // never sees them and the entry collapses into one bare scalar.
2534    if let Some(flow_seq) = value_node
2535        .children()
2536        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
2537    {
2538        out.push("+SEQ []".to_string());
2539        project_flow_sequence_items_cst(&flow_seq, handles, out);
2540        out.push("-SEQ".to_string());
2541        return;
2542    }
2543
2544    if let Some((indicator, body)) = extract_block_scalar_body(value_node) {
2545        let escaped = escape_block_scalar_text(&body);
2546        out.push(format!("=VAL {indicator}{escaped}"));
2547        return;
2548    }
2549
2550    let value_tag = value_node
2551        .children_with_tokens()
2552        .filter_map(|el| el.into_token())
2553        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
2554        .map(|tok| tok.text().to_string());
2555    let value_text = value_node
2556        .descendants_with_tokens()
2557        .filter_map(|el| el.into_token())
2558        .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
2559        .map(|tok| tok.text().to_string())
2560        .collect::<Vec<_>>()
2561        .join("");
2562
2563    if value_tag.is_none()
2564        && let Some(items) = simple_flow_sequence_items(&value_text)
2565    {
2566        out.push("+SEQ []".to_string());
2567        for item in items {
2568            project_flow_seq_item(&item, handles, out);
2569        }
2570        out.push("-SEQ".to_string());
2571    } else if value_text.trim().is_empty() {
2572        if let Some(tag) = value_tag
2573            && let Some(long) = resolve_long_tag(&tag, handles)
2574        {
2575            out.push(format!("=VAL {long} :"));
2576        } else {
2577            out.push("=VAL :".to_string());
2578        }
2579    } else if value_text.trim_start().starts_with('*') {
2580        out.push(format!("=ALI {}", value_text.trim()));
2581    } else {
2582        let value_long_tag = value_tag
2583            .as_deref()
2584            .and_then(|t| resolve_long_tag(t, handles));
2585        let trimmed = value_text.trim();
2586        if trimmed.starts_with('"') || trimmed.starts_with('\'') {
2587            // Multi-line quoted scalar value: rebuild the source text with
2588            // newlines intact (parser splits each physical line into its own
2589            // YAML_SCALAR token), then run the YAML 1.2 §7.3 line-folding
2590            // rules so blank lines fold to `\n` and single breaks fold to
2591            // space. Without this, joining YAML_SCALAR tokens directly drops
2592            // line structure (yaml-test-suite case XV9V).
2593            let multi_line_text = collect_value_scalar_text_with_newlines(value_node);
2594            // Strip trailing whitespace/newlines that come AFTER the
2595            // closing quote. v2 keeps a single quoted-scalar token so
2596            // those bytes are post-value trivia (NEWLINE) — they don't
2597            // make the scalar body multi-line. Without this trim, a
2598            // single-line quoted with trailing significant whitespace
2599            // (J3BT's `"Quoted \t"`) hits the multi-line folder which
2600            // strips trailing tabs/spaces from the scalar body.
2601            let is_multi_line = multi_line_text
2602                .trim_end_matches(['\n', '\r', ' ', '\t'])
2603                .contains('\n');
2604            let quoted = if is_multi_line {
2605                quoted_val_event_multi_line(&multi_line_text)
2606            } else {
2607                quoted_val_event(trimmed)
2608            };
2609            if let Some(long) = value_long_tag {
2610                out.push(quoted.replacen("=VAL ", &format!("=VAL {long} "), 1));
2611            } else {
2612                out.push(quoted);
2613            }
2614        } else {
2615            let (anchor, body_tag, body) = decompose_scalar(trimmed, handles);
2616            let long_tag = value_long_tag.or(body_tag);
2617            let folded;
2618            let body_for_event: &str = if body.contains('\n') {
2619                folded = fold_quoted_inner(body);
2620                &folded
2621            } else {
2622                body
2623            };
2624            out.push(scalar_event(anchor, long_tag.as_deref(), body_for_event));
2625        }
2626    }
2627}
2628
2629/// Reconstruct a YAML_BLOCK_MAP_VALUE's scalar text with line breaks intact
2630/// for multi-line quoted-scalar folding. Mirrors
2631/// [`collect_doc_scalar_text_with_newlines`] but bounded to a single
2632/// block-map value so it doesn't pull in scalars from nested blocks.
2633fn collect_value_scalar_text_with_newlines(value_node: &SyntaxNode) -> String {
2634    value_node
2635        .descendants_with_tokens()
2636        .filter_map(|el| el.into_token())
2637        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
2638        .map(|tok| tok.text().to_string())
2639        .collect()
2640}