Skip to main content

panache_parser/parser/yaml/
events.rs

1//! YAML event projection: walk a shadow-parser CST and produce a
2//! yaml-test-suite style event stream (`+STR`, `+DOC`, `+MAP`, `=VAL :foo`,
3//! ...).
4//!
5//! This module is parser-crate scoped and used only by the test harness in
6//! `crates/panache-parser/tests/yaml.rs` for fixture parity. It reads the
7//! green tree built by [`crate::parser::yaml::parse_yaml_tree`] and re-derives
8//! event-stream semantics (tag resolution, anchor stripping, flow-seq
9//! splitting). The intent is to keep the projection adjacent to the parser so
10//! CST shape is the single source of truth for events.
11
12use std::collections::HashMap;
13
14use crate::syntax::{SyntaxKind, SyntaxNode};
15
16use super::parser::parse_yaml_tree;
17
/// Tag handle table for one document: maps a handle such as `!!`, `!yaml!`
/// or `!e!` to the URI prefix it expands to. The secondary handle `!!`
/// defaults to `tag:yaml.org,2002:` as required by YAML 1.2; `%TAG`
/// directives found in the document override or extend the table.
type TagHandles = HashMap<String, String>;

/// Build the handle table every document starts with: only the secondary
/// handle `!!`, bound to the YAML core-schema prefix.
fn default_tag_handles() -> TagHandles {
    HashMap::from([("!!".to_string(), "tag:yaml.org,2002:".to_string())])
}
28
29/// Scan a `YAML_DOCUMENT` for `%TAG` directive lines and merge them into
30/// the default handle map.
31fn collect_tag_handles(doc: &SyntaxNode) -> TagHandles {
32    let mut handles = default_tag_handles();
33    for tok in doc
34        .descendants_with_tokens()
35        .filter_map(|el| el.into_token())
36    {
37        if tok.kind() != SyntaxKind::YAML_SCALAR {
38            continue;
39        }
40        let line = tok.text().trim_start();
41        let Some(rest) = line.strip_prefix("%TAG") else {
42            continue;
43        };
44        let mut parts = rest.split_whitespace();
45        let Some(handle) = parts.next() else { continue };
46        let Some(prefix) = parts.next() else { continue };
47        handles.insert(handle.to_string(), prefix.to_string());
48    }
49    handles
50}
51
52/// Resolve a tag shorthand (e.g. `!!str`, `!yaml!str`, `!e!foo`, `!local`) to
53/// the long-form `<tag:...>` event token, consulting the per-document handle
54/// map. Falls back to the built-in handling for unknown handles.
55fn resolve_long_tag(tag: &str, handles: &TagHandles) -> Option<String> {
56    if let Some(s) = long_tag_builtin(tag) {
57        return Some(s);
58    }
59    let mut best: Option<(&str, &String)> = None;
60    for (h, p) in handles {
61        if tag.starts_with(h)
62            && best.is_none_or(|(b_handle, _): (&str, _)| h.len() > b_handle.len())
63        {
64            best = Some((h.as_str(), p));
65        }
66    }
67    if let Some((handle, prefix)) = best {
68        let suffix = &tag[handle.len()..];
69        return Some(format!("<{prefix}{suffix}>"));
70    }
71    None
72}
73
74/// Walk the shadow CST for `input` and return the projected yaml-test-suite
75/// event stream. Returns an empty vector if the input fails to parse.
76pub fn project_events(input: &str) -> Vec<String> {
77    let Some(tree) = parse_yaml_tree(input) else {
78        return Vec::new();
79    };
80
81    let mut events = vec!["+STR".to_string()];
82    let stream = tree
83        .descendants()
84        .find(|n| n.kind() == SyntaxKind::YAML_STREAM);
85    if let Some(stream) = stream {
86        for doc in stream
87            .children()
88            .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
89        {
90            project_document(&doc, &mut events);
91        }
92    }
93    events.push("-STR".to_string());
94    events
95}
96
97fn project_document(doc: &SyntaxNode, out: &mut Vec<String>) {
98    let has_doc_start = doc
99        .children_with_tokens()
100        .filter_map(|el| el.into_token())
101        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START);
102    let has_doc_end = doc
103        .children_with_tokens()
104        .filter_map(|el| el.into_token())
105        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END);
106    out.push(if has_doc_start {
107        "+DOC ---".to_string()
108    } else {
109        "+DOC".to_string()
110    });
111    let handles = collect_tag_handles(doc);
112
113    if let Some(seq_node) = doc
114        .descendants()
115        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
116    {
117        out.push("+SEQ".to_string());
118        project_block_sequence_items(&seq_node, &handles, out);
119        out.push("-SEQ".to_string());
120    } else if let Some(root_map) = doc
121        .descendants()
122        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
123    {
124        let mut values = Vec::new();
125        project_block_map_entries(&root_map, &handles, &mut values);
126        if !values.is_empty() {
127            out.push("+MAP".to_string());
128            out.append(&mut values);
129            out.push("-MAP".to_string());
130        } else if let Some(flow_map) = doc
131            .descendants()
132            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
133        {
134            let mut flow_values = Vec::new();
135            project_flow_map_entries(&flow_map, &handles, &mut flow_values);
136            out.push("+MAP {}".to_string());
137            out.append(&mut flow_values);
138            out.push("-MAP".to_string());
139        } else if let Some(flow_seq) = doc
140            .descendants()
141            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
142            && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
143        {
144            out.push("+SEQ []".to_string());
145            for item in items {
146                project_flow_seq_item(&item, &handles, out);
147            }
148            out.push("-SEQ".to_string());
149        } else if let Some(scalar) = scalar_document_value(doc, &handles) {
150            out.push(scalar);
151        } else {
152            out.push("=VAL :".to_string());
153        }
154    } else if let Some(flow_map) = doc
155        .descendants()
156        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
157    {
158        out.push("+MAP {}".to_string());
159        project_flow_map_entries(&flow_map, &handles, out);
160        out.push("-MAP".to_string());
161    } else if let Some(flow_seq) = doc
162        .descendants()
163        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
164        && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
165    {
166        out.push("+SEQ []".to_string());
167        for item in items {
168            project_flow_seq_item(&item, &handles, out);
169        }
170        out.push("-SEQ".to_string());
171    } else if let Some(scalar) = scalar_document_value(doc, &handles) {
172        out.push(scalar);
173    } else {
174        out.push("=VAL :".to_string());
175    }
176
177    out.push(if has_doc_end {
178        "-DOC ...".to_string()
179    } else {
180        "-DOC".to_string()
181    });
182}
183
184fn scalar_document_value(doc: &SyntaxNode, handles: &TagHandles) -> Option<String> {
185    // Skip `%TAG`/`%YAML` directive lines: those are document-level metadata,
186    // not part of the scalar body.
187    let text = doc
188        .descendants_with_tokens()
189        .filter_map(|el| el.into_token())
190        .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
191        .filter(|tok| !tok.text().trim_start().starts_with('%'))
192        .map(|tok| tok.text().to_string())
193        .collect::<Vec<_>>()
194        .join("");
195    let trimmed_text = text.trim();
196    if trimmed_text.is_empty() {
197        // Tagged-but-empty scalar document still emits a `=VAL <tag> :` event.
198        let tag_only = doc
199            .descendants_with_tokens()
200            .filter_map(|el| el.into_token())
201            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
202            .map(|tok| tok.text().to_string());
203        if let Some(tag) = tag_only
204            && let Some(long) = resolve_long_tag(&tag, handles)
205        {
206            return Some(format!("=VAL {long} :"));
207        }
208        return None;
209    }
210    let tag_text = doc
211        .descendants_with_tokens()
212        .filter_map(|el| el.into_token())
213        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
214        .map(|tok| tok.text().to_string());
215    let event = if let Some(tag) = tag_text
216        && let Some(long) = resolve_long_tag(&tag, handles)
217    {
218        if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
219            let quoted = quoted_val_event(trimmed_text);
220            // quoted_val_event returns `=VAL "body` — splice the tag in.
221            quoted.replacen("=VAL ", &format!("=VAL {long} "), 1)
222        } else {
223            format!("=VAL {long} :{trimmed_text}")
224        }
225    } else if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
226        quoted_val_event(&text)
227    } else {
228        plain_val_event(&text)
229    };
230    Some(event)
231}
232
/// Render a plain-scalar event: `=VAL :` followed by `text` with every
/// backslash doubled. No trimming or folding is applied here.
fn plain_val_event(text: &str) -> String {
    const HEAD: &str = "=VAL :";
    let mut event = String::with_capacity(HEAD.len() + text.len());
    event.push_str(HEAD);
    for ch in text.chars() {
        if ch == '\\' {
            event.push_str("\\\\");
        } else {
            event.push(ch);
        }
    }
    event
}
236
237/// Project a flow-collection scalar token, preserving quoted-scalar
238/// classification when the source uses `"..."` or `'...'`. Plain scalars are
239/// folded just like outside flow context. A leading tag shorthand (`!!str`,
240/// `!handle!suffix`, `!local`) is resolved through `handles`.
241fn flow_scalar_event(text: &str, handles: &TagHandles) -> String {
242    let trimmed = text.trim();
243    if trimmed.starts_with('"') || trimmed.starts_with('\'') {
244        return quoted_val_event(trimmed);
245    }
246    let (anchor, long_tag, body) = decompose_scalar(trimmed, handles);
247    if anchor.is_some() || long_tag.is_some() {
248        return scalar_event(anchor, long_tag.as_deref(), body);
249    }
250    plain_val_event(&fold_plain_scalar(text))
251}
252
/// Split a leading tag off `text`, returning `(tag, remainder)`.
///
/// Recognised forms:
/// * a verbatim tag `!<...>`: the tag runs through the closing `>`, so
///   commas inside the URI (as in `tag:yaml.org,2002:str`) do not end it;
/// * a shorthand `!`, `!local`, `!!suffix` or `!handle!suffix`: the tag runs
///   until the first space, tab, newline, `,`, `}` or `]`, or to the end of
///   input.
///
/// Returns `None` when `text` does not start with `!`, or when a shorthand
/// contains more than one `!` after the leading one. The remainder is
/// returned untrimmed.
fn split_leading_tag(text: &str) -> Option<(&str, &str)> {
    let after_bang = text.strip_prefix('!')?;

    if after_bang.starts_with('<') {
        if let Some(close) = after_bang.find('>') {
            // +1 for the leading `!`, +1 to include the `>` itself.
            return Some(text.split_at(close + 2));
        }
        // Unterminated verbatim tag: fall through to shorthand scanning.
    }

    let end = after_bang
        .find(|c: char| matches!(c, ' ' | '\t' | '\n' | ',' | '}' | ']'))
        .unwrap_or(after_bang.len());
    // A shorthand has at most one `!` after the leading one (the end of a
    // named handle); anything more is not a tag.
    if after_bang[..end].matches('!').count() > 1 {
        return None;
    }
    Some(text.split_at(1 + end))
}
279
/// Find the `:` that separates key from value inside one flow-sequence item.
///
/// A `:` only counts as the mapping indicator when it is the last byte of
/// the item or is followed by a space, tab, `\n` or `\r`; otherwise it is
/// part of a plain scalar (e.g. `http://foo.com`). Colons inside single- or
/// double-quoted regions are skipped, and inside double quotes a backslash
/// protects the following character.
///
/// Returns `(colon_offset, offset_after_colon)` as byte offsets into `item`,
/// or `None` when no such indicator exists.
fn flow_kv_split(item: &str) -> Option<(usize, usize)> {
    #[derive(Clone, Copy)]
    enum Scan {
        Plain,
        Single,
        Double,
        DoubleEscaped,
    }

    // Every character the scanner reacts to is ASCII, so walking bytes gives
    // the same offsets and decisions as walking chars.
    let bytes = item.as_bytes();
    let mut scan = Scan::Plain;
    for (pos, &byte) in bytes.iter().enumerate() {
        scan = match (scan, byte) {
            (Scan::DoubleEscaped, _) => Scan::Double,
            (Scan::Double, b'\\') => Scan::DoubleEscaped,
            (Scan::Double, b'"') => Scan::Plain,
            (Scan::Double, _) => Scan::Double,
            (Scan::Single, b'\'') => Scan::Plain,
            (Scan::Single, _) => Scan::Single,
            (Scan::Plain, b'\'') => Scan::Single,
            (Scan::Plain, b'"') => Scan::Double,
            (Scan::Plain, b':') => {
                let after = pos + 1;
                match bytes.get(after) {
                    None | Some(b' ' | b'\t' | b'\n' | b'\r') => return Some((pos, after)),
                    Some(_) => Scan::Plain,
                }
            }
            (Scan::Plain, _) => Scan::Plain,
        };
    }
    None
}
324
325/// Emit events for a single flow-sequence item: either `+MAP {} key val -MAP`
326/// when the item is a flow-map entry (`key: value`, possibly with empty key
327/// or value), or a single `=VAL` for a bare scalar.
328fn project_flow_seq_item(item: &str, handles: &TagHandles, out: &mut Vec<String>) {
329    if let Some((colon, after)) = flow_kv_split(item) {
330        let raw_key_full = item[..colon].trim();
331        // Strip the explicit-key `?` indicator (followed by whitespace or
332        // end-of-key) when present.
333        let raw_key = strip_explicit_key_indicator(raw_key_full);
334        let raw_value = item[after..].trim();
335        out.push("+MAP {}".to_string());
336        if raw_key.is_empty() {
337            out.push("=VAL :".to_string());
338        } else {
339            out.push(flow_scalar_event(raw_key, handles));
340        }
341        if raw_value.is_empty() {
342            out.push("=VAL :".to_string());
343        } else {
344            out.push(flow_scalar_event(raw_value, handles));
345        }
346        out.push("-MAP".to_string());
347    } else if item.trim_start().starts_with('"') || item.trim_start().starts_with('\'') {
348        out.push(quoted_val_event(item.trim()));
349    } else {
350        out.push(plain_val_event(&fold_plain_scalar(item)));
351    }
352}
353
/// Remove a leading explicit-key indicator `?` from `key`.
///
/// The `?` is only treated as an indicator when, after leading whitespace,
/// it is the whole key or is followed by a space, tab or newline; the text
/// after it is returned with leading whitespace removed. In every other case
/// `key` is returned unchanged (including its own leading whitespace).
fn strip_explicit_key_indicator(key: &str) -> &str {
    match key.trim_start().strip_prefix('?') {
        Some(tail) if tail.is_empty() || tail.starts_with([' ', '\t', '\n']) => tail.trim_start(),
        _ => key,
    }
}
363
/// Render a quoted scalar as a `=VAL` event.
///
/// `text` is the raw source of the scalar including its quotes. The event
/// keeps the opening quote as the style indicator and drops exactly one
/// closing quote, so empty strings (`''`, `""`) and bodies that end in an
/// escaped quote (`'a'''`, `"a\""`) keep their content.
///
/// * Single-quoted: `''` becomes `'` and every backslash is doubled.
/// * Double-quoted (and any text that does not start with `'`): `\/`
///   becomes `/`, `\"` becomes `"`, and every other escape sequence is
///   copied through unchanged. A lone trailing backslash is kept.
fn quoted_val_event(text: &str) -> String {
    if let Some(rest) = text.strip_prefix('\'') {
        // Un-escape only the content: including the opening quote would let
        // it pair up with a leading `''` escape.
        let content = rest.strip_suffix('\'').unwrap_or(rest);
        let normalized = content.replace("''", "'").replace('\\', "\\\\");
        return format!("=VAL '{normalized}");
    }

    let (indicator, rest) = match text.strip_prefix('"') {
        Some(rest) => ("\"", rest),
        None => ("", text),
    };
    let content = rest.strip_suffix('"').unwrap_or(rest);

    let mut normalized = String::with_capacity(indicator.len() + content.len());
    normalized.push_str(indicator);
    let mut chars = content.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            normalized.push(ch);
            continue;
        }
        match chars.next() {
            Some('/') => normalized.push('/'),
            Some('"') => normalized.push('"'),
            Some(other) => {
                normalized.push('\\');
                normalized.push(other);
            }
            // Backslash was the last character: keep it verbatim.
            None => normalized.push('\\'),
        }
    }
    format!("=VAL {normalized}")
}
396
/// Resolve the tag forms that need no handle table.
///
/// * `!` (the non-specific tag) becomes `<!>`.
/// * A verbatim tag `!<uri>` with a non-empty URI becomes `<uri>`: the URI
///   is already in long form and is used as written.
/// * A bare local tag `!name` (no second `!`) becomes `<!name>`.
///
/// Returns `None` for everything else, i.e. text that does not start with
/// `!` and shorthands that use a named or secondary handle (`!!str`,
/// `!e!foo`).
fn long_tag_builtin(tag: &str) -> Option<String> {
    if tag == "!" {
        return Some("<!>".to_string());
    }
    let rest = tag.strip_prefix('!')?;

    let verbatim_uri = rest
        .strip_prefix('<')
        .and_then(|inner| inner.strip_suffix('>'))
        .filter(|uri| !uri.is_empty());
    if let Some(uri) = verbatim_uri {
        return Some(format!("<{uri}>"));
    }

    if rest.contains('!') {
        None
    } else {
        Some(format!("<!{rest}>"))
    }
}
409
/// Split the source text of a flow sequence (`[a, b, "c,d"]`) into its
/// trimmed item strings.
///
/// Commas inside single- or double-quoted regions do not split; inside
/// double quotes a backslash protects the following character. Nested
/// brackets and braces receive no special treatment.
///
/// Returns `None` when `text` (trimmed) is not wrapped in `[` ... `]` or
/// when an item before a comma is empty. An empty sequence yields an empty
/// vector, and an empty segment after the last comma is dropped.
fn simple_flow_sequence_items(text: &str) -> Option<Vec<String>> {
    #[derive(Clone, Copy)]
    enum Scan {
        Plain,
        Single,
        Double,
        DoubleEscaped,
    }

    let body = text.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if body.is_empty() {
        return Some(Vec::new());
    }

    let mut items = Vec::new();
    let mut item_start = 0usize;
    let mut scan = Scan::Plain;
    // All characters the scanner reacts to are ASCII, so byte offsets are
    // valid slice boundaries.
    for (pos, byte) in body.bytes().enumerate() {
        scan = match (scan, byte) {
            (Scan::DoubleEscaped, _) => Scan::Double,
            (Scan::Double, b'\\') => Scan::DoubleEscaped,
            (Scan::Double, b'"') => Scan::Plain,
            (Scan::Double, _) => Scan::Double,
            (Scan::Single, b'\'') => Scan::Plain,
            (Scan::Single, _) => Scan::Single,
            (Scan::Plain, b'\'') => Scan::Single,
            (Scan::Plain, b'"') => Scan::Double,
            (Scan::Plain, b',') => {
                let piece = body[item_start..pos].trim();
                if piece.is_empty() {
                    return None;
                }
                items.push(piece.to_string());
                item_start = pos + 1;
                Scan::Plain
            }
            (Scan::Plain, _) => Scan::Plain,
        };
    }

    let tail = body[item_start..].trim();
    if !tail.is_empty() {
        items.push(tail.to_string());
    }
    Some(items)
}
466
/// Escape a block-scalar body for the event stream: backslash, newline, tab
/// and carriage return are written as `\\`, `\n`, `\t` and `\r`; every other
/// character is copied through.
fn escape_block_scalar_text(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                '\t' => escaped.push_str("\\t"),
                '\r' => escaped.push_str("\\r"),
                _ => escaped.push(ch),
            }
            escaped
        })
}
480
481/// If `value_node` encodes a literal (`|`) or folded (`>`) block scalar,
482/// return the folded scalar body (no escaping applied yet). Scope: default
483/// clip chomping, auto-detected content indent, no explicit indicators.
484fn extract_block_scalar_body(value_node: &SyntaxNode) -> Option<(char, String)> {
485    let tokens: Vec<_> = value_node
486        .descendants_with_tokens()
487        .filter_map(|el| el.into_token())
488        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
489        .collect();
490    let first = tokens.first()?;
491    if first.kind() != SyntaxKind::YAML_SCALAR {
492        return None;
493    }
494    let indicator = match first.text() {
495        "|" => '|',
496        ">" => '>',
497        _ => return None,
498    };
499
500    let mut raw = String::new();
501    let mut seen_header = false;
502    let mut skipped_header_newline = false;
503    for tok in tokens.iter().skip(1) {
504        if !seen_header && !skipped_header_newline && tok.kind() == SyntaxKind::NEWLINE {
505            skipped_header_newline = true;
506            seen_header = true;
507            continue;
508        }
509        raw.push_str(tok.text());
510    }
511
512    let mut lines: Vec<&str> = raw.split('\n').collect();
513    if lines.last().is_some_and(|s| s.is_empty()) {
514        lines.pop();
515    }
516
517    let content_indent = lines
518        .iter()
519        .filter(|l| !l.trim().is_empty())
520        .map(|l| l.chars().take_while(|c| *c == ' ').count())
521        .min()
522        .unwrap_or(0);
523
524    let stripped: Vec<String> = lines
525        .iter()
526        .map(|l| {
527            if l.len() >= content_indent {
528                l[content_indent..].to_string()
529            } else {
530                String::new()
531            }
532        })
533        .collect();
534
535    let folded = match indicator {
536        '|' => stripped.join("\n"),
537        '>' => {
538            let mut result = String::new();
539            let mut last_blank = false;
540            for (idx, line) in stripped.iter().enumerate() {
541                if line.is_empty() {
542                    result.push('\n');
543                    last_blank = true;
544                } else {
545                    if idx > 0 && !last_blank {
546                        result.push(' ');
547                    }
548                    result.push_str(line);
549                    last_blank = false;
550                }
551            }
552            result
553        }
554        _ => unreachable!(),
555    };
556
557    let trimmed = folded.trim_end_matches('\n');
558    let body = if trimmed.is_empty() {
559        String::new()
560    } else {
561        format!("{trimmed}\n")
562    };
563    Some((indicator, body))
564}
565
/// Fold a (possibly multi-line) plain scalar onto one line: each line is
/// trimmed, blank lines are dropped, and the remaining pieces are joined
/// with a single space. Input with no non-blank line yields an empty string.
fn fold_plain_scalar(text: &str) -> String {
    text.split('\n')
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
579
580fn project_flow_map_entries(flow_map: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
581    let _ = handles;
582    for entry in flow_map
583        .children()
584        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
585    {
586        let key_node = entry
587            .children()
588            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
589            .expect("flow map key");
590        let value_node = entry
591            .children()
592            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
593            .expect("flow map value");
594
595        let has_explicit_colon = key_node
596            .children_with_tokens()
597            .filter_map(|el| el.into_token())
598            .any(|tok| tok.kind() == SyntaxKind::YAML_COLON);
599
600        let raw_key = key_node
601            .descendants_with_tokens()
602            .filter_map(|el| el.into_token())
603            .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_KEY))
604            .map(|tok| tok.text().to_string())
605            .collect::<Vec<_>>()
606            .join("");
607        let raw_value = value_node
608            .descendants_with_tokens()
609            .filter_map(|el| el.into_token())
610            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
611            .map(|tok| tok.text().to_string())
612            .collect::<Vec<_>>()
613            .join("");
614
615        if has_explicit_colon {
616            out.push(flow_scalar_event(&raw_key, handles));
617            out.push(flow_scalar_event(&raw_value, handles));
618        } else {
619            let combined = format!("{raw_key}{raw_value}");
620            out.push(plain_val_event(&fold_plain_scalar(&combined)));
621            out.push("=VAL :".to_string());
622        }
623    }
624}
625
626fn project_block_sequence_items(
627    seq_node: &SyntaxNode,
628    handles: &TagHandles,
629    out: &mut Vec<String>,
630) {
631    for item in seq_node
632        .children()
633        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
634    {
635        if let Some(nested_seq) = item
636            .children()
637            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
638        {
639            out.push("+SEQ".to_string());
640            project_block_sequence_items(&nested_seq, handles, out);
641            out.push("-SEQ".to_string());
642            continue;
643        }
644        if let Some(nested_map) = item
645            .children()
646            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
647        {
648            out.push("+MAP".to_string());
649            project_block_map_entries(&nested_map, handles, out);
650            out.push("-MAP".to_string());
651            continue;
652        }
653        if let Some(flow_seq) = item
654            .children()
655            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
656        {
657            let flow_text = flow_seq.text().to_string();
658            if let Some(flow_items) = simple_flow_sequence_items(&flow_text) {
659                out.push("+SEQ []".to_string());
660                for value in flow_items {
661                    project_flow_seq_item(&value, handles, out);
662                }
663                out.push("-SEQ".to_string());
664                continue;
665            }
666        }
667        if let Some(flow_map) = item
668            .children()
669            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
670        {
671            out.push("+MAP {}".to_string());
672            project_flow_map_entries(&flow_map, handles, out);
673            out.push("-MAP".to_string());
674            continue;
675        }
676        let item_tag = item
677            .descendants_with_tokens()
678            .filter_map(|el| el.into_token())
679            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
680            .map(|tok| tok.text().to_string());
681        let scalar_text = item
682            .descendants_with_tokens()
683            .filter_map(|el| el.into_token())
684            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
685            .map(|tok| tok.text().to_string())
686            .collect::<Vec<_>>()
687            .join("");
688        let scalar_trimmed = scalar_text.trim();
689        let event = if scalar_trimmed.starts_with('*') {
690            format!("=ALI {scalar_trimmed}")
691        } else {
692            // Combine the optional `YAML_TAG` token (already separated from
693            // the scalar text by the parser) with anchors/tags found in the
694            // scalar body, and render the YAML event in canonical
695            // `&anchor <tag> :body` order.
696            let item_long_tag = item_tag
697                .as_deref()
698                .and_then(|t| resolve_long_tag(t, handles));
699            let (anchor, body_tag, body) = decompose_scalar(scalar_trimmed, handles);
700            let long_tag = item_long_tag.or(body_tag);
701            scalar_event(anchor, long_tag.as_deref(), body)
702        };
703        out.push(event);
704    }
705}
706
707/// Decompose a node-property + scalar string into `(anchor, long_tag, body)`,
708/// peeling off any leading `&anchor` and tag shorthand in either order
709/// (`&a !!str foo` or `!!str &a foo`). Returns the raw body trimmed.
710fn decompose_scalar<'a>(
711    text: &'a str,
712    handles: &TagHandles,
713) -> (Option<&'a str>, Option<String>, &'a str) {
714    let mut anchor: Option<&str> = None;
715    let mut long_tag: Option<String> = None;
716    let mut rest = text.trim();
717    loop {
718        if anchor.is_none()
719            && let Some(after) = rest.strip_prefix('&')
720        {
721            let end = after
722                .find(|c: char| c.is_whitespace() || matches!(c, ',' | '}' | ']'))
723                .unwrap_or(after.len());
724            let (name, tail) = after.split_at(end);
725            anchor = Some(name);
726            rest = tail.trim_start();
727            continue;
728        }
729        if long_tag.is_none()
730            && let Some((tag, tail)) = split_leading_tag(rest)
731            && let Some(long) = resolve_long_tag(tag, handles)
732        {
733            long_tag = Some(long);
734            rest = tail.trim_start();
735            continue;
736        }
737        break;
738    }
739    (anchor, long_tag, rest)
740}
741
742/// Render a scalar event from its decomposed parts: optional anchor,
743/// optional long-form tag (already in `<...>` form), and the scalar body.
744/// Handles plain, double-quoted, and single-quoted bodies; quoted bodies
745/// share the same escape normalization as [`quoted_val_event`].
746fn scalar_event(anchor: Option<&str>, long_tag: Option<&str>, body: &str) -> String {
747    let mut prefix = String::new();
748    if let Some(a) = anchor {
749        prefix.push_str(&format!("&{a} "));
750    }
751    if let Some(t) = long_tag {
752        prefix.push_str(t);
753        prefix.push(' ');
754    }
755    let body = body.trim();
756    if body.is_empty() {
757        return format!("=VAL {prefix}:");
758    }
759    if body.starts_with('"') || body.starts_with('\'') {
760        // Reuse the shared escape/normalization rules; splice the prefix in
761        // place of the leading `=VAL ` token.
762        let quoted = quoted_val_event(body);
763        return quoted.replacen("=VAL ", &format!("=VAL {prefix}"), 1);
764    }
765    format!("=VAL {prefix}:{body}")
766}
767
768fn project_block_map_entries(map_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
769    for entry in map_node
770        .children()
771        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
772    {
773        let key_node = entry
774            .children()
775            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
776            .expect("key node");
777        let value_node = entry
778            .children()
779            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
780            .expect("value node");
781
782        let key_tag = key_node
783            .children_with_tokens()
784            .filter_map(|el| el.into_token())
785            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
786            .map(|tok| tok.text().to_string());
787        let key_text = key_node
788            .children_with_tokens()
789            .filter_map(|el| el.into_token())
790            .find(|tok| tok.kind() == SyntaxKind::YAML_KEY)
791            .map(|tok| tok.text().trim_end().to_string())
792            .expect("key token");
793
794        let key_event = if key_text.starts_with('*') {
795            format!("=ALI {}", key_text.trim_end())
796        } else {
797            let key_long_tag = key_tag
798                .as_deref()
799                .and_then(|t| resolve_long_tag(t, handles));
800            let (anchor, body_tag, body) = decompose_scalar(key_text.trim(), handles);
801            let long_tag = key_long_tag.or(body_tag);
802            scalar_event(anchor, long_tag.as_deref(), body)
803        };
804        out.push(key_event);
805
806        if let Some(nested_map) = value_node
807            .children()
808            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
809        {
810            out.push("+MAP".to_string());
811            project_block_map_entries(&nested_map, handles, out);
812            out.push("-MAP".to_string());
813            continue;
814        }
815
816        if let Some(flow_map) = value_node
817            .children()
818            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
819        {
820            out.push("+MAP {}".to_string());
821            project_flow_map_entries(&flow_map, handles, out);
822            out.push("-MAP".to_string());
823            continue;
824        }
825
826        if let Some((indicator, body)) = extract_block_scalar_body(&value_node) {
827            let escaped = escape_block_scalar_text(&body);
828            out.push(format!("=VAL {indicator}{escaped}"));
829            continue;
830        }
831
832        let value_tag = value_node
833            .children_with_tokens()
834            .filter_map(|el| el.into_token())
835            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
836            .map(|tok| tok.text().to_string());
837        let value_text = value_node
838            .descendants_with_tokens()
839            .filter_map(|el| el.into_token())
840            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
841            .map(|tok| tok.text().to_string())
842            .collect::<Vec<_>>()
843            .join("");
844
845        if value_tag.is_none()
846            && let Some(items) = simple_flow_sequence_items(&value_text)
847        {
848            out.push("+SEQ []".to_string());
849            for item in items {
850                project_flow_seq_item(&item, handles, out);
851            }
852            out.push("-SEQ".to_string());
853        } else if value_text.trim().is_empty() {
854            if let Some(tag) = value_tag
855                && let Some(long) = resolve_long_tag(&tag, handles)
856            {
857                out.push(format!("=VAL {long} :"));
858            } else {
859                out.push("=VAL :".to_string());
860            }
861        } else if value_text.trim_start().starts_with('*') {
862            out.push(format!("=ALI {}", value_text.trim()));
863        } else {
864            let value_long_tag = value_tag
865                .as_deref()
866                .and_then(|t| resolve_long_tag(t, handles));
867            let (anchor, body_tag, body) = decompose_scalar(value_text.trim(), handles);
868            let long_tag = value_long_tag.or(body_tag);
869            out.push(scalar_event(anchor, long_tag.as_deref(), body));
870        }
871    }
872}