Skip to main content

panache_parser/parser/yaml/
events.rs

//! YAML event projection: walk a shadow-parser CST and produce a
//! yaml-test-suite style event stream (`+STR`, `+DOC`, `+MAP`, `=VAL :foo`,
//! ...).
//!
//! This module is parser-crate scoped and used only by the test harness in
//! `crates/panache-parser/tests/yaml.rs` for fixture parity. It reads the
//! green tree built by [`crate::parser::yaml::parse_yaml_tree`] and re-derives
//! event-stream semantics (tag resolution, anchor stripping, flow-seq
//! splitting). The intent is to keep the projection adjacent to the parser so
//! CST shape is the single source of truth for events.
11
12use std::collections::HashMap;
13
14use crate::syntax::{SyntaxKind, SyntaxNode};
15
16use super::parser::parse_yaml_tree;
17
/// Per-document tag handle map: handle (`!!`, `!yaml!`, `!e!`) → URI prefix.
///
/// Keys are stored with their delimiting `!` characters, exactly as they
/// appear at the front of a tag shorthand, so resolution is a plain
/// prefix match against the tag text. The secondary handle `!!` always
/// defaults to `tag:yaml.org,2002:` per the YAML 1.2 spec. Per-document
/// `%TAG` directives override and add to this map.
type TagHandles = HashMap<String, String>;
22
23fn default_tag_handles() -> TagHandles {
24    let mut handles = HashMap::new();
25    handles.insert("!!".to_string(), "tag:yaml.org,2002:".to_string());
26    handles
27}
28
29/// Scan a `YAML_DOCUMENT` for `%TAG` directive lines and merge them into
30/// the default handle map.
31fn collect_tag_handles(doc: &SyntaxNode) -> TagHandles {
32    let mut handles = default_tag_handles();
33    for tok in doc
34        .descendants_with_tokens()
35        .filter_map(|el| el.into_token())
36    {
37        if tok.kind() != SyntaxKind::YAML_SCALAR {
38            continue;
39        }
40        let line = tok.text().trim_start();
41        let Some(rest) = line.strip_prefix("%TAG") else {
42            continue;
43        };
44        let mut parts = rest.split_whitespace();
45        let Some(handle) = parts.next() else { continue };
46        let Some(prefix) = parts.next() else { continue };
47        handles.insert(handle.to_string(), prefix.to_string());
48    }
49    handles
50}
51
52/// Resolve a tag shorthand (e.g. `!!str`, `!yaml!str`, `!e!foo`, `!local`) to
53/// the long-form `<tag:...>` event token, consulting the per-document handle
54/// map. Handles are checked first (so a `%TAG !` directive can override the
55/// primary handle); we fall back to the built-in handling for unknown handles.
56fn resolve_long_tag(tag: &str, handles: &TagHandles) -> Option<String> {
57    let mut best: Option<(&str, &String)> = None;
58    for (h, p) in handles {
59        if tag.starts_with(h)
60            && best.is_none_or(|(b_handle, _): (&str, _)| h.len() > b_handle.len())
61        {
62            best = Some((h.as_str(), p));
63        }
64    }
65    if let Some((handle, prefix)) = best {
66        let suffix = &tag[handle.len()..];
67        let resolved = format!("{prefix}{suffix}");
68        return Some(format!("<{}>", percent_decode_tag(&resolved)));
69    }
70    long_tag_builtin(tag)
71}
72
73/// Decode percent-encoded bytes (`%xx`) in a resolved tag URI. YAML 1.2 allows
74/// percent-encoding in tag suffixes so callers can embed otherwise-special
75/// characters (`!`, `:`, etc.); event-stream parity expects the decoded form.
76fn percent_decode_tag(tag: &str) -> String {
77    let bytes = tag.as_bytes();
78    let mut out = Vec::with_capacity(bytes.len());
79    let mut i = 0;
80    while i < bytes.len() {
81        if bytes[i] == b'%'
82            && i + 2 < bytes.len()
83            && let (Some(hi), Some(lo)) =
84                (hex_digit_value(bytes[i + 1]), hex_digit_value(bytes[i + 2]))
85        {
86            out.push(hi * 16 + lo);
87            i += 3;
88            continue;
89        }
90        out.push(bytes[i]);
91        i += 1;
92    }
93    String::from_utf8(out).unwrap_or_else(|_| tag.to_string())
94}
95
/// Numeric value (0–15) of an ASCII hexadecimal digit byte, accepting both
/// upper- and lower-case letters; `None` for any other byte.
fn hex_digit_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so narrowing to `u8` is lossless.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}
104
105/// Walk the shadow CST for `input` and return the projected yaml-test-suite
106/// event stream. Returns an empty vector if the input fails to parse.
107pub fn project_events(input: &str) -> Vec<String> {
108    let Some(tree) = parse_yaml_tree(input) else {
109        return Vec::new();
110    };
111
112    let mut events = vec!["+STR".to_string()];
113    let stream = tree
114        .descendants()
115        .find(|n| n.kind() == SyntaxKind::YAML_STREAM);
116    if let Some(stream) = stream {
117        for doc in stream
118            .children()
119            .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
120        {
121            project_document(&doc, &mut events);
122        }
123    }
124    events.push("-STR".to_string());
125    events
126}
127
128fn project_document(doc: &SyntaxNode, out: &mut Vec<String>) {
129    let has_doc_start = doc
130        .children_with_tokens()
131        .filter_map(|el| el.into_token())
132        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START);
133    let has_doc_end = doc
134        .children_with_tokens()
135        .filter_map(|el| el.into_token())
136        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END);
137    out.push(if has_doc_start {
138        "+DOC ---".to_string()
139    } else {
140        "+DOC".to_string()
141    });
142    let handles = collect_tag_handles(doc);
143
144    if let Some(seq_node) = doc
145        .descendants()
146        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
147    {
148        out.push("+SEQ".to_string());
149        project_block_sequence_items(&seq_node, &handles, out);
150        out.push("-SEQ".to_string());
151    } else if let Some(root_map) = doc
152        .descendants()
153        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
154    {
155        let mut values = Vec::new();
156        project_block_map_entries(&root_map, &handles, &mut values);
157        if !values.is_empty() {
158            out.push("+MAP".to_string());
159            out.append(&mut values);
160            out.push("-MAP".to_string());
161        } else if let Some(flow_map) = doc
162            .descendants()
163            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
164        {
165            let mut flow_values = Vec::new();
166            project_flow_map_entries(&flow_map, &handles, &mut flow_values);
167            out.push("+MAP {}".to_string());
168            out.append(&mut flow_values);
169            out.push("-MAP".to_string());
170        } else if let Some(flow_seq) = doc
171            .descendants()
172            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
173            && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
174        {
175            out.push("+SEQ []".to_string());
176            for item in items {
177                project_flow_seq_item(&item, &handles, out);
178            }
179            out.push("-SEQ".to_string());
180        } else if let Some(scalar) = scalar_document_value(doc, &handles) {
181            out.push(scalar);
182        } else {
183            out.push("=VAL :".to_string());
184        }
185    } else if let Some(flow_map) = doc
186        .descendants()
187        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
188    {
189        out.push("+MAP {}".to_string());
190        project_flow_map_entries(&flow_map, &handles, out);
191        out.push("-MAP".to_string());
192    } else if let Some(flow_seq) = doc
193        .descendants()
194        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
195        && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
196    {
197        out.push("+SEQ []".to_string());
198        for item in items {
199            project_flow_seq_item(&item, &handles, out);
200        }
201        out.push("-SEQ".to_string());
202    } else if let Some(scalar) = scalar_document_value(doc, &handles) {
203        out.push(scalar);
204    } else {
205        out.push("=VAL :".to_string());
206    }
207
208    out.push(if has_doc_end {
209        "-DOC ...".to_string()
210    } else {
211        "-DOC".to_string()
212    });
213}
214
215fn scalar_document_value(doc: &SyntaxNode, handles: &TagHandles) -> Option<String> {
216    // Skip `%TAG`/`%YAML` directive lines: those are document-level metadata,
217    // not part of the scalar body.
218    let text = doc
219        .descendants_with_tokens()
220        .filter_map(|el| el.into_token())
221        .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
222        .filter(|tok| !tok.text().trim_start().starts_with('%'))
223        .map(|tok| tok.text().to_string())
224        .collect::<Vec<_>>()
225        .join("");
226    let trimmed_text = text.trim();
227    if trimmed_text.is_empty() {
228        // Tagged-but-empty scalar document still emits a `=VAL <tag> :` event.
229        let tag_only = doc
230            .descendants_with_tokens()
231            .filter_map(|el| el.into_token())
232            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
233            .map(|tok| tok.text().to_string());
234        if let Some(tag) = tag_only
235            && let Some(long) = resolve_long_tag(&tag, handles)
236        {
237            return Some(format!("=VAL {long} :"));
238        }
239        return None;
240    }
241    let tag_text = doc
242        .descendants_with_tokens()
243        .filter_map(|el| el.into_token())
244        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
245        .map(|tok| tok.text().to_string());
246    let event = if let Some(tag) = tag_text
247        && let Some(long) = resolve_long_tag(&tag, handles)
248    {
249        if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
250            let quoted = quoted_val_event(trimmed_text);
251            // quoted_val_event returns `=VAL "body` — splice the tag in.
252            quoted.replacen("=VAL ", &format!("=VAL {long} "), 1)
253        } else {
254            format!("=VAL {long} :{trimmed_text}")
255        }
256    } else if trimmed_text.starts_with('"') || trimmed_text.starts_with('\'') {
257        quoted_val_event(&text)
258    } else {
259        plain_val_event(&text)
260    };
261    Some(event)
262}
263
/// Render `text` as a plain-scalar event (`=VAL :text`), doubling every
/// backslash. No other character is escaped.
fn plain_val_event(text: &str) -> String {
    const PREFIX: &str = "=VAL :";
    let mut event = String::with_capacity(PREFIX.len() + text.len());
    event.push_str(PREFIX);
    for ch in text.chars() {
        if ch == '\\' {
            event.push_str("\\\\");
        } else {
            event.push(ch);
        }
    }
    event
}
267
268/// Project a flow-collection scalar token, preserving quoted-scalar
269/// classification when the source uses `"..."` or `'...'`. Plain scalars are
270/// folded just like outside flow context. A leading tag shorthand (`!!str`,
271/// `!handle!suffix`, `!local`) is resolved through `handles`.
272fn flow_scalar_event(text: &str, handles: &TagHandles) -> String {
273    let trimmed = text.trim();
274    if trimmed.starts_with('"') || trimmed.starts_with('\'') {
275        return quoted_val_event(trimmed);
276    }
277    let (anchor, long_tag, body) = decompose_scalar(trimmed, handles);
278    if anchor.is_some() || long_tag.is_some() {
279        return scalar_event(anchor, long_tag.as_deref(), body);
280    }
281    plain_val_event(&fold_plain_scalar(text))
282}
283
/// Split a leading tag shorthand (`!`, `!local`, `!!suffix`,
/// `!handle!suffix`) off `text`, returning `Some((tag, remainder))`.
///
/// The tag runs from the leading `!` up to (not including) the first space,
/// tab, newline, `,`, `}` or `]`, or to the end of input. Returns `None`
/// when `text` does not start with `!`, or when the tag contains more than
/// one further `!` after the leading one.
fn split_leading_tag(text: &str) -> Option<(&str, &str)> {
    let after_bang = text.strip_prefix('!')?;
    let tag_end = after_bang
        .find(|ch: char| matches!(ch, ' ' | '\t' | '\n' | ',' | '}' | ']'))
        .unwrap_or(after_bang.len());
    if after_bang[..tag_end].matches('!').count() > 1 {
        return None;
    }
    // `+ 1` accounts for the leading `!` stripped above.
    Some(text.split_at(1 + tag_end))
}
310
/// Locate a flow-context key/value `:` indicator within a flow-sequence item.
///
/// Returns `Some((colon, after))`, the byte offsets of the `:` and of the
/// byte following it. A `:` only counts when it sits outside quotes and is
/// followed by a space, tab, line break, or the end of the item; otherwise
/// it is part of a plain scalar (e.g. `http://foo.com`). Inside double
/// quotes a backslash protects the next character; single-quoted regions end
/// at the next `'`.
fn flow_kv_split(item: &str) -> Option<(usize, usize)> {
    #[derive(Clone, Copy)]
    enum Scan {
        Plain,
        Single,
        Double,
        DoubleEscaped,
    }

    let raw = item.as_bytes();
    let mut state = Scan::Plain;
    for (pos, ch) in item.char_indices() {
        state = match (state, ch) {
            (Scan::DoubleEscaped, _) => Scan::Double,
            (Scan::Double, '\\') => Scan::DoubleEscaped,
            (Scan::Double, '"') => Scan::Plain,
            (Scan::Double, _) => Scan::Double,
            (Scan::Single, '\'') => Scan::Plain,
            (Scan::Single, _) => Scan::Single,
            (Scan::Plain, '\'') => Scan::Single,
            (Scan::Plain, '"') => Scan::Double,
            (Scan::Plain, ':') => {
                // `:` is a single byte, so the next byte is at `pos + 1`.
                let after = pos + 1;
                let ends_key = match raw.get(after) {
                    None => true,
                    Some(&next) => matches!(next, b' ' | b'\t' | b'\n' | b'\r'),
                };
                if ends_key {
                    return Some((pos, after));
                }
                Scan::Plain
            }
            (Scan::Plain, _) => Scan::Plain,
        };
    }
    None
}
355
356/// Emit events for a single flow-sequence item: either `+MAP {} key val -MAP`
357/// when the item is a flow-map entry (`key: value`, possibly with empty key
358/// or value), or a single `=VAL` for a bare scalar.
359fn project_flow_seq_item(item: &str, handles: &TagHandles, out: &mut Vec<String>) {
360    if let Some((colon, after)) = flow_kv_split(item) {
361        let raw_key_full = item[..colon].trim();
362        // Strip the explicit-key `?` indicator (followed by whitespace or
363        // end-of-key) when present.
364        let raw_key = strip_explicit_key_indicator(raw_key_full);
365        let raw_value = item[after..].trim();
366        out.push("+MAP {}".to_string());
367        if raw_key.is_empty() {
368            out.push("=VAL :".to_string());
369        } else {
370            out.push(flow_scalar_event(raw_key, handles));
371        }
372        if raw_value.is_empty() {
373            out.push("=VAL :".to_string());
374        } else {
375            out.push(flow_scalar_event(raw_value, handles));
376        }
377        out.push("-MAP".to_string());
378    } else if item.trim_start().starts_with('"') || item.trim_start().starts_with('\'') {
379        out.push(quoted_val_event(item.trim()));
380    } else {
381        out.push(plain_val_event(&fold_plain_scalar(item)));
382    }
383}
384
/// Remove a leading explicit-key indicator (`?`) from `key`.
///
/// The `?` only counts when it is the first non-whitespace character and is
/// followed by a space, tab, newline, or nothing; the returned slice then
/// starts at the first non-whitespace character after it. In every other
/// case `key` is returned unchanged (including its leading whitespace).
fn strip_explicit_key_indicator(key: &str) -> &str {
    match key.trim_start().strip_prefix('?') {
        Some(rest) if matches!(rest.chars().next(), None | Some(' ' | '\t' | '\n')) => {
            rest.trim_start()
        }
        _ => key,
    }
}
394
395fn quoted_val_event(text: &str) -> String {
396    if text.starts_with('\'') {
397        let inner = decode_single_quoted(text);
398        format!("=VAL '{}", escape_for_event(&inner))
399    } else {
400        let inner = decode_double_quoted(text);
401        format!("=VAL \"{}", escape_for_event(&inner))
402    }
403}
404
/// Decode a single-quoted scalar: drop one leading and one trailing `'` when
/// present, then collapse each `''` pair into a single `'`.
fn decode_single_quoted(text: &str) -> String {
    let after_open = text.strip_prefix('\'').unwrap_or(text);
    let inner = after_open.strip_suffix('\'').unwrap_or(after_open);

    let mut decoded = String::with_capacity(inner.len());
    let mut chars = inner.chars().peekable();
    while let Some(ch) = chars.next() {
        decoded.push(ch);
        if ch == '\'' {
            // Swallow the second quote of an escaped `''` pair, if any.
            chars.next_if_eq(&'\'');
        }
    }
    decoded
}
410
411/// Decode YAML double-quoted scalar escape sequences into actual characters
412/// per YAML 1.2 §5.7. Unknown escapes are kept verbatim so the harness can
413/// surface them as bare backslash-prefixed text.
414fn decode_double_quoted(text: &str) -> String {
415    let body = text.strip_prefix('"').unwrap_or(text);
416    let mut out = String::with_capacity(body.len());
417    let mut chars = body.chars();
418    while let Some(ch) = chars.next() {
419        if ch == '"' {
420            break;
421        }
422        if ch != '\\' {
423            out.push(ch);
424            continue;
425        }
426        let Some(next) = chars.next() else {
427            out.push('\\');
428            break;
429        };
430        match next {
431            '0' => out.push('\0'),
432            'a' => out.push('\u{07}'),
433            'b' => out.push('\u{08}'),
434            't' | '\t' => out.push('\t'),
435            'n' => out.push('\n'),
436            'v' => out.push('\u{0B}'),
437            'f' => out.push('\u{0C}'),
438            'r' => out.push('\r'),
439            'e' => out.push('\u{1B}'),
440            ' ' => out.push(' '),
441            '"' => out.push('"'),
442            '/' => out.push('/'),
443            '\\' => out.push('\\'),
444            'N' => out.push('\u{85}'),
445            '_' => out.push('\u{A0}'),
446            'L' => out.push('\u{2028}'),
447            'P' => out.push('\u{2029}'),
448            'x' => {
449                if let Some(c) = take_hex_char(&mut chars, 2) {
450                    out.push(c);
451                }
452            }
453            'u' => {
454                if let Some(c) = take_hex_char(&mut chars, 4) {
455                    out.push(c);
456                }
457            }
458            'U' => {
459                if let Some(c) = take_hex_char(&mut chars, 8) {
460                    out.push(c);
461                }
462            }
463            other => {
464                out.push('\\');
465                out.push(other);
466            }
467        }
468    }
469    out
470}
471
/// Consume up to `n` characters from `chars` and decode them as one
/// hexadecimal code point.
///
/// Returns `None` (with the characters still consumed) unless exactly `n`
/// ASCII hex digits were read and their value is a valid `char`. The digits
/// are validated explicitly because `u32::from_str_radix` also accepts a
/// leading `+`, which is not legal inside a `\x` / `\u` / `\U` escape.
fn take_hex_char(chars: &mut std::str::Chars<'_>, n: usize) -> Option<char> {
    let digits: String = chars.by_ref().take(n).collect();
    // All-ASCII content makes the byte length equal to the character count.
    let well_formed = digits.len() == n && digits.bytes().all(|b| b.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }
    u32::from_str_radix(&digits, 16).ok().and_then(char::from_u32)
}
479
/// Escape decoded scalar text for the yaml-test-suite event format.
///
/// Backslash, newline, tab, carriage return, bell, backspace, vertical tab,
/// form feed, escape and NUL are rendered as two-character backslash escapes
/// (`\\`, `\n`, `\t`, `\r`, `\a`, `\b`, `\v`, `\f`, `\e`, `\0`); every other
/// character is copied through unchanged.
fn escape_for_event(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\n' => "\\n",
            '\t' => "\\t",
            '\r' => "\\r",
            '\u{07}' => "\\a",
            '\u{08}' => "\\b",
            '\u{0B}' => "\\v",
            '\u{0C}' => "\\f",
            '\u{1B}' => "\\e",
            '\0' => "\\0",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
502
/// Resolve a tag without consulting any `%TAG` handle.
///
/// * Verbatim tag `!<uri>` → `<uri>`: YAML 1.2 requires the verbatim tag to
///   be delivered as-is, so the surrounding `!<` / `>` are dropped and the
///   content is not interpreted further.
/// * Non-specific tag `!` → `<!>`.
/// * Bare local tag `!local` (no second `!`) → `<!local>`.
///
/// Returns `None` for anything else, i.e. text that does not start with `!`
/// or a `!handle!suffix` shorthand whose handle is unknown here.
fn long_tag_builtin(tag: &str) -> Option<String> {
    let rest = tag.strip_prefix('!')?;

    let verbatim = rest
        .strip_prefix('<')
        .and_then(|inner| inner.strip_suffix('>'))
        .filter(|uri| !uri.is_empty());
    if let Some(uri) = verbatim {
        return Some(format!("<{uri}>"));
    }

    // A second `!` means a named handle, which only the handle map can
    // resolve. The empty `rest` of a lone `!` falls through to `<!>`.
    (!rest.contains('!')).then(|| format!("<!{rest}>"))
}
515
/// Split the text of a flow sequence (`[a, b, c]`) into its trimmed items.
///
/// Returns `None` when the trimmed text is not wrapped in `[` … `]`, or when
/// an item before a comma is empty (`[a,,b]`, `[,a]`). Commas inside single-
/// or double-quoted regions do not split; inside double quotes a backslash
/// protects the next character. An empty final item (trailing comma) is
/// dropped. Nested brackets are not tracked: commas inside a nested
/// collection still split.
fn simple_flow_sequence_items(text: &str) -> Option<Vec<String>> {
    let inner = text.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    let mut items = Vec::new();
    if inner.is_empty() {
        return Some(items);
    }

    let mut item_start = 0usize;
    let mut open_quote: Option<char> = None;
    let mut skip_next = false;
    for (pos, ch) in inner.char_indices() {
        match open_quote {
            Some('"') => {
                if skip_next {
                    skip_next = false;
                } else if ch == '\\' {
                    skip_next = true;
                } else if ch == '"' {
                    open_quote = None;
                }
            }
            Some(_) => {
                if ch == '\'' {
                    open_quote = None;
                }
            }
            None => match ch {
                '\'' | '"' => open_quote = Some(ch),
                ',' => {
                    let piece = inner[item_start..pos].trim();
                    if piece.is_empty() {
                        return None;
                    }
                    items.push(piece.to_string());
                    // `,` is one byte wide.
                    item_start = pos + 1;
                }
                _ => {}
            },
        }
    }

    let tail = inner[item_start..].trim();
    if !tail.is_empty() {
        items.push(tail.to_string());
    }
    Some(items)
}
572
/// Escape block-scalar text for the event format: backslash, newline, tab
/// and carriage return become `\\`, `\n`, `\t` and `\r`; everything else is
/// copied unchanged.
fn escape_block_scalar_text(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                '\t' => escaped.push_str("\\t"),
                '\r' => escaped.push_str("\\r"),
                _ => escaped.push(ch),
            }
            escaped
        })
}
586
587/// If `value_node` encodes a literal (`|`) or folded (`>`) block scalar,
588/// return the folded scalar body (no escaping applied yet). Scope: default
589/// clip chomping, auto-detected content indent, no explicit indicators.
590fn extract_block_scalar_body(value_node: &SyntaxNode) -> Option<(char, String)> {
591    let tokens: Vec<_> = value_node
592        .descendants_with_tokens()
593        .filter_map(|el| el.into_token())
594        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
595        .collect();
596    let first = tokens.first()?;
597    if first.kind() != SyntaxKind::YAML_SCALAR {
598        return None;
599    }
600    let indicator = match first.text() {
601        "|" => '|',
602        ">" => '>',
603        _ => return None,
604    };
605
606    let mut raw = String::new();
607    let mut seen_header = false;
608    let mut skipped_header_newline = false;
609    for tok in tokens.iter().skip(1) {
610        if !seen_header && !skipped_header_newline && tok.kind() == SyntaxKind::NEWLINE {
611            skipped_header_newline = true;
612            seen_header = true;
613            continue;
614        }
615        raw.push_str(tok.text());
616    }
617
618    let mut lines: Vec<&str> = raw.split('\n').collect();
619    if lines.last().is_some_and(|s| s.is_empty()) {
620        lines.pop();
621    }
622
623    let content_indent = lines
624        .iter()
625        .filter(|l| !l.trim().is_empty())
626        .map(|l| l.chars().take_while(|c| *c == ' ').count())
627        .min()
628        .unwrap_or(0);
629
630    let stripped: Vec<String> = lines
631        .iter()
632        .map(|l| {
633            if l.len() >= content_indent {
634                l[content_indent..].to_string()
635            } else {
636                String::new()
637            }
638        })
639        .collect();
640
641    let folded = match indicator {
642        '|' => stripped.join("\n"),
643        '>' => {
644            let mut result = String::new();
645            let mut last_blank = false;
646            for (idx, line) in stripped.iter().enumerate() {
647                if line.is_empty() {
648                    result.push('\n');
649                    last_blank = true;
650                } else {
651                    if idx > 0 && !last_blank {
652                        result.push(' ');
653                    }
654                    result.push_str(line);
655                    last_blank = false;
656                }
657            }
658            result
659        }
660        _ => unreachable!(),
661    };
662
663    let trimmed = folded.trim_end_matches('\n');
664    let body = if trimmed.is_empty() {
665        String::new()
666    } else {
667        format!("{trimmed}\n")
668    };
669    Some((indicator, body))
670}
671
/// Fold a possibly multi-line plain scalar into a single line: each line is
/// trimmed, empty lines are dropped, and the survivors are joined with one
/// space.
///
/// Lines whose first non-blank character is `#` are dropped as well: the
/// lexer currently leaves comment lines embedded in scalar token text inside
/// multi-line flow continuations.
fn fold_plain_scalar(text: &str) -> String {
    text.split('\n')
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .collect::<Vec<_>>()
        .join(" ")
}
689
690fn project_flow_map_entries(flow_map: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
691    for entry in flow_map
692        .children()
693        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
694    {
695        let key_node = entry
696            .children()
697            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
698            .expect("flow map key");
699        let value_node = entry
700            .children()
701            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
702            .expect("flow map value");
703
704        let has_explicit_colon = key_node
705            .children_with_tokens()
706            .filter_map(|el| el.into_token())
707            .any(|tok| tok.kind() == SyntaxKind::YAML_COLON);
708
709        let raw_key = key_node
710            .descendants_with_tokens()
711            .filter_map(|el| el.into_token())
712            .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_KEY))
713            .map(|tok| tok.text().to_string())
714            .collect::<Vec<_>>()
715            .join("");
716
717        if has_explicit_colon {
718            // Strip the explicit-key `?` indicator (`{ ? foo : v }`) from
719            // the projected key text. A bare `? :` entry (key reduces to
720            // empty after stripping) projects to an empty `=VAL :`.
721            let stripped_key = strip_explicit_key_indicator(raw_key.trim());
722            if stripped_key.is_empty() {
723                out.push("=VAL :".to_string());
724            } else {
725                out.push(flow_scalar_event(stripped_key, handles));
726            }
727            project_flow_map_value(&value_node, handles, out);
728        } else {
729            let raw_value = value_node
730                .descendants_with_tokens()
731                .filter_map(|el| el.into_token())
732                .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
733                .map(|tok| tok.text().to_string())
734                .collect::<Vec<_>>()
735                .join("");
736            let combined = format!("{raw_key}{raw_value}");
737            let folded = fold_plain_scalar(&combined);
738            let stripped = strip_explicit_key_indicator(&folded);
739            if stripped.is_empty() {
740                out.push("=VAL :".to_string());
741            } else {
742                out.push(plain_val_event(stripped));
743            }
744            out.push("=VAL :".to_string());
745        }
746    }
747}
748
749/// Project a `YAML_FLOW_MAP_VALUE` node, recursing into nested flow
750/// collections (`+SEQ [] ... -SEQ`, `+MAP {} ... -MAP`) when present so that
751/// multi-line nested flow values like `{ a: [ b, c, { d: [e, f] } ] }`
752/// produce structured event streams instead of one slurped scalar.
753fn project_flow_map_value(value_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
754    if let Some(flow_seq) = value_node
755        .children()
756        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
757    {
758        out.push("+SEQ []".to_string());
759        project_flow_sequence_items_cst(&flow_seq, handles, out);
760        out.push("-SEQ".to_string());
761        return;
762    }
763    if let Some(nested_map) = value_node
764        .children()
765        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
766    {
767        out.push("+MAP {}".to_string());
768        project_flow_map_entries(&nested_map, handles, out);
769        out.push("-MAP".to_string());
770        return;
771    }
772
773    let raw_value = value_node
774        .descendants_with_tokens()
775        .filter_map(|el| el.into_token())
776        .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
777        .map(|tok| tok.text().to_string())
778        .collect::<Vec<_>>()
779        .join("");
780    out.push(flow_scalar_event(&raw_value, handles));
781}
782
783/// CST-walking variant of flow-sequence projection. Each
784/// `YAML_FLOW_SEQUENCE_ITEM` may contain a nested `YAML_FLOW_SEQUENCE` /
785/// `YAML_FLOW_MAP`; if neither is present we fall back to the text-based
786/// `project_flow_seq_item` for plain/quoted scalar items.
787fn project_flow_sequence_items_cst(
788    flow_seq: &SyntaxNode,
789    handles: &TagHandles,
790    out: &mut Vec<String>,
791) {
792    for item in flow_seq
793        .children()
794        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
795    {
796        if let Some(nested_seq) = item
797            .children()
798            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
799        {
800            out.push("+SEQ []".to_string());
801            project_flow_sequence_items_cst(&nested_seq, handles, out);
802            out.push("-SEQ".to_string());
803            continue;
804        }
805        if let Some(nested_map) = item
806            .children()
807            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
808        {
809            out.push("+MAP {}".to_string());
810            project_flow_map_entries(&nested_map, handles, out);
811            out.push("-MAP".to_string());
812            continue;
813        }
814        // Build the item text from scalar/key tokens only so embedded
815        // `YAML_COMMENT` tokens (e.g. `[ word1\n# comment\n, word2]`) do not
816        // leak into the projected scalar value.
817        let item_text: String = item
818            .descendants_with_tokens()
819            .filter_map(|el| el.into_token())
820            .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_KEY))
821            .map(|tok| tok.text().to_string())
822            .collect();
823        project_flow_seq_item(&item_text, handles, out);
824    }
825}
826
827fn project_block_sequence_items(
828    seq_node: &SyntaxNode,
829    handles: &TagHandles,
830    out: &mut Vec<String>,
831) {
832    for item in seq_node
833        .children()
834        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
835    {
836        if let Some(nested_seq) = item
837            .children()
838            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
839        {
840            out.push("+SEQ".to_string());
841            project_block_sequence_items(&nested_seq, handles, out);
842            out.push("-SEQ".to_string());
843            continue;
844        }
845        if let Some(nested_map) = item
846            .children()
847            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
848        {
849            out.push("+MAP".to_string());
850            project_block_map_entries(&nested_map, handles, out);
851            out.push("-MAP".to_string());
852            continue;
853        }
854        if let Some(flow_seq) = item
855            .children()
856            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
857        {
858            let flow_text = flow_seq.text().to_string();
859            if let Some(flow_items) = simple_flow_sequence_items(&flow_text) {
860                out.push("+SEQ []".to_string());
861                for value in flow_items {
862                    project_flow_seq_item(&value, handles, out);
863                }
864                out.push("-SEQ".to_string());
865                continue;
866            }
867        }
868        if let Some(flow_map) = item
869            .children()
870            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
871        {
872            out.push("+MAP {}".to_string());
873            project_flow_map_entries(&flow_map, handles, out);
874            out.push("-MAP".to_string());
875            continue;
876        }
877        let item_tag = item
878            .descendants_with_tokens()
879            .filter_map(|el| el.into_token())
880            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
881            .map(|tok| tok.text().to_string());
882        let scalar_text = item
883            .descendants_with_tokens()
884            .filter_map(|el| el.into_token())
885            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
886            .map(|tok| tok.text().to_string())
887            .collect::<Vec<_>>()
888            .join("");
889        let scalar_trimmed = scalar_text.trim();
890        let event = if scalar_trimmed.starts_with('*') {
891            format!("=ALI {scalar_trimmed}")
892        } else {
893            // Combine the optional `YAML_TAG` token (already separated from
894            // the scalar text by the parser) with anchors/tags found in the
895            // scalar body, and render the YAML event in canonical
896            // `&anchor <tag> :body` order.
897            let item_long_tag = item_tag
898                .as_deref()
899                .and_then(|t| resolve_long_tag(t, handles));
900            let (anchor, body_tag, body) = decompose_scalar(scalar_trimmed, handles);
901            let long_tag = item_long_tag.or(body_tag);
902            scalar_event(anchor, long_tag.as_deref(), body)
903        };
904        out.push(event);
905    }
906}
907
908/// Decompose a node-property + scalar string into `(anchor, long_tag, body)`,
909/// peeling off any leading `&anchor` and tag shorthand in either order
910/// (`&a !!str foo` or `!!str &a foo`). Returns the raw body trimmed.
911fn decompose_scalar<'a>(
912    text: &'a str,
913    handles: &TagHandles,
914) -> (Option<&'a str>, Option<String>, &'a str) {
915    let mut anchor: Option<&str> = None;
916    let mut long_tag: Option<String> = None;
917    let mut rest = text.trim();
918    loop {
919        if anchor.is_none()
920            && let Some(after) = rest.strip_prefix('&')
921        {
922            let end = after
923                .find(|c: char| c.is_whitespace() || matches!(c, ',' | '}' | ']'))
924                .unwrap_or(after.len());
925            let (name, tail) = after.split_at(end);
926            anchor = Some(name);
927            rest = tail.trim_start();
928            continue;
929        }
930        if long_tag.is_none()
931            && let Some((tag, tail)) = split_leading_tag(rest)
932            && let Some(long) = resolve_long_tag(tag, handles)
933        {
934            long_tag = Some(long);
935            rest = tail.trim_start();
936            continue;
937        }
938        break;
939    }
940    (anchor, long_tag, rest)
941}
942
943/// Render a scalar event from its decomposed parts: optional anchor,
944/// optional long-form tag (already in `<...>` form), and the scalar body.
945/// Handles plain, double-quoted, and single-quoted bodies; quoted bodies
946/// share the same escape normalization as [`quoted_val_event`].
947fn scalar_event(anchor: Option<&str>, long_tag: Option<&str>, body: &str) -> String {
948    let mut prefix = String::new();
949    if let Some(a) = anchor {
950        prefix.push_str(&format!("&{a} "));
951    }
952    if let Some(t) = long_tag {
953        prefix.push_str(t);
954        prefix.push(' ');
955    }
956    let body = body.trim();
957    if body.is_empty() {
958        return format!("=VAL {prefix}:");
959    }
960    if body.starts_with('"') || body.starts_with('\'') {
961        // Reuse the shared escape/normalization rules; splice the prefix in
962        // place of the leading `=VAL ` token.
963        let quoted = quoted_val_event(body);
964        return quoted.replacen("=VAL ", &format!("=VAL {prefix}"), 1);
965    }
966    format!("=VAL {prefix}:{body}")
967}
968
969fn project_block_map_entries(map_node: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
970    for child in map_node.children_with_tokens() {
971        match child {
972            rowan::NodeOrToken::Token(tok)
973                if tok.kind() == SyntaxKind::YAML_SCALAR
974                    && tok.text().trim_start().starts_with("? ") =>
975            {
976                let body = tok.text().trim_start().trim_start_matches("? ").trim();
977                if body.is_empty() {
978                    out.push("=VAL :".to_string());
979                } else {
980                    let (anchor, body_tag, rest) = decompose_scalar(body, handles);
981                    out.push(scalar_event(anchor, body_tag.as_deref(), rest));
982                }
983                out.push("=VAL :".to_string());
984            }
985            rowan::NodeOrToken::Node(entry) if entry.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY => {
986                project_block_map_entry(&entry, handles, out);
987            }
988            _ => {}
989        }
990    }
991}
992
993fn project_block_map_entry(entry: &SyntaxNode, handles: &TagHandles, out: &mut Vec<String>) {
994    let key_node = entry
995        .children()
996        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
997        .expect("key node");
998    let value_node = entry
999        .children()
1000        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
1001        .expect("value node");
1002
1003    let key_tag = key_node
1004        .children_with_tokens()
1005        .filter_map(|el| el.into_token())
1006        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1007        .map(|tok| tok.text().to_string());
1008    let key_text = key_node
1009        .children_with_tokens()
1010        .filter_map(|el| el.into_token())
1011        .find(|tok| tok.kind() == SyntaxKind::YAML_KEY)
1012        .map(|tok| tok.text().trim_end().to_string())
1013        .expect("key token");
1014
1015    let key_event = if key_text.starts_with('*') {
1016        format!("=ALI {}", key_text.trim_end())
1017    } else {
1018        let key_long_tag = key_tag
1019            .as_deref()
1020            .and_then(|t| resolve_long_tag(t, handles));
1021        let (anchor, body_tag, body) = decompose_scalar(key_text.trim(), handles);
1022        let long_tag = key_long_tag.or(body_tag);
1023        scalar_event(anchor, long_tag.as_deref(), body)
1024    };
1025    out.push(key_event);
1026
1027    if let Some(nested_map) = value_node
1028        .children()
1029        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
1030    {
1031        out.push("+MAP".to_string());
1032        project_block_map_entries(&nested_map, handles, out);
1033        out.push("-MAP".to_string());
1034        return;
1035    }
1036
1037    if let Some(flow_map) = value_node
1038        .children()
1039        .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
1040    {
1041        out.push("+MAP {}".to_string());
1042        project_flow_map_entries(&flow_map, handles, out);
1043        out.push("-MAP".to_string());
1044        return;
1045    }
1046
1047    if let Some((indicator, body)) = extract_block_scalar_body(&value_node) {
1048        let escaped = escape_block_scalar_text(&body);
1049        out.push(format!("=VAL {indicator}{escaped}"));
1050        return;
1051    }
1052
1053    let value_tag = value_node
1054        .children_with_tokens()
1055        .filter_map(|el| el.into_token())
1056        .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
1057        .map(|tok| tok.text().to_string());
1058    let value_text = value_node
1059        .descendants_with_tokens()
1060        .filter_map(|el| el.into_token())
1061        .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
1062        .map(|tok| tok.text().to_string())
1063        .collect::<Vec<_>>()
1064        .join("");
1065
1066    if value_tag.is_none()
1067        && let Some(items) = simple_flow_sequence_items(&value_text)
1068    {
1069        out.push("+SEQ []".to_string());
1070        for item in items {
1071            project_flow_seq_item(&item, handles, out);
1072        }
1073        out.push("-SEQ".to_string());
1074    } else if value_text.trim().is_empty() {
1075        if let Some(tag) = value_tag
1076            && let Some(long) = resolve_long_tag(&tag, handles)
1077        {
1078            out.push(format!("=VAL {long} :"));
1079        } else {
1080            out.push("=VAL :".to_string());
1081        }
1082    } else if value_text.trim_start().starts_with('*') {
1083        out.push(format!("=ALI {}", value_text.trim()));
1084    } else {
1085        let value_long_tag = value_tag
1086            .as_deref()
1087            .and_then(|t| resolve_long_tag(t, handles));
1088        let (anchor, body_tag, body) = decompose_scalar(value_text.trim(), handles);
1089        let long_tag = value_long_tag.or(body_tag);
1090        out.push(scalar_event(anchor, long_tag.as_deref(), body));
1091    }
1092}