Skip to main content

panache_parser/parser/yaml/
events.rs

1//! YAML event projection: walk a shadow-parser CST and produce a
2//! yaml-test-suite style event stream (`+STR`, `+DOC`, `+MAP`, `=VAL :foo`,
3//! ...).
4//!
5//! This module is parser-crate scoped and used only by the test harness in
6//! `crates/panache-parser/tests/yaml.rs` for fixture parity. It reads the
7//! green tree built by [`crate::parser::yaml::parse_yaml_tree`] and re-derives
8//! event-stream semantics (tag resolution, anchor stripping, flow-seq
9//! splitting). The intent is to keep the projection adjacent to the parser so
10//! CST shape is the single source of truth for events.
11
12use crate::syntax::{SyntaxKind, SyntaxNode};
13
14use super::parser::parse_yaml_tree;
15
16/// Walk the shadow CST for `input` and return the projected yaml-test-suite
17/// event stream. Returns an empty vector if the input fails to parse.
18pub fn project_events(input: &str) -> Vec<String> {
19    let Some(tree) = parse_yaml_tree(input) else {
20        return Vec::new();
21    };
22
23    let has_explicit_doc_start = tree
24        .descendants_with_tokens()
25        .filter_map(|el| el.into_token())
26        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_START);
27    let doc_open = if has_explicit_doc_start {
28        "+DOC ---".to_string()
29    } else {
30        "+DOC".to_string()
31    };
32    let has_explicit_doc_end = tree
33        .descendants_with_tokens()
34        .filter_map(|el| el.into_token())
35        .any(|tok| tok.kind() == SyntaxKind::YAML_DOCUMENT_END);
36    let doc_close = if has_explicit_doc_end {
37        "-DOC ...".to_string()
38    } else {
39        "-DOC".to_string()
40    };
41
42    // Inputs that contain only comments, whitespace, and/or a document-end
43    // marker (no content, no `---` document-start) yield no document at all.
44    // The yaml-test-suite represents these as `+STR -STR` with nothing in
45    // between (e.g. `# Comment only.\n`, `...\n`, `# comment\n...\n`).
46    let has_any_content = tree.descendants().any(|n| {
47        matches!(
48            n.kind(),
49            SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
50                | SyntaxKind::YAML_BLOCK_MAP_ENTRY
51                | SyntaxKind::YAML_FLOW_MAP
52                | SyntaxKind::YAML_FLOW_SEQUENCE
53        )
54    }) || tree
55        .descendants_with_tokens()
56        .filter_map(|el| el.into_token())
57        .any(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_TAG));
58    if !has_any_content && !has_explicit_doc_start {
59        return vec!["+STR".to_string(), "-STR".to_string()];
60    }
61
62    if let Some(seq_node) = tree
63        .descendants()
64        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
65    {
66        let mut events = Vec::new();
67        events.push("+STR".to_string());
68        events.push(doc_open);
69        events.push("+SEQ".to_string());
70        project_block_sequence_items(&seq_node, &mut events);
71        events.push("-SEQ".to_string());
72        events.push(doc_close);
73        events.push("-STR".to_string());
74        return events;
75    }
76
77    let mut values = Vec::new();
78    let mut map_header = "+MAP".to_string();
79    if let Some(root_map) = tree
80        .descendants()
81        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
82    {
83        project_block_map_entries(&root_map, &mut values);
84    }
85
86    if values.is_empty()
87        && let Some(flow_map) = tree
88            .descendants()
89            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
90    {
91        map_header = "+MAP {}".to_string();
92        project_flow_map_entries(&flow_map, &mut values);
93    }
94
95    if values.is_empty()
96        && let Some(flow_seq) = tree
97            .descendants()
98            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
99        && let Some(items) = simple_flow_sequence_items(&flow_seq.text().to_string())
100    {
101        let mut seq_events: Vec<String> = items
102            .iter()
103            .map(|item| {
104                if item.starts_with('"') || item.starts_with('\'') {
105                    quoted_val_event(item)
106                } else {
107                    plain_val_event(item)
108                }
109            })
110            .collect();
111        let mut events = Vec::with_capacity(seq_events.len() + 7);
112        events.push("+STR".to_string());
113        events.push(doc_open);
114        events.push("+SEQ []".to_string());
115        events.append(&mut seq_events);
116        events.push("-SEQ".to_string());
117        events.push(doc_close);
118        events.push("-STR".to_string());
119        return events;
120    }
121
122    let scalar_document_value = if values.is_empty() {
123        let text = tree
124            .descendants_with_tokens()
125            .filter_map(|el| el.into_token())
126            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
127            .map(|tok| tok.text().to_string())
128            .collect::<Vec<_>>()
129            .join("");
130        (!text.is_empty()).then_some(text)
131    } else {
132        None
133    };
134
135    if let Some(text) = scalar_document_value {
136        let tag_text = tree
137            .descendants_with_tokens()
138            .filter_map(|el| el.into_token())
139            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
140            .map(|tok| tok.text().to_string());
141        let scalar_event = if let Some(tag) = tag_text
142            && let Some(long) = long_tag(&tag)
143        {
144            format!("=VAL {long} :{text}")
145        } else if text.starts_with('"') || text.starts_with('\'') {
146            quoted_val_event(&text)
147        } else {
148            plain_val_event(&text)
149        };
150        return vec![
151            "+STR".to_string(),
152            doc_open.clone(),
153            scalar_event,
154            doc_close,
155            "-STR".to_string(),
156        ];
157    }
158
159    let mut events = Vec::with_capacity(values.len() + 6);
160    events.push("+STR".to_string());
161    events.push(doc_open);
162    events.push(map_header);
163    events.append(&mut values);
164    events.push("-MAP".to_string());
165    events.push(doc_close);
166    events.push("-STR".to_string());
167    events
168}
169
/// Build the `=VAL :<text>` event for a plain scalar, doubling every
/// backslash in `text`.
fn plain_val_event(text: &str) -> String {
    let mut event = String::with_capacity(text.len() + 6);
    event.push_str("=VAL :");
    for ch in text.chars() {
        if ch == '\\' {
            event.push_str("\\\\");
        } else {
            event.push(ch);
        }
    }
    event
}
173
/// Build the `=VAL` event for a quoted scalar given its raw source text
/// (including the surrounding quotes).
///
/// * Single-quoted text yields `=VAL '<body>`: the `''` escape collapses to
///   one quote and backslashes are doubled.
/// * Anything else is handled as double-quoted and yields `=VAL "<body>`:
///   `\/` becomes `/`, `\"` becomes `"`, and every other escape sequence is
///   kept verbatim.
///
/// Exactly one closing delimiter is removed. Stripping *all* trailing quote
/// characters would eat escaped quotes at the end of the body (`'a'''`,
/// `"a\""`) and would also swallow the opening quote of an empty scalar
/// (`''`, `""`).
fn quoted_val_event(text: &str) -> String {
    if let Some(body) = text.strip_prefix('\'') {
        let body = body.strip_suffix('\'').unwrap_or(body);
        let normalized = body.replace("''", "'").replace('\\', "\\\\");
        return format!("=VAL '{normalized}");
    }

    // Callers only pass text that starts with a quote, but stay tolerant of
    // anything else by emitting it without a style marker.
    let (opener, body) = match text.strip_prefix('"') {
        Some(rest) => ("\"", rest),
        None => ("", text),
    };
    let body = body.strip_suffix('"').unwrap_or(body);

    let mut normalized = String::with_capacity(body.len());
    let mut chars = body.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            normalized.push(ch);
            continue;
        }
        match chars.next() {
            Some('/') => normalized.push('/'),
            Some('"') => normalized.push('"'),
            Some(other) => {
                normalized.push('\\');
                normalized.push(other);
            }
            // A lone trailing backslash is preserved as-is.
            None => normalized.push('\\'),
        }
    }
    format!("=VAL {opener}{normalized}")
}
206
/// Expand a tag as written in the source into the `<...>` form used in the
/// event stream. Returns `None` when `tag` is not recognised as a tag.
///
/// * `!<uri>` (verbatim tag) becomes `<uri>`.
/// * `!!suffix` becomes `<tag:yaml.org,2002:suffix>` for any non-empty
///   suffix, using the default expansion of the `!!` handle. `%TAG`
///   directives that redefine the handle are not consulted here.
/// * `!` becomes `<!>`, and any other tag starting with a single `!` is
///   wrapped unchanged (`!local` becomes `<!local>`).
fn long_tag(tag: &str) -> Option<String> {
    const CORE_PREFIX: &str = "tag:yaml.org,2002:";

    // Verbatim tags already carry the full URI between `!<` and `>`.
    if let Some(uri) = tag.strip_prefix("!<").and_then(|rest| rest.strip_suffix('>')) {
        if !uri.is_empty() {
            return Some(format!("<{uri}>"));
        }
    }

    // Secondary handle: covers str/int/bool/null/float/seq/map as well as
    // every other suffix such as binary, set, omap or timestamp.
    if let Some(suffix) = tag.strip_prefix("!!") {
        return (!suffix.is_empty()).then(|| format!("<{CORE_PREFIX}{suffix}>"));
    }

    // Non-specific tag (`!`) and local tags (`!foo`).
    if tag.starts_with('!') {
        return Some(format!("<{tag}>"));
    }
    None
}
229
/// Split the source text of a flow sequence (`[a, "b, c", 'd']`) into the raw
/// text of its items.
///
/// Returns `None` when `text` is not wrapped in `[` ... `]` or when an item
/// followed by a comma is empty (`[a,,b]`). An empty sequence yields an empty
/// vector and a trailing comma is tolerated. Items are returned trimmed, with
/// their quotes still attached.
///
/// Commas inside quoted scalars do not split. A quote character only opens a
/// quoted scalar when it is the first non-whitespace character of an item, so
/// an apostrophe inside a plain scalar (`[don't, stop]`) does not swallow the
/// following commas. Inside single quotes `''` is an escaped quote; inside
/// double quotes a backslash escapes the next character.
///
/// Nested flow collections are not understood: their commas split like any
/// other.
fn simple_flow_sequence_items(text: &str) -> Option<Vec<String>> {
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let inner = text.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if inner.is_empty() {
        return Some(Vec::new());
    }

    let mut items = Vec::new();
    let mut start = 0usize;
    let mut quote = Quote::None;
    // True while only whitespace has been seen since the last separator.
    let mut at_item_start = true;

    let mut chars = inner.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        match quote {
            Quote::Double => match ch {
                // Skip whatever character the backslash escapes.
                '\\' => {
                    chars.next();
                }
                '"' => quote = Quote::None,
                _ => {}
            },
            Quote::Single => {
                if ch == '\'' {
                    if matches!(chars.peek(), Some(&(_, '\''))) {
                        // `''` is an escaped quote, not the end of the scalar.
                        chars.next();
                    } else {
                        quote = Quote::None;
                    }
                }
            }
            Quote::None => match ch {
                '\'' if at_item_start => {
                    quote = Quote::Single;
                    at_item_start = false;
                }
                '"' if at_item_start => {
                    quote = Quote::Double;
                    at_item_start = false;
                }
                ',' => {
                    let item = inner[start..idx].trim();
                    if item.is_empty() {
                        return None;
                    }
                    items.push(item.to_string());
                    start = idx + 1;
                    at_item_start = true;
                }
                c if c.is_whitespace() => {}
                _ => at_item_start = false,
            },
        }
    }

    let last = inner[start..].trim();
    if !last.is_empty() {
        items.push(last.to_string());
    }
    Some(items)
}
286
/// Escape a block scalar body for the event stream: backslash, newline, tab
/// and carriage return become `\\`, `\n`, `\t` and `\r`; every other
/// character is copied through unchanged.
fn escape_block_scalar_text(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\\\"),
                '\n' => Some("\\n"),
                '\t' => Some("\\t"),
                '\r' => Some("\\r"),
                _ => None,
            };
            match replacement {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}
300
301/// If `value_node` encodes a literal (`|`) or folded (`>`) block scalar,
302/// return the folded scalar body (no escaping applied yet). Scope: default
303/// clip chomping, auto-detected content indent, no explicit indicators.
304fn extract_block_scalar_body(value_node: &SyntaxNode) -> Option<(char, String)> {
305    let tokens: Vec<_> = value_node
306        .descendants_with_tokens()
307        .filter_map(|el| el.into_token())
308        .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::NEWLINE))
309        .collect();
310    let first = tokens.first()?;
311    if first.kind() != SyntaxKind::YAML_SCALAR {
312        return None;
313    }
314    let indicator = match first.text() {
315        "|" => '|',
316        ">" => '>',
317        _ => return None,
318    };
319
320    let mut raw = String::new();
321    let mut seen_header = false;
322    let mut skipped_header_newline = false;
323    for tok in tokens.iter().skip(1) {
324        if !seen_header && !skipped_header_newline && tok.kind() == SyntaxKind::NEWLINE {
325            skipped_header_newline = true;
326            seen_header = true;
327            continue;
328        }
329        raw.push_str(tok.text());
330    }
331
332    let mut lines: Vec<&str> = raw.split('\n').collect();
333    if lines.last().is_some_and(|s| s.is_empty()) {
334        lines.pop();
335    }
336
337    let content_indent = lines
338        .iter()
339        .filter(|l| !l.trim().is_empty())
340        .map(|l| l.chars().take_while(|c| *c == ' ').count())
341        .min()
342        .unwrap_or(0);
343
344    let stripped: Vec<String> = lines
345        .iter()
346        .map(|l| {
347            if l.len() >= content_indent {
348                l[content_indent..].to_string()
349            } else {
350                String::new()
351            }
352        })
353        .collect();
354
355    let folded = match indicator {
356        '|' => stripped.join("\n"),
357        '>' => {
358            let mut result = String::new();
359            let mut last_blank = false;
360            for (idx, line) in stripped.iter().enumerate() {
361                if line.is_empty() {
362                    result.push('\n');
363                    last_blank = true;
364                } else {
365                    if idx > 0 && !last_blank {
366                        result.push(' ');
367                    }
368                    result.push_str(line);
369                    last_blank = false;
370                }
371            }
372            result
373        }
374        _ => unreachable!(),
375    };
376
377    let trimmed = folded.trim_end_matches('\n');
378    let body = if trimmed.is_empty() {
379        String::new()
380    } else {
381        format!("{trimmed}\n")
382    };
383    Some((indicator, body))
384}
385
/// Fold a (possibly multi-line) plain scalar onto one line: each line is
/// trimmed, lines that are empty after trimming are dropped, and the rest are
/// joined with single spaces. Yields an empty string when nothing remains.
fn fold_plain_scalar(text: &str) -> String {
    text.split('\n')
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
399
400fn project_flow_map_entries(flow_map: &SyntaxNode, out: &mut Vec<String>) {
401    for entry in flow_map
402        .children()
403        .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_ENTRY)
404    {
405        let key_node = entry
406            .children()
407            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_KEY)
408            .expect("flow map key");
409        let value_node = entry
410            .children()
411            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
412            .expect("flow map value");
413
414        let has_explicit_colon = key_node
415            .children_with_tokens()
416            .filter_map(|el| el.into_token())
417            .any(|tok| tok.kind() == SyntaxKind::YAML_COLON);
418
419        let raw_key = key_node
420            .descendants_with_tokens()
421            .filter_map(|el| el.into_token())
422            .filter(|tok| matches!(tok.kind(), SyntaxKind::YAML_SCALAR | SyntaxKind::YAML_KEY))
423            .map(|tok| tok.text().to_string())
424            .collect::<Vec<_>>()
425            .join("");
426        let raw_value = value_node
427            .descendants_with_tokens()
428            .filter_map(|el| el.into_token())
429            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
430            .map(|tok| tok.text().to_string())
431            .collect::<Vec<_>>()
432            .join("");
433
434        if has_explicit_colon {
435            out.push(plain_val_event(&fold_plain_scalar(&raw_key)));
436            out.push(plain_val_event(&fold_plain_scalar(&raw_value)));
437        } else {
438            let combined = format!("{raw_key}{raw_value}");
439            out.push(plain_val_event(&fold_plain_scalar(&combined)));
440            out.push("=VAL :".to_string());
441        }
442    }
443}
444
445fn project_block_sequence_items(seq_node: &SyntaxNode, out: &mut Vec<String>) {
446    for item in seq_node
447        .children()
448        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
449    {
450        if let Some(nested_seq) = item
451            .children()
452            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE)
453        {
454            out.push("+SEQ".to_string());
455            project_block_sequence_items(&nested_seq, out);
456            out.push("-SEQ".to_string());
457            continue;
458        }
459        if let Some(nested_map) = item
460            .children()
461            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
462        {
463            out.push("+MAP".to_string());
464            project_block_map_entries(&nested_map, out);
465            out.push("-MAP".to_string());
466            continue;
467        }
468        if let Some(flow_seq) = item
469            .children()
470            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE)
471        {
472            let flow_text = flow_seq.text().to_string();
473            if let Some(flow_items) = simple_flow_sequence_items(&flow_text) {
474                out.push("+SEQ []".to_string());
475                for value in flow_items {
476                    if value.starts_with('"') || value.starts_with('\'') {
477                        out.push(quoted_val_event(&value));
478                    } else {
479                        out.push(plain_val_event(&value));
480                    }
481                }
482                out.push("-SEQ".to_string());
483                continue;
484            }
485        }
486        if let Some(flow_map) = item
487            .children()
488            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
489        {
490            out.push("+MAP {}".to_string());
491            project_flow_map_entries(&flow_map, out);
492            out.push("-MAP".to_string());
493            continue;
494        }
495        let item_tag = item
496            .descendants_with_tokens()
497            .filter_map(|el| el.into_token())
498            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
499            .map(|tok| tok.text().to_string());
500        let scalar_text = item
501            .descendants_with_tokens()
502            .filter_map(|el| el.into_token())
503            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
504            .map(|tok| tok.text().to_string())
505            .collect::<Vec<_>>()
506            .join("");
507        let scalar_trimmed = scalar_text.trim_end();
508        let event = if let Some(tag) = item_tag
509            && let Some(long) = long_tag(&tag)
510        {
511            format!("=VAL {long} :{scalar_text}")
512        } else if let Some(rest) = scalar_trimmed.strip_prefix('&') {
513            if let Some((anchor, value)) = rest.split_once(' ') {
514                format!("=VAL &{anchor} :{value}")
515            } else {
516                format!("=VAL &{rest} :")
517            }
518        } else if scalar_trimmed.starts_with('*') {
519            format!("=ALI {scalar_trimmed}")
520        } else if scalar_text.starts_with('"') || scalar_text.starts_with('\'') {
521            quoted_val_event(&scalar_text)
522        } else {
523            plain_val_event(&scalar_text)
524        };
525        out.push(event);
526    }
527}
528
529fn project_block_map_entries(map_node: &SyntaxNode, out: &mut Vec<String>) {
530    for entry in map_node
531        .children()
532        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
533    {
534        let key_node = entry
535            .children()
536            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
537            .expect("key node");
538        let value_node = entry
539            .children()
540            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
541            .expect("value node");
542
543        let key_tag = key_node
544            .children_with_tokens()
545            .filter_map(|el| el.into_token())
546            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
547            .map(|tok| tok.text().to_string());
548        let key_text = key_node
549            .children_with_tokens()
550            .filter_map(|el| el.into_token())
551            .find(|tok| tok.kind() == SyntaxKind::YAML_KEY)
552            .map(|tok| tok.text().to_string())
553            .expect("key token");
554
555        let key_event = if let Some(tag) = key_tag {
556            if let Some(long) = long_tag(&tag) {
557                format!("=VAL {long} :{key_text}")
558            } else {
559                plain_val_event(&key_text)
560            }
561        } else if let Some(rest) = key_text.strip_prefix('&') {
562            if let Some((anchor, value)) = rest.split_once(' ') {
563                format!("=VAL &{} :{}", anchor, value)
564            } else {
565                format!("=VAL &{} :", rest)
566            }
567        } else if key_text.starts_with('"') || key_text.starts_with('\'') {
568            quoted_val_event(&key_text)
569        } else if key_text.starts_with('*') {
570            format!("=ALI {}", key_text.trim_end())
571        } else {
572            plain_val_event(&key_text)
573        };
574        out.push(key_event);
575
576        if let Some(nested_map) = value_node
577            .children()
578            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP)
579        {
580            out.push("+MAP".to_string());
581            project_block_map_entries(&nested_map, out);
582            out.push("-MAP".to_string());
583            continue;
584        }
585
586        if let Some(flow_map) = value_node
587            .children()
588            .find(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP)
589        {
590            out.push("+MAP {}".to_string());
591            project_flow_map_entries(&flow_map, out);
592            out.push("-MAP".to_string());
593            continue;
594        }
595
596        if let Some((indicator, body)) = extract_block_scalar_body(&value_node) {
597            let escaped = escape_block_scalar_text(&body);
598            out.push(format!("=VAL {indicator}{escaped}"));
599            continue;
600        }
601
602        let value_tag = value_node
603            .children_with_tokens()
604            .filter_map(|el| el.into_token())
605            .find(|tok| tok.kind() == SyntaxKind::YAML_TAG)
606            .map(|tok| tok.text().to_string());
607        let value_text = value_node
608            .descendants_with_tokens()
609            .filter_map(|el| el.into_token())
610            .filter(|tok| tok.kind() == SyntaxKind::YAML_SCALAR)
611            .map(|tok| tok.text().to_string())
612            .collect::<Vec<_>>()
613            .join("");
614
615        if value_tag.is_none()
616            && let Some(items) = simple_flow_sequence_items(&value_text)
617        {
618            out.push("+SEQ []".to_string());
619            for item in items {
620                if item.starts_with('"') || item.starts_with('\'') {
621                    out.push(quoted_val_event(&item));
622                } else {
623                    out.push(plain_val_event(&item));
624                }
625            }
626            out.push("-SEQ".to_string());
627        } else if value_text.is_empty() {
628            out.push("=VAL :".to_string());
629        } else {
630            let value_event = if let Some(tag) = value_tag {
631                if let Some(long) = long_tag(&tag) {
632                    if let Some(rest) = value_text.strip_prefix('&') {
633                        if let Some((anchor, tail)) = rest.split_once(' ') {
634                            format!("=VAL &{anchor} {long} :{tail}")
635                        } else {
636                            format!("=VAL &{rest} {long} :")
637                        }
638                    } else {
639                        format!("=VAL {long} :{value_text}")
640                    }
641                } else {
642                    plain_val_event(&value_text)
643                }
644            } else if value_text.starts_with('"') || value_text.starts_with('\'') {
645                quoted_val_event(&value_text)
646            } else if let Some(rest) = value_text.strip_prefix('&') {
647                if let Some((anchor, value)) = rest.split_once(' ') {
648                    format!("=VAL &{} :{}", anchor, value)
649                } else {
650                    format!("=VAL &{} :", rest)
651                }
652            } else if value_text.starts_with('*') {
653                format!("=ALI {value_text}")
654            } else {
655                plain_val_event(&value_text)
656            };
657            out.push(value_event);
658        }
659    }
660}