Skip to main content

panache_parser/syntax/
yaml_ast.rs

1//! Typed AST wrappers over the in-tree YAML CST.
2//!
3//! These wrappers give value-extraction consumers (metadata, bibliography,
4//! includes, hashpipe) a typed traversal API over the rowan CST produced by
5//! [`crate::parser::yaml::parse_yaml_tree`], replacing the external
6//! `yaml_parser` crate's `ast` module. They follow the house pattern
7//! ([`super::headings`], [`super::references`]): newtype wrappers with
8//! hand-written [`AstNode`] impls and `rowan::ast::support`-based accessors.
9//!
10//! Two CST facts shape the API:
11//!
12//! - YAML parses produce one or more `YAML_DOCUMENT` children under a
13//!   *stream-equivalent* container. Standalone parses
14//!   ([`crate::parser::yaml::parse_yaml_tree`]) root the tree at
15//!   `YAML_STREAM`; embedded parses nest the documents directly under
16//!   the host wrapper (`YAML_METADATA_CONTENT` for frontmatter,
17//!   `HASHPIPE_YAML_CONTENT` for hashpipe), which plays the stream
18//!   role for its singleton-stream embedding. [`parse_yaml_document`]
19//!   centralizes the descent so no consumer re-implements it.
20//! - `YAML_BLOCK_MAP_KEY` includes the trailing `:` (`YAML_COLON`) token, so
21//!   [`YamlBlockMapKey::scalar`] reads the `YAML_SCALAR` node child rather
22//!   than the whole-key text.
23//!
24//! Scalar style is not a CST kind — every style is a `YAML_SCALAR` node
25//! (wrapping `YAML_SCALAR_TEXT` content) — so [`YamlScalar`] detects the
26//! style from the leading byte and cooks via the shared
27//! [`crate::parser::yaml::cook`] primitives.
28
29use rowan::TextRange;
30
31use super::ast::{AstChildren, AstNode, support};
32use super::{PanacheLanguage, SyntaxKind, SyntaxNode, SyntaxToken};
33use crate::parser::yaml::{ScalarStyle, cook, parse_yaml_tree};
34
35/// Parse `input` and return the first YAML document. Descends through any
36/// stream-equivalent container ([`is_stream_equivalent`]) so it works against
37/// standalone parses (rooted at `YAML_STREAM`) and host embeddings alike.
38/// Returns `None` when the input fails the structural validator (no tree)
39/// or has no document.
40pub fn parse_yaml_document(input: &str) -> Option<YamlDocument> {
41    first_document(&parse_yaml_tree(input)?)
42}
43
44/// Parse `input` and return every YAML document in the stream. Most consumers
45/// only need the first ([`parse_yaml_document`]); this exists for multi-document
46/// completeness (`a: 1\n---\nb: 2`).
47pub fn parse_yaml_documents(input: &str) -> Vec<YamlDocument> {
48    let Some(tree) = parse_yaml_tree(input) else {
49        return Vec::new();
50    };
51    stream_container(&tree)
52        .map(|stream| stream.children().filter_map(YamlDocument::cast).collect())
53        .unwrap_or_default()
54}
55
56/// True for any node that plays the YAML stream container role: the spec
57/// `YAML_STREAM` for standalone parses, and the host embedding wrappers
58/// (`YAML_METADATA_CONTENT`, `HASHPIPE_YAML_CONTENT`) for the singleton-stream
59/// embeddings the host parser produces. Each wraps zero or more
60/// `YAML_DOCUMENT` children.
61pub(crate) fn is_stream_equivalent(kind: SyntaxKind) -> bool {
62    matches!(
63        kind,
64        SyntaxKind::YAML_STREAM
65            | SyntaxKind::YAML_METADATA_CONTENT
66            | SyntaxKind::HASHPIPE_YAML_CONTENT
67    )
68}
69
70fn stream_container(tree: &SyntaxNode) -> Option<SyntaxNode> {
71    tree.descendants().find(|n| is_stream_equivalent(n.kind()))
72}
73
74fn first_document(tree: &SyntaxNode) -> Option<YamlDocument> {
75    stream_container(tree)?
76        .children()
77        .find_map(YamlDocument::cast)
78}
79
80/// The five concrete node shapes a value, sequence item, or document body can
81/// take. `None` (i.e. an absent `YamlNode`) models an empty YAML value.
82#[derive(Debug, Clone)]
83pub enum YamlNode {
84    BlockMap(YamlBlockMap),
85    BlockSequence(YamlBlockSequence),
86    FlowMap(YamlFlowMap),
87    FlowSequence(YamlFlowSequence),
88    Scalar(YamlScalar),
89}
90
91/// Resolve the single content node held by a value / item / document wrapper.
92/// Container children take precedence; a bare scalar value resolves to the
93/// first `YAML_SCALAR` node (anchors/tags/aliases are skipped). Returns `None`
94/// for an empty value.
95fn node_child(parent: &SyntaxNode) -> Option<YamlNode> {
96    for child in parent.children() {
97        match child.kind() {
98            SyntaxKind::YAML_BLOCK_MAP => return YamlBlockMap::cast(child).map(YamlNode::BlockMap),
99            SyntaxKind::YAML_BLOCK_SEQUENCE => {
100                return YamlBlockSequence::cast(child).map(YamlNode::BlockSequence);
101            }
102            SyntaxKind::YAML_FLOW_MAP => return YamlFlowMap::cast(child).map(YamlNode::FlowMap),
103            SyntaxKind::YAML_FLOW_SEQUENCE => {
104                return YamlFlowSequence::cast(child).map(YamlNode::FlowSequence);
105            }
106            _ => {}
107        }
108    }
109    scalar_token(parent).map(YamlNode::Scalar)
110}
111
112/// First direct `YAML_SCALAR` node child of `parent`, wrapped. A scalar is a
113/// node whose leaves are the per-line `YAML_SCALAR_TEXT` fragments (and any
114/// `NEWLINE` between them); flow punctuation is `YAML_FLOW_INDICATOR`, so it
115/// never matches here.
116fn scalar_token(parent: &SyntaxNode) -> Option<YamlScalar> {
117    parent
118        .children()
119        .find(|n| n.kind() == SyntaxKind::YAML_SCALAR)
120        .and_then(YamlScalar::cast)
121}
122
123fn token_of(parent: &SyntaxNode, kind: SyntaxKind) -> Option<SyntaxToken> {
124    parent
125        .children_with_tokens()
126        .filter_map(|el| el.into_token())
127        .find(|t| t.kind() == kind)
128}
129
/// Generates the `as_*` projection surface shared by the value / item
/// wrappers. Written as a macro so every wrapper exposes the same methods
/// without repeating them by hand. Expects to be expanded inside an `impl`
/// for a tuple newtype whose field `0` is the wrapped `SyntaxNode`.
macro_rules! node_projections {
    () => {
        /// The single content node, or `None` for an empty value.
        pub fn as_node(&self) -> Option<YamlNode> {
            node_child(&self.0)
        }

        /// The value as a scalar, or `None` if it is a container or empty.
        pub fn as_scalar(&self) -> Option<YamlScalar> {
            if let Some(YamlNode::Scalar(scalar)) = self.as_node() {
                Some(scalar)
            } else {
                None
            }
        }

        /// The value as a block map, or `None` for any other shape.
        pub fn as_block_map(&self) -> Option<YamlBlockMap> {
            if let Some(YamlNode::BlockMap(map)) = self.as_node() {
                Some(map)
            } else {
                None
            }
        }

        /// The value as a block sequence, or `None` for any other shape.
        pub fn as_block_sequence(&self) -> Option<YamlBlockSequence> {
            if let Some(YamlNode::BlockSequence(sequence)) = self.as_node() {
                Some(sequence)
            } else {
                None
            }
        }

        /// The value as a flow map, or `None` for any other shape.
        pub fn as_flow_map(&self) -> Option<YamlFlowMap> {
            if let Some(YamlNode::FlowMap(map)) = self.as_node() {
                Some(map)
            } else {
                None
            }
        }

        /// The value as a flow sequence, or `None` for any other shape.
        pub fn as_flow_sequence(&self) -> Option<YamlFlowSequence> {
            if let Some(YamlNode::FlowSequence(sequence)) = self.as_node() {
                Some(sequence)
            } else {
                None
            }
        }

        /// Whether this value is empty (no scalar and no container child).
        pub fn is_empty(&self) -> bool {
            node_child(&self.0).is_none()
        }

        /// The explicit `YAML_TAG` token decorating this value (e.g. `!expr`),
        /// if any. Used by the hashpipe formatter to preserve chunk-option tags.
        pub fn tag(&self) -> Option<SyntaxToken> {
            token_of(&self.0, SyntaxKind::YAML_TAG)
        }
    };
}
187
/// Declares a tuple newtype over `SyntaxNode` together with the standard
/// hand-written [`AstNode`] impl that accepts exactly one `SyntaxKind`.
///
/// Usage: `ast_node!(/// docs ... TypeName, SYNTAX_KIND);` — any leading
/// attributes (including doc comments) are forwarded onto the struct.
macro_rules! ast_node {
    ($(#[$meta:meta])* $name:ident, $kind:ident) => {
        $(#[$meta])*
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $name(SyntaxNode);

        impl AstNode for $name {
            type Language = PanacheLanguage;

            fn can_cast(kind: SyntaxKind) -> bool {
                matches!(kind, SyntaxKind::$kind)
            }

            fn cast(syntax: SyntaxNode) -> Option<Self> {
                if Self::can_cast(syntax.kind()) {
                    Some(Self(syntax))
                } else {
                    None
                }
            }

            fn syntax(&self) -> &SyntaxNode {
                &self.0
            }
        }
    };
}
213
214ast_node!(
215    /// A single YAML document inside the stream.
216    YamlDocument, YAML_DOCUMENT
217);
218
219impl YamlDocument {
220    pub fn block_map(&self) -> Option<YamlBlockMap> {
221        support::child(&self.0)
222    }
223
224    pub fn block_sequence(&self) -> Option<YamlBlockSequence> {
225        support::child(&self.0)
226    }
227
228    pub fn flow_map(&self) -> Option<YamlFlowMap> {
229        support::child(&self.0)
230    }
231
232    pub fn flow_sequence(&self) -> Option<YamlFlowSequence> {
233        support::child(&self.0)
234    }
235
236    /// A top-level bare scalar document (`"just a string"`).
237    pub fn scalar(&self) -> Option<YamlScalar> {
238        scalar_token(&self.0)
239    }
240
241    pub fn as_node(&self) -> Option<YamlNode> {
242        node_child(&self.0)
243    }
244}
245
246ast_node!(
247    /// A block mapping (`key: value` entries).
248    YamlBlockMap, YAML_BLOCK_MAP
249);
250
251impl YamlBlockMap {
252    pub fn entries(&self) -> AstChildren<YamlBlockMapEntry> {
253        support::children(&self.0)
254    }
255
256    /// The first entry whose (cooked) key text equals `key`.
257    pub fn entry(&self, key: &str) -> Option<YamlBlockMapEntry> {
258        self.entries()
259            .find(|entry| entry.key_text().as_deref() == Some(key))
260    }
261
262    pub fn value_of(&self, key: &str) -> Option<YamlBlockMapValue> {
263        self.entry(key)?.value()
264    }
265}
266
267ast_node!(
268    /// One `key: value` pair in a block mapping.
269    YamlBlockMapEntry, YAML_BLOCK_MAP_ENTRY
270);
271
272impl YamlBlockMapEntry {
273    pub fn key(&self) -> Option<YamlBlockMapKey> {
274        support::child(&self.0)
275    }
276
277    /// The cooked key text. Reads the scalar child of `YAML_BLOCK_MAP_KEY`, so
278    /// the trailing `:` token is excluded.
279    pub fn key_text(&self) -> Option<String> {
280        self.key()?.scalar().map(|s| s.value())
281    }
282
283    pub fn value(&self) -> Option<YamlBlockMapValue> {
284        support::child(&self.0)
285    }
286}
287
288ast_node!(
289    /// The key side of a block-map entry. Holds the `YAML_SCALAR` node AND the
290    /// trailing `YAML_COLON` token.
291    YamlBlockMapKey, YAML_BLOCK_MAP_KEY
292);
293
294impl YamlBlockMapKey {
295    /// The key's scalar node (excluding the `:` colon).
296    pub fn scalar(&self) -> Option<YamlScalar> {
297        scalar_token(&self.0)
298    }
299}
300
301ast_node!(
302    /// The value side of a block-map entry: a scalar, a nested container, or
303    /// empty.
304    YamlBlockMapValue, YAML_BLOCK_MAP_VALUE
305);
306
307impl YamlBlockMapValue {
308    node_projections!();
309}
310
311ast_node!(
312    /// A block sequence (`- item` entries).
313    YamlBlockSequence, YAML_BLOCK_SEQUENCE
314);
315
316impl YamlBlockSequence {
317    pub fn items(&self) -> AstChildren<YamlBlockSequenceItem> {
318        support::children(&self.0)
319    }
320}
321
322ast_node!(
323    /// One `- item` in a block sequence. The leading `-` is a
324    /// `YAML_BLOCK_SEQ_ENTRY` token, skipped by the content projections.
325    YamlBlockSequenceItem, YAML_BLOCK_SEQUENCE_ITEM
326);
327
328impl YamlBlockSequenceItem {
329    node_projections!();
330}
331
332ast_node!(
333    /// A flow sequence (`[a, b, c]`).
334    YamlFlowSequence, YAML_FLOW_SEQUENCE
335);
336
337impl YamlFlowSequence {
338    pub fn items(&self) -> AstChildren<YamlFlowSequenceItem> {
339        support::children(&self.0)
340    }
341}
342
343ast_node!(
344    /// One item in a flow sequence.
345    YamlFlowSequenceItem, YAML_FLOW_SEQUENCE_ITEM
346);
347
348impl YamlFlowSequenceItem {
349    node_projections!();
350}
351
352ast_node!(
353    /// A flow mapping (`{k: v, ...}`).
354    YamlFlowMap, YAML_FLOW_MAP
355);
356
357impl YamlFlowMap {
358    pub fn entries(&self) -> AstChildren<YamlFlowMapEntry> {
359        support::children(&self.0)
360    }
361
362    pub fn entry(&self, key: &str) -> Option<YamlFlowMapEntry> {
363        self.entries()
364            .find(|entry| entry.key_text().as_deref() == Some(key))
365    }
366
367    pub fn value_of(&self, key: &str) -> Option<YamlFlowMapValue> {
368        self.entry(key)?.value()
369    }
370}
371
372ast_node!(
373    /// One `k: v` pair in a flow mapping.
374    YamlFlowMapEntry, YAML_FLOW_MAP_ENTRY
375);
376
377impl YamlFlowMapEntry {
378    pub fn key(&self) -> Option<YamlFlowMapKey> {
379        support::child(&self.0)
380    }
381
382    pub fn key_text(&self) -> Option<String> {
383        self.key()?.scalar().map(|s| s.value())
384    }
385
386    pub fn value(&self) -> Option<YamlFlowMapValue> {
387        support::child(&self.0)
388    }
389}
390
391ast_node!(
392    /// The key side of a flow-map entry.
393    YamlFlowMapKey, YAML_FLOW_MAP_KEY
394);
395
396impl YamlFlowMapKey {
397    pub fn scalar(&self) -> Option<YamlScalar> {
398        scalar_token(&self.0)
399    }
400}
401
402ast_node!(
403    /// The value side of a flow-map entry.
404    YamlFlowMapValue, YAML_FLOW_MAP_VALUE
405);
406
407impl YamlFlowMapValue {
408    node_projections!();
409}
410
411/// The lexical style of a scalar, detected from its raw source. (The CST does
412/// not record style as a distinct kind — every style is a `YAML_SCALAR` node.)
413#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
414pub enum YamlScalarStyle {
415    Plain,
416    SingleQuoted,
417    DoubleQuoted,
418    Literal,
419    Folded,
420}
421
422impl YamlScalarStyle {
423    fn to_cook_style(self) -> ScalarStyle {
424        match self {
425            YamlScalarStyle::Plain => ScalarStyle::Plain,
426            YamlScalarStyle::SingleQuoted => ScalarStyle::SingleQuoted,
427            YamlScalarStyle::DoubleQuoted => ScalarStyle::DoubleQuoted,
428            YamlScalarStyle::Literal => ScalarStyle::Literal,
429            YamlScalarStyle::Folded => ScalarStyle::Folded,
430        }
431    }
432}
433
434fn detect_style(raw: &str) -> YamlScalarStyle {
435    match raw.trim_start().as_bytes().first() {
436        Some(b'\'') => YamlScalarStyle::SingleQuoted,
437        Some(b'"') => YamlScalarStyle::DoubleQuoted,
438        Some(b'|') => YamlScalarStyle::Literal,
439        Some(b'>') => YamlScalarStyle::Folded,
440        _ => YamlScalarStyle::Plain,
441    }
442}
443
444/// A scalar value node. Its leaves are the per-physical-line content fragments
445/// (`YAML_SCALAR_TEXT`) interleaved with `NEWLINE` (and, for embedded hashpipe,
446/// line-prefix) tokens; [`raw`](Self::raw) reassembles them.
447#[derive(Debug, Clone, PartialEq, Eq, Hash)]
448pub struct YamlScalar(SyntaxNode);
449
450impl YamlScalar {
451    pub fn cast(node: SyntaxNode) -> Option<Self> {
452        (node.kind() == SyntaxKind::YAML_SCALAR).then_some(Self(node))
453    }
454
455    /// The raw source bytes of the scalar (all leaves concatenated), including
456    /// any quotes / block header and embedded line breaks.
457    pub fn raw(&self) -> String {
458        self.0.text().to_string()
459    }
460
461    pub fn style(&self) -> YamlScalarStyle {
462        detect_style(&self.raw())
463    }
464
465    /// The cooked logical string: quotes stripped, escapes decoded, multi-line
466    /// scalars folded per YAML 1.2. Block scalars (`|`/`>`) are returned raw
467    /// (their cooking needs parent indent context).
468    pub fn value(&self) -> String {
469        let source = self.prefix_stripped_source();
470        cook(detect_style(&source).to_cook_style(), &source)
471    }
472
473    /// The scalar's content leaves concatenated, dropping any embedded
474    /// per-line prefix trivia (`YAML_LINE_PREFIX`, e.g. hashpipe `#|`). This
475    /// is the cooking input: prefixes are host framing, not scalar content,
476    /// so they must not fold into the value. For plain (frontmatter) scalars,
477    /// which carry no prefix leaves, this equals [`raw`](Self::raw).
478    fn prefix_stripped_source(&self) -> String {
479        self.0
480            .children_with_tokens()
481            .filter_map(|el| el.into_token())
482            .filter(|t| t.kind() != SyntaxKind::YAML_LINE_PREFIX)
483            .map(|t| t.text().to_string())
484            .collect()
485    }
486
487    pub fn text_range(&self) -> TextRange {
488        self.0.text_range()
489    }
490
491    pub fn syntax(&self) -> &SyntaxNode {
492        &self.0
493    }
494}
495
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as a standalone YAML document.
    fn document(input: &str) -> YamlDocument {
        parse_yaml_document(input).expect("document")
    }

    /// The top-level block map of `input`.
    fn root_map(input: &str) -> YamlBlockMap {
        document(input).block_map().unwrap()
    }

    /// The value stored under `key` in the top-level block map of `input`.
    fn value_at(input: &str, key: &str) -> YamlBlockMapValue {
        root_map(input).value_of(key).unwrap()
    }

    /// The scalar stored under `key` in the top-level block map of `input`.
    fn scalar_at(input: &str, key: &str) -> YamlScalar {
        value_at(input, key).as_scalar().unwrap()
    }

    #[test]
    fn parse_yaml_document_descends_envelope() {
        let map = document("title: x\n").block_map().expect("block map");
        assert_eq!(map.entries().count(), 1);
    }

    #[test]
    fn key_text_strips_colon() {
        let entry = root_map("key: value\n").entries().next().unwrap();
        assert_eq!(entry.key_text().as_deref(), Some("key"));
    }

    #[test]
    fn value_is_cooked() {
        // Single-quoted: the doubled quote collapses to one.
        assert_eq!(scalar_at("k: 'it''s'\n", "k").value(), "it's");
        // Double-quoted: the `\n` escape is decoded to a real line break.
        assert_eq!(scalar_at("k: \"a\\nb\"\n", "k").value(), "a\nb");
    }

    #[test]
    fn raw_preserves_quotes() {
        let scalar = scalar_at("k: 'it''s'\n", "k");
        assert_eq!(scalar.raw(), "'it''s'");
        assert_eq!(scalar.style(), YamlScalarStyle::SingleQuoted);
    }

    #[test]
    fn value_skips_embedded_line_prefix() {
        use crate::parser::yaml::parse_stream_with_prefix;

        // Double-quoted multi-line scalar inside hashpipe-prefixed YAML: the
        // `#|` continuation prefix is carried as a `YAML_LINE_PREFIX` leaf for
        // losslessness, but it must not bleed into the cooked value.
        let tree = parse_stream_with_prefix("#| key: \"foo\n#|   bar\"\n", "#|");
        let scalar = first_document(&tree)
            .and_then(|document| document.block_map())
            .and_then(|map| map.value_of("key"))
            .and_then(|value| value.as_scalar())
            .expect("scalar value");

        let value = scalar.value();
        assert!(!value.contains("#|"), "prefix leaked into value: {value:?}");
        assert_eq!(value, "foo bar");

        // raw() stays byte-exact (lossless contract): the prefix leaf is kept.
        assert!(
            scalar.raw().contains("#|"),
            "raw() must retain the prefix leaf: {:?}",
            scalar.raw()
        );
    }

    #[test]
    fn scalar_text_range_is_content_relative() {
        let input = "k: value\n";
        let range = scalar_at(input, "k").text_range();
        let start = usize::from(range.start());
        let end = usize::from(range.end());
        assert_eq!(&input[start..end], "value");
    }

    #[test]
    fn empty_value_has_no_scalar() {
        let value = value_at("k:\n", "k");
        assert!(value.is_empty());
        assert!(value.as_scalar().is_none());
    }

    #[test]
    fn block_sequence_items_yield_scalars() {
        let seq = value_at("k:\n  - a\n  - b\n", "k")
            .as_block_sequence()
            .expect("block sequence");
        let items: Vec<String> = seq
            .items()
            .filter_map(|item| item.as_scalar())
            .map(|scalar| scalar.value())
            .collect();
        assert_eq!(items, vec!["a".to_string(), "b".to_string()]);
    }

    #[test]
    fn flow_sequence_items_yield_scalars() {
        let seq = value_at("k: [a, b]\n", "k")
            .as_flow_sequence()
            .expect("flow sequence");
        let items: Vec<String> = seq
            .items()
            .filter_map(|item| item.as_scalar())
            .map(|scalar| scalar.value())
            .collect();
        assert_eq!(items, vec!["a".to_string(), "b".to_string()]);
    }

    #[test]
    fn tag_token_is_exposed_and_scalar_ignores_it() {
        let value = value_at("k: !expr foo\n", "k");
        let tag_text = value.tag().map(|tag| tag.text().to_string());
        assert_eq!(tag_text, Some("!expr".to_string()));
        assert_eq!(value.as_scalar().unwrap().raw(), "foo");
    }

    #[test]
    fn quoted_key_with_colon_round_trips() {
        let entry = root_map("\"foo:bar\": 1\n").entries().next().unwrap();
        assert_eq!(entry.key_text().as_deref(), Some("foo:bar"));

        let key_scalar = entry.key().unwrap().scalar().unwrap();
        assert_eq!(key_scalar.raw(), "\"foo:bar\"");
    }

    #[test]
    fn parse_yaml_documents_returns_all_documents() {
        let docs = parse_yaml_documents("a: 1\n---\nb: 2\n");
        assert_eq!(docs.len(), 2);
    }

    #[test]
    fn invalid_yaml_yields_no_document() {
        assert!(parse_yaml_document("k: [\n").is_none());
    }
}
651}