Skip to main content

panache_parser/parser/yaml/
profile.rs

1//! Consumer profiles for YAML validation.
2//!
3//! Panache's YAML structural validator (`super::validator`) mirrors abstract
4//! **YAML 1.2** — the contract the vendored yaml-test-suite holds it to. But
5//! the *real* consumers of the YAML in a document are stricter and differ by
6//! context:
7//!
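//! - **Frontmatter** is read by **pandoc** → Haskell `yaml`/libyaml (≈ 1.1).
//! - **Hashpipe `#|` cell options** are read by the executable engine:
//!   **quarto** → js-yaml (1.2), or **knitr** → R's `yaml` package for RMarkdown.
//! - In a **Quarto** doc, quarto parses the frontmatter (js-yaml) *and* hands
//!   the document to pandoc, which re-parses the metadata (libyaml). So Quarto
//!   frontmatter must satisfy **both** — the stricter wins.
//! - In an **RMarkdown** doc, `rmarkdown::yaml_front_matter` (R's `yaml`
//!   package) reads the frontmatter and the document then renders through
//!   pandoc (libyaml), so RMarkdown frontmatter must likewise satisfy **both**.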
13//!
14//! A [`YamlValidationContext`] captures which real consumers apply to a given
15//! (flavor, location), so the validator can layer consumer-only checks on top
16//! of the 1.2 substrate. The empirical basis for each consumer's accept/reject
17//! behavior is the oracle audit in `scripts/yaml-oracle/` and its classified
18//! output in `crates/panache-parser/tests/yaml/consumer-matrix.md`.
19//!
20//! The substrate path ([`YamlValidationContext::substrate`]) runs every 1.2
21//! check and **no** consumer-only checks — it is what the yaml-test-suite tests
22//! exercise, so its verdicts never change.
23
24use crate::options::Flavor;
25
/// One of the real-world YAML parsers whose accept/reject verdicts Panache
/// reproduces. Each variant is a separately measured consumer rather than an
/// interchangeable libyaml wrapper; the measurements live in
/// `scripts/yaml-oracle/oracle.json` and `tests/yaml/consumer-matrix.md`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum YamlConsumer {
    /// The Haskell `yaml` library (libyaml, roughly YAML 1.1) that pandoc uses
    /// to read frontmatter. It is the lenient baseline: duplicate keys are
    /// accepted (the last value wins), as are tabs in some positions.
    Libyaml,
    /// js-yaml (YAML 1.2), which reads Quarto frontmatter and hashpipe `#|`
    /// options. Duplicate keys and tabs are both rejected.
    Jsyaml,
    /// The R `yaml` package (built on libyaml) behind the RMarkdown toolchain:
    /// `rmarkdown::yaml_front_matter` for frontmatter, knitr for `#|` chunk
    /// options. Measured against the suite it behaves like libyaml except that
    /// it additionally REJECTS duplicate keys and tabs.
    RYaml,
}
44
45/// A small bitset over [`YamlConsumer`].
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
47pub struct ConsumerSet(u8);
48
49impl ConsumerSet {
50    const fn bit(consumer: YamlConsumer) -> u8 {
51        match consumer {
52            YamlConsumer::Libyaml => 0b001,
53            YamlConsumer::Jsyaml => 0b010,
54            YamlConsumer::RYaml => 0b100,
55        }
56    }
57
58    /// Every consumer — the rejection set for a check all real parsers make.
59    pub const fn all() -> Self {
60        ConsumerSet(0b111)
61    }
62
63    /// The empty set — no real consumer applies (lenient).
64    pub const fn empty() -> Self {
65        ConsumerSet(0)
66    }
67
68    /// A single-consumer set.
69    pub const fn of(consumer: YamlConsumer) -> Self {
70        ConsumerSet(Self::bit(consumer))
71    }
72
73    /// Add a consumer.
74    pub const fn with(self, consumer: YamlConsumer) -> Self {
75        ConsumerSet(self.0 | Self::bit(consumer))
76    }
77
78    /// Does this set contain `consumer`?
79    pub const fn contains(self, consumer: YamlConsumer) -> bool {
80        self.0 & Self::bit(consumer) != 0
81    }
82
83    /// Do the two sets share any consumer?
84    pub const fn intersects(self, other: ConsumerSet) -> bool {
85        self.0 & other.0 != 0
86    }
87
88    /// Is this set empty?
89    pub const fn is_empty(self) -> bool {
90        self.0 == 0
91    }
92}
93
/// The position of a YAML region inside a document.
///
/// Frontmatter goes to pandoc (and, for Quarto, to js-yaml as well), whereas
/// hashpipe `#|` cell options go to the executable engine: js-yaml under
/// Quarto, the R `yaml` package under RMarkdown.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum YamlLocation {
    /// The document's frontmatter.
    Frontmatter,
    /// Hashpipe `#|` cell options.
    Hashpipe,
}
102
103/// The consumers that must accept a YAML region, derived from (flavor,
104/// location). Validation rejects a region iff **any** active consumer rejects
105/// it.
106#[derive(Debug, Clone, Copy, PartialEq, Eq)]
107pub struct YamlValidationContext {
108    consumers: ConsumerSet,
109    substrate: bool,
110}
111
112impl YamlValidationContext {
113    /// The abstract YAML-1.2 substrate: run every 1.2 check and no consumer-only
114    /// checks. This is the contract the yaml-test-suite holds the validator to,
115    /// so its verdicts are independent of any flavor/location.
116    pub const fn substrate() -> Self {
117        Self {
118            consumers: ConsumerSet::empty(),
119            substrate: true,
120        }
121    }
122
123    /// Build a production context for a (flavor, location).
124    pub fn new(flavor: Flavor, location: YamlLocation) -> Self {
125        let consumers = match location {
126            YamlLocation::Frontmatter => frontmatter_consumers(flavor),
127            YamlLocation::Hashpipe => hashpipe_consumers(flavor),
128        };
129        Self {
130            consumers,
131            substrate: false,
132        }
133    }
134
135    /// Convenience: a frontmatter context for `flavor`.
136    pub fn frontmatter(flavor: Flavor) -> Self {
137        Self::new(flavor, YamlLocation::Frontmatter)
138    }
139
140    /// Convenience: a hashpipe `#|` context for `flavor`.
141    pub fn hashpipe(flavor: Flavor) -> Self {
142        Self::new(flavor, YamlLocation::Hashpipe)
143    }
144
145    /// True for the 1.2 substrate path (suite tests). Consumer-only checks must
146    /// never run here.
147    pub const fn is_substrate(&self) -> bool {
148        self.substrate
149    }
150
151    /// The active consumer set.
152    pub const fn consumers(&self) -> ConsumerSet {
153        self.consumers
154    }
155
156    /// True when at least one active consumer is among `rejecting` — i.e. a
157    /// consumer-only check whose rejection set is `rejecting` should fire.
158    pub const fn any_rejects(&self, rejecting: ConsumerSet) -> bool {
159        self.consumers.intersects(rejecting)
160    }
161}
162
163/// Frontmatter consumers by flavor:
164/// - Pandoc: pandoc/libyaml only.
165/// - Quarto: quarto parses the frontmatter (js-yaml) and then hands the doc to
166///   pandoc (libyaml), so both must accept.
167/// - RMarkdown: `rmarkdown::yaml_front_matter` (R `yaml`) reads it, then the doc
168///   renders through pandoc (libyaml), so both must accept.
169/// - GFM/CommonMark/MultiMarkdown: no asserted YAML metadata consumer — lenient.
170///
171/// See `tests/yaml/consumer-matrix.md`.
172fn frontmatter_consumers(flavor: Flavor) -> ConsumerSet {
173    match flavor {
174        Flavor::Pandoc => ConsumerSet::of(YamlConsumer::Libyaml),
175        Flavor::Quarto => ConsumerSet::of(YamlConsumer::Libyaml).with(YamlConsumer::Jsyaml),
176        Flavor::RMarkdown => ConsumerSet::of(YamlConsumer::Libyaml).with(YamlConsumer::RYaml),
177        Flavor::Gfm | Flavor::CommonMark | Flavor::MultiMarkdown => ConsumerSet::empty(),
178    }
179}
180
181/// Hashpipe `#|` cell options are parsed by the executable engine: js-yaml for
182/// Quarto, the R `yaml` package for RMarkdown (via knitr). Other flavors do not
183/// recognize executable cells, so no hashpipe region reaches validation there.
184fn hashpipe_consumers(flavor: Flavor) -> ConsumerSet {
185    match flavor {
186        Flavor::Quarto => ConsumerSet::of(YamlConsumer::Jsyaml),
187        Flavor::RMarkdown => ConsumerSet::of(YamlConsumer::RYaml),
188        Flavor::Pandoc | Flavor::Gfm | Flavor::CommonMark | Flavor::MultiMarkdown => {
189            ConsumerSet::empty()
190        }
191    }
192}
193
#[cfg(test)]
mod tests {
    //! Pins the (flavor, location) → consumer-set tables and the
    //! `any_rejects` contract. "Only"/"both" tests assert exact set equality
    //! so an unexpected extra consumer fails them too.

    use super::*;

    #[test]
    fn substrate_runs_no_consumer_checks() {
        let ctx = YamlValidationContext::substrate();
        assert!(ctx.is_substrate());
        assert!(ctx.consumers().is_empty());
        // With no active consumer, even a check every parser makes stays off.
        assert!(!ctx.any_rejects(ConsumerSet::all()));
    }

    #[test]
    fn pandoc_frontmatter_is_libyaml_only() {
        let ctx = YamlValidationContext::frontmatter(Flavor::Pandoc);
        assert!(!ctx.is_substrate());
        assert!(ctx.consumers().contains(YamlConsumer::Libyaml));
        assert!(!ctx.consumers().contains(YamlConsumer::Jsyaml));
        assert!(!ctx.consumers().contains(YamlConsumer::RYaml));
        assert_eq!(ctx.consumers(), ConsumerSet::of(YamlConsumer::Libyaml));
    }

    #[test]
    fn quarto_frontmatter_is_both() {
        let ctx = YamlValidationContext::frontmatter(Flavor::Quarto);
        assert!(ctx.consumers().contains(YamlConsumer::Libyaml));
        assert!(ctx.consumers().contains(YamlConsumer::Jsyaml));
        // "Both" means exactly these two: R's yaml must not be active.
        assert!(!ctx.consumers().contains(YamlConsumer::RYaml));
        assert_eq!(
            ctx.consumers(),
            ConsumerSet::of(YamlConsumer::Libyaml).with(YamlConsumer::Jsyaml)
        );
    }

    #[test]
    fn quarto_hashpipe_is_jsyaml_only() {
        let ctx = YamlValidationContext::hashpipe(Flavor::Quarto);
        assert!(ctx.consumers().contains(YamlConsumer::Jsyaml));
        assert!(!ctx.consumers().contains(YamlConsumer::Libyaml));
        assert!(!ctx.consumers().contains(YamlConsumer::RYaml));
        assert_eq!(ctx.consumers(), ConsumerSet::of(YamlConsumer::Jsyaml));
    }

    #[test]
    fn rmarkdown_uses_pandoc_and_r_yaml() {
        let fm = YamlValidationContext::frontmatter(Flavor::RMarkdown);
        assert!(fm.consumers().contains(YamlConsumer::Libyaml)); // renders via pandoc
        assert!(fm.consumers().contains(YamlConsumer::RYaml)); // rmarkdown::yaml_front_matter
        assert!(!fm.consumers().contains(YamlConsumer::Jsyaml));
        assert_eq!(
            fm.consumers(),
            ConsumerSet::of(YamlConsumer::Libyaml).with(YamlConsumer::RYaml)
        );

        let hp = YamlValidationContext::hashpipe(Flavor::RMarkdown);
        assert!(hp.consumers().contains(YamlConsumer::RYaml)); // knitr
        assert!(!hp.consumers().contains(YamlConsumer::Jsyaml));
        assert!(!hp.consumers().contains(YamlConsumer::Libyaml));
        assert_eq!(hp.consumers(), ConsumerSet::of(YamlConsumer::RYaml));
    }

    #[test]
    fn commonmark_frontmatter_is_lenient() {
        let ctx = YamlValidationContext::frontmatter(Flavor::CommonMark);
        assert!(ctx.consumers().is_empty());
        assert!(!ctx.is_substrate());
        // Lenient means no consumer-only check can fire.
        assert!(!ctx.any_rejects(ConsumerSet::all()));
    }

    #[test]
    fn flavors_without_metadata_consumer_have_lenient_frontmatter() {
        for flavor in [Flavor::Gfm, Flavor::CommonMark, Flavor::MultiMarkdown] {
            let ctx = YamlValidationContext::frontmatter(flavor);
            assert!(ctx.consumers().is_empty());
            // Lenient is still a production context, not the substrate.
            assert!(!ctx.is_substrate());
        }
    }

    #[test]
    fn flavors_without_executable_cells_have_no_hashpipe_consumers() {
        for flavor in [
            Flavor::Pandoc,
            Flavor::Gfm,
            Flavor::CommonMark,
            Flavor::MultiMarkdown,
        ] {
            let ctx = YamlValidationContext::hashpipe(flavor);
            assert!(ctx.consumers().is_empty());
            assert!(!ctx.is_substrate());
        }
    }

    #[test]
    fn new_dispatches_on_location() {
        assert_eq!(
            YamlValidationContext::new(Flavor::Quarto, YamlLocation::Frontmatter),
            YamlValidationContext::frontmatter(Flavor::Quarto)
        );
        assert_eq!(
            YamlValidationContext::new(Flavor::Quarto, YamlLocation::Hashpipe),
            YamlValidationContext::hashpipe(Flavor::Quarto)
        );
        // The two locations really do differ for Quarto (libyaml is
        // frontmatter-only), so the dispatch above is not vacuous.
        assert_ne!(
            YamlValidationContext::frontmatter(Flavor::Quarto),
            YamlValidationContext::hashpipe(Flavor::Quarto)
        );
    }

    #[test]
    fn any_rejects_matches_intersection() {
        // implicit-empty-key rejects under every consumer.
        let all = ConsumerSet::all();
        assert!(YamlValidationContext::frontmatter(Flavor::Pandoc).any_rejects(all));
        assert!(YamlValidationContext::frontmatter(Flavor::RMarkdown).any_rejects(all));

        // duplicate-key rejects under js-yaml (Quarto) and R yaml (RMarkdown),
        // not under pandoc/libyaml.
        let dup = ConsumerSet::of(YamlConsumer::Jsyaml).with(YamlConsumer::RYaml);
        assert!(!YamlValidationContext::frontmatter(Flavor::Pandoc).any_rejects(dup));
        assert!(YamlValidationContext::frontmatter(Flavor::Quarto).any_rejects(dup));
        assert!(YamlValidationContext::frontmatter(Flavor::RMarkdown).any_rejects(dup));
        assert!(YamlValidationContext::hashpipe(Flavor::RMarkdown).any_rejects(dup));

        // An empty rejection set never fires, whatever consumers are active.
        let nobody = ConsumerSet::empty();
        assert!(!YamlValidationContext::frontmatter(Flavor::Quarto).any_rejects(nobody));
    }
}