Skip to main content

rlsp_yaml_parser/
event.rs

// SPDX-License-Identifier: MIT

//! High-level parse events produced by the streaming parser.
//!
//! The public entry point is [`crate::parse_events`], which returns an
//! iterator of <code>Result<([Event], [crate::pos::Span]), [crate::error::Error]></code>.
//!
//! Each event carries a [`crate::pos::Span`] covering the input bytes that
//! contributed to it.  For zero-width synthetic events (e.g. `StreamStart`
//! at the very beginning of input), the span has equal `start` and `end`.
//!
12
13use std::borrow::Cow;
14
15use crate::pos::Span;
16
17/// Rare per-event fields for node-typed events (`Scalar`, `SequenceStart`, `MappingStart`).
18///
19/// Bundled behind `Option<Box<EventMeta>>` so that the common case — no anchor, no
20/// source-text tag — pays only one 8-byte pointer instead of ~96 bytes of inline storage.
21/// Events with tags and anchors are rare in block-heavy and Kubernetes YAML; boxing them
22/// moves the cost to the uncommon path.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct EventMeta<'input> {
25    /// The anchor name, if any (e.g. `&foo`).
26    pub anchor: Option<&'input str>,
27    /// Source span of the `&name` anchor token — from `&` through the last byte of the name.
28    /// `Some` when `anchor` is `Some`, `None` otherwise.
29    pub anchor_loc: Option<Span>,
30    /// The resolved tag, if any (e.g. `"tag:yaml.org,2002:str"` for `!!str`).
31    ///
32    /// Verbatim tags (`!<URI>`) borrow from input.  Shorthand tags resolved via `%TAG`
33    /// directives or the built-in `!!` default produce owned strings.
34    pub tag: Option<Cow<'input, str>>,
35    /// Source span of the tag token — from `!` through the last byte of the tag token.
36    /// `Some` when `tag` is `Some`, `None` otherwise.
37    pub tag_loc: Option<Span>,
38}
39
/// Chomping mode of a block scalar (YAML 1.2 §8.1.1.2).
///
/// Selected by the optional indicator in the block scalar header and controls how
/// trailing newlines of the scalar are treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Chomp {
    /// `-` — trailing newlines stripped.
    Strip,
    /// (default, no indicator) — single trailing newline kept.
    Clip,
    /// `+` — all trailing newlines kept.
    Keep,
}
50
/// The style (block or flow) of a collection (sequence or mapping).
///
/// Currently only `Block` is produced; `Flow` will be used when flow sequences
/// (`[a, b]`) and flow mappings (`{a: b}`) are implemented in Task 14.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollectionStyle {
    /// Block style: structure comes from indentation and the `-` / `:` indicators.
    Block,
    /// Flow style: structure comes from `[]` or `{}` delimiters (Task 14).
    Flow,
}
62
63/// The style in which a scalar value was written in the source.
64#[derive(Debug, Clone, Copy, PartialEq, Eq)]
65pub enum ScalarStyle {
66    /// An unquoted plain scalar (YAML 1.2 §7.3.3).
67    Plain,
68    /// A `'single-quoted'` scalar (YAML 1.2 §7.3.2).
69    SingleQuoted,
70    /// A `"double-quoted"` scalar (YAML 1.2 §7.3.1).
71    DoubleQuoted,
72    /// A `|` literal block scalar (YAML 1.2 §8.1.2).
73    Literal(Chomp),
74    /// A `>` folded block scalar (YAML 1.2 §8.1.3).
75    ///
76    /// Line folding is applied to the collected content: a single line break
77    /// between two equally-indented non-blank lines becomes a space; N blank
78    /// lines between non-blank lines produce N newlines; more-indented lines
79    /// preserve their relative leading whitespace and the line break before
80    /// them is kept as `\n` rather than folded to a space.  Callers must not
81    /// treat the value as whitespace-safe — more-indented lines can inject
82    /// arbitrary leading spaces into the parsed value.
83    Folded(Chomp),
84}
85
86/// A high-level YAML parse event.
87#[derive(Debug, Clone, PartialEq, Eq)]
88pub enum Event<'input> {
89    /// The YAML stream has started.
90    ///
91    /// Always the first event in any parse.  The associated span is a
92    /// zero-width span at [`crate::pos::Pos::ORIGIN`].
93    StreamStart,
94    /// The YAML stream has ended.
95    ///
96    /// Always the last event in any parse.  The associated span is a
97    /// zero-width span at the position immediately after the last byte of
98    /// input.
99    StreamEnd,
100    /// A YAML comment (YAML 1.2 §6.6).
101    ///
102    /// `text` is the comment body — the content of the line after the `#`
103    /// character, with the `#` itself excluded.  Leading whitespace after `#`
104    /// is preserved (e.g. `# hello` → text `" hello"`; `#nospace` → text
105    /// `"nospace"`).  The associated span covers from the `#` character
106    /// through the last byte of comment text (the newline is not included).
107    ///
108    /// One `Comment` event is emitted per physical line.
109    Comment {
110        /// Comment body (everything after the `#`, excluding the newline).
111        text: &'input str,
112    },
113    /// An alias node (`*name`) that references a previously anchored node.
114    ///
115    /// The associated span covers the entire `*name` token (from `*` through
116    /// the last character of the name).  Resolution of the alias to its
117    /// anchored node is the loader's responsibility (Task 20) — the parser
118    /// emits this event without expansion.
119    Alias {
120        /// The anchor name being referenced (e.g. `"foo"` for `*foo`).
121        /// Borrowed directly from input — no allocation.
122        name: &'input str,
123    },
124    /// A document has started.
125    ///
126    /// `explicit` is `true` when the document was introduced with `---`.
127    /// `false` for bare documents (no marker).
128    DocumentStart {
129        /// Whether the document was introduced with `---`.
130        explicit: bool,
131        /// Version from the `%YAML` directive preceding this document, if any.
132        ///
133        /// `Some((1, 2))` for `%YAML 1.2`, `None` when no `%YAML` directive was present.
134        version: Option<(u8, u8)>,
135        /// Tag handle/prefix pairs from `%TAG` directives preceding this document.
136        ///
137        /// Each entry is `(handle, prefix)` — e.g. `("!foo!", "tag:example.com,2026:")`.
138        /// Empty when no `%TAG` directives were present.
139        tag_directives: Vec<(String, String)>,
140    },
141    /// A document has ended.
142    ///
143    /// `explicit` is `true` when the document was closed with `...`.
144    /// `false` for implicitly-ended documents.
145    DocumentEnd {
146        /// Whether the document was closed with `...`.
147        explicit: bool,
148    },
149    /// A block or flow sequence has started.
150    ///
151    /// Followed by zero or more node events (scalars or nested collections),
152    /// then a matching [`Event::SequenceEnd`].
153    SequenceStart {
154        /// Whether this is a block (`-` indicator) or flow (`[...]`) sequence.
155        style: CollectionStyle,
156        /// Rare fields: `anchor`, `anchor_loc`, `tag`, `tag_loc`.
157        /// `None` when no anchor or source-text tag is present (the common case).
158        meta: Option<Box<EventMeta<'input>>>,
159    },
160    /// A sequence has ended.
161    ///
162    /// Matches the most recent [`Event::SequenceStart`] on the event stack.
163    SequenceEnd,
164    /// A block or flow mapping has started.
165    ///
166    /// Followed by alternating key/value node events (scalars or nested
167    /// collections), then a matching [`Event::MappingEnd`].
168    MappingStart {
169        /// Whether this is a block (indentation-based) or flow (`{...}`) mapping.
170        style: CollectionStyle,
171        /// Rare fields: `anchor`, `anchor_loc`, `tag`, `tag_loc`.
172        /// `None` when no anchor or source-text tag is present (the common case).
173        meta: Option<Box<EventMeta<'input>>>,
174    },
175    /// A mapping has ended.
176    ///
177    /// Matches the most recent [`Event::MappingStart`] on the event stack.
178    MappingEnd,
179    /// A scalar value.
180    ///
181    /// `value` borrows from input when no transformation is required (the
182    /// vast majority of plain scalars).  It owns when line folding produces
183    /// a string that doesn't exist contiguously in the input.
184    Scalar {
185        /// The scalar's decoded value.
186        value: Cow<'input, str>,
187        /// The style in which the scalar appeared in the source.
188        style: ScalarStyle,
189        /// Rare fields: `anchor`, `anchor_loc`, `tag`, `tag_loc`.
190        /// `None` when no anchor or source-text tag is present (the common case).
191        meta: Option<Box<EventMeta<'input>>>,
192    },
193}
194
195impl Event<'_> {
196    /// Returns the anchor name if this event defines one.
197    #[must_use]
198    #[inline]
199    pub fn anchor(&self) -> Option<&str> {
200        match self {
201            Self::Scalar { meta, .. }
202            | Self::SequenceStart { meta, .. }
203            | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.anchor),
204            Self::StreamStart
205            | Self::StreamEnd
206            | Self::Comment { .. }
207            | Self::Alias { .. }
208            | Self::DocumentStart { .. }
209            | Self::DocumentEnd { .. }
210            | Self::SequenceEnd
211            | Self::MappingEnd => None,
212        }
213    }
214
215    /// Returns the source span of the `&name` anchor token, if any.
216    #[must_use]
217    #[inline]
218    pub fn anchor_loc(&self) -> Option<Span> {
219        match self {
220            Self::Scalar { meta, .. }
221            | Self::SequenceStart { meta, .. }
222            | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.anchor_loc),
223            Self::StreamStart
224            | Self::StreamEnd
225            | Self::Comment { .. }
226            | Self::Alias { .. }
227            | Self::DocumentStart { .. }
228            | Self::DocumentEnd { .. }
229            | Self::SequenceEnd
230            | Self::MappingEnd => None,
231        }
232    }
233
234    /// Returns the resolved tag string, if any.
235    #[must_use]
236    #[inline]
237    pub fn tag(&self) -> Option<&str> {
238        match self {
239            Self::Scalar { meta, .. }
240            | Self::SequenceStart { meta, .. }
241            | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.tag.as_deref()),
242            Self::StreamStart
243            | Self::StreamEnd
244            | Self::Comment { .. }
245            | Self::Alias { .. }
246            | Self::DocumentStart { .. }
247            | Self::DocumentEnd { .. }
248            | Self::SequenceEnd
249            | Self::MappingEnd => None,
250        }
251    }
252
253    /// Returns the source span of the tag token, if any.
254    #[must_use]
255    #[inline]
256    pub fn tag_loc(&self) -> Option<Span> {
257        match self {
258            Self::Scalar { meta, .. }
259            | Self::SequenceStart { meta, .. }
260            | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.tag_loc),
261            Self::StreamStart
262            | Self::StreamEnd
263            | Self::Comment { .. }
264            | Self::Alias { .. }
265            | Self::DocumentStart { .. }
266            | Self::DocumentEnd { .. }
267            | Self::SequenceEnd
268            | Self::MappingEnd => None,
269        }
270    }
271}
272
273/// Build an `EventMeta` box when at least one field is `Some`.
274///
275/// Returns `None` when all four fields are `None` (the common case).
276#[expect(
277    clippy::redundant_pub_crate,
278    reason = "pub(crate) inside private module — accessibility requires crate-wide visibility"
279)]
280#[inline]
281pub(crate) fn make_meta<'input>(
282    anchor: Option<&'input str>,
283    anchor_loc: Option<Span>,
284    tag: Option<Cow<'input, str>>,
285    tag_loc: Option<Span>,
286) -> Option<Box<EventMeta<'input>>> {
287    if anchor.is_none() && tag.is_none() {
288        None
289    } else {
290        Some(Box::new(EventMeta {
291            anchor,
292            anchor_loc,
293            tag,
294            tag_loc,
295        }))
296    }
297}
298
299const _: () = assert!(
300    std::mem::size_of::<Event<'_>>() <= 56,
301    "Event must be at most 56 bytes after EventMeta boxing"
302);
303
/// Unit tests for [`make_meta`] and the `Event` size budget.
#[cfg(test)]
#[expect(clippy::unwrap_used, reason = "test code")]
mod tests {
    use std::borrow::Cow;

    use super::*;
    use crate::pos::Span;

    /// Arbitrary span used wherever a test needs a location value.
    const SPAN: Span = Span { start: 0, end: 4 };

    // EM-1: meta is None when all four fields are absent.
    #[test]
    fn make_meta_returns_none_when_all_fields_absent() {
        let meta = make_meta(None, None, None, None);
        assert!(
            meta.is_none(),
            "make_meta must return None when anchor and tag are both None"
        );
    }

    // EM-2: meta is Some when only anchor is present.
    #[test]
    fn make_meta_returns_some_when_anchor_only() {
        let meta = make_meta(Some("a"), Some(SPAN), None, None).unwrap();
        assert_eq!(meta.anchor, Some("a"));
        assert_eq!(meta.anchor_loc, Some(SPAN));
        assert!(meta.tag.is_none());
        assert!(meta.tag_loc.is_none());
    }

    // EM-3: meta is Some when only tag is present.
    #[test]
    fn make_meta_returns_some_when_tag_only() {
        let meta = make_meta(None, None, Some(Cow::Borrowed("!str")), Some(SPAN)).unwrap();
        assert!(meta.anchor.is_none());
        assert!(meta.anchor_loc.is_none());
        assert_eq!(meta.tag.as_deref(), Some("!str"));
        assert_eq!(meta.tag_loc, Some(SPAN));
    }

    // EM-4: meta is Some when both anchor and tag are present; all four fields are kept.
    #[test]
    fn make_meta_returns_some_when_both_anchor_and_tag() {
        let meta = make_meta(
            Some("a"),
            Some(SPAN),
            Some(Cow::Borrowed("!str")),
            Some(SPAN),
        )
        .unwrap();
        assert_eq!(meta.anchor, Some("a"));
        assert_eq!(meta.anchor_loc, Some(SPAN));
        assert_eq!(meta.tag.as_deref(), Some("!str"));
        assert_eq!(meta.tag_loc, Some(SPAN));
    }

    // EM-5: Event size at or below 56 bytes.
    #[test]
    fn event_size_at_most_56_bytes() {
        assert!(
            std::mem::size_of::<Event<'_>>() <= 56,
            "Event size {} exceeds 56 bytes",
            std::mem::size_of::<Event<'_>>()
        );
    }
}