rlsp_yaml_parser/event.rs
1// SPDX-License-Identifier: MIT
2
3//! High-level parse events produced by the streaming parser.
4//!
5//! The public entry point is [`crate::parse_events`], which returns an
6//! iterator of <code>Result<([Event], [crate::pos::Span]), [crate::error::Error]></code>.
7//!
8//! Each event carries a [`crate::pos::Span`] covering the input bytes that
9//! contributed to it. For zero-width synthetic events (e.g. `StreamStart`
10//! at the very beginning of input), the span has equal `start` and `end`.
11//!
12
13use std::borrow::Cow;
14
15use crate::pos::Span;
16
17/// Rare per-event fields for node-typed events (`Scalar`, `SequenceStart`, `MappingStart`).
18///
19/// Bundled behind `Option<Box<EventMeta>>` so that the common case — no anchor, no
20/// source-text tag — pays only one 8-byte pointer instead of ~96 bytes of inline storage.
21/// Events with tags and anchors are rare in block-heavy and Kubernetes YAML; boxing them
22/// moves the cost to the uncommon path.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct EventMeta<'input> {
25 /// The anchor name, if any (e.g. `&foo`).
26 pub anchor: Option<&'input str>,
27 /// Source span of the `&name` anchor token — from `&` through the last byte of the name.
28 /// `Some` when `anchor` is `Some`, `None` otherwise.
29 pub anchor_loc: Option<Span>,
30 /// The resolved tag, if any (e.g. `"tag:yaml.org,2002:str"` for `!!str`).
31 ///
32 /// Verbatim tags (`!<URI>`) borrow from input. Shorthand tags resolved via `%TAG`
33 /// directives or the built-in `!!` default produce owned strings.
34 pub tag: Option<Cow<'input, str>>,
35 /// Source span of the tag token — from `!` through the last byte of the tag token.
36 /// `Some` when `tag` is `Some`, `None` otherwise.
37 pub tag_loc: Option<Span>,
38}
39
/// Chomping mode of a block scalar, as defined by YAML 1.2 §8.1.1.2.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Chomp {
    /// `-` indicator: trailing newlines are stripped.
    Strip,
    /// No indicator (the default): a single trailing newline is kept.
    Clip,
    /// `+` indicator: all trailing newlines are kept.
    Keep,
}
50
/// Whether a collection (sequence or mapping) was written in block or flow style.
///
/// Only `Block` is produced at present; `Flow` is reserved for flow sequences
/// (`[a, b]`) and flow mappings (`{a: b}`), which are to be implemented in Task 14.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CollectionStyle {
    /// Block style: structure expressed through indentation and `-`/`:` indicators.
    Block,
    /// Flow style: structure expressed through `[]` or `{}` delimiters (Task 14).
    Flow,
}
62
63/// The style in which a scalar value was written in the source.
64#[derive(Debug, Clone, Copy, PartialEq, Eq)]
65pub enum ScalarStyle {
66 /// An unquoted plain scalar (YAML 1.2 §7.3.3).
67 Plain,
68 /// A `'single-quoted'` scalar (YAML 1.2 §7.3.2).
69 SingleQuoted,
70 /// A `"double-quoted"` scalar (YAML 1.2 §7.3.1).
71 DoubleQuoted,
72 /// A `|` literal block scalar (YAML 1.2 §8.1.2).
73 Literal(Chomp),
74 /// A `>` folded block scalar (YAML 1.2 §8.1.3).
75 ///
76 /// Line folding is applied to the collected content: a single line break
77 /// between two equally-indented non-blank lines becomes a space; N blank
78 /// lines between non-blank lines produce N newlines; more-indented lines
79 /// preserve their relative leading whitespace and the line break before
80 /// them is kept as `\n` rather than folded to a space. Callers must not
81 /// treat the value as whitespace-safe — more-indented lines can inject
82 /// arbitrary leading spaces into the parsed value.
83 Folded(Chomp),
84}
85
86/// A high-level YAML parse event.
87#[derive(Debug, Clone, PartialEq, Eq)]
88pub enum Event<'input> {
89 /// The YAML stream has started.
90 ///
91 /// Always the first event in any parse. The associated span is a
92 /// zero-width span at [`crate::pos::Pos::ORIGIN`].
93 StreamStart,
94 /// The YAML stream has ended.
95 ///
96 /// Always the last event in any parse. The associated span is a
97 /// zero-width span at the position immediately after the last byte of
98 /// input.
99 StreamEnd,
100 /// A YAML comment (YAML 1.2 §6.6).
101 ///
102 /// `text` is the comment body — the content of the line after the `#`
103 /// character, with the `#` itself excluded. Leading whitespace after `#`
104 /// is preserved (e.g. `# hello` → text `" hello"`; `#nospace` → text
105 /// `"nospace"`). The associated span covers from the `#` character
106 /// through the last byte of comment text (the newline is not included).
107 ///
108 /// One `Comment` event is emitted per physical line.
109 Comment {
110 /// Comment body (everything after the `#`, excluding the newline).
111 text: &'input str,
112 },
113 /// An alias node (`*name`) that references a previously anchored node.
114 ///
115 /// The associated span covers the entire `*name` token (from `*` through
116 /// the last character of the name). Resolution of the alias to its
117 /// anchored node is the loader's responsibility (Task 20) — the parser
118 /// emits this event without expansion.
119 Alias {
120 /// The anchor name being referenced (e.g. `"foo"` for `*foo`).
121 /// Borrowed directly from input — no allocation.
122 name: &'input str,
123 },
124 /// A document has started.
125 ///
126 /// `explicit` is `true` when the document was introduced with `---`.
127 /// `false` for bare documents (no marker).
128 DocumentStart {
129 /// Whether the document was introduced with `---`.
130 explicit: bool,
131 /// Version from the `%YAML` directive preceding this document, if any.
132 ///
133 /// `Some((1, 2))` for `%YAML 1.2`, `None` when no `%YAML` directive was present.
134 version: Option<(u8, u8)>,
135 /// Tag handle/prefix pairs from `%TAG` directives preceding this document.
136 ///
137 /// Each entry is `(handle, prefix)` — e.g. `("!foo!", "tag:example.com,2026:")`.
138 /// Empty when no `%TAG` directives were present.
139 tag_directives: Vec<(String, String)>,
140 },
141 /// A document has ended.
142 ///
143 /// `explicit` is `true` when the document was closed with `...`.
144 /// `false` for implicitly-ended documents.
145 DocumentEnd {
146 /// Whether the document was closed with `...`.
147 explicit: bool,
148 },
149 /// A block or flow sequence has started.
150 ///
151 /// Followed by zero or more node events (scalars or nested collections),
152 /// then a matching [`Event::SequenceEnd`].
153 SequenceStart {
154 /// Whether this is a block (`-` indicator) or flow (`[...]`) sequence.
155 style: CollectionStyle,
156 /// Rare fields: `anchor`, `anchor_loc`, `tag`, `tag_loc`.
157 /// `None` when no anchor or source-text tag is present (the common case).
158 meta: Option<Box<EventMeta<'input>>>,
159 },
160 /// A sequence has ended.
161 ///
162 /// Matches the most recent [`Event::SequenceStart`] on the event stack.
163 SequenceEnd,
164 /// A block or flow mapping has started.
165 ///
166 /// Followed by alternating key/value node events (scalars or nested
167 /// collections), then a matching [`Event::MappingEnd`].
168 MappingStart {
169 /// Whether this is a block (indentation-based) or flow (`{...}`) mapping.
170 style: CollectionStyle,
171 /// Rare fields: `anchor`, `anchor_loc`, `tag`, `tag_loc`.
172 /// `None` when no anchor or source-text tag is present (the common case).
173 meta: Option<Box<EventMeta<'input>>>,
174 },
175 /// A mapping has ended.
176 ///
177 /// Matches the most recent [`Event::MappingStart`] on the event stack.
178 MappingEnd,
179 /// A scalar value.
180 ///
181 /// `value` borrows from input when no transformation is required (the
182 /// vast majority of plain scalars). It owns when line folding produces
183 /// a string that doesn't exist contiguously in the input.
184 Scalar {
185 /// The scalar's decoded value.
186 value: Cow<'input, str>,
187 /// The style in which the scalar appeared in the source.
188 style: ScalarStyle,
189 /// Rare fields: `anchor`, `anchor_loc`, `tag`, `tag_loc`.
190 /// `None` when no anchor or source-text tag is present (the common case).
191 meta: Option<Box<EventMeta<'input>>>,
192 },
193}
194
195impl Event<'_> {
196 /// Returns the anchor name if this event defines one.
197 #[must_use]
198 #[inline]
199 pub fn anchor(&self) -> Option<&str> {
200 match self {
201 Self::Scalar { meta, .. }
202 | Self::SequenceStart { meta, .. }
203 | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.anchor),
204 Self::StreamStart
205 | Self::StreamEnd
206 | Self::Comment { .. }
207 | Self::Alias { .. }
208 | Self::DocumentStart { .. }
209 | Self::DocumentEnd { .. }
210 | Self::SequenceEnd
211 | Self::MappingEnd => None,
212 }
213 }
214
215 /// Returns the source span of the `&name` anchor token, if any.
216 #[must_use]
217 #[inline]
218 pub fn anchor_loc(&self) -> Option<Span> {
219 match self {
220 Self::Scalar { meta, .. }
221 | Self::SequenceStart { meta, .. }
222 | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.anchor_loc),
223 Self::StreamStart
224 | Self::StreamEnd
225 | Self::Comment { .. }
226 | Self::Alias { .. }
227 | Self::DocumentStart { .. }
228 | Self::DocumentEnd { .. }
229 | Self::SequenceEnd
230 | Self::MappingEnd => None,
231 }
232 }
233
234 /// Returns the resolved tag string, if any.
235 #[must_use]
236 #[inline]
237 pub fn tag(&self) -> Option<&str> {
238 match self {
239 Self::Scalar { meta, .. }
240 | Self::SequenceStart { meta, .. }
241 | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.tag.as_deref()),
242 Self::StreamStart
243 | Self::StreamEnd
244 | Self::Comment { .. }
245 | Self::Alias { .. }
246 | Self::DocumentStart { .. }
247 | Self::DocumentEnd { .. }
248 | Self::SequenceEnd
249 | Self::MappingEnd => None,
250 }
251 }
252
253 /// Returns the source span of the tag token, if any.
254 #[must_use]
255 #[inline]
256 pub fn tag_loc(&self) -> Option<Span> {
257 match self {
258 Self::Scalar { meta, .. }
259 | Self::SequenceStart { meta, .. }
260 | Self::MappingStart { meta, .. } => meta.as_ref().and_then(|m| m.tag_loc),
261 Self::StreamStart
262 | Self::StreamEnd
263 | Self::Comment { .. }
264 | Self::Alias { .. }
265 | Self::DocumentStart { .. }
266 | Self::DocumentEnd { .. }
267 | Self::SequenceEnd
268 | Self::MappingEnd => None,
269 }
270 }
271}
272
273/// Build an `EventMeta` box when at least one field is `Some`.
274///
275/// Returns `None` when all four fields are `None` (the common case).
276#[expect(
277 clippy::redundant_pub_crate,
278 reason = "pub(crate) inside private module — accessibility requires crate-wide visibility"
279)]
280#[inline]
281pub(crate) fn make_meta<'input>(
282 anchor: Option<&'input str>,
283 anchor_loc: Option<Span>,
284 tag: Option<Cow<'input, str>>,
285 tag_loc: Option<Span>,
286) -> Option<Box<EventMeta<'input>>> {
287 if anchor.is_none() && tag.is_none() {
288 None
289 } else {
290 Some(Box::new(EventMeta {
291 anchor,
292 anchor_loc,
293 tag,
294 tag_loc,
295 }))
296 }
297}
298
299const _: () = assert!(
300 std::mem::size_of::<Event<'_>>() <= 56,
301 "Event must be at most 56 bytes after EventMeta boxing"
302);
303
#[cfg(test)]
#[expect(clippy::unwrap_used, reason = "test code")]
mod tests {
    use std::borrow::Cow;

    use super::*;
    use crate::pos::Span;

    /// Arbitrary span used wherever a test needs an anchor or tag location.
    const SPAN: Span = Span { start: 0, end: 4 };

    // EM-1: meta is None when all four fields are absent.
    #[test]
    fn make_meta_returns_none_when_all_fields_absent() {
        let meta = make_meta(None, None, None, None);
        assert!(
            meta.is_none(),
            "make_meta must return None when anchor and tag are both None"
        );
    }

    // EM-2: meta is Some when only anchor is present; the tag fields stay empty.
    #[test]
    fn make_meta_returns_some_when_anchor_only() {
        let meta = make_meta(Some("a"), Some(SPAN), None, None).unwrap();
        assert_eq!(meta.anchor, Some("a"));
        assert_eq!(meta.anchor_loc, Some(SPAN));
        assert!(meta.tag.is_none());
        assert!(meta.tag_loc.is_none());
    }

    // EM-3: meta is Some when only tag is present; the anchor fields stay empty.
    #[test]
    fn make_meta_returns_some_when_tag_only() {
        let meta = make_meta(None, None, Some(Cow::Borrowed("!str")), Some(SPAN)).unwrap();
        assert!(meta.anchor.is_none());
        assert!(meta.anchor_loc.is_none());
        assert_eq!(meta.tag.as_deref(), Some("!str"));
        assert_eq!(meta.tag_loc, Some(SPAN));
    }

    // EM-4: meta is Some when both anchor and tag are present.
    #[test]
    fn make_meta_returns_some_when_both_anchor_and_tag() {
        let meta = make_meta(
            Some("a"),
            Some(SPAN),
            Some(Cow::Borrowed("!str")),
            Some(SPAN),
        )
        .unwrap();
        assert_eq!(meta.anchor, Some("a"));
        assert_eq!(meta.tag.as_deref(), Some("!str"));
        // Check the spans as EM-2 and EM-3 do, so a regression that drops
        // locations when both anchor and tag are supplied is caught.
        assert_eq!(meta.anchor_loc, Some(SPAN));
        assert_eq!(meta.tag_loc, Some(SPAN));
    }

    // EM-5: Event size at or below 56 bytes.
    #[test]
    fn event_size_at_most_56_bytes() {
        assert!(
            std::mem::size_of::<Event<'_>>() <= 56,
            "Event size {} exceeds 56 bytes",
            std::mem::size_of::<Event<'_>>()
        );
    }
}
367}