rlsp_yaml_parser/lib.rs
1// SPDX-License-Identifier: MIT
2#![deny(clippy::panic)]
3
4mod chars;
5pub mod encoding;
6mod error;
7mod event;
8mod event_iter;
9mod lexer;
10pub mod limits;
11mod lines;
12pub mod loader;
13pub mod node;
14mod pos;
15pub use error::Error;
16pub use event::{Chomp, CollectionStyle, Event, ScalarStyle};
17pub use lines::{BreakType, Line, LineBuffer};
18pub use loader::{LoadError, LoadMode, Loader, LoaderBuilder, LoaderOptions, load};
19pub use node::{Document, Node};
20pub use pos::{Pos, Span};
21
22pub use limits::{
23 MAX_ANCHOR_NAME_BYTES, MAX_COLLECTION_DEPTH, MAX_COMMENT_LEN, MAX_DIRECTIVES_PER_DOC,
24 MAX_RESOLVED_TAG_LEN, MAX_TAG_HANDLE_BYTES, MAX_TAG_LEN,
25};
26use std::collections::VecDeque;
27
28use event_iter::{CollectionEntry, DirectiveScope, IterState, PendingAnchor, PendingTag};
29
30use lexer::Lexer;
31
32/// Parse a YAML string into a lazy event stream.
33///
34/// The iterator yields <code>Result<([Event], [Span]), [Error]></code> items.
35/// The first event is always [`Event::StreamStart`] and the last is always
36/// [`Event::StreamEnd`].
37///
38/// # Example
39///
40/// ```
41/// use rlsp_yaml_parser::{parse_events, Event};
42///
43/// let events: Vec<_> = parse_events("").collect();
44/// assert!(matches!(events.first(), Some(Ok((Event::StreamStart, _)))));
45/// assert!(matches!(events.last(), Some(Ok((Event::StreamEnd, _)))));
46/// ```
47pub fn parse_events(input: &str) -> impl Iterator<Item = Result<(Event<'_>, Span), Error>> + '_ {
48 EventIter::new(input)
49}
50
// ---------------------------------------------------------------------------
// Iterator implementation
// ---------------------------------------------------------------------------
54
55/// Lazy iterator that yields events by walking a [`Lexer`].
56struct EventIter<'input> {
57 lexer: Lexer<'input>,
58 state: IterState,
59 /// Queued events to emit before resuming normal state dispatch.
60 ///
61 /// Used when a single parse step must produce multiple consecutive events —
62 /// e.g. `SequenceStart` before the first item, or multiple close events
63 /// when a dedent closes several nested collections at once.
64 queue: VecDeque<(Event<'input>, Span)>,
65 /// Stack of open block collections (sequences and mappings).
66 ///
67 /// Each entry records whether the open collection is a sequence or a
68 /// mapping, its indentation column, and (for mappings) whether the next
69 /// expected node is a key or a value. The combined length of this stack
70 /// is bounded by [`MAX_COLLECTION_DEPTH`].
71 coll_stack: Vec<CollectionEntry>,
72 /// A pending anchor that has been scanned but not yet attached to a node
73 /// event. The [`PendingAnchor`] variant encodes both the anchor name and
74 /// whether it was standalone (applies to the next node of any type) or
75 /// inline (applies to the key scalar, not the enclosing mapping).
76 pending_anchor: Option<PendingAnchor<'input>>,
77 /// A pending tag that has been scanned but not yet attached to a node event.
78 ///
79 /// Tags in YAML precede the node they annotate (YAML 1.2 §6.8.1). After
80 /// scanning `!tag`, `!!tag`, `!<uri>`, or `!`, the parser stores the tag
81 /// here and attaches it to the next `Scalar`, `SequenceStart`, or
82 /// `MappingStart` event.
83 ///
84 /// Tags are resolved against the current directive scope at scan time:
85 /// - `!<URI>` → stored as `Cow::Borrowed("URI")` (verbatim, no change)
86 /// - `!!suffix` → resolved via `!!` handle (default: `tag:yaml.org,2002:suffix`)
87 /// - `!suffix` → stored as `Cow::Borrowed("!suffix")` (local tag, no expansion)
88 /// - `!` → stored as `Cow::Borrowed("!")`
89 /// - `!handle!suffix` → resolved via `%TAG !handle! prefix` directive
90 ///
91 /// The [`PendingTag`] variant encodes both the resolved tag string and
92 /// whether it was standalone (applies to the next node of any type) or
93 /// inline (applies to the key scalar, not the enclosing mapping).
94 pending_tag: Option<PendingTag<'input>>,
95 /// Directive scope for the current document.
96 ///
97 /// Accumulated from `%YAML` and `%TAG` directives seen in `BetweenDocs`
98 /// state. Reset at document boundaries.
99 directive_scope: DirectiveScope,
100 /// Set to `true` once the root node of the current document has been
101 /// fully emitted (a scalar at the top level, or a collection after its
102 /// closing event empties `coll_stack`).
103 ///
104 /// Used to detect invalid extra content after the document root, such as
105 /// `foo:\n bar\ninvalid` where `invalid` appears after the root mapping
106 /// closes. Reset to `false` at each document boundary.
107 root_node_emitted: bool,
108 /// Set to `true` after consuming a `? ` explicit key indicator whose key
109 /// content will appear on the NEXT line (i.e., `had_key_inline = false`).
110 /// Cleared when the key content is processed.
111 ///
112 /// Used to allow a block sequence indicator on a line following `? ` to be
113 /// treated as the explicit key's content rather than triggering the
114 /// "invalid block sequence entry" guard.
115 explicit_key_pending: bool,
116 /// When a tag or anchor appears inline on a physical line (e.g. `!!str &a key:`),
117 /// the key content is prepended as a synthetic line with the key's column as its
118 /// indent. This field records the indent of the ORIGINAL physical line so that
119 /// `handle_mapping_entry` can open the mapping at the correct (original) indent
120 /// rather than the synthetic line's offset.
121 property_origin_indent: Option<usize>,
122}
123
124impl EventIter<'_> {
125 /// Current combined collection depth (sequences + mappings).
126 const fn collection_depth(&self) -> usize {
127 self.coll_stack.len()
128 }
129}
130
131/// Build an empty plain scalar event.
132pub(crate) const fn empty_scalar_event<'input>() -> Event<'input> {
133 Event::Scalar {
134 value: std::borrow::Cow::Borrowed(""),
135 style: ScalarStyle::Plain,
136 anchor: None,
137 tag: None,
138 }
139}
140
141/// Build a span that covers exactly the 3-byte document marker at `marker_pos`.
142pub(crate) const fn marker_span(marker_pos: Pos) -> Span {
143 Span {
144 start: marker_pos,
145 end: Pos {
146 byte_offset: marker_pos.byte_offset + 3,
147 line: marker_pos.line,
148 column: marker_pos.column + 3,
149 },
150 }
151}
152
153/// Build a zero-width span at `pos`.
154pub(crate) const fn zero_span(pos: Pos) -> Span {
155 Span {
156 start: pos,
157 end: pos,
158 }
159}