1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
use std::borrow::Cow;
#[cfg(feature = "properties")]
use std::collections::HashMap;
use std::mem;
#[cfg(feature = "properties")]
use std::rc::Rc;
use granit_parser::ScalarStyle;
use super::error::Error;
use super::tags::SfTag;
use crate::location::{Location, Locations};
/// Attach both reference and defined locations to an error for alias replay scenarios.
/// When both locations are known and different, creates an `AliasError` to report both.
/// This is used for errors occurring when deserializing aliased values.
///
/// During alias replay, errors may already have a location attached (the anchor's definition
/// location from the replayed events). We still want to create an `AliasError` with both
/// locations when the reference (alias) and defined (anchor) locations differ.
#[inline]
pub(super) fn attach_alias_locations_if_missing(
err: Error,
reference_location: Location,
defined_location: Location,
) -> Error {
// If both locations are known and different, create an AliasError to show both.
// This applies even if the error already has a location (from replayed anchor events),
// because we want to show where the alias was used, not just where the anchor was defined.
if reference_location != Location::UNKNOWN
&& defined_location != Location::UNKNOWN
&& reference_location != defined_location
{
Error::AliasError {
msg: err.to_string(),
locations: Locations {
reference_location,
defined_location,
},
}
} else if err.location().is_some() {
// Error already has a location and we don't have dual locations to add
err
} else {
// Fall back to single location (prefer reference, then defined)
let loc = if reference_location != Location::UNKNOWN {
reference_location
} else {
defined_location
};
err.with_location(loc)
}
}
/// Our simplified owned event kind that we feed into Serde.
///
/// This intentionally carries semantic YAML node data, anchors, tags, style, and
/// locations, but not presentation metadata such as comments. Live streams expose
/// comments through the `Events` comment hooks; replay streams can only preserve
/// comments that callers captured separately before buffering/replay.
#[derive(Clone, Debug)]
pub(crate) enum Ev<'a> {
/// Scalar value from YAML (text), with optional tag and style.
Scalar {
value: Cow<'a, str>,
tag: SfTag,
raw_tag: Option<Cow<'a, str>>,
style: ScalarStyle,
/// Numeric anchor id (0 if none) attached to this scalar node.
anchor: usize,
location: Location,
},
/// Start of a sequence (`[` / `-`-list).
SeqStart {
anchor: usize,
tag: SfTag,
raw_tag: Option<Cow<'a, str>>,
location: Location,
},
/// End of a sequence.
SeqEnd { location: Location },
/// Start of a mapping (`{` or block mapping).
MapStart { anchor: usize, location: Location },
/// End of a mapping.
MapEnd { location: Location },
/// The event has been taken from the array, with only its location remaining.
/// This should not appear in the event stream and is reserved for internal container state.
Taken { location: Location },
}
impl Default for Ev<'_> {
// Used for optimization
fn default() -> Self {
Ev::Taken {
location: Location::UNKNOWN,
}
}
}
impl Ev<'_> {
/// Get the source location attached to this event.
///
/// Returns:
/// - `Location` recorded when the event was produced.
///
/// Used by:
/// - Error reporting and "last seen location" tracking.
pub(crate) fn location(&self) -> Location {
match self {
Ev::Scalar { location, .. }
| Ev::SeqStart { location, .. }
| Ev::SeqEnd { location }
| Ev::MapStart { location, .. }
| Ev::MapEnd { location }
| Ev::Taken { location } => *location,
}
}
}
/// from_slice_multiple location-free representation of events for duplicate-key comparison.
/// Source of events with lookahead and alias-injection.
pub(crate) trait Events<'de> {
/// Pull the next event from the stream.
///
/// Returns:
/// - `Ok(Some(Ev))` for a real event,
/// - `Ok(None)` at true end-of-stream,
/// - `Err(Error)` on parser/structure failure.
///
/// Called by:
/// - The streaming deserializer (`Deser`) and helper scanners.
fn next(&mut self) -> Result<Option<Ev<'de>>, Error>;
/// Peek at the next event without consuming it.
///
/// Returns:
/// - `Ok(Some(&Ev))` with the event reference,
/// - `Ok(None)` at end-of-stream,
/// - `Err(Error)` on error.
///
/// Called by:
/// - Lookahead logic (merge, container boundaries, option/unit handling).
fn peek(&mut self) -> Result<Option<&Ev<'de>>, Error>;
/// Last location that `next` or `peek` has observed.
///
/// Used by:
/// - Error paths to attach a reasonable position when nothing else is available.
fn last_location(&self) -> Location;
/// Location of the *reference* to the next node (use-site).
///
/// This is the key primitive that enables `Spanned<T>` to report two different
/// locations:
/// - **referenced**: where the value is *used* in the YAML (the use-site)
/// - **defined**: where the value is *defined* (the definition-site; typically
/// the node's own [`Ev::location`])
///
/// Contract
/// - For a normal (non-alias) stream, `reference_location()` should be the
/// same as `peek()?.map(|ev| ev.location())`.
/// - While replaying an alias (`*a`), the *events* come from the anchored
/// definition buffer, so their `Ev::location()` points at the definition-site.
/// In that situation, `reference_location()` must instead return the location
/// of the alias token `*a` (the use-site), so callers can attribute values to
/// where they were referenced.
/// - During merge expansion (`<<: *m`), merge-derived entries should also
/// carry a use-site location (usually the `<<` entry / alias token) even
/// though the actual scalar nodes being replayed come from the merged mapping.
///
/// Subtlety: this method is used *together with* `peek()`.
/// Consumers typically do `peek()` (to ensure the next node is available), then
/// call `reference_location()` and/or `Ev::location()` for the same node.
/// Implementations therefore must keep the necessary context alive at least
/// until the node is consumed.
fn reference_location(&self) -> Location;
/// Take comments immediately above the next data node.
///
/// Implementations may fill lookahead while doing this. The default is empty
/// for replay buffers that do not carry presentation metadata. If a caller
/// needs comments for a captured node, it must take them from the live stream
/// before calling `capture_node` and carry them separately.
fn take_leading_comments_for_next_node(&mut self) -> Result<Vec<String>, Error> {
Ok(Vec::new())
}
/// Take same-line comments after a mapping key/value separator.
///
/// This is the `# comment` in `key: # comment`, separated from comments
/// immediately above the value node so nested containers do not treat the
/// separator comment as a child-key comment.
fn take_separator_comments_before_mapping_value(&mut self) -> Result<Vec<String>, Error> {
Ok(Vec::new())
}
/// Take same-line comments after a block sequence item marker.
///
/// This is the `# comment` in `- # comment`, separated from ordinary
/// trailing comments after the previous sequence value.
fn take_separator_comments_before_sequence_item_value(&mut self) -> Result<Vec<String>, Error> {
Ok(Vec::new())
}
/// Take same-line comments immediately after the node that was just deserialized.
fn take_trailing_comments_after_node(&mut self) -> Result<Vec<String>, Error> {
Ok(Vec::new())
}
/// Get the original input string for zero-copy borrowing.
///
/// Returns `Some(&str)` when the input is available for borrowing (string-based parsing),
/// or `None` when borrowing is not possible (reader-based parsing or replay buffers).
///
/// Used by:
/// - The deserializer to return borrowed `&str` references when possible.
///
/// This is used by string deserialization to return borrowed scalars when possible.
fn input_for_borrowing(&self) -> Option<&'de str> {
None // Default: borrowing not supported
}
/// Return the property map used for variable interpolation, if configured.
#[cfg(feature = "properties")]
fn property_map(&self) -> Option<&Rc<HashMap<String, String>>> {
None
}
}
#[cold]
pub(super) fn eof_with_loc(events: &dyn Events<'_>) -> Error {
Error::eof().with_location(events.last_location())
}
/// Event source that replays a pre-recorded buffer.
///
/// Replay buffers contain `Ev` values only. Comment hooks therefore use the
/// trait defaults and return empty comment sets; use-site comments must be passed
/// around separately by the map/sequence access code.
pub(super) struct ReplayEvents<'a> {
buf: Vec<Ev<'a>>,
/// Index of the next event to yield (0..=buf.len()).
idx: usize,
/// Optional override for the reference location (use-site) of the next node.
/// When we replay a captured subtree (e.g. an anchored mapping) we often want to
/// preserve *where it was referenced*, not just where it was originally defined.
///
/// Scope/when it applies
/// - The override is used by [`Events::reference_location`].
/// - It is intended to apply to the node currently at `idx` (i.e. the node visible via
/// `peek()`), and is typically kept for the whole replay.
/// - `next()` does not clear it: callers that need different reference locations for
/// different nested nodes should create nested replay sources (which we do during
/// recursive merge expansion).
ref_override: Option<Location>,
#[cfg(feature = "properties")]
property_map: Option<Rc<HashMap<String, String>>>,
}
impl<'a> ReplayEvents<'a> {
/// Create a replay source over `buf`, initially positioned at index 0.
///
/// Arguments:
/// - `buf`: previously captured events.
///
/// Called by:
/// - Merge expansion and recorded key/value deserialization.
pub(super) fn new(
buf: Vec<Ev<'a>>,
#[cfg(feature = "properties")] property_map: Option<Rc<HashMap<String, String>>>,
) -> Self {
Self {
buf,
idx: 0,
ref_override: None,
#[cfg(feature = "properties")]
property_map,
}
}
/// Create a replay source over `buf` with a fixed reference (use-site) location.
///
/// This is primarily used when a recorded node is replayed in a *different place*
/// than where it was defined:
/// - alias replay (`*a`) where the replayed events come from the anchor definition,
/// but `Spanned<T>.referenced` should point at the alias token.
/// - merge expansion (`<<: *m`) where merge-derived fields should point at the merge
/// entry (use-site) even though the actual events come from the merged mapping.
///
/// Note that this does not change the events themselves: `Ev::location()` still
/// points to where each event was originally produced/captured (definition-site).
/// The override only affects [`Events::reference_location`].
pub(super) fn with_reference(
buf: Vec<Ev<'a>>,
reference: Location,
#[cfg(feature = "properties")] property_map: Option<Rc<HashMap<String, String>>>,
) -> Self {
Self {
buf,
idx: 0,
ref_override: Some(reference),
#[cfg(feature = "properties")]
property_map,
}
}
}
impl<'a> Events<'a> for ReplayEvents<'a> {
/// See [`Events::next`]. Replays and advances the internal index.
fn next(&mut self) -> Result<Option<Ev<'a>>, Error> {
if self.idx >= self.buf.len() {
return Ok(None);
}
let location = self.buf[self.idx].location();
// Flag as taken to avoid unexpected reuse.
let ev = mem::replace(&mut self.buf[self.idx], Ev::Taken { location });
self.idx += 1;
Ok(Some(ev))
}
fn peek(&mut self) -> Result<Option<&Ev<'a>>, Error> {
Ok(self.buf.get(self.idx))
}
fn last_location(&self) -> Location {
let last = self.idx.saturating_sub(1);
self.buf
.get(last)
.map(|e| e.location())
.unwrap_or(Location::UNKNOWN)
}
fn reference_location(&self) -> Location {
if let Some(loc) = self.ref_override {
return loc;
}
self.buf
.get(self.idx)
.map(|e| e.location())
.unwrap_or_else(|| self.last_location())
}
#[cfg(feature = "properties")]
fn property_map(&self) -> Option<&Rc<HashMap<String, String>>> {
self.property_map.as_ref()
}
}