Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::iter::Peekable;
35
36use std::sync::Arc;
37
38use crate::error::{Error, ErrorKind};
39use crate::event::{Event, EventMeta, ScalarStyle};
40use crate::node::{Document, Node, NodeMeta};
41use crate::pos::{LineIndex, Pos, Span};
42use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
43
44use comments::{attach_leading_comments, attach_trailing_comment};
45use reloc::reloc;
46use stream::{
47    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
48    with_hash_prefix,
49};
50
51mod comments;
52mod reloc;
53mod stream;
54
55// ---------------------------------------------------------------------------
56// Public error type
57// ---------------------------------------------------------------------------
58
59/// Errors produced by the loader.
60#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
61#[non_exhaustive]
62pub enum LoadError {
63    /// The event stream contained a parse error.
64    #[error("parse error at {pos:?}: {message}")]
65    #[non_exhaustive]
66    Parse {
67        /// Source position where the parse error was detected.
68        pos: Pos,
69        /// Human-readable description of the error.
70        message: String,
71        /// Broad category of the error, for routing without message-string matching.
72        kind: ErrorKind,
73    },
74
75    /// The event stream ended unexpectedly mid-document.
76    #[error("unexpected end of event stream")]
77    UnexpectedEndOfStream,
78
79    /// Nesting depth exceeded the configured limit.
80    #[error("nesting depth limit exceeded at {pos:?} (max: {limit})")]
81    NestingDepthLimitExceeded {
82        /// The configured nesting depth limit that was exceeded.
83        limit: usize,
84        /// Source position of the collection start that exceeded the limit.
85        pos: Pos,
86    },
87
88    /// Too many distinct anchor names were defined.
89    #[error("anchor count limit exceeded at {pos:?} (max: {limit})")]
90    AnchorCountLimitExceeded {
91        /// The configured anchor count limit that was exceeded.
92        limit: usize,
93        /// Source position of the anchor that exceeded the limit.
94        pos: Pos,
95    },
96
97    /// Alias expansion produced more nodes than the configured limit.
98    #[error("alias expansion node limit exceeded at {pos:?} (max: {limit})")]
99    AliasExpansionLimitExceeded {
100        /// The configured expansion node limit that was exceeded.
101        limit: usize,
102        /// Source position of the node that exceeded the expansion limit.
103        pos: Pos,
104    },
105
106    /// A circular alias reference was detected.
107    #[error("circular alias reference at {pos:?}: '{name}'")]
108    CircularAlias {
109        /// The anchor name involved in the cycle.
110        name: String,
111        /// Source position of the alias that triggered the cycle detection.
112        pos: Pos,
113    },
114
115    /// An alias referred to an anchor that was never defined.
116    #[error("undefined alias at {pos:?}: '{name}'")]
117    UndefinedAlias {
118        /// The alias name that had no corresponding anchor definition.
119        name: String,
120        /// Source position of the alias reference.
121        pos: Pos,
122    },
123
124    /// A plain scalar could not be resolved under the JSON schema.
125    ///
126    /// The JSON schema has no fallback: every untagged plain scalar must match
127    /// one of its patterns (null, bool, int, float).  If none match, the scalar
128    /// is an error per YAML 1.2.2 §10.2.
129    ///
130    /// `value` is truncated to 128 Unicode scalar values and ASCII control
131    /// characters (U+0000–U+001F, U+007F) are replaced with `\uXXXX` escapes
132    /// to prevent log injection via the `Display` impl.
133    #[error("JSON schema: plain scalar does not match any type pattern")]
134    UnresolvedScalar {
135        /// The sanitized, truncated scalar value that failed resolution.
136        value: String,
137        /// Source position of the scalar.
138        pos: Pos,
139    },
140}
141
142// Convenience alias used inside the module.
143type Result<T> = std::result::Result<T, LoadError>;
144
145// Type alias for the peekable event stream used throughout the loader.
146type EventStream<'a> =
147    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
148
149/// Unpack an `Option<Box<EventMeta>>` into its four constituent fields.
150#[expect(
151    clippy::type_complexity,
152    reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
153)]
154#[inline]
155fn unpack_meta(
156    meta: Option<Box<EventMeta<'_>>>,
157) -> (
158    Option<&'_ str>,
159    Option<Span>,
160    Option<std::borrow::Cow<'_, str>>,
161    Option<Span>,
162) {
163    meta.map_or((None, None, None, None), |m| {
164        (m.anchor, m.anchor_loc, m.tag, m.tag_loc)
165    })
166}
167
168// ---------------------------------------------------------------------------
169// Configuration
170// ---------------------------------------------------------------------------
171
/// Loader mode — controls how alias references are handled.
///
/// [`LoadMode::Lossless`] is the [`Default`] value, matching the mode that
/// the loader's default options select.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum LoadMode {
    /// Preserve aliases as [`Node::Alias`] nodes (default, safe for LSP).
    #[default]
    Lossless,
    /// Expand aliases inline; subject to `max_expanded_nodes` limit.
    Resolved,
}
180
181/// Security and behaviour options for the loader.
182#[derive(Debug, Clone)]
183pub struct LoaderOptions {
184    /// Maximum mapping/sequence nesting depth before returning
185    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
186    pub max_nesting_depth: usize,
187    /// Maximum number of distinct anchor names per document before returning
188    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
189    pub max_anchors: usize,
190    /// Maximum total nodes produced by alias expansion in resolved mode before
191    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
192    pub max_expanded_nodes: usize,
193    /// Controls how alias references are handled during loading.
194    pub mode: LoadMode,
195    /// YAML 1.2.2 §10 schema to apply during loading (default: [`Schema::Core`]).
196    ///
197    /// Each node's tag is resolved according to this schema after the node is
198    /// constructed.  Nodes with explicit source tags are left unchanged.
199    pub schema: Schema,
200}
201
202impl Default for LoaderOptions {
203    fn default() -> Self {
204        Self {
205            max_nesting_depth: 512,
206            max_anchors: 10_000,
207            max_expanded_nodes: 1_000_000,
208            mode: LoadMode::Lossless,
209            schema: Schema::Core,
210        }
211    }
212}
213
214// ---------------------------------------------------------------------------
215// Builder
216// ---------------------------------------------------------------------------
217
218/// Builder for configuring and creating a [`Loader`].
219///
220/// ```
221/// use rlsp_yaml_parser::loader::LoaderBuilder;
222///
223/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
224/// assert_eq!(docs.len(), 1);
225/// ```
226pub struct LoaderBuilder {
227    options: LoaderOptions,
228}
229
230impl LoaderBuilder {
231    /// Create a builder with default options (lossless mode, safe limits).
232    #[must_use]
233    pub fn new() -> Self {
234        Self {
235            options: LoaderOptions::default(),
236        }
237    }
238
239    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
240    #[must_use]
241    pub const fn lossless(mut self) -> Self {
242        self.options.mode = LoadMode::Lossless;
243        self
244    }
245
246    /// Use resolved mode — aliases are expanded inline.
247    #[must_use]
248    pub const fn resolved(mut self) -> Self {
249        self.options.mode = LoadMode::Resolved;
250        self
251    }
252
253    /// Override the maximum nesting depth.
254    #[must_use]
255    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
256        self.options.max_nesting_depth = limit;
257        self
258    }
259
260    /// Override the maximum anchor count.
261    #[must_use]
262    pub const fn max_anchors(mut self, limit: usize) -> Self {
263        self.options.max_anchors = limit;
264        self
265    }
266
267    /// Override the maximum expanded-node count (resolved mode only).
268    #[must_use]
269    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
270        self.options.max_expanded_nodes = limit;
271        self
272    }
273
274    /// Override the YAML 1.2.2 §10 schema used for tag resolution during loading.
275    ///
276    /// The default is [`Schema::Core`].  Untagged nodes receive resolved tag URIs
277    /// in the AST; nodes with explicit source tags are not modified.
278    #[must_use]
279    pub const fn schema(mut self, s: Schema) -> Self {
280        self.options.schema = s;
281        self
282    }
283
284    /// Consume the builder and produce a [`Loader`].
285    #[must_use]
286    pub const fn build(self) -> Loader {
287        Loader {
288            options: self.options,
289        }
290    }
291}
292
293impl Default for LoaderBuilder {
294    fn default() -> Self {
295        Self::new()
296    }
297}
298
299// ---------------------------------------------------------------------------
300// Loader
301// ---------------------------------------------------------------------------
302
303/// A configured YAML loader.
304pub struct Loader {
305    options: LoaderOptions,
306}
307
308impl Loader {
309    /// Load YAML text into a sequence of documents.
310    ///
311    /// # Errors
312    ///
313    /// Returns `Err` if the input contains a parse error, exceeds a configured
314    /// security limit, or (in resolved mode) references an undefined anchor.
315    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
316        let mut state = LoadState::new(&self.options, input);
317        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
318            Box::new(crate::parse_events(input));
319        state.run(iter.peekable())
320    }
321}
322
323// ---------------------------------------------------------------------------
324// Convenience entry point
325// ---------------------------------------------------------------------------
326
327/// Load YAML text using lossless mode, default security limits, and Core schema tag
328/// resolution (YAML 1.2.2 §10.3).
329///
330/// Returns one `Document<Span>` per YAML document in the stream.  Untagged nodes
331/// receive resolved tag URIs according to the Core schema; nodes with explicit source
332/// tags are left unchanged.
333///
334/// # Errors
335///
336/// Returns `Err` if the input contains a parse error or exceeds a security
337/// limit (nesting depth or anchor count).
338///
339/// ```
340/// use rlsp_yaml_parser::loader::load;
341/// use rlsp_yaml_parser::Node;
342///
343/// let docs = load("hello\n").unwrap();
344/// assert_eq!(docs.len(), 1);
345/// let Node::Scalar { tag, .. } = &docs[0].root else { panic!() };
346/// assert_eq!(tag.as_deref(), Some("tag:yaml.org,2002:str"));
347/// ```
348pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
349    LoaderBuilder::new().lossless().build().load(input)
350}
351
352// ---------------------------------------------------------------------------
353// Internal loader state
354// ---------------------------------------------------------------------------
355
356struct LoadState<'opt> {
357    options: &'opt LoaderOptions,
358    /// Anchors registered so far in the current document: name → node.
359    anchor_map: HashMap<String, Node<Span>>,
360    /// Count of distinct anchors registered (resets per document).
361    anchor_count: usize,
362    /// Current nesting depth (incremented on Begin, decremented on End).
363    depth: usize,
364    /// Total nodes produced via alias expansion (resolved mode only).
365    expanded_nodes: usize,
366    /// Leading comments accumulated by `parse_node` when it encounters a
367    /// `Comment` event between a mapping key and its value's collection start,
368    /// or by a sequence/mapping loop when it hits End with leftover leading
369    /// comments.  The next mapping/sequence loop iteration picks these up and
370    /// prepends them to the next entry's leading comments.
371    pending_leading: Vec<String>,
372    /// Line index for the current document source; shared across all documents
373    /// produced from the same input via `Arc` to avoid N full copies.
374    line_index: Arc<LineIndex>,
375}
376
377impl<'opt> LoadState<'opt> {
378    fn new(options: &'opt LoaderOptions, input: &str) -> Self {
379        Self {
380            options,
381            anchor_map: HashMap::new(),
382            anchor_count: 0,
383            depth: 0,
384            expanded_nodes: 0,
385            pending_leading: Vec::new(),
386            line_index: Arc::new(LineIndex::new(input)),
387        }
388    }
389
390    fn reset_for_document(&mut self) {
391        self.anchor_map.clear();
392        self.anchor_count = 0;
393        self.expanded_nodes = 0;
394        self.pending_leading.clear();
395    }
396
397    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
398        let mut docs: Vec<Document<Span>> = Vec::new();
399
400        // Skip StreamStart.
401        match stream.next() {
402            Some(Ok(_)) | None => {}
403            Some(Err(e)) => {
404                return Err(LoadError::Parse {
405                    pos: e.pos,
406                    message: e.message,
407                    kind: e.kind,
408                });
409            }
410        }
411
412        loop {
413            // Skip any leading comments or unknown events before a document.
414            match next_from(&mut stream)? {
415                None | Some((Event::StreamEnd, _)) => break,
416                Some((
417                    Event::DocumentStart {
418                        explicit,
419                        version,
420                        tag_directives,
421                    },
422                    _,
423                )) => {
424                    let doc_explicit_start = explicit;
425                    let doc_version = version;
426                    let doc_tags = tag_directives;
427                    self.reset_for_document();
428
429                    let mut doc_comments: Vec<String> = Vec::new();
430
431                    // Consume leading comments at document level.
432                    consume_leading_doc_comments(&mut stream, &mut doc_comments, &self.line_index)?;
433
434                    // Parse root node (may be absent for empty documents).
435                    let root = if is_document_end(stream.peek()) {
436                        // Empty document — emit an empty scalar as root.
437                        let mut node = empty_scalar();
438                        apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
439                        node
440                    } else {
441                        self.parse_node(&mut stream)?
442                    };
443
444                    // Consume DocumentEnd if present and capture its explicit flag.
445                    let doc_explicit_end =
446                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
447                            let end_explicit = *explicit;
448                            let _ = stream.next();
449                            end_explicit
450                        } else {
451                            false
452                        };
453
454                    docs.push(Document {
455                        root,
456                        version: doc_version,
457                        tags: doc_tags,
458                        comments: doc_comments,
459                        explicit_start: doc_explicit_start,
460                        explicit_end: doc_explicit_end,
461                        line_index: Some(self.line_index.clone()),
462                    });
463                }
464                Some(_) => {
465                    // Comment or any other stray event outside a document — skip.
466                }
467            }
468        }
469
470        Ok(docs)
471    }
472
473    /// Parse a single node from the stream.
474    ///
475    /// Advances the stream past the node (including end-of-container events).
476    #[expect(
477        clippy::too_many_lines,
478        reason = "match-on-event-type; splitting would obscure flow"
479    )]
480    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
481        // Structural end events close the caller's collection loop — do NOT
482        // consume them here.  Return an empty scalar and leave the event in
483        // the stream so the outer mapping/sequence loop can see and consume it.
484        if matches!(
485            stream.peek(),
486            Some(Ok((
487                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
488                _
489            )))
490        ) {
491            return Ok(empty_scalar());
492        }
493
494        let Some((event, span)) = next_from(stream)? else {
495            return Ok(empty_scalar());
496        };
497
498        match event {
499            Event::Scalar { value, style, meta } => {
500                let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
501                let anchor = anchor.map(str::to_owned);
502                // Capture the anchor span before it moves into NodeMeta.
503                let anchor_span = anchor_loc.unwrap_or(span);
504                let mut node = Node::Scalar {
505                    value: value.into_owned(),
506                    style,
507                    tag: tag.map(|t| Cow::Owned(t.into_owned())),
508                    loc: span,
509                    meta: NodeMeta {
510                        anchor,
511                        anchor_loc,
512                        tag_loc,
513                        leading_comments: None,
514                        trailing_comment: None,
515                    }
516                    .into_option(),
517                };
518                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
519                if let Some(name) = node.anchor() {
520                    self.register_anchor(name.to_owned(), &node, anchor_span)?;
521                }
522                Ok(node)
523            }
524
525            Event::MappingStart { style, meta } => {
526                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
527                let anchor = event_anchor.map(str::to_owned);
528                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
529                let anchor_for_registration = anchor.clone();
530                // Capture the anchor span before it moves into NodeMeta.
531                let anchor_span = anchor_loc.unwrap_or(span);
532
533                self.depth += 1;
534                if self.depth > self.options.max_nesting_depth {
535                    return Err(LoadError::NestingDepthLimitExceeded {
536                        limit: self.options.max_nesting_depth,
537                        pos: span_start_to_pos(span.start, &self.line_index),
538                    });
539                }
540
541                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
542                let mut end_span = span;
543
544                loop {
545                    // Consume leading comments before the next key.  Also
546                    // collect any comments that spilled over from a sibling
547                    // value's collection end (stored in `pending_leading`).
548                    let raw_leading = consume_leading_comments(stream)?;
549                    let leading = if self.pending_leading.is_empty() {
550                        raw_leading
551                    } else {
552                        let mut combined = std::mem::take(&mut self.pending_leading);
553                        combined.extend(raw_leading);
554                        combined
555                    };
556
557                    match stream.peek() {
558                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
559                            // Save any collected leading comments so the next
560                            // sibling entry in the parent collection can inherit
561                            // them (e.g. a comment just before MappingEnd that
562                            // belongs to the following mapping entry).
563                            if !leading.is_empty() {
564                                self.pending_leading = leading;
565                            }
566                            break;
567                        }
568                        Some(Err(_)) => {
569                            // Consume the error.
570                            return Err(match stream.next() {
571                                Some(Err(e)) => LoadError::Parse {
572                                    pos: e.pos,
573                                    message: e.message,
574                                    kind: e.kind,
575                                },
576                                _ => LoadError::UnexpectedEndOfStream,
577                            });
578                        }
579                        Some(Ok(_)) => {}
580                    }
581
582                    let mut key = self.parse_node(stream)?;
583                    attach_leading_comments(&mut key, leading);
584
585                    let mut value = self.parse_node(stream)?;
586
587                    // Trailing comment on the value — peek for inline comment.
588                    // Block scalars (literal `|` and folded `>`) consume trailing
589                    // blank lines as part of chomping; their span.end falls on the
590                    // first line after the scalar, which can coincide with the
591                    // next comment's line number.  That would falsely attach a
592                    // leading inter-node comment as a trailing inline comment.
593                    // Block scalars never have an inline comment on their content
594                    // lines, so skip trailing-comment detection for them.
595                    if !is_block_scalar(&value)
596                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
597                    {
598                        let value_end_line = node_end_line(&value, &self.line_index);
599                        if let Some(trail) =
600                            peek_trailing_comment(stream, value_end_line, &self.line_index)?
601                        {
602                            attach_trailing_comment(&mut value, trail);
603                        }
604                    }
605
606                    entries.push((key, value));
607                }
608
609                // Consume MappingEnd and capture its span.
610                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
611                    end_span = *end;
612                    let _ = stream.next();
613                }
614                self.depth -= 1;
615
616                let mut node = Node::Mapping {
617                    entries,
618                    style,
619                    tag,
620                    loc: Span {
621                        start: span.start,
622                        end: end_span.end,
623                    },
624                    meta: NodeMeta {
625                        anchor,
626                        anchor_loc,
627                        tag_loc,
628                        leading_comments: None,
629                        trailing_comment: None,
630                    }
631                    .into_option(),
632                };
633                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
634                if let Some(name) = anchor_for_registration {
635                    self.register_anchor(name, &node, anchor_span)?;
636                }
637                Ok(node)
638            }
639
640            Event::SequenceStart { style, meta } => {
641                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
642                let anchor = event_anchor.map(str::to_owned);
643                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
644                let anchor_for_registration = anchor.clone();
645                // Capture the anchor span before it moves into NodeMeta.
646                let anchor_span = anchor_loc.unwrap_or(span);
647
648                self.depth += 1;
649                if self.depth > self.options.max_nesting_depth {
650                    return Err(LoadError::NestingDepthLimitExceeded {
651                        limit: self.options.max_nesting_depth,
652                        pos: span_start_to_pos(span.start, &self.line_index),
653                    });
654                }
655
656                let mut items: Vec<Node<Span>> = Vec::new();
657                let mut end_span = span;
658
659                loop {
660                    // Collect leading comments before the next item.  Also
661                    // collect any comments that spilled over from a sibling
662                    // value's collection end (stored in `pending_leading`).
663                    let raw_leading = consume_leading_comments(stream)?;
664                    let leading = if self.pending_leading.is_empty() {
665                        raw_leading
666                    } else {
667                        let mut combined = std::mem::take(&mut self.pending_leading);
668                        combined.extend(raw_leading);
669                        combined
670                    };
671
672                    match stream.peek() {
673                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
674                            // Save any collected leading comments so the next
675                            // sibling entry in the parent collection can inherit
676                            // them (e.g. a comment just before SequenceEnd that
677                            // belongs to the following sequence item or mapping
678                            // entry in the parent).
679                            if !leading.is_empty() {
680                                self.pending_leading = leading;
681                            }
682                            break;
683                        }
684                        Some(Err(_)) => {
685                            // Consume the error.
686                            return Err(match stream.next() {
687                                Some(Err(e)) => LoadError::Parse {
688                                    pos: e.pos,
689                                    message: e.message,
690                                    kind: e.kind,
691                                },
692                                _ => LoadError::UnexpectedEndOfStream,
693                            });
694                        }
695                        Some(Ok(_)) => {}
696                    }
697
698                    let mut item = self.parse_node(stream)?;
699                    attach_leading_comments(&mut item, leading);
700
701                    // Trailing comment on the item — peek for inline comment.
702                    // Block scalars are excluded for the same reason as in the
703                    // mapping path: their span.end can coincide with the next
704                    // comment's line, falsely turning a leading comment into a
705                    // trailing one.
706                    if !is_block_scalar(&item)
707                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
708                    {
709                        let item_end_line = node_end_line(&item, &self.line_index);
710                        if let Some(trail) =
711                            peek_trailing_comment(stream, item_end_line, &self.line_index)?
712                        {
713                            attach_trailing_comment(&mut item, trail);
714                        }
715                    }
716
717                    items.push(item);
718                }
719
720                // Consume SequenceEnd and capture its span.
721                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
722                    end_span = *end;
723                    let _ = stream.next();
724                }
725                self.depth -= 1;
726
727                let mut node = Node::Sequence {
728                    items,
729                    style,
730                    tag,
731                    loc: Span {
732                        start: span.start,
733                        end: end_span.end,
734                    },
735                    meta: NodeMeta {
736                        anchor,
737                        anchor_loc,
738                        tag_loc,
739                        leading_comments: None,
740                        trailing_comment: None,
741                    }
742                    .into_option(),
743                };
744                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
745                if let Some(name) = anchor_for_registration {
746                    self.register_anchor(name, &node, anchor_span)?;
747                }
748                Ok(node)
749            }
750
751            Event::Alias { name } => {
752                let name = name.to_owned();
753                self.resolve_alias(&name, span)
754            }
755
756            Event::Comment { text } => {
757                // Comment between a mapping key and its collection value (e.g.
758                // `key:\n  # comment\n  subkey: val`).  The comment appears
759                // after the key Scalar and before the MappingStart/SequenceStart
760                // that begins the value.  Save it in `pending_leading` so the
761                // first entry of the upcoming collection can inherit it.
762                self.pending_leading.push(with_hash_prefix(text));
763                self.parse_node(stream)
764            }
765
766            Event::StreamStart
767            | Event::StreamEnd
768            | Event::DocumentStart { .. }
769            | Event::DocumentEnd { .. }
770            | Event::MappingEnd
771            | Event::SequenceEnd => {
772                // Structural event where a node is expected — return empty scalar.
773                Ok(empty_scalar())
774            }
775        }
776    }
777
778    fn register_anchor(
779        &mut self,
780        name: String,
781        node: &Node<Span>,
782        anchor_span: Span,
783    ) -> Result<()> {
784        let pos = span_start_to_pos(anchor_span.start, &self.line_index);
785        if !self.anchor_map.contains_key(&name) {
786            self.anchor_count += 1;
787            if self.anchor_count > self.options.max_anchors {
788                return Err(LoadError::AnchorCountLimitExceeded {
789                    limit: self.options.max_anchors,
790                    pos,
791                });
792            }
793        }
794        // Count the anchor node itself toward the expansion budget in resolved
795        // mode so that the total reflects every node present in the expanded
796        // document (anchor definition + each alias expansion).
797        if self.options.mode == LoadMode::Resolved {
798            self.expanded_nodes += 1;
799            if self.expanded_nodes > self.options.max_expanded_nodes {
800                return Err(LoadError::AliasExpansionLimitExceeded {
801                    limit: self.options.max_expanded_nodes,
802                    pos,
803                });
804            }
805            self.anchor_map.insert(name, node.clone());
806        } else {
807            // Lossless mode never reads anchor_map for expansion; store a
808            // zero-cost placeholder so contains_key still detects re-definitions.
809            self.anchor_map.insert(name, empty_scalar());
810        }
811        Ok(())
812    }
813
814    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
815        match self.options.mode {
816            LoadMode::Lossless => Ok(Node::Alias {
817                name: name.to_owned(),
818                loc,
819                leading_comments: None,
820                trailing_comment: None,
821            }),
822            LoadMode::Resolved => {
823                let pos = span_start_to_pos(loc.start, &self.line_index);
824                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
825                    LoadError::UndefinedAlias {
826                        name: name.to_owned(),
827                        pos,
828                    }
829                })?;
830                let mut in_progress: HashSet<String> = HashSet::new();
831                self.expand_node(anchored, &mut in_progress, loc)
832            }
833        }
834    }
835
836    /// Recursively expand a node, counting every node produced against the
837    /// expansion limit and checking for cycles via `in_progress`.
838    ///
839    /// `alias_loc` is the span of the alias site that triggered this expansion
840    /// chain; it is used for error positions when the limit or a cycle is
841    /// detected inside expanded content.
842    fn expand_node(
843        &mut self,
844        node: Node<Span>,
845        in_progress: &mut HashSet<String>,
846        alias_loc: Span,
847    ) -> Result<Node<Span>> {
848        // Increment at the top — before child recursion — so every node
849        // (including non-alias nodes inside expanded trees) counts against the
850        // budget.
851        self.expanded_nodes += 1;
852        if self.expanded_nodes > self.options.max_expanded_nodes {
853            return Err(LoadError::AliasExpansionLimitExceeded {
854                limit: self.options.max_expanded_nodes,
855                pos: span_start_to_pos(alias_loc.start, &self.line_index),
856            });
857        }
858
859        match node {
860            Node::Alias { ref name, loc, .. } => {
861                let pos = span_start_to_pos(loc.start, &self.line_index);
862                if in_progress.contains(name) {
863                    return Err(LoadError::CircularAlias {
864                        name: name.clone(),
865                        pos,
866                    });
867                }
868                let target = self.anchor_map.get(name).cloned().ok_or_else(|| {
869                    LoadError::UndefinedAlias {
870                        name: name.clone(),
871                        pos,
872                    }
873                })?;
874                in_progress.insert(name.clone());
875                // Pass the inner alias loc as the new alias_loc for deeper expansion.
876                let expanded = self.expand_node(target, in_progress, loc)?;
877                in_progress.remove(name);
878                // Re-stamp with the alias site's location.
879                Ok(reloc(expanded, loc))
880            }
881            Node::Mapping {
882                entries,
883                style,
884                tag,
885                loc,
886                meta,
887            } => {
888                let mut expanded_entries = Vec::with_capacity(entries.len());
889                for (k, v) in entries {
890                    let ek = self.expand_node(k, in_progress, alias_loc)?;
891                    let ev = self.expand_node(v, in_progress, alias_loc)?;
892                    expanded_entries.push((ek, ev));
893                }
894                Ok(Node::Mapping {
895                    entries: expanded_entries,
896                    style,
897                    tag,
898                    loc,
899                    meta,
900                })
901            }
902            Node::Sequence {
903                items,
904                style,
905                tag,
906                loc,
907                meta,
908            } => {
909                let mut expanded_items = Vec::with_capacity(items.len());
910                for item in items {
911                    expanded_items.push(self.expand_node(item, in_progress, alias_loc)?);
912                }
913                Ok(Node::Sequence {
914                    items: expanded_items,
915                    style,
916                    tag,
917                    loc,
918                    meta,
919                })
920            }
921            // Scalars and already-resolved nodes — pass through.
922            scalar @ Node::Scalar { .. } => Ok(scalar),
923        }
924    }
925}
926
927/// Return `true` if the peeked item signals end of document (or stream).
928const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
929    matches!(
930        peeked,
931        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
932    )
933}
934
935/// Convert a `Span.start` byte offset to a `Pos` with accurate line/column.
936#[inline]
937fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
938    let (line, column) = line_index.line_column(offset);
939    Pos {
940        byte_offset: offset as usize,
941        line: line as usize,
942        column: column as usize,
943    }
944}
945
946/// Return the line number of a node's span end position.
947///
948/// Used to determine whether the next `Comment` event is trailing (same line)
949/// or leading (different line).
950#[inline]
951fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
952    let end_offset = match node {
953        Node::Scalar { loc, .. }
954        | Node::Mapping { loc, .. }
955        | Node::Sequence { loc, .. }
956        | Node::Alias { loc, .. } => loc.end,
957    };
958    line_index.line_column(end_offset).0
959}
960
961/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
962///
963/// Block scalars consume trailing blank lines as part of chomping, so their
964/// `span.end` falls on the line *after* the last consumed line.  This means a
965/// comment on the immediately following line has the same line number as
966/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
967/// classify it as an inline trailing comment.  The caller uses this predicate
968/// to skip trailing-comment detection for block scalars.
969#[inline]
970const fn is_block_scalar(node: &Node<Span>) -> bool {
971    matches!(
972        node,
973        Node::Scalar {
974            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
975            ..
976        }
977    )
978}
979
980// ---------------------------------------------------------------------------
981// Schema resolution helpers
982// ---------------------------------------------------------------------------
983
/// Upper bound, in Unicode scalar values, on the scalar text copied into the
/// `value` field of [`LoadError::UnresolvedScalar`].  Keeps user-supplied
/// input from causing unbounded allocation inside error messages.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Make a raw scalar value safe to embed in an error message.
///
/// - At most [`UNRESOLVED_VALUE_MAX_CHARS`] Unicode scalar values are kept;
///   `"..."` is appended when the input was longer.
/// - Each ASCII control character (U+0000–U+001F and U+007F) is written as a
///   `\uXXXX` hex escape so the `Display` output cannot be used for log
///   injection.  All other characters are copied unchanged.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let capacity = raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2);
    let mut sanitized = String::with_capacity(capacity);

    let mut remaining = raw.chars();
    for ch in remaining.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_ascii_control() {
            // Escape instead of copying, so the message stays on one line.
            let escape = format!("\\u{:04X}", u32::from(ch));
            sanitized.push_str(&escape);
        } else {
            sanitized.push(ch);
        }
    }

    // Any character left unread means the limit cut the input short.
    if remaining.next().is_some() {
        sanitized.push_str("...");
    }
    sanitized
}
1018
1019/// Apply schema tag resolution to a freshly-constructed node.
1020///
1021/// - For scalars: translates bare `!` to `None` (non-specific), then calls
1022///   `resolve_scalar`.
1023/// - For mappings/sequences: translates bare `!` to `None`, then calls
1024///   `resolve_collection`.
1025/// - On `Ok(Some(tag))`: overwrites `node.tag`; `tag_loc` is left `None`
1026///   (no source position for a resolved tag).
1027/// - On `Ok(None)` (explicit tag present): leaves `node.tag` unchanged.
1028///
1029/// # Errors
1030///
1031/// Returns [`LoadError::UnresolvedScalar`] when `schema` is [`Schema::Json`]
1032/// and a plain scalar does not match any JSON type pattern.
1033#[inline]
1034fn apply_schema_to_node(
1035    node: &mut Node<Span>,
1036    schema: Schema,
1037    line_index: &LineIndex,
1038) -> Result<()> {
1039    match node {
1040        Node::Scalar {
1041            value,
1042            style,
1043            tag,
1044            loc,
1045            meta,
1046        } => {
1047            // Bare `!` on a scalar is the non-specific scalar tag — it resolves
1048            // unconditionally to !!str regardless of content (YAML 1.2.2 §10.2.1,
1049            // §10.3.2: "non-specific" tag for scalars = Failsafe str).  We handle
1050            // it before calling the schema resolver so Core doesn't pattern-match
1051            // the value.
1052            //
1053            // `tag_loc` is preserved here (NOT cleared) because `!` is explicitly
1054            // written in the source.  Preserving `tag_loc` lets downstream consumers
1055            // (e.g. the formatter) distinguish user-authored tags from resolver-injected
1056            // ones, which is critical for correct idempotent output.
1057            if tag.as_deref() == Some("!") {
1058                *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
1059                return Ok(());
1060            }
1061            // All other tags: pass through as-is (Some(non-!) = explicit tag → Ok(None)).
1062            match resolve_scalar(schema, *style, value, tag.as_deref()) {
1063                Ok(Some(resolved)) => {
1064                    *tag = Some(Cow::Borrowed(resolved.as_str()));
1065                    // Clear tag_loc: resolver-injected tags have no source position.
1066                    if let Some(m) = meta.as_mut() {
1067                        m.tag_loc = None;
1068                        if m.is_all_none() {
1069                            *meta = None;
1070                        }
1071                    }
1072                }
1073                Ok(None) => {}
1074                Err(_) => {
1075                    return Err(LoadError::UnresolvedScalar {
1076                        value: sanitize_scalar_for_error(value),
1077                        pos: span_start_to_pos(loc.start, line_index),
1078                    });
1079                }
1080            }
1081        }
1082        Node::Mapping { tag, meta, .. } => {
1083            // Bare `!` on a collection means non-specific collection tag — translate
1084            // to None so the resolver returns the kind-based tag (!!map / !!seq).
1085            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1086            if let Some(resolved) =
1087                resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1088            {
1089                *tag = Some(Cow::Borrowed(resolved.as_str()));
1090                if let Some(m) = meta.as_mut() {
1091                    m.tag_loc = None;
1092                    if m.is_all_none() {
1093                        *meta = None;
1094                    }
1095                }
1096            }
1097        }
1098        Node::Sequence { tag, meta, .. } => {
1099            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1100            if let Some(resolved) =
1101                resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1102            {
1103                *tag = Some(Cow::Borrowed(resolved.as_str()));
1104                if let Some(m) = meta.as_mut() {
1105                    m.tag_loc = None;
1106                    if m.is_all_none() {
1107                        *meta = None;
1108                    }
1109                }
1110            }
1111        }
1112        Node::Alias { .. } => {}
1113    }
1114    Ok(())
1115}
1116
1117// ---------------------------------------------------------------------------
1118// Node helpers
1119// ---------------------------------------------------------------------------
1120
1121const fn empty_scalar() -> Node<Span> {
1122    Node::Scalar {
1123        value: String::new(),
1124        style: ScalarStyle::Plain,
1125        tag: None,
1126        loc: Span { start: 0, end: 0 },
1127        meta: None,
1128    }
1129}
1130
1131// ---------------------------------------------------------------------------
1132// Tests
1133// ---------------------------------------------------------------------------
1134
1135#[cfg(test)]
1136#[expect(
1137    clippy::expect_used,
1138    clippy::unwrap_used,
1139    clippy::indexing_slicing,
1140    clippy::panic,
1141    reason = "test code"
1142)]
1143mod tests {
1144    use super::*;
1145    use rstest::rstest;
1146
1147    #[test]
1148    fn loader_state_resets_anchor_map_between_documents() {
1149        // In resolved mode: anchor defined in doc 1 must not be visible in doc 2.
1150        let result = LoaderBuilder::new()
1151            .resolved()
1152            .build()
1153            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
1154        assert!(
1155            result.is_err(),
1156            "expected Err: *foo in doc 2 should be undefined"
1157        );
1158        assert!(matches!(
1159            result.unwrap_err(),
1160            LoadError::UndefinedAlias { .. }
1161        ));
1162    }
1163
1164    #[test]
1165    fn register_anchor_increments_count() {
1166        let options = LoaderOptions {
1167            max_anchors: 2,
1168            ..LoaderOptions::default()
1169        };
1170        let mut state = LoadState::new(&options, "");
1171        let node = Node::Scalar {
1172            value: "x".to_owned(),
1173            style: ScalarStyle::Plain,
1174            tag: None,
1175            loc: Span { start: 0, end: 0 },
1176            meta: None,
1177        };
1178        let dummy_span = Span { start: 0, end: 0 };
1179        assert!(
1180            state
1181                .register_anchor("a".to_owned(), &node, dummy_span)
1182                .is_ok()
1183        );
1184        assert!(
1185            state
1186                .register_anchor("b".to_owned(), &node, dummy_span)
1187                .is_ok()
1188        );
1189        let err = state
1190            .register_anchor("c".to_owned(), &node, dummy_span)
1191            .expect_err("expected AnchorCountLimitExceeded");
1192        assert!(matches!(
1193            err,
1194            LoadError::AnchorCountLimitExceeded { limit: 2, .. }
1195        ));
1196    }
1197
1198    #[test]
1199    fn expand_node_detects_circular_alias() {
1200        let options = LoaderOptions {
1201            mode: LoadMode::Resolved,
1202            ..LoaderOptions::default()
1203        };
1204        let mut state = LoadState::new(&options, "");
1205        // Insert a self-referential alias node.
1206        let alias_node = Node::Alias {
1207            name: "a".to_owned(),
1208            loc: Span { start: 0, end: 0 },
1209            leading_comments: None,
1210            trailing_comment: None,
1211        };
1212        state.anchor_map.insert("a".to_owned(), alias_node.clone());
1213        let mut in_progress = HashSet::new();
1214        let alias_loc = Span { start: 0, end: 0 };
1215        let result = state.expand_node(alias_node, &mut in_progress, alias_loc);
1216        assert!(
1217            matches!(result, Err(LoadError::CircularAlias { .. })),
1218            "expected CircularAlias, got: {result:?}"
1219        );
1220    }
1221
1222    // -----------------------------------------------------------------------
1223    // Comment between mapping key and nested collection is attached to first nested entry
1224    // -----------------------------------------------------------------------
1225
1226    #[test]
1227    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
1228        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1229        let root = &docs[0].root;
1230        let Node::Mapping { entries, .. } = root else {
1231            panic!("expected root mapping");
1232        };
1233        assert_eq!(entries.len(), 1);
1234        let (_outer_key, outer_value) = &entries[0];
1235        let Node::Mapping {
1236            entries: nested, ..
1237        } = outer_value
1238        else {
1239            panic!("expected nested mapping");
1240        };
1241        assert_eq!(nested.len(), 1);
1242        let (inner_key, _) = &nested[0];
1243        assert_eq!(
1244            inner_key.leading_comments(),
1245            &["# Style 1"],
1246            "comment should be attached to the first nested key"
1247        );
1248    }
1249
1250    #[test]
1251    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
1252        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
1253        let root = &docs[0].root;
1254        let Node::Mapping { entries, .. } = root else {
1255            panic!("expected root mapping");
1256        };
1257        let (_key, seq_value) = &entries[0];
1258        let Node::Sequence { items, .. } = seq_value else {
1259            panic!("expected sequence value");
1260        };
1261        assert_eq!(
1262            items[0].leading_comments(),
1263            &["# leading"],
1264            "comment should be attached to first sequence item"
1265        );
1266    }
1267
1268    #[test]
1269    fn multiple_comments_between_key_and_collection_all_preserved() {
1270        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
1271        let root = &docs[0].root;
1272        let Node::Mapping { entries, .. } = root else {
1273            panic!("expected root mapping");
1274        };
1275        let (_key, seq_value) = &entries[0];
1276        let Node::Sequence { items, .. } = seq_value else {
1277            panic!("expected sequence value");
1278        };
1279        assert_eq!(
1280            items[0].leading_comments(),
1281            &["# first", "# second"],
1282            "both comments should be on first item"
1283        );
1284    }
1285
1286    #[test]
1287    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1288        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1289        let root = &docs[0].root;
1290        let Node::Mapping { entries, .. } = root else {
1291            panic!("expected root mapping");
1292        };
1293        let (outer_key, _) = &entries[0];
1294        assert!(
1295            outer_key.leading_comments().is_empty(),
1296            "outer key should have no leading comments"
1297        );
1298        assert!(
1299            outer_key.trailing_comment().is_none(),
1300            "outer key should have no trailing comment"
1301        );
1302    }
1303
1304    #[test]
1305    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1306        let docs = load("key:\n  inner: val\n").unwrap();
1307        let root = &docs[0].root;
1308        let Node::Mapping { entries, .. } = root else {
1309            panic!("expected root mapping");
1310        };
1311        let (_key, nested) = &entries[0];
1312        let Node::Mapping {
1313            entries: nested_entries,
1314            ..
1315        } = nested
1316        else {
1317            panic!("expected nested mapping");
1318        };
1319        let (inner_key, _) = &nested_entries[0];
1320        assert!(
1321            inner_key.leading_comments().is_empty(),
1322            "inner key should have no leading comments when there is no comment"
1323        );
1324    }
1325
1326    // -----------------------------------------------------------------------
1327    // Trailing comment of nested collection becomes leading comment on next sibling
1328    // -----------------------------------------------------------------------
1329
1330    #[test]
1331    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1332        let input =
1333            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
1334        let docs = load(input).unwrap();
1335        let root = &docs[0].root;
1336        let Node::Mapping { entries, .. } = root else {
1337            panic!("expected root mapping");
1338        };
1339        let (_lists_key, nested) = &entries[0];
1340        let Node::Mapping {
1341            entries: nested_entries,
1342            ..
1343        } = nested
1344        else {
1345            panic!("expected nested mapping");
1346        };
1347        assert_eq!(nested_entries.len(), 2);
1348        let (list_b_key, _) = &nested_entries[1];
1349        assert_eq!(
1350            list_b_key.leading_comments(),
1351            &["# Style 2"],
1352            "# Style 2 should be leading comment on list-b key"
1353        );
1354    }
1355
1356    #[test]
1357    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1358        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
1359        let docs = load(input).unwrap();
1360        let root = &docs[0].root;
1361        let Node::Mapping { entries, .. } = root else {
1362            panic!("expected root mapping");
1363        };
1364        let (_outer_key, outer_val) = &entries[0];
1365        let Node::Mapping {
1366            entries: nested, ..
1367        } = outer_val
1368        else {
1369            panic!("expected nested mapping");
1370        };
1371        assert_eq!(nested.len(), 2);
1372        let (b_key, _) = &nested[1];
1373        assert_eq!(
1374            b_key.leading_comments(),
1375            &["# between"],
1376            "# between should be leading comment on b key"
1377        );
1378    }
1379
1380    #[test]
1381    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1382        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
1383        let docs = load(input).unwrap();
1384        let root = &docs[0].root;
1385        let Node::Mapping { entries, .. } = root else {
1386            panic!("expected root mapping");
1387        };
1388        let (_parent_key, parent_val) = &entries[0];
1389        let Node::Mapping {
1390            entries: siblings, ..
1391        } = parent_val
1392        else {
1393            panic!("expected parent mapping value");
1394        };
1395        assert_eq!(siblings.len(), 2);
1396        let (child2_key, _) = &siblings[1];
1397        assert_eq!(
1398            child2_key.leading_comments(),
1399            &["# end-of-child1"],
1400            "# end-of-child1 should be leading comment on child2 key"
1401        );
1402    }
1403
1404    #[test]
1405    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
1406        let input = "items:\n  - a\n  - b\n  # tail\n";
1407        let docs = load(input).unwrap();
1408        // The document must parse successfully (no panic, no error).
1409        assert!(!docs.is_empty(), "document should parse without error");
1410        // The # tail comment must not cause data loss — the sequence items are intact.
1411        let root = &docs[0].root;
1412        let Node::Mapping { entries, .. } = root else {
1413            panic!("expected root mapping");
1414        };
1415        let (_items_key, seq_val) = &entries[0];
1416        let Node::Sequence { items, .. } = seq_val else {
1417            panic!("expected sequence value");
1418        };
1419        assert_eq!(items.len(), 2, "sequence items must not be lost");
1420    }
1421
1422    #[test]
1423    fn no_overflow_comments_when_collection_ends_cleanly() {
1424        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
1425        let root = &docs[0].root;
1426        let Node::Mapping { entries, .. } = root else {
1427            panic!("expected root mapping");
1428        };
1429        let (_key, seq_val) = &entries[0];
1430        let Node::Sequence { items, .. } = seq_val else {
1431            panic!("expected sequence value");
1432        };
1433        for item in items {
1434            assert!(
1435                item.leading_comments().is_empty(),
1436                "items should have no leading comments"
1437            );
1438        }
1439    }
1440
1441    // -----------------------------------------------------------------------
1442    // Combined scenarios
1443    // -----------------------------------------------------------------------
1444
1445    #[test]
1446    fn original_bug_report_input_preserves_both_comments() {
1447        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
1448        let docs = load(input).unwrap();
1449        let root = &docs[0].root;
1450        let Node::Mapping { entries, .. } = root else {
1451            panic!("expected root mapping");
1452        };
1453        let (_lists_key, nested) = &entries[0];
1454        let Node::Mapping {
1455            entries: nested_entries,
1456            ..
1457        } = nested
1458        else {
1459            panic!("expected nested mapping");
1460        };
1461        assert_eq!(nested_entries.len(), 2);
1462        let (first_key, _) = &nested_entries[0];
1463        let (second_key, _) = &nested_entries[1];
1464        assert_eq!(
1465            first_key.leading_comments(),
1466            &["# Style 1"],
1467            "list-a should have # Style 1 as leading comment"
1468        );
1469        assert_eq!(
1470            second_key.leading_comments(),
1471            &["# Style 2"],
1472            "list-b should have # Style 2 as leading comment"
1473        );
1474    }
1475
1476    #[test]
1477    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1478        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
1479        let docs = load(input).unwrap();
1480        let root = &docs[0].root;
1481        let Node::Mapping { entries, .. } = root else {
1482            panic!("expected root mapping");
1483        };
1484        let (_map_key, map_val) = &entries[0];
1485        let Node::Mapping {
1486            entries: siblings, ..
1487        } = map_val
1488        else {
1489            panic!("expected mapping value");
1490        };
1491        assert_eq!(siblings.len(), 2);
1492        let (key1, val1) = &siblings[0];
1493        let (key2, _) = &siblings[1];
1494        assert_eq!(key1.leading_comments(), &["# leading"]);
1495        assert_eq!(val1.trailing_comment(), Some("# trailing"));
1496        assert_eq!(key2.leading_comments(), &["# next-leading"]);
1497    }
1498
1499    #[test]
1500    fn deeply_nested_overflow_comments_reach_correct_sibling() {
1501        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
1502        let docs = load(input).unwrap();
1503        let root = &docs[0].root;
1504        let Node::Mapping { entries, .. } = root else {
1505            panic!("expected root mapping");
1506        };
1507        let (_top_key, top_val) = &entries[0];
1508        let Node::Mapping {
1509            entries: top_entries,
1510            ..
1511        } = top_val
1512        else {
1513            panic!("expected top-level mapping");
1514        };
1515        assert_eq!(top_entries.len(), 2);
1516        let (next_key, _) = &top_entries[1];
1517        assert_eq!(
1518            next_key.leading_comments(),
1519            &["# deep-overflow"],
1520            "# deep-overflow should propagate from nested sequence to next sibling"
1521        );
1522    }
1523
1524    // -----------------------------------------------------------------------
1525    // Document marker flags (explicit_start / explicit_end)
1526    // -----------------------------------------------------------------------
1527
1528    #[rstest]
1529    #[case::bare_document("key: value\n", false, false)]
1530    #[case::start_marker_only("---\nkey: value\n", true, false)]
1531    #[case::end_marker_only("key: value\n...\n", false, true)]
1532    #[case::both_markers("---\nkey: value\n...\n", true, true)]
1533    #[case::empty_with_both_markers("---\n...\n", true, true)]
1534    fn document_marker_flags_match_input(
1535        #[case] input: &str,
1536        #[case] expected_start: bool,
1537        #[case] expected_end: bool,
1538    ) {
1539        let docs = load(input).expect("load failed");
1540        assert_eq!(docs.len(), 1);
1541        assert_eq!(docs[0].explicit_start, expected_start, "explicit_start");
1542        assert_eq!(docs[0].explicit_end, expected_end, "explicit_end");
1543    }
1544
1545    #[test]
1546    fn multi_document_flags_are_independent() {
1547        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
1548        assert_eq!(docs.len(), 3);
1549        assert!(!docs[0].explicit_start, "doc1 explicit_start");
1550        assert!(!docs[0].explicit_end, "doc1 explicit_end");
1551        assert!(docs[1].explicit_start, "doc2 explicit_start");
1552        assert!(docs[1].explicit_end, "doc2 explicit_end");
1553        assert!(docs[2].explicit_start, "doc3 explicit_start");
1554        assert!(!docs[2].explicit_end, "doc3 explicit_end");
1555    }
1556
1557    // -----------------------------------------------------------------------
1558    // sanitize_scalar_for_error unit tests
1559    // -----------------------------------------------------------------------
1560
1561    #[rstest]
1562    #[case::newline("foo\nbar", '\n', "\\u000A", "foo\\u000Abar")]
1563    #[case::carriage_return("foo\rbar", '\r', "\\u000D", "foo\\u000Dbar")]
1564    #[case::null_byte("foo\0bar", '\0', "\\u0000", "foo\\u0000bar")]
1565    fn sanitize_replaces_control_char_with_escape(
1566        #[case] input: &str,
1567        #[case] raw_char: char,
1568        #[case] escape_seq: &str,
1569        #[case] expected: &str,
1570    ) {
1571        let result = sanitize_scalar_for_error(input);
1572        assert!(
1573            !result.contains(raw_char),
1574            "output must not contain the raw control character"
1575        );
1576        assert!(
1577            result.contains(escape_seq),
1578            "output must contain {escape_seq} escape, got: {result:?}"
1579        );
1580        assert_eq!(result, expected);
1581    }
1582
1583    #[test]
1584    fn sanitize_short_value_stored_verbatim() {
1585        let input = "hello";
1586        let result = sanitize_scalar_for_error(input);
1587        assert_eq!(result, "hello");
1588        assert!(
1589            !result.ends_with("..."),
1590            "short value must not be truncated"
1591        );
1592    }
1593
1594    #[test]
1595    fn sanitize_value_at_exact_limit_not_truncated() {
1596        let input = "a".repeat(128);
1597        let result = sanitize_scalar_for_error(&input);
1598        assert_eq!(
1599            result.len(),
1600            128,
1601            "128-char input must produce 128-char output"
1602        );
1603        assert!(
1604            !result.ends_with("..."),
1605            "value at exact limit must not be truncated"
1606        );
1607    }
1608
1609    #[test]
1610    fn sanitize_value_over_limit_truncated() {
1611        let input = "a".repeat(129);
1612        let result = sanitize_scalar_for_error(&input);
1613        assert!(
1614            result.ends_with("..."),
1615            "value over limit must end with '...'"
1616        );
1617        assert_eq!(
1618            result.len(),
1619            128 + 3,
1620            "truncated output must be 128 chars + 3 ellipsis chars"
1621        );
1622    }
1623
1624    #[test]
1625    fn sanitize_multibyte_char_boundary_not_split() {
1626        let input: String = "中".repeat(127) + "ab"; // 129 chars total
1627        let result = sanitize_scalar_for_error(&input);
1628        assert!(
1629            result.ends_with("..."),
1630            "129-char multibyte input should be truncated"
1631        );
1632        let char_count = result.trim_end_matches("...").chars().count();
1633        assert_eq!(
1634            char_count, 128,
1635            "truncated portion must be exactly 128 chars"
1636        );
1637    }
1638
1639    // -----------------------------------------------------------------------
1640    // Cow variant identity for resolver-injected vs user-authored tags
1641    // -----------------------------------------------------------------------
1642
1643    fn load_root(input: &str) -> Node<Span> {
1644        load(input).expect("load failed").remove(0).root
1645    }
1646
1647    fn node_tag(node: Node<Span>) -> Option<Cow<'static, str>> {
1648        match node {
1649            Node::Scalar { tag, .. } | Node::Mapping { tag, .. } | Node::Sequence { tag, .. } => {
1650                tag
1651            }
1652            Node::Alias { .. } => None,
1653        }
1654    }
1655
1656    #[rstest]
1657    #[case::str_tag("hello\n")]
1658    #[case::int_tag("42\n")]
1659    #[case::null_tag("null\n")]
1660    #[case::map_tag("a: 1\n")]
1661    #[case::seq_tag("- a\n")]
1662    #[case::bare_excl_tag("! hello\n")]
1663    fn resolver_emitted_tag_is_borrowed(#[case] input: &str) {
1664        let tag = node_tag(load_root(input));
1665        assert!(
1666            matches!(tag, Some(Cow::Borrowed(_))),
1667            "resolver-emitted tag must be Borrowed, got: {tag:?}"
1668        );
1669    }
1670
1671    #[rstest]
1672    #[case::scalar("!!str hello\n")]
1673    #[case::mapping("!!map\na: 1\n")]
1674    #[case::sequence("!!seq\n- a\n")]
1675    fn user_authored_tag_is_owned(#[case] input: &str) {
1676        let tag = node_tag(load_root(input));
1677        assert!(
1678            matches!(tag, Some(Cow::Owned(_))),
1679            "user-authored tag must be Owned, got: {tag:?}"
1680        );
1681    }
1682
1683    #[test]
1684    fn alias_node_has_no_tag_field() {
1685        let docs = LoaderBuilder::new()
1686            .build()
1687            .load("- &a x\n- *a\n")
1688            .expect("load failed");
1689        let Node::Sequence { items, .. } = &docs[0].root else {
1690            panic!("expected root sequence");
1691        };
1692        assert!(
1693            matches!(items[1], Node::Alias { .. }),
1694            "second item must be Alias in lossless mode"
1695        );
1696    }
1697
1698    #[test]
1699    fn tag_value_content_preserved_across_cow_variants() {
1700        let Node::Scalar {
1701            tag: tag_resolver, ..
1702        } = load_root("hello\n")
1703        else {
1704            panic!("expected scalar");
1705        };
1706        assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));
1707
1708        let Node::Scalar { tag: tag_user, .. } = load_root("!custom hello\n") else {
1709            panic!("expected scalar");
1710        };
1711        assert_eq!(tag_user.as_deref(), Some("!custom"));
1712    }
1713
1714    // -----------------------------------------------------------------------
1715    // Loader correctly gates NodeMeta construction
1716    // -----------------------------------------------------------------------
1717
1718    fn node_meta_is_none(node: &Node<Span>) -> bool {
1719        matches!(
1720            node,
1721            Node::Scalar { meta: None, .. }
1722                | Node::Mapping { meta: None, .. }
1723                | Node::Sequence { meta: None, .. }
1724        )
1725    }
1726
1727    #[rstest]
1728    #[case::plain_scalar("hello\n")]
1729    #[case::plain_mapping("a: 1\n")]
1730    #[case::plain_sequence("- a\n")]
1731    fn loaded_node_with_no_meta_fields_has_meta_none(#[case] input: &str) {
1732        let docs = load(input).unwrap();
1733        let root = &docs[0].root;
1734        assert!(
1735            node_meta_is_none(root),
1736            "plain node must have meta: None, got: {root:?}"
1737        );
1738    }
1739
1740    #[test]
1741    fn loaded_anchored_scalar_has_meta_some() {
1742        let docs = load("- &foo bar\n").unwrap();
1743        let Node::Sequence { items, .. } = &docs[0].root else {
1744            panic!("expected root Sequence");
1745        };
1746        let item = &items[0];
1747        assert!(
1748            matches!(item, Node::Scalar { meta: Some(_), .. }),
1749            "anchored scalar must have meta: Some, got: {item:?}"
1750        );
1751        assert_eq!(item.anchor(), Some("foo"));
1752    }
1753
1754    #[test]
1755    fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
1756        let docs = load("&tag hello\n").unwrap();
1757        let root = &docs[0].root;
1758        assert!(
1759            matches!(root, Node::Scalar { meta: Some(_), .. }),
1760            "anchored scalar must have meta: Some"
1761        );
1762        assert!(
1763            root.anchor_loc().is_some(),
1764            "anchor_loc() must be Some for anchored scalar"
1765        );
1766    }
1767
1768    // -----------------------------------------------------------------------
1769    // Property displacement promotion — combined anchor+tag on block collections
1770    // -----------------------------------------------------------------------
1771
1772    #[rstest]
1773    // Block mapping
1774    #[case::block_mapping_anchor_only("&a\nk: v\n", Some("a"), false)]
1775    #[case::block_mapping_tag_only("!mytag\nk: v\n", None, true)]
1776    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n", Some("a"), true)]
1777    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n", Some("a"), true)]
1778    // Block sequence
1779    #[case::block_sequence_anchor_only("&a\n- item\n", Some("a"), false)]
1780    #[case::block_sequence_tag_only("!mytag\n- item\n", None, true)]
1781    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n", Some("a"), true)]
1782    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n", Some("a"), true)]
1783    // Flow mapping
1784    #[case::flow_mapping_anchor_only("&a {k: v}\n", Some("a"), false)]
1785    #[case::flow_mapping_tag_only("!mytag {k: v}\n", None, true)]
1786    #[case::flow_mapping_anchor_then_tag("&a !mytag {k: v}\n", Some("a"), true)]
1787    #[case::flow_mapping_tag_then_anchor("!mytag &a {k: v}\n", Some("a"), true)]
1788    // Flow sequence
1789    #[case::flow_sequence_anchor_only("&a [item]\n", Some("a"), false)]
1790    #[case::flow_sequence_tag_only("!mytag [item]\n", None, true)]
1791    #[case::flow_sequence_anchor_then_tag("&a !mytag [item]\n", Some("a"), true)]
1792    #[case::flow_sequence_tag_then_anchor("!mytag &a [item]\n", Some("a"), true)]
1793    fn combined_properties_attach_to_root_collection(
1794        #[case] input: &str,
1795        #[case] expected_anchor: Option<&str>,
1796        #[case] expected_has_tag: bool,
1797    ) {
1798        let docs = load(input).unwrap();
1799        let root = &docs[0].root;
1800        assert_eq!(root.anchor(), expected_anchor, "anchor on root collection");
1801        assert_eq!(
1802            root.tag_loc().is_some(),
1803            expected_has_tag,
1804            "tag_loc on root collection"
1805        );
1806    }
1807
1808    // Block collections: first child must not inherit anchor or tag from the root
1809    #[rstest]
1810    // Block mapping
1811    #[case::block_mapping_anchor_only("&a\nk: v\n")]
1812    #[case::block_mapping_tag_only("!mytag\nk: v\n")]
1813    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n")]
1814    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n")]
1815    // Block sequence
1816    #[case::block_sequence_anchor_only("&a\n- item\n")]
1817    #[case::block_sequence_tag_only("!mytag\n- item\n")]
1818    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n")]
1819    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n")]
1820    fn first_child_of_block_collection_has_no_properties(#[case] input: &str) {
1821        let docs = load(input).unwrap();
1822        let root = &docs[0].root;
1823        let first_child: &Node<Span> = match root {
1824            Node::Mapping { entries, .. } => &entries[0].0,
1825            Node::Sequence { items, .. } => &items[0],
1826            Node::Scalar { .. } | Node::Alias { .. } => panic!("expected block collection"),
1827        };
1828        assert_eq!(
1829            first_child.anchor(),
1830            None,
1831            "anchor must not appear on first child"
1832        );
1833        assert!(
1834            first_child.tag_loc().is_none(),
1835            "tag_loc must not appear on first child"
1836        );
1837    }
1838
1839    // --- Alias registration smoke test ---
1840
1841    #[test]
1842    fn anchor_on_block_mapping_with_tag_is_resolvable_via_alias() {
1843        let input = "root:\n  tagged: &a !mytag\n    k: v\n  ref: *a\n";
1844        let result = LoaderBuilder::new().resolved().build().load(input);
1845        assert!(
1846            result.is_ok(),
1847            "alias *a must resolve — anchor must be on the mapping, not lost to first key: {result:?}"
1848        );
1849    }
1850}