Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32mod comments;
33mod reloc;
34mod stream;
35
36use comments::{attach_leading_comments, attach_trailing_comment};
37use reloc::reloc;
38use stream::{
39    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
40    with_hash_prefix,
41};
42
43use std::collections::{HashMap, HashSet};
44use std::iter::Peekable;
45
46use crate::error::Error;
47use crate::event::{Event, ScalarStyle};
48use crate::node::{Document, Node};
49use crate::pos::{Pos, Span};
50
51// ---------------------------------------------------------------------------
52// Public error type
53// ---------------------------------------------------------------------------
54
/// Errors produced by the loader.
///
/// Returned by [`Loader::load`] and the free function [`load`].  Each variant
/// carries enough information (position, name, or configured limit) to build
/// a diagnostic without re-parsing the input.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The event stream contained a parse error.
    ///
    /// Built from an `Err` item yielded by the underlying event stream; the
    /// position and message are copied over unchanged.
    #[error("parse error at {pos:?}: {message}")]
    Parse {
        /// Source position where the parse error was detected.
        pos: Pos,
        /// Human-readable description of the error.
        message: String,
    },

    /// The event stream ended unexpectedly mid-document.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// Nesting depth exceeded the configured limit.
    ///
    /// Raised when opening a mapping or sequence would make the current depth
    /// strictly greater than `max_nesting_depth`.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded {
        /// The configured nesting depth limit that was exceeded.
        limit: usize,
    },

    /// Too many distinct anchor names were defined.
    ///
    /// Re-defining an already registered anchor name does not count again.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded {
        /// The configured anchor count limit that was exceeded.
        limit: usize,
    },

    /// Alias expansion produced more nodes than the configured limit.
    ///
    /// Only produced in resolved mode.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded {
        /// The configured expansion node limit that was exceeded.
        limit: usize,
    },

    /// A circular alias reference was detected.
    #[error("circular alias reference: '{name}'")]
    CircularAlias {
        /// The anchor name involved in the cycle.
        name: String,
    },

    /// An alias referred to an anchor that was never defined.
    ///
    /// Only produced in resolved mode; lossless mode keeps aliases as-is
    /// without looking them up.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias {
        /// The alias name that had no corresponding anchor definition.
        name: String,
    },
}
106
// Module-local shorthand: every fallible loader routine fails with `LoadError`.
type Result<T> = std::result::Result<T, LoadError>;

// Peekable, boxed iterator of `(Event, Span)` items (or parser errors) that
// every parsing routine in the loader consumes.  `'a` is the lifetime the
// events borrow from.
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
113
114// ---------------------------------------------------------------------------
115// Configuration
116// ---------------------------------------------------------------------------
117
/// Loader mode — controls how alias references are handled.
///
/// [`LoaderOptions::default`] selects [`LoadMode::Lossless`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Preserve aliases as [`Node::Alias`] nodes (default, safe for LSP).
    /// No anchor lookup or expansion is performed.
    Lossless,
    /// Expand aliases inline; subject to `max_expanded_nodes` limit.
    /// An alias whose anchor is not registered yields
    /// [`LoadError::UndefinedAlias`].
    Resolved,
}
126
/// Security and behaviour options for the loader.
///
/// All limits are inclusive: a value equal to the limit is accepted, and the
/// corresponding error is returned only once the limit is strictly exceeded.
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum mapping/sequence nesting depth before returning
    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document before returning
    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
    pub max_anchors: usize,
    /// Maximum total nodes produced by alias expansion in resolved mode before
    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
    ///
    /// The counter is per document and also counts each anchor registration
    /// made in resolved mode.
    pub max_expanded_nodes: usize,
    /// Controls how alias references are handled during loading.
    pub mode: LoadMode,
}
142
143impl Default for LoaderOptions {
144    fn default() -> Self {
145        Self {
146            max_nesting_depth: 512,
147            max_anchors: 10_000,
148            max_expanded_nodes: 1_000_000,
149            mode: LoadMode::Lossless,
150        }
151    }
152}
153
154// ---------------------------------------------------------------------------
155// Builder
156// ---------------------------------------------------------------------------
157
158/// Builder for configuring and creating a [`Loader`].
159///
160/// ```
161/// use rlsp_yaml_parser::loader::LoaderBuilder;
162///
163/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
164/// assert_eq!(docs.len(), 1);
165/// ```
166pub struct LoaderBuilder {
167    options: LoaderOptions,
168}
169
170impl LoaderBuilder {
171    /// Create a builder with default options (lossless mode, safe limits).
172    #[must_use]
173    pub fn new() -> Self {
174        Self {
175            options: LoaderOptions::default(),
176        }
177    }
178
179    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
180    #[must_use]
181    pub const fn lossless(mut self) -> Self {
182        self.options.mode = LoadMode::Lossless;
183        self
184    }
185
186    /// Use resolved mode — aliases are expanded inline.
187    #[must_use]
188    pub const fn resolved(mut self) -> Self {
189        self.options.mode = LoadMode::Resolved;
190        self
191    }
192
193    /// Override the maximum nesting depth.
194    #[must_use]
195    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
196        self.options.max_nesting_depth = limit;
197        self
198    }
199
200    /// Override the maximum anchor count.
201    #[must_use]
202    pub const fn max_anchors(mut self, limit: usize) -> Self {
203        self.options.max_anchors = limit;
204        self
205    }
206
207    /// Override the maximum expanded-node count (resolved mode only).
208    #[must_use]
209    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
210        self.options.max_expanded_nodes = limit;
211        self
212    }
213
214    /// Consume the builder and produce a [`Loader`].
215    #[must_use]
216    pub const fn build(self) -> Loader {
217        Loader {
218            options: self.options,
219        }
220    }
221}
222
impl Default for LoaderBuilder {
    /// Equivalent to [`LoaderBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
228
229// ---------------------------------------------------------------------------
230// Loader
231// ---------------------------------------------------------------------------
232
233/// A configured YAML loader.
234pub struct Loader {
235    options: LoaderOptions,
236}
237
238impl Loader {
239    /// Load YAML text into a sequence of documents.
240    ///
241    /// # Errors
242    ///
243    /// Returns `Err` if the input contains a parse error, exceeds a configured
244    /// security limit, or (in resolved mode) references an undefined anchor.
245    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
246        let mut state = LoadState::new(&self.options);
247        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
248            Box::new(crate::parse_events(input));
249        state.run(iter.peekable())
250    }
251}
252
253// ---------------------------------------------------------------------------
254// Convenience entry point
255// ---------------------------------------------------------------------------
256
257/// Load YAML text using lossless mode and default security limits.
258///
259/// Returns one `Document<Span>` per YAML document in the stream.
260///
261/// # Errors
262///
263/// Returns `Err` if the input contains a parse error or exceeds a security
264/// limit (nesting depth or anchor count).
265///
266/// ```
267/// use rlsp_yaml_parser::loader::load;
268///
269/// let docs = load("hello\n").unwrap();
270/// assert_eq!(docs.len(), 1);
271/// ```
272pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
273    LoaderBuilder::new().lossless().build().load(input)
274}
275
276// ---------------------------------------------------------------------------
277// Internal loader state
278// ---------------------------------------------------------------------------
279
/// Mutable state threaded through one `Loader::load` call.
///
/// The per-document fields (anchors, counters, pending comments) are cleared
/// by `reset_for_document` at each `DocumentStart`.
struct LoadState<'opt> {
    /// Limits and mode borrowed from the owning `Loader`.
    options: &'opt LoaderOptions,
    /// Anchors registered so far in the current document: name → node.
    /// In lossless mode the stored node is only a placeholder.
    anchor_map: HashMap<String, Node<Span>>,
    /// Count of distinct anchors registered (resets per document).
    anchor_count: usize,
    /// Current nesting depth (incremented on `MappingStart`/`SequenceStart`,
    /// decremented once the matching collection has been built).
    depth: usize,
    /// Total nodes produced via alias expansion (resolved mode only).
    expanded_nodes: usize,
    /// Leading comments accumulated by `parse_node` when it encounters a
    /// `Comment` event between a mapping key and its value's collection start,
    /// or by a sequence/mapping loop when it hits End with leftover leading
    /// comments.  The next mapping/sequence loop iteration picks these up and
    /// prepends them to the next entry's leading comments.
    pending_leading: Vec<String>,
}
297
298impl<'opt> LoadState<'opt> {
    /// Create an empty state that enforces the limits in `options`.
    fn new(options: &'opt LoaderOptions) -> Self {
        Self {
            options,
            anchor_map: HashMap::new(),
            anchor_count: 0,
            depth: 0,
            expanded_nodes: 0,
            pending_leading: Vec::new(),
        }
    }
309
    /// Clear all per-document state before a new document is parsed.
    ///
    /// `depth` is deliberately not touched here: `parse_node` decrements it
    /// for every collection it finishes, so it is back at its starting value
    /// whenever a document has been parsed successfully.
    fn reset_for_document(&mut self) {
        self.anchor_map.clear();
        self.anchor_count = 0;
        self.expanded_nodes = 0;
        self.pending_leading.clear();
    }
316
317    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
318        let mut docs: Vec<Document<Span>> = Vec::new();
319
320        // Skip StreamStart.
321        match stream.next() {
322            Some(Ok(_)) | None => {}
323            Some(Err(e)) => {
324                return Err(LoadError::Parse {
325                    pos: e.pos,
326                    message: e.message,
327                });
328            }
329        }
330
331        loop {
332            // Skip any leading comments or unknown events before a document.
333            match next_from(&mut stream)? {
334                None | Some((Event::StreamEnd, _)) => break,
335                Some((
336                    Event::DocumentStart {
337                        explicit,
338                        version,
339                        tag_directives,
340                    },
341                    _,
342                )) => {
343                    let doc_explicit_start = explicit;
344                    let doc_version = version;
345                    let doc_tags = tag_directives;
346                    self.reset_for_document();
347
348                    let mut doc_comments: Vec<String> = Vec::new();
349
350                    // Consume leading comments at document level.
351                    consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
352
353                    // Parse root node (may be absent for empty documents).
354                    let root = if is_document_end(stream.peek()) {
355                        // Empty document — emit an empty scalar as root.
356                        empty_scalar()
357                    } else {
358                        self.parse_node(&mut stream)?
359                    };
360
361                    // Consume DocumentEnd if present and capture its explicit flag.
362                    let doc_explicit_end =
363                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
364                            let end_explicit = *explicit;
365                            let _ = stream.next();
366                            end_explicit
367                        } else {
368                            false
369                        };
370
371                    docs.push(Document {
372                        root,
373                        version: doc_version,
374                        tags: doc_tags,
375                        comments: doc_comments,
376                        explicit_start: doc_explicit_start,
377                        explicit_end: doc_explicit_end,
378                    });
379                }
380                Some(_) => {
381                    // Comment or any other stray event outside a document — skip.
382                }
383            }
384        }
385
386        Ok(docs)
387    }
388
389    /// Parse a single node from the stream.
390    ///
391    /// Advances the stream past the node (including end-of-container events).
392    #[expect(
393        clippy::too_many_lines,
394        reason = "match-on-event-type; splitting would obscure flow"
395    )]
396    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
397        // Structural end events close the caller's collection loop — do NOT
398        // consume them here.  Return an empty scalar and leave the event in
399        // the stream so the outer mapping/sequence loop can see and consume it.
400        if matches!(
401            stream.peek(),
402            Some(Ok((
403                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
404                _
405            )))
406        ) {
407            return Ok(empty_scalar());
408        }
409
410        let Some((event, span)) = next_from(stream)? else {
411            return Ok(empty_scalar());
412        };
413
414        match event {
415            Event::Scalar {
416                value,
417                style,
418                anchor,
419                anchor_loc,
420                tag,
421                tag_loc,
422                ..
423            } => {
424                let node = Node::Scalar {
425                    value: value.into_owned(),
426                    style,
427                    anchor: anchor.map(str::to_owned),
428                    anchor_loc,
429                    tag: tag.map(std::borrow::Cow::into_owned),
430                    tag_loc,
431                    loc: span,
432                    leading_comments: None,
433                    trailing_comment: None,
434                };
435                if let Some(name) = node.anchor() {
436                    self.register_anchor(name.to_owned(), &node)?;
437                }
438                Ok(node)
439            }
440
441            Event::MappingStart {
442                anchor,
443                anchor_loc: mapping_anchor_loc,
444                tag,
445                tag_loc: mapping_tag_loc,
446                style,
447                ..
448            } => {
449                let anchor = anchor.map(str::to_owned);
450                let anchor_loc = mapping_anchor_loc;
451                let tag_loc = mapping_tag_loc;
452                let tag = tag.map(std::borrow::Cow::into_owned);
453
454                self.depth += 1;
455                if self.depth > self.options.max_nesting_depth {
456                    return Err(LoadError::NestingDepthLimitExceeded {
457                        limit: self.options.max_nesting_depth,
458                    });
459                }
460
461                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
462                let mut end_span = span;
463
464                loop {
465                    // Consume leading comments before the next key.  Also
466                    // collect any comments that spilled over from a sibling
467                    // value's collection end (stored in `pending_leading`).
468                    let raw_leading = consume_leading_comments(stream)?;
469                    let leading = if self.pending_leading.is_empty() {
470                        raw_leading
471                    } else {
472                        let mut combined = std::mem::take(&mut self.pending_leading);
473                        combined.extend(raw_leading);
474                        combined
475                    };
476
477                    match stream.peek() {
478                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
479                            // Save any collected leading comments so the next
480                            // sibling entry in the parent collection can inherit
481                            // them (e.g. a comment just before MappingEnd that
482                            // belongs to the following mapping entry).
483                            if !leading.is_empty() {
484                                self.pending_leading = leading;
485                            }
486                            break;
487                        }
488                        Some(Err(_)) => {
489                            // Consume the error.
490                            return Err(match stream.next() {
491                                Some(Err(e)) => LoadError::Parse {
492                                    pos: e.pos,
493                                    message: e.message,
494                                },
495                                _ => LoadError::UnexpectedEndOfStream,
496                            });
497                        }
498                        Some(Ok(_)) => {}
499                    }
500
501                    let mut key = self.parse_node(stream)?;
502                    attach_leading_comments(&mut key, leading);
503
504                    let mut value = self.parse_node(stream)?;
505
506                    // Trailing comment on the value — peek for inline comment.
507                    // Block scalars (literal `|` and folded `>`) consume trailing
508                    // blank lines as part of chomping; their span.end falls on the
509                    // first line after the scalar, which can coincide with the
510                    // next comment's line number.  That would falsely attach a
511                    // leading inter-node comment as a trailing inline comment.
512                    // Block scalars never have an inline comment on their content
513                    // lines, so skip trailing-comment detection for them.
514                    if !is_block_scalar(&value)
515                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
516                    {
517                        let value_end_line = node_end_line(&value);
518                        if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
519                            attach_trailing_comment(&mut value, trail);
520                        }
521                    }
522
523                    entries.push((key, value));
524                }
525
526                // Consume MappingEnd and capture its span.
527                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
528                    end_span = *end;
529                    let _ = stream.next();
530                }
531                self.depth -= 1;
532
533                let node = Node::Mapping {
534                    entries,
535                    style,
536                    anchor: anchor.clone(),
537                    anchor_loc,
538                    tag,
539                    tag_loc,
540                    loc: Span {
541                        start: span.start,
542                        end: end_span.end,
543                    },
544                    leading_comments: None,
545                    trailing_comment: None,
546                };
547                if let Some(name) = anchor {
548                    self.register_anchor(name, &node)?;
549                }
550                Ok(node)
551            }
552
553            Event::SequenceStart {
554                anchor,
555                anchor_loc: sequence_anchor_loc,
556                tag,
557                tag_loc: sequence_tag_loc,
558                style,
559                ..
560            } => {
561                let anchor = anchor.map(str::to_owned);
562                let anchor_loc = sequence_anchor_loc;
563                let tag_loc = sequence_tag_loc;
564                let tag = tag.map(std::borrow::Cow::into_owned);
565
566                self.depth += 1;
567                if self.depth > self.options.max_nesting_depth {
568                    return Err(LoadError::NestingDepthLimitExceeded {
569                        limit: self.options.max_nesting_depth,
570                    });
571                }
572
573                let mut items: Vec<Node<Span>> = Vec::new();
574                let mut end_span = span;
575
576                loop {
577                    // Collect leading comments before the next item.  Also
578                    // collect any comments that spilled over from a sibling
579                    // value's collection end (stored in `pending_leading`).
580                    let raw_leading = consume_leading_comments(stream)?;
581                    let leading = if self.pending_leading.is_empty() {
582                        raw_leading
583                    } else {
584                        let mut combined = std::mem::take(&mut self.pending_leading);
585                        combined.extend(raw_leading);
586                        combined
587                    };
588
589                    match stream.peek() {
590                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
591                            // Save any collected leading comments so the next
592                            // sibling entry in the parent collection can inherit
593                            // them (e.g. a comment just before SequenceEnd that
594                            // belongs to the following sequence item or mapping
595                            // entry in the parent).
596                            if !leading.is_empty() {
597                                self.pending_leading = leading;
598                            }
599                            break;
600                        }
601                        Some(Err(_)) => {
602                            // Consume the error.
603                            return Err(match stream.next() {
604                                Some(Err(e)) => LoadError::Parse {
605                                    pos: e.pos,
606                                    message: e.message,
607                                },
608                                _ => LoadError::UnexpectedEndOfStream,
609                            });
610                        }
611                        Some(Ok(_)) => {}
612                    }
613
614                    let mut item = self.parse_node(stream)?;
615                    attach_leading_comments(&mut item, leading);
616
617                    // Trailing comment on the item — peek for inline comment.
618                    // Block scalars are excluded for the same reason as in the
619                    // mapping path: their span.end can coincide with the next
620                    // comment's line, falsely turning a leading comment into a
621                    // trailing one.
622                    if !is_block_scalar(&item)
623                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
624                    {
625                        let item_end_line = node_end_line(&item);
626                        if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
627                            attach_trailing_comment(&mut item, trail);
628                        }
629                    }
630
631                    items.push(item);
632                }
633
634                // Consume SequenceEnd and capture its span.
635                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
636                    end_span = *end;
637                    let _ = stream.next();
638                }
639                self.depth -= 1;
640
641                let node = Node::Sequence {
642                    items,
643                    style,
644                    anchor: anchor.clone(),
645                    anchor_loc,
646                    tag,
647                    tag_loc,
648                    loc: Span {
649                        start: span.start,
650                        end: end_span.end,
651                    },
652                    leading_comments: None,
653                    trailing_comment: None,
654                };
655                if let Some(name) = anchor {
656                    self.register_anchor(name, &node)?;
657                }
658                Ok(node)
659            }
660
661            Event::Alias { name } => {
662                let name = name.to_owned();
663                self.resolve_alias(&name, span)
664            }
665
666            Event::Comment { text } => {
667                // Comment between a mapping key and its collection value (e.g.
668                // `key:\n  # comment\n  subkey: val`).  The comment appears
669                // after the key Scalar and before the MappingStart/SequenceStart
670                // that begins the value.  Save it in `pending_leading` so the
671                // first entry of the upcoming collection can inherit it.
672                self.pending_leading.push(with_hash_prefix(text));
673                self.parse_node(stream)
674            }
675
676            Event::StreamStart
677            | Event::StreamEnd
678            | Event::DocumentStart { .. }
679            | Event::DocumentEnd { .. }
680            | Event::MappingEnd
681            | Event::SequenceEnd => {
682                // Structural event where a node is expected — return empty scalar.
683                Ok(empty_scalar())
684            }
685        }
686    }
687
688    fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
689        if !self.anchor_map.contains_key(&name) {
690            self.anchor_count += 1;
691            if self.anchor_count > self.options.max_anchors {
692                return Err(LoadError::AnchorCountLimitExceeded {
693                    limit: self.options.max_anchors,
694                });
695            }
696        }
697        // Count the anchor node itself toward the expansion budget in resolved
698        // mode so that the total reflects every node present in the expanded
699        // document (anchor definition + each alias expansion).
700        if self.options.mode == LoadMode::Resolved {
701            self.expanded_nodes += 1;
702            if self.expanded_nodes > self.options.max_expanded_nodes {
703                return Err(LoadError::AliasExpansionLimitExceeded {
704                    limit: self.options.max_expanded_nodes,
705                });
706            }
707            self.anchor_map.insert(name, node.clone());
708        } else {
709            // Lossless mode never reads anchor_map for expansion; store a
710            // zero-cost placeholder so contains_key still detects re-definitions.
711            self.anchor_map.insert(name, empty_scalar());
712        }
713        Ok(())
714    }
715
716    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
717        match self.options.mode {
718            LoadMode::Lossless => Ok(Node::Alias {
719                name: name.to_owned(),
720                loc,
721                leading_comments: None,
722                trailing_comment: None,
723            }),
724            LoadMode::Resolved => {
725                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
726                    LoadError::UndefinedAlias {
727                        name: name.to_owned(),
728                    }
729                })?;
730                let mut in_progress: HashSet<String> = HashSet::new();
731                self.expand_node(anchored, &mut in_progress)
732            }
733        }
734    }
735
736    /// Recursively expand a node, counting every node produced against the
737    /// expansion limit and checking for cycles via `in_progress`.
738    fn expand_node(
739        &mut self,
740        node: Node<Span>,
741        in_progress: &mut HashSet<String>,
742    ) -> Result<Node<Span>> {
743        // Increment at the top — before child recursion — so every node
744        // (including non-alias nodes inside expanded trees) counts against the
745        // budget.
746        self.expanded_nodes += 1;
747        if self.expanded_nodes > self.options.max_expanded_nodes {
748            return Err(LoadError::AliasExpansionLimitExceeded {
749                limit: self.options.max_expanded_nodes,
750            });
751        }
752
753        match node {
754            Node::Alias { ref name, loc, .. } => {
755                if in_progress.contains(name) {
756                    return Err(LoadError::CircularAlias { name: name.clone() });
757                }
758                let target = self
759                    .anchor_map
760                    .get(name)
761                    .cloned()
762                    .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
763                in_progress.insert(name.clone());
764                let expanded = self.expand_node(target, in_progress)?;
765                in_progress.remove(name);
766                // Re-stamp with the alias site's location.
767                Ok(reloc(expanded, loc))
768            }
769            Node::Mapping {
770                entries,
771                style,
772                anchor,
773                anchor_loc,
774                tag,
775                tag_loc,
776                loc,
777                leading_comments,
778                trailing_comment,
779            } => {
780                let mut expanded_entries = Vec::with_capacity(entries.len());
781                for (k, v) in entries {
782                    let ek = self.expand_node(k, in_progress)?;
783                    let ev = self.expand_node(v, in_progress)?;
784                    expanded_entries.push((ek, ev));
785                }
786                Ok(Node::Mapping {
787                    entries: expanded_entries,
788                    style,
789                    anchor,
790                    anchor_loc,
791                    tag,
792                    tag_loc,
793                    loc,
794                    leading_comments,
795                    trailing_comment,
796                })
797            }
798            Node::Sequence {
799                items,
800                style,
801                anchor,
802                anchor_loc,
803                tag,
804                tag_loc,
805                loc,
806                leading_comments,
807                trailing_comment,
808            } => {
809                let mut expanded_items = Vec::with_capacity(items.len());
810                for item in items {
811                    expanded_items.push(self.expand_node(item, in_progress)?);
812                }
813                Ok(Node::Sequence {
814                    items: expanded_items,
815                    style,
816                    anchor,
817                    anchor_loc,
818                    tag,
819                    tag_loc,
820                    loc,
821                    leading_comments,
822                    trailing_comment,
823                })
824            }
825            // Scalars and already-resolved nodes — pass through.
826            scalar @ Node::Scalar { .. } => Ok(scalar),
827        }
828    }
829}
830
831/// Return `true` if the peeked item signals end of document (or stream).
832const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
833    matches!(
834        peeked,
835        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
836    )
837}
838
839/// Return the line number of a node's span end position.
840///
841/// Used to determine whether the next `Comment` event is trailing (same line)
842/// or leading (different line).
843#[inline]
844const fn node_end_line(node: &Node<Span>) -> usize {
845    match node {
846        Node::Scalar { loc, .. }
847        | Node::Mapping { loc, .. }
848        | Node::Sequence { loc, .. }
849        | Node::Alias { loc, .. } => loc.end.line,
850    }
851}
852
853/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
854///
855/// Block scalars consume trailing blank lines as part of chomping, so their
856/// `span.end` falls on the line *after* the last consumed line.  This means a
857/// comment on the immediately following line has the same line number as
858/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
859/// classify it as an inline trailing comment.  The caller uses this predicate
860/// to skip trailing-comment detection for block scalars.
861#[inline]
862const fn is_block_scalar(node: &Node<Span>) -> bool {
863    matches!(
864        node,
865        Node::Scalar {
866            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
867            ..
868        }
869    )
870}
871
872// ---------------------------------------------------------------------------
873// Node helpers
874// ---------------------------------------------------------------------------
875
876const fn empty_scalar() -> Node<Span> {
877    Node::Scalar {
878        value: String::new(),
879        style: ScalarStyle::Plain,
880        anchor: None,
881        anchor_loc: None,
882        tag: None,
883        tag_loc: None,
884        loc: Span {
885            start: Pos::ORIGIN,
886            end: Pos::ORIGIN,
887        },
888        leading_comments: None,
889        trailing_comment: None,
890    }
891}
892
893// ---------------------------------------------------------------------------
894// Tests
895// ---------------------------------------------------------------------------
896
#[cfg(test)]
#[expect(
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::indexing_slicing,
    clippy::panic,
    reason = "test code"
)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // Shared helpers
    // -----------------------------------------------------------------------

    /// Borrow the entries of a mapping node; panic with `what` for any other
    /// node kind.
    fn mapping_entries<'a>(node: &'a Node<Span>, what: &str) -> &'a [(Node<Span>, Node<Span>)] {
        let Node::Mapping { entries, .. } = node else {
            panic!("{what}");
        };
        entries
    }

    /// Borrow the items of a sequence node; panic with `what` for any other
    /// node kind.
    fn sequence_items<'a>(node: &'a Node<Span>, what: &str) -> &'a [Node<Span>] {
        let Node::Sequence { items, .. } = node else {
            panic!("{what}");
        };
        items
    }

    // UT-1: an anchor registered in one document is unknown to the next one.
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        // Resolved mode: `&foo` lives in doc 1, `*foo` is used in doc 2.
        let outcome = LoaderBuilder::new()
            .resolved()
            .build()
            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
        assert!(
            outcome.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        let err = outcome.unwrap_err();
        assert!(matches!(err, LoadError::UndefinedAlias { .. }));
    }

    // UT-2: registrations are counted and rejected once `max_anchors` is hit.
    #[test]
    fn register_anchor_increments_count() {
        let options = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let origin = Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        };
        let node = Node::Scalar {
            value: "x".to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            anchor_loc: None,
            tag: None,
            tag_loc: None,
            loc: origin,
            leading_comments: None,
            trailing_comment: None,
        };
        // The first two registrations fit within the limit of 2.
        for name in ["a", "b"] {
            assert!(state.register_anchor(name.to_owned(), &node).is_ok());
        }
        // The third one must be refused.
        let err = state
            .register_anchor("c".to_owned(), &node)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    // UT-3: an alias whose anchor resolves to itself is reported as circular.
    #[test]
    fn expand_node_detects_circular_alias() {
        let options = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let origin = Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        };
        // Anchor "a" maps to an alias that points back at "a".
        let alias_node = Node::Alias {
            name: "a".to_owned(),
            loc: origin,
            leading_comments: None,
            trailing_comment: None,
        };
        state.anchor_map.insert("a".to_owned(), alias_node.clone());
        let mut in_progress = HashSet::new();
        let result = state.expand_node(alias_node, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }

    // -----------------------------------------------------------------------
    // Bug A: comment between mapping key and its collection value
    // -----------------------------------------------------------------------

    // UT-A1: a comment between a key and its nested mapping lands on the
    // first nested key.
    #[test]
    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
        // Shape: outer -> { inner: val }, with "# Style 1" sitting between the
        // "outer" key and the nested mapping.
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        assert_eq!(entries.len(), 1);
        let (_outer_key, outer_value) = &entries[0];
        let nested = mapping_entries(outer_value, "expected nested mapping");
        assert_eq!(nested.len(), 1);
        let (inner_key, _) = &nested[0];
        assert_eq!(
            inner_key.leading_comments(),
            &["# Style 1"],
            "comment should be attached to the first nested key"
        );
    }

    // UT-A2: a comment between a key and its nested sequence lands on the
    // first item.
    #[test]
    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_key, seq_value) = &entries[0];
        let items = sequence_items(seq_value, "expected sequence value");
        // "# leading" precedes every item, so the first item owns it.
        assert_eq!(
            items[0].leading_comments(),
            &["# leading"],
            "comment should be attached to first sequence item"
        );
    }

    // UT-A3: several consecutive comments ahead of a collection all survive.
    #[test]
    fn multiple_comments_between_key_and_collection_all_preserved() {
        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_key, seq_value) = &entries[0];
        let items = sequence_items(seq_value, "expected sequence value");
        assert_eq!(
            items[0].leading_comments(),
            &["# first", "# second"],
            "both comments should be on first item"
        );
    }

    // UT-A4: the outer KEY node itself picks up no comments in that scenario.
    #[test]
    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (outer_key, _) = &entries[0];
        assert!(
            outer_key.leading_comments().is_empty(),
            "outer key should have no leading comments"
        );
        assert!(
            outer_key.trailing_comment().is_none(),
            "outer key should have no trailing comment"
        );
    }

    // UT-A5: with no comment in the input, `leading_comments` stays empty.
    #[test]
    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
        let docs = load("key:\n  inner: val\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_key, nested) = &entries[0];
        let nested_entries = mapping_entries(nested, "expected nested mapping");
        let (inner_key, _) = &nested_entries[0];
        assert!(
            inner_key.leading_comments().is_empty(),
            "inner key should have no leading comments when there is no comment"
        );
    }

    // -----------------------------------------------------------------------
    // Bug B: comment at end of collection preserved as leading on next sibling
    // -----------------------------------------------------------------------

    // UT-B1: a comment after the last sequence item becomes a leading comment
    // on the next mapping entry.
    #[test]
    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
        let input =
            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_lists_key, nested) = &entries[0];
        let nested_entries = mapping_entries(nested, "expected nested mapping");
        assert_eq!(nested_entries.len(), 2);
        let (list_b_key, _) = &nested_entries[1];
        assert_eq!(
            list_b_key.leading_comments(),
            &["# Style 2"],
            "# Style 2 should be leading comment on list-b key"
        );
    }

    // UT-B2: a comment at the end of a nested sequence reaches the next
    // mapping entry.
    #[test]
    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_outer_key, outer_val) = &entries[0];
        let nested = mapping_entries(outer_val, "expected nested mapping");
        assert_eq!(nested.len(), 2);
        let (b_key, _) = &nested[1];
        assert_eq!(
            b_key.leading_comments(),
            &["# between"],
            "# between should be leading comment on b key"
        );
    }

    // UT-B3: a comment at the end of a nested mapping reaches the next sibling.
    #[test]
    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_parent_key, parent_val) = &entries[0];
        let siblings = mapping_entries(parent_val, "expected parent mapping value");
        assert_eq!(siblings.len(), 2);
        let (child2_key, _) = &siblings[1];
        assert_eq!(
            child2_key.leading_comments(),
            &["# end-of-child1"],
            "# end-of-child1 should be leading comment on child2 key"
        );
    }

    // UT-B4: a comment after the last item of the final sequence does not
    // break loading.
    #[test]
    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
        // "# tail" follows the last item of the `items` sequence and has no
        // sibling after it.  This test does not pin down where the comment is
        // attached; it only checks that loading succeeds and that both
        // sequence items survive.
        let input = "items:\n  - a\n  - b\n  # tail\n";
        let docs = load(input).unwrap();
        assert!(!docs.is_empty(), "document should parse without error");
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_items_key, seq_val) = &entries[0];
        let items = sequence_items(seq_val, "expected sequence value");
        assert_eq!(items.len(), 2, "sequence items must not be lost");
    }

    // UT-B5: a collection without comments yields items without leading
    // comments.
    #[test]
    fn no_overflow_comments_when_collection_ends_cleanly() {
        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_key, seq_val) = &entries[0];
        let items = sequence_items(seq_val, "expected sequence value");
        for item in items {
            assert!(
                item.leading_comments().is_empty(),
                "items should have no leading comments"
            );
        }
    }

    // -----------------------------------------------------------------------
    // Combined scenarios
    // -----------------------------------------------------------------------

    // UT-C1: the exact input from the bug report keeps both comments.
    #[test]
    fn original_bug_report_input_preserves_both_comments() {
        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_lists_key, nested) = &entries[0];
        let nested_entries = mapping_entries(nested, "expected nested mapping");
        assert_eq!(nested_entries.len(), 2);
        let (first_key, _) = &nested_entries[0];
        let (second_key, _) = &nested_entries[1];
        assert_eq!(
            first_key.leading_comments(),
            &["# Style 1"],
            "list-a should have # Style 1 as leading comment"
        );
        assert_eq!(
            second_key.leading_comments(),
            &["# Style 2"],
            "list-b should have # Style 2 as leading comment"
        );
    }

    // UT-C2: leading and trailing comments on neighbouring entries coexist.
    #[test]
    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_map_key, map_val) = &entries[0];
        let siblings = mapping_entries(map_val, "expected mapping value");
        assert_eq!(siblings.len(), 2);
        let (key1, val1) = &siblings[0];
        let (key2, _) = &siblings[1];
        assert_eq!(key1.leading_comments(), &["# leading"]);
        assert_eq!(val1.trailing_comment(), Some("# trailing"));
        assert_eq!(key2.leading_comments(), &["# next-leading"]);
    }

    // UT-C3: an overflow comment from a nested sequence reaches the right
    // sibling one level up.
    #[test]
    fn deeply_nested_overflow_comments_reach_correct_sibling() {
        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "expected root mapping");
        let (_top_key, top_val) = &entries[0];
        let top_entries = mapping_entries(top_val, "expected top-level mapping");
        assert_eq!(top_entries.len(), 2);
        let (next_key, _) = &top_entries[1];
        assert_eq!(
            next_key.leading_comments(),
            &["# deep-overflow"],
            "# deep-overflow should propagate from nested sequence to next sibling"
        );
    }

    // -----------------------------------------------------------------------
    // UT-D: Document marker flags (explicit_start / explicit_end)
    // -----------------------------------------------------------------------

    // UT-D1: no markers at all → neither flag is set.
    #[test]
    fn bare_document_has_both_flags_false() {
        let docs = load("key: value\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        let doc = &docs[0];
        assert!(!doc.explicit_start, "expected explicit_start=false");
        assert!(!doc.explicit_end, "expected explicit_end=false");
    }

    // UT-D2: only a `---` start marker → explicit_start set, explicit_end clear.
    #[test]
    fn document_with_start_marker_has_explicit_start_true() {
        let docs = load("---\nkey: value\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        let doc = &docs[0];
        assert!(doc.explicit_start, "expected explicit_start=true");
        assert!(!doc.explicit_end, "expected explicit_end=false");
    }

    // UT-D3: only a `...` end marker → explicit_start clear, explicit_end set.
    #[test]
    fn document_with_end_marker_has_explicit_end_true() {
        let docs = load("key: value\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        let doc = &docs[0];
        assert!(!doc.explicit_start, "expected explicit_start=false");
        assert!(doc.explicit_end, "expected explicit_end=true");
    }

    // UT-D4: both `---` and `...` present → both flags set.
    #[test]
    fn document_with_both_markers_has_both_flags_true() {
        let docs = load("---\nkey: value\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        let doc = &docs[0];
        assert!(doc.explicit_start, "expected explicit_start=true");
        assert!(doc.explicit_end, "expected explicit_end=true");
    }

    // UT-D5: in a multi-document stream each document carries its own flags.
    #[test]
    fn multi_document_flags_are_independent() {
        // doc1: bare (no explicit start, no explicit end)
        // doc2: explicit start (---) and explicit end (...)
        // doc3: explicit start (---), no explicit end
        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
        assert_eq!(docs.len(), 3);
        let (doc1, doc2, doc3) = (&docs[0], &docs[1], &docs[2]);
        assert!(!doc1.explicit_start, "doc1 explicit_start");
        assert!(!doc1.explicit_end, "doc1 explicit_end");
        assert!(doc2.explicit_start, "doc2 explicit_start");
        assert!(doc2.explicit_end, "doc2 explicit_end");
        assert!(doc3.explicit_start, "doc3 explicit_start");
        assert!(!doc3.explicit_end, "doc3 explicit_end");
    }

    // UT-D6: an empty document framed by both markers still records them.
    #[test]
    fn empty_document_with_explicit_markers_has_both_flags_true() {
        let docs = load("---\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        let doc = &docs[0];
        assert!(doc.explicit_start, "expected explicit_start=true");
        assert!(doc.explicit_end, "expected explicit_end=true");
    }
}