Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32mod comments;
33mod reloc;
34mod stream;
35
36use comments::{attach_leading_comments, attach_trailing_comment};
37use reloc::reloc;
38use stream::{
39    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
40};
41
42use std::collections::{HashMap, HashSet};
43use std::iter::Peekable;
44
45use crate::error::Error;
46use crate::event::{Event, ScalarStyle};
47use crate::node::{Document, Node};
48use crate::pos::{Pos, Span};
49
50// ---------------------------------------------------------------------------
51// Public error type
52// ---------------------------------------------------------------------------
53
54/// Errors produced by the loader.
55#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
56pub enum LoadError {
57    /// The event stream contained a parse error.
58    #[error("parse error at {pos:?}: {message}")]
59    Parse {
60        /// Source position where the parse error was detected.
61        pos: Pos,
62        /// Human-readable description of the error.
63        message: String,
64    },
65
66    /// The event stream ended unexpectedly mid-document.
67    #[error("unexpected end of event stream")]
68    UnexpectedEndOfStream,
69
70    /// Nesting depth exceeded the configured limit.
71    #[error("nesting depth limit exceeded (max: {limit})")]
72    NestingDepthLimitExceeded {
73        /// The configured nesting depth limit that was exceeded.
74        limit: usize,
75    },
76
77    /// Too many distinct anchor names were defined.
78    #[error("anchor count limit exceeded (max: {limit})")]
79    AnchorCountLimitExceeded {
80        /// The configured anchor count limit that was exceeded.
81        limit: usize,
82    },
83
84    /// Alias expansion produced more nodes than the configured limit.
85    #[error("alias expansion node limit exceeded (max: {limit})")]
86    AliasExpansionLimitExceeded {
87        /// The configured expansion node limit that was exceeded.
88        limit: usize,
89    },
90
91    /// A circular alias reference was detected.
92    #[error("circular alias reference: '{name}'")]
93    CircularAlias {
94        /// The anchor name involved in the cycle.
95        name: String,
96    },
97
98    /// An alias referred to an anchor that was never defined.
99    #[error("undefined alias: '{name}'")]
100    UndefinedAlias {
101        /// The alias name that had no corresponding anchor definition.
102        name: String,
103    },
104}
105
106// Convenience alias used inside the module.
107type Result<T> = std::result::Result<T, LoadError>;
108
109// Type alias for the peekable event stream used throughout the loader.
110type EventStream<'a> =
111    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
112
113// ---------------------------------------------------------------------------
114// Configuration
115// ---------------------------------------------------------------------------
116
/// Loader mode — controls how alias references are handled.
///
/// [`LoadMode::Lossless`] is the [`Default`] value, matching the mode that
/// [`LoaderOptions::default`] selects.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LoadMode {
    /// Preserve aliases as [`Node::Alias`] nodes (default, safe for LSP).
    #[default]
    Lossless,
    /// Expand aliases inline; subject to `max_expanded_nodes` limit.
    Resolved,
}
125
126/// Security and behaviour options for the loader.
127#[derive(Debug, Clone)]
128pub struct LoaderOptions {
129    /// Maximum mapping/sequence nesting depth before returning
130    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
131    pub max_nesting_depth: usize,
132    /// Maximum number of distinct anchor names per document before returning
133    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
134    pub max_anchors: usize,
135    /// Maximum total nodes produced by alias expansion in resolved mode before
136    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
137    pub max_expanded_nodes: usize,
138    /// Controls how alias references are handled during loading.
139    pub mode: LoadMode,
140}
141
142impl Default for LoaderOptions {
143    fn default() -> Self {
144        Self {
145            max_nesting_depth: 512,
146            max_anchors: 10_000,
147            max_expanded_nodes: 1_000_000,
148            mode: LoadMode::Lossless,
149        }
150    }
151}
152
153// ---------------------------------------------------------------------------
154// Builder
155// ---------------------------------------------------------------------------
156
157/// Builder for configuring and creating a [`Loader`].
158///
159/// ```
160/// use rlsp_yaml_parser::loader::LoaderBuilder;
161///
162/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
163/// assert_eq!(docs.len(), 1);
164/// ```
165pub struct LoaderBuilder {
166    options: LoaderOptions,
167}
168
169impl LoaderBuilder {
170    /// Create a builder with default options (lossless mode, safe limits).
171    #[must_use]
172    pub fn new() -> Self {
173        Self {
174            options: LoaderOptions::default(),
175        }
176    }
177
178    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
179    #[must_use]
180    pub const fn lossless(mut self) -> Self {
181        self.options.mode = LoadMode::Lossless;
182        self
183    }
184
185    /// Use resolved mode — aliases are expanded inline.
186    #[must_use]
187    pub const fn resolved(mut self) -> Self {
188        self.options.mode = LoadMode::Resolved;
189        self
190    }
191
192    /// Override the maximum nesting depth.
193    #[must_use]
194    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
195        self.options.max_nesting_depth = limit;
196        self
197    }
198
199    /// Override the maximum anchor count.
200    #[must_use]
201    pub const fn max_anchors(mut self, limit: usize) -> Self {
202        self.options.max_anchors = limit;
203        self
204    }
205
206    /// Override the maximum expanded-node count (resolved mode only).
207    #[must_use]
208    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
209        self.options.max_expanded_nodes = limit;
210        self
211    }
212
213    /// Consume the builder and produce a [`Loader`].
214    #[must_use]
215    pub const fn build(self) -> Loader {
216        Loader {
217            options: self.options,
218        }
219    }
220}
221
222impl Default for LoaderBuilder {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
228// ---------------------------------------------------------------------------
229// Loader
230// ---------------------------------------------------------------------------
231
232/// A configured YAML loader.
233pub struct Loader {
234    options: LoaderOptions,
235}
236
237impl Loader {
238    /// Load YAML text into a sequence of documents.
239    ///
240    /// # Errors
241    ///
242    /// Returns `Err` if the input contains a parse error, exceeds a configured
243    /// security limit, or (in resolved mode) references an undefined anchor.
244    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
245        let mut state = LoadState::new(&self.options);
246        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
247            Box::new(crate::parse_events(input));
248        state.run(iter.peekable())
249    }
250}
251
252// ---------------------------------------------------------------------------
253// Convenience entry point
254// ---------------------------------------------------------------------------
255
256/// Load YAML text using lossless mode and default security limits.
257///
258/// Returns one `Document<Span>` per YAML document in the stream.
259///
260/// # Errors
261///
262/// Returns `Err` if the input contains a parse error or exceeds a security
263/// limit (nesting depth or anchor count).
264///
265/// ```
266/// use rlsp_yaml_parser::loader::load;
267///
268/// let docs = load("hello\n").unwrap();
269/// assert_eq!(docs.len(), 1);
270/// ```
271pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
272    LoaderBuilder::new().lossless().build().load(input)
273}
274
275// ---------------------------------------------------------------------------
276// Internal loader state
277// ---------------------------------------------------------------------------
278
279struct LoadState<'opt> {
280    options: &'opt LoaderOptions,
281    /// Anchors registered so far in the current document: name → node.
282    anchor_map: HashMap<String, Node<Span>>,
283    /// Count of distinct anchors registered (resets per document).
284    anchor_count: usize,
285    /// Current nesting depth (incremented on Begin, decremented on End).
286    depth: usize,
287    /// Total nodes produced via alias expansion (resolved mode only).
288    expanded_nodes: usize,
289    /// Leading comments accumulated by `parse_node` when it encounters a
290    /// `Comment` event between a mapping key and its value's collection start,
291    /// or by a sequence/mapping loop when it hits End with leftover leading
292    /// comments.  The next mapping/sequence loop iteration picks these up and
293    /// prepends them to the next entry's leading comments.
294    pending_leading: Vec<String>,
295}
296
297impl<'opt> LoadState<'opt> {
298    fn new(options: &'opt LoaderOptions) -> Self {
299        Self {
300            options,
301            anchor_map: HashMap::new(),
302            anchor_count: 0,
303            depth: 0,
304            expanded_nodes: 0,
305            pending_leading: Vec::new(),
306        }
307    }
308
309    fn reset_for_document(&mut self) {
310        self.anchor_map.clear();
311        self.anchor_count = 0;
312        self.expanded_nodes = 0;
313        self.pending_leading.clear();
314    }
315
316    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
317        let mut docs: Vec<Document<Span>> = Vec::new();
318
319        // Skip StreamStart.
320        match stream.next() {
321            Some(Ok(_)) | None => {}
322            Some(Err(e)) => {
323                return Err(LoadError::Parse {
324                    pos: e.pos,
325                    message: e.message,
326                });
327            }
328        }
329
330        loop {
331            // Skip any leading comments or unknown events before a document.
332            match next_from(&mut stream)? {
333                None | Some((Event::StreamEnd, _)) => break,
334                Some((
335                    Event::DocumentStart {
336                        explicit,
337                        version,
338                        tag_directives,
339                    },
340                    _,
341                )) => {
342                    let doc_explicit_start = explicit;
343                    let doc_version = version;
344                    let doc_tags = tag_directives;
345                    self.reset_for_document();
346
347                    let mut doc_comments: Vec<String> = Vec::new();
348
349                    // Consume leading comments at document level.
350                    consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
351
352                    // Parse root node (may be absent for empty documents).
353                    let root = if is_document_end(stream.peek()) {
354                        // Empty document — emit an empty scalar as root.
355                        empty_scalar()
356                    } else {
357                        self.parse_node(&mut stream)?
358                    };
359
360                    // Consume DocumentEnd if present and capture its explicit flag.
361                    let doc_explicit_end =
362                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
363                            let end_explicit = *explicit;
364                            let _ = stream.next();
365                            end_explicit
366                        } else {
367                            false
368                        };
369
370                    docs.push(Document {
371                        root,
372                        version: doc_version,
373                        tags: doc_tags,
374                        comments: doc_comments,
375                        explicit_start: doc_explicit_start,
376                        explicit_end: doc_explicit_end,
377                    });
378                }
379                Some(_) => {
380                    // Comment or any other stray event outside a document — skip.
381                }
382            }
383        }
384
385        Ok(docs)
386    }
387
388    /// Parse a single node from the stream.
389    ///
390    /// Advances the stream past the node (including end-of-container events).
391    #[expect(
392        clippy::too_many_lines,
393        reason = "match-on-event-type; splitting would obscure flow"
394    )]
395    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
396        // Structural end events close the caller's collection loop — do NOT
397        // consume them here.  Return an empty scalar and leave the event in
398        // the stream so the outer mapping/sequence loop can see and consume it.
399        if matches!(
400            stream.peek(),
401            Some(Ok((
402                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
403                _
404            )))
405        ) {
406            return Ok(empty_scalar());
407        }
408
409        let Some((event, span)) = next_from(stream)? else {
410            return Ok(empty_scalar());
411        };
412
413        match event {
414            Event::Scalar {
415                value,
416                style,
417                anchor,
418                tag,
419            } => {
420                let node = Node::Scalar {
421                    value: value.into_owned(),
422                    style,
423                    anchor: anchor.map(str::to_owned),
424                    tag: tag.map(std::borrow::Cow::into_owned),
425                    loc: span,
426                    leading_comments: Vec::new(),
427                    trailing_comment: None,
428                };
429                if let Some(name) = node.anchor() {
430                    self.register_anchor(name.to_owned(), node.clone())?;
431                }
432                Ok(node)
433            }
434
435            Event::MappingStart { anchor, tag, style } => {
436                let anchor = anchor.map(str::to_owned);
437                let tag = tag.map(std::borrow::Cow::into_owned);
438
439                self.depth += 1;
440                if self.depth > self.options.max_nesting_depth {
441                    return Err(LoadError::NestingDepthLimitExceeded {
442                        limit: self.options.max_nesting_depth,
443                    });
444                }
445
446                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
447                let mut end_span = span;
448
449                loop {
450                    // Consume leading comments before the next key.  Also
451                    // collect any comments that spilled over from a sibling
452                    // value's collection end (stored in `pending_leading`).
453                    let raw_leading = consume_leading_comments(stream)?;
454                    let leading = if self.pending_leading.is_empty() {
455                        raw_leading
456                    } else {
457                        let mut combined = std::mem::take(&mut self.pending_leading);
458                        combined.extend(raw_leading);
459                        combined
460                    };
461
462                    match stream.peek() {
463                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
464                            // Save any collected leading comments so the next
465                            // sibling entry in the parent collection can inherit
466                            // them (e.g. a comment just before MappingEnd that
467                            // belongs to the following mapping entry).
468                            if !leading.is_empty() {
469                                self.pending_leading = leading;
470                            }
471                            break;
472                        }
473                        Some(Err(_)) => {
474                            // Consume the error.
475                            return Err(match stream.next() {
476                                Some(Err(e)) => LoadError::Parse {
477                                    pos: e.pos,
478                                    message: e.message,
479                                },
480                                _ => LoadError::UnexpectedEndOfStream,
481                            });
482                        }
483                        Some(Ok(_)) => {}
484                    }
485
486                    let mut key = self.parse_node(stream)?;
487                    attach_leading_comments(&mut key, leading);
488
489                    let mut value = self.parse_node(stream)?;
490
491                    // Trailing comment on the value — peek for inline comment.
492                    // Block scalars (literal `|` and folded `>`) consume trailing
493                    // blank lines as part of chomping; their span.end falls on the
494                    // first line after the scalar, which can coincide with the
495                    // next comment's line number.  That would falsely attach a
496                    // leading inter-node comment as a trailing inline comment.
497                    // Block scalars never have an inline comment on their content
498                    // lines, so skip trailing-comment detection for them.
499                    if !is_block_scalar(&value) {
500                        let value_end_line = node_end_line(&value);
501                        if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
502                            attach_trailing_comment(&mut value, trail);
503                        }
504                    }
505
506                    entries.push((key, value));
507                }
508
509                // Consume MappingEnd and capture its span.
510                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
511                    end_span = *end;
512                    let _ = stream.next();
513                }
514                self.depth -= 1;
515
516                let node = Node::Mapping {
517                    entries,
518                    style,
519                    anchor: anchor.clone(),
520                    tag,
521                    loc: Span {
522                        start: span.start,
523                        end: end_span.end,
524                    },
525                    leading_comments: Vec::new(),
526                    trailing_comment: None,
527                };
528                if let Some(name) = anchor {
529                    self.register_anchor(name, node.clone())?;
530                }
531                Ok(node)
532            }
533
534            Event::SequenceStart { anchor, tag, style } => {
535                let anchor = anchor.map(str::to_owned);
536                let tag = tag.map(std::borrow::Cow::into_owned);
537
538                self.depth += 1;
539                if self.depth > self.options.max_nesting_depth {
540                    return Err(LoadError::NestingDepthLimitExceeded {
541                        limit: self.options.max_nesting_depth,
542                    });
543                }
544
545                let mut items: Vec<Node<Span>> = Vec::new();
546                let mut end_span = span;
547
548                loop {
549                    // Collect leading comments before the next item.  Also
550                    // collect any comments that spilled over from a sibling
551                    // value's collection end (stored in `pending_leading`).
552                    let raw_leading = consume_leading_comments(stream)?;
553                    let leading = if self.pending_leading.is_empty() {
554                        raw_leading
555                    } else {
556                        let mut combined = std::mem::take(&mut self.pending_leading);
557                        combined.extend(raw_leading);
558                        combined
559                    };
560
561                    match stream.peek() {
562                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
563                            // Save any collected leading comments so the next
564                            // sibling entry in the parent collection can inherit
565                            // them (e.g. a comment just before SequenceEnd that
566                            // belongs to the following sequence item or mapping
567                            // entry in the parent).
568                            if !leading.is_empty() {
569                                self.pending_leading = leading;
570                            }
571                            break;
572                        }
573                        Some(Err(_)) => {
574                            // Consume the error.
575                            return Err(match stream.next() {
576                                Some(Err(e)) => LoadError::Parse {
577                                    pos: e.pos,
578                                    message: e.message,
579                                },
580                                _ => LoadError::UnexpectedEndOfStream,
581                            });
582                        }
583                        Some(Ok(_)) => {}
584                    }
585
586                    let mut item = self.parse_node(stream)?;
587                    attach_leading_comments(&mut item, leading);
588
589                    // Trailing comment on the item — peek for inline comment.
590                    // Block scalars are excluded for the same reason as in the
591                    // mapping path: their span.end can coincide with the next
592                    // comment's line, falsely turning a leading comment into a
593                    // trailing one.
594                    if !is_block_scalar(&item) {
595                        let item_end_line = node_end_line(&item);
596                        if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
597                            attach_trailing_comment(&mut item, trail);
598                        }
599                    }
600
601                    items.push(item);
602                }
603
604                // Consume SequenceEnd and capture its span.
605                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
606                    end_span = *end;
607                    let _ = stream.next();
608                }
609                self.depth -= 1;
610
611                let node = Node::Sequence {
612                    items,
613                    style,
614                    anchor: anchor.clone(),
615                    tag,
616                    loc: Span {
617                        start: span.start,
618                        end: end_span.end,
619                    },
620                    leading_comments: Vec::new(),
621                    trailing_comment: None,
622                };
623                if let Some(name) = anchor {
624                    self.register_anchor(name, node.clone())?;
625                }
626                Ok(node)
627            }
628
629            Event::Alias { name } => {
630                let name = name.to_owned();
631                self.resolve_alias(&name, span)
632            }
633
634            Event::Comment { text } => {
635                // Comment between a mapping key and its collection value (e.g.
636                // `key:\n  # comment\n  subkey: val`).  The comment appears
637                // after the key Scalar and before the MappingStart/SequenceStart
638                // that begins the value.  Save it in `pending_leading` so the
639                // first entry of the upcoming collection can inherit it.
640                self.pending_leading.push(format!("#{text}"));
641                self.parse_node(stream)
642            }
643
644            Event::StreamStart
645            | Event::StreamEnd
646            | Event::DocumentStart { .. }
647            | Event::DocumentEnd { .. }
648            | Event::MappingEnd
649            | Event::SequenceEnd => {
650                // Structural event where a node is expected — return empty scalar.
651                Ok(empty_scalar())
652            }
653        }
654    }
655
656    fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
657        if !self.anchor_map.contains_key(&name) {
658            self.anchor_count += 1;
659            if self.anchor_count > self.options.max_anchors {
660                return Err(LoadError::AnchorCountLimitExceeded {
661                    limit: self.options.max_anchors,
662                });
663            }
664        }
665        // Count the anchor node itself toward the expansion budget in resolved
666        // mode so that the total reflects every node present in the expanded
667        // document (anchor definition + each alias expansion).
668        if self.options.mode == LoadMode::Resolved {
669            self.expanded_nodes += 1;
670            if self.expanded_nodes > self.options.max_expanded_nodes {
671                return Err(LoadError::AliasExpansionLimitExceeded {
672                    limit: self.options.max_expanded_nodes,
673                });
674            }
675        }
676        self.anchor_map.insert(name, node);
677        Ok(())
678    }
679
680    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
681        match self.options.mode {
682            LoadMode::Lossless => Ok(Node::Alias {
683                name: name.to_owned(),
684                loc,
685                leading_comments: Vec::new(),
686                trailing_comment: None,
687            }),
688            LoadMode::Resolved => {
689                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
690                    LoadError::UndefinedAlias {
691                        name: name.to_owned(),
692                    }
693                })?;
694                let mut in_progress: HashSet<String> = HashSet::new();
695                self.expand_node(anchored, &mut in_progress)
696            }
697        }
698    }
699
700    /// Recursively expand a node, counting every node produced against the
701    /// expansion limit and checking for cycles via `in_progress`.
702    fn expand_node(
703        &mut self,
704        node: Node<Span>,
705        in_progress: &mut HashSet<String>,
706    ) -> Result<Node<Span>> {
707        // Increment at the top — before child recursion — so every node
708        // (including non-alias nodes inside expanded trees) counts against the
709        // budget.
710        self.expanded_nodes += 1;
711        if self.expanded_nodes > self.options.max_expanded_nodes {
712            return Err(LoadError::AliasExpansionLimitExceeded {
713                limit: self.options.max_expanded_nodes,
714            });
715        }
716
717        match node {
718            Node::Alias { ref name, loc, .. } => {
719                if in_progress.contains(name) {
720                    return Err(LoadError::CircularAlias { name: name.clone() });
721                }
722                let target = self
723                    .anchor_map
724                    .get(name)
725                    .cloned()
726                    .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
727                in_progress.insert(name.clone());
728                let expanded = self.expand_node(target, in_progress)?;
729                in_progress.remove(name);
730                // Re-stamp with the alias site's location.
731                Ok(reloc(expanded, loc))
732            }
733            Node::Mapping {
734                entries,
735                style,
736                anchor,
737                tag,
738                loc,
739                leading_comments,
740                trailing_comment,
741            } => {
742                let mut expanded_entries = Vec::with_capacity(entries.len());
743                for (k, v) in entries {
744                    let ek = self.expand_node(k, in_progress)?;
745                    let ev = self.expand_node(v, in_progress)?;
746                    expanded_entries.push((ek, ev));
747                }
748                Ok(Node::Mapping {
749                    entries: expanded_entries,
750                    style,
751                    anchor,
752                    tag,
753                    loc,
754                    leading_comments,
755                    trailing_comment,
756                })
757            }
758            Node::Sequence {
759                items,
760                style,
761                anchor,
762                tag,
763                loc,
764                leading_comments,
765                trailing_comment,
766            } => {
767                let mut expanded_items = Vec::with_capacity(items.len());
768                for item in items {
769                    expanded_items.push(self.expand_node(item, in_progress)?);
770                }
771                Ok(Node::Sequence {
772                    items: expanded_items,
773                    style,
774                    anchor,
775                    tag,
776                    loc,
777                    leading_comments,
778                    trailing_comment,
779                })
780            }
781            // Scalars and already-resolved nodes — pass through.
782            scalar @ Node::Scalar { .. } => Ok(scalar),
783        }
784    }
785}
786
787/// Return `true` if the peeked item signals end of document (or stream).
788const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
789    matches!(
790        peeked,
791        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
792    )
793}
794
795/// Return the line number of a node's span end position.
796///
797/// Used to determine whether the next `Comment` event is trailing (same line)
798/// or leading (different line).
799const fn node_end_line(node: &Node<Span>) -> usize {
800    match node {
801        Node::Scalar { loc, .. }
802        | Node::Mapping { loc, .. }
803        | Node::Sequence { loc, .. }
804        | Node::Alias { loc, .. } => loc.end.line,
805    }
806}
807
808/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
809///
810/// Block scalars consume trailing blank lines as part of chomping, so their
811/// `span.end` falls on the line *after* the last consumed line.  This means a
812/// comment on the immediately following line has the same line number as
813/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
814/// classify it as an inline trailing comment.  The caller uses this predicate
815/// to skip trailing-comment detection for block scalars.
816const fn is_block_scalar(node: &Node<Span>) -> bool {
817    matches!(
818        node,
819        Node::Scalar {
820            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
821            ..
822        }
823    )
824}
825
826// ---------------------------------------------------------------------------
827// Node helpers
828// ---------------------------------------------------------------------------
829
830const fn empty_scalar() -> Node<Span> {
831    Node::Scalar {
832        value: String::new(),
833        style: ScalarStyle::Plain,
834        anchor: None,
835        tag: None,
836        loc: Span {
837            start: Pos::ORIGIN,
838            end: Pos::ORIGIN,
839        },
840        leading_comments: Vec::new(),
841        trailing_comment: None,
842    }
843}
844
845// ---------------------------------------------------------------------------
846// Tests
847// ---------------------------------------------------------------------------
848
849#[cfg(test)]
850#[expect(
851    clippy::expect_used,
852    clippy::unwrap_used,
853    clippy::indexing_slicing,
854    clippy::panic,
855    reason = "test code"
856)]
857mod tests {
858    use super::*;
859
860    // UT-1: loader_state_resets_anchor_map_between_documents
861    #[test]
862    fn loader_state_resets_anchor_map_between_documents() {
863        // In resolved mode: anchor defined in doc 1 must not be visible in doc 2.
864        let result = LoaderBuilder::new()
865            .resolved()
866            .build()
867            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
868        assert!(
869            result.is_err(),
870            "expected Err: *foo in doc 2 should be undefined"
871        );
872        assert!(matches!(
873            result.unwrap_err(),
874            LoadError::UndefinedAlias { .. }
875        ));
876    }
877
878    // UT-2: register_anchor_increments_count
879    #[test]
880    fn register_anchor_increments_count() {
881        let options = LoaderOptions {
882            max_anchors: 2,
883            ..LoaderOptions::default()
884        };
885        let mut state = LoadState::new(&options);
886        let node = Node::Scalar {
887            value: "x".to_owned(),
888            style: ScalarStyle::Plain,
889            anchor: None,
890            tag: None,
891            loc: Span {
892                start: Pos::ORIGIN,
893                end: Pos::ORIGIN,
894            },
895            leading_comments: Vec::new(),
896            trailing_comment: None,
897        };
898        assert!(state.register_anchor("a".to_owned(), node.clone()).is_ok());
899        assert!(state.register_anchor("b".to_owned(), node.clone()).is_ok());
900        let err = state
901            .register_anchor("c".to_owned(), node)
902            .expect_err("expected AnchorCountLimitExceeded");
903        assert!(matches!(
904            err,
905            LoadError::AnchorCountLimitExceeded { limit: 2 }
906        ));
907    }
908
909    // UT-3: expand_node_detects_circular_alias
910    #[test]
911    fn expand_node_detects_circular_alias() {
912        let options = LoaderOptions {
913            mode: LoadMode::Resolved,
914            ..LoaderOptions::default()
915        };
916        let mut state = LoadState::new(&options);
917        // Insert a self-referential alias node.
918        let alias_node = Node::Alias {
919            name: "a".to_owned(),
920            loc: Span {
921                start: Pos::ORIGIN,
922                end: Pos::ORIGIN,
923            },
924            leading_comments: Vec::new(),
925            trailing_comment: None,
926        };
927        state.anchor_map.insert("a".to_owned(), alias_node.clone());
928        let mut in_progress = HashSet::new();
929        let result = state.expand_node(alias_node, &mut in_progress);
930        assert!(
931            matches!(result, Err(LoadError::CircularAlias { .. })),
932            "expected CircularAlias, got: {result:?}"
933        );
934    }
935
936    // -----------------------------------------------------------------------
937    // Bug A: comment between mapping key and its collection value
938    // -----------------------------------------------------------------------
939
940    // UT-A1: comment between key and nested mapping is attached to first entry.
941    #[test]
942    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
943        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
944        let root = &docs[0].root;
945        // root is a mapping: outer -> { inner: val }
946        // The comment "# Style 1" appears between "outer" key and the nested
947        // MappingStart.  After the fix it must be attached to the "inner" key.
948        let Node::Mapping { entries, .. } = root else {
949            panic!("expected root mapping");
950        };
951        assert_eq!(entries.len(), 1);
952        let (_outer_key, outer_value) = &entries[0];
953        let Node::Mapping {
954            entries: nested, ..
955        } = outer_value
956        else {
957            panic!("expected nested mapping");
958        };
959        assert_eq!(nested.len(), 1);
960        let (inner_key, _) = &nested[0];
961        assert_eq!(
962            inner_key.leading_comments(),
963            &["# Style 1"],
964            "comment should be attached to the first nested key"
965        );
966    }
967
968    // UT-A2: comment between key and nested sequence is attached to first item.
969    #[test]
970    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
971        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
972        let root = &docs[0].root;
973        let Node::Mapping { entries, .. } = root else {
974            panic!("expected root mapping");
975        };
976        let (_key, seq_value) = &entries[0];
977        let Node::Sequence { items, .. } = seq_value else {
978            panic!("expected sequence value");
979        };
980        // The comment "# leading" appears before the sequence items; after
981        // the fix it is attached to the first item.
982        assert_eq!(
983            items[0].leading_comments(),
984            &["# leading"],
985            "comment should be attached to first sequence item"
986        );
987    }
988
989    // UT-A3: multiple consecutive comments before a collection are all preserved.
990    #[test]
991    fn multiple_comments_between_key_and_collection_all_preserved() {
992        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
993        let root = &docs[0].root;
994        let Node::Mapping { entries, .. } = root else {
995            panic!("expected root mapping");
996        };
997        let (_key, seq_value) = &entries[0];
998        let Node::Sequence { items, .. } = seq_value else {
999            panic!("expected sequence value");
1000        };
1001        assert_eq!(
1002            items[0].leading_comments(),
1003            &["# first", "# second"],
1004            "both comments should be on first item"
1005        );
1006    }
1007
1008    // UT-A4: the KEY node itself has no leading comments from Bug-A fix.
1009    #[test]
1010    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1011        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1012        let root = &docs[0].root;
1013        let Node::Mapping { entries, .. } = root else {
1014            panic!("expected root mapping");
1015        };
1016        let (outer_key, _) = &entries[0];
1017        assert!(
1018            outer_key.leading_comments().is_empty(),
1019            "outer key should have no leading comments"
1020        );
1021        assert!(
1022            outer_key.trailing_comment().is_none(),
1023            "outer key should have no trailing comment"
1024        );
1025    }
1026
1027    // UT-A5: no comment between key and value leaves leading_comments empty.
1028    #[test]
1029    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1030        let docs = load("key:\n  inner: val\n").unwrap();
1031        let root = &docs[0].root;
1032        let Node::Mapping { entries, .. } = root else {
1033            panic!("expected root mapping");
1034        };
1035        let (_key, nested) = &entries[0];
1036        let Node::Mapping {
1037            entries: nested_entries,
1038            ..
1039        } = nested
1040        else {
1041            panic!("expected nested mapping");
1042        };
1043        let (inner_key, _) = &nested_entries[0];
1044        assert!(
1045            inner_key.leading_comments().is_empty(),
1046            "inner key should have no leading comments when there is no comment"
1047        );
1048    }
1049
1050    // -----------------------------------------------------------------------
1051    // Bug B: comment at end of collection preserved as leading on next sibling
1052    // -----------------------------------------------------------------------
1053
1054    // UT-B1: comment before SequenceEnd becomes leading on next mapping entry.
1055    #[test]
1056    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1057        let input =
1058            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
1059        let docs = load(input).unwrap();
1060        let root = &docs[0].root;
1061        let Node::Mapping { entries, .. } = root else {
1062            panic!("expected root mapping");
1063        };
1064        let (_lists_key, nested) = &entries[0];
1065        let Node::Mapping {
1066            entries: nested_entries,
1067            ..
1068        } = nested
1069        else {
1070            panic!("expected nested mapping");
1071        };
1072        assert_eq!(nested_entries.len(), 2);
1073        let (list_b_key, _) = &nested_entries[1];
1074        assert_eq!(
1075            list_b_key.leading_comments(),
1076            &["# Style 2"],
1077            "# Style 2 should be leading comment on list-b key"
1078        );
1079    }
1080
1081    // UT-B2: comment at end of nested sequence propagates to next mapping entry.
1082    #[test]
1083    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1084        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
1085        let docs = load(input).unwrap();
1086        let root = &docs[0].root;
1087        let Node::Mapping { entries, .. } = root else {
1088            panic!("expected root mapping");
1089        };
1090        let (_outer_key, outer_val) = &entries[0];
1091        let Node::Mapping {
1092            entries: nested, ..
1093        } = outer_val
1094        else {
1095            panic!("expected nested mapping");
1096        };
1097        assert_eq!(nested.len(), 2);
1098        let (b_key, _) = &nested[1];
1099        assert_eq!(
1100            b_key.leading_comments(),
1101            &["# between"],
1102            "# between should be leading comment on b key"
1103        );
1104    }
1105
1106    // UT-B3: comment at end of nested mapping propagates to next sibling.
1107    #[test]
1108    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1109        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
1110        let docs = load(input).unwrap();
1111        let root = &docs[0].root;
1112        let Node::Mapping { entries, .. } = root else {
1113            panic!("expected root mapping");
1114        };
1115        let (_parent_key, parent_val) = &entries[0];
1116        let Node::Mapping {
1117            entries: siblings, ..
1118        } = parent_val
1119        else {
1120            panic!("expected parent mapping value");
1121        };
1122        assert_eq!(siblings.len(), 2);
1123        let (child2_key, _) = &siblings[1];
1124        assert_eq!(
1125            child2_key.leading_comments(),
1126            &["# end-of-child1"],
1127            "# end-of-child1 should be leading comment on child2 key"
1128        );
1129    }
1130
    // UT-B4: an overflow comment at the end of the last collection in a
    // document does not break loading or drop sequence items.
    #[test]
    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
        // The comment "# tail" follows the last item of the `items` sequence
        // and there is no next sibling for it to attach to.
        //
        // Scope of this test: it checks only that loading succeeds and that
        // both sequence items are still present.  It does NOT assert where
        // (or whether) "# tail" ends up in the AST.
        // NOTE(review): if the comment is meant to be retained somewhere
        // reachable, add an explicit assertion for that location.
        let input = "items:\n  - a\n  - b\n  # tail\n";
        let docs = load(input).unwrap();
        // Loading produced at least one document (`unwrap` above already
        // rules out a load error).
        assert!(!docs.is_empty(), "document should parse without error");
        // The trailing comment must not cost us any of the sequence items.
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_items_key, seq_val) = &entries[0];
        let Node::Sequence { items, .. } = seq_val else {
            panic!("expected sequence value");
        };
        assert_eq!(items.len(), 2, "sequence items must not be lost");
    }
1154
1155    // UT-B5: no overflow comments when collection ends cleanly.
1156    #[test]
1157    fn no_overflow_comments_when_collection_ends_cleanly() {
1158        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
1159        let root = &docs[0].root;
1160        let Node::Mapping { entries, .. } = root else {
1161            panic!("expected root mapping");
1162        };
1163        let (_key, seq_val) = &entries[0];
1164        let Node::Sequence { items, .. } = seq_val else {
1165            panic!("expected sequence value");
1166        };
1167        for item in items {
1168            assert!(
1169                item.leading_comments().is_empty(),
1170                "items should have no leading comments"
1171            );
1172        }
1173    }
1174
1175    // -----------------------------------------------------------------------
1176    // Combined scenarios
1177    // -----------------------------------------------------------------------
1178
1179    // UT-C1: exact bug-report input — both comments survive.
1180    #[test]
1181    fn original_bug_report_input_preserves_both_comments() {
1182        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
1183        let docs = load(input).unwrap();
1184        let root = &docs[0].root;
1185        let Node::Mapping { entries, .. } = root else {
1186            panic!("expected root mapping");
1187        };
1188        let (_lists_key, nested) = &entries[0];
1189        let Node::Mapping {
1190            entries: nested_entries,
1191            ..
1192        } = nested
1193        else {
1194            panic!("expected nested mapping");
1195        };
1196        assert_eq!(nested_entries.len(), 2);
1197        let (first_key, _) = &nested_entries[0];
1198        let (second_key, _) = &nested_entries[1];
1199        assert_eq!(
1200            first_key.leading_comments(),
1201            &["# Style 1"],
1202            "list-a should have # Style 1 as leading comment"
1203        );
1204        assert_eq!(
1205            second_key.leading_comments(),
1206            &["# Style 2"],
1207            "list-b should have # Style 2 as leading comment"
1208        );
1209    }
1210
1211    // UT-C2: leading and trailing comments on sibling entries both preserved.
1212    #[test]
1213    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1214        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
1215        let docs = load(input).unwrap();
1216        let root = &docs[0].root;
1217        let Node::Mapping { entries, .. } = root else {
1218            panic!("expected root mapping");
1219        };
1220        let (_map_key, map_val) = &entries[0];
1221        let Node::Mapping {
1222            entries: siblings, ..
1223        } = map_val
1224        else {
1225            panic!("expected mapping value");
1226        };
1227        assert_eq!(siblings.len(), 2);
1228        let (key1, val1) = &siblings[0];
1229        let (key2, _) = &siblings[1];
1230        assert_eq!(key1.leading_comments(), &["# leading"]);
1231        assert_eq!(val1.trailing_comment(), Some("# trailing"));
1232        assert_eq!(key2.leading_comments(), &["# next-leading"]);
1233    }
1234
1235    // UT-C3: deeply nested overflow comments propagate to correct sibling.
1236    #[test]
1237    fn deeply_nested_overflow_comments_reach_correct_sibling() {
1238        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
1239        let docs = load(input).unwrap();
1240        let root = &docs[0].root;
1241        let Node::Mapping { entries, .. } = root else {
1242            panic!("expected root mapping");
1243        };
1244        let (_top_key, top_val) = &entries[0];
1245        let Node::Mapping {
1246            entries: top_entries,
1247            ..
1248        } = top_val
1249        else {
1250            panic!("expected top-level mapping");
1251        };
1252        assert_eq!(top_entries.len(), 2);
1253        let (next_key, _) = &top_entries[1];
1254        assert_eq!(
1255            next_key.leading_comments(),
1256            &["# deep-overflow"],
1257            "# deep-overflow should propagate from nested sequence to next sibling"
1258        );
1259    }
1260
1261    // -----------------------------------------------------------------------
1262    // UT-D: Document marker flags (explicit_start / explicit_end)
1263    // -----------------------------------------------------------------------
1264
1265    // UT-D1: Bare document (no markers) → both flags false
1266    #[test]
1267    fn bare_document_has_both_flags_false() {
1268        let docs = load("key: value\n").expect("load failed");
1269        assert_eq!(docs.len(), 1);
1270        assert!(!docs[0].explicit_start, "expected explicit_start=false");
1271        assert!(!docs[0].explicit_end, "expected explicit_end=false");
1272    }
1273
1274    // UT-D2: Document with `---` start marker → explicit_start true, explicit_end false
1275    #[test]
1276    fn document_with_start_marker_has_explicit_start_true() {
1277        let docs = load("---\nkey: value\n").expect("load failed");
1278        assert_eq!(docs.len(), 1);
1279        assert!(docs[0].explicit_start, "expected explicit_start=true");
1280        assert!(!docs[0].explicit_end, "expected explicit_end=false");
1281    }
1282
1283    // UT-D3: Document with `...` end marker → explicit_start false, explicit_end true
1284    #[test]
1285    fn document_with_end_marker_has_explicit_end_true() {
1286        let docs = load("key: value\n...\n").expect("load failed");
1287        assert_eq!(docs.len(), 1);
1288        assert!(!docs[0].explicit_start, "expected explicit_start=false");
1289        assert!(docs[0].explicit_end, "expected explicit_end=true");
1290    }
1291
1292    // UT-D4: Document with both `---` and `...` → both flags true
1293    #[test]
1294    fn document_with_both_markers_has_both_flags_true() {
1295        let docs = load("---\nkey: value\n...\n").expect("load failed");
1296        assert_eq!(docs.len(), 1);
1297        assert!(docs[0].explicit_start, "expected explicit_start=true");
1298        assert!(docs[0].explicit_end, "expected explicit_end=true");
1299    }
1300
1301    // UT-D5: Multi-document — each document's flags are independent
1302    #[test]
1303    fn multi_document_flags_are_independent() {
1304        // doc1: no explicit start/end (bare)
1305        // doc2: explicit start (---), explicit end (...)
1306        // doc3: explicit start (---), no explicit end
1307        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
1308        assert_eq!(docs.len(), 3);
1309        assert!(!docs[0].explicit_start, "doc1 explicit_start");
1310        assert!(!docs[0].explicit_end, "doc1 explicit_end");
1311        assert!(docs[1].explicit_start, "doc2 explicit_start");
1312        assert!(docs[1].explicit_end, "doc2 explicit_end");
1313        assert!(docs[2].explicit_start, "doc3 explicit_start");
1314        assert!(!docs[2].explicit_end, "doc3 explicit_end");
1315    }
1316
1317    // UT-D6: Empty document with explicit markers → flags are set
1318    #[test]
1319    fn empty_document_with_explicit_markers_has_both_flags_true() {
1320        let docs = load("---\n...\n").expect("load failed");
1321        assert_eq!(docs.len(), 1);
1322        assert!(docs[0].explicit_start, "expected explicit_start=true");
1323        assert!(docs[0].explicit_end, "expected explicit_end=true");
1324    }
1325}