Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
//! - `max_expanded_nodes` — resolved mode only: caps the per-document total
//!   of anchored node definitions plus nodes produced by alias expansion
//!   (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32mod comments;
33mod reloc;
34mod stream;
35
36use comments::{attach_leading_comments, attach_trailing_comment};
37use reloc::reloc;
38use stream::{
39    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
40    with_hash_prefix,
41};
42
43use std::collections::{HashMap, HashSet};
44use std::iter::Peekable;
45
46use crate::error::Error;
47use crate::event::{Event, ScalarStyle};
48use crate::node::{Document, Node};
49use crate::pos::{Pos, Span};
50
51// ---------------------------------------------------------------------------
52// Public error type
53// ---------------------------------------------------------------------------
54
55/// Errors produced by the loader.
56#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
57pub enum LoadError {
58    /// The event stream contained a parse error.
59    #[error("parse error at {pos:?}: {message}")]
60    Parse {
61        /// Source position where the parse error was detected.
62        pos: Pos,
63        /// Human-readable description of the error.
64        message: String,
65    },
66
67    /// The event stream ended unexpectedly mid-document.
68    #[error("unexpected end of event stream")]
69    UnexpectedEndOfStream,
70
71    /// Nesting depth exceeded the configured limit.
72    #[error("nesting depth limit exceeded (max: {limit})")]
73    NestingDepthLimitExceeded {
74        /// The configured nesting depth limit that was exceeded.
75        limit: usize,
76    },
77
78    /// Too many distinct anchor names were defined.
79    #[error("anchor count limit exceeded (max: {limit})")]
80    AnchorCountLimitExceeded {
81        /// The configured anchor count limit that was exceeded.
82        limit: usize,
83    },
84
85    /// Alias expansion produced more nodes than the configured limit.
86    #[error("alias expansion node limit exceeded (max: {limit})")]
87    AliasExpansionLimitExceeded {
88        /// The configured expansion node limit that was exceeded.
89        limit: usize,
90    },
91
92    /// A circular alias reference was detected.
93    #[error("circular alias reference: '{name}'")]
94    CircularAlias {
95        /// The anchor name involved in the cycle.
96        name: String,
97    },
98
99    /// An alias referred to an anchor that was never defined.
100    #[error("undefined alias: '{name}'")]
101    UndefinedAlias {
102        /// The alias name that had no corresponding anchor definition.
103        name: String,
104    },
105}
106
107// Convenience alias used inside the module.
108type Result<T> = std::result::Result<T, LoadError>;
109
110// Type alias for the peekable event stream used throughout the loader.
111type EventStream<'a> =
112    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
113
114// ---------------------------------------------------------------------------
115// Configuration
116// ---------------------------------------------------------------------------
117
/// Strategy the loader applies when it encounters an alias reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Keep each alias as a [`Node::Alias`] node without expanding it.  This
    /// is the default and the mode intended for LSP use.
    Lossless,
    /// Replace each alias with a copy of its anchored node; bounded by the
    /// `max_expanded_nodes` limit.
    Resolved,
}
126
127/// Security and behaviour options for the loader.
128#[derive(Debug, Clone)]
129pub struct LoaderOptions {
130    /// Maximum mapping/sequence nesting depth before returning
131    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
132    pub max_nesting_depth: usize,
133    /// Maximum number of distinct anchor names per document before returning
134    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
135    pub max_anchors: usize,
136    /// Maximum total nodes produced by alias expansion in resolved mode before
137    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
138    pub max_expanded_nodes: usize,
139    /// Controls how alias references are handled during loading.
140    pub mode: LoadMode,
141}
142
143impl Default for LoaderOptions {
144    fn default() -> Self {
145        Self {
146            max_nesting_depth: 512,
147            max_anchors: 10_000,
148            max_expanded_nodes: 1_000_000,
149            mode: LoadMode::Lossless,
150        }
151    }
152}
153
154// ---------------------------------------------------------------------------
155// Builder
156// ---------------------------------------------------------------------------
157
158/// Builder for configuring and creating a [`Loader`].
159///
160/// ```
161/// use rlsp_yaml_parser::loader::LoaderBuilder;
162///
163/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
164/// assert_eq!(docs.len(), 1);
165/// ```
166pub struct LoaderBuilder {
167    options: LoaderOptions,
168}
169
170impl LoaderBuilder {
171    /// Create a builder with default options (lossless mode, safe limits).
172    #[must_use]
173    pub fn new() -> Self {
174        Self {
175            options: LoaderOptions::default(),
176        }
177    }
178
179    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
180    #[must_use]
181    pub const fn lossless(mut self) -> Self {
182        self.options.mode = LoadMode::Lossless;
183        self
184    }
185
186    /// Use resolved mode — aliases are expanded inline.
187    #[must_use]
188    pub const fn resolved(mut self) -> Self {
189        self.options.mode = LoadMode::Resolved;
190        self
191    }
192
193    /// Override the maximum nesting depth.
194    #[must_use]
195    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
196        self.options.max_nesting_depth = limit;
197        self
198    }
199
200    /// Override the maximum anchor count.
201    #[must_use]
202    pub const fn max_anchors(mut self, limit: usize) -> Self {
203        self.options.max_anchors = limit;
204        self
205    }
206
207    /// Override the maximum expanded-node count (resolved mode only).
208    #[must_use]
209    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
210        self.options.max_expanded_nodes = limit;
211        self
212    }
213
214    /// Consume the builder and produce a [`Loader`].
215    #[must_use]
216    pub const fn build(self) -> Loader {
217        Loader {
218            options: self.options,
219        }
220    }
221}
222
223impl Default for LoaderBuilder {
224    fn default() -> Self {
225        Self::new()
226    }
227}
228
229// ---------------------------------------------------------------------------
230// Loader
231// ---------------------------------------------------------------------------
232
233/// A configured YAML loader.
234pub struct Loader {
235    options: LoaderOptions,
236}
237
238impl Loader {
239    /// Load YAML text into a sequence of documents.
240    ///
241    /// # Errors
242    ///
243    /// Returns `Err` if the input contains a parse error, exceeds a configured
244    /// security limit, or (in resolved mode) references an undefined anchor.
245    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
246        let mut state = LoadState::new(&self.options);
247        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
248            Box::new(crate::parse_events(input));
249        state.run(iter.peekable())
250    }
251}
252
253// ---------------------------------------------------------------------------
254// Convenience entry point
255// ---------------------------------------------------------------------------
256
257/// Load YAML text using lossless mode and default security limits.
258///
259/// Returns one `Document<Span>` per YAML document in the stream.
260///
261/// # Errors
262///
263/// Returns `Err` if the input contains a parse error or exceeds a security
264/// limit (nesting depth or anchor count).
265///
266/// ```
267/// use rlsp_yaml_parser::loader::load;
268///
269/// let docs = load("hello\n").unwrap();
270/// assert_eq!(docs.len(), 1);
271/// ```
272pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
273    LoaderBuilder::new().lossless().build().load(input)
274}
275
276// ---------------------------------------------------------------------------
277// Internal loader state
278// ---------------------------------------------------------------------------
279
280struct LoadState<'opt> {
281    options: &'opt LoaderOptions,
282    /// Anchors registered so far in the current document: name → node.
283    anchor_map: HashMap<String, Node<Span>>,
284    /// Count of distinct anchors registered (resets per document).
285    anchor_count: usize,
286    /// Current nesting depth (incremented on Begin, decremented on End).
287    depth: usize,
288    /// Total nodes produced via alias expansion (resolved mode only).
289    expanded_nodes: usize,
290    /// Leading comments accumulated by `parse_node` when it encounters a
291    /// `Comment` event between a mapping key and its value's collection start,
292    /// or by a sequence/mapping loop when it hits End with leftover leading
293    /// comments.  The next mapping/sequence loop iteration picks these up and
294    /// prepends them to the next entry's leading comments.
295    pending_leading: Vec<String>,
296}
297
298impl<'opt> LoadState<'opt> {
299    fn new(options: &'opt LoaderOptions) -> Self {
300        Self {
301            options,
302            anchor_map: HashMap::new(),
303            anchor_count: 0,
304            depth: 0,
305            expanded_nodes: 0,
306            pending_leading: Vec::new(),
307        }
308    }
309
310    fn reset_for_document(&mut self) {
311        self.anchor_map.clear();
312        self.anchor_count = 0;
313        self.expanded_nodes = 0;
314        self.pending_leading.clear();
315    }
316
317    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
318        let mut docs: Vec<Document<Span>> = Vec::new();
319
320        // Skip StreamStart.
321        match stream.next() {
322            Some(Ok(_)) | None => {}
323            Some(Err(e)) => {
324                return Err(LoadError::Parse {
325                    pos: e.pos,
326                    message: e.message,
327                });
328            }
329        }
330
331        loop {
332            // Skip any leading comments or unknown events before a document.
333            match next_from(&mut stream)? {
334                None | Some((Event::StreamEnd, _)) => break,
335                Some((
336                    Event::DocumentStart {
337                        explicit,
338                        version,
339                        tag_directives,
340                    },
341                    _,
342                )) => {
343                    let doc_explicit_start = explicit;
344                    let doc_version = version;
345                    let doc_tags = tag_directives;
346                    self.reset_for_document();
347
348                    let mut doc_comments: Vec<String> = Vec::new();
349
350                    // Consume leading comments at document level.
351                    consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
352
353                    // Parse root node (may be absent for empty documents).
354                    let root = if is_document_end(stream.peek()) {
355                        // Empty document — emit an empty scalar as root.
356                        empty_scalar()
357                    } else {
358                        self.parse_node(&mut stream)?
359                    };
360
361                    // Consume DocumentEnd if present and capture its explicit flag.
362                    let doc_explicit_end =
363                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
364                            let end_explicit = *explicit;
365                            let _ = stream.next();
366                            end_explicit
367                        } else {
368                            false
369                        };
370
371                    docs.push(Document {
372                        root,
373                        version: doc_version,
374                        tags: doc_tags,
375                        comments: doc_comments,
376                        explicit_start: doc_explicit_start,
377                        explicit_end: doc_explicit_end,
378                    });
379                }
380                Some(_) => {
381                    // Comment or any other stray event outside a document — skip.
382                }
383            }
384        }
385
386        Ok(docs)
387    }
388
389    /// Parse a single node from the stream.
390    ///
391    /// Advances the stream past the node (including end-of-container events).
392    #[expect(
393        clippy::too_many_lines,
394        reason = "match-on-event-type; splitting would obscure flow"
395    )]
396    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
397        // Structural end events close the caller's collection loop — do NOT
398        // consume them here.  Return an empty scalar and leave the event in
399        // the stream so the outer mapping/sequence loop can see and consume it.
400        if matches!(
401            stream.peek(),
402            Some(Ok((
403                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
404                _
405            )))
406        ) {
407            return Ok(empty_scalar());
408        }
409
410        let Some((event, span)) = next_from(stream)? else {
411            return Ok(empty_scalar());
412        };
413
414        match event {
415            Event::Scalar {
416                value,
417                style,
418                anchor,
419                tag,
420            } => {
421                let node = Node::Scalar {
422                    value: value.into_owned(),
423                    style,
424                    anchor: anchor.map(str::to_owned),
425                    tag: tag.map(std::borrow::Cow::into_owned),
426                    loc: span,
427                    leading_comments: None,
428                    trailing_comment: None,
429                };
430                if let Some(name) = node.anchor() {
431                    self.register_anchor(name.to_owned(), &node)?;
432                }
433                Ok(node)
434            }
435
436            Event::MappingStart { anchor, tag, style } => {
437                let anchor = anchor.map(str::to_owned);
438                let tag = tag.map(std::borrow::Cow::into_owned);
439
440                self.depth += 1;
441                if self.depth > self.options.max_nesting_depth {
442                    return Err(LoadError::NestingDepthLimitExceeded {
443                        limit: self.options.max_nesting_depth,
444                    });
445                }
446
447                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
448                let mut end_span = span;
449
450                loop {
451                    // Consume leading comments before the next key.  Also
452                    // collect any comments that spilled over from a sibling
453                    // value's collection end (stored in `pending_leading`).
454                    let raw_leading = consume_leading_comments(stream)?;
455                    let leading = if self.pending_leading.is_empty() {
456                        raw_leading
457                    } else {
458                        let mut combined = std::mem::take(&mut self.pending_leading);
459                        combined.extend(raw_leading);
460                        combined
461                    };
462
463                    match stream.peek() {
464                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
465                            // Save any collected leading comments so the next
466                            // sibling entry in the parent collection can inherit
467                            // them (e.g. a comment just before MappingEnd that
468                            // belongs to the following mapping entry).
469                            if !leading.is_empty() {
470                                self.pending_leading = leading;
471                            }
472                            break;
473                        }
474                        Some(Err(_)) => {
475                            // Consume the error.
476                            return Err(match stream.next() {
477                                Some(Err(e)) => LoadError::Parse {
478                                    pos: e.pos,
479                                    message: e.message,
480                                },
481                                _ => LoadError::UnexpectedEndOfStream,
482                            });
483                        }
484                        Some(Ok(_)) => {}
485                    }
486
487                    let mut key = self.parse_node(stream)?;
488                    attach_leading_comments(&mut key, leading);
489
490                    let mut value = self.parse_node(stream)?;
491
492                    // Trailing comment on the value — peek for inline comment.
493                    // Block scalars (literal `|` and folded `>`) consume trailing
494                    // blank lines as part of chomping; their span.end falls on the
495                    // first line after the scalar, which can coincide with the
496                    // next comment's line number.  That would falsely attach a
497                    // leading inter-node comment as a trailing inline comment.
498                    // Block scalars never have an inline comment on their content
499                    // lines, so skip trailing-comment detection for them.
500                    if !is_block_scalar(&value)
501                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
502                    {
503                        let value_end_line = node_end_line(&value);
504                        if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
505                            attach_trailing_comment(&mut value, trail);
506                        }
507                    }
508
509                    entries.push((key, value));
510                }
511
512                // Consume MappingEnd and capture its span.
513                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
514                    end_span = *end;
515                    let _ = stream.next();
516                }
517                self.depth -= 1;
518
519                let node = Node::Mapping {
520                    entries,
521                    style,
522                    anchor: anchor.clone(),
523                    tag,
524                    loc: Span {
525                        start: span.start,
526                        end: end_span.end,
527                    },
528                    leading_comments: None,
529                    trailing_comment: None,
530                };
531                if let Some(name) = anchor {
532                    self.register_anchor(name, &node)?;
533                }
534                Ok(node)
535            }
536
537            Event::SequenceStart { anchor, tag, style } => {
538                let anchor = anchor.map(str::to_owned);
539                let tag = tag.map(std::borrow::Cow::into_owned);
540
541                self.depth += 1;
542                if self.depth > self.options.max_nesting_depth {
543                    return Err(LoadError::NestingDepthLimitExceeded {
544                        limit: self.options.max_nesting_depth,
545                    });
546                }
547
548                let mut items: Vec<Node<Span>> = Vec::new();
549                let mut end_span = span;
550
551                loop {
552                    // Collect leading comments before the next item.  Also
553                    // collect any comments that spilled over from a sibling
554                    // value's collection end (stored in `pending_leading`).
555                    let raw_leading = consume_leading_comments(stream)?;
556                    let leading = if self.pending_leading.is_empty() {
557                        raw_leading
558                    } else {
559                        let mut combined = std::mem::take(&mut self.pending_leading);
560                        combined.extend(raw_leading);
561                        combined
562                    };
563
564                    match stream.peek() {
565                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
566                            // Save any collected leading comments so the next
567                            // sibling entry in the parent collection can inherit
568                            // them (e.g. a comment just before SequenceEnd that
569                            // belongs to the following sequence item or mapping
570                            // entry in the parent).
571                            if !leading.is_empty() {
572                                self.pending_leading = leading;
573                            }
574                            break;
575                        }
576                        Some(Err(_)) => {
577                            // Consume the error.
578                            return Err(match stream.next() {
579                                Some(Err(e)) => LoadError::Parse {
580                                    pos: e.pos,
581                                    message: e.message,
582                                },
583                                _ => LoadError::UnexpectedEndOfStream,
584                            });
585                        }
586                        Some(Ok(_)) => {}
587                    }
588
589                    let mut item = self.parse_node(stream)?;
590                    attach_leading_comments(&mut item, leading);
591
592                    // Trailing comment on the item — peek for inline comment.
593                    // Block scalars are excluded for the same reason as in the
594                    // mapping path: their span.end can coincide with the next
595                    // comment's line, falsely turning a leading comment into a
596                    // trailing one.
597                    if !is_block_scalar(&item)
598                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
599                    {
600                        let item_end_line = node_end_line(&item);
601                        if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
602                            attach_trailing_comment(&mut item, trail);
603                        }
604                    }
605
606                    items.push(item);
607                }
608
609                // Consume SequenceEnd and capture its span.
610                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
611                    end_span = *end;
612                    let _ = stream.next();
613                }
614                self.depth -= 1;
615
616                let node = Node::Sequence {
617                    items,
618                    style,
619                    anchor: anchor.clone(),
620                    tag,
621                    loc: Span {
622                        start: span.start,
623                        end: end_span.end,
624                    },
625                    leading_comments: None,
626                    trailing_comment: None,
627                };
628                if let Some(name) = anchor {
629                    self.register_anchor(name, &node)?;
630                }
631                Ok(node)
632            }
633
634            Event::Alias { name } => {
635                let name = name.to_owned();
636                self.resolve_alias(&name, span)
637            }
638
639            Event::Comment { text } => {
640                // Comment between a mapping key and its collection value (e.g.
641                // `key:\n  # comment\n  subkey: val`).  The comment appears
642                // after the key Scalar and before the MappingStart/SequenceStart
643                // that begins the value.  Save it in `pending_leading` so the
644                // first entry of the upcoming collection can inherit it.
645                self.pending_leading.push(with_hash_prefix(text));
646                self.parse_node(stream)
647            }
648
649            Event::StreamStart
650            | Event::StreamEnd
651            | Event::DocumentStart { .. }
652            | Event::DocumentEnd { .. }
653            | Event::MappingEnd
654            | Event::SequenceEnd => {
655                // Structural event where a node is expected — return empty scalar.
656                Ok(empty_scalar())
657            }
658        }
659    }
660
661    fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
662        if !self.anchor_map.contains_key(&name) {
663            self.anchor_count += 1;
664            if self.anchor_count > self.options.max_anchors {
665                return Err(LoadError::AnchorCountLimitExceeded {
666                    limit: self.options.max_anchors,
667                });
668            }
669        }
670        // Count the anchor node itself toward the expansion budget in resolved
671        // mode so that the total reflects every node present in the expanded
672        // document (anchor definition + each alias expansion).
673        if self.options.mode == LoadMode::Resolved {
674            self.expanded_nodes += 1;
675            if self.expanded_nodes > self.options.max_expanded_nodes {
676                return Err(LoadError::AliasExpansionLimitExceeded {
677                    limit: self.options.max_expanded_nodes,
678                });
679            }
680            self.anchor_map.insert(name, node.clone());
681        } else {
682            // Lossless mode never reads anchor_map for expansion; store a
683            // zero-cost placeholder so contains_key still detects re-definitions.
684            self.anchor_map.insert(name, empty_scalar());
685        }
686        Ok(())
687    }
688
689    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
690        match self.options.mode {
691            LoadMode::Lossless => Ok(Node::Alias {
692                name: name.to_owned(),
693                loc,
694                leading_comments: None,
695                trailing_comment: None,
696            }),
697            LoadMode::Resolved => {
698                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
699                    LoadError::UndefinedAlias {
700                        name: name.to_owned(),
701                    }
702                })?;
703                let mut in_progress: HashSet<String> = HashSet::new();
704                self.expand_node(anchored, &mut in_progress)
705            }
706        }
707    }
708
709    /// Recursively expand a node, counting every node produced against the
710    /// expansion limit and checking for cycles via `in_progress`.
711    fn expand_node(
712        &mut self,
713        node: Node<Span>,
714        in_progress: &mut HashSet<String>,
715    ) -> Result<Node<Span>> {
716        // Increment at the top — before child recursion — so every node
717        // (including non-alias nodes inside expanded trees) counts against the
718        // budget.
719        self.expanded_nodes += 1;
720        if self.expanded_nodes > self.options.max_expanded_nodes {
721            return Err(LoadError::AliasExpansionLimitExceeded {
722                limit: self.options.max_expanded_nodes,
723            });
724        }
725
726        match node {
727            Node::Alias { ref name, loc, .. } => {
728                if in_progress.contains(name) {
729                    return Err(LoadError::CircularAlias { name: name.clone() });
730                }
731                let target = self
732                    .anchor_map
733                    .get(name)
734                    .cloned()
735                    .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
736                in_progress.insert(name.clone());
737                let expanded = self.expand_node(target, in_progress)?;
738                in_progress.remove(name);
739                // Re-stamp with the alias site's location.
740                Ok(reloc(expanded, loc))
741            }
742            Node::Mapping {
743                entries,
744                style,
745                anchor,
746                tag,
747                loc,
748                leading_comments,
749                trailing_comment,
750            } => {
751                let mut expanded_entries = Vec::with_capacity(entries.len());
752                for (k, v) in entries {
753                    let ek = self.expand_node(k, in_progress)?;
754                    let ev = self.expand_node(v, in_progress)?;
755                    expanded_entries.push((ek, ev));
756                }
757                Ok(Node::Mapping {
758                    entries: expanded_entries,
759                    style,
760                    anchor,
761                    tag,
762                    loc,
763                    leading_comments,
764                    trailing_comment,
765                })
766            }
767            Node::Sequence {
768                items,
769                style,
770                anchor,
771                tag,
772                loc,
773                leading_comments,
774                trailing_comment,
775            } => {
776                let mut expanded_items = Vec::with_capacity(items.len());
777                for item in items {
778                    expanded_items.push(self.expand_node(item, in_progress)?);
779                }
780                Ok(Node::Sequence {
781                    items: expanded_items,
782                    style,
783                    anchor,
784                    tag,
785                    loc,
786                    leading_comments,
787                    trailing_comment,
788                })
789            }
790            // Scalars and already-resolved nodes — pass through.
791            scalar @ Node::Scalar { .. } => Ok(scalar),
792        }
793    }
794}
795
796/// Return `true` if the peeked item signals end of document (or stream).
797const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
798    matches!(
799        peeked,
800        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
801    )
802}
803
804/// Return the line number of a node's span end position.
805///
806/// Used to determine whether the next `Comment` event is trailing (same line)
807/// or leading (different line).
808#[inline]
809const fn node_end_line(node: &Node<Span>) -> usize {
810    match node {
811        Node::Scalar { loc, .. }
812        | Node::Mapping { loc, .. }
813        | Node::Sequence { loc, .. }
814        | Node::Alias { loc, .. } => loc.end.line,
815    }
816}
817
818/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
819///
820/// Block scalars consume trailing blank lines as part of chomping, so their
821/// `span.end` falls on the line *after* the last consumed line.  This means a
822/// comment on the immediately following line has the same line number as
823/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
824/// classify it as an inline trailing comment.  The caller uses this predicate
825/// to skip trailing-comment detection for block scalars.
826#[inline]
827const fn is_block_scalar(node: &Node<Span>) -> bool {
828    matches!(
829        node,
830        Node::Scalar {
831            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
832            ..
833        }
834    )
835}
836
837// ---------------------------------------------------------------------------
838// Node helpers
839// ---------------------------------------------------------------------------
840
841const fn empty_scalar() -> Node<Span> {
842    Node::Scalar {
843        value: String::new(),
844        style: ScalarStyle::Plain,
845        anchor: None,
846        tag: None,
847        loc: Span {
848            start: Pos::ORIGIN,
849            end: Pos::ORIGIN,
850        },
851        leading_comments: None,
852        trailing_comment: None,
853    }
854}
855
856// ---------------------------------------------------------------------------
857// Tests
858// ---------------------------------------------------------------------------
859
#[cfg(test)]
#[expect(
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::indexing_slicing,
    clippy::panic,
    reason = "test code"
)]
mod tests {
    //! Unit tests for the loader: anchor bookkeeping, alias expansion,
    //! comment attachment around nested collections, and document marker
    //! flags.

    use super::*;

    // -----------------------------------------------------------------------
    // Test helpers
    // -----------------------------------------------------------------------

    /// Borrow the entries of `node`, panicking with `expected {what}` when
    /// `node` is not a mapping.
    ///
    /// `#[track_caller]` keeps the reported panic location inside the test
    /// that called the helper.
    #[track_caller]
    fn mapping_entries<'a>(node: &'a Node<Span>, what: &str) -> &'a [(Node<Span>, Node<Span>)] {
        let Node::Mapping { entries, .. } = node else {
            panic!("expected {what}");
        };
        entries
    }

    /// Borrow the items of `node`, panicking with `expected {what}` when
    /// `node` is not a sequence.
    #[track_caller]
    fn sequence_items<'a>(node: &'a Node<Span>, what: &str) -> &'a [Node<Span>] {
        let Node::Sequence { items, .. } = node else {
            panic!("expected {what}");
        };
        items
    }

    // UT-1: loader_state_resets_anchor_map_between_documents
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        // In resolved mode: anchor defined in doc 1 must not be visible in doc 2.
        let result = LoaderBuilder::new()
            .resolved()
            .build()
            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
        assert!(
            result.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        assert!(matches!(
            result.unwrap_err(),
            LoadError::UndefinedAlias { .. }
        ));
    }

    // UT-2: register_anchor_increments_count
    #[test]
    fn register_anchor_increments_count() {
        let options = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let node = Node::Scalar {
            value: "x".to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            tag: None,
            loc: Span {
                start: Pos::ORIGIN,
                end: Pos::ORIGIN,
            },
            leading_comments: None,
            trailing_comment: None,
        };
        // The first two registrations fit within the limit of 2; the third must fail.
        assert!(state.register_anchor("a".to_owned(), &node).is_ok());
        assert!(state.register_anchor("b".to_owned(), &node).is_ok());
        let err = state
            .register_anchor("c".to_owned(), &node)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    // UT-3: expand_node_detects_circular_alias
    #[test]
    fn expand_node_detects_circular_alias() {
        let options = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        // Insert a self-referential alias node.
        let alias_node = Node::Alias {
            name: "a".to_owned(),
            loc: Span {
                start: Pos::ORIGIN,
                end: Pos::ORIGIN,
            },
            leading_comments: None,
            trailing_comment: None,
        };
        state.anchor_map.insert("a".to_owned(), alias_node.clone());
        let mut in_progress = HashSet::new();
        let result = state.expand_node(alias_node, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }

    // -----------------------------------------------------------------------
    // Bug A: comment between mapping key and its collection value
    // -----------------------------------------------------------------------

    // UT-A1: comment between key and nested mapping is attached to first entry.
    #[test]
    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
        // root is a mapping: outer -> { inner: val }
        // The comment "# Style 1" appears between "outer" key and the nested
        // MappingStart.  After the fix it must be attached to the "inner" key.
        let entries = mapping_entries(&docs[0].root, "root mapping");
        assert_eq!(entries.len(), 1);
        let (_outer_key, outer_value) = &entries[0];
        let nested = mapping_entries(outer_value, "nested mapping");
        assert_eq!(nested.len(), 1);
        let (inner_key, _) = &nested[0];
        assert_eq!(
            inner_key.leading_comments(),
            &["# Style 1"],
            "comment should be attached to the first nested key"
        );
    }

    // UT-A2: comment between key and nested sequence is attached to first item.
    #[test]
    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_key, seq_value) = &entries[0];
        let items = sequence_items(seq_value, "sequence value");
        // The comment "# leading" appears before the sequence items; after
        // the fix it is attached to the first item.
        assert_eq!(
            items[0].leading_comments(),
            &["# leading"],
            "comment should be attached to first sequence item"
        );
    }

    // UT-A3: multiple consecutive comments before a collection are all preserved.
    #[test]
    fn multiple_comments_between_key_and_collection_all_preserved() {
        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_key, seq_value) = &entries[0];
        let items = sequence_items(seq_value, "sequence value");
        assert_eq!(
            items[0].leading_comments(),
            &["# first", "# second"],
            "both comments should be on first item"
        );
    }

    // UT-A4: the KEY node itself has no leading comments from Bug-A fix.
    #[test]
    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (outer_key, _) = &entries[0];
        assert!(
            outer_key.leading_comments().is_empty(),
            "outer key should have no leading comments"
        );
        assert!(
            outer_key.trailing_comment().is_none(),
            "outer key should have no trailing comment"
        );
    }

    // UT-A5: no comment between key and value leaves leading_comments empty.
    #[test]
    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
        let docs = load("key:\n  inner: val\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_key, nested) = &entries[0];
        let nested_entries = mapping_entries(nested, "nested mapping");
        let (inner_key, _) = &nested_entries[0];
        assert!(
            inner_key.leading_comments().is_empty(),
            "inner key should have no leading comments when there is no comment"
        );
    }

    // -----------------------------------------------------------------------
    // Bug B: comment at end of collection preserved as leading on next sibling
    // -----------------------------------------------------------------------

    // UT-B1: comment before SequenceEnd becomes leading on next mapping entry.
    #[test]
    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
        let input =
            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_lists_key, nested) = &entries[0];
        let nested_entries = mapping_entries(nested, "nested mapping");
        assert_eq!(nested_entries.len(), 2);
        let (list_b_key, _) = &nested_entries[1];
        assert_eq!(
            list_b_key.leading_comments(),
            &["# Style 2"],
            "# Style 2 should be leading comment on list-b key"
        );
    }

    // UT-B2: comment at end of nested sequence propagates to next mapping entry.
    #[test]
    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_outer_key, outer_val) = &entries[0];
        let nested = mapping_entries(outer_val, "nested mapping");
        assert_eq!(nested.len(), 2);
        let (b_key, _) = &nested[1];
        assert_eq!(
            b_key.leading_comments(),
            &["# between"],
            "# between should be leading comment on b key"
        );
    }

    // UT-B3: comment at end of nested mapping propagates to next sibling.
    #[test]
    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_parent_key, parent_val) = &entries[0];
        let siblings = mapping_entries(parent_val, "parent mapping value");
        assert_eq!(siblings.len(), 2);
        let (child2_key, _) = &siblings[1];
        assert_eq!(
            child2_key.leading_comments(),
            &["# end-of-child1"],
            "# end-of-child1 should be leading comment on child2 key"
        );
    }

    // UT-B4: overflow comment at top-level sequence end does not break loading.
    #[test]
    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
        // The comment "# tail" appears before SequenceEnd of the `items`
        // sequence and has no following sibling to attach to.  This test does
        // NOT pin down where (or whether) the comment text ends up in the AST;
        // it only guards against the overflow comment causing a load error or
        // the loss of sequence items.
        let input = "items:\n  - a\n  - b\n  # tail\n";
        let docs = load(input).unwrap();
        // The document must parse successfully (no panic, no error).
        assert!(!docs.is_empty(), "document should parse without error");
        // The # tail comment must not cause data loss — the sequence items are intact.
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_items_key, seq_val) = &entries[0];
        let items = sequence_items(seq_val, "sequence value");
        assert_eq!(items.len(), 2, "sequence items must not be lost");
    }

    // UT-B5: no overflow comments when collection ends cleanly.
    #[test]
    fn no_overflow_comments_when_collection_ends_cleanly() {
        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_key, seq_val) = &entries[0];
        let items = sequence_items(seq_val, "sequence value");
        for item in items {
            assert!(
                item.leading_comments().is_empty(),
                "items should have no leading comments"
            );
        }
    }

    // -----------------------------------------------------------------------
    // Combined scenarios
    // -----------------------------------------------------------------------

    // UT-C1: exact bug-report input — both comments survive.
    #[test]
    fn original_bug_report_input_preserves_both_comments() {
        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_lists_key, nested) = &entries[0];
        let nested_entries = mapping_entries(nested, "nested mapping");
        assert_eq!(nested_entries.len(), 2);
        let (first_key, _) = &nested_entries[0];
        let (second_key, _) = &nested_entries[1];
        assert_eq!(
            first_key.leading_comments(),
            &["# Style 1"],
            "list-a should have # Style 1 as leading comment"
        );
        assert_eq!(
            second_key.leading_comments(),
            &["# Style 2"],
            "list-b should have # Style 2 as leading comment"
        );
    }

    // UT-C2: leading and trailing comments on sibling entries both preserved.
    #[test]
    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_map_key, map_val) = &entries[0];
        let siblings = mapping_entries(map_val, "mapping value");
        assert_eq!(siblings.len(), 2);
        let (key1, val1) = &siblings[0];
        let (key2, _) = &siblings[1];
        assert_eq!(key1.leading_comments(), &["# leading"]);
        assert_eq!(val1.trailing_comment(), Some("# trailing"));
        assert_eq!(key2.leading_comments(), &["# next-leading"]);
    }

    // UT-C3: deeply nested overflow comments propagate to correct sibling.
    #[test]
    fn deeply_nested_overflow_comments_reach_correct_sibling() {
        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
        let docs = load(input).unwrap();
        let entries = mapping_entries(&docs[0].root, "root mapping");
        let (_top_key, top_val) = &entries[0];
        let top_entries = mapping_entries(top_val, "top-level mapping");
        assert_eq!(top_entries.len(), 2);
        let (next_key, _) = &top_entries[1];
        assert_eq!(
            next_key.leading_comments(),
            &["# deep-overflow"],
            "# deep-overflow should propagate from nested sequence to next sibling"
        );
    }

    // -----------------------------------------------------------------------
    // UT-D: Document marker flags (explicit_start / explicit_end)
    // -----------------------------------------------------------------------

    // UT-D1: Bare document (no markers) → both flags false
    #[test]
    fn bare_document_has_both_flags_false() {
        let docs = load("key: value\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(!docs[0].explicit_start, "expected explicit_start=false");
        assert!(!docs[0].explicit_end, "expected explicit_end=false");
    }

    // UT-D2: Document with `---` start marker → explicit_start true, explicit_end false
    #[test]
    fn document_with_start_marker_has_explicit_start_true() {
        let docs = load("---\nkey: value\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(docs[0].explicit_start, "expected explicit_start=true");
        assert!(!docs[0].explicit_end, "expected explicit_end=false");
    }

    // UT-D3: Document with `...` end marker → explicit_start false, explicit_end true
    #[test]
    fn document_with_end_marker_has_explicit_end_true() {
        let docs = load("key: value\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(!docs[0].explicit_start, "expected explicit_start=false");
        assert!(docs[0].explicit_end, "expected explicit_end=true");
    }

    // UT-D4: Document with both `---` and `...` → both flags true
    #[test]
    fn document_with_both_markers_has_both_flags_true() {
        let docs = load("---\nkey: value\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(docs[0].explicit_start, "expected explicit_start=true");
        assert!(docs[0].explicit_end, "expected explicit_end=true");
    }

    // UT-D5: Multi-document — each document's flags are independent
    #[test]
    fn multi_document_flags_are_independent() {
        // doc1: no explicit start/end (bare)
        // doc2: explicit start (---), explicit end (...)
        // doc3: explicit start (---), no explicit end
        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
        assert_eq!(docs.len(), 3);
        assert!(!docs[0].explicit_start, "doc1 explicit_start");
        assert!(!docs[0].explicit_end, "doc1 explicit_end");
        assert!(docs[1].explicit_start, "doc2 explicit_start");
        assert!(docs[1].explicit_end, "doc2 explicit_end");
        assert!(docs[2].explicit_start, "doc3 explicit_start");
        assert!(!docs[2].explicit_end, "doc3 explicit_end");
    }

    // UT-D6: Empty document with explicit markers → flags are set
    #[test]
    fn empty_document_with_explicit_markers_has_both_flags_true() {
        let docs = load("---\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(docs[0].explicit_start, "expected explicit_start=true");
        assert!(docs[0].explicit_end, "expected explicit_end=true");
    }
}