Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32use std::collections::{HashMap, HashSet};
33use std::iter::Peekable;
34
35use crate::error::Error;
36use crate::event::{Event, ScalarStyle};
37use crate::node::{Document, Node};
38use crate::pos::{Pos, Span};
39
40// ---------------------------------------------------------------------------
41// Public error type
42// ---------------------------------------------------------------------------
43
/// Errors produced by the loader.
///
/// The `Display` text of each variant is generated by `thiserror` from its
/// `#[error(...)]` attribute.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The event stream contained a parse error.
    ///
    /// `pos` and `message` are copied from the parser's error value.
    #[error("parse error at {pos:?}: {message}")]
    Parse { pos: Pos, message: String },

    /// The event stream ended unexpectedly mid-document.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// Nesting depth exceeded the configured limit.
    ///
    /// `limit` is the `max_nesting_depth` value that was in effect.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded { limit: usize },

    /// Too many distinct anchor names were defined.
    ///
    /// `limit` is the `max_anchors` value that was in effect.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded { limit: usize },

    /// Alias expansion produced more nodes than the configured limit.
    ///
    /// `limit` is the `max_expanded_nodes` value that was in effect.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded { limit: usize },

    /// A circular alias reference was detected.
    ///
    /// `name` is the alias name that was already being expanded.
    #[error("circular alias reference: '{name}'")]
    CircularAlias { name: String },

    /// An alias referred to an anchor that was never defined.
    ///
    /// `name` is the alias name that had no entry in the anchor map.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias { name: String },
}
75
// Convenience alias used inside the module: every fallible loader routine
// reports failure as a `LoadError`.
type Result<T> = std::result::Result<T, LoadError>;
78
// Type alias for the peekable event stream used throughout the loader.
// Each item is either an `(Event, Span)` pair or a parser `Error`.  The
// iterator is boxed so the concrete parser iterator type does not appear in
// signatures, and wrapped in `Peekable` so callers can look one event ahead.
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
82
83// ---------------------------------------------------------------------------
84// Configuration
85// ---------------------------------------------------------------------------
86
/// Loader mode — controls how alias references are handled.
///
/// [`LoaderOptions::default`] selects [`LoadMode::Lossless`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Preserve aliases as [`Node::Alias`] nodes (default, safe for LSP).
    /// No anchor lookup happens in this mode, so an alias to an undefined
    /// anchor is not reported by the loader.
    Lossless,
    /// Expand aliases inline; subject to `max_expanded_nodes` limit.
    /// An alias whose anchor has not been registered yields
    /// [`LoadError::UndefinedAlias`].
    Resolved,
}
95
/// Security and behaviour options for the loader.
///
/// Use [`LoaderBuilder`] to adjust individual fields, or construct the struct
/// directly; [`Default`] yields lossless mode with the limits listed below.
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum mapping/sequence nesting depth (default: 512).
    /// Exceeding it yields [`LoadError::NestingDepthLimitExceeded`].
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document (default: 10 000).
    /// Exceeding it yields [`LoadError::AnchorCountLimitExceeded`].
    pub max_anchors: usize,
    /// Maximum total nodes produced by alias expansion, resolved mode only
    /// (default: 1 000 000).
    /// Exceeding it yields [`LoadError::AliasExpansionLimitExceeded`].
    pub max_expanded_nodes: usize,
    /// Loader mode.
    pub mode: LoadMode,
}
109
110impl Default for LoaderOptions {
111    fn default() -> Self {
112        Self {
113            max_nesting_depth: 512,
114            max_anchors: 10_000,
115            max_expanded_nodes: 1_000_000,
116            mode: LoadMode::Lossless,
117        }
118    }
119}
120
121// ---------------------------------------------------------------------------
122// Builder
123// ---------------------------------------------------------------------------
124
125/// Builder for configuring and creating a [`Loader`].
126///
127/// ```
128/// use rlsp_yaml_parser::loader::LoaderBuilder;
129///
130/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
131/// assert_eq!(docs.len(), 1);
132/// ```
133pub struct LoaderBuilder {
134    options: LoaderOptions,
135}
136
137impl LoaderBuilder {
138    /// Create a builder with default options (lossless mode, safe limits).
139    #[must_use]
140    pub fn new() -> Self {
141        Self {
142            options: LoaderOptions::default(),
143        }
144    }
145
146    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
147    #[must_use]
148    pub const fn lossless(mut self) -> Self {
149        self.options.mode = LoadMode::Lossless;
150        self
151    }
152
153    /// Use resolved mode — aliases are expanded inline.
154    #[must_use]
155    pub const fn resolved(mut self) -> Self {
156        self.options.mode = LoadMode::Resolved;
157        self
158    }
159
160    /// Override the maximum nesting depth.
161    #[must_use]
162    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
163        self.options.max_nesting_depth = limit;
164        self
165    }
166
167    /// Override the maximum anchor count.
168    #[must_use]
169    pub const fn max_anchors(mut self, limit: usize) -> Self {
170        self.options.max_anchors = limit;
171        self
172    }
173
174    /// Override the maximum expanded-node count (resolved mode only).
175    #[must_use]
176    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
177        self.options.max_expanded_nodes = limit;
178        self
179    }
180
181    /// Consume the builder and produce a [`Loader`].
182    #[must_use]
183    pub const fn build(self) -> Loader {
184        Loader {
185            options: self.options,
186        }
187    }
188}
189
impl Default for LoaderBuilder {
    /// Equivalent to [`LoaderBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
195
196// ---------------------------------------------------------------------------
197// Loader
198// ---------------------------------------------------------------------------
199
200/// A configured YAML loader.
201pub struct Loader {
202    options: LoaderOptions,
203}
204
205impl Loader {
206    /// Load YAML text into a sequence of documents.
207    ///
208    /// # Errors
209    ///
210    /// Returns `Err` if the input contains a parse error, exceeds a configured
211    /// security limit, or (in resolved mode) references an undefined anchor.
212    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
213        let mut state = LoadState::new(&self.options);
214        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
215            Box::new(crate::parse_events(input));
216        state.run(iter.peekable())
217    }
218}
219
220// ---------------------------------------------------------------------------
221// Convenience entry point
222// ---------------------------------------------------------------------------
223
224/// Load YAML text using lossless mode and default security limits.
225///
226/// Returns one `Document<Span>` per YAML document in the stream.
227///
228/// # Errors
229///
230/// Returns `Err` if the input contains a parse error or exceeds a security
231/// limit (nesting depth or anchor count).
232///
233/// ```
234/// use rlsp_yaml_parser::loader::load;
235///
236/// let docs = load("hello\n").unwrap();
237/// assert_eq!(docs.len(), 1);
238/// ```
239pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
240    LoaderBuilder::new().lossless().build().load(input)
241}
242
243// ---------------------------------------------------------------------------
244// Internal loader state
245// ---------------------------------------------------------------------------
246
/// Mutable bookkeeping for a single `Loader::load` call.
struct LoadState<'opt> {
    /// Loader configuration, borrowed from the `Loader` that started the run.
    options: &'opt LoaderOptions,
    /// Anchors registered so far in the current document: name → node.
    /// A later registration under the same name replaces the earlier node.
    anchor_map: HashMap<String, Node<Span>>,
    /// Count of distinct anchors registered (resets per document).
    anchor_count: usize,
    /// Current nesting depth (incremented when a mapping or sequence starts,
    /// decremented once its end has been handled).
    depth: usize,
    /// Total nodes produced via alias expansion (resolved mode only).
    /// Anchor registrations in resolved mode are counted here as well; the
    /// counter resets per document.
    expanded_nodes: usize,
}
258
259impl<'opt> LoadState<'opt> {
260    fn new(options: &'opt LoaderOptions) -> Self {
261        Self {
262            options,
263            anchor_map: HashMap::new(),
264            anchor_count: 0,
265            depth: 0,
266            expanded_nodes: 0,
267        }
268    }
269
270    fn reset_for_document(&mut self) {
271        self.anchor_map.clear();
272        self.anchor_count = 0;
273        self.expanded_nodes = 0;
274    }
275
276    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
277        let mut docs: Vec<Document<Span>> = Vec::new();
278
279        // Skip StreamStart.
280        match stream.next() {
281            Some(Ok(_)) | None => {}
282            Some(Err(e)) => {
283                return Err(LoadError::Parse {
284                    pos: e.pos,
285                    message: e.message,
286                });
287            }
288        }
289
290        loop {
291            // Skip any leading comments or unknown events before a document.
292            match next_from(&mut stream)? {
293                None | Some((Event::StreamEnd, _)) => break,
294                Some((
295                    Event::DocumentStart {
296                        version,
297                        tag_directives,
298                        ..
299                    },
300                    _,
301                )) => {
302                    let doc_version = version;
303                    let doc_tags = tag_directives;
304                    self.reset_for_document();
305
306                    let mut doc_comments: Vec<String> = Vec::new();
307
308                    // Consume leading comments at document level.
309                    consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
310
311                    // Parse root node (may be absent for empty documents).
312                    let root = if is_document_end(stream.peek()) {
313                        // Empty document — emit an empty scalar as root.
314                        empty_scalar()
315                    } else {
316                        self.parse_node(&mut stream)?
317                    };
318
319                    // Consume DocumentEnd if present.
320                    if matches!(stream.peek(), Some(Ok((Event::DocumentEnd { .. }, _)))) {
321                        let _ = stream.next();
322                    }
323
324                    docs.push(Document {
325                        root,
326                        version: doc_version,
327                        tags: doc_tags,
328                        comments: doc_comments,
329                    });
330                }
331                Some(_) => {
332                    // Comment or any other stray event outside a document — skip.
333                }
334            }
335        }
336
337        Ok(docs)
338    }
339
340    /// Parse a single node from the stream.
341    ///
342    /// Advances the stream past the node (including end-of-container events).
343    #[allow(clippy::too_many_lines)] // match-on-event-type; splitting would obscure flow
344    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
345        let Some((event, span)) = next_from(stream)? else {
346            return Ok(empty_scalar());
347        };
348
349        match event {
350            Event::Scalar {
351                value,
352                style,
353                anchor,
354                tag,
355            } => {
356                let node = Node::Scalar {
357                    value: value.into_owned(),
358                    style,
359                    anchor: anchor.map(str::to_owned),
360                    tag: tag.map(std::borrow::Cow::into_owned),
361                    loc: span,
362                    leading_comments: Vec::new(),
363                    trailing_comment: None,
364                };
365                if let Some(name) = node.anchor() {
366                    self.register_anchor(name.to_owned(), node.clone())?;
367                }
368                Ok(node)
369            }
370
371            Event::MappingStart { anchor, tag, .. } => {
372                let anchor = anchor.map(str::to_owned);
373                let tag = tag.map(std::borrow::Cow::into_owned);
374
375                self.depth += 1;
376                if self.depth > self.options.max_nesting_depth {
377                    return Err(LoadError::NestingDepthLimitExceeded {
378                        limit: self.options.max_nesting_depth,
379                    });
380                }
381
382                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
383                let mut end_span = span;
384
385                loop {
386                    // Peek to detect MappingEnd or end of stream before
387                    // consuming leading comments.
388                    let leading = consume_leading_comments(stream)?;
389
390                    match stream.peek() {
391                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => break,
392                        Some(Err(_)) => {
393                            // Consume the error.
394                            return Err(match stream.next() {
395                                Some(Err(e)) => LoadError::Parse {
396                                    pos: e.pos,
397                                    message: e.message,
398                                },
399                                _ => LoadError::UnexpectedEndOfStream,
400                            });
401                        }
402                        Some(Ok(_)) => {}
403                    }
404
405                    let mut key = self.parse_node(stream)?;
406                    attach_leading_comments(&mut key, leading);
407
408                    let mut value = self.parse_node(stream)?;
409
410                    // Trailing comment on the value — peek for inline comment.
411                    let value_end_line = node_end_line(&value);
412                    if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
413                        attach_trailing_comment(&mut value, trail);
414                    }
415
416                    entries.push((key, value));
417                }
418
419                // Consume MappingEnd and capture its span.
420                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
421                    end_span = *end;
422                    let _ = stream.next();
423                }
424                self.depth -= 1;
425
426                let node = Node::Mapping {
427                    entries,
428                    anchor: anchor.clone(),
429                    tag,
430                    loc: Span {
431                        start: span.start,
432                        end: end_span.end,
433                    },
434                    leading_comments: Vec::new(),
435                    trailing_comment: None,
436                };
437                if let Some(name) = anchor {
438                    self.register_anchor(name, node.clone())?;
439                }
440                Ok(node)
441            }
442
443            Event::SequenceStart { anchor, tag, .. } => {
444                let anchor = anchor.map(str::to_owned);
445                let tag = tag.map(std::borrow::Cow::into_owned);
446
447                self.depth += 1;
448                if self.depth > self.options.max_nesting_depth {
449                    return Err(LoadError::NestingDepthLimitExceeded {
450                        limit: self.options.max_nesting_depth,
451                    });
452                }
453
454                let mut items: Vec<Node<Span>> = Vec::new();
455                let mut end_span = span;
456
457                loop {
458                    // Collect leading comments before the next item.
459                    let leading = consume_leading_comments(stream)?;
460
461                    match stream.peek() {
462                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => break,
463                        Some(Err(_)) => {
464                            // Consume the error.
465                            return Err(match stream.next() {
466                                Some(Err(e)) => LoadError::Parse {
467                                    pos: e.pos,
468                                    message: e.message,
469                                },
470                                _ => LoadError::UnexpectedEndOfStream,
471                            });
472                        }
473                        Some(Ok(_)) => {}
474                    }
475
476                    let mut item = self.parse_node(stream)?;
477                    attach_leading_comments(&mut item, leading);
478
479                    // Trailing comment on the item — peek for inline comment.
480                    let item_end_line = node_end_line(&item);
481                    if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
482                        attach_trailing_comment(&mut item, trail);
483                    }
484
485                    items.push(item);
486                }
487
488                // Consume SequenceEnd and capture its span.
489                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
490                    end_span = *end;
491                    let _ = stream.next();
492                }
493                self.depth -= 1;
494
495                let node = Node::Sequence {
496                    items,
497                    anchor: anchor.clone(),
498                    tag,
499                    loc: Span {
500                        start: span.start,
501                        end: end_span.end,
502                    },
503                    leading_comments: Vec::new(),
504                    trailing_comment: None,
505                };
506                if let Some(name) = anchor {
507                    self.register_anchor(name, node.clone())?;
508                }
509                Ok(node)
510            }
511
512            Event::Alias { name } => {
513                let name = name.to_owned();
514                self.resolve_alias(&name, span)
515            }
516
517            Event::Comment { .. } => {
518                // Comment between nodes — skip and continue.
519                self.parse_node(stream)
520            }
521
522            Event::StreamStart
523            | Event::StreamEnd
524            | Event::DocumentStart { .. }
525            | Event::DocumentEnd { .. }
526            | Event::MappingEnd
527            | Event::SequenceEnd => {
528                // Structural event where a node is expected — return empty scalar.
529                Ok(empty_scalar())
530            }
531        }
532    }
533
534    fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
535        if !self.anchor_map.contains_key(&name) {
536            self.anchor_count += 1;
537            if self.anchor_count > self.options.max_anchors {
538                return Err(LoadError::AnchorCountLimitExceeded {
539                    limit: self.options.max_anchors,
540                });
541            }
542        }
543        // Count the anchor node itself toward the expansion budget in resolved
544        // mode so that the total reflects every node present in the expanded
545        // document (anchor definition + each alias expansion).
546        if self.options.mode == LoadMode::Resolved {
547            self.expanded_nodes += 1;
548            if self.expanded_nodes > self.options.max_expanded_nodes {
549                return Err(LoadError::AliasExpansionLimitExceeded {
550                    limit: self.options.max_expanded_nodes,
551                });
552            }
553        }
554        self.anchor_map.insert(name, node);
555        Ok(())
556    }
557
558    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
559        match self.options.mode {
560            LoadMode::Lossless => Ok(Node::Alias {
561                name: name.to_owned(),
562                loc,
563                leading_comments: Vec::new(),
564                trailing_comment: None,
565            }),
566            LoadMode::Resolved => {
567                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
568                    LoadError::UndefinedAlias {
569                        name: name.to_owned(),
570                    }
571                })?;
572                let mut in_progress: HashSet<String> = HashSet::new();
573                self.expand_node(anchored, &mut in_progress)
574            }
575        }
576    }
577
578    /// Recursively expand a node, counting every node produced against the
579    /// expansion limit and checking for cycles via `in_progress`.
580    fn expand_node(
581        &mut self,
582        node: Node<Span>,
583        in_progress: &mut HashSet<String>,
584    ) -> Result<Node<Span>> {
585        // Increment at the top — before child recursion — so every node
586        // (including non-alias nodes inside expanded trees) counts against the
587        // budget.
588        self.expanded_nodes += 1;
589        if self.expanded_nodes > self.options.max_expanded_nodes {
590            return Err(LoadError::AliasExpansionLimitExceeded {
591                limit: self.options.max_expanded_nodes,
592            });
593        }
594
595        match node {
596            Node::Alias { ref name, loc, .. } => {
597                if in_progress.contains(name) {
598                    return Err(LoadError::CircularAlias { name: name.clone() });
599                }
600                let target = self
601                    .anchor_map
602                    .get(name)
603                    .cloned()
604                    .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
605                in_progress.insert(name.clone());
606                let expanded = self.expand_node(target, in_progress)?;
607                in_progress.remove(name);
608                // Re-stamp with the alias site's location.
609                Ok(reloc(expanded, loc))
610            }
611            Node::Mapping {
612                entries,
613                anchor,
614                tag,
615                loc,
616                leading_comments,
617                trailing_comment,
618            } => {
619                let mut expanded_entries = Vec::with_capacity(entries.len());
620                for (k, v) in entries {
621                    let ek = self.expand_node(k, in_progress)?;
622                    let ev = self.expand_node(v, in_progress)?;
623                    expanded_entries.push((ek, ev));
624                }
625                Ok(Node::Mapping {
626                    entries: expanded_entries,
627                    anchor,
628                    tag,
629                    loc,
630                    leading_comments,
631                    trailing_comment,
632                })
633            }
634            Node::Sequence {
635                items,
636                anchor,
637                tag,
638                loc,
639                leading_comments,
640                trailing_comment,
641            } => {
642                let mut expanded_items = Vec::with_capacity(items.len());
643                for item in items {
644                    expanded_items.push(self.expand_node(item, in_progress)?);
645                }
646                Ok(Node::Sequence {
647                    items: expanded_items,
648                    anchor,
649                    tag,
650                    loc,
651                    leading_comments,
652                    trailing_comment,
653                })
654            }
655            // Scalars and already-resolved nodes — pass through.
656            scalar @ Node::Scalar { .. } => Ok(scalar),
657        }
658    }
659}
660
661// ---------------------------------------------------------------------------
662// Stream helpers
663// ---------------------------------------------------------------------------
664
665/// Pull the next event from the stream, converting parse errors to `LoadError`.
666fn next_from<'a>(stream: &mut EventStream<'a>) -> Result<Option<(Event<'a>, Span)>> {
667    match stream.next() {
668        None => Ok(None),
669        Some(Ok(item)) => Ok(Some(item)),
670        Some(Err(e)) => Err(LoadError::Parse {
671            pos: e.pos,
672            message: e.message,
673        }),
674    }
675}
676
677/// Return `true` if the peeked item signals end of document (or stream).
678const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
679    matches!(
680        peeked,
681        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
682    )
683}
684
685/// Consume leading block-level Comment events at document level, appending
686/// them to `doc_comments`.  Stops at the first non-Comment event.
687///
688/// Block-level comments have `span.end.line > span.start.line`.
689fn consume_leading_doc_comments(
690    stream: &mut EventStream<'_>,
691    doc_comments: &mut Vec<String>,
692) -> Result<()> {
693    while matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _)))) {
694        if let Some((Event::Comment { text }, span)) = next_from(stream)? {
695            if span.end.line > span.start.line {
696                doc_comments.push(format!("#{text}"));
697            }
698        }
699    }
700    Ok(())
701}
702
703/// Consume leading block-level Comment events before a collection item or
704/// mapping key.  Returns the captured comment texts.
705///
706/// A "leading" comment is any `Comment` event that appears before the next
707/// non-comment structural event.  By the time this function is called,
708/// `peek_trailing_comment` has already consumed any trailing comment that was
709/// on the same line as the preceding value — so every remaining `Comment` here
710/// is on its own line and belongs to the upcoming key/item as a leading comment.
711fn consume_leading_comments(stream: &mut EventStream<'_>) -> Result<Vec<String>> {
712    let mut leading = Vec::new();
713    while matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _)))) {
714        if let Some((Event::Comment { text }, _)) = next_from(stream)? {
715            leading.push(format!("#{text}"));
716        }
717    }
718    Ok(leading)
719}
720
721/// If the next event is a trailing Comment on the same line as `preceding_end_line`,
722/// consume it and return the text.  Otherwise return `None`.
723///
724/// libfyaml (`fy_attach_comments_if_any` in `fy-parse.c`) uses the same
725/// criterion: a comment is "trailing" when its line equals the preceding
726/// token's end line (`fym.line == fyt->handle.end_mark.line`).  The new
727/// parser emits trailing comments with real spans (not zero-width), so the
728/// old `span.start == span.end` sentinel from the original parser does not
729/// apply here.
730fn peek_trailing_comment(
731    stream: &mut EventStream<'_>,
732    preceding_end_line: usize,
733) -> Result<Option<String>> {
734    if matches!(
735        stream.peek(),
736        Some(Ok((Event::Comment { .. }, span))) if span.start.line == preceding_end_line
737    ) {
738        if let Some((Event::Comment { text }, _)) = next_from(stream)? {
739            return Ok(Some(format!("#{text}")));
740        }
741    }
742    Ok(None)
743}
744
745/// Return the line number of a node's span end position.
746///
747/// Used to determine whether the next `Comment` event is trailing (same line)
748/// or leading (different line).
749const fn node_end_line(node: &Node<Span>) -> usize {
750    match node {
751        Node::Scalar { loc, .. }
752        | Node::Mapping { loc, .. }
753        | Node::Sequence { loc, .. }
754        | Node::Alias { loc, .. } => loc.end.line,
755    }
756}
757
758// ---------------------------------------------------------------------------
759// Node helpers
760// ---------------------------------------------------------------------------
761
762const fn empty_scalar() -> Node<Span> {
763    Node::Scalar {
764        value: String::new(),
765        style: ScalarStyle::Plain,
766        anchor: None,
767        tag: None,
768        loc: Span {
769            start: Pos::ORIGIN,
770            end: Pos::ORIGIN,
771        },
772        leading_comments: Vec::new(),
773        trailing_comment: None,
774    }
775}
776
777/// Replace the location of a node (used when stamping alias-site spans).
778fn reloc(node: Node<Span>, loc: Span) -> Node<Span> {
779    match node {
780        Node::Scalar {
781            value,
782            style,
783            anchor,
784            tag,
785            leading_comments,
786            trailing_comment,
787            ..
788        } => Node::Scalar {
789            value,
790            style,
791            anchor,
792            tag,
793            loc,
794            leading_comments,
795            trailing_comment,
796        },
797        Node::Mapping {
798            entries,
799            anchor,
800            tag,
801            leading_comments,
802            trailing_comment,
803            ..
804        } => Node::Mapping {
805            entries,
806            anchor,
807            tag,
808            loc,
809            leading_comments,
810            trailing_comment,
811        },
812        Node::Sequence {
813            items,
814            anchor,
815            tag,
816            leading_comments,
817            trailing_comment,
818            ..
819        } => Node::Sequence {
820            items,
821            anchor,
822            tag,
823            loc,
824            leading_comments,
825            trailing_comment,
826        },
827        Node::Alias {
828            name,
829            leading_comments,
830            trailing_comment,
831            ..
832        } => Node::Alias {
833            name,
834            loc,
835            leading_comments,
836            trailing_comment,
837        },
838    }
839}
840
841// ---------------------------------------------------------------------------
842// Comment attachment helpers
843// ---------------------------------------------------------------------------
844
845/// Attach `leading_comments` to a node's `leading_comments` field.
846fn attach_leading_comments(node: &mut Node<Span>, comments: Vec<String>) {
847    if comments.is_empty() {
848        return;
849    }
850    match node {
851        Node::Scalar {
852            leading_comments, ..
853        }
854        | Node::Mapping {
855            leading_comments, ..
856        }
857        | Node::Sequence {
858            leading_comments, ..
859        }
860        | Node::Alias {
861            leading_comments, ..
862        } => {
863            *leading_comments = comments;
864        }
865    }
866}
867
868/// Attach a trailing comment to a node's `trailing_comment` field.
869fn attach_trailing_comment(node: &mut Node<Span>, comment: String) {
870    match node {
871        Node::Scalar {
872            trailing_comment, ..
873        }
874        | Node::Mapping {
875            trailing_comment, ..
876        }
877        | Node::Sequence {
878            trailing_comment, ..
879        }
880        | Node::Alias {
881            trailing_comment, ..
882        } => {
883            *trailing_comment = Some(comment);
884        }
885    }
886}
887
888// ---------------------------------------------------------------------------
889// Tests
890// ---------------------------------------------------------------------------
891
#[cfg(test)]
#[allow(
    clippy::indexing_slicing,
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::too_many_lines,
    clippy::doc_markdown
)]
mod tests {
    use super::*;

    /// Zero-width span at the origin, for nodes whose location is irrelevant.
    fn origin_span() -> Span {
        Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        }
    }

    /// Plain scalar with the given value and no anchor, tag, or comments.
    fn plain_scalar(value: &str) -> Node<Span> {
        Node::Scalar {
            value: value.to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            tag: None,
            loc: origin_span(),
            leading_comments: Vec::new(),
            trailing_comment: None,
        }
    }

    /// Alias node referring to `name`, with no comments attached.
    fn bare_alias(name: &str) -> Node<Span> {
        Node::Alias {
            name: name.to_owned(),
            loc: origin_span(),
            leading_comments: Vec::new(),
            trailing_comment: None,
        }
    }

    /// Load `input` and return the root of its single document.
    #[allow(dead_code)]
    fn load_one(input: &str) -> Node<Span> {
        let docs = load(input).expect("load failed");
        assert_eq!(docs.len(), 1, "expected 1 document, got {}", docs.len());
        let only = docs.into_iter().next().unwrap();
        only.root
    }

    // UT-1: an anchor registered in one document is not visible in the next.
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        // Doc 1 defines &foo; doc 2 references *foo.  Resolved mode has to
        // look the alias up, so it must report it as undefined.
        let input = "---\n- &foo hello\n...\n---\n- *foo\n...\n";
        let result = LoaderBuilder::new().resolved().build().load(input);
        assert!(
            result.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        let err = result.unwrap_err();
        assert!(matches!(err, LoadError::UndefinedAlias { .. }));
    }

    // UT-2: registrations are counted and rejected once `max_anchors` is hit.
    #[test]
    fn register_anchor_increments_count() {
        let options = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let node = plain_scalar("x");

        // The first two registrations fit within the limit of 2.
        for name in ["a", "b"] {
            assert!(state.register_anchor(name.to_owned(), node.clone()).is_ok());
        }

        // The third one must be refused and report the configured limit.
        let err = state
            .register_anchor("c".to_owned(), node)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    // UT-3: expanding an alias whose anchor is that same alias is a cycle.
    #[test]
    fn expand_node_detects_circular_alias() {
        let options = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);

        // Anchor "a" maps to an alias of "a": a self-referential entry.
        let alias_node = bare_alias("a");
        state.anchor_map.insert("a".to_owned(), alias_node.clone());

        let mut in_progress = HashSet::new();
        let result = state.expand_node(alias_node, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }
}