Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::iter::Peekable;
35
36use std::sync::Arc;
37
38use crate::error::Error;
39use crate::event::{Event, EventMeta, ScalarStyle};
40use crate::node::{Document, Node, NodeMeta};
41use crate::pos::{LineIndex, Pos, Span};
42use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
43
44use comments::{attach_leading_comments, attach_trailing_comment};
45use reloc::reloc;
46use stream::{
47    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
48    with_hash_prefix,
49};
50
51mod comments;
52mod reloc;
53mod stream;
54
55// ---------------------------------------------------------------------------
56// Public error type
57// ---------------------------------------------------------------------------
58
/// Errors produced by the loader.
///
/// The `Display` text of every variant comes from its `#[error(...)]`
/// attribute (derived through `thiserror`).  All variants are cheap to
/// compare and clone: they carry only positions, limits, and owned strings.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The event stream contained a parse error.
    ///
    /// The position is rendered with its `Debug` formatting in the message.
    #[error("parse error at {pos:?}: {message}")]
    Parse {
        /// Source position where the parse error was detected.
        pos: Pos,
        /// Human-readable description of the error.
        message: String,
    },

    /// The event stream ended unexpectedly mid-document.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// Nesting depth exceeded the configured limit.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded {
        /// The configured nesting depth limit that was exceeded.
        limit: usize,
    },

    /// Too many distinct anchor names were defined.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded {
        /// The configured anchor count limit that was exceeded.
        limit: usize,
    },

    /// Alias expansion produced more nodes than the configured limit.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded {
        /// The configured expansion node limit that was exceeded.
        limit: usize,
    },

    /// A circular alias reference was detected.
    #[error("circular alias reference: '{name}'")]
    CircularAlias {
        /// The anchor name involved in the cycle.
        name: String,
    },

    /// An alias referred to an anchor that was never defined.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias {
        /// The alias name that had no corresponding anchor definition.
        name: String,
    },

    /// A plain scalar could not be resolved under the JSON schema.
    ///
    /// The JSON schema has no fallback: every untagged plain scalar must match
    /// one of its patterns (null, bool, int, float).  If none match, the scalar
    /// is an error per YAML 1.2.2 §10.2.
    ///
    /// `value` is truncated to 128 Unicode scalar values and ASCII control
    /// characters (U+0000–U+001F, U+007F) are replaced with `\uXXXX` escapes
    /// to prevent log injection via the `Display` impl.
    ///
    /// NOTE(review): the `#[error]` message below interpolates neither `value`
    /// nor `pos`, so the derived `Display` output never contains them; they are
    /// reachable only through the fields (and the derived `Debug` output).
    #[error("JSON schema: plain scalar does not match any type pattern")]
    UnresolvedScalar {
        /// The sanitized, truncated scalar value that failed resolution.
        value: String,
        /// Source position of the scalar.
        pos: Pos,
    },
}
127
// Convenience alias used inside the module: every fallible loader routine
// reports its failure as a `LoadError`.
type Result<T> = std::result::Result<T, LoadError>;

// Type alias for the peekable event stream used throughout the loader.
// Each item is either an `(Event, Span)` pair or a parser `Error`.  The
// iterator is boxed so the concrete parser iterator type stays out of the
// loader's signatures, and `Peekable` supplies the one-event lookahead the
// loader relies on (`stream.peek()`).
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
134
135/// Unpack an `Option<Box<EventMeta>>` into its four constituent fields.
136#[expect(
137    clippy::type_complexity,
138    reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
139)]
140#[inline]
141fn unpack_meta(
142    meta: Option<Box<EventMeta<'_>>>,
143) -> (
144    Option<&'_ str>,
145    Option<Span>,
146    Option<std::borrow::Cow<'_, str>>,
147    Option<Span>,
148) {
149    meta.map_or((None, None, None, None), |m| {
150        (m.anchor, m.anchor_loc, m.tag, m.tag_loc)
151    })
152}
153
154// ---------------------------------------------------------------------------
155// Configuration
156// ---------------------------------------------------------------------------
157
/// Loader mode — controls how alias references are handled.
///
/// Selected through [`LoaderOptions::mode`], or the
/// [`LoaderBuilder::lossless`] / [`LoaderBuilder::resolved`] builder methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Preserve aliases as [`Node::Alias`] nodes (default, safe for LSP).
    Lossless,
    /// Expand aliases inline; subject to `max_expanded_nodes` limit.
    Resolved,
}
166
/// Security and behaviour options for the loader.
///
/// [`LoaderOptions::default`] yields lossless mode, the Core schema, and the
/// default limits documented on each field.
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum mapping/sequence nesting depth before returning
    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document before returning
    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
    pub max_anchors: usize,
    /// Maximum total nodes produced by alias expansion in resolved mode before
    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
    pub max_expanded_nodes: usize,
    /// Controls how alias references are handled during loading
    /// (default: [`LoadMode::Lossless`]).
    pub mode: LoadMode,
    /// YAML 1.2.2 §10 schema to apply during loading (default: [`Schema::Core`]).
    ///
    /// Each node's tag is resolved according to this schema after the node is
    /// constructed.  Nodes with explicit source tags are left unchanged.
    pub schema: Schema,
}
187
188impl Default for LoaderOptions {
189    fn default() -> Self {
190        Self {
191            max_nesting_depth: 512,
192            max_anchors: 10_000,
193            max_expanded_nodes: 1_000_000,
194            mode: LoadMode::Lossless,
195            schema: Schema::Core,
196        }
197    }
198}
199
200// ---------------------------------------------------------------------------
201// Builder
202// ---------------------------------------------------------------------------
203
204/// Builder for configuring and creating a [`Loader`].
205///
206/// ```
207/// use rlsp_yaml_parser::loader::LoaderBuilder;
208///
209/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
210/// assert_eq!(docs.len(), 1);
211/// ```
212pub struct LoaderBuilder {
213    options: LoaderOptions,
214}
215
216impl LoaderBuilder {
217    /// Create a builder with default options (lossless mode, safe limits).
218    #[must_use]
219    pub fn new() -> Self {
220        Self {
221            options: LoaderOptions::default(),
222        }
223    }
224
225    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
226    #[must_use]
227    pub const fn lossless(mut self) -> Self {
228        self.options.mode = LoadMode::Lossless;
229        self
230    }
231
232    /// Use resolved mode — aliases are expanded inline.
233    #[must_use]
234    pub const fn resolved(mut self) -> Self {
235        self.options.mode = LoadMode::Resolved;
236        self
237    }
238
239    /// Override the maximum nesting depth.
240    #[must_use]
241    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
242        self.options.max_nesting_depth = limit;
243        self
244    }
245
246    /// Override the maximum anchor count.
247    #[must_use]
248    pub const fn max_anchors(mut self, limit: usize) -> Self {
249        self.options.max_anchors = limit;
250        self
251    }
252
253    /// Override the maximum expanded-node count (resolved mode only).
254    #[must_use]
255    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
256        self.options.max_expanded_nodes = limit;
257        self
258    }
259
260    /// Override the YAML 1.2.2 §10 schema used for tag resolution during loading.
261    ///
262    /// The default is [`Schema::Core`].  Untagged nodes receive resolved tag URIs
263    /// in the AST; nodes with explicit source tags are not modified.
264    #[must_use]
265    pub const fn schema(mut self, s: Schema) -> Self {
266        self.options.schema = s;
267        self
268    }
269
270    /// Consume the builder and produce a [`Loader`].
271    #[must_use]
272    pub const fn build(self) -> Loader {
273        Loader {
274            options: self.options,
275        }
276    }
277}
278
279impl Default for LoaderBuilder {
280    fn default() -> Self {
281        Self::new()
282    }
283}
284
285// ---------------------------------------------------------------------------
286// Loader
287// ---------------------------------------------------------------------------
288
289/// A configured YAML loader.
290pub struct Loader {
291    options: LoaderOptions,
292}
293
294impl Loader {
295    /// Load YAML text into a sequence of documents.
296    ///
297    /// # Errors
298    ///
299    /// Returns `Err` if the input contains a parse error, exceeds a configured
300    /// security limit, or (in resolved mode) references an undefined anchor.
301    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
302        let mut state = LoadState::new(&self.options, input);
303        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
304            Box::new(crate::parse_events(input));
305        state.run(iter.peekable())
306    }
307}
308
309// ---------------------------------------------------------------------------
310// Convenience entry point
311// ---------------------------------------------------------------------------
312
313/// Load YAML text using lossless mode, default security limits, and Core schema tag
314/// resolution (YAML 1.2.2 §10.3).
315///
316/// Returns one `Document<Span>` per YAML document in the stream.  Untagged nodes
317/// receive resolved tag URIs according to the Core schema; nodes with explicit source
318/// tags are left unchanged.
319///
320/// # Errors
321///
322/// Returns `Err` if the input contains a parse error or exceeds a security
323/// limit (nesting depth or anchor count).
324///
325/// ```
326/// use rlsp_yaml_parser::loader::load;
327/// use rlsp_yaml_parser::Node;
328///
329/// let docs = load("hello\n").unwrap();
330/// assert_eq!(docs.len(), 1);
331/// let Node::Scalar { tag, .. } = &docs[0].root else { panic!() };
332/// assert_eq!(tag.as_deref(), Some("tag:yaml.org,2002:str"));
333/// ```
334pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
335    LoaderBuilder::new().lossless().build().load(input)
336}
337
338// ---------------------------------------------------------------------------
339// Internal loader state
340// ---------------------------------------------------------------------------
341
/// Mutable bookkeeping carried through a single `Loader::load` call.
///
/// One instance is built per call; `reset_for_document` clears the
/// per-document fields each time a `DocumentStart` event is processed.
struct LoadState<'opt> {
    /// Loader configuration (limits, mode, schema) borrowed from the `Loader`.
    options: &'opt LoaderOptions,
    /// Anchors registered so far in the current document: name → node.
    /// In lossless mode the stored value is only a placeholder empty scalar;
    /// the real node is kept in resolved mode alone (see `register_anchor`).
    anchor_map: HashMap<String, Node<Span>>,
    /// Count of distinct anchors registered (resets per document).
    anchor_count: usize,
    /// Current nesting depth: incremented when a `MappingStart` or
    /// `SequenceStart` is entered and decremented once that collection has
    /// been fully parsed.  Not touched by `reset_for_document`.
    depth: usize,
    /// Total nodes produced via alias expansion (resolved mode only).
    /// Anchor registrations in resolved mode also count toward this total.
    expanded_nodes: usize,
    /// Leading comments accumulated by `parse_node` when it encounters a
    /// `Comment` event between a mapping key and its value's collection start,
    /// or by a sequence/mapping loop when it hits End with leftover leading
    /// comments.  The next mapping/sequence loop iteration picks these up and
    /// prepends them to the next entry's leading comments.
    pending_leading: Vec<String>,
    /// Line index for the current document source; shared across all documents
    /// produced from the same input via `Arc` to avoid N full copies.
    line_index: Arc<LineIndex>,
}
362
363impl<'opt> LoadState<'opt> {
364    fn new(options: &'opt LoaderOptions, input: &str) -> Self {
365        Self {
366            options,
367            anchor_map: HashMap::new(),
368            anchor_count: 0,
369            depth: 0,
370            expanded_nodes: 0,
371            pending_leading: Vec::new(),
372            line_index: Arc::new(LineIndex::new(input)),
373        }
374    }
375
376    fn reset_for_document(&mut self) {
377        self.anchor_map.clear();
378        self.anchor_count = 0;
379        self.expanded_nodes = 0;
380        self.pending_leading.clear();
381    }
382
383    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
384        let mut docs: Vec<Document<Span>> = Vec::new();
385
386        // Skip StreamStart.
387        match stream.next() {
388            Some(Ok(_)) | None => {}
389            Some(Err(e)) => {
390                return Err(LoadError::Parse {
391                    pos: e.pos,
392                    message: e.message,
393                });
394            }
395        }
396
397        loop {
398            // Skip any leading comments or unknown events before a document.
399            match next_from(&mut stream)? {
400                None | Some((Event::StreamEnd, _)) => break,
401                Some((
402                    Event::DocumentStart {
403                        explicit,
404                        version,
405                        tag_directives,
406                    },
407                    _,
408                )) => {
409                    let doc_explicit_start = explicit;
410                    let doc_version = version;
411                    let doc_tags = tag_directives;
412                    self.reset_for_document();
413
414                    let mut doc_comments: Vec<String> = Vec::new();
415
416                    // Consume leading comments at document level.
417                    consume_leading_doc_comments(&mut stream, &mut doc_comments, &self.line_index)?;
418
419                    // Parse root node (may be absent for empty documents).
420                    let root = if is_document_end(stream.peek()) {
421                        // Empty document — emit an empty scalar as root.
422                        let mut node = empty_scalar();
423                        apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
424                        node
425                    } else {
426                        self.parse_node(&mut stream)?
427                    };
428
429                    // Consume DocumentEnd if present and capture its explicit flag.
430                    let doc_explicit_end =
431                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
432                            let end_explicit = *explicit;
433                            let _ = stream.next();
434                            end_explicit
435                        } else {
436                            false
437                        };
438
439                    docs.push(Document {
440                        root,
441                        version: doc_version,
442                        tags: doc_tags,
443                        comments: doc_comments,
444                        explicit_start: doc_explicit_start,
445                        explicit_end: doc_explicit_end,
446                        line_index: Some(self.line_index.clone()),
447                    });
448                }
449                Some(_) => {
450                    // Comment or any other stray event outside a document — skip.
451                }
452            }
453        }
454
455        Ok(docs)
456    }
457
458    /// Parse a single node from the stream.
459    ///
460    /// Advances the stream past the node (including end-of-container events).
461    #[expect(
462        clippy::too_many_lines,
463        reason = "match-on-event-type; splitting would obscure flow"
464    )]
465    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
466        // Structural end events close the caller's collection loop — do NOT
467        // consume them here.  Return an empty scalar and leave the event in
468        // the stream so the outer mapping/sequence loop can see and consume it.
469        if matches!(
470            stream.peek(),
471            Some(Ok((
472                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
473                _
474            )))
475        ) {
476            return Ok(empty_scalar());
477        }
478
479        let Some((event, span)) = next_from(stream)? else {
480            return Ok(empty_scalar());
481        };
482
483        match event {
484            Event::Scalar { value, style, meta } => {
485                let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
486                let anchor = anchor.map(str::to_owned);
487                let mut node = Node::Scalar {
488                    value: value.into_owned(),
489                    style,
490                    tag: tag.map(|t| Cow::Owned(t.into_owned())),
491                    loc: span,
492                    meta: NodeMeta {
493                        anchor,
494                        anchor_loc,
495                        tag_loc,
496                        leading_comments: None,
497                        trailing_comment: None,
498                    }
499                    .into_option(),
500                };
501                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
502                if let Some(name) = node.anchor() {
503                    self.register_anchor(name.to_owned(), &node)?;
504                }
505                Ok(node)
506            }
507
508            Event::MappingStart { style, meta } => {
509                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
510                let anchor = event_anchor.map(str::to_owned);
511                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
512                let anchor_for_registration = anchor.clone();
513
514                self.depth += 1;
515                if self.depth > self.options.max_nesting_depth {
516                    return Err(LoadError::NestingDepthLimitExceeded {
517                        limit: self.options.max_nesting_depth,
518                    });
519                }
520
521                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
522                let mut end_span = span;
523
524                loop {
525                    // Consume leading comments before the next key.  Also
526                    // collect any comments that spilled over from a sibling
527                    // value's collection end (stored in `pending_leading`).
528                    let raw_leading = consume_leading_comments(stream)?;
529                    let leading = if self.pending_leading.is_empty() {
530                        raw_leading
531                    } else {
532                        let mut combined = std::mem::take(&mut self.pending_leading);
533                        combined.extend(raw_leading);
534                        combined
535                    };
536
537                    match stream.peek() {
538                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
539                            // Save any collected leading comments so the next
540                            // sibling entry in the parent collection can inherit
541                            // them (e.g. a comment just before MappingEnd that
542                            // belongs to the following mapping entry).
543                            if !leading.is_empty() {
544                                self.pending_leading = leading;
545                            }
546                            break;
547                        }
548                        Some(Err(_)) => {
549                            // Consume the error.
550                            return Err(match stream.next() {
551                                Some(Err(e)) => LoadError::Parse {
552                                    pos: e.pos,
553                                    message: e.message,
554                                },
555                                _ => LoadError::UnexpectedEndOfStream,
556                            });
557                        }
558                        Some(Ok(_)) => {}
559                    }
560
561                    let mut key = self.parse_node(stream)?;
562                    attach_leading_comments(&mut key, leading);
563
564                    let mut value = self.parse_node(stream)?;
565
566                    // Trailing comment on the value — peek for inline comment.
567                    // Block scalars (literal `|` and folded `>`) consume trailing
568                    // blank lines as part of chomping; their span.end falls on the
569                    // first line after the scalar, which can coincide with the
570                    // next comment's line number.  That would falsely attach a
571                    // leading inter-node comment as a trailing inline comment.
572                    // Block scalars never have an inline comment on their content
573                    // lines, so skip trailing-comment detection for them.
574                    if !is_block_scalar(&value)
575                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
576                    {
577                        let value_end_line = node_end_line(&value, &self.line_index);
578                        if let Some(trail) =
579                            peek_trailing_comment(stream, value_end_line, &self.line_index)?
580                        {
581                            attach_trailing_comment(&mut value, trail);
582                        }
583                    }
584
585                    entries.push((key, value));
586                }
587
588                // Consume MappingEnd and capture its span.
589                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
590                    end_span = *end;
591                    let _ = stream.next();
592                }
593                self.depth -= 1;
594
595                let mut node = Node::Mapping {
596                    entries,
597                    style,
598                    tag,
599                    loc: Span {
600                        start: span.start,
601                        end: end_span.end,
602                    },
603                    meta: NodeMeta {
604                        anchor,
605                        anchor_loc,
606                        tag_loc,
607                        leading_comments: None,
608                        trailing_comment: None,
609                    }
610                    .into_option(),
611                };
612                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
613                if let Some(name) = anchor_for_registration {
614                    self.register_anchor(name, &node)?;
615                }
616                Ok(node)
617            }
618
619            Event::SequenceStart { style, meta } => {
620                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
621                let anchor = event_anchor.map(str::to_owned);
622                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
623                let anchor_for_registration = anchor.clone();
624
625                self.depth += 1;
626                if self.depth > self.options.max_nesting_depth {
627                    return Err(LoadError::NestingDepthLimitExceeded {
628                        limit: self.options.max_nesting_depth,
629                    });
630                }
631
632                let mut items: Vec<Node<Span>> = Vec::new();
633                let mut end_span = span;
634
635                loop {
636                    // Collect leading comments before the next item.  Also
637                    // collect any comments that spilled over from a sibling
638                    // value's collection end (stored in `pending_leading`).
639                    let raw_leading = consume_leading_comments(stream)?;
640                    let leading = if self.pending_leading.is_empty() {
641                        raw_leading
642                    } else {
643                        let mut combined = std::mem::take(&mut self.pending_leading);
644                        combined.extend(raw_leading);
645                        combined
646                    };
647
648                    match stream.peek() {
649                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
650                            // Save any collected leading comments so the next
651                            // sibling entry in the parent collection can inherit
652                            // them (e.g. a comment just before SequenceEnd that
653                            // belongs to the following sequence item or mapping
654                            // entry in the parent).
655                            if !leading.is_empty() {
656                                self.pending_leading = leading;
657                            }
658                            break;
659                        }
660                        Some(Err(_)) => {
661                            // Consume the error.
662                            return Err(match stream.next() {
663                                Some(Err(e)) => LoadError::Parse {
664                                    pos: e.pos,
665                                    message: e.message,
666                                },
667                                _ => LoadError::UnexpectedEndOfStream,
668                            });
669                        }
670                        Some(Ok(_)) => {}
671                    }
672
673                    let mut item = self.parse_node(stream)?;
674                    attach_leading_comments(&mut item, leading);
675
676                    // Trailing comment on the item — peek for inline comment.
677                    // Block scalars are excluded for the same reason as in the
678                    // mapping path: their span.end can coincide with the next
679                    // comment's line, falsely turning a leading comment into a
680                    // trailing one.
681                    if !is_block_scalar(&item)
682                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
683                    {
684                        let item_end_line = node_end_line(&item, &self.line_index);
685                        if let Some(trail) =
686                            peek_trailing_comment(stream, item_end_line, &self.line_index)?
687                        {
688                            attach_trailing_comment(&mut item, trail);
689                        }
690                    }
691
692                    items.push(item);
693                }
694
695                // Consume SequenceEnd and capture its span.
696                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
697                    end_span = *end;
698                    let _ = stream.next();
699                }
700                self.depth -= 1;
701
702                let mut node = Node::Sequence {
703                    items,
704                    style,
705                    tag,
706                    loc: Span {
707                        start: span.start,
708                        end: end_span.end,
709                    },
710                    meta: NodeMeta {
711                        anchor,
712                        anchor_loc,
713                        tag_loc,
714                        leading_comments: None,
715                        trailing_comment: None,
716                    }
717                    .into_option(),
718                };
719                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
720                if let Some(name) = anchor_for_registration {
721                    self.register_anchor(name, &node)?;
722                }
723                Ok(node)
724            }
725
726            Event::Alias { name } => {
727                let name = name.to_owned();
728                self.resolve_alias(&name, span)
729            }
730
731            Event::Comment { text } => {
732                // Comment between a mapping key and its collection value (e.g.
733                // `key:\n  # comment\n  subkey: val`).  The comment appears
734                // after the key Scalar and before the MappingStart/SequenceStart
735                // that begins the value.  Save it in `pending_leading` so the
736                // first entry of the upcoming collection can inherit it.
737                self.pending_leading.push(with_hash_prefix(text));
738                self.parse_node(stream)
739            }
740
741            Event::StreamStart
742            | Event::StreamEnd
743            | Event::DocumentStart { .. }
744            | Event::DocumentEnd { .. }
745            | Event::MappingEnd
746            | Event::SequenceEnd => {
747                // Structural event where a node is expected — return empty scalar.
748                Ok(empty_scalar())
749            }
750        }
751    }
752
753    fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
754        if !self.anchor_map.contains_key(&name) {
755            self.anchor_count += 1;
756            if self.anchor_count > self.options.max_anchors {
757                return Err(LoadError::AnchorCountLimitExceeded {
758                    limit: self.options.max_anchors,
759                });
760            }
761        }
762        // Count the anchor node itself toward the expansion budget in resolved
763        // mode so that the total reflects every node present in the expanded
764        // document (anchor definition + each alias expansion).
765        if self.options.mode == LoadMode::Resolved {
766            self.expanded_nodes += 1;
767            if self.expanded_nodes > self.options.max_expanded_nodes {
768                return Err(LoadError::AliasExpansionLimitExceeded {
769                    limit: self.options.max_expanded_nodes,
770                });
771            }
772            self.anchor_map.insert(name, node.clone());
773        } else {
774            // Lossless mode never reads anchor_map for expansion; store a
775            // zero-cost placeholder so contains_key still detects re-definitions.
776            self.anchor_map.insert(name, empty_scalar());
777        }
778        Ok(())
779    }
780
781    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
782        match self.options.mode {
783            LoadMode::Lossless => Ok(Node::Alias {
784                name: name.to_owned(),
785                loc,
786                leading_comments: None,
787                trailing_comment: None,
788            }),
789            LoadMode::Resolved => {
790                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
791                    LoadError::UndefinedAlias {
792                        name: name.to_owned(),
793                    }
794                })?;
795                let mut in_progress: HashSet<String> = HashSet::new();
796                self.expand_node(anchored, &mut in_progress)
797            }
798        }
799    }
800
801    /// Recursively expand a node, counting every node produced against the
802    /// expansion limit and checking for cycles via `in_progress`.
803    fn expand_node(
804        &mut self,
805        node: Node<Span>,
806        in_progress: &mut HashSet<String>,
807    ) -> Result<Node<Span>> {
808        // Increment at the top — before child recursion — so every node
809        // (including non-alias nodes inside expanded trees) counts against the
810        // budget.
811        self.expanded_nodes += 1;
812        if self.expanded_nodes > self.options.max_expanded_nodes {
813            return Err(LoadError::AliasExpansionLimitExceeded {
814                limit: self.options.max_expanded_nodes,
815            });
816        }
817
818        match node {
819            Node::Alias { ref name, loc, .. } => {
820                if in_progress.contains(name) {
821                    return Err(LoadError::CircularAlias { name: name.clone() });
822                }
823                let target = self
824                    .anchor_map
825                    .get(name)
826                    .cloned()
827                    .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
828                in_progress.insert(name.clone());
829                let expanded = self.expand_node(target, in_progress)?;
830                in_progress.remove(name);
831                // Re-stamp with the alias site's location.
832                Ok(reloc(expanded, loc))
833            }
834            Node::Mapping {
835                entries,
836                style,
837                tag,
838                loc,
839                meta,
840            } => {
841                let mut expanded_entries = Vec::with_capacity(entries.len());
842                for (k, v) in entries {
843                    let ek = self.expand_node(k, in_progress)?;
844                    let ev = self.expand_node(v, in_progress)?;
845                    expanded_entries.push((ek, ev));
846                }
847                Ok(Node::Mapping {
848                    entries: expanded_entries,
849                    style,
850                    tag,
851                    loc,
852                    meta,
853                })
854            }
855            Node::Sequence {
856                items,
857                style,
858                tag,
859                loc,
860                meta,
861            } => {
862                let mut expanded_items = Vec::with_capacity(items.len());
863                for item in items {
864                    expanded_items.push(self.expand_node(item, in_progress)?);
865                }
866                Ok(Node::Sequence {
867                    items: expanded_items,
868                    style,
869                    tag,
870                    loc,
871                    meta,
872                })
873            }
874            // Scalars and already-resolved nodes — pass through.
875            scalar @ Node::Scalar { .. } => Ok(scalar),
876        }
877    }
878}
879
880/// Return `true` if the peeked item signals end of document (or stream).
881const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
882    matches!(
883        peeked,
884        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
885    )
886}
887
888/// Convert a `Span.start` byte offset to a `Pos` with accurate line/column.
889#[inline]
890fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
891    let (line, column) = line_index.line_column(offset);
892    Pos {
893        byte_offset: offset as usize,
894        line: line as usize,
895        column: column as usize,
896    }
897}
898
899/// Return the line number of a node's span end position.
900///
901/// Used to determine whether the next `Comment` event is trailing (same line)
902/// or leading (different line).
903#[inline]
904fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
905    let end_offset = match node {
906        Node::Scalar { loc, .. }
907        | Node::Mapping { loc, .. }
908        | Node::Sequence { loc, .. }
909        | Node::Alias { loc, .. } => loc.end,
910    };
911    line_index.line_column(end_offset).0
912}
913
914/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
915///
916/// Block scalars consume trailing blank lines as part of chomping, so their
917/// `span.end` falls on the line *after* the last consumed line.  This means a
918/// comment on the immediately following line has the same line number as
919/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
920/// classify it as an inline trailing comment.  The caller uses this predicate
921/// to skip trailing-comment detection for block scalars.
922#[inline]
923const fn is_block_scalar(node: &Node<Span>) -> bool {
924    matches!(
925        node,
926        Node::Scalar {
927            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
928            ..
929        }
930    )
931}
932
933// ---------------------------------------------------------------------------
934// Schema resolution helpers
935// ---------------------------------------------------------------------------
936
/// Maximum number of Unicode scalar values kept in the `value` field of
/// [`LoadError::UnresolvedScalar`].  Prevents unbounded allocation when
/// storing user-supplied input in error messages.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Sanitize a raw scalar value for inclusion in an error message.
///
/// - Keeps at most [`UNRESOLVED_VALUE_MAX_CHARS`] Unicode scalar values of
///   `raw`, appending `"..."` when anything was cut off.
/// - Replaces every Unicode control character (C0 U+0000–U+001F, DEL U+007F
///   and C1 U+0080–U+009F, which includes NEL U+0085 and CSI U+009B) as well
///   as the line/paragraph separators U+2028 and U+2029 with a `\uXXXX` hex
///   escape.  These are the characters a terminal or log viewer may treat as
///   a line break or control sequence, so escaping them prevents log
///   injection via the `Display` impl.
///
/// All other characters, including multi-byte ones, are copied unchanged.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut chars = raw.chars();

    // `take` stops after the limit without pulling a further item, so the
    // iterator still holds whatever did not fit.
    for ch in chars.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_control() || matches!(ch, '\u{2028}' | '\u{2029}') {
            out.push_str(&format!("\\u{:04X}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }

    // Anything left over means the input exceeded the limit.
    if chars.next().is_some() {
        out.push_str("...");
    }
    out
}
971
972/// Apply schema tag resolution to a freshly-constructed node.
973///
974/// - For scalars: translates bare `!` to `None` (non-specific), then calls
975///   `resolve_scalar`.
976/// - For mappings/sequences: translates bare `!` to `None`, then calls
977///   `resolve_collection`.
978/// - On `Ok(Some(tag))`: overwrites `node.tag`; `tag_loc` is left `None`
979///   (no source position for a resolved tag).
980/// - On `Ok(None)` (explicit tag present): leaves `node.tag` unchanged.
981///
982/// # Errors
983///
984/// Returns [`LoadError::UnresolvedScalar`] when `schema` is [`Schema::Json`]
985/// and a plain scalar does not match any JSON type pattern.
986#[inline]
987fn apply_schema_to_node(
988    node: &mut Node<Span>,
989    schema: Schema,
990    line_index: &LineIndex,
991) -> Result<()> {
992    match node {
993        Node::Scalar {
994            value,
995            style,
996            tag,
997            loc,
998            meta,
999        } => {
1000            // Bare `!` on a scalar is the non-specific scalar tag — it resolves
1001            // unconditionally to !!str regardless of content (YAML 1.2.2 §10.2.1,
1002            // §10.3.2: "non-specific" tag for scalars = Failsafe str).  We handle
1003            // it before calling the schema resolver so Core doesn't pattern-match
1004            // the value.
1005            //
1006            // `tag_loc` is preserved here (NOT cleared) because `!` is explicitly
1007            // written in the source.  Preserving `tag_loc` lets downstream consumers
1008            // (e.g. the formatter) distinguish user-authored tags from resolver-injected
1009            // ones, which is critical for correct idempotent output.
1010            if tag.as_deref() == Some("!") {
1011                *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
1012                return Ok(());
1013            }
1014            // All other tags: pass through as-is (Some(non-!) = explicit tag → Ok(None)).
1015            match resolve_scalar(schema, *style, value, tag.as_deref()) {
1016                Ok(Some(resolved)) => {
1017                    *tag = Some(Cow::Borrowed(resolved.as_str()));
1018                    // Clear tag_loc: resolver-injected tags have no source position.
1019                    if let Some(m) = meta.as_mut() {
1020                        m.tag_loc = None;
1021                        if m.is_all_none() {
1022                            *meta = None;
1023                        }
1024                    }
1025                }
1026                Ok(None) => {}
1027                Err(_) => {
1028                    return Err(LoadError::UnresolvedScalar {
1029                        value: sanitize_scalar_for_error(value),
1030                        pos: span_start_to_pos(loc.start, line_index),
1031                    });
1032                }
1033            }
1034        }
1035        Node::Mapping { tag, meta, .. } => {
1036            // Bare `!` on a collection means non-specific collection tag — translate
1037            // to None so the resolver returns the kind-based tag (!!map / !!seq).
1038            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1039            if let Some(resolved) =
1040                resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1041            {
1042                *tag = Some(Cow::Borrowed(resolved.as_str()));
1043                if let Some(m) = meta.as_mut() {
1044                    m.tag_loc = None;
1045                    if m.is_all_none() {
1046                        *meta = None;
1047                    }
1048                }
1049            }
1050        }
1051        Node::Sequence { tag, meta, .. } => {
1052            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1053            if let Some(resolved) =
1054                resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1055            {
1056                *tag = Some(Cow::Borrowed(resolved.as_str()));
1057                if let Some(m) = meta.as_mut() {
1058                    m.tag_loc = None;
1059                    if m.is_all_none() {
1060                        *meta = None;
1061                    }
1062                }
1063            }
1064        }
1065        Node::Alias { .. } => {}
1066    }
1067    Ok(())
1068}
1069
1070// ---------------------------------------------------------------------------
1071// Node helpers
1072// ---------------------------------------------------------------------------
1073
1074const fn empty_scalar() -> Node<Span> {
1075    Node::Scalar {
1076        value: String::new(),
1077        style: ScalarStyle::Plain,
1078        tag: None,
1079        loc: Span { start: 0, end: 0 },
1080        meta: None,
1081    }
1082}
1083
1084// ---------------------------------------------------------------------------
1085// Tests
1086// ---------------------------------------------------------------------------
1087
1088#[cfg(test)]
1089#[expect(
1090    clippy::expect_used,
1091    clippy::unwrap_used,
1092    clippy::indexing_slicing,
1093    clippy::panic,
1094    reason = "test code"
1095)]
1096mod tests {
1097    use super::*;
1098    use rstest::rstest;
1099
1100    #[test]
1101    fn loader_state_resets_anchor_map_between_documents() {
1102        // In resolved mode: anchor defined in doc 1 must not be visible in doc 2.
1103        let result = LoaderBuilder::new()
1104            .resolved()
1105            .build()
1106            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
1107        assert!(
1108            result.is_err(),
1109            "expected Err: *foo in doc 2 should be undefined"
1110        );
1111        assert!(matches!(
1112            result.unwrap_err(),
1113            LoadError::UndefinedAlias { .. }
1114        ));
1115    }
1116
1117    #[test]
1118    fn register_anchor_increments_count() {
1119        let options = LoaderOptions {
1120            max_anchors: 2,
1121            ..LoaderOptions::default()
1122        };
1123        let mut state = LoadState::new(&options, "");
1124        let node = Node::Scalar {
1125            value: "x".to_owned(),
1126            style: ScalarStyle::Plain,
1127            tag: None,
1128            loc: Span { start: 0, end: 0 },
1129            meta: None,
1130        };
1131        assert!(state.register_anchor("a".to_owned(), &node).is_ok());
1132        assert!(state.register_anchor("b".to_owned(), &node).is_ok());
1133        let err = state
1134            .register_anchor("c".to_owned(), &node)
1135            .expect_err("expected AnchorCountLimitExceeded");
1136        assert!(matches!(
1137            err,
1138            LoadError::AnchorCountLimitExceeded { limit: 2 }
1139        ));
1140    }
1141
1142    #[test]
1143    fn expand_node_detects_circular_alias() {
1144        let options = LoaderOptions {
1145            mode: LoadMode::Resolved,
1146            ..LoaderOptions::default()
1147        };
1148        let mut state = LoadState::new(&options, "");
1149        // Insert a self-referential alias node.
1150        let alias_node = Node::Alias {
1151            name: "a".to_owned(),
1152            loc: Span { start: 0, end: 0 },
1153            leading_comments: None,
1154            trailing_comment: None,
1155        };
1156        state.anchor_map.insert("a".to_owned(), alias_node.clone());
1157        let mut in_progress = HashSet::new();
1158        let result = state.expand_node(alias_node, &mut in_progress);
1159        assert!(
1160            matches!(result, Err(LoadError::CircularAlias { .. })),
1161            "expected CircularAlias, got: {result:?}"
1162        );
1163    }
1164
1165    // -----------------------------------------------------------------------
1166    // Comment between mapping key and nested collection is attached to first nested entry
1167    // -----------------------------------------------------------------------
1168
1169    #[test]
1170    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
1171        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1172        let root = &docs[0].root;
1173        let Node::Mapping { entries, .. } = root else {
1174            panic!("expected root mapping");
1175        };
1176        assert_eq!(entries.len(), 1);
1177        let (_outer_key, outer_value) = &entries[0];
1178        let Node::Mapping {
1179            entries: nested, ..
1180        } = outer_value
1181        else {
1182            panic!("expected nested mapping");
1183        };
1184        assert_eq!(nested.len(), 1);
1185        let (inner_key, _) = &nested[0];
1186        assert_eq!(
1187            inner_key.leading_comments(),
1188            &["# Style 1"],
1189            "comment should be attached to the first nested key"
1190        );
1191    }
1192
1193    #[test]
1194    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
1195        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
1196        let root = &docs[0].root;
1197        let Node::Mapping { entries, .. } = root else {
1198            panic!("expected root mapping");
1199        };
1200        let (_key, seq_value) = &entries[0];
1201        let Node::Sequence { items, .. } = seq_value else {
1202            panic!("expected sequence value");
1203        };
1204        assert_eq!(
1205            items[0].leading_comments(),
1206            &["# leading"],
1207            "comment should be attached to first sequence item"
1208        );
1209    }
1210
1211    #[test]
1212    fn multiple_comments_between_key_and_collection_all_preserved() {
1213        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
1214        let root = &docs[0].root;
1215        let Node::Mapping { entries, .. } = root else {
1216            panic!("expected root mapping");
1217        };
1218        let (_key, seq_value) = &entries[0];
1219        let Node::Sequence { items, .. } = seq_value else {
1220            panic!("expected sequence value");
1221        };
1222        assert_eq!(
1223            items[0].leading_comments(),
1224            &["# first", "# second"],
1225            "both comments should be on first item"
1226        );
1227    }
1228
1229    #[test]
1230    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1231        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1232        let root = &docs[0].root;
1233        let Node::Mapping { entries, .. } = root else {
1234            panic!("expected root mapping");
1235        };
1236        let (outer_key, _) = &entries[0];
1237        assert!(
1238            outer_key.leading_comments().is_empty(),
1239            "outer key should have no leading comments"
1240        );
1241        assert!(
1242            outer_key.trailing_comment().is_none(),
1243            "outer key should have no trailing comment"
1244        );
1245    }
1246
1247    #[test]
1248    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1249        let docs = load("key:\n  inner: val\n").unwrap();
1250        let root = &docs[0].root;
1251        let Node::Mapping { entries, .. } = root else {
1252            panic!("expected root mapping");
1253        };
1254        let (_key, nested) = &entries[0];
1255        let Node::Mapping {
1256            entries: nested_entries,
1257            ..
1258        } = nested
1259        else {
1260            panic!("expected nested mapping");
1261        };
1262        let (inner_key, _) = &nested_entries[0];
1263        assert!(
1264            inner_key.leading_comments().is_empty(),
1265            "inner key should have no leading comments when there is no comment"
1266        );
1267    }
1268
1269    // -----------------------------------------------------------------------
1270    // Trailing comment of nested collection becomes leading comment on next sibling
1271    // -----------------------------------------------------------------------
1272
1273    #[test]
1274    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1275        let input =
1276            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
1277        let docs = load(input).unwrap();
1278        let root = &docs[0].root;
1279        let Node::Mapping { entries, .. } = root else {
1280            panic!("expected root mapping");
1281        };
1282        let (_lists_key, nested) = &entries[0];
1283        let Node::Mapping {
1284            entries: nested_entries,
1285            ..
1286        } = nested
1287        else {
1288            panic!("expected nested mapping");
1289        };
1290        assert_eq!(nested_entries.len(), 2);
1291        let (list_b_key, _) = &nested_entries[1];
1292        assert_eq!(
1293            list_b_key.leading_comments(),
1294            &["# Style 2"],
1295            "# Style 2 should be leading comment on list-b key"
1296        );
1297    }
1298
1299    #[test]
1300    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1301        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
1302        let docs = load(input).unwrap();
1303        let root = &docs[0].root;
1304        let Node::Mapping { entries, .. } = root else {
1305            panic!("expected root mapping");
1306        };
1307        let (_outer_key, outer_val) = &entries[0];
1308        let Node::Mapping {
1309            entries: nested, ..
1310        } = outer_val
1311        else {
1312            panic!("expected nested mapping");
1313        };
1314        assert_eq!(nested.len(), 2);
1315        let (b_key, _) = &nested[1];
1316        assert_eq!(
1317            b_key.leading_comments(),
1318            &["# between"],
1319            "# between should be leading comment on b key"
1320        );
1321    }
1322
1323    #[test]
1324    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1325        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
1326        let docs = load(input).unwrap();
1327        let root = &docs[0].root;
1328        let Node::Mapping { entries, .. } = root else {
1329            panic!("expected root mapping");
1330        };
1331        let (_parent_key, parent_val) = &entries[0];
1332        let Node::Mapping {
1333            entries: siblings, ..
1334        } = parent_val
1335        else {
1336            panic!("expected parent mapping value");
1337        };
1338        assert_eq!(siblings.len(), 2);
1339        let (child2_key, _) = &siblings[1];
1340        assert_eq!(
1341            child2_key.leading_comments(),
1342            &["# end-of-child1"],
1343            "# end-of-child1 should be leading comment on child2 key"
1344        );
1345    }
1346
1347    #[test]
1348    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
1349        let input = "items:\n  - a\n  - b\n  # tail\n";
1350        let docs = load(input).unwrap();
1351        // The document must parse successfully (no panic, no error).
1352        assert!(!docs.is_empty(), "document should parse without error");
1353        // The # tail comment must not cause data loss — the sequence items are intact.
1354        let root = &docs[0].root;
1355        let Node::Mapping { entries, .. } = root else {
1356            panic!("expected root mapping");
1357        };
1358        let (_items_key, seq_val) = &entries[0];
1359        let Node::Sequence { items, .. } = seq_val else {
1360            panic!("expected sequence value");
1361        };
1362        assert_eq!(items.len(), 2, "sequence items must not be lost");
1363    }
1364
1365    #[test]
1366    fn no_overflow_comments_when_collection_ends_cleanly() {
1367        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
1368        let root = &docs[0].root;
1369        let Node::Mapping { entries, .. } = root else {
1370            panic!("expected root mapping");
1371        };
1372        let (_key, seq_val) = &entries[0];
1373        let Node::Sequence { items, .. } = seq_val else {
1374            panic!("expected sequence value");
1375        };
1376        for item in items {
1377            assert!(
1378                item.leading_comments().is_empty(),
1379                "items should have no leading comments"
1380            );
1381        }
1382    }
1383
1384    // -----------------------------------------------------------------------
1385    // Combined scenarios
1386    // -----------------------------------------------------------------------
1387
1388    #[test]
1389    fn original_bug_report_input_preserves_both_comments() {
1390        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
1391        let docs = load(input).unwrap();
1392        let root = &docs[0].root;
1393        let Node::Mapping { entries, .. } = root else {
1394            panic!("expected root mapping");
1395        };
1396        let (_lists_key, nested) = &entries[0];
1397        let Node::Mapping {
1398            entries: nested_entries,
1399            ..
1400        } = nested
1401        else {
1402            panic!("expected nested mapping");
1403        };
1404        assert_eq!(nested_entries.len(), 2);
1405        let (first_key, _) = &nested_entries[0];
1406        let (second_key, _) = &nested_entries[1];
1407        assert_eq!(
1408            first_key.leading_comments(),
1409            &["# Style 1"],
1410            "list-a should have # Style 1 as leading comment"
1411        );
1412        assert_eq!(
1413            second_key.leading_comments(),
1414            &["# Style 2"],
1415            "list-b should have # Style 2 as leading comment"
1416        );
1417    }
1418
1419    #[test]
1420    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1421        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
1422        let docs = load(input).unwrap();
1423        let root = &docs[0].root;
1424        let Node::Mapping { entries, .. } = root else {
1425            panic!("expected root mapping");
1426        };
1427        let (_map_key, map_val) = &entries[0];
1428        let Node::Mapping {
1429            entries: siblings, ..
1430        } = map_val
1431        else {
1432            panic!("expected mapping value");
1433        };
1434        assert_eq!(siblings.len(), 2);
1435        let (key1, val1) = &siblings[0];
1436        let (key2, _) = &siblings[1];
1437        assert_eq!(key1.leading_comments(), &["# leading"]);
1438        assert_eq!(val1.trailing_comment(), Some("# trailing"));
1439        assert_eq!(key2.leading_comments(), &["# next-leading"]);
1440    }
1441
1442    #[test]
1443    fn deeply_nested_overflow_comments_reach_correct_sibling() {
1444        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
1445        let docs = load(input).unwrap();
1446        let root = &docs[0].root;
1447        let Node::Mapping { entries, .. } = root else {
1448            panic!("expected root mapping");
1449        };
1450        let (_top_key, top_val) = &entries[0];
1451        let Node::Mapping {
1452            entries: top_entries,
1453            ..
1454        } = top_val
1455        else {
1456            panic!("expected top-level mapping");
1457        };
1458        assert_eq!(top_entries.len(), 2);
1459        let (next_key, _) = &top_entries[1];
1460        assert_eq!(
1461            next_key.leading_comments(),
1462            &["# deep-overflow"],
1463            "# deep-overflow should propagate from nested sequence to next sibling"
1464        );
1465    }
1466
1467    // -----------------------------------------------------------------------
1468    // Document marker flags (explicit_start / explicit_end)
1469    // -----------------------------------------------------------------------
1470
1471    #[rstest]
1472    #[case::bare_document("key: value\n", false, false)]
1473    #[case::start_marker_only("---\nkey: value\n", true, false)]
1474    #[case::end_marker_only("key: value\n...\n", false, true)]
1475    #[case::both_markers("---\nkey: value\n...\n", true, true)]
1476    #[case::empty_with_both_markers("---\n...\n", true, true)]
1477    fn document_marker_flags_match_input(
1478        #[case] input: &str,
1479        #[case] expected_start: bool,
1480        #[case] expected_end: bool,
1481    ) {
1482        let docs = load(input).expect("load failed");
1483        assert_eq!(docs.len(), 1);
1484        assert_eq!(docs[0].explicit_start, expected_start, "explicit_start");
1485        assert_eq!(docs[0].explicit_end, expected_end, "explicit_end");
1486    }
1487
1488    #[test]
1489    fn multi_document_flags_are_independent() {
1490        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
1491        assert_eq!(docs.len(), 3);
1492        assert!(!docs[0].explicit_start, "doc1 explicit_start");
1493        assert!(!docs[0].explicit_end, "doc1 explicit_end");
1494        assert!(docs[1].explicit_start, "doc2 explicit_start");
1495        assert!(docs[1].explicit_end, "doc2 explicit_end");
1496        assert!(docs[2].explicit_start, "doc3 explicit_start");
1497        assert!(!docs[2].explicit_end, "doc3 explicit_end");
1498    }
1499
1500    // -----------------------------------------------------------------------
1501    // sanitize_scalar_for_error unit tests
1502    // -----------------------------------------------------------------------
1503
1504    #[rstest]
1505    #[case::newline("foo\nbar", '\n', "\\u000A", "foo\\u000Abar")]
1506    #[case::carriage_return("foo\rbar", '\r', "\\u000D", "foo\\u000Dbar")]
1507    #[case::null_byte("foo\0bar", '\0', "\\u0000", "foo\\u0000bar")]
1508    fn sanitize_replaces_control_char_with_escape(
1509        #[case] input: &str,
1510        #[case] raw_char: char,
1511        #[case] escape_seq: &str,
1512        #[case] expected: &str,
1513    ) {
1514        let result = sanitize_scalar_for_error(input);
1515        assert!(
1516            !result.contains(raw_char),
1517            "output must not contain the raw control character"
1518        );
1519        assert!(
1520            result.contains(escape_seq),
1521            "output must contain {escape_seq} escape, got: {result:?}"
1522        );
1523        assert_eq!(result, expected);
1524    }
1525
1526    #[test]
1527    fn sanitize_short_value_stored_verbatim() {
1528        let input = "hello";
1529        let result = sanitize_scalar_for_error(input);
1530        assert_eq!(result, "hello");
1531        assert!(
1532            !result.ends_with("..."),
1533            "short value must not be truncated"
1534        );
1535    }
1536
1537    #[test]
1538    fn sanitize_value_at_exact_limit_not_truncated() {
1539        let input = "a".repeat(128);
1540        let result = sanitize_scalar_for_error(&input);
1541        assert_eq!(
1542            result.len(),
1543            128,
1544            "128-char input must produce 128-char output"
1545        );
1546        assert!(
1547            !result.ends_with("..."),
1548            "value at exact limit must not be truncated"
1549        );
1550    }
1551
1552    #[test]
1553    fn sanitize_value_over_limit_truncated() {
1554        let input = "a".repeat(129);
1555        let result = sanitize_scalar_for_error(&input);
1556        assert!(
1557            result.ends_with("..."),
1558            "value over limit must end with '...'"
1559        );
1560        assert_eq!(
1561            result.len(),
1562            128 + 3,
1563            "truncated output must be 128 chars + 3 ellipsis chars"
1564        );
1565    }
1566
1567    #[test]
1568    fn sanitize_multibyte_char_boundary_not_split() {
1569        let input: String = "中".repeat(127) + "ab"; // 129 chars total
1570        let result = sanitize_scalar_for_error(&input);
1571        assert!(
1572            result.ends_with("..."),
1573            "129-char multibyte input should be truncated"
1574        );
1575        let char_count = result.trim_end_matches("...").chars().count();
1576        assert_eq!(
1577            char_count, 128,
1578            "truncated portion must be exactly 128 chars"
1579        );
1580    }
1581
1582    // -----------------------------------------------------------------------
1583    // Cow variant identity for resolver-injected vs user-authored tags
1584    // -----------------------------------------------------------------------
1585
1586    fn load_root(input: &str) -> Node<Span> {
1587        load(input).expect("load failed").remove(0).root
1588    }
1589
1590    fn node_tag(node: Node<Span>) -> Option<Cow<'static, str>> {
1591        match node {
1592            Node::Scalar { tag, .. } | Node::Mapping { tag, .. } | Node::Sequence { tag, .. } => {
1593                tag
1594            }
1595            Node::Alias { .. } => None,
1596        }
1597    }
1598
1599    #[rstest]
1600    #[case::str_tag("hello\n")]
1601    #[case::int_tag("42\n")]
1602    #[case::null_tag("null\n")]
1603    #[case::map_tag("a: 1\n")]
1604    #[case::seq_tag("- a\n")]
1605    #[case::bare_excl_tag("! hello\n")]
1606    fn resolver_emitted_tag_is_borrowed(#[case] input: &str) {
1607        let tag = node_tag(load_root(input));
1608        assert!(
1609            matches!(tag, Some(Cow::Borrowed(_))),
1610            "resolver-emitted tag must be Borrowed, got: {tag:?}"
1611        );
1612    }
1613
1614    #[rstest]
1615    #[case::scalar("!!str hello\n")]
1616    #[case::mapping("!!map\na: 1\n")]
1617    #[case::sequence("!!seq\n- a\n")]
1618    fn user_authored_tag_is_owned(#[case] input: &str) {
1619        let tag = node_tag(load_root(input));
1620        assert!(
1621            matches!(tag, Some(Cow::Owned(_))),
1622            "user-authored tag must be Owned, got: {tag:?}"
1623        );
1624    }
1625
1626    #[test]
1627    fn alias_node_has_no_tag_field() {
1628        let docs = LoaderBuilder::new()
1629            .build()
1630            .load("- &a x\n- *a\n")
1631            .expect("load failed");
1632        let Node::Sequence { items, .. } = &docs[0].root else {
1633            panic!("expected root sequence");
1634        };
1635        assert!(
1636            matches!(items[1], Node::Alias { .. }),
1637            "second item must be Alias in lossless mode"
1638        );
1639    }
1640
1641    #[test]
1642    fn tag_value_content_preserved_across_cow_variants() {
1643        let Node::Scalar {
1644            tag: tag_resolver, ..
1645        } = load_root("hello\n")
1646        else {
1647            panic!("expected scalar");
1648        };
1649        assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));
1650
1651        let Node::Scalar { tag: tag_user, .. } = load_root("!custom hello\n") else {
1652            panic!("expected scalar");
1653        };
1654        assert_eq!(tag_user.as_deref(), Some("!custom"));
1655    }
1656
1657    // -----------------------------------------------------------------------
1658    // Loader correctly gates NodeMeta construction
1659    // -----------------------------------------------------------------------
1660
1661    fn node_meta_is_none(node: &Node<Span>) -> bool {
1662        matches!(
1663            node,
1664            Node::Scalar { meta: None, .. }
1665                | Node::Mapping { meta: None, .. }
1666                | Node::Sequence { meta: None, .. }
1667        )
1668    }
1669
1670    #[rstest]
1671    #[case::plain_scalar("hello\n")]
1672    #[case::plain_mapping("a: 1\n")]
1673    #[case::plain_sequence("- a\n")]
1674    fn loaded_node_with_no_meta_fields_has_meta_none(#[case] input: &str) {
1675        let docs = load(input).unwrap();
1676        let root = &docs[0].root;
1677        assert!(
1678            node_meta_is_none(root),
1679            "plain node must have meta: None, got: {root:?}"
1680        );
1681    }
1682
1683    #[test]
1684    fn loaded_anchored_scalar_has_meta_some() {
1685        let docs = load("- &foo bar\n").unwrap();
1686        let Node::Sequence { items, .. } = &docs[0].root else {
1687            panic!("expected root Sequence");
1688        };
1689        let item = &items[0];
1690        assert!(
1691            matches!(item, Node::Scalar { meta: Some(_), .. }),
1692            "anchored scalar must have meta: Some, got: {item:?}"
1693        );
1694        assert_eq!(item.anchor(), Some("foo"));
1695    }
1696
1697    #[test]
1698    fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
1699        let docs = load("&tag hello\n").unwrap();
1700        let root = &docs[0].root;
1701        assert!(
1702            matches!(root, Node::Scalar { meta: Some(_), .. }),
1703            "anchored scalar must have meta: Some"
1704        );
1705        assert!(
1706            root.anchor_loc().is_some(),
1707            "anchor_loc() must be Some for anchored scalar"
1708        );
1709    }
1710
1711    // -----------------------------------------------------------------------
1712    // Property displacement promotion — combined anchor+tag on block collections
1713    // -----------------------------------------------------------------------
1714
1715    #[rstest]
1716    // Block mapping
1717    #[case::block_mapping_anchor_only("&a\nk: v\n", Some("a"), false)]
1718    #[case::block_mapping_tag_only("!mytag\nk: v\n", None, true)]
1719    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n", Some("a"), true)]
1720    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n", Some("a"), true)]
1721    // Block sequence
1722    #[case::block_sequence_anchor_only("&a\n- item\n", Some("a"), false)]
1723    #[case::block_sequence_tag_only("!mytag\n- item\n", None, true)]
1724    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n", Some("a"), true)]
1725    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n", Some("a"), true)]
1726    // Flow mapping
1727    #[case::flow_mapping_anchor_only("&a {k: v}\n", Some("a"), false)]
1728    #[case::flow_mapping_tag_only("!mytag {k: v}\n", None, true)]
1729    #[case::flow_mapping_anchor_then_tag("&a !mytag {k: v}\n", Some("a"), true)]
1730    #[case::flow_mapping_tag_then_anchor("!mytag &a {k: v}\n", Some("a"), true)]
1731    // Flow sequence
1732    #[case::flow_sequence_anchor_only("&a [item]\n", Some("a"), false)]
1733    #[case::flow_sequence_tag_only("!mytag [item]\n", None, true)]
1734    #[case::flow_sequence_anchor_then_tag("&a !mytag [item]\n", Some("a"), true)]
1735    #[case::flow_sequence_tag_then_anchor("!mytag &a [item]\n", Some("a"), true)]
1736    fn combined_properties_attach_to_root_collection(
1737        #[case] input: &str,
1738        #[case] expected_anchor: Option<&str>,
1739        #[case] expected_has_tag: bool,
1740    ) {
1741        let docs = load(input).unwrap();
1742        let root = &docs[0].root;
1743        assert_eq!(root.anchor(), expected_anchor, "anchor on root collection");
1744        assert_eq!(
1745            root.tag_loc().is_some(),
1746            expected_has_tag,
1747            "tag_loc on root collection"
1748        );
1749    }
1750
1751    // Block collections: first child must not inherit anchor or tag from the root
1752    #[rstest]
1753    // Block mapping
1754    #[case::block_mapping_anchor_only("&a\nk: v\n")]
1755    #[case::block_mapping_tag_only("!mytag\nk: v\n")]
1756    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n")]
1757    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n")]
1758    // Block sequence
1759    #[case::block_sequence_anchor_only("&a\n- item\n")]
1760    #[case::block_sequence_tag_only("!mytag\n- item\n")]
1761    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n")]
1762    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n")]
1763    fn first_child_of_block_collection_has_no_properties(#[case] input: &str) {
1764        let docs = load(input).unwrap();
1765        let root = &docs[0].root;
1766        let first_child: &Node<Span> = match root {
1767            Node::Mapping { entries, .. } => &entries[0].0,
1768            Node::Sequence { items, .. } => &items[0],
1769            Node::Scalar { .. } | Node::Alias { .. } => panic!("expected block collection"),
1770        };
1771        assert_eq!(
1772            first_child.anchor(),
1773            None,
1774            "anchor must not appear on first child"
1775        );
1776        assert!(
1777            first_child.tag_loc().is_none(),
1778            "tag_loc must not appear on first child"
1779        );
1780    }
1781
1782    // --- Alias registration smoke test ---
1783
1784    #[test]
1785    fn anchor_on_block_mapping_with_tag_is_resolvable_via_alias() {
1786        let input = "root:\n  tagged: &a !mytag\n    k: v\n  ref: *a\n";
1787        let result = LoaderBuilder::new().resolved().build().load(input);
1788        assert!(
1789            result.is_ok(),
1790            "alias *a must resolve — anchor must be on the mapping, not lost to first key: {result:?}"
1791        );
1792    }
1793}