Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::iter::Peekable;
35
36use std::sync::Arc;
37
38use crate::error::{Error, ErrorKind};
39use crate::event::{Event, EventMeta, ScalarStyle};
40use crate::node::{Document, Node, NodeMeta};
41use crate::pos::{LineIndex, Pos, Span};
42use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
43
44use comments::{attach_leading_comments, attach_trailing_comment};
45use reloc::reloc;
46use stream::{
47    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
48    with_hash_prefix,
49};
50
51mod comments;
52mod reloc;
53mod stream;
54
55// ---------------------------------------------------------------------------
56// Public error type
57// ---------------------------------------------------------------------------
58
59/// Errors produced by the loader.
60#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
61#[non_exhaustive]
62pub enum LoadError {
63    /// The event stream contained a parse error.
64    #[error("parse error at {pos:?}: {message}")]
65    #[non_exhaustive]
66    Parse {
67        /// Source position where the parse error was detected.
68        pos: Pos,
69        /// Human-readable description of the error.
70        message: String,
71        /// Broad category of the error, for routing without message-string matching.
72        kind: ErrorKind,
73    },
74
75    /// The event stream ended unexpectedly mid-document.
76    #[error("unexpected end of event stream")]
77    UnexpectedEndOfStream,
78
79    /// Nesting depth exceeded the configured limit.
80    #[error("nesting depth limit exceeded at {pos:?} (max: {limit})")]
81    NestingDepthLimitExceeded {
82        /// The configured nesting depth limit that was exceeded.
83        limit: usize,
84        /// Source position of the collection start that exceeded the limit.
85        pos: Pos,
86    },
87
88    /// Too many distinct anchor names were defined.
89    #[error("anchor count limit exceeded at {pos:?} (max: {limit})")]
90    AnchorCountLimitExceeded {
91        /// The configured anchor count limit that was exceeded.
92        limit: usize,
93        /// Source position of the anchor that exceeded the limit.
94        pos: Pos,
95    },
96
97    /// Alias expansion produced more nodes than the configured limit.
98    #[error("alias expansion node limit exceeded at {pos:?} (max: {limit})")]
99    AliasExpansionLimitExceeded {
100        /// The configured expansion node limit that was exceeded.
101        limit: usize,
102        /// Source position of the node that exceeded the expansion limit.
103        pos: Pos,
104    },
105
106    /// A circular alias reference was detected.
107    #[error("circular alias reference at {pos:?}: '{name}'")]
108    CircularAlias {
109        /// The anchor name involved in the cycle.
110        name: String,
111        /// Source position of the alias that triggered the cycle detection.
112        pos: Pos,
113    },
114
115    /// An alias referred to an anchor that was never defined.
116    #[error("undefined alias at {pos:?}: '{name}'")]
117    UndefinedAlias {
118        /// The alias name that had no corresponding anchor definition.
119        name: String,
120        /// Source position of the alias reference.
121        pos: Pos,
122    },
123
124    /// A plain scalar could not be resolved under the JSON schema.
125    ///
126    /// The JSON schema has no fallback: every untagged plain scalar must match
127    /// one of its patterns (null, bool, int, float).  If none match, the scalar
128    /// is an error per YAML 1.2.2 §10.2.
129    ///
130    /// `value` is truncated to 128 Unicode scalar values and ASCII control
131    /// characters (U+0000–U+001F, U+007F) are replaced with `\uXXXX` escapes
132    /// to prevent log injection via the `Display` impl.
133    #[error("JSON schema: plain scalar does not match any type pattern")]
134    UnresolvedScalar {
135        /// The sanitized, truncated scalar value that failed resolution.
136        value: String,
137        /// Source position of the scalar.
138        pos: Pos,
139    },
140}
141
142// Convenience alias used inside the module.
143type Result<T> = std::result::Result<T, LoadError>;
144
145// Type alias for the peekable event stream used throughout the loader.
146type EventStream<'a> =
147    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
148
149/// Unpack an `Option<Box<EventMeta>>` into its four constituent fields.
150#[expect(
151    clippy::type_complexity,
152    reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
153)]
154#[inline]
155fn unpack_meta(
156    meta: Option<Box<EventMeta<'_>>>,
157) -> (
158    Option<&'_ str>,
159    Option<Span>,
160    Option<std::borrow::Cow<'_, str>>,
161    Option<Span>,
162) {
163    meta.map_or((None, None, None, None), |m| {
164        (m.anchor, m.anchor_loc, m.tag, m.tag_loc)
165    })
166}
167
168// ---------------------------------------------------------------------------
169// Configuration
170// ---------------------------------------------------------------------------
171
/// How the loader treats alias references.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LoadMode {
    /// Keep every alias as a [`Node::Alias`] node (default, safe for LSP).
    Lossless,
    /// Expand every alias inline; bounded by the `max_expanded_nodes` limit.
    Resolved,
}
180
181/// Security and behaviour options for the loader.
182#[derive(Debug, Clone)]
183pub struct LoaderOptions {
184    /// Maximum mapping/sequence nesting depth before returning
185    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
186    pub max_nesting_depth: usize,
187    /// Maximum number of distinct anchor names per document before returning
188    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
189    pub max_anchors: usize,
190    /// Maximum total nodes produced by alias expansion in resolved mode before
191    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
192    pub max_expanded_nodes: usize,
193    /// Controls how alias references are handled during loading.
194    pub mode: LoadMode,
195    /// YAML 1.2.2 §10 schema to apply during loading (default: [`Schema::Core`]).
196    ///
197    /// Each node's tag is resolved according to this schema after the node is
198    /// constructed.  Nodes with explicit source tags are left unchanged.
199    pub schema: Schema,
200}
201
202impl Default for LoaderOptions {
203    fn default() -> Self {
204        Self {
205            max_nesting_depth: 512,
206            max_anchors: 10_000,
207            max_expanded_nodes: 1_000_000,
208            mode: LoadMode::Lossless,
209            schema: Schema::Core,
210        }
211    }
212}
213
214// ---------------------------------------------------------------------------
215// Builder
216// ---------------------------------------------------------------------------
217
218/// Builder for configuring and creating a [`Loader`].
219///
220/// ```
221/// use rlsp_yaml_parser::loader::LoaderBuilder;
222///
223/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
224/// assert_eq!(docs.len(), 1);
225/// ```
226pub struct LoaderBuilder {
227    options: LoaderOptions,
228}
229
230impl LoaderBuilder {
231    /// Create a builder with default options (lossless mode, safe limits).
232    #[must_use]
233    pub fn new() -> Self {
234        Self {
235            options: LoaderOptions::default(),
236        }
237    }
238
239    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
240    #[must_use]
241    pub const fn lossless(mut self) -> Self {
242        self.options.mode = LoadMode::Lossless;
243        self
244    }
245
246    /// Use resolved mode — aliases are expanded inline.
247    #[must_use]
248    pub const fn resolved(mut self) -> Self {
249        self.options.mode = LoadMode::Resolved;
250        self
251    }
252
253    /// Override the maximum nesting depth.
254    #[must_use]
255    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
256        self.options.max_nesting_depth = limit;
257        self
258    }
259
260    /// Override the maximum anchor count.
261    #[must_use]
262    pub const fn max_anchors(mut self, limit: usize) -> Self {
263        self.options.max_anchors = limit;
264        self
265    }
266
267    /// Override the maximum expanded-node count (resolved mode only).
268    #[must_use]
269    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
270        self.options.max_expanded_nodes = limit;
271        self
272    }
273
274    /// Override the YAML 1.2.2 §10 schema used for tag resolution during loading.
275    ///
276    /// The default is [`Schema::Core`].  Untagged nodes receive resolved tag URIs
277    /// in the AST; nodes with explicit source tags are not modified.
278    #[must_use]
279    pub const fn schema(mut self, s: Schema) -> Self {
280        self.options.schema = s;
281        self
282    }
283
284    /// Consume the builder and produce a [`Loader`].
285    #[must_use]
286    pub const fn build(self) -> Loader {
287        Loader {
288            options: self.options,
289        }
290    }
291}
292
293impl Default for LoaderBuilder {
294    fn default() -> Self {
295        Self::new()
296    }
297}
298
299// ---------------------------------------------------------------------------
300// Loader
301// ---------------------------------------------------------------------------
302
303/// A configured YAML loader.
304pub struct Loader {
305    options: LoaderOptions,
306}
307
308impl Loader {
309    /// Load YAML text into a sequence of documents.
310    ///
311    /// # Errors
312    ///
313    /// Returns `Err` if the input contains a parse error, exceeds a configured
314    /// security limit, or (in resolved mode) references an undefined anchor.
315    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
316        let mut state = LoadState::new(&self.options, input);
317        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
318            Box::new(crate::parse_events(input));
319        state.run(iter.peekable())
320    }
321}
322
323// ---------------------------------------------------------------------------
324// Convenience entry point
325// ---------------------------------------------------------------------------
326
327/// Load YAML text using lossless mode, default security limits, and Core schema tag
328/// resolution (YAML 1.2.2 §10.3).
329///
330/// Returns one `Document<Span>` per YAML document in the stream.  Untagged nodes
331/// receive resolved tag URIs according to the Core schema; nodes with explicit source
332/// tags are left unchanged.
333///
334/// # Errors
335///
336/// Returns `Err` if the input contains a parse error or exceeds a security
337/// limit (nesting depth or anchor count).
338///
339/// ```
340/// use rlsp_yaml_parser::loader::load;
341/// use rlsp_yaml_parser::Node;
342///
343/// let docs = load("hello\n").unwrap();
344/// assert_eq!(docs.len(), 1);
345/// let Node::Scalar { tag, .. } = &docs[0].root else { panic!() };
346/// assert_eq!(tag.as_deref(), Some("tag:yaml.org,2002:str"));
347/// ```
348pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
349    LoaderBuilder::new().lossless().build().load(input)
350}
351
352// ---------------------------------------------------------------------------
353// Internal loader state
354// ---------------------------------------------------------------------------
355
356struct LoadState<'opt> {
357    options: &'opt LoaderOptions,
358    /// Anchors registered so far in the current document: name → node.
359    anchor_map: HashMap<String, Node<Span>>,
360    /// Count of distinct anchors registered (resets per document).
361    anchor_count: usize,
362    /// Current nesting depth (incremented on Begin, decremented on End).
363    depth: usize,
364    /// Total nodes produced via alias expansion (resolved mode only).
365    expanded_nodes: usize,
366    /// Leading comments accumulated by `parse_node` when it encounters a
367    /// `Comment` event between a mapping key and its value's collection start,
368    /// or by a sequence/mapping loop when it hits End with leftover leading
369    /// comments.  The next mapping/sequence loop iteration picks these up and
370    /// prepends them to the next entry's leading comments.
371    pending_leading: Vec<String>,
372    /// Line index for the current document source; shared across all documents
373    /// produced from the same input via `Arc` to avoid N full copies.
374    line_index: Arc<LineIndex>,
375}
376
377impl<'opt> LoadState<'opt> {
378    fn new(options: &'opt LoaderOptions, input: &str) -> Self {
379        Self {
380            options,
381            anchor_map: HashMap::new(),
382            anchor_count: 0,
383            depth: 0,
384            expanded_nodes: 0,
385            pending_leading: Vec::new(),
386            line_index: Arc::new(LineIndex::new(input)),
387        }
388    }
389
390    fn reset_for_document(&mut self) {
391        self.anchor_map.clear();
392        self.anchor_count = 0;
393        self.expanded_nodes = 0;
394        self.pending_leading.clear();
395    }
396
397    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
398        let mut docs: Vec<Document<Span>> = Vec::new();
399
400        // Skip StreamStart.
401        match stream.next() {
402            Some(Ok(_)) | None => {}
403            Some(Err(e)) => {
404                return Err(LoadError::Parse {
405                    pos: e.pos,
406                    message: e.message,
407                    kind: e.kind,
408                });
409            }
410        }
411
412        loop {
413            // Skip any leading comments or unknown events before a document.
414            match next_from(&mut stream)? {
415                None | Some((Event::StreamEnd, _)) => break,
416                Some((
417                    Event::DocumentStart {
418                        explicit,
419                        version,
420                        tag_directives,
421                    },
422                    _,
423                )) => {
424                    let doc_explicit_start = explicit;
425                    let doc_version = version;
426                    let doc_tags = tag_directives;
427                    self.reset_for_document();
428
429                    let mut doc_comments: Vec<String> = Vec::new();
430
431                    // Consume leading comments at document level.
432                    consume_leading_doc_comments(&mut stream, &mut doc_comments, &self.line_index)?;
433
434                    // Parse root node (may be absent for empty documents).
435                    let root = if is_document_end(stream.peek()) {
436                        // Empty document — emit an empty scalar as root.
437                        let mut node = empty_scalar();
438                        apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
439                        node
440                    } else {
441                        self.parse_node(&mut stream)?
442                    };
443
444                    // Consume DocumentEnd if present and capture its explicit flag.
445                    let doc_explicit_end =
446                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
447                            let end_explicit = *explicit;
448                            let _ = stream.next();
449                            end_explicit
450                        } else {
451                            false
452                        };
453
454                    docs.push(Document {
455                        root,
456                        version: doc_version,
457                        tags: doc_tags,
458                        comments: doc_comments,
459                        explicit_start: doc_explicit_start,
460                        explicit_end: doc_explicit_end,
461                        line_index: Some(self.line_index.clone()),
462                    });
463                }
464                Some(_) => {
465                    // Comment or any other stray event outside a document — skip.
466                }
467            }
468        }
469
470        Ok(docs)
471    }
472
473    /// Parse a single node from the stream.
474    ///
475    /// Advances the stream past the node (including end-of-container events).
476    #[expect(
477        clippy::too_many_lines,
478        reason = "match-on-event-type; splitting would obscure flow"
479    )]
480    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
481        // Structural end events close the caller's collection loop — do NOT
482        // consume them here.  Return an empty scalar and leave the event in
483        // the stream so the outer mapping/sequence loop can see and consume it.
484        if matches!(
485            stream.peek(),
486            Some(Ok((
487                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
488                _
489            )))
490        ) {
491            return Ok(empty_scalar());
492        }
493
494        let Some((event, span)) = next_from(stream)? else {
495            return Ok(empty_scalar());
496        };
497
498        match event {
499            Event::Scalar { value, style, meta } => {
500                let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
501                let anchor = anchor.map(str::to_owned);
502                // Capture the anchor span before it moves into NodeMeta.
503                let anchor_span = anchor_loc.unwrap_or(span);
504                let mut node = Node::Scalar {
505                    value: value.into_owned(),
506                    style,
507                    tag: tag.map(|t| Cow::Owned(t.into_owned())),
508                    loc: span,
509                    meta: NodeMeta {
510                        anchor,
511                        anchor_loc,
512                        tag_loc,
513                        leading_comments: None,
514                        trailing_comment: None,
515                    }
516                    .into_option(),
517                };
518                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
519                if let Some(name) = node.anchor() {
520                    self.register_anchor(name.to_owned(), &node, anchor_span)?;
521                }
522                Ok(node)
523            }
524
525            Event::MappingStart { style, meta } => {
526                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
527                let anchor = event_anchor.map(str::to_owned);
528                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
529                let anchor_for_registration = anchor.clone();
530                // Capture the anchor span before it moves into NodeMeta.
531                let anchor_span = anchor_loc.unwrap_or(span);
532
533                self.depth += 1;
534                if self.depth > self.options.max_nesting_depth {
535                    return Err(LoadError::NestingDepthLimitExceeded {
536                        limit: self.options.max_nesting_depth,
537                        pos: span_start_to_pos(span.start, &self.line_index),
538                    });
539                }
540
541                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
542                let mut end_span = span;
543
544                loop {
545                    // Consume leading comments before the next key.  Also
546                    // collect any comments that spilled over from a sibling
547                    // value's collection end (stored in `pending_leading`).
548                    let raw_leading = consume_leading_comments(stream)?;
549                    let leading = if self.pending_leading.is_empty() {
550                        raw_leading
551                    } else {
552                        let mut combined = std::mem::take(&mut self.pending_leading);
553                        combined.extend(raw_leading);
554                        combined
555                    };
556
557                    match stream.peek() {
558                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
559                            // Save any collected leading comments so the next
560                            // sibling entry in the parent collection can inherit
561                            // them (e.g. a comment just before MappingEnd that
562                            // belongs to the following mapping entry).
563                            if !leading.is_empty() {
564                                self.pending_leading = leading;
565                            }
566                            break;
567                        }
568                        Some(Err(_)) => {
569                            // Consume the error.
570                            return Err(match stream.next() {
571                                Some(Err(e)) => LoadError::Parse {
572                                    pos: e.pos,
573                                    message: e.message,
574                                    kind: e.kind,
575                                },
576                                _ => LoadError::UnexpectedEndOfStream,
577                            });
578                        }
579                        Some(Ok(_)) => {}
580                    }
581
582                    let mut key = self.parse_node(stream)?;
583                    attach_leading_comments(&mut key, leading);
584
585                    let mut value = self.parse_node(stream)?;
586
587                    // Trailing comment on the value — peek for inline comment.
588                    // Block scalars (literal `|` and folded `>`) consume trailing
589                    // blank lines as part of chomping; their span.end falls on the
590                    // first line after the scalar, which can coincide with the
591                    // next comment's line number.  That would falsely attach a
592                    // leading inter-node comment as a trailing inline comment.
593                    // Block scalars never have an inline comment on their content
594                    // lines, so skip trailing-comment detection for them.
595                    if !is_block_scalar(&value)
596                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
597                    {
598                        let value_end_line = node_end_line(&value, &self.line_index);
599                        if let Some(trail) =
600                            peek_trailing_comment(stream, value_end_line, &self.line_index)?
601                        {
602                            attach_trailing_comment(&mut value, trail);
603                        }
604                    }
605
606                    entries.push((key, value));
607                }
608
609                // Consume MappingEnd and capture its span.
610                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
611                    end_span = *end;
612                    let _ = stream.next();
613                }
614                self.depth -= 1;
615
616                let mut node = Node::Mapping {
617                    entries,
618                    style,
619                    tag,
620                    loc: Span {
621                        start: span.start,
622                        end: end_span.end,
623                    },
624                    meta: NodeMeta {
625                        anchor,
626                        anchor_loc,
627                        tag_loc,
628                        leading_comments: None,
629                        trailing_comment: None,
630                    }
631                    .into_option(),
632                };
633                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
634                if let Some(name) = anchor_for_registration {
635                    self.register_anchor(name, &node, anchor_span)?;
636                }
637                Ok(node)
638            }
639
640            Event::SequenceStart { style, meta } => {
641                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
642                let anchor = event_anchor.map(str::to_owned);
643                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
644                let anchor_for_registration = anchor.clone();
645                // Capture the anchor span before it moves into NodeMeta.
646                let anchor_span = anchor_loc.unwrap_or(span);
647
648                self.depth += 1;
649                if self.depth > self.options.max_nesting_depth {
650                    return Err(LoadError::NestingDepthLimitExceeded {
651                        limit: self.options.max_nesting_depth,
652                        pos: span_start_to_pos(span.start, &self.line_index),
653                    });
654                }
655
656                let mut items: Vec<Node<Span>> = Vec::new();
657                let mut end_span = span;
658
659                loop {
660                    // Collect leading comments before the next item.  Also
661                    // collect any comments that spilled over from a sibling
662                    // value's collection end (stored in `pending_leading`).
663                    let raw_leading = consume_leading_comments(stream)?;
664                    let leading = if self.pending_leading.is_empty() {
665                        raw_leading
666                    } else {
667                        let mut combined = std::mem::take(&mut self.pending_leading);
668                        combined.extend(raw_leading);
669                        combined
670                    };
671
672                    match stream.peek() {
673                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
674                            // Save any collected leading comments so the next
675                            // sibling entry in the parent collection can inherit
676                            // them (e.g. a comment just before SequenceEnd that
677                            // belongs to the following sequence item or mapping
678                            // entry in the parent).
679                            if !leading.is_empty() {
680                                self.pending_leading = leading;
681                            }
682                            break;
683                        }
684                        Some(Err(_)) => {
685                            // Consume the error.
686                            return Err(match stream.next() {
687                                Some(Err(e)) => LoadError::Parse {
688                                    pos: e.pos,
689                                    message: e.message,
690                                    kind: e.kind,
691                                },
692                                _ => LoadError::UnexpectedEndOfStream,
693                            });
694                        }
695                        Some(Ok(_)) => {}
696                    }
697
698                    let mut item = self.parse_node(stream)?;
699                    attach_leading_comments(&mut item, leading);
700
701                    // Trailing comment on the item — peek for inline comment.
702                    // Block scalars are excluded for the same reason as in the
703                    // mapping path: their span.end can coincide with the next
704                    // comment's line, falsely turning a leading comment into a
705                    // trailing one.
706                    if !is_block_scalar(&item)
707                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
708                    {
709                        let item_end_line = node_end_line(&item, &self.line_index);
710                        if let Some(trail) =
711                            peek_trailing_comment(stream, item_end_line, &self.line_index)?
712                        {
713                            attach_trailing_comment(&mut item, trail);
714                        }
715                    }
716
717                    items.push(item);
718                }
719
720                // Consume SequenceEnd and capture its span.
721                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
722                    end_span = *end;
723                    let _ = stream.next();
724                }
725                self.depth -= 1;
726
727                let mut node = Node::Sequence {
728                    items,
729                    style,
730                    tag,
731                    loc: Span {
732                        start: span.start,
733                        end: end_span.end,
734                    },
735                    meta: NodeMeta {
736                        anchor,
737                        anchor_loc,
738                        tag_loc,
739                        leading_comments: None,
740                        trailing_comment: None,
741                    }
742                    .into_option(),
743                };
744                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
745                if let Some(name) = anchor_for_registration {
746                    self.register_anchor(name, &node, anchor_span)?;
747                }
748                Ok(node)
749            }
750
751            Event::Alias { name } => {
752                let name = name.to_owned();
753                self.resolve_alias(&name, span)
754            }
755
756            Event::Comment { text } => {
757                // Comment between a mapping key and its collection value (e.g.
758                // `key:\n  # comment\n  subkey: val`).  The comment appears
759                // after the key Scalar and before the MappingStart/SequenceStart
760                // that begins the value.  Save it in `pending_leading` so the
761                // first entry of the upcoming collection can inherit it.
762                self.pending_leading.push(with_hash_prefix(text));
763                self.parse_node(stream)
764            }
765
766            Event::StreamStart
767            | Event::StreamEnd
768            | Event::DocumentStart { .. }
769            | Event::DocumentEnd { .. }
770            | Event::MappingEnd
771            | Event::SequenceEnd => {
772                // Structural event where a node is expected — return empty scalar.
773                Ok(empty_scalar())
774            }
775        }
776    }
777
778    fn register_anchor(
779        &mut self,
780        name: String,
781        node: &Node<Span>,
782        anchor_span: Span,
783    ) -> Result<()> {
784        let pos = span_start_to_pos(anchor_span.start, &self.line_index);
785        if !self.anchor_map.contains_key(&name) {
786            self.anchor_count += 1;
787            if self.anchor_count > self.options.max_anchors {
788                return Err(LoadError::AnchorCountLimitExceeded {
789                    limit: self.options.max_anchors,
790                    pos,
791                });
792            }
793        }
794        // Count the anchor node itself toward the expansion budget in resolved
795        // mode so that the total reflects every node present in the expanded
796        // document (anchor definition + each alias expansion).
797        if self.options.mode == LoadMode::Resolved {
798            self.expanded_nodes += 1;
799            if self.expanded_nodes > self.options.max_expanded_nodes {
800                return Err(LoadError::AliasExpansionLimitExceeded {
801                    limit: self.options.max_expanded_nodes,
802                    pos,
803                });
804            }
805            self.anchor_map.insert(name, node.clone());
806        } else {
807            // Lossless mode never reads anchor_map for expansion; store a
808            // zero-cost placeholder so contains_key still detects re-definitions.
809            self.anchor_map.insert(name, empty_scalar());
810        }
811        Ok(())
812    }
813
814    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
815        match self.options.mode {
816            LoadMode::Lossless => Ok(Node::Alias {
817                name: name.to_owned(),
818                loc,
819                leading_comments: None,
820                trailing_comment: None,
821            }),
822            LoadMode::Resolved => {
823                let pos = span_start_to_pos(loc.start, &self.line_index);
824                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
825                    LoadError::UndefinedAlias {
826                        name: name.to_owned(),
827                        pos,
828                    }
829                })?;
830                let mut in_progress: HashSet<String> = HashSet::new();
831                self.expand_node(anchored, &mut in_progress, loc)
832            }
833        }
834    }
835
836    /// Recursively expand a node, counting every node produced against the
837    /// expansion limit and checking for cycles via `in_progress`.
838    ///
839    /// `alias_loc` is the span of the alias site that triggered this expansion
840    /// chain; it is used for error positions when the limit or a cycle is
841    /// detected inside expanded content.
842    fn expand_node(
843        &mut self,
844        node: Node<Span>,
845        in_progress: &mut HashSet<String>,
846        alias_loc: Span,
847    ) -> Result<Node<Span>> {
848        // Increment at the top — before child recursion — so every node
849        // (including non-alias nodes inside expanded trees) counts against the
850        // budget.
851        self.expanded_nodes += 1;
852        if self.expanded_nodes > self.options.max_expanded_nodes {
853            return Err(LoadError::AliasExpansionLimitExceeded {
854                limit: self.options.max_expanded_nodes,
855                pos: span_start_to_pos(alias_loc.start, &self.line_index),
856            });
857        }
858
859        match node {
860            Node::Alias { ref name, loc, .. } => {
861                let pos = span_start_to_pos(loc.start, &self.line_index);
862                if in_progress.contains(name) {
863                    return Err(LoadError::CircularAlias {
864                        name: name.clone(),
865                        pos,
866                    });
867                }
868                let target = self.anchor_map.get(name).cloned().ok_or_else(|| {
869                    LoadError::UndefinedAlias {
870                        name: name.clone(),
871                        pos,
872                    }
873                })?;
874                in_progress.insert(name.clone());
875                // Pass the inner alias loc as the new alias_loc for deeper expansion.
876                let expanded = self.expand_node(target, in_progress, loc)?;
877                in_progress.remove(name);
878                // Re-stamp with the alias site's location.
879                Ok(reloc(expanded, loc))
880            }
881            Node::Mapping {
882                entries,
883                style,
884                tag,
885                loc,
886                meta,
887            } => {
888                let mut expanded_entries = Vec::with_capacity(entries.len());
889                for (k, v) in entries {
890                    let ek = self.expand_node(k, in_progress, alias_loc)?;
891                    let ev = self.expand_node(v, in_progress, alias_loc)?;
892                    expanded_entries.push((ek, ev));
893                }
894                Ok(Node::Mapping {
895                    entries: expanded_entries,
896                    style,
897                    tag,
898                    loc,
899                    meta,
900                })
901            }
902            Node::Sequence {
903                items,
904                style,
905                tag,
906                loc,
907                meta,
908            } => {
909                let mut expanded_items = Vec::with_capacity(items.len());
910                for item in items {
911                    expanded_items.push(self.expand_node(item, in_progress, alias_loc)?);
912                }
913                Ok(Node::Sequence {
914                    items: expanded_items,
915                    style,
916                    tag,
917                    loc,
918                    meta,
919                })
920            }
921            // Scalars and already-resolved nodes — pass through.
922            scalar @ Node::Scalar { .. } => Ok(scalar),
923        }
924    }
925}
926
927/// Return `true` if the peeked item signals end of document (or stream).
928const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
929    matches!(
930        peeked,
931        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
932    )
933}
934
935/// Convert a `Span.start` byte offset to a `Pos` with accurate line/column.
936#[inline]
937fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
938    let (line, column) = line_index.line_column(offset);
939    Pos {
940        byte_offset: offset as usize,
941        line: line as usize,
942        column: column as usize,
943    }
944}
945
946/// Return the line number of a node's span end position.
947///
948/// Used to determine whether the next `Comment` event is trailing (same line)
949/// or leading (different line).
950#[inline]
951fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
952    let end_offset = match node {
953        Node::Scalar { loc, .. }
954        | Node::Mapping { loc, .. }
955        | Node::Sequence { loc, .. }
956        | Node::Alias { loc, .. } => loc.end,
957    };
958    line_index.line_column(end_offset).0
959}
960
961/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
962///
963/// Block scalars consume trailing blank lines as part of chomping, so their
964/// `span.end` falls on the line *after* the last consumed line.  This means a
965/// comment on the immediately following line has the same line number as
966/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
967/// classify it as an inline trailing comment.  The caller uses this predicate
968/// to skip trailing-comment detection for block scalars.
969#[inline]
970const fn is_block_scalar(node: &Node<Span>) -> bool {
971    matches!(
972        node,
973        Node::Scalar {
974            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
975            ..
976        }
977    )
978}
979
980// ---------------------------------------------------------------------------
981// Schema resolution helpers
982// ---------------------------------------------------------------------------
983
/// Maximum number of Unicode scalar values kept in [`LoadError::UnresolvedScalar`]
/// value field.  Prevents unbounded allocation when storing user-supplied input
/// in error messages.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Sanitize a raw scalar value for inclusion in an error message.
///
/// - Keeps at most [`UNRESOLVED_VALUE_MAX_CHARS`] Unicode scalar values of
///   the input, appending `"..."` when anything was cut off.
/// - Replaces every Unicode control character (C0 U+0000–U+001F, U+007F and
///   C1 U+0080–U+009F, which includes NEL U+0085) as well as the line and
///   paragraph separators U+2028 / U+2029 with an uppercase `\uXXXX` hex
///   escape.  These are the characters that can break or forge a log line,
///   so escaping them prevents log injection via the `Display` impl.
///
/// The limit counts input characters, so an escaped character still counts
/// as one even though it occupies six characters of output.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut chars = raw.chars();

    // `by_ref` lets the loop consume the first N characters while leaving the
    // iterator usable afterwards for the truncation check.
    for ch in chars.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_control() || matches!(ch, '\u{2028}' | '\u{2029}') {
            // Escape rather than drop, so the message still shows what was there.
            out.push_str(&format!("\\u{:04X}", ch as u32));
        } else {
            out.push(ch);
        }
    }

    // Anything left in the iterator is input beyond the limit.
    if chars.next().is_some() {
        out.push_str("...");
    }
    out
}
1018
1019/// Apply schema tag resolution to a freshly-constructed node.
1020///
1021/// - For scalars: translates bare `!` to `None` (non-specific), then calls
1022///   `resolve_scalar`.
1023/// - For mappings/sequences: translates bare `!` to `None`, then calls
1024///   `resolve_collection`.
1025/// - On `Ok(Some(tag))`: overwrites `node.tag`; `tag_loc` is left `None`
1026///   (no source position for a resolved tag).
1027/// - On `Ok(None)` (explicit tag present): leaves `node.tag` unchanged.
1028///
1029/// # Errors
1030///
1031/// Returns [`LoadError::UnresolvedScalar`] when `schema` is [`Schema::Json`]
1032/// and a plain scalar does not match any JSON type pattern.
1033#[inline]
1034fn apply_schema_to_node(
1035    node: &mut Node<Span>,
1036    schema: Schema,
1037    line_index: &LineIndex,
1038) -> Result<()> {
1039    match node {
1040        Node::Scalar {
1041            value,
1042            style,
1043            tag,
1044            loc,
1045            meta,
1046        } => {
1047            // Bare `!` on a scalar is the non-specific scalar tag — it resolves
1048            // unconditionally to !!str regardless of content (YAML 1.2.2 §10.2.1,
1049            // §10.3.2: "non-specific" tag for scalars = Failsafe str).  We handle
1050            // it before calling the schema resolver so Core doesn't pattern-match
1051            // the value.
1052            //
1053            // `tag_loc` is preserved here (NOT cleared) because `!` is explicitly
1054            // written in the source.  Preserving `tag_loc` lets downstream consumers
1055            // (e.g. the formatter) distinguish user-authored tags from resolver-injected
1056            // ones, which is critical for correct idempotent output.
1057            if tag.as_deref() == Some("!") {
1058                *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
1059                return Ok(());
1060            }
1061            // All other tags: pass through as-is (Some(non-!) = explicit tag → Ok(None)).
1062            match resolve_scalar(schema, *style, value, tag.as_deref()) {
1063                Ok(Some(resolved)) => {
1064                    *tag = Some(Cow::Borrowed(resolved.as_str()));
1065                    // Clear tag_loc: resolver-injected tags have no source position.
1066                    if let Some(m) = meta.as_mut() {
1067                        m.tag_loc = None;
1068                        if m.is_all_none() {
1069                            *meta = None;
1070                        }
1071                    }
1072                }
1073                Ok(None) => {}
1074                Err(_) => {
1075                    return Err(LoadError::UnresolvedScalar {
1076                        value: sanitize_scalar_for_error(value),
1077                        pos: span_start_to_pos(loc.start, line_index),
1078                    });
1079                }
1080            }
1081        }
1082        Node::Mapping { tag, meta, .. } => {
1083            // Bare `!` on a collection means non-specific collection tag — translate
1084            // to None so the resolver returns the kind-based tag (!!map / !!seq).
1085            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1086            if let Some(resolved) =
1087                resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1088            {
1089                *tag = Some(Cow::Borrowed(resolved.as_str()));
1090                if let Some(m) = meta.as_mut() {
1091                    m.tag_loc = None;
1092                    if m.is_all_none() {
1093                        *meta = None;
1094                    }
1095                }
1096            }
1097        }
1098        Node::Sequence { tag, meta, .. } => {
1099            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1100            if let Some(resolved) =
1101                resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1102            {
1103                *tag = Some(Cow::Borrowed(resolved.as_str()));
1104                if let Some(m) = meta.as_mut() {
1105                    m.tag_loc = None;
1106                    if m.is_all_none() {
1107                        *meta = None;
1108                    }
1109                }
1110            }
1111        }
1112        Node::Alias { .. } => {}
1113    }
1114    Ok(())
1115}
1116
1117// ---------------------------------------------------------------------------
1118// Node helpers
1119// ---------------------------------------------------------------------------
1120
1121const fn empty_scalar() -> Node<Span> {
1122    Node::Scalar {
1123        value: String::new(),
1124        style: ScalarStyle::Plain,
1125        tag: None,
1126        loc: Span { start: 0, end: 0 },
1127        meta: None,
1128    }
1129}
1130
1131// ---------------------------------------------------------------------------
1132// Tests
1133// ---------------------------------------------------------------------------
1134
1135#[cfg(test)]
1136mod tests {
1137    use super::*;
1138    use rstest::rstest;
1139
1140    #[test]
1141    fn loader_state_resets_anchor_map_between_documents() {
1142        // In resolved mode: anchor defined in doc 1 must not be visible in doc 2.
1143        let result = LoaderBuilder::new()
1144            .resolved()
1145            .build()
1146            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
1147        assert!(
1148            result.is_err(),
1149            "expected Err: *foo in doc 2 should be undefined"
1150        );
1151        assert!(matches!(
1152            result.unwrap_err(),
1153            LoadError::UndefinedAlias { .. }
1154        ));
1155    }
1156
1157    #[test]
1158    fn register_anchor_increments_count() {
1159        let options = LoaderOptions {
1160            max_anchors: 2,
1161            ..LoaderOptions::default()
1162        };
1163        let mut state = LoadState::new(&options, "");
1164        let node = Node::Scalar {
1165            value: "x".to_owned(),
1166            style: ScalarStyle::Plain,
1167            tag: None,
1168            loc: Span { start: 0, end: 0 },
1169            meta: None,
1170        };
1171        let dummy_span = Span { start: 0, end: 0 };
1172        assert!(
1173            state
1174                .register_anchor("a".to_owned(), &node, dummy_span)
1175                .is_ok()
1176        );
1177        assert!(
1178            state
1179                .register_anchor("b".to_owned(), &node, dummy_span)
1180                .is_ok()
1181        );
1182        let err = state
1183            .register_anchor("c".to_owned(), &node, dummy_span)
1184            .expect_err("expected AnchorCountLimitExceeded");
1185        assert!(matches!(
1186            err,
1187            LoadError::AnchorCountLimitExceeded { limit: 2, .. }
1188        ));
1189    }
1190
1191    #[test]
1192    fn expand_node_detects_circular_alias() {
1193        let options = LoaderOptions {
1194            mode: LoadMode::Resolved,
1195            ..LoaderOptions::default()
1196        };
1197        let mut state = LoadState::new(&options, "");
1198        // Insert a self-referential alias node.
1199        let alias_node = Node::Alias {
1200            name: "a".to_owned(),
1201            loc: Span { start: 0, end: 0 },
1202            leading_comments: None,
1203            trailing_comment: None,
1204        };
1205        state.anchor_map.insert("a".to_owned(), alias_node.clone());
1206        let mut in_progress = HashSet::new();
1207        let alias_loc = Span { start: 0, end: 0 };
1208        let result = state.expand_node(alias_node, &mut in_progress, alias_loc);
1209        assert!(
1210            matches!(result, Err(LoadError::CircularAlias { .. })),
1211            "expected CircularAlias, got: {result:?}"
1212        );
1213    }
1214
1215    // -----------------------------------------------------------------------
1216    // Comment between mapping key and nested collection is attached to first nested entry
1217    // -----------------------------------------------------------------------
1218
1219    #[test]
1220    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
1221        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1222        let root = &docs[0].root;
1223        let Node::Mapping { entries, .. } = root else {
1224            panic!("expected root mapping");
1225        };
1226        assert_eq!(entries.len(), 1);
1227        let (_outer_key, outer_value) = &entries[0];
1228        let Node::Mapping {
1229            entries: nested, ..
1230        } = outer_value
1231        else {
1232            panic!("expected nested mapping");
1233        };
1234        assert_eq!(nested.len(), 1);
1235        let (inner_key, _) = &nested[0];
1236        assert_eq!(
1237            inner_key.leading_comments(),
1238            &["# Style 1"],
1239            "comment should be attached to the first nested key"
1240        );
1241    }
1242
1243    #[test]
1244    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
1245        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
1246        let root = &docs[0].root;
1247        let Node::Mapping { entries, .. } = root else {
1248            panic!("expected root mapping");
1249        };
1250        let (_key, seq_value) = &entries[0];
1251        let Node::Sequence { items, .. } = seq_value else {
1252            panic!("expected sequence value");
1253        };
1254        assert_eq!(
1255            items[0].leading_comments(),
1256            &["# leading"],
1257            "comment should be attached to first sequence item"
1258        );
1259    }
1260
1261    #[test]
1262    fn multiple_comments_between_key_and_collection_all_preserved() {
1263        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
1264        let root = &docs[0].root;
1265        let Node::Mapping { entries, .. } = root else {
1266            panic!("expected root mapping");
1267        };
1268        let (_key, seq_value) = &entries[0];
1269        let Node::Sequence { items, .. } = seq_value else {
1270            panic!("expected sequence value");
1271        };
1272        assert_eq!(
1273            items[0].leading_comments(),
1274            &["# first", "# second"],
1275            "both comments should be on first item"
1276        );
1277    }
1278
1279    #[test]
1280    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1281        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1282        let root = &docs[0].root;
1283        let Node::Mapping { entries, .. } = root else {
1284            panic!("expected root mapping");
1285        };
1286        let (outer_key, _) = &entries[0];
1287        assert!(
1288            outer_key.leading_comments().is_empty(),
1289            "outer key should have no leading comments"
1290        );
1291        assert!(
1292            outer_key.trailing_comment().is_none(),
1293            "outer key should have no trailing comment"
1294        );
1295    }
1296
1297    #[test]
1298    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1299        let docs = load("key:\n  inner: val\n").unwrap();
1300        let root = &docs[0].root;
1301        let Node::Mapping { entries, .. } = root else {
1302            panic!("expected root mapping");
1303        };
1304        let (_key, nested) = &entries[0];
1305        let Node::Mapping {
1306            entries: nested_entries,
1307            ..
1308        } = nested
1309        else {
1310            panic!("expected nested mapping");
1311        };
1312        let (inner_key, _) = &nested_entries[0];
1313        assert!(
1314            inner_key.leading_comments().is_empty(),
1315            "inner key should have no leading comments when there is no comment"
1316        );
1317    }
1318
1319    // -----------------------------------------------------------------------
1320    // Trailing comment of nested collection becomes leading comment on next sibling
1321    // -----------------------------------------------------------------------
1322
1323    #[test]
1324    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1325        let input =
1326            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
1327        let docs = load(input).unwrap();
1328        let root = &docs[0].root;
1329        let Node::Mapping { entries, .. } = root else {
1330            panic!("expected root mapping");
1331        };
1332        let (_lists_key, nested) = &entries[0];
1333        let Node::Mapping {
1334            entries: nested_entries,
1335            ..
1336        } = nested
1337        else {
1338            panic!("expected nested mapping");
1339        };
1340        assert_eq!(nested_entries.len(), 2);
1341        let (list_b_key, _) = &nested_entries[1];
1342        assert_eq!(
1343            list_b_key.leading_comments(),
1344            &["# Style 2"],
1345            "# Style 2 should be leading comment on list-b key"
1346        );
1347    }
1348
1349    #[test]
1350    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1351        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
1352        let docs = load(input).unwrap();
1353        let root = &docs[0].root;
1354        let Node::Mapping { entries, .. } = root else {
1355            panic!("expected root mapping");
1356        };
1357        let (_outer_key, outer_val) = &entries[0];
1358        let Node::Mapping {
1359            entries: nested, ..
1360        } = outer_val
1361        else {
1362            panic!("expected nested mapping");
1363        };
1364        assert_eq!(nested.len(), 2);
1365        let (b_key, _) = &nested[1];
1366        assert_eq!(
1367            b_key.leading_comments(),
1368            &["# between"],
1369            "# between should be leading comment on b key"
1370        );
1371    }
1372
1373    #[test]
1374    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1375        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
1376        let docs = load(input).unwrap();
1377        let root = &docs[0].root;
1378        let Node::Mapping { entries, .. } = root else {
1379            panic!("expected root mapping");
1380        };
1381        let (_parent_key, parent_val) = &entries[0];
1382        let Node::Mapping {
1383            entries: siblings, ..
1384        } = parent_val
1385        else {
1386            panic!("expected parent mapping value");
1387        };
1388        assert_eq!(siblings.len(), 2);
1389        let (child2_key, _) = &siblings[1];
1390        assert_eq!(
1391            child2_key.leading_comments(),
1392            &["# end-of-child1"],
1393            "# end-of-child1 should be leading comment on child2 key"
1394        );
1395    }
1396
1397    #[test]
1398    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
1399        let input = "items:\n  - a\n  - b\n  # tail\n";
1400        let docs = load(input).unwrap();
1401        // The document must parse successfully (no panic, no error).
1402        assert!(!docs.is_empty(), "document should parse without error");
1403        // The # tail comment must not cause data loss — the sequence items are intact.
1404        let root = &docs[0].root;
1405        let Node::Mapping { entries, .. } = root else {
1406            panic!("expected root mapping");
1407        };
1408        let (_items_key, seq_val) = &entries[0];
1409        let Node::Sequence { items, .. } = seq_val else {
1410            panic!("expected sequence value");
1411        };
1412        assert_eq!(items.len(), 2, "sequence items must not be lost");
1413    }
1414
1415    #[test]
1416    fn no_overflow_comments_when_collection_ends_cleanly() {
1417        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
1418        let root = &docs[0].root;
1419        let Node::Mapping { entries, .. } = root else {
1420            panic!("expected root mapping");
1421        };
1422        let (_key, seq_val) = &entries[0];
1423        let Node::Sequence { items, .. } = seq_val else {
1424            panic!("expected sequence value");
1425        };
1426        for item in items {
1427            assert!(
1428                item.leading_comments().is_empty(),
1429                "items should have no leading comments"
1430            );
1431        }
1432    }
1433
1434    // -----------------------------------------------------------------------
1435    // Combined scenarios
1436    // -----------------------------------------------------------------------
1437
1438    #[test]
1439    fn original_bug_report_input_preserves_both_comments() {
1440        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
1441        let docs = load(input).unwrap();
1442        let root = &docs[0].root;
1443        let Node::Mapping { entries, .. } = root else {
1444            panic!("expected root mapping");
1445        };
1446        let (_lists_key, nested) = &entries[0];
1447        let Node::Mapping {
1448            entries: nested_entries,
1449            ..
1450        } = nested
1451        else {
1452            panic!("expected nested mapping");
1453        };
1454        assert_eq!(nested_entries.len(), 2);
1455        let (first_key, _) = &nested_entries[0];
1456        let (second_key, _) = &nested_entries[1];
1457        assert_eq!(
1458            first_key.leading_comments(),
1459            &["# Style 1"],
1460            "list-a should have # Style 1 as leading comment"
1461        );
1462        assert_eq!(
1463            second_key.leading_comments(),
1464            &["# Style 2"],
1465            "list-b should have # Style 2 as leading comment"
1466        );
1467    }
1468
1469    #[test]
1470    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1471        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
1472        let docs = load(input).unwrap();
1473        let root = &docs[0].root;
1474        let Node::Mapping { entries, .. } = root else {
1475            panic!("expected root mapping");
1476        };
1477        let (_map_key, map_val) = &entries[0];
1478        let Node::Mapping {
1479            entries: siblings, ..
1480        } = map_val
1481        else {
1482            panic!("expected mapping value");
1483        };
1484        assert_eq!(siblings.len(), 2);
1485        let (key1, val1) = &siblings[0];
1486        let (key2, _) = &siblings[1];
1487        assert_eq!(key1.leading_comments(), &["# leading"]);
1488        assert_eq!(val1.trailing_comment(), Some("# trailing"));
1489        assert_eq!(key2.leading_comments(), &["# next-leading"]);
1490    }
1491
1492    #[test]
1493    fn deeply_nested_overflow_comments_reach_correct_sibling() {
1494        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
1495        let docs = load(input).unwrap();
1496        let root = &docs[0].root;
1497        let Node::Mapping { entries, .. } = root else {
1498            panic!("expected root mapping");
1499        };
1500        let (_top_key, top_val) = &entries[0];
1501        let Node::Mapping {
1502            entries: top_entries,
1503            ..
1504        } = top_val
1505        else {
1506            panic!("expected top-level mapping");
1507        };
1508        assert_eq!(top_entries.len(), 2);
1509        let (next_key, _) = &top_entries[1];
1510        assert_eq!(
1511            next_key.leading_comments(),
1512            &["# deep-overflow"],
1513            "# deep-overflow should propagate from nested sequence to next sibling"
1514        );
1515    }
1516
1517    // -----------------------------------------------------------------------
1518    // Document marker flags (explicit_start / explicit_end)
1519    // -----------------------------------------------------------------------
1520
1521    #[rstest]
1522    #[case::bare_document("key: value\n", false, false)]
1523    #[case::start_marker_only("---\nkey: value\n", true, false)]
1524    #[case::end_marker_only("key: value\n...\n", false, true)]
1525    #[case::both_markers("---\nkey: value\n...\n", true, true)]
1526    #[case::empty_with_both_markers("---\n...\n", true, true)]
1527    fn document_marker_flags_match_input(
1528        #[case] input: &str,
1529        #[case] expected_start: bool,
1530        #[case] expected_end: bool,
1531    ) {
1532        let docs = load(input).expect("load failed");
1533        assert_eq!(docs.len(), 1);
1534        assert_eq!(docs[0].explicit_start, expected_start, "explicit_start");
1535        assert_eq!(docs[0].explicit_end, expected_end, "explicit_end");
1536    }
1537
1538    #[test]
1539    fn multi_document_flags_are_independent() {
1540        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
1541        assert_eq!(docs.len(), 3);
1542        assert!(!docs[0].explicit_start, "doc1 explicit_start");
1543        assert!(!docs[0].explicit_end, "doc1 explicit_end");
1544        assert!(docs[1].explicit_start, "doc2 explicit_start");
1545        assert!(docs[1].explicit_end, "doc2 explicit_end");
1546        assert!(docs[2].explicit_start, "doc3 explicit_start");
1547        assert!(!docs[2].explicit_end, "doc3 explicit_end");
1548    }
1549
1550    // -----------------------------------------------------------------------
1551    // sanitize_scalar_for_error unit tests
1552    // -----------------------------------------------------------------------
1553
1554    #[rstest]
1555    #[case::newline("foo\nbar", '\n', "\\u000A", "foo\\u000Abar")]
1556    #[case::carriage_return("foo\rbar", '\r', "\\u000D", "foo\\u000Dbar")]
1557    #[case::null_byte("foo\0bar", '\0', "\\u0000", "foo\\u0000bar")]
1558    fn sanitize_replaces_control_char_with_escape(
1559        #[case] input: &str,
1560        #[case] raw_char: char,
1561        #[case] escape_seq: &str,
1562        #[case] expected: &str,
1563    ) {
1564        let result = sanitize_scalar_for_error(input);
1565        assert!(
1566            !result.contains(raw_char),
1567            "output must not contain the raw control character"
1568        );
1569        assert!(
1570            result.contains(escape_seq),
1571            "output must contain {escape_seq} escape, got: {result:?}"
1572        );
1573        assert_eq!(result, expected);
1574    }
1575
1576    #[test]
1577    fn sanitize_short_value_stored_verbatim() {
1578        let input = "hello";
1579        let result = sanitize_scalar_for_error(input);
1580        assert_eq!(result, "hello");
1581        assert!(
1582            !result.ends_with("..."),
1583            "short value must not be truncated"
1584        );
1585    }
1586
1587    #[test]
1588    fn sanitize_value_at_exact_limit_not_truncated() {
1589        let input = "a".repeat(128);
1590        let result = sanitize_scalar_for_error(&input);
1591        assert_eq!(
1592            result.len(),
1593            128,
1594            "128-char input must produce 128-char output"
1595        );
1596        assert!(
1597            !result.ends_with("..."),
1598            "value at exact limit must not be truncated"
1599        );
1600    }
1601
1602    #[test]
1603    fn sanitize_value_over_limit_truncated() {
1604        let input = "a".repeat(129);
1605        let result = sanitize_scalar_for_error(&input);
1606        assert!(
1607            result.ends_with("..."),
1608            "value over limit must end with '...'"
1609        );
1610        assert_eq!(
1611            result.len(),
1612            128 + 3,
1613            "truncated output must be 128 chars + 3 ellipsis chars"
1614        );
1615    }
1616
1617    #[test]
1618    fn sanitize_multibyte_char_boundary_not_split() {
1619        let input: String = "中".repeat(127) + "ab"; // 129 chars total
1620        let result = sanitize_scalar_for_error(&input);
1621        assert!(
1622            result.ends_with("..."),
1623            "129-char multibyte input should be truncated"
1624        );
1625        let char_count = result.trim_end_matches("...").chars().count();
1626        assert_eq!(
1627            char_count, 128,
1628            "truncated portion must be exactly 128 chars"
1629        );
1630    }
1631
1632    // -----------------------------------------------------------------------
1633    // Cow variant identity for resolver-injected vs user-authored tags
1634    // -----------------------------------------------------------------------
1635
1636    fn load_root(input: &str) -> Node<Span> {
1637        load(input).expect("load failed").remove(0).root
1638    }
1639
1640    fn node_tag(node: Node<Span>) -> Option<Cow<'static, str>> {
1641        match node {
1642            Node::Scalar { tag, .. } | Node::Mapping { tag, .. } | Node::Sequence { tag, .. } => {
1643                tag
1644            }
1645            Node::Alias { .. } => None,
1646        }
1647    }
1648
1649    #[rstest]
1650    #[case::str_tag("hello\n")]
1651    #[case::int_tag("42\n")]
1652    #[case::null_tag("null\n")]
1653    #[case::map_tag("a: 1\n")]
1654    #[case::seq_tag("- a\n")]
1655    #[case::bare_excl_tag("! hello\n")]
1656    fn resolver_emitted_tag_is_borrowed(#[case] input: &str) {
1657        let tag = node_tag(load_root(input));
1658        assert!(
1659            matches!(tag, Some(Cow::Borrowed(_))),
1660            "resolver-emitted tag must be Borrowed, got: {tag:?}"
1661        );
1662    }
1663
1664    #[rstest]
1665    #[case::scalar("!!str hello\n")]
1666    #[case::mapping("!!map\na: 1\n")]
1667    #[case::sequence("!!seq\n- a\n")]
1668    fn user_authored_tag_is_owned(#[case] input: &str) {
1669        let tag = node_tag(load_root(input));
1670        assert!(
1671            matches!(tag, Some(Cow::Owned(_))),
1672            "user-authored tag must be Owned, got: {tag:?}"
1673        );
1674    }
1675
1676    #[test]
1677    fn alias_node_has_no_tag_field() {
1678        let docs = LoaderBuilder::new()
1679            .build()
1680            .load("- &a x\n- *a\n")
1681            .expect("load failed");
1682        let Node::Sequence { items, .. } = &docs[0].root else {
1683            panic!("expected root sequence");
1684        };
1685        assert!(
1686            matches!(items[1], Node::Alias { .. }),
1687            "second item must be Alias in lossless mode"
1688        );
1689    }
1690
1691    #[test]
1692    fn tag_value_content_preserved_across_cow_variants() {
1693        let Node::Scalar {
1694            tag: tag_resolver, ..
1695        } = load_root("hello\n")
1696        else {
1697            panic!("expected scalar");
1698        };
1699        assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));
1700
1701        let Node::Scalar { tag: tag_user, .. } = load_root("!custom hello\n") else {
1702            panic!("expected scalar");
1703        };
1704        assert_eq!(tag_user.as_deref(), Some("!custom"));
1705    }
1706
1707    // -----------------------------------------------------------------------
1708    // Loader correctly gates NodeMeta construction
1709    // -----------------------------------------------------------------------
1710
1711    fn node_meta_is_none(node: &Node<Span>) -> bool {
1712        matches!(
1713            node,
1714            Node::Scalar { meta: None, .. }
1715                | Node::Mapping { meta: None, .. }
1716                | Node::Sequence { meta: None, .. }
1717        )
1718    }
1719
1720    #[rstest]
1721    #[case::plain_scalar("hello\n")]
1722    #[case::plain_mapping("a: 1\n")]
1723    #[case::plain_sequence("- a\n")]
1724    fn loaded_node_with_no_meta_fields_has_meta_none(#[case] input: &str) {
1725        let docs = load(input).unwrap();
1726        let root = &docs[0].root;
1727        assert!(
1728            node_meta_is_none(root),
1729            "plain node must have meta: None, got: {root:?}"
1730        );
1731    }
1732
1733    #[test]
1734    fn loaded_anchored_scalar_has_meta_some() {
1735        let docs = load("- &foo bar\n").unwrap();
1736        let Node::Sequence { items, .. } = &docs[0].root else {
1737            panic!("expected root Sequence");
1738        };
1739        let item = &items[0];
1740        assert!(
1741            matches!(item, Node::Scalar { meta: Some(_), .. }),
1742            "anchored scalar must have meta: Some, got: {item:?}"
1743        );
1744        assert_eq!(item.anchor(), Some("foo"));
1745    }
1746
1747    #[test]
1748    fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
1749        let docs = load("&tag hello\n").unwrap();
1750        let root = &docs[0].root;
1751        assert!(
1752            matches!(root, Node::Scalar { meta: Some(_), .. }),
1753            "anchored scalar must have meta: Some"
1754        );
1755        assert!(
1756            root.anchor_loc().is_some(),
1757            "anchor_loc() must be Some for anchored scalar"
1758        );
1759    }
1760
1761    // -----------------------------------------------------------------------
1762    // Property displacement promotion — combined anchor+tag on block collections
1763    // -----------------------------------------------------------------------
1764
1765    #[rstest]
1766    // Block mapping
1767    #[case::block_mapping_anchor_only("&a\nk: v\n", Some("a"), false)]
1768    #[case::block_mapping_tag_only("!mytag\nk: v\n", None, true)]
1769    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n", Some("a"), true)]
1770    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n", Some("a"), true)]
1771    // Block sequence
1772    #[case::block_sequence_anchor_only("&a\n- item\n", Some("a"), false)]
1773    #[case::block_sequence_tag_only("!mytag\n- item\n", None, true)]
1774    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n", Some("a"), true)]
1775    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n", Some("a"), true)]
1776    // Flow mapping
1777    #[case::flow_mapping_anchor_only("&a {k: v}\n", Some("a"), false)]
1778    #[case::flow_mapping_tag_only("!mytag {k: v}\n", None, true)]
1779    #[case::flow_mapping_anchor_then_tag("&a !mytag {k: v}\n", Some("a"), true)]
1780    #[case::flow_mapping_tag_then_anchor("!mytag &a {k: v}\n", Some("a"), true)]
1781    // Flow sequence
1782    #[case::flow_sequence_anchor_only("&a [item]\n", Some("a"), false)]
1783    #[case::flow_sequence_tag_only("!mytag [item]\n", None, true)]
1784    #[case::flow_sequence_anchor_then_tag("&a !mytag [item]\n", Some("a"), true)]
1785    #[case::flow_sequence_tag_then_anchor("!mytag &a [item]\n", Some("a"), true)]
1786    fn combined_properties_attach_to_root_collection(
1787        #[case] input: &str,
1788        #[case] expected_anchor: Option<&str>,
1789        #[case] expected_has_tag: bool,
1790    ) {
1791        let docs = load(input).unwrap();
1792        let root = &docs[0].root;
1793        assert_eq!(root.anchor(), expected_anchor, "anchor on root collection");
1794        assert_eq!(
1795            root.tag_loc().is_some(),
1796            expected_has_tag,
1797            "tag_loc on root collection"
1798        );
1799    }
1800
1801    // Block collections: first child must not inherit anchor or tag from the root
1802    #[rstest]
1803    // Block mapping
1804    #[case::block_mapping_anchor_only("&a\nk: v\n")]
1805    #[case::block_mapping_tag_only("!mytag\nk: v\n")]
1806    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n")]
1807    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n")]
1808    // Block sequence
1809    #[case::block_sequence_anchor_only("&a\n- item\n")]
1810    #[case::block_sequence_tag_only("!mytag\n- item\n")]
1811    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n")]
1812    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n")]
1813    fn first_child_of_block_collection_has_no_properties(#[case] input: &str) {
1814        let docs = load(input).unwrap();
1815        let root = &docs[0].root;
1816        let first_child: &Node<Span> = match root {
1817            Node::Mapping { entries, .. } => &entries[0].0,
1818            Node::Sequence { items, .. } => &items[0],
1819            Node::Scalar { .. } | Node::Alias { .. } => panic!("expected block collection"),
1820        };
1821        assert_eq!(
1822            first_child.anchor(),
1823            None,
1824            "anchor must not appear on first child"
1825        );
1826        assert!(
1827            first_child.tag_loc().is_none(),
1828            "tag_loc must not appear on first child"
1829        );
1830    }
1831
1832    // --- Alias registration smoke test ---
1833
1834    #[test]
1835    fn anchor_on_block_mapping_with_tag_is_resolvable_via_alias() {
1836        let input = "root:\n  tagged: &a !mytag\n    k: v\n  ref: *a\n";
1837        let result = LoaderBuilder::new().resolved().build().load(input);
1838        assert!(
1839            result.is_ok(),
1840            "alias *a must resolve — anchor must be on the mapping, not lost to first key: {result:?}"
1841        );
1842    }
1843}