Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22//!
23//! # Accepted risks
24//!
25//! `expand_node` does not detect the case where an anchor-within-expansion
26//! references a previously defined anchor, forming an indirect cycle not
27//! caught by the `in_progress` set until the second traversal.  This
28//! limitation exists in the old loader and is acceptable in the LSP context
29//! where Lossless mode is the default.  The `expanded_nodes` volume limit
30//! provides the backstop.
31
32use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::iter::Peekable;
35
36use std::sync::Arc;
37
38use crate::error::Error;
39use crate::event::{Event, EventMeta, ScalarStyle};
40use crate::node::{Document, Node, NodeMeta};
41use crate::pos::{LineIndex, Pos, Span};
42use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
43
44use comments::{attach_leading_comments, attach_trailing_comment};
45use reloc::reloc;
46use stream::{
47    consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
48    with_hash_prefix,
49};
50
51mod comments;
52mod reloc;
53mod stream;
54
55// ---------------------------------------------------------------------------
56// Public error type
57// ---------------------------------------------------------------------------
58
59/// Errors produced by the loader.
60#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
61pub enum LoadError {
62    /// The event stream contained a parse error.
63    #[error("parse error at {pos:?}: {message}")]
64    Parse {
65        /// Source position where the parse error was detected.
66        pos: Pos,
67        /// Human-readable description of the error.
68        message: String,
69    },
70
71    /// The event stream ended unexpectedly mid-document.
72    #[error("unexpected end of event stream")]
73    UnexpectedEndOfStream,
74
75    /// Nesting depth exceeded the configured limit.
76    #[error("nesting depth limit exceeded at {pos:?} (max: {limit})")]
77    NestingDepthLimitExceeded {
78        /// The configured nesting depth limit that was exceeded.
79        limit: usize,
80        /// Source position of the collection start that exceeded the limit.
81        pos: Pos,
82    },
83
84    /// Too many distinct anchor names were defined.
85    #[error("anchor count limit exceeded at {pos:?} (max: {limit})")]
86    AnchorCountLimitExceeded {
87        /// The configured anchor count limit that was exceeded.
88        limit: usize,
89        /// Source position of the anchor that exceeded the limit.
90        pos: Pos,
91    },
92
93    /// Alias expansion produced more nodes than the configured limit.
94    #[error("alias expansion node limit exceeded at {pos:?} (max: {limit})")]
95    AliasExpansionLimitExceeded {
96        /// The configured expansion node limit that was exceeded.
97        limit: usize,
98        /// Source position of the node that exceeded the expansion limit.
99        pos: Pos,
100    },
101
102    /// A circular alias reference was detected.
103    #[error("circular alias reference at {pos:?}: '{name}'")]
104    CircularAlias {
105        /// The anchor name involved in the cycle.
106        name: String,
107        /// Source position of the alias that triggered the cycle detection.
108        pos: Pos,
109    },
110
111    /// An alias referred to an anchor that was never defined.
112    #[error("undefined alias at {pos:?}: '{name}'")]
113    UndefinedAlias {
114        /// The alias name that had no corresponding anchor definition.
115        name: String,
116        /// Source position of the alias reference.
117        pos: Pos,
118    },
119
120    /// A plain scalar could not be resolved under the JSON schema.
121    ///
122    /// The JSON schema has no fallback: every untagged plain scalar must match
123    /// one of its patterns (null, bool, int, float).  If none match, the scalar
124    /// is an error per YAML 1.2.2 §10.2.
125    ///
126    /// `value` is truncated to 128 Unicode scalar values and ASCII control
127    /// characters (U+0000–U+001F, U+007F) are replaced with `\uXXXX` escapes
128    /// to prevent log injection via the `Display` impl.
129    #[error("JSON schema: plain scalar does not match any type pattern")]
130    UnresolvedScalar {
131        /// The sanitized, truncated scalar value that failed resolution.
132        value: String,
133        /// Source position of the scalar.
134        pos: Pos,
135    },
136}
137
138// Convenience alias used inside the module.
139type Result<T> = std::result::Result<T, LoadError>;
140
141// Type alias for the peekable event stream used throughout the loader.
142type EventStream<'a> =
143    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
144
145/// Unpack an `Option<Box<EventMeta>>` into its four constituent fields.
146#[expect(
147    clippy::type_complexity,
148    reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
149)]
150#[inline]
151fn unpack_meta(
152    meta: Option<Box<EventMeta<'_>>>,
153) -> (
154    Option<&'_ str>,
155    Option<Span>,
156    Option<std::borrow::Cow<'_, str>>,
157    Option<Span>,
158) {
159    meta.map_or((None, None, None, None), |m| {
160        (m.anchor, m.anchor_loc, m.tag, m.tag_loc)
161    })
162}
163
164// ---------------------------------------------------------------------------
165// Configuration
166// ---------------------------------------------------------------------------
167
/// Strategy the loader uses for alias references.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Keep every alias as a [`Node::Alias`] node (default, safe for LSP).
    Lossless,
    /// Replace every alias with its expansion, bounded by `max_expanded_nodes`.
    Resolved,
}
176
177/// Security and behaviour options for the loader.
178#[derive(Debug, Clone)]
179pub struct LoaderOptions {
180    /// Maximum mapping/sequence nesting depth before returning
181    /// [`LoadError::NestingDepthLimitExceeded`] (default: 512).
182    pub max_nesting_depth: usize,
183    /// Maximum number of distinct anchor names per document before returning
184    /// [`LoadError::AnchorCountLimitExceeded`] (default: 10 000).
185    pub max_anchors: usize,
186    /// Maximum total nodes produced by alias expansion in resolved mode before
187    /// returning [`LoadError::AliasExpansionLimitExceeded`] (default: 1 000 000).
188    pub max_expanded_nodes: usize,
189    /// Controls how alias references are handled during loading.
190    pub mode: LoadMode,
191    /// YAML 1.2.2 §10 schema to apply during loading (default: [`Schema::Core`]).
192    ///
193    /// Each node's tag is resolved according to this schema after the node is
194    /// constructed.  Nodes with explicit source tags are left unchanged.
195    pub schema: Schema,
196}
197
198impl Default for LoaderOptions {
199    fn default() -> Self {
200        Self {
201            max_nesting_depth: 512,
202            max_anchors: 10_000,
203            max_expanded_nodes: 1_000_000,
204            mode: LoadMode::Lossless,
205            schema: Schema::Core,
206        }
207    }
208}
209
210// ---------------------------------------------------------------------------
211// Builder
212// ---------------------------------------------------------------------------
213
214/// Builder for configuring and creating a [`Loader`].
215///
216/// ```
217/// use rlsp_yaml_parser::loader::LoaderBuilder;
218///
219/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
220/// assert_eq!(docs.len(), 1);
221/// ```
222pub struct LoaderBuilder {
223    options: LoaderOptions,
224}
225
226impl LoaderBuilder {
227    /// Create a builder with default options (lossless mode, safe limits).
228    #[must_use]
229    pub fn new() -> Self {
230        Self {
231            options: LoaderOptions::default(),
232        }
233    }
234
235    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
236    #[must_use]
237    pub const fn lossless(mut self) -> Self {
238        self.options.mode = LoadMode::Lossless;
239        self
240    }
241
242    /// Use resolved mode — aliases are expanded inline.
243    #[must_use]
244    pub const fn resolved(mut self) -> Self {
245        self.options.mode = LoadMode::Resolved;
246        self
247    }
248
249    /// Override the maximum nesting depth.
250    #[must_use]
251    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
252        self.options.max_nesting_depth = limit;
253        self
254    }
255
256    /// Override the maximum anchor count.
257    #[must_use]
258    pub const fn max_anchors(mut self, limit: usize) -> Self {
259        self.options.max_anchors = limit;
260        self
261    }
262
263    /// Override the maximum expanded-node count (resolved mode only).
264    #[must_use]
265    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
266        self.options.max_expanded_nodes = limit;
267        self
268    }
269
270    /// Override the YAML 1.2.2 §10 schema used for tag resolution during loading.
271    ///
272    /// The default is [`Schema::Core`].  Untagged nodes receive resolved tag URIs
273    /// in the AST; nodes with explicit source tags are not modified.
274    #[must_use]
275    pub const fn schema(mut self, s: Schema) -> Self {
276        self.options.schema = s;
277        self
278    }
279
280    /// Consume the builder and produce a [`Loader`].
281    #[must_use]
282    pub const fn build(self) -> Loader {
283        Loader {
284            options: self.options,
285        }
286    }
287}
288
289impl Default for LoaderBuilder {
290    fn default() -> Self {
291        Self::new()
292    }
293}
294
295// ---------------------------------------------------------------------------
296// Loader
297// ---------------------------------------------------------------------------
298
299/// A configured YAML loader.
300pub struct Loader {
301    options: LoaderOptions,
302}
303
304impl Loader {
305    /// Load YAML text into a sequence of documents.
306    ///
307    /// # Errors
308    ///
309    /// Returns `Err` if the input contains a parse error, exceeds a configured
310    /// security limit, or (in resolved mode) references an undefined anchor.
311    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
312        let mut state = LoadState::new(&self.options, input);
313        let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
314            Box::new(crate::parse_events(input));
315        state.run(iter.peekable())
316    }
317}
318
319// ---------------------------------------------------------------------------
320// Convenience entry point
321// ---------------------------------------------------------------------------
322
323/// Load YAML text using lossless mode, default security limits, and Core schema tag
324/// resolution (YAML 1.2.2 §10.3).
325///
326/// Returns one `Document<Span>` per YAML document in the stream.  Untagged nodes
327/// receive resolved tag URIs according to the Core schema; nodes with explicit source
328/// tags are left unchanged.
329///
330/// # Errors
331///
332/// Returns `Err` if the input contains a parse error or exceeds a security
333/// limit (nesting depth or anchor count).
334///
335/// ```
336/// use rlsp_yaml_parser::loader::load;
337/// use rlsp_yaml_parser::Node;
338///
339/// let docs = load("hello\n").unwrap();
340/// assert_eq!(docs.len(), 1);
341/// let Node::Scalar { tag, .. } = &docs[0].root else { panic!() };
342/// assert_eq!(tag.as_deref(), Some("tag:yaml.org,2002:str"));
343/// ```
344pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
345    LoaderBuilder::new().lossless().build().load(input)
346}
347
348// ---------------------------------------------------------------------------
349// Internal loader state
350// ---------------------------------------------------------------------------
351
352struct LoadState<'opt> {
353    options: &'opt LoaderOptions,
354    /// Anchors registered so far in the current document: name → node.
355    anchor_map: HashMap<String, Node<Span>>,
356    /// Count of distinct anchors registered (resets per document).
357    anchor_count: usize,
358    /// Current nesting depth (incremented on Begin, decremented on End).
359    depth: usize,
360    /// Total nodes produced via alias expansion (resolved mode only).
361    expanded_nodes: usize,
362    /// Leading comments accumulated by `parse_node` when it encounters a
363    /// `Comment` event between a mapping key and its value's collection start,
364    /// or by a sequence/mapping loop when it hits End with leftover leading
365    /// comments.  The next mapping/sequence loop iteration picks these up and
366    /// prepends them to the next entry's leading comments.
367    pending_leading: Vec<String>,
368    /// Line index for the current document source; shared across all documents
369    /// produced from the same input via `Arc` to avoid N full copies.
370    line_index: Arc<LineIndex>,
371}
372
373impl<'opt> LoadState<'opt> {
374    fn new(options: &'opt LoaderOptions, input: &str) -> Self {
375        Self {
376            options,
377            anchor_map: HashMap::new(),
378            anchor_count: 0,
379            depth: 0,
380            expanded_nodes: 0,
381            pending_leading: Vec::new(),
382            line_index: Arc::new(LineIndex::new(input)),
383        }
384    }
385
386    fn reset_for_document(&mut self) {
387        self.anchor_map.clear();
388        self.anchor_count = 0;
389        self.expanded_nodes = 0;
390        self.pending_leading.clear();
391    }
392
393    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
394        let mut docs: Vec<Document<Span>> = Vec::new();
395
396        // Skip StreamStart.
397        match stream.next() {
398            Some(Ok(_)) | None => {}
399            Some(Err(e)) => {
400                return Err(LoadError::Parse {
401                    pos: e.pos,
402                    message: e.message,
403                });
404            }
405        }
406
407        loop {
408            // Skip any leading comments or unknown events before a document.
409            match next_from(&mut stream)? {
410                None | Some((Event::StreamEnd, _)) => break,
411                Some((
412                    Event::DocumentStart {
413                        explicit,
414                        version,
415                        tag_directives,
416                    },
417                    _,
418                )) => {
419                    let doc_explicit_start = explicit;
420                    let doc_version = version;
421                    let doc_tags = tag_directives;
422                    self.reset_for_document();
423
424                    let mut doc_comments: Vec<String> = Vec::new();
425
426                    // Consume leading comments at document level.
427                    consume_leading_doc_comments(&mut stream, &mut doc_comments, &self.line_index)?;
428
429                    // Parse root node (may be absent for empty documents).
430                    let root = if is_document_end(stream.peek()) {
431                        // Empty document — emit an empty scalar as root.
432                        let mut node = empty_scalar();
433                        apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
434                        node
435                    } else {
436                        self.parse_node(&mut stream)?
437                    };
438
439                    // Consume DocumentEnd if present and capture its explicit flag.
440                    let doc_explicit_end =
441                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
442                            let end_explicit = *explicit;
443                            let _ = stream.next();
444                            end_explicit
445                        } else {
446                            false
447                        };
448
449                    docs.push(Document {
450                        root,
451                        version: doc_version,
452                        tags: doc_tags,
453                        comments: doc_comments,
454                        explicit_start: doc_explicit_start,
455                        explicit_end: doc_explicit_end,
456                        line_index: Some(self.line_index.clone()),
457                    });
458                }
459                Some(_) => {
460                    // Comment or any other stray event outside a document — skip.
461                }
462            }
463        }
464
465        Ok(docs)
466    }
467
468    /// Parse a single node from the stream.
469    ///
470    /// Advances the stream past the node (including end-of-container events).
471    #[expect(
472        clippy::too_many_lines,
473        reason = "match-on-event-type; splitting would obscure flow"
474    )]
475    fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
476        // Structural end events close the caller's collection loop — do NOT
477        // consume them here.  Return an empty scalar and leave the event in
478        // the stream so the outer mapping/sequence loop can see and consume it.
479        if matches!(
480            stream.peek(),
481            Some(Ok((
482                Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
483                _
484            )))
485        ) {
486            return Ok(empty_scalar());
487        }
488
489        let Some((event, span)) = next_from(stream)? else {
490            return Ok(empty_scalar());
491        };
492
493        match event {
494            Event::Scalar { value, style, meta } => {
495                let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
496                let anchor = anchor.map(str::to_owned);
497                // Capture the anchor span before it moves into NodeMeta.
498                let anchor_span = anchor_loc.unwrap_or(span);
499                let mut node = Node::Scalar {
500                    value: value.into_owned(),
501                    style,
502                    tag: tag.map(|t| Cow::Owned(t.into_owned())),
503                    loc: span,
504                    meta: NodeMeta {
505                        anchor,
506                        anchor_loc,
507                        tag_loc,
508                        leading_comments: None,
509                        trailing_comment: None,
510                    }
511                    .into_option(),
512                };
513                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
514                if let Some(name) = node.anchor() {
515                    self.register_anchor(name.to_owned(), &node, anchor_span)?;
516                }
517                Ok(node)
518            }
519
520            Event::MappingStart { style, meta } => {
521                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
522                let anchor = event_anchor.map(str::to_owned);
523                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
524                let anchor_for_registration = anchor.clone();
525                // Capture the anchor span before it moves into NodeMeta.
526                let anchor_span = anchor_loc.unwrap_or(span);
527
528                self.depth += 1;
529                if self.depth > self.options.max_nesting_depth {
530                    return Err(LoadError::NestingDepthLimitExceeded {
531                        limit: self.options.max_nesting_depth,
532                        pos: span_start_to_pos(span.start, &self.line_index),
533                    });
534                }
535
536                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
537                let mut end_span = span;
538
539                loop {
540                    // Consume leading comments before the next key.  Also
541                    // collect any comments that spilled over from a sibling
542                    // value's collection end (stored in `pending_leading`).
543                    let raw_leading = consume_leading_comments(stream)?;
544                    let leading = if self.pending_leading.is_empty() {
545                        raw_leading
546                    } else {
547                        let mut combined = std::mem::take(&mut self.pending_leading);
548                        combined.extend(raw_leading);
549                        combined
550                    };
551
552                    match stream.peek() {
553                        None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
554                            // Save any collected leading comments so the next
555                            // sibling entry in the parent collection can inherit
556                            // them (e.g. a comment just before MappingEnd that
557                            // belongs to the following mapping entry).
558                            if !leading.is_empty() {
559                                self.pending_leading = leading;
560                            }
561                            break;
562                        }
563                        Some(Err(_)) => {
564                            // Consume the error.
565                            return Err(match stream.next() {
566                                Some(Err(e)) => LoadError::Parse {
567                                    pos: e.pos,
568                                    message: e.message,
569                                },
570                                _ => LoadError::UnexpectedEndOfStream,
571                            });
572                        }
573                        Some(Ok(_)) => {}
574                    }
575
576                    let mut key = self.parse_node(stream)?;
577                    attach_leading_comments(&mut key, leading);
578
579                    let mut value = self.parse_node(stream)?;
580
581                    // Trailing comment on the value — peek for inline comment.
582                    // Block scalars (literal `|` and folded `>`) consume trailing
583                    // blank lines as part of chomping; their span.end falls on the
584                    // first line after the scalar, which can coincide with the
585                    // next comment's line number.  That would falsely attach a
586                    // leading inter-node comment as a trailing inline comment.
587                    // Block scalars never have an inline comment on their content
588                    // lines, so skip trailing-comment detection for them.
589                    if !is_block_scalar(&value)
590                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
591                    {
592                        let value_end_line = node_end_line(&value, &self.line_index);
593                        if let Some(trail) =
594                            peek_trailing_comment(stream, value_end_line, &self.line_index)?
595                        {
596                            attach_trailing_comment(&mut value, trail);
597                        }
598                    }
599
600                    entries.push((key, value));
601                }
602
603                // Consume MappingEnd and capture its span.
604                if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
605                    end_span = *end;
606                    let _ = stream.next();
607                }
608                self.depth -= 1;
609
610                let mut node = Node::Mapping {
611                    entries,
612                    style,
613                    tag,
614                    loc: Span {
615                        start: span.start,
616                        end: end_span.end,
617                    },
618                    meta: NodeMeta {
619                        anchor,
620                        anchor_loc,
621                        tag_loc,
622                        leading_comments: None,
623                        trailing_comment: None,
624                    }
625                    .into_option(),
626                };
627                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
628                if let Some(name) = anchor_for_registration {
629                    self.register_anchor(name, &node, anchor_span)?;
630                }
631                Ok(node)
632            }
633
634            Event::SequenceStart { style, meta } => {
635                let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
636                let anchor = event_anchor.map(str::to_owned);
637                let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
638                let anchor_for_registration = anchor.clone();
639                // Capture the anchor span before it moves into NodeMeta.
640                let anchor_span = anchor_loc.unwrap_or(span);
641
642                self.depth += 1;
643                if self.depth > self.options.max_nesting_depth {
644                    return Err(LoadError::NestingDepthLimitExceeded {
645                        limit: self.options.max_nesting_depth,
646                        pos: span_start_to_pos(span.start, &self.line_index),
647                    });
648                }
649
650                let mut items: Vec<Node<Span>> = Vec::new();
651                let mut end_span = span;
652
653                loop {
654                    // Collect leading comments before the next item.  Also
655                    // collect any comments that spilled over from a sibling
656                    // value's collection end (stored in `pending_leading`).
657                    let raw_leading = consume_leading_comments(stream)?;
658                    let leading = if self.pending_leading.is_empty() {
659                        raw_leading
660                    } else {
661                        let mut combined = std::mem::take(&mut self.pending_leading);
662                        combined.extend(raw_leading);
663                        combined
664                    };
665
666                    match stream.peek() {
667                        None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
668                            // Save any collected leading comments so the next
669                            // sibling entry in the parent collection can inherit
670                            // them (e.g. a comment just before SequenceEnd that
671                            // belongs to the following sequence item or mapping
672                            // entry in the parent).
673                            if !leading.is_empty() {
674                                self.pending_leading = leading;
675                            }
676                            break;
677                        }
678                        Some(Err(_)) => {
679                            // Consume the error.
680                            return Err(match stream.next() {
681                                Some(Err(e)) => LoadError::Parse {
682                                    pos: e.pos,
683                                    message: e.message,
684                                },
685                                _ => LoadError::UnexpectedEndOfStream,
686                            });
687                        }
688                        Some(Ok(_)) => {}
689                    }
690
691                    let mut item = self.parse_node(stream)?;
692                    attach_leading_comments(&mut item, leading);
693
694                    // Trailing comment on the item — peek for inline comment.
695                    // Block scalars are excluded for the same reason as in the
696                    // mapping path: their span.end can coincide with the next
697                    // comment's line, falsely turning a leading comment into a
698                    // trailing one.
699                    if !is_block_scalar(&item)
700                        && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
701                    {
702                        let item_end_line = node_end_line(&item, &self.line_index);
703                        if let Some(trail) =
704                            peek_trailing_comment(stream, item_end_line, &self.line_index)?
705                        {
706                            attach_trailing_comment(&mut item, trail);
707                        }
708                    }
709
710                    items.push(item);
711                }
712
713                // Consume SequenceEnd and capture its span.
714                if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
715                    end_span = *end;
716                    let _ = stream.next();
717                }
718                self.depth -= 1;
719
720                let mut node = Node::Sequence {
721                    items,
722                    style,
723                    tag,
724                    loc: Span {
725                        start: span.start,
726                        end: end_span.end,
727                    },
728                    meta: NodeMeta {
729                        anchor,
730                        anchor_loc,
731                        tag_loc,
732                        leading_comments: None,
733                        trailing_comment: None,
734                    }
735                    .into_option(),
736                };
737                apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
738                if let Some(name) = anchor_for_registration {
739                    self.register_anchor(name, &node, anchor_span)?;
740                }
741                Ok(node)
742            }
743
744            Event::Alias { name } => {
745                let name = name.to_owned();
746                self.resolve_alias(&name, span)
747            }
748
749            Event::Comment { text } => {
750                // Comment between a mapping key and its collection value (e.g.
751                // `key:\n  # comment\n  subkey: val`).  The comment appears
752                // after the key Scalar and before the MappingStart/SequenceStart
753                // that begins the value.  Save it in `pending_leading` so the
754                // first entry of the upcoming collection can inherit it.
755                self.pending_leading.push(with_hash_prefix(text));
756                self.parse_node(stream)
757            }
758
759            Event::StreamStart
760            | Event::StreamEnd
761            | Event::DocumentStart { .. }
762            | Event::DocumentEnd { .. }
763            | Event::MappingEnd
764            | Event::SequenceEnd => {
765                // Structural event where a node is expected — return empty scalar.
766                Ok(empty_scalar())
767            }
768        }
769    }
770
771    fn register_anchor(
772        &mut self,
773        name: String,
774        node: &Node<Span>,
775        anchor_span: Span,
776    ) -> Result<()> {
777        let pos = span_start_to_pos(anchor_span.start, &self.line_index);
778        if !self.anchor_map.contains_key(&name) {
779            self.anchor_count += 1;
780            if self.anchor_count > self.options.max_anchors {
781                return Err(LoadError::AnchorCountLimitExceeded {
782                    limit: self.options.max_anchors,
783                    pos,
784                });
785            }
786        }
787        // Count the anchor node itself toward the expansion budget in resolved
788        // mode so that the total reflects every node present in the expanded
789        // document (anchor definition + each alias expansion).
790        if self.options.mode == LoadMode::Resolved {
791            self.expanded_nodes += 1;
792            if self.expanded_nodes > self.options.max_expanded_nodes {
793                return Err(LoadError::AliasExpansionLimitExceeded {
794                    limit: self.options.max_expanded_nodes,
795                    pos,
796                });
797            }
798            self.anchor_map.insert(name, node.clone());
799        } else {
800            // Lossless mode never reads anchor_map for expansion; store a
801            // zero-cost placeholder so contains_key still detects re-definitions.
802            self.anchor_map.insert(name, empty_scalar());
803        }
804        Ok(())
805    }
806
807    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
808        match self.options.mode {
809            LoadMode::Lossless => Ok(Node::Alias {
810                name: name.to_owned(),
811                loc,
812                leading_comments: None,
813                trailing_comment: None,
814            }),
815            LoadMode::Resolved => {
816                let pos = span_start_to_pos(loc.start, &self.line_index);
817                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
818                    LoadError::UndefinedAlias {
819                        name: name.to_owned(),
820                        pos,
821                    }
822                })?;
823                let mut in_progress: HashSet<String> = HashSet::new();
824                self.expand_node(anchored, &mut in_progress, loc)
825            }
826        }
827    }
828
829    /// Recursively expand a node, counting every node produced against the
830    /// expansion limit and checking for cycles via `in_progress`.
831    ///
832    /// `alias_loc` is the span of the alias site that triggered this expansion
833    /// chain; it is used for error positions when the limit or a cycle is
834    /// detected inside expanded content.
835    fn expand_node(
836        &mut self,
837        node: Node<Span>,
838        in_progress: &mut HashSet<String>,
839        alias_loc: Span,
840    ) -> Result<Node<Span>> {
841        // Increment at the top — before child recursion — so every node
842        // (including non-alias nodes inside expanded trees) counts against the
843        // budget.
844        self.expanded_nodes += 1;
845        if self.expanded_nodes > self.options.max_expanded_nodes {
846            return Err(LoadError::AliasExpansionLimitExceeded {
847                limit: self.options.max_expanded_nodes,
848                pos: span_start_to_pos(alias_loc.start, &self.line_index),
849            });
850        }
851
852        match node {
853            Node::Alias { ref name, loc, .. } => {
854                let pos = span_start_to_pos(loc.start, &self.line_index);
855                if in_progress.contains(name) {
856                    return Err(LoadError::CircularAlias {
857                        name: name.clone(),
858                        pos,
859                    });
860                }
861                let target = self.anchor_map.get(name).cloned().ok_or_else(|| {
862                    LoadError::UndefinedAlias {
863                        name: name.clone(),
864                        pos,
865                    }
866                })?;
867                in_progress.insert(name.clone());
868                // Pass the inner alias loc as the new alias_loc for deeper expansion.
869                let expanded = self.expand_node(target, in_progress, loc)?;
870                in_progress.remove(name);
871                // Re-stamp with the alias site's location.
872                Ok(reloc(expanded, loc))
873            }
874            Node::Mapping {
875                entries,
876                style,
877                tag,
878                loc,
879                meta,
880            } => {
881                let mut expanded_entries = Vec::with_capacity(entries.len());
882                for (k, v) in entries {
883                    let ek = self.expand_node(k, in_progress, alias_loc)?;
884                    let ev = self.expand_node(v, in_progress, alias_loc)?;
885                    expanded_entries.push((ek, ev));
886                }
887                Ok(Node::Mapping {
888                    entries: expanded_entries,
889                    style,
890                    tag,
891                    loc,
892                    meta,
893                })
894            }
895            Node::Sequence {
896                items,
897                style,
898                tag,
899                loc,
900                meta,
901            } => {
902                let mut expanded_items = Vec::with_capacity(items.len());
903                for item in items {
904                    expanded_items.push(self.expand_node(item, in_progress, alias_loc)?);
905                }
906                Ok(Node::Sequence {
907                    items: expanded_items,
908                    style,
909                    tag,
910                    loc,
911                    meta,
912                })
913            }
914            // Scalars and already-resolved nodes — pass through.
915            scalar @ Node::Scalar { .. } => Ok(scalar),
916        }
917    }
918}
919
920/// Return `true` if the peeked item signals end of document (or stream).
921const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
922    matches!(
923        peeked,
924        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
925    )
926}
927
928/// Convert a `Span.start` byte offset to a `Pos` with accurate line/column.
929#[inline]
930fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
931    let (line, column) = line_index.line_column(offset);
932    Pos {
933        byte_offset: offset as usize,
934        line: line as usize,
935        column: column as usize,
936    }
937}
938
939/// Return the line number of a node's span end position.
940///
941/// Used to determine whether the next `Comment` event is trailing (same line)
942/// or leading (different line).
943#[inline]
944fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
945    let end_offset = match node {
946        Node::Scalar { loc, .. }
947        | Node::Mapping { loc, .. }
948        | Node::Sequence { loc, .. }
949        | Node::Alias { loc, .. } => loc.end,
950    };
951    line_index.line_column(end_offset).0
952}
953
954/// Return `true` if the node is a block scalar (literal `|` or folded `>`).
955///
956/// Block scalars consume trailing blank lines as part of chomping, so their
957/// `span.end` falls on the line *after* the last consumed line.  This means a
958/// comment on the immediately following line has the same line number as
959/// `span.end.line`, which would cause `peek_trailing_comment` to falsely
960/// classify it as an inline trailing comment.  The caller uses this predicate
961/// to skip trailing-comment detection for block scalars.
962#[inline]
963const fn is_block_scalar(node: &Node<Span>) -> bool {
964    matches!(
965        node,
966        Node::Scalar {
967            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
968            ..
969        }
970    )
971}
972
973// ---------------------------------------------------------------------------
974// Schema resolution helpers
975// ---------------------------------------------------------------------------
976
/// Maximum number of Unicode scalar values kept in [`LoadError::UnresolvedScalar`]
/// value field.  Prevents unbounded allocation when storing user-supplied input
/// in error messages.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Sanitize a raw scalar value for inclusion in an error message.
///
/// - Keeps at most [`UNRESOLVED_VALUE_MAX_CHARS`] Unicode scalar values of
///   `raw`, appending `"..."` when anything was cut off.
/// - Replaces every Unicode control character — the C0 range
///   U+0000–U+001F, DEL U+007F, and the C1 range U+0080–U+009F — with a
///   `\uXXXX` hex escape.  Escaping the C1 range as well as the ASCII
///   controls keeps characters such as NEL (U+0085) and CSI (U+009B) from
///   reaching logs or terminals through the `Display` impl.
///
/// All other characters are copied through unchanged.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut chars = raw.chars();

    // `by_ref` leaves the iterator usable afterwards, so the truncation check
    // below can see whether any input remains past the limit.
    for ch in chars.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_control() {
            // Replace control chars with \uXXXX escape to prevent log injection.
            let escaped = format!("\\u{:04X}", u32::from(ch));
            out.push_str(&escaped);
        } else {
            out.push(ch);
        }
    }

    // Anything still left in the iterator was dropped by the limit.
    if chars.next().is_some() {
        out.push_str("...");
    }
    out
}
1011
1012/// Apply schema tag resolution to a freshly-constructed node.
1013///
1014/// - For scalars: translates bare `!` to `None` (non-specific), then calls
1015///   `resolve_scalar`.
1016/// - For mappings/sequences: translates bare `!` to `None`, then calls
1017///   `resolve_collection`.
1018/// - On `Ok(Some(tag))`: overwrites `node.tag`; `tag_loc` is left `None`
1019///   (no source position for a resolved tag).
1020/// - On `Ok(None)` (explicit tag present): leaves `node.tag` unchanged.
1021///
1022/// # Errors
1023///
1024/// Returns [`LoadError::UnresolvedScalar`] when `schema` is [`Schema::Json`]
1025/// and a plain scalar does not match any JSON type pattern.
1026#[inline]
1027fn apply_schema_to_node(
1028    node: &mut Node<Span>,
1029    schema: Schema,
1030    line_index: &LineIndex,
1031) -> Result<()> {
1032    match node {
1033        Node::Scalar {
1034            value,
1035            style,
1036            tag,
1037            loc,
1038            meta,
1039        } => {
1040            // Bare `!` on a scalar is the non-specific scalar tag — it resolves
1041            // unconditionally to !!str regardless of content (YAML 1.2.2 §10.2.1,
1042            // §10.3.2: "non-specific" tag for scalars = Failsafe str).  We handle
1043            // it before calling the schema resolver so Core doesn't pattern-match
1044            // the value.
1045            //
1046            // `tag_loc` is preserved here (NOT cleared) because `!` is explicitly
1047            // written in the source.  Preserving `tag_loc` lets downstream consumers
1048            // (e.g. the formatter) distinguish user-authored tags from resolver-injected
1049            // ones, which is critical for correct idempotent output.
1050            if tag.as_deref() == Some("!") {
1051                *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
1052                return Ok(());
1053            }
1054            // All other tags: pass through as-is (Some(non-!) = explicit tag → Ok(None)).
1055            match resolve_scalar(schema, *style, value, tag.as_deref()) {
1056                Ok(Some(resolved)) => {
1057                    *tag = Some(Cow::Borrowed(resolved.as_str()));
1058                    // Clear tag_loc: resolver-injected tags have no source position.
1059                    if let Some(m) = meta.as_mut() {
1060                        m.tag_loc = None;
1061                        if m.is_all_none() {
1062                            *meta = None;
1063                        }
1064                    }
1065                }
1066                Ok(None) => {}
1067                Err(_) => {
1068                    return Err(LoadError::UnresolvedScalar {
1069                        value: sanitize_scalar_for_error(value),
1070                        pos: span_start_to_pos(loc.start, line_index),
1071                    });
1072                }
1073            }
1074        }
1075        Node::Mapping { tag, meta, .. } => {
1076            // Bare `!` on a collection means non-specific collection tag — translate
1077            // to None so the resolver returns the kind-based tag (!!map / !!seq).
1078            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1079            if let Some(resolved) =
1080                resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1081            {
1082                *tag = Some(Cow::Borrowed(resolved.as_str()));
1083                if let Some(m) = meta.as_mut() {
1084                    m.tag_loc = None;
1085                    if m.is_all_none() {
1086                        *meta = None;
1087                    }
1088                }
1089            }
1090        }
1091        Node::Sequence { tag, meta, .. } => {
1092            let effective_tag = tag.as_deref().filter(|t| *t != "!");
1093            if let Some(resolved) =
1094                resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1095            {
1096                *tag = Some(Cow::Borrowed(resolved.as_str()));
1097                if let Some(m) = meta.as_mut() {
1098                    m.tag_loc = None;
1099                    if m.is_all_none() {
1100                        *meta = None;
1101                    }
1102                }
1103            }
1104        }
1105        Node::Alias { .. } => {}
1106    }
1107    Ok(())
1108}
1109
1110// ---------------------------------------------------------------------------
1111// Node helpers
1112// ---------------------------------------------------------------------------
1113
1114const fn empty_scalar() -> Node<Span> {
1115    Node::Scalar {
1116        value: String::new(),
1117        style: ScalarStyle::Plain,
1118        tag: None,
1119        loc: Span { start: 0, end: 0 },
1120        meta: None,
1121    }
1122}
1123
1124// ---------------------------------------------------------------------------
1125// Tests
1126// ---------------------------------------------------------------------------
1127
1128#[cfg(test)]
1129#[expect(
1130    clippy::expect_used,
1131    clippy::unwrap_used,
1132    clippy::indexing_slicing,
1133    clippy::panic,
1134    reason = "test code"
1135)]
1136mod tests {
1137    use super::*;
1138    use rstest::rstest;
1139
1140    #[test]
1141    fn loader_state_resets_anchor_map_between_documents() {
1142        // In resolved mode: anchor defined in doc 1 must not be visible in doc 2.
1143        let result = LoaderBuilder::new()
1144            .resolved()
1145            .build()
1146            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
1147        assert!(
1148            result.is_err(),
1149            "expected Err: *foo in doc 2 should be undefined"
1150        );
1151        assert!(matches!(
1152            result.unwrap_err(),
1153            LoadError::UndefinedAlias { .. }
1154        ));
1155    }
1156
1157    #[test]
1158    fn register_anchor_increments_count() {
1159        let options = LoaderOptions {
1160            max_anchors: 2,
1161            ..LoaderOptions::default()
1162        };
1163        let mut state = LoadState::new(&options, "");
1164        let node = Node::Scalar {
1165            value: "x".to_owned(),
1166            style: ScalarStyle::Plain,
1167            tag: None,
1168            loc: Span { start: 0, end: 0 },
1169            meta: None,
1170        };
1171        let dummy_span = Span { start: 0, end: 0 };
1172        assert!(
1173            state
1174                .register_anchor("a".to_owned(), &node, dummy_span)
1175                .is_ok()
1176        );
1177        assert!(
1178            state
1179                .register_anchor("b".to_owned(), &node, dummy_span)
1180                .is_ok()
1181        );
1182        let err = state
1183            .register_anchor("c".to_owned(), &node, dummy_span)
1184            .expect_err("expected AnchorCountLimitExceeded");
1185        assert!(matches!(
1186            err,
1187            LoadError::AnchorCountLimitExceeded { limit: 2, .. }
1188        ));
1189    }
1190
1191    #[test]
1192    fn expand_node_detects_circular_alias() {
1193        let options = LoaderOptions {
1194            mode: LoadMode::Resolved,
1195            ..LoaderOptions::default()
1196        };
1197        let mut state = LoadState::new(&options, "");
1198        // Insert a self-referential alias node.
1199        let alias_node = Node::Alias {
1200            name: "a".to_owned(),
1201            loc: Span { start: 0, end: 0 },
1202            leading_comments: None,
1203            trailing_comment: None,
1204        };
1205        state.anchor_map.insert("a".to_owned(), alias_node.clone());
1206        let mut in_progress = HashSet::new();
1207        let alias_loc = Span { start: 0, end: 0 };
1208        let result = state.expand_node(alias_node, &mut in_progress, alias_loc);
1209        assert!(
1210            matches!(result, Err(LoadError::CircularAlias { .. })),
1211            "expected CircularAlias, got: {result:?}"
1212        );
1213    }
1214
1215    // -----------------------------------------------------------------------
1216    // Comment between mapping key and nested collection is attached to first nested entry
1217    // -----------------------------------------------------------------------
1218
1219    #[test]
1220    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
1221        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1222        let root = &docs[0].root;
1223        let Node::Mapping { entries, .. } = root else {
1224            panic!("expected root mapping");
1225        };
1226        assert_eq!(entries.len(), 1);
1227        let (_outer_key, outer_value) = &entries[0];
1228        let Node::Mapping {
1229            entries: nested, ..
1230        } = outer_value
1231        else {
1232            panic!("expected nested mapping");
1233        };
1234        assert_eq!(nested.len(), 1);
1235        let (inner_key, _) = &nested[0];
1236        assert_eq!(
1237            inner_key.leading_comments(),
1238            &["# Style 1"],
1239            "comment should be attached to the first nested key"
1240        );
1241    }
1242
1243    #[test]
1244    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
1245        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
1246        let root = &docs[0].root;
1247        let Node::Mapping { entries, .. } = root else {
1248            panic!("expected root mapping");
1249        };
1250        let (_key, seq_value) = &entries[0];
1251        let Node::Sequence { items, .. } = seq_value else {
1252            panic!("expected sequence value");
1253        };
1254        assert_eq!(
1255            items[0].leading_comments(),
1256            &["# leading"],
1257            "comment should be attached to first sequence item"
1258        );
1259    }
1260
1261    #[test]
1262    fn multiple_comments_between_key_and_collection_all_preserved() {
1263        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
1264        let root = &docs[0].root;
1265        let Node::Mapping { entries, .. } = root else {
1266            panic!("expected root mapping");
1267        };
1268        let (_key, seq_value) = &entries[0];
1269        let Node::Sequence { items, .. } = seq_value else {
1270            panic!("expected sequence value");
1271        };
1272        assert_eq!(
1273            items[0].leading_comments(),
1274            &["# first", "# second"],
1275            "both comments should be on first item"
1276        );
1277    }
1278
1279    #[test]
1280    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1281        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
1282        let root = &docs[0].root;
1283        let Node::Mapping { entries, .. } = root else {
1284            panic!("expected root mapping");
1285        };
1286        let (outer_key, _) = &entries[0];
1287        assert!(
1288            outer_key.leading_comments().is_empty(),
1289            "outer key should have no leading comments"
1290        );
1291        assert!(
1292            outer_key.trailing_comment().is_none(),
1293            "outer key should have no trailing comment"
1294        );
1295    }
1296
1297    #[test]
1298    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1299        let docs = load("key:\n  inner: val\n").unwrap();
1300        let root = &docs[0].root;
1301        let Node::Mapping { entries, .. } = root else {
1302            panic!("expected root mapping");
1303        };
1304        let (_key, nested) = &entries[0];
1305        let Node::Mapping {
1306            entries: nested_entries,
1307            ..
1308        } = nested
1309        else {
1310            panic!("expected nested mapping");
1311        };
1312        let (inner_key, _) = &nested_entries[0];
1313        assert!(
1314            inner_key.leading_comments().is_empty(),
1315            "inner key should have no leading comments when there is no comment"
1316        );
1317    }
1318
1319    // -----------------------------------------------------------------------
1320    // Trailing comment of nested collection becomes leading comment on next sibling
1321    // -----------------------------------------------------------------------
1322
1323    #[test]
1324    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1325        let input =
1326            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
1327        let docs = load(input).unwrap();
1328        let root = &docs[0].root;
1329        let Node::Mapping { entries, .. } = root else {
1330            panic!("expected root mapping");
1331        };
1332        let (_lists_key, nested) = &entries[0];
1333        let Node::Mapping {
1334            entries: nested_entries,
1335            ..
1336        } = nested
1337        else {
1338            panic!("expected nested mapping");
1339        };
1340        assert_eq!(nested_entries.len(), 2);
1341        let (list_b_key, _) = &nested_entries[1];
1342        assert_eq!(
1343            list_b_key.leading_comments(),
1344            &["# Style 2"],
1345            "# Style 2 should be leading comment on list-b key"
1346        );
1347    }
1348
1349    #[test]
1350    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1351        let input = "outer:\n  a:\n    - x\n    # between\n  b: y\n";
1352        let docs = load(input).unwrap();
1353        let root = &docs[0].root;
1354        let Node::Mapping { entries, .. } = root else {
1355            panic!("expected root mapping");
1356        };
1357        let (_outer_key, outer_val) = &entries[0];
1358        let Node::Mapping {
1359            entries: nested, ..
1360        } = outer_val
1361        else {
1362            panic!("expected nested mapping");
1363        };
1364        assert_eq!(nested.len(), 2);
1365        let (b_key, _) = &nested[1];
1366        assert_eq!(
1367            b_key.leading_comments(),
1368            &["# between"],
1369            "# between should be leading comment on b key"
1370        );
1371    }
1372
1373    #[test]
1374    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1375        let input = "parent:\n  child1:\n    k: v\n    # end-of-child1\n  child2: val\n";
1376        let docs = load(input).unwrap();
1377        let root = &docs[0].root;
1378        let Node::Mapping { entries, .. } = root else {
1379            panic!("expected root mapping");
1380        };
1381        let (_parent_key, parent_val) = &entries[0];
1382        let Node::Mapping {
1383            entries: siblings, ..
1384        } = parent_val
1385        else {
1386            panic!("expected parent mapping value");
1387        };
1388        assert_eq!(siblings.len(), 2);
1389        let (child2_key, _) = &siblings[1];
1390        assert_eq!(
1391            child2_key.leading_comments(),
1392            &["# end-of-child1"],
1393            "# end-of-child1 should be leading comment on child2 key"
1394        );
1395    }
1396
1397    #[test]
1398    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
1399        let input = "items:\n  - a\n  - b\n  # tail\n";
1400        let docs = load(input).unwrap();
1401        // The document must parse successfully (no panic, no error).
1402        assert!(!docs.is_empty(), "document should parse without error");
1403        // The # tail comment must not cause data loss — the sequence items are intact.
1404        let root = &docs[0].root;
1405        let Node::Mapping { entries, .. } = root else {
1406            panic!("expected root mapping");
1407        };
1408        let (_items_key, seq_val) = &entries[0];
1409        let Node::Sequence { items, .. } = seq_val else {
1410            panic!("expected sequence value");
1411        };
1412        assert_eq!(items.len(), 2, "sequence items must not be lost");
1413    }
1414
1415    #[test]
1416    fn no_overflow_comments_when_collection_ends_cleanly() {
1417        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
1418        let root = &docs[0].root;
1419        let Node::Mapping { entries, .. } = root else {
1420            panic!("expected root mapping");
1421        };
1422        let (_key, seq_val) = &entries[0];
1423        let Node::Sequence { items, .. } = seq_val else {
1424            panic!("expected sequence value");
1425        };
1426        for item in items {
1427            assert!(
1428                item.leading_comments().is_empty(),
1429                "items should have no leading comments"
1430            );
1431        }
1432    }
1433
1434    // -----------------------------------------------------------------------
1435    // Combined scenarios
1436    // -----------------------------------------------------------------------
1437
1438    #[test]
1439    fn original_bug_report_input_preserves_both_comments() {
1440        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n  - item1\n  - item2\n";
1441        let docs = load(input).unwrap();
1442        let root = &docs[0].root;
1443        let Node::Mapping { entries, .. } = root else {
1444            panic!("expected root mapping");
1445        };
1446        let (_lists_key, nested) = &entries[0];
1447        let Node::Mapping {
1448            entries: nested_entries,
1449            ..
1450        } = nested
1451        else {
1452            panic!("expected nested mapping");
1453        };
1454        assert_eq!(nested_entries.len(), 2);
1455        let (first_key, _) = &nested_entries[0];
1456        let (second_key, _) = &nested_entries[1];
1457        assert_eq!(
1458            first_key.leading_comments(),
1459            &["# Style 1"],
1460            "list-a should have # Style 1 as leading comment"
1461        );
1462        assert_eq!(
1463            second_key.leading_comments(),
1464            &["# Style 2"],
1465            "list-b should have # Style 2 as leading comment"
1466        );
1467    }
1468
1469    #[test]
1470    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1471        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
1472        let docs = load(input).unwrap();
1473        let root = &docs[0].root;
1474        let Node::Mapping { entries, .. } = root else {
1475            panic!("expected root mapping");
1476        };
1477        let (_map_key, map_val) = &entries[0];
1478        let Node::Mapping {
1479            entries: siblings, ..
1480        } = map_val
1481        else {
1482            panic!("expected mapping value");
1483        };
1484        assert_eq!(siblings.len(), 2);
1485        let (key1, val1) = &siblings[0];
1486        let (key2, _) = &siblings[1];
1487        assert_eq!(key1.leading_comments(), &["# leading"]);
1488        assert_eq!(val1.trailing_comment(), Some("# trailing"));
1489        assert_eq!(key2.leading_comments(), &["# next-leading"]);
1490    }
1491
1492    #[test]
1493    fn deeply_nested_overflow_comments_reach_correct_sibling() {
1494        let input = "top:\n  mid:\n    - x\n    # deep-overflow\n  next: y\n";
1495        let docs = load(input).unwrap();
1496        let root = &docs[0].root;
1497        let Node::Mapping { entries, .. } = root else {
1498            panic!("expected root mapping");
1499        };
1500        let (_top_key, top_val) = &entries[0];
1501        let Node::Mapping {
1502            entries: top_entries,
1503            ..
1504        } = top_val
1505        else {
1506            panic!("expected top-level mapping");
1507        };
1508        assert_eq!(top_entries.len(), 2);
1509        let (next_key, _) = &top_entries[1];
1510        assert_eq!(
1511            next_key.leading_comments(),
1512            &["# deep-overflow"],
1513            "# deep-overflow should propagate from nested sequence to next sibling"
1514        );
1515    }
1516
1517    // -----------------------------------------------------------------------
1518    // Document marker flags (explicit_start / explicit_end)
1519    // -----------------------------------------------------------------------
1520
1521    #[rstest]
1522    #[case::bare_document("key: value\n", false, false)]
1523    #[case::start_marker_only("---\nkey: value\n", true, false)]
1524    #[case::end_marker_only("key: value\n...\n", false, true)]
1525    #[case::both_markers("---\nkey: value\n...\n", true, true)]
1526    #[case::empty_with_both_markers("---\n...\n", true, true)]
1527    fn document_marker_flags_match_input(
1528        #[case] input: &str,
1529        #[case] expected_start: bool,
1530        #[case] expected_end: bool,
1531    ) {
1532        let docs = load(input).expect("load failed");
1533        assert_eq!(docs.len(), 1);
1534        assert_eq!(docs[0].explicit_start, expected_start, "explicit_start");
1535        assert_eq!(docs[0].explicit_end, expected_end, "explicit_end");
1536    }
1537
1538    #[test]
1539    fn multi_document_flags_are_independent() {
1540        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
1541        assert_eq!(docs.len(), 3);
1542        assert!(!docs[0].explicit_start, "doc1 explicit_start");
1543        assert!(!docs[0].explicit_end, "doc1 explicit_end");
1544        assert!(docs[1].explicit_start, "doc2 explicit_start");
1545        assert!(docs[1].explicit_end, "doc2 explicit_end");
1546        assert!(docs[2].explicit_start, "doc3 explicit_start");
1547        assert!(!docs[2].explicit_end, "doc3 explicit_end");
1548    }
1549
1550    // -----------------------------------------------------------------------
1551    // sanitize_scalar_for_error unit tests
1552    // -----------------------------------------------------------------------
1553
1554    #[rstest]
1555    #[case::newline("foo\nbar", '\n', "\\u000A", "foo\\u000Abar")]
1556    #[case::carriage_return("foo\rbar", '\r', "\\u000D", "foo\\u000Dbar")]
1557    #[case::null_byte("foo\0bar", '\0', "\\u0000", "foo\\u0000bar")]
1558    fn sanitize_replaces_control_char_with_escape(
1559        #[case] input: &str,
1560        #[case] raw_char: char,
1561        #[case] escape_seq: &str,
1562        #[case] expected: &str,
1563    ) {
1564        let result = sanitize_scalar_for_error(input);
1565        assert!(
1566            !result.contains(raw_char),
1567            "output must not contain the raw control character"
1568        );
1569        assert!(
1570            result.contains(escape_seq),
1571            "output must contain {escape_seq} escape, got: {result:?}"
1572        );
1573        assert_eq!(result, expected);
1574    }
1575
1576    #[test]
1577    fn sanitize_short_value_stored_verbatim() {
1578        let input = "hello";
1579        let result = sanitize_scalar_for_error(input);
1580        assert_eq!(result, "hello");
1581        assert!(
1582            !result.ends_with("..."),
1583            "short value must not be truncated"
1584        );
1585    }
1586
1587    #[test]
1588    fn sanitize_value_at_exact_limit_not_truncated() {
1589        let input = "a".repeat(128);
1590        let result = sanitize_scalar_for_error(&input);
1591        assert_eq!(
1592            result.len(),
1593            128,
1594            "128-char input must produce 128-char output"
1595        );
1596        assert!(
1597            !result.ends_with("..."),
1598            "value at exact limit must not be truncated"
1599        );
1600    }
1601
1602    #[test]
1603    fn sanitize_value_over_limit_truncated() {
1604        let input = "a".repeat(129);
1605        let result = sanitize_scalar_for_error(&input);
1606        assert!(
1607            result.ends_with("..."),
1608            "value over limit must end with '...'"
1609        );
1610        assert_eq!(
1611            result.len(),
1612            128 + 3,
1613            "truncated output must be 128 chars + 3 ellipsis chars"
1614        );
1615    }
1616
1617    #[test]
1618    fn sanitize_multibyte_char_boundary_not_split() {
1619        let input: String = "中".repeat(127) + "ab"; // 129 chars total
1620        let result = sanitize_scalar_for_error(&input);
1621        assert!(
1622            result.ends_with("..."),
1623            "129-char multibyte input should be truncated"
1624        );
1625        let char_count = result.trim_end_matches("...").chars().count();
1626        assert_eq!(
1627            char_count, 128,
1628            "truncated portion must be exactly 128 chars"
1629        );
1630    }
1631
1632    // -----------------------------------------------------------------------
1633    // Cow variant identity for resolver-injected vs user-authored tags
1634    // -----------------------------------------------------------------------
1635
1636    fn load_root(input: &str) -> Node<Span> {
1637        load(input).expect("load failed").remove(0).root
1638    }
1639
1640    fn node_tag(node: Node<Span>) -> Option<Cow<'static, str>> {
1641        match node {
1642            Node::Scalar { tag, .. } | Node::Mapping { tag, .. } | Node::Sequence { tag, .. } => {
1643                tag
1644            }
1645            Node::Alias { .. } => None,
1646        }
1647    }
1648
1649    #[rstest]
1650    #[case::str_tag("hello\n")]
1651    #[case::int_tag("42\n")]
1652    #[case::null_tag("null\n")]
1653    #[case::map_tag("a: 1\n")]
1654    #[case::seq_tag("- a\n")]
1655    #[case::bare_excl_tag("! hello\n")]
1656    fn resolver_emitted_tag_is_borrowed(#[case] input: &str) {
1657        let tag = node_tag(load_root(input));
1658        assert!(
1659            matches!(tag, Some(Cow::Borrowed(_))),
1660            "resolver-emitted tag must be Borrowed, got: {tag:?}"
1661        );
1662    }
1663
1664    #[rstest]
1665    #[case::scalar("!!str hello\n")]
1666    #[case::mapping("!!map\na: 1\n")]
1667    #[case::sequence("!!seq\n- a\n")]
1668    fn user_authored_tag_is_owned(#[case] input: &str) {
1669        let tag = node_tag(load_root(input));
1670        assert!(
1671            matches!(tag, Some(Cow::Owned(_))),
1672            "user-authored tag must be Owned, got: {tag:?}"
1673        );
1674    }
1675
1676    #[test]
1677    fn alias_node_has_no_tag_field() {
1678        let docs = LoaderBuilder::new()
1679            .build()
1680            .load("- &a x\n- *a\n")
1681            .expect("load failed");
1682        let Node::Sequence { items, .. } = &docs[0].root else {
1683            panic!("expected root sequence");
1684        };
1685        assert!(
1686            matches!(items[1], Node::Alias { .. }),
1687            "second item must be Alias in lossless mode"
1688        );
1689    }
1690
1691    #[test]
1692    fn tag_value_content_preserved_across_cow_variants() {
1693        let Node::Scalar {
1694            tag: tag_resolver, ..
1695        } = load_root("hello\n")
1696        else {
1697            panic!("expected scalar");
1698        };
1699        assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));
1700
1701        let Node::Scalar { tag: tag_user, .. } = load_root("!custom hello\n") else {
1702            panic!("expected scalar");
1703        };
1704        assert_eq!(tag_user.as_deref(), Some("!custom"));
1705    }
1706
1707    // -----------------------------------------------------------------------
1708    // Loader correctly gates NodeMeta construction
1709    // -----------------------------------------------------------------------
1710
1711    fn node_meta_is_none(node: &Node<Span>) -> bool {
1712        matches!(
1713            node,
1714            Node::Scalar { meta: None, .. }
1715                | Node::Mapping { meta: None, .. }
1716                | Node::Sequence { meta: None, .. }
1717        )
1718    }
1719
1720    #[rstest]
1721    #[case::plain_scalar("hello\n")]
1722    #[case::plain_mapping("a: 1\n")]
1723    #[case::plain_sequence("- a\n")]
1724    fn loaded_node_with_no_meta_fields_has_meta_none(#[case] input: &str) {
1725        let docs = load(input).unwrap();
1726        let root = &docs[0].root;
1727        assert!(
1728            node_meta_is_none(root),
1729            "plain node must have meta: None, got: {root:?}"
1730        );
1731    }
1732
1733    #[test]
1734    fn loaded_anchored_scalar_has_meta_some() {
1735        let docs = load("- &foo bar\n").unwrap();
1736        let Node::Sequence { items, .. } = &docs[0].root else {
1737            panic!("expected root Sequence");
1738        };
1739        let item = &items[0];
1740        assert!(
1741            matches!(item, Node::Scalar { meta: Some(_), .. }),
1742            "anchored scalar must have meta: Some, got: {item:?}"
1743        );
1744        assert_eq!(item.anchor(), Some("foo"));
1745    }
1746
1747    #[test]
1748    fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
1749        let docs = load("&tag hello\n").unwrap();
1750        let root = &docs[0].root;
1751        assert!(
1752            matches!(root, Node::Scalar { meta: Some(_), .. }),
1753            "anchored scalar must have meta: Some"
1754        );
1755        assert!(
1756            root.anchor_loc().is_some(),
1757            "anchor_loc() must be Some for anchored scalar"
1758        );
1759    }
1760
1761    // -----------------------------------------------------------------------
1762    // Property displacement promotion — combined anchor+tag on block collections
1763    // -----------------------------------------------------------------------
1764
1765    #[rstest]
1766    // Block mapping
1767    #[case::block_mapping_anchor_only("&a\nk: v\n", Some("a"), false)]
1768    #[case::block_mapping_tag_only("!mytag\nk: v\n", None, true)]
1769    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n", Some("a"), true)]
1770    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n", Some("a"), true)]
1771    // Block sequence
1772    #[case::block_sequence_anchor_only("&a\n- item\n", Some("a"), false)]
1773    #[case::block_sequence_tag_only("!mytag\n- item\n", None, true)]
1774    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n", Some("a"), true)]
1775    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n", Some("a"), true)]
1776    // Flow mapping
1777    #[case::flow_mapping_anchor_only("&a {k: v}\n", Some("a"), false)]
1778    #[case::flow_mapping_tag_only("!mytag {k: v}\n", None, true)]
1779    #[case::flow_mapping_anchor_then_tag("&a !mytag {k: v}\n", Some("a"), true)]
1780    #[case::flow_mapping_tag_then_anchor("!mytag &a {k: v}\n", Some("a"), true)]
1781    // Flow sequence
1782    #[case::flow_sequence_anchor_only("&a [item]\n", Some("a"), false)]
1783    #[case::flow_sequence_tag_only("!mytag [item]\n", None, true)]
1784    #[case::flow_sequence_anchor_then_tag("&a !mytag [item]\n", Some("a"), true)]
1785    #[case::flow_sequence_tag_then_anchor("!mytag &a [item]\n", Some("a"), true)]
1786    fn combined_properties_attach_to_root_collection(
1787        #[case] input: &str,
1788        #[case] expected_anchor: Option<&str>,
1789        #[case] expected_has_tag: bool,
1790    ) {
1791        let docs = load(input).unwrap();
1792        let root = &docs[0].root;
1793        assert_eq!(root.anchor(), expected_anchor, "anchor on root collection");
1794        assert_eq!(
1795            root.tag_loc().is_some(),
1796            expected_has_tag,
1797            "tag_loc on root collection"
1798        );
1799    }
1800
1801    // Block collections: first child must not inherit anchor or tag from the root
1802    #[rstest]
1803    // Block mapping
1804    #[case::block_mapping_anchor_only("&a\nk: v\n")]
1805    #[case::block_mapping_tag_only("!mytag\nk: v\n")]
1806    #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n")]
1807    #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n")]
1808    // Block sequence
1809    #[case::block_sequence_anchor_only("&a\n- item\n")]
1810    #[case::block_sequence_tag_only("!mytag\n- item\n")]
1811    #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n")]
1812    #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n")]
1813    fn first_child_of_block_collection_has_no_properties(#[case] input: &str) {
1814        let docs = load(input).unwrap();
1815        let root = &docs[0].root;
1816        let first_child: &Node<Span> = match root {
1817            Node::Mapping { entries, .. } => &entries[0].0,
1818            Node::Sequence { items, .. } => &items[0],
1819            Node::Scalar { .. } | Node::Alias { .. } => panic!("expected block collection"),
1820        };
1821        assert_eq!(
1822            first_child.anchor(),
1823            None,
1824            "anchor must not appear on first child"
1825        );
1826        assert!(
1827            first_child.tag_loc().is_none(),
1828            "tag_loc must not appear on first child"
1829        );
1830    }
1831
1832    // --- Alias registration smoke test ---
1833
1834    #[test]
1835    fn anchor_on_block_mapping_with_tag_is_resolvable_via_alias() {
1836        let input = "root:\n  tagged: &a !mytag\n    k: v\n  ref: *a\n";
1837        let result = LoaderBuilder::new().resolved().build().load(input);
1838        assert!(
1839            result.is_ok(),
1840            "alias *a must resolve — anchor must be on the mapping, not lost to first key: {result:?}"
1841        );
1842    }
1843}