Skip to main content

rlsp_yaml_parser/
loader.rs

1// SPDX-License-Identifier: MIT
2
3//! Event-to-AST loader.
4//!
5//! Consumes the event stream from [`crate::parse_events`] and builds a
6//! `Vec<Document<Span>>`.
7//!
8//! Two modes are available:
9//! - **Lossless** (default): alias references are kept as [`Node::Alias`]
10//!   nodes — no expansion, safe for untrusted input without any expansion
11//!   limit.
12//! - **Resolved**: aliases are expanded inline.  An expansion-node counter
13//!   guards against alias bombs (Billion Laughs attack).
14//!
15//! Security controls (all active in both modes unless noted):
16//! - `max_nesting_depth` — caps sequence/mapping nesting to prevent stack
17//!   exhaustion (default 512).
18//! - `max_anchors` — caps distinct anchor registrations to bound anchor-map
19//!   memory (default 10 000).
20//! - `max_expanded_nodes` — caps total nodes produced by alias expansion in
21//!   resolved mode only (default 1 000 000).
22
23use std::collections::{HashMap, HashSet};
24
25use crate::event::{Event, ScalarStyle};
26use crate::node::{Document, Node};
27use crate::pos::{Pos, Span};
28
29// ---------------------------------------------------------------------------
30// Public error type
31// ---------------------------------------------------------------------------
32
/// Errors produced by the loader.
///
/// Loading stops at the first error: no partial document list is returned
/// alongside an error.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The event stream contained a parse error.
    ///
    /// `pos` and `message` are copied unchanged from the underlying
    /// event-parser error.
    #[error("parse error at {pos:?}: {message}")]
    Parse { pos: Pos, message: String },

    /// Nesting depth exceeded the configured limit.
    ///
    /// `limit` is the configured `max_nesting_depth`.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded { limit: usize },

    /// Too many distinct anchor names were defined.
    ///
    /// Re-defining an anchor name that already exists in the current
    /// document does not count again.  `limit` is the configured
    /// `max_anchors`.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded { limit: usize },

    /// Alias expansion produced more nodes than the configured limit.
    ///
    /// Only produced in resolved mode.  `limit` is the configured
    /// `max_expanded_nodes`.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded { limit: usize },

    /// A circular alias reference was detected.
    ///
    /// `name` is the alias that was encountered while it was already being
    /// expanded.
    #[error("circular alias reference: '{name}'")]
    CircularAlias { name: String },

    /// An alias referred to an anchor that was never defined.
    ///
    /// Only produced in resolved mode; lossless mode keeps aliases as-is
    /// without looking them up.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias { name: String },
}
60
// Convenience alias used inside the module: the private loader methods all
// report failures as `LoadError`.
type Result<T> = std::result::Result<T, LoadError>;
63
64// ---------------------------------------------------------------------------
65// Configuration
66// ---------------------------------------------------------------------------
67
/// Loader mode — controls how alias references are handled.
///
/// The mode is chosen via [`LoaderBuilder::lossless`] /
/// [`LoaderBuilder::resolved`]; lossless is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Preserve aliases as [`Node::Alias`] nodes (default, safe for LSP).
    ///
    /// Aliases are not looked up in this mode, so an alias to an undefined
    /// anchor is not reported as an error.
    Lossless,
    /// Expand aliases inline; subject to `max_expanded_nodes` limit.
    ///
    /// An alias whose anchor has not been defined yields
    /// [`LoadError::UndefinedAlias`].
    Resolved,
}
76
/// Security and behaviour options for the loader.
///
/// Use [`LoaderOptions::default`] (or [`LoaderBuilder`]) to obtain the
/// documented defaults.
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum mapping/sequence nesting depth (default: 512).
    ///
    /// Exceeding it yields [`LoadError::NestingDepthLimitExceeded`].
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document (default: 10 000).
    ///
    /// Exceeding it yields [`LoadError::AnchorCountLimitExceeded`].
    pub max_anchors: usize,
    /// Maximum total nodes produced by alias expansion, resolved mode only
    /// (default: 1 000 000).
    ///
    /// The counter is kept per document; each anchor registration in
    /// resolved mode also counts as one node.  Exceeding it yields
    /// [`LoadError::AliasExpansionLimitExceeded`].
    pub max_expanded_nodes: usize,
    /// Loader mode.
    pub mode: LoadMode,
}
90
91impl Default for LoaderOptions {
92    fn default() -> Self {
93        Self {
94            max_nesting_depth: 512,
95            max_anchors: 10_000,
96            max_expanded_nodes: 1_000_000,
97            mode: LoadMode::Lossless,
98        }
99    }
100}
101
102// ---------------------------------------------------------------------------
103// Builder
104// ---------------------------------------------------------------------------
105
/// Builder for configuring and creating a [`Loader`].
///
/// Starts from [`LoaderOptions::default`]; each setter overrides one option
/// and returns the builder so calls can be chained.  Call
/// [`LoaderBuilder::build`] to obtain the configured [`Loader`].
///
/// ```
/// use rlsp_yaml_parser::loader::LoaderBuilder;
///
/// let docs = LoaderBuilder::new().lossless().build().load("hello\n").unwrap();
/// assert_eq!(docs.len(), 1);
/// ```
pub struct LoaderBuilder {
    /// Options accumulated so far; handed to the [`Loader`] by `build`.
    options: LoaderOptions,
}
117
118impl LoaderBuilder {
119    /// Create a builder with default options (lossless mode, safe limits).
120    #[must_use]
121    pub fn new() -> Self {
122        Self {
123            options: LoaderOptions::default(),
124        }
125    }
126
127    /// Use lossless mode — aliases become [`Node::Alias`] nodes.
128    #[must_use]
129    pub const fn lossless(mut self) -> Self {
130        self.options.mode = LoadMode::Lossless;
131        self
132    }
133
134    /// Use resolved mode — aliases are expanded inline.
135    #[must_use]
136    pub const fn resolved(mut self) -> Self {
137        self.options.mode = LoadMode::Resolved;
138        self
139    }
140
141    /// Override the maximum nesting depth.
142    #[must_use]
143    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
144        self.options.max_nesting_depth = limit;
145        self
146    }
147
148    /// Override the maximum anchor count.
149    #[must_use]
150    pub const fn max_anchors(mut self, limit: usize) -> Self {
151        self.options.max_anchors = limit;
152        self
153    }
154
155    /// Override the maximum expanded-node count (resolved mode only).
156    #[must_use]
157    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
158        self.options.max_expanded_nodes = limit;
159        self
160    }
161
162    /// Consume the builder and produce a [`Loader`].
163    #[must_use]
164    pub const fn build(self) -> Loader {
165        Loader {
166            options: self.options,
167        }
168    }
169}
170
171impl Default for LoaderBuilder {
172    fn default() -> Self {
173        Self::new()
174    }
175}
176
177// ---------------------------------------------------------------------------
178// Loader
179// ---------------------------------------------------------------------------
180
/// A configured YAML loader.
///
/// Obtained from [`LoaderBuilder::build`].  [`Loader::load`] takes `&self`
/// and builds fresh per-call state, so one loader can be used for several
/// inputs.
pub struct Loader {
    /// Options fixed at build time; borrowed by each `load` call.
    options: LoaderOptions,
}
185
186impl Loader {
187    /// Load YAML text into a sequence of documents.
188    ///
189    /// # Errors
190    ///
191    /// Returns `Err` if the input contains a parse error, exceeds a configured
192    /// security limit, or (in resolved mode) references an undefined anchor.
193    pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
194        let mut state = LoadState::new(&self.options);
195        state.run(input)
196    }
197}
198
199// ---------------------------------------------------------------------------
200// Convenience entry point
201// ---------------------------------------------------------------------------
202
203/// Load YAML text using lossless mode and default security limits.
204///
205/// Returns one `Document<Span>` per YAML document in the stream.
206///
207/// # Errors
208///
209/// Returns `Err` if the input contains a parse error or exceeds a security
210/// limit (nesting depth or anchor count).
211///
212/// ```
213/// use rlsp_yaml_parser::load;
214///
215/// let docs = load("hello\n").unwrap();
216/// assert_eq!(docs.len(), 1);
217/// ```
218pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
219    LoaderBuilder::new().lossless().build().load(input)
220}
221
222// ---------------------------------------------------------------------------
223// Internal loader state
224// ---------------------------------------------------------------------------
225
/// Mutable working state for a single `Loader::load` call.
///
/// The anchor table and the expansion counter are per-document and are
/// cleared whenever a new document starts.
struct LoadState<'opt> {
    /// Options borrowed from the `Loader` that started this load.
    options: &'opt LoaderOptions,
    /// Anchors registered so far in the current document: name → node.
    /// Registering a name again replaces the earlier node.
    anchor_map: HashMap<String, Node<Span>>,
    /// Count of distinct anchors registered (resets per document).
    anchor_count: usize,
    /// Current nesting depth (incremented when a mapping or sequence is
    /// entered, decremented when it is closed).
    depth: usize,
    /// Total nodes counted against `max_expanded_nodes` in the current
    /// document (resolved mode only).
    expanded_nodes: usize,
}
237
238impl<'opt> LoadState<'opt> {
239    fn new(options: &'opt LoaderOptions) -> Self {
240        Self {
241            options,
242            anchor_map: HashMap::new(),
243            anchor_count: 0,
244            depth: 0,
245            expanded_nodes: 0,
246        }
247    }
248
249    fn reset_for_document(&mut self) {
250        self.anchor_map.clear();
251        self.anchor_count = 0;
252        self.expanded_nodes = 0;
253    }
254
255    #[allow(clippy::indexing_slicing)] // pos < events.len() guards every access
256    fn run(&mut self, input: &str) -> Result<Vec<Document<Span>>> {
257        // Collect all events eagerly so we can use a cursor.
258        let raw: std::result::Result<Vec<_>, _> = crate::parse_events(input).collect();
259        let events = raw.map_err(|e| LoadError::Parse {
260            pos: e.pos,
261            message: e.message,
262        })?;
263
264        let mut docs: Vec<Document<Span>> = Vec::new();
265        let mut pos = 0usize;
266
267        // Skip StreamStart.
268        if let Some((Event::StreamStart, _)) = events.get(pos) {
269            pos += 1;
270        }
271
272        while pos < events.len() {
273            match &events[pos] {
274                (Event::StreamEnd, _) => break,
275                (Event::DocumentStart { version, tags, .. }, _) => {
276                    let doc_version = *version;
277                    let doc_tags = tags.clone();
278                    pos += 1;
279                    self.reset_for_document();
280
281                    let mut doc_comments: Vec<String> = Vec::new();
282
283                    // Consume leading comments and locate the root node.
284                    while pos < events.len() {
285                        match &events[pos] {
286                            (Event::Comment { text }, _) => {
287                                doc_comments.push(text.clone());
288                                pos += 1;
289                            }
290                            _ => break,
291                        }
292                    }
293
294                    // Parse the root node (may be absent for empty documents).
295                    let root = if matches!(
296                        events.get(pos),
297                        Some((Event::DocumentEnd { .. } | Event::StreamEnd, _)) | None
298                    ) {
299                        // Empty document — emit an empty scalar as root.
300                        Node::Scalar {
301                            value: String::new(),
302                            style: ScalarStyle::Plain,
303                            anchor: None,
304                            tag: None,
305                            loc: Span {
306                                start: Pos::ORIGIN,
307                                end: Pos::ORIGIN,
308                            },
309                            leading_comments: Vec::new(),
310                            trailing_comment: None,
311                        }
312                    } else {
313                        self.parse_node(&events, &mut pos)?
314                    };
315
316                    // Consume DocumentEnd.
317                    if matches!(events.get(pos), Some((Event::DocumentEnd { .. }, _))) {
318                        pos += 1;
319                    }
320
321                    docs.push(Document {
322                        root,
323                        version: doc_version,
324                        tags: doc_tags,
325                        comments: doc_comments,
326                    });
327                }
328                _ => {
329                    pos += 1;
330                }
331            }
332        }
333
334        Ok(docs)
335    }
336
337    /// Parse a single node starting at `*pos` and advance it past the node.
338    #[allow(clippy::too_many_lines)] // match-on-event-type; splitting would obscure flow
339    fn parse_node(&mut self, events: &[(Event, Span)], pos: &mut usize) -> Result<Node<Span>> {
340        let Some((event, span)) = events.get(*pos) else {
341            return Ok(empty_scalar());
342        };
343        let span = *span;
344
345        match event {
346            Event::Scalar {
347                value,
348                style,
349                anchor,
350                tag,
351            } => {
352                let node = Node::Scalar {
353                    value: value.clone(),
354                    style: *style,
355                    anchor: anchor.clone(),
356                    tag: tag.clone(),
357                    loc: span,
358                    leading_comments: Vec::new(),
359                    trailing_comment: None,
360                };
361                if let Some(name) = anchor {
362                    self.register_anchor(name.clone(), node.clone())?;
363                }
364                *pos += 1;
365                Ok(node)
366            }
367
368            Event::MappingStart { anchor, tag } => {
369                let anchor = anchor.clone();
370                let tag = tag.clone();
371                *pos += 1;
372
373                self.depth += 1;
374                if self.depth > self.options.max_nesting_depth {
375                    return Err(LoadError::NestingDepthLimitExceeded {
376                        limit: self.options.max_nesting_depth,
377                    });
378                }
379
380                let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
381                while !matches!(events.get(*pos), Some((Event::MappingEnd, _)) | None) {
382                    // Collect leading comments before the next key.
383                    let leading = collect_leading_comments(events, pos);
384                    let mut key = self.parse_node(events, pos)?;
385                    attach_leading_comments(&mut key, leading);
386
387                    let mut value = self.parse_node(events, pos)?;
388                    // Attach trailing comment on the value node, if present.
389                    if let Some(trail) = collect_trailing_comment(events, pos) {
390                        attach_trailing_comment(&mut value, trail);
391                    }
392
393                    entries.push((key, value));
394                }
395                // Consume MappingEnd and capture its span to form the full container span.
396                let end_span = if let Some((Event::MappingEnd, end)) = events.get(*pos) {
397                    let s = *end;
398                    *pos += 1;
399                    s
400                } else {
401                    span
402                };
403                self.depth -= 1;
404
405                let node = Node::Mapping {
406                    entries,
407                    anchor: anchor.clone(),
408                    tag,
409                    loc: Span {
410                        start: span.start,
411                        end: end_span.end,
412                    },
413                    leading_comments: Vec::new(),
414                    trailing_comment: None,
415                };
416                if let Some(name) = anchor {
417                    self.register_anchor(name, node.clone())?;
418                }
419                Ok(node)
420            }
421
422            Event::SequenceStart { anchor, tag } => {
423                let anchor = anchor.clone();
424                let tag = tag.clone();
425                *pos += 1;
426
427                self.depth += 1;
428                if self.depth > self.options.max_nesting_depth {
429                    return Err(LoadError::NestingDepthLimitExceeded {
430                        limit: self.options.max_nesting_depth,
431                    });
432                }
433
434                let mut items: Vec<Node<Span>> = Vec::new();
435                while !matches!(events.get(*pos), Some((Event::SequenceEnd, _)) | None) {
436                    // Collect leading comments before the next item.
437                    let leading = collect_leading_comments(events, pos);
438                    let mut item = self.parse_node(events, pos)?;
439                    attach_leading_comments(&mut item, leading);
440                    // Attach trailing comment on the item, if present.
441                    if let Some(trail) = collect_trailing_comment(events, pos) {
442                        attach_trailing_comment(&mut item, trail);
443                    }
444                    items.push(item);
445                }
446                // Consume SequenceEnd and capture its span to form the full container span.
447                let end_span = if let Some((Event::SequenceEnd, end)) = events.get(*pos) {
448                    let s = *end;
449                    *pos += 1;
450                    s
451                } else {
452                    span
453                };
454                self.depth -= 1;
455
456                let node = Node::Sequence {
457                    items,
458                    anchor: anchor.clone(),
459                    tag,
460                    loc: Span {
461                        start: span.start,
462                        end: end_span.end,
463                    },
464                    leading_comments: Vec::new(),
465                    trailing_comment: None,
466                };
467                if let Some(name) = anchor {
468                    self.register_anchor(name, node.clone())?;
469                }
470                Ok(node)
471            }
472
473            Event::Alias { name } => {
474                let name = name.clone();
475                *pos += 1;
476                self.resolve_alias(&name, span)
477            }
478
479            Event::Comment { .. } => {
480                // Top-level comment between nodes — skip and continue.
481                *pos += 1;
482                self.parse_node(events, pos)
483            }
484
485            Event::StreamStart
486            | Event::StreamEnd
487            | Event::DocumentStart { .. }
488            | Event::DocumentEnd { .. }
489            | Event::MappingEnd
490            | Event::SequenceEnd => {
491                // Structural event encountered where a node is expected — skip.
492                *pos += 1;
493                self.parse_node(events, pos)
494            }
495        }
496    }
497
498    fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
499        if !self.anchor_map.contains_key(&name) {
500            self.anchor_count += 1;
501            if self.anchor_count > self.options.max_anchors {
502                return Err(LoadError::AnchorCountLimitExceeded {
503                    limit: self.options.max_anchors,
504                });
505            }
506        }
507        // Count the anchor node itself toward the expansion budget in resolved
508        // mode so that the total reflects every node present in the expanded
509        // document (anchor definition + each alias expansion).
510        if self.options.mode == LoadMode::Resolved {
511            self.expanded_nodes += 1;
512            if self.expanded_nodes > self.options.max_expanded_nodes {
513                return Err(LoadError::AliasExpansionLimitExceeded {
514                    limit: self.options.max_expanded_nodes,
515                });
516            }
517        }
518        self.anchor_map.insert(name, node);
519        Ok(())
520    }
521
522    fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
523        match self.options.mode {
524            LoadMode::Lossless => Ok(Node::Alias {
525                name: name.to_owned(),
526                loc,
527                leading_comments: Vec::new(),
528                trailing_comment: None,
529            }),
530            LoadMode::Resolved => {
531                let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
532                    LoadError::UndefinedAlias {
533                        name: name.to_owned(),
534                    }
535                })?;
536                let mut in_progress: HashSet<String> = HashSet::new();
537                self.expand_node(anchored, &mut in_progress)
538            }
539        }
540    }
541
542    /// Recursively expand a node, counting every node produced against the
543    /// expansion limit and checking for cycles via `in_progress`.
544    fn expand_node(
545        &mut self,
546        node: Node<Span>,
547        in_progress: &mut HashSet<String>,
548    ) -> Result<Node<Span>> {
549        self.expanded_nodes += 1;
550        if self.expanded_nodes > self.options.max_expanded_nodes {
551            return Err(LoadError::AliasExpansionLimitExceeded {
552                limit: self.options.max_expanded_nodes,
553            });
554        }
555
556        match node {
557            Node::Alias { ref name, loc, .. } => {
558                if in_progress.contains(name) {
559                    return Err(LoadError::CircularAlias { name: name.clone() });
560                }
561                let target = self
562                    .anchor_map
563                    .get(name)
564                    .cloned()
565                    .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
566                in_progress.insert(name.clone());
567                let expanded = self.expand_node(target, in_progress)?;
568                in_progress.remove(name);
569                // Re-stamp with the alias site's location.
570                Ok(reloc(expanded, loc))
571            }
572            Node::Mapping {
573                entries,
574                anchor,
575                tag,
576                loc,
577                leading_comments,
578                trailing_comment,
579            } => {
580                let mut expanded_entries = Vec::with_capacity(entries.len());
581                for (k, v) in entries {
582                    let ek = self.expand_node(k, in_progress)?;
583                    let ev = self.expand_node(v, in_progress)?;
584                    expanded_entries.push((ek, ev));
585                }
586                Ok(Node::Mapping {
587                    entries: expanded_entries,
588                    anchor,
589                    tag,
590                    loc,
591                    leading_comments,
592                    trailing_comment,
593                })
594            }
595            Node::Sequence {
596                items,
597                anchor,
598                tag,
599                loc,
600                leading_comments,
601                trailing_comment,
602            } => {
603                let mut expanded_items = Vec::with_capacity(items.len());
604                for item in items {
605                    expanded_items.push(self.expand_node(item, in_progress)?);
606                }
607                Ok(Node::Sequence {
608                    items: expanded_items,
609                    anchor,
610                    tag,
611                    loc,
612                    leading_comments,
613                    trailing_comment,
614                })
615            }
616            // Scalars and already-resolved nodes — pass through.
617            scalar @ Node::Scalar { .. } => Ok(scalar),
618        }
619    }
620}
621
622// ---------------------------------------------------------------------------
623// Helpers
624// ---------------------------------------------------------------------------
625
626const fn empty_scalar() -> Node<Span> {
627    Node::Scalar {
628        value: String::new(),
629        style: ScalarStyle::Plain,
630        anchor: None,
631        tag: None,
632        loc: Span {
633            start: Pos::ORIGIN,
634            end: Pos::ORIGIN,
635        },
636        leading_comments: Vec::new(),
637        trailing_comment: None,
638    }
639}
640
641/// Replace the location of a node (used when stamping alias-site spans).
642fn reloc(node: Node<Span>, loc: Span) -> Node<Span> {
643    match node {
644        Node::Scalar {
645            value,
646            style,
647            anchor,
648            tag,
649            leading_comments,
650            trailing_comment,
651            ..
652        } => Node::Scalar {
653            value,
654            style,
655            anchor,
656            tag,
657            loc,
658            leading_comments,
659            trailing_comment,
660        },
661        Node::Mapping {
662            entries,
663            anchor,
664            tag,
665            leading_comments,
666            trailing_comment,
667            ..
668        } => Node::Mapping {
669            entries,
670            anchor,
671            tag,
672            loc,
673            leading_comments,
674            trailing_comment,
675        },
676        Node::Sequence {
677            items,
678            anchor,
679            tag,
680            leading_comments,
681            trailing_comment,
682            ..
683        } => Node::Sequence {
684            items,
685            anchor,
686            tag,
687            loc,
688            leading_comments,
689            trailing_comment,
690        },
691        Node::Alias {
692            name,
693            leading_comments,
694            trailing_comment,
695            ..
696        } => Node::Alias {
697            name,
698            loc,
699            leading_comments,
700            trailing_comment,
701        },
702    }
703}
704
705// ---------------------------------------------------------------------------
706// Comment attachment helpers
707// ---------------------------------------------------------------------------
708
709/// Collect all leading Comment events at `*pos` that are on their own line
710/// (span.end.line > span.start.line — non-zero span).  Advances `*pos` past them.
711/// Returns the comment texts, each prefixed with `#`.
712fn collect_leading_comments(events: &[(Event, Span)], pos: &mut usize) -> Vec<String> {
713    let mut leading = Vec::new();
714    while let Some((Event::Comment { text }, span)) = events.get(*pos) {
715        if span.end.line > span.start.line {
716            leading.push(format!("#{text}"));
717            *pos += 1;
718        } else {
719            break;
720        }
721    }
722    leading
723}
724
725/// If the next event is a trailing Comment (zero-width span: start == end),
726/// consume it and return the comment text prefixed with `#`.
727fn collect_trailing_comment(events: &[(Event, Span)], pos: &mut usize) -> Option<String> {
728    if let Some((Event::Comment { text }, span)) = events.get(*pos) {
729        if span.start == span.end {
730            let result = format!("#{text}");
731            *pos += 1;
732            return Some(result);
733        }
734    }
735    None
736}
737
738/// Attach `leading_comments` to a node's `leading_comments` field.
739fn attach_leading_comments(node: &mut Node<Span>, comments: Vec<String>) {
740    if comments.is_empty() {
741        return;
742    }
743    match node {
744        Node::Scalar {
745            leading_comments, ..
746        }
747        | Node::Mapping {
748            leading_comments, ..
749        }
750        | Node::Sequence {
751            leading_comments, ..
752        }
753        | Node::Alias {
754            leading_comments, ..
755        } => {
756            *leading_comments = comments;
757        }
758    }
759}
760
761/// Attach a trailing comment to a node's `trailing_comment` field.
762fn attach_trailing_comment(node: &mut Node<Span>, comment: String) {
763    match node {
764        Node::Scalar {
765            trailing_comment, ..
766        }
767        | Node::Mapping {
768            trailing_comment, ..
769        }
770        | Node::Sequence {
771            trailing_comment, ..
772        }
773        | Node::Alias {
774            trailing_comment, ..
775        } => {
776            *trailing_comment = Some(comment);
777        }
778    }
779}
780
781// ---------------------------------------------------------------------------
782// Tests
783// ---------------------------------------------------------------------------
784
785#[cfg(test)]
786#[allow(
787    clippy::indexing_slicing,
788    clippy::expect_used,
789    clippy::unwrap_used,
790    clippy::too_many_lines,
791    clippy::doc_markdown
792)]
793mod tests {
794    use std::fmt::Write as _;
795
796    use super::*;
797    use crate::event::ScalarStyle;
798
799    // Security advisor-specified limit for alias expansion.
800    const LIMIT: usize = 1_000_000;
801
802    // -----------------------------------------------------------------------
803    // Test helpers
804    // -----------------------------------------------------------------------
805
806    fn load_one(input: &str) -> Node<Span> {
807        let docs = load(input).expect("load failed");
808        assert_eq!(docs.len(), 1, "expected 1 document, got {}", docs.len());
809        docs.into_iter().next().unwrap().root
810    }
811
812    fn load_resolved_one(input: &str) -> Node<Span> {
813        let docs = LoaderBuilder::new()
814            .resolved()
815            .build()
816            .load(input)
817            .expect("load failed");
818        assert_eq!(docs.len(), 1, "expected 1 document, got {}", docs.len());
819        docs.into_iter().next().unwrap().root
820    }
821
822    fn scalar_value(node: &Node<Span>) -> &str {
823        match node {
824            Node::Scalar { value, .. } => value.as_str(),
825            other @ (Node::Mapping { .. } | Node::Sequence { .. } | Node::Alias { .. }) => {
826                panic!("expected Scalar, got {other:?}")
827            }
828        }
829    }
830
831    // -----------------------------------------------------------------------
832    // Group 1: Public API and wiring
833    // -----------------------------------------------------------------------
834
835    /// Test 1 — `load` is accessible from the crate root (spike)
836    #[test]
837    fn load_is_wired_into_lib_rs() {
838        let docs = crate::load("hello\n").expect("crate::load failed");
839        assert!(!docs.is_empty());
840    }
841
842    /// Test 2 — `load` returns Ok for valid input
843    #[test]
844    fn load_returns_ok_for_valid_input() {
845        assert!(load("hello\n").is_ok());
846    }
847
848    /// Test 3 — `load` returns a Vec of documents
849    #[test]
850    fn load_returns_vec_of_documents() {
851        let docs = load("hello\n").unwrap();
852        assert_eq!(docs.len(), 1);
853    }
854
855    /// Test 4 — `LoaderBuilder::new()` is callable
856    #[test]
857    fn loader_builder_new_is_callable() {
858        let result = LoaderBuilder::new().build().load("hello\n");
859        assert!(result.is_ok());
860    }
861
862    /// Test 5 — lossless mode is callable via builder
863    #[test]
864    fn loader_builder_lossless_mode_is_callable() {
865        let result = LoaderBuilder::new().lossless().build().load("hello\n");
866        assert!(result.is_ok());
867    }
868
869    /// Test 6 — resolved mode is callable via builder
870    #[test]
871    fn loader_builder_resolved_mode_is_callable() {
872        let result = LoaderBuilder::new().resolved().build().load("hello\n");
873        assert!(result.is_ok());
874    }
875
876    // -----------------------------------------------------------------------
877    // Group 2: Document structure
878    // -----------------------------------------------------------------------
879
880    /// Test 7 — document has a root node
881    #[test]
882    fn document_has_root_node() {
883        let docs = load("hello\n").unwrap();
884        let doc = docs.into_iter().next().unwrap();
885        assert!(matches!(doc.root, Node::Scalar { .. }));
886    }
887
888    /// Test 8 — version is None without %YAML directive
889    #[test]
890    fn document_version_is_none_without_yaml_directive() {
891        let docs = load("hello\n").unwrap();
892        assert_eq!(docs[0].version, None);
893    }
894
895    /// Test 9 — tags is empty without %TAG directive
896    #[test]
897    fn document_tags_is_empty_without_tag_directive() {
898        let docs = load("hello\n").unwrap();
899        assert!(docs[0].tags.is_empty());
900    }
901
902    /// Test 10 — empty input returns empty Vec
903    #[test]
904    fn empty_input_returns_empty_vec() {
905        let docs = load("").unwrap();
906        assert!(docs.is_empty());
907    }
908
909    /// Test 11 — multi-document input returns multiple documents
910    #[test]
911    fn multi_document_input_returns_multiple_documents() {
912        let docs = load("---\nfirst\n...\n---\nsecond\n...\n").unwrap();
913        assert_eq!(docs.len(), 2);
914    }
915
916    // -----------------------------------------------------------------------
917    // Group 3: Scalar nodes
918    // -----------------------------------------------------------------------
919
920    /// Test 12 — plain scalar loads as Scalar node with correct value
921    #[test]
922    fn plain_scalar_loads_as_scalar_node() {
923        let node = load_one("hello\n");
924        assert!(
925            matches!(&node, Node::Scalar { value, .. } if value == "hello"),
926            "got: {node:?}"
927        );
928    }
929
930    /// Test 13 — plain scalar has style Plain
931    #[test]
932    fn scalar_node_style_is_plain_for_plain_scalar() {
933        let node = load_one("hello\n");
934        assert!(matches!(
935            node,
936            Node::Scalar {
937                style: ScalarStyle::Plain,
938                ..
939            }
940        ));
941    }
942
943    /// Test 14 — single-quoted scalar has style SingleQuoted
944    #[test]
945    fn single_quoted_scalar_loads_with_single_quoted_style() {
946        let node = load_one("'hello'\n");
947        assert!(matches!(
948            node,
949            Node::Scalar {
950                style: ScalarStyle::SingleQuoted,
951                ..
952            }
953        ));
954    }
955
956    /// Test 15 — double-quoted scalar has style DoubleQuoted
957    #[test]
958    fn double_quoted_scalar_loads_with_double_quoted_style() {
959        let node = load_one("\"hello\"\n");
960        assert!(matches!(
961            node,
962            Node::Scalar {
963                style: ScalarStyle::DoubleQuoted,
964                ..
965            }
966        ));
967    }
968
969    /// Test 16 — literal block scalar has style Literal
970    #[test]
971    fn literal_block_scalar_loads_with_literal_style() {
972        let node = load_one("|\n  hello\n");
973        assert!(
974            matches!(
975                node,
976                Node::Scalar {
977                    style: ScalarStyle::Literal(_),
978                    ..
979                }
980            ),
981            "got: {node:?}"
982        );
983    }
984
985    /// Test 17 — folded block scalar has style Folded
986    #[test]
987    fn folded_block_scalar_loads_with_folded_style() {
988        let node = load_one(">\n  hello\n");
989        assert!(
990            matches!(
991                node,
992                Node::Scalar {
993                    style: ScalarStyle::Folded(_),
994                    ..
995                }
996            ),
997            "got: {node:?}"
998        );
999    }
1000
1001    /// Test 18 — scalar tag is None without tag
1002    #[test]
1003    fn scalar_node_tag_is_none_without_tag() {
1004        let node = load_one("hello\n");
1005        assert!(matches!(node, Node::Scalar { tag: None, .. }));
1006    }
1007
1008    /// Test 19 — tagged scalar has tag field
1009    #[test]
1010    fn tagged_scalar_has_tag_field() {
1011        let node = load_one("!!str hello\n");
1012        assert!(
1013            matches!(&node, Node::Scalar { tag: Some(t), .. } if t.contains("str")),
1014            "got: {node:?}"
1015        );
1016    }
1017
1018    // -----------------------------------------------------------------------
1019    // Group 4: Mapping nodes
1020    // -----------------------------------------------------------------------
1021
1022    /// Test 20 — block mapping loads as Mapping node
1023    #[test]
1024    fn block_mapping_loads_as_mapping_node() {
1025        let node = load_one("key: value\n");
1026        assert!(matches!(node, Node::Mapping { .. }), "got: {node:?}");
1027    }
1028
1029    /// Test 21 — mapping has correct entry count
1030    #[test]
1031    fn mapping_has_correct_entry_count() {
1032        // Use flow-style mapping so that both entries are at the same level.
1033        // Block-mapping parsing in the underlying parser treats "a: 1\nb: 2\n"
1034        // differently (b is nested under a), so flow style is used here.
1035        let node = load_one("{a: 1, b: 2}\n");
1036        assert!(
1037            matches!(&node, Node::Mapping { entries, .. } if entries.len() == 2),
1038            "got: {node:?}"
1039        );
1040    }
1041
1042    /// Test 22 — mapping entry key and value are scalars
1043    #[test]
1044    fn mapping_entry_key_and_value_are_scalars() {
1045        let node = load_one("key: value\n");
1046        let Node::Mapping { entries, .. } = node else {
1047            panic!("expected Mapping");
1048        };
1049        let (k, v) = &entries[0];
1050        assert!(matches!(k, Node::Scalar { value, .. } if value == "key"));
1051        assert!(matches!(v, Node::Scalar { value, .. } if value == "value"));
1052    }
1053
1054    /// Test 23 — mapping entries preserve declaration order
1055    #[test]
1056    fn mapping_entries_preserve_order() {
1057        // Use flow-style mapping; block-mapping parsing nests subsequent entries
1058        // under the first key when all keys are at the same indentation level.
1059        let node = load_one("{a: 1, b: 2, c: 3}\n");
1060        let Node::Mapping { entries, .. } = node else {
1061            panic!("expected Mapping");
1062        };
1063        assert_eq!(entries.len(), 3);
1064        assert_eq!(scalar_value(&entries[0].0), "a");
1065        assert_eq!(scalar_value(&entries[1].0), "b");
1066        assert_eq!(scalar_value(&entries[2].0), "c");
1067    }
1068
1069    /// Test 24 — empty mapping has zero entries
1070    #[test]
1071    fn empty_mapping_has_zero_entries() {
1072        let node = load_one("{}\n");
1073        assert!(
1074            matches!(&node, Node::Mapping { entries, .. } if entries.is_empty()),
1075            "got: {node:?}"
1076        );
1077    }
1078
1079    /// Test 25 — nested mapping value is Mapping node
1080    #[test]
1081    fn nested_mapping_value_is_mapping_node() {
1082        let node = load_one("outer:\n  inner: value\n");
1083        let Node::Mapping { entries, .. } = node else {
1084            panic!("expected Mapping");
1085        };
1086        assert!(matches!(&entries[0].1, Node::Mapping { .. }));
1087    }
1088
1089    /// Test 26 — mapping anchor is None without anchor
1090    #[test]
1091    fn mapping_anchor_is_none_without_anchor() {
1092        let node = load_one("key: value\n");
1093        assert!(matches!(node, Node::Mapping { anchor: None, .. }));
1094    }
1095
1096    /// Test 27 — flow mapping loads as Mapping node
1097    #[test]
1098    fn flow_mapping_loads_as_mapping_node() {
1099        let node = load_one("{key: value}\n");
1100        assert!(matches!(node, Node::Mapping { .. }), "got: {node:?}");
1101    }
1102
1103    // -----------------------------------------------------------------------
1104    // Group 5: Sequence nodes
1105    // -----------------------------------------------------------------------
1106
1107    /// Test 28 — block sequence loads as Sequence node
1108    #[test]
1109    fn block_sequence_loads_as_sequence_node() {
1110        let node = load_one("- a\n- b\n");
1111        assert!(matches!(node, Node::Sequence { .. }), "got: {node:?}");
1112    }
1113
1114    /// Test 29 — sequence has correct item count
1115    #[test]
1116    fn sequence_has_correct_item_count() {
1117        let node = load_one("- a\n- b\n- c\n");
1118        assert!(
1119            matches!(&node, Node::Sequence { items, .. } if items.len() == 3),
1120            "got: {node:?}"
1121        );
1122    }
1123
1124    /// Test 30 — sequence items are scalar nodes
1125    #[test]
1126    fn sequence_items_are_scalar_nodes() {
1127        let node = load_one("- a\n- b\n");
1128        let Node::Sequence { items, .. } = node else {
1129            panic!("expected Sequence");
1130        };
1131        assert!(matches!(&items[0], Node::Scalar { .. }));
1132        assert!(matches!(&items[1], Node::Scalar { .. }));
1133    }
1134
1135    /// Test 31 — sequence items preserve order
1136    #[test]
1137    fn sequence_items_preserve_order() {
1138        let node = load_one("- first\n- second\n- third\n");
1139        let Node::Sequence { items, .. } = node else {
1140            panic!("expected Sequence");
1141        };
1142        assert_eq!(scalar_value(&items[0]), "first");
1143        assert_eq!(scalar_value(&items[2]), "third");
1144    }
1145
1146    /// Test 32 — empty sequence has zero items
1147    #[test]
1148    fn empty_sequence_has_zero_items() {
1149        let node = load_one("[]\n");
1150        assert!(
1151            matches!(&node, Node::Sequence { items, .. } if items.is_empty()),
1152            "got: {node:?}"
1153        );
1154    }
1155
1156    /// Test 33 — nested sequence item is Sequence node
1157    #[test]
1158    fn nested_sequence_item_is_sequence_node() {
1159        let node = load_one("- - a\n  - b\n");
1160        let Node::Sequence { items, .. } = node else {
1161            panic!("expected Sequence");
1162        };
1163        assert!(
1164            matches!(&items[0], Node::Sequence { .. }),
1165            "got: {:?}",
1166            &items[0]
1167        );
1168    }
1169
1170    /// Test 34 — flow sequence loads as Sequence node with correct count
1171    #[test]
1172    fn flow_sequence_loads_as_sequence_node() {
1173        let node = load_one("[a, b, c]\n");
1174        assert!(
1175            matches!(&node, Node::Sequence { items, .. } if items.len() == 3),
1176            "got: {node:?}"
1177        );
1178    }
1179
1180    // -----------------------------------------------------------------------
1181    // Group 6: Anchors and aliases — lossless mode
1182    // -----------------------------------------------------------------------
1183
1184    /// Test 35 — anchored scalar preserves anchor field
1185    #[test]
1186    fn anchored_scalar_preserves_anchor_field() {
1187        let node = load_one("&a hello\n");
1188        assert!(
1189            matches!(&node, Node::Scalar { anchor: Some(a), .. } if a == "a"),
1190            "got: {node:?}"
1191        );
1192    }
1193
1194    /// Test 36 — alias reference becomes Alias node in lossless mode
1195    #[test]
1196    fn alias_reference_becomes_alias_node_in_lossless_mode() {
1197        let node = load_one("- &a hello\n- *a\n");
1198        let Node::Sequence { items, .. } = node else {
1199            panic!("expected Sequence");
1200        };
1201        assert_eq!(items.len(), 2);
1202        assert!(
1203            matches!(&items[1], Node::Alias { name, .. } if name == "a"),
1204            "got: {:?}",
1205            &items[1]
1206        );
1207    }
1208
1209    /// Test 37 — anchored mapping preserves anchor field
1210    #[test]
1211    fn anchored_mapping_preserves_anchor_field() {
1212        let node = load_one("&m\nkey: value\n");
1213        assert!(
1214            matches!(&node, Node::Mapping { anchor: Some(a), .. } if a == "m"),
1215            "got: {node:?}"
1216        );
1217    }
1218
1219    /// Test 38 — anchored sequence preserves anchor field
1220    #[test]
1221    fn anchored_sequence_preserves_anchor_field() {
1222        let node = load_one("&s\n- a\n- b\n");
1223        assert!(
1224            matches!(&node, Node::Sequence { anchor: Some(a), .. } if a == "s"),
1225            "got: {node:?}"
1226        );
1227    }
1228
1229    /// Test 39 — alias node name matches anchor
1230    #[test]
1231    fn alias_node_name_matches_anchor() {
1232        let node = load_one("- &ref hello\n- *ref\n");
1233        let Node::Sequence { items, .. } = node else {
1234            panic!("expected Sequence");
1235        };
1236        assert!(
1237            matches!(&items[1], Node::Alias { name, .. } if name == "ref"),
1238            "got: {:?}",
1239            &items[1]
1240        );
1241    }
1242
1243    /// Test 40 — multiple aliases to same anchor all become Alias nodes
1244    #[test]
1245    fn multiple_aliases_to_same_anchor_all_become_alias_nodes() {
1246        let node = load_one("- &a hello\n- *a\n- *a\n");
1247        let Node::Sequence { items, .. } = node else {
1248            panic!("expected Sequence");
1249        };
1250        assert!(matches!(&items[1], Node::Alias { name, .. } if name == "a"));
1251        assert!(matches!(&items[2], Node::Alias { name, .. } if name == "a"));
1252    }
1253
1254    /// Test 41 — alias in mapping value becomes Alias node
1255    #[test]
1256    fn alias_in_mapping_value_becomes_alias_node() {
1257        // Use a sequence containing a mapping whose value is an alias.
1258        // The block-mapping parser nests subsequent same-level keys, so we
1259        // embed the anchor definition and alias reference in separate sequence
1260        // items to keep them at distinct nesting levels.
1261        let node = load_one("- &a value\n- {ref: *a}\n");
1262        let Node::Sequence { items, .. } = node else {
1263            panic!("expected Sequence");
1264        };
1265        let Node::Mapping { entries, .. } = &items[1] else {
1266            panic!("expected Mapping in second item");
1267        };
1268        let ref_entry = entries.iter().find(|(k, _)| scalar_value(k) == "ref");
1269        assert!(ref_entry.is_some(), "key 'ref' not found");
1270        let (_, value) = ref_entry.unwrap();
1271        assert!(
1272            matches!(value, Node::Alias { name, .. } if name == "a"),
1273            "got: {value:?}"
1274        );
1275    }
1276
1277    /// Test 42 — lossless mode does not expand aliases
1278    #[test]
1279    fn lossless_mode_does_not_expand_aliases() {
1280        let node = load_one("- &a hello\n- *a\n");
1281        let Node::Sequence { items, .. } = node else {
1282            panic!("expected Sequence");
1283        };
1284        // Second item must be Alias, not a copy of "hello".
1285        assert!(
1286            matches!(&items[1], Node::Alias { .. }),
1287            "expected Alias, got: {:?}",
1288            &items[1]
1289        );
1290    }
1291
1292    // -----------------------------------------------------------------------
1293    // Group 7: Anchors and aliases — resolved mode
1294    // -----------------------------------------------------------------------
1295
1296    /// Test 43 — resolved mode expands scalar alias
1297    #[test]
1298    fn resolved_mode_expands_scalar_alias() {
1299        let node = load_resolved_one("- &a hello\n- *a\n");
1300        let Node::Sequence { items, .. } = node else {
1301            panic!("expected Sequence");
1302        };
1303        assert_eq!(items.len(), 2);
1304        assert!(matches!(&items[0], Node::Scalar { value, .. } if value == "hello"));
1305        assert!(matches!(&items[1], Node::Scalar { value, .. } if value == "hello"));
1306    }
1307
1308    /// Test 44 — resolved mode expanded alias matches anchored value
1309    #[test]
1310    fn resolved_mode_expanded_alias_matches_anchored_value() {
1311        // Use a sequence: first item defines anchor, second item is a mapping
1312        // with the alias as a value. Flow-style mapping avoids block-mapping
1313        // nesting behaviour.
1314        let node = load_resolved_one("- &a world\n- {ref: *a}\n");
1315        let Node::Sequence { items, .. } = node else {
1316            panic!("expected Sequence");
1317        };
1318        let Node::Mapping { entries, .. } = &items[1] else {
1319            panic!("expected Mapping in second item");
1320        };
1321        let ref_entry = entries.iter().find(|(k, _)| scalar_value(k) == "ref");
1322        assert!(ref_entry.is_some(), "key 'ref' not found");
1323        let (_, value) = ref_entry.unwrap();
1324        assert!(
1325            matches!(value, Node::Scalar { value, .. } if value == "world"),
1326            "got: {value:?}"
1327        );
1328    }
1329
1330    /// Test 45 — resolved mode expands mapping alias
1331    #[test]
1332    fn resolved_mode_expands_mapping_alias() {
1333        let node = load_resolved_one("base: &b\n  key: value\nmerge: *b\n");
1334        let Node::Mapping { entries, .. } = node else {
1335            panic!("expected Mapping");
1336        };
1337        let merge_entry = entries.iter().find(|(k, _)| scalar_value(k) == "merge");
1338        assert!(merge_entry.is_some(), "key 'merge' not found");
1339        let (_, value) = merge_entry.unwrap();
1340        assert!(matches!(value, Node::Mapping { .. }), "got: {value:?}");
1341    }
1342
1343    /// Test 46 — resolved mode expands sequence alias
1344    #[test]
1345    fn resolved_mode_expands_sequence_alias() {
1346        // Anchor a sequence as a scalar's sibling in a sequence, then reference
1347        // it from a mapping value in a second sequence item.
1348        let node = load_resolved_one("- &b\n  - a\n  - b\n- {ref: *b}\n");
1349        let Node::Sequence { items, .. } = node else {
1350            panic!("expected Sequence");
1351        };
1352        let Node::Mapping { entries, .. } = &items[1] else {
1353            panic!("expected Mapping in second item");
1354        };
1355        let ref_entry = entries.iter().find(|(k, _)| scalar_value(k) == "ref");
1356        assert!(ref_entry.is_some(), "key 'ref' not found");
1357        let (_, value) = ref_entry.unwrap();
1358        assert!(
1359            matches!(value, Node::Sequence { items, .. } if items.len() == 2),
1360            "got: {value:?}"
1361        );
1362    }
1363
1364    /// Test 47 — resolved mode multiple expansions are independent copies
1365    #[test]
1366    fn resolved_mode_multiple_expansions_are_independent_copies() {
1367        let node = load_resolved_one("- &a hello\n- *a\n- *a\n");
1368        let Node::Sequence { items, .. } = node else {
1369            panic!("expected Sequence");
1370        };
1371        assert!(matches!(&items[1], Node::Scalar { value, .. } if value == "hello"));
1372        assert!(matches!(&items[2], Node::Scalar { value, .. } if value == "hello"));
1373    }
1374
1375    /// Test 48 — resolved mode preserves anchor field on defining node
1376    #[test]
1377    fn resolved_mode_anchor_field_preserved_on_defining_node() {
1378        let node = load_resolved_one("- &a hello\n- *a\n");
1379        let Node::Sequence { items, .. } = node else {
1380            panic!("expected Sequence");
1381        };
1382        assert!(
1383            matches!(&items[0], Node::Scalar { anchor: Some(a), .. } if a == "a"),
1384            "got: {:?}",
1385            &items[0]
1386        );
1387    }
1388
1389    /// Test 49 — resolved mode below expansion limit succeeds
1390    ///
1391    /// Constructs a sequence with LIMIT - 1 alias expansions: one anchor
1392    /// and LIMIT - 2 references to it (the anchor itself counts as 1).
1393    #[test]
1394    fn resolved_mode_below_limit_succeeds() {
1395        // One anchor (scalar "x"), then LIMIT-2 alias references.
1396        // expand_node is called once for the anchor node itself when stored,
1397        // and once per alias resolution. We use a small sub-limit for speed.
1398        let custom_limit = 100usize;
1399        // One anchor + 98 aliases = 99 expansions — below 100.
1400        let refs = (0..98).map(|_| "- *a\n").collect::<String>();
1401        let yaml = format!("- &a x\n{refs}");
1402        let result = LoaderBuilder::new()
1403            .resolved()
1404            .max_expanded_nodes(custom_limit)
1405            .build()
1406            .load(&yaml);
1407        assert!(result.is_ok(), "expected Ok, got: {result:?}");
1408    }
1409
1410    /// Test 50 — resolved mode at expansion limit is rejected
1411    #[test]
1412    fn resolved_mode_at_limit_is_rejected() {
1413        let custom_limit = 10usize;
1414        // One anchor + 10 aliases = 11 expansions — exceeds limit of 10.
1415        let refs = (0..10).map(|_| "- *a\n").collect::<String>();
1416        let yaml = format!("- &a x\n{refs}");
1417        let result = LoaderBuilder::new()
1418            .resolved()
1419            .max_expanded_nodes(custom_limit)
1420            .build()
1421            .load(&yaml);
1422        assert!(result.is_err(), "expected Err at limit, got Ok: {result:?}");
1423        assert!(matches!(
1424            result.unwrap_err(),
1425            LoadError::AliasExpansionLimitExceeded { .. }
1426        ));
1427    }
1428
1429    // -----------------------------------------------------------------------
1430    // Group 8: Alias bomb — resolved mode
1431    // -----------------------------------------------------------------------
1432
1433    /// Three-level alias bomb is rejected in resolved mode.
1434    #[test]
1435    fn alias_bomb_three_levels_is_rejected_in_resolved_mode() {
1436        // Use a sequence to avoid block-mapping multi-entry parsing behaviour.
1437        // A 3-level × 3-alias bomb only produces ~27 leaf nodes, well below the
1438        // default 1 000 000 limit. Use a small custom limit so the test fires.
1439        let yaml = "- &a small\n- &b [*a, *a, *a]\n- &c [*b, *b, *b]\n- *c\n";
1440        let result = LoaderBuilder::new()
1441            .resolved()
1442            .max_expanded_nodes(20)
1443            .build()
1444            .load(yaml);
1445        assert!(
1446            result.is_err(),
1447            "expected Err for 3-level bomb with limit=20"
1448        );
1449    }
1450
1451    /// Nine-level / nine-alias canonical Billion Laughs is rejected.
1452    #[test]
1453    fn alias_bomb_nine_levels_nine_aliases_is_rejected() {
1454        let yaml = concat!(
1455            "a: &a [\"lol\"]\n",
1456            "b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a]\n",
1457            "c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b]\n",
1458            "d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c]\n",
1459            "e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d]\n",
1460            "f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e]\n",
1461            "g: &g [*f, *f, *f, *f, *f, *f, *f, *f, *f]\n",
1462            "h: &h [*g, *g, *g, *g, *g, *g, *g, *g, *g]\n",
1463            "i: &i [*h, *h, *h, *h, *h, *h, *h, *h, *h]\n",
1464            "j: *i\n",
1465        );
1466        let result = LoaderBuilder::new().resolved().build().load(yaml);
1467        assert!(result.is_err(), "expected Err for 9-level bomb");
1468        assert!(matches!(
1469            result.unwrap_err(),
1470            LoadError::AliasExpansionLimitExceeded { .. }
1471        ));
1472    }
1473
1474    /// Billion Laughs payload is accepted in lossless mode (no expansion).
1475    #[test]
1476    fn alias_bomb_is_accepted_in_lossless_mode() {
1477        let yaml = concat!(
1478            "a: &a [\"lol\"]\n",
1479            "b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a]\n",
1480            "c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b]\n",
1481            "d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c]\n",
1482            "e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d]\n",
1483            "f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e]\n",
1484            "g: &g [*f, *f, *f, *f, *f, *f, *f, *f, *f]\n",
1485            "h: &h [*g, *g, *g, *g, *g, *g, *g, *g, *g]\n",
1486            "i: &i [*h, *h, *h, *h, *h, *h, *h, *h, *h]\n",
1487            "j: *i\n",
1488        );
1489        // Lossless mode: aliases are not expanded, so no bomb.
1490        let result = load(yaml);
1491        assert!(result.is_ok(), "expected Ok in lossless mode: {result:?}");
1492    }
1493
1494    /// Alias bomb error in resolved mode is a handled error, not a crash.
1495    #[test]
1496    fn alias_bomb_error_message_is_meaningful() {
1497        let yaml = concat!(
1498            "a: &a [\"lol\"]\n",
1499            "b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a]\n",
1500            "c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b]\n",
1501            "d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c]\n",
1502            "e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d]\n",
1503            "f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e]\n",
1504            "g: &g [*f, *f, *f, *f, *f, *f, *f, *f, *f]\n",
1505            "h: &h [*g, *g, *g, *g, *g, *g, *g, *g, *g]\n",
1506            "i: &i [*h, *h, *h, *h, *h, *h, *h, *h, *h]\n",
1507            "j: *i\n",
1508        );
1509        let result = LoaderBuilder::new().resolved().build().load(yaml);
1510        let err = result.expect_err("expected Err");
1511        let msg = err.to_string();
1512        assert!(!msg.is_empty(), "error message is empty");
1513    }
1514
1515    // -----------------------------------------------------------------------
1516    // Group 9: Cycle detection
1517    // -----------------------------------------------------------------------
1518
1519    /// Test 55 — deeply nested alias chain is rejected in resolved mode
1520    /// (substitute for true cycle; uses expansion limit as the guard)
1521    #[test]
1522    fn self_referencing_anchor_via_merge_key_is_rejected() {
1523        // Merge keys not supported in this implementation; use expansion limit
1524        // as a practical substitute for the rejection test.
1525        let custom_limit = 5usize;
1526        let refs = (0..5).map(|_| "- *a\n").collect::<String>();
1527        let yaml = format!("- &a x\n{refs}");
1528        let result = LoaderBuilder::new()
1529            .resolved()
1530            .max_expanded_nodes(custom_limit)
1531            .build()
1532            .load(&yaml);
1533        assert!(result.is_err(), "expected Err");
1534    }
1535
1536    /// Test 56 — deeply nested alias chain exceeding limit is rejected in resolved mode
1537    #[test]
1538    fn deeply_nested_alias_chain_is_rejected_in_resolved_mode() {
1539        let custom_limit = LIMIT;
1540        // Build a chain: a → scalar, b → [*a, *a, *a], c → [*b, *b, *b], …
1541        // at enough levels that expansion exceeds LIMIT.
1542        // The 9-level bomb already handles this; use a smaller chain here
1543        // with a tiny custom limit.
1544        let tiny_limit = 3usize;
1545        let yaml = "a: &a x\nb: &b [*a, *a, *a, *a]\n";
1546        let result = LoaderBuilder::new()
1547            .resolved()
1548            .max_expanded_nodes(tiny_limit)
1549            .build()
1550            .load(yaml);
1551        assert!(result.is_err(), "expected Err; tiny_limit={tiny_limit}");
1552        let _ = custom_limit; // referenced to confirm LIMIT is in scope
1553    }
1554
1555    /// Test 57 — deeply nested alias chain succeeds in lossless mode
1556    #[test]
1557    fn deeply_nested_alias_chain_succeeds_in_lossless_mode() {
1558        let yaml = "a: &a x\nb: &b [*a, *a, *a, *a]\n";
1559        let result = load(yaml);
1560        assert!(result.is_ok(), "expected Ok in lossless mode: {result:?}");
1561    }
1562
1563    /// Test 58 — unknown alias reference returns Err in resolved mode
1564    #[test]
1565    fn unknown_alias_reference_returns_error() {
1566        // Lossless mode preserves aliases as Node::Alias without lookup.
1567        // Resolved mode expands aliases and therefore errors on unknown names.
1568        let result = LoaderBuilder::new()
1569            .resolved()
1570            .build()
1571            .load("- *nonexistent\n");
1572        assert!(result.is_err(), "expected Err for unknown alias");
1573    }
1574
1575    /// Test 59 — unknown alias error contains the alias name
1576    #[test]
1577    fn unknown_alias_error_contains_alias_name() {
1578        let result = LoaderBuilder::new()
1579            .resolved()
1580            .build()
1581            .load("- *nonexistent\n");
1582        let err = result.expect_err("expected Err");
1583        let msg = err.to_string();
1584        assert!(
1585            msg.contains("nonexistent"),
1586            "error message should contain alias name; got: {msg:?}"
1587        );
1588    }
1589
1590    // -----------------------------------------------------------------------
1591    // Group 10: Multi-document loading
1592    // -----------------------------------------------------------------------
1593
1594    /// Test 60 — two-document stream returns two documents
1595    #[test]
1596    fn two_document_stream_returns_two_documents() {
1597        let docs = load("---\nfirst\n...\n---\nsecond\n...\n").unwrap();
1598        assert_eq!(docs.len(), 2);
1599    }
1600
1601    /// Test 61 — first document root is first scalar
1602    #[test]
1603    fn first_document_root_is_first_scalar() {
1604        let docs = load("---\nfirst\n...\n---\nsecond\n...\n").unwrap();
1605        assert!(
1606            matches!(&docs[0].root, Node::Scalar { value, .. } if value == "first"),
1607            "got: {:?}",
1608            &docs[0].root
1609        );
1610    }
1611
1612    /// Test 62 — second document root is second scalar
1613    #[test]
1614    fn second_document_root_is_second_scalar() {
1615        let docs = load("---\nfirst\n...\n---\nsecond\n...\n").unwrap();
1616        assert!(
1617            matches!(&docs[1].root, Node::Scalar { value, .. } if value == "second"),
1618            "got: {:?}",
1619            &docs[1].root
1620        );
1621    }
1622
1623    /// Test 63 — anchor in first document does not resolve in second (lossless)
1624    ///
1625    /// In lossless mode, aliases are stored as Alias nodes regardless of scope.
1626    /// The anchor map resets per-document, so in resolved mode *a in document 2
1627    /// would be an undefined alias.
1628    #[test]
1629    fn anchor_in_first_document_does_not_resolve_in_second() {
1630        // Lossless mode: alias in second doc becomes Node::Alias.
1631        let docs = load("---\n- &a hello\n...\n---\n- *a\n...\n").unwrap();
1632        assert_eq!(docs.len(), 2);
1633        let Node::Sequence { items, .. } = &docs[1].root else {
1634            panic!("expected Sequence in doc 2");
1635        };
1636        assert!(
1637            matches!(&items[0], Node::Alias { name, .. } if name == "a"),
1638            "got: {:?}",
1639            &items[0]
1640        );
1641
1642        // Resolved mode: *a in document 2 is undefined → Err.
1643        let result = LoaderBuilder::new()
1644            .resolved()
1645            .build()
1646            .load("---\n- &a hello\n...\n---\n- *a\n...\n");
1647        assert!(result.is_err(), "expected Err in resolved mode");
1648    }
1649
1650    /// Test 64 — documents have independent anchor namespaces
1651    #[test]
1652    fn documents_have_independent_anchor_namespaces() {
1653        let docs = load("---\n&a hello\n...\n---\n&a world\n...\n").unwrap();
1654        assert_eq!(docs.len(), 2);
1655        assert!(matches!(&docs[0].root, Node::Scalar { anchor: Some(a), .. } if a == "a"));
1656        assert!(matches!(&docs[1].root, Node::Scalar { anchor: Some(a), .. } if a == "a"));
1657    }
1658
    // -----------------------------------------------------------------------
    // Group 11: Comment attachment
    //
    // The loader attaches document-level comments to Document::comments.
    // Comments inside nodes are currently discarded (future task).
    // Tests 65-68 verify that comments do not cause errors and do not
    // disturb the loaded documents; they do not inspect Document::comments.
    // -----------------------------------------------------------------------
1667
1668    /// Test 65 — comment before scalar is accessible via Document::comments
1669    #[test]
1670    fn comment_before_scalar_is_accessible_in_document() {
1671        // The tokenizer only emits BeginComment/EndComment for block-level
1672        // comments, not for inline ones. We use a block scalar followed by
1673        // a document-level comment, which produces Comment events.
1674        // A comment directly before a scalar on a bare document is consumed
1675        // as a document prefix and not emitted as a token — so we test using
1676        // a block scalar with a trailing comment.
1677        let result = load("|\n  hello\n# a comment\n");
1678        assert!(result.is_ok(), "expected Ok: {result:?}");
1679        // The scalar loads correctly regardless of comment attachment.
1680        let docs = result.unwrap();
1681        assert_eq!(docs.len(), 1);
1682    }
1683
1684    /// Test 66 — comment after block scalar is accessible
1685    #[test]
1686    fn comment_after_block_scalar_is_accessible() {
1687        let result = load("|\n  hello\n# trailing comment\n");
1688        assert!(result.is_ok(), "expected Ok: {result:?}");
1689    }
1690
1691    /// Test 67 — comments do not interfere with node values
1692    #[test]
1693    fn comments_do_not_interfere_with_node_values() {
1694        // Use a plain scalar without inline comment (inline comments are
1695        // folded into scalar text by the tokenizer and thus appear in value).
1696        let node = load_one("hello\n");
1697        assert!(
1698            matches!(&node, Node::Scalar { value, .. } if value == "hello"),
1699            "got: {node:?}"
1700        );
1701    }
1702
1703    /// Test 68 — multiple comments do not cause errors
1704    #[test]
1705    fn multiple_comments_do_not_cause_errors() {
1706        // Multiple block comments above content do not reach the event layer
1707        // as Comment events (they are consumed by the document prefix parser).
1708        // Use block scalars with trailing comments to produce Comment events.
1709        let result = load("|\n  a\n# first\n---\n|\n  b\n# second\n");
1710        assert!(result.is_ok(), "expected Ok: {result:?}");
1711        let docs = result.unwrap();
1712        assert_eq!(docs.len(), 2);
1713    }
1714
1715    // -----------------------------------------------------------------------
1716    // Group 12: Error cases
1717    // -----------------------------------------------------------------------
1718
1719    /// Test 69 — LoadError implements Display
1720    #[test]
1721    fn error_type_implements_display() {
1722        let err = LoadError::UndefinedAlias {
1723            name: "foo".to_owned(),
1724        };
1725        let s = err.to_string();
1726        assert!(!s.is_empty());
1727        assert!(s.contains("foo"));
1728    }
1729
1730    /// Test 70 — LoadError::Parse has pos and message
1731    #[test]
1732    fn error_has_position_field() {
1733        let err = LoadError::Parse {
1734            pos: Pos::ORIGIN,
1735            message: "oops".to_owned(),
1736        };
1737        assert!(err.to_string().contains("oops"));
1738        // Verify pos field is accessible.
1739        if let LoadError::Parse { pos, .. } = err {
1740            assert_eq!(pos, Pos::ORIGIN);
1741        }
1742    }
1743
1744    /// Test 71 — load returns Ok for complex valid input
1745    #[test]
1746    fn load_returns_ok_for_complex_valid_input() {
1747        let result = load("key: value\nlist:\n  - a\n  - b\nnested:\n  inner: 42\n");
1748        assert!(result.is_ok(), "got: {result:?}");
1749        let docs = result.unwrap();
1750        assert_eq!(docs.len(), 1);
1751    }
1752
1753    /// Test 72 — load handles explicit null (empty mapping value)
1754    #[test]
1755    fn load_handles_explicit_null() {
1756        let result = load("key:\n");
1757        assert!(result.is_ok(), "got: {result:?}");
1758    }
1759
1760    /// Test 73 — load handles all scalar styles in one document
1761    #[test]
1762    fn load_handles_all_scalar_styles() {
1763        let result = load(
1764            "plain: hello\nsingle: 'world'\ndouble: \"foo\"\nliteral: |\n  bar\nfolded: >\n  baz\n",
1765        );
1766        assert!(result.is_ok(), "got: {result:?}");
1767    }
1768
1769    /// Test 74 — load handles Unicode scalar value
1770    #[test]
1771    fn load_handles_unicode_scalar_value() {
1772        let docs = load("value: こんにちは\n").unwrap();
1773        let Node::Mapping { entries, .. } = &docs[0].root else {
1774            panic!("expected Mapping");
1775        };
1776        let val_entry = entries
1777            .iter()
1778            .find(|(k, _)| scalar_value(k) == "value")
1779            .expect("key 'value' not found");
1780        assert!(
1781            matches!(&val_entry.1, Node::Scalar { value, .. } if value == "こんにちは"),
1782            "got: {:?}",
1783            &val_entry.1
1784        );
1785    }
1786
1787    // -----------------------------------------------------------------------
1788    // Group 13: Integration via `load`
1789    // -----------------------------------------------------------------------
1790
1791    /// Test 75 — `load` is accessible from crate root
1792    #[test]
1793    fn load_is_accessible_from_crate_root() {
1794        let result = crate::load("hello\n");
1795        assert!(result.is_ok());
1796    }
1797
1798    /// Test 76 — full document structure is correct for a key:value mapping
1799    #[test]
1800    fn load_full_document_structure_is_correct() {
1801        let docs = load("key: value\n").unwrap();
1802        assert_eq!(docs.len(), 1);
1803        let Node::Mapping { entries, .. } = &docs[0].root else {
1804            panic!("expected Mapping");
1805        };
1806        assert_eq!(entries.len(), 1);
1807        assert!(matches!(&entries[0].0, Node::Scalar { value, .. } if value == "key"));
1808        assert!(matches!(&entries[0].1, Node::Scalar { value, .. } if value == "value"));
1809    }
1810
1811    /// Test 77 — nested document tree is correct
1812    #[test]
1813    fn load_nested_document_tree_is_correct() {
1814        let docs = load("outer:\n  - a\n  - b\n").unwrap();
1815        let Node::Mapping { entries, .. } = &docs[0].root else {
1816            panic!("expected Mapping");
1817        };
1818        assert!(matches!(&entries[0].1, Node::Sequence { items, .. } if items.len() == 2));
1819    }
1820
1821    /// Test 78 — anchored and aliased document is correct in lossless mode
1822    #[test]
1823    fn load_anchored_and_aliased_document_is_correct_in_lossless() {
1824        let docs = load("- &a hello\n- *a\n").unwrap();
1825        let Node::Sequence { items, .. } = &docs[0].root else {
1826            panic!("expected Sequence");
1827        };
1828        assert!(
1829            matches!(&items[0], Node::Scalar { value, anchor: Some(a), .. }
1830            if value == "hello" && a == "a")
1831        );
1832        assert!(matches!(&items[1], Node::Alias { name, .. } if name == "a"));
1833    }
1834
1835    /// Test 79 — anchored and aliased document is correct in resolved mode
1836    #[test]
1837    fn load_anchored_and_aliased_document_is_correct_in_resolved() {
1838        let docs = LoaderBuilder::new()
1839            .resolved()
1840            .build()
1841            .load("- &a hello\n- *a\n")
1842            .unwrap();
1843        let Node::Sequence { items, .. } = &docs[0].root else {
1844            panic!("expected Sequence");
1845        };
1846        assert!(matches!(&items[0], Node::Scalar { value, .. } if value == "hello"));
1847        assert!(matches!(&items[1], Node::Scalar { value, .. } if value == "hello"));
1848    }
1849
1850    // -----------------------------------------------------------------------
1851    // Security-required test scenarios (from security advisor)
1852    // -----------------------------------------------------------------------
1853
1854    /// Security test: nesting depth limit — structure exceeding the limit is rejected
1855    #[test]
1856    fn nesting_depth_limit_rejects_deep_structure() {
1857        // Use a custom limit of 10 and build 20 levels of nested flow sequences.
1858        // This ensures the depth check fires without overflowing the system stack
1859        // (which would happen with the default 512 limit and hundreds of levels).
1860        let depth = 20usize;
1861        let yaml = "[".repeat(depth) + "x" + &"]".repeat(depth) + "\n";
1862        let result = LoaderBuilder::new()
1863            .max_nesting_depth(10)
1864            .build()
1865            .load(&yaml);
1866        assert!(result.is_err(), "expected Err for {depth}-deep nesting");
1867        assert!(matches!(
1868            result.unwrap_err(),
1869            LoadError::NestingDepthLimitExceeded { .. }
1870        ));
1871    }
1872
1873    /// Security test: anchor count limit — anchors exceeding the limit are rejected
1874    #[test]
1875    fn anchor_count_limit_rejects_excess_anchors() {
1876        // Use a custom limit of 10 and build 11 anchored scalars so the check
1877        // fires quickly without generating thousands of entries.
1878        let mut yaml = String::new();
1879        for i in 0..=10 {
1880            let _ = writeln!(yaml, "- &a{i} x{i}");
1881        }
1882        let result = LoaderBuilder::new().max_anchors(10).build().load(&yaml);
1883        assert!(result.is_err(), "expected Err for 11 anchors with limit=10");
1884        assert!(matches!(
1885            result.unwrap_err(),
1886            LoadError::AnchorCountLimitExceeded { .. }
1887        ));
1888    }
1889
1890    /// Security test: custom expansion limit of 10 — 11 nodes rejected
1891    #[test]
1892    fn custom_expansion_limit_is_respected() {
1893        let refs = (0..10).map(|_| "- *a\n").collect::<String>();
1894        let yaml = format!("- &a x\n{refs}");
1895        let result = LoaderBuilder::new()
1896            .resolved()
1897            .max_expanded_nodes(10)
1898            .build()
1899            .load(&yaml);
1900        assert!(result.is_err(), "expected Err with limit=10");
1901        assert!(matches!(
1902            result.unwrap_err(),
1903            LoadError::AliasExpansionLimitExceeded { .. }
1904        ));
1905    }
1906}
1907
1908// ---------------------------------------------------------------------------
1909// Comment-field tests (LCF series)
1910// ---------------------------------------------------------------------------
1911
#[cfg(test)]
#[allow(
    clippy::indexing_slicing,
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::doc_markdown
)]
mod comment_tests {
    use super::*;

    // LCF-1: trailing_comment_on_mapping_value_attached_to_value_node
    // A `# ...` on the same line as a mapping value belongs to that value node.
    #[test]
    fn trailing_comment_on_mapping_value_attached_to_value_node() {
        let docs = load("a: 1  # note\nb: 2\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected Mapping, got {root:?}");
        };
        assert_eq!(entries.len(), 2);

        // The value of 'a' carries the comment.
        let val_a = &entries[0].1;
        assert_eq!(
            val_a.trailing_comment(),
            Some("# note"),
            "value 'a' trailing comment: {val_a:?}"
        );

        // The value of 'b' carries none.
        let val_b = &entries[1].1;
        assert_eq!(
            val_b.trailing_comment(),
            None,
            "value 'b' should have no trailing comment: {val_b:?}"
        );
    }

    // LCF-2: leading_comment_before_non_first_mapping_key_attached_to_key_node
    // A comment line between two entries belongs to the key that follows it.
    #[test]
    fn leading_comment_before_non_first_mapping_key_attached_to_key_node() {
        let docs = load("a: 1\n# before b\nb: 2\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected Mapping, got {root:?}");
        };
        assert_eq!(entries.len(), 2);

        // Nothing precedes key 'a'.
        let key_a = &entries[0].0;
        assert!(
            key_a.leading_comments().is_empty(),
            "key 'a' should have no leading comments: {key_a:?}"
        );

        // Key 'b' owns the comment line above it.
        let key_b = &entries[1].0;
        assert_eq!(
            key_b.leading_comments(),
            &["# before b"],
            "key 'b' leading comments: {key_b:?}"
        );
    }

    // LCF-3: scalar_with_no_comments_has_empty_fields
    // Without any comments in the input, every key and value reports none.
    #[test]
    fn scalar_with_no_comments_has_empty_fields() {
        let docs = load("key: value\n").unwrap();
        let Node::Mapping { entries, .. } = &docs[0].root else {
            panic!("expected Mapping");
        };
        for (key_node, value_node) in entries {
            assert!(
                key_node.leading_comments().is_empty(),
                "key has unexpected leading comments"
            );
            assert!(
                key_node.trailing_comment().is_none(),
                "key has unexpected trailing comment"
            );
            assert!(
                value_node.leading_comments().is_empty(),
                "value has unexpected leading comments"
            );
            assert!(
                value_node.trailing_comment().is_none(),
                "value has unexpected trailing comment"
            );
        }
    }

    // LCF-4: multiple_leading_comments_before_non_first_key_all_attached
    // Consecutive comment lines are all kept, in source order.
    #[test]
    fn multiple_leading_comments_before_non_first_key_all_attached() {
        let docs = load("a: 1\n# first\n# second\nb: 2\n").unwrap();
        let Node::Mapping { entries, .. } = &docs[0].root else {
            panic!("expected Mapping");
        };
        let key_b = &entries[1].0;
        assert_eq!(
            key_b.leading_comments(),
            &["# first", "# second"],
            "key 'b' leading comments: {key_b:?}"
        );
    }

    // LCF-5: trailing_comment_on_sequence_item_attached_to_item_node
    // A `# ...` on the same line as a sequence item belongs to that item.
    #[test]
    fn trailing_comment_on_sequence_item_attached_to_item_node() {
        let docs = load("- a  # first item\n- b\n").unwrap();
        let root = &docs[0].root;
        let Node::Sequence { items, .. } = root else {
            panic!("expected Sequence, got {root:?}");
        };
        assert_eq!(items.len(), 2);

        let commented = &items[0];
        assert_eq!(
            commented.trailing_comment(),
            Some("# first item"),
            "item 0 trailing comment: {commented:?}"
        );

        let plain = &items[1];
        assert_eq!(
            plain.trailing_comment(),
            None,
            "item 1 should have no trailing comment: {plain:?}"
        );
    }

    // LCF-6: leading_comment_before_non_first_sequence_item_attached_to_item_node
    // A comment line between two items belongs to the item that follows it.
    #[test]
    fn leading_comment_before_non_first_sequence_item_attached_to_item_node() {
        let docs = load("- one\n# between\n- two\n").unwrap();
        let root = &docs[0].root;
        let Node::Sequence { items, .. } = root else {
            panic!("expected Sequence, got {root:?}");
        };
        assert_eq!(items.len(), 2);

        let before = &items[0];
        assert!(
            before.leading_comments().is_empty(),
            "item 0 should have no leading comments: {before:?}"
        );

        let after = &items[1];
        assert_eq!(
            after.leading_comments(),
            &["# between"],
            "item 1 leading comments: {after:?}"
        );
    }

    // LCF-7: comment_text_stored_with_hash_prefix
    // Stored comment text keeps its leading '#'.
    #[test]
    fn comment_text_stored_with_hash_prefix() {
        let docs = load("a: 1  # my note\nb: 2\n").unwrap();
        let Node::Mapping { entries, .. } = &docs[0].root else {
            panic!("expected Mapping");
        };
        let trail = entries[0]
            .1
            .trailing_comment()
            .expect("expected trailing comment");
        assert!(
            trail.starts_with('#'),
            "trailing comment should start with '#': {trail:?}"
        );
        assert_eq!(trail, "# my note");
    }

    // LCF-8: document_prefix_leading_comment_not_in_doc_comments_and_not_on_nodes
    // Records a known limitation: the tokenizer discards comments that appear
    // before the document starts.
    #[test]
    fn document_prefix_leading_comment_not_in_doc_comments_and_not_on_nodes() {
        let docs = load("# preamble\nkey: value\n").unwrap();
        let doc = &docs[0];

        // The preamble never reaches doc.comments ...
        let doc_comments = &doc.comments;
        assert!(
            doc_comments.is_empty(),
            "doc.comments should be empty: {doc_comments:?}"
        );

        // ... nor the root node's leading comments.
        let root_leading = doc.root.leading_comments();
        assert!(
            root_leading.is_empty(),
            "root leading_comments should be empty: {root_leading:?}"
        );
    }

    // LCF-9: comment_between_documents_not_silently_lost
    // The comment may land in doc.comments or in the root's leading comments;
    // either location is accepted.
    #[test]
    fn comment_between_documents_not_silently_lost() {
        let docs = load("first: 1\n---\n# between docs\nsecond: 2\n").unwrap();
        assert_eq!(docs.len(), 2, "expected 2 documents");

        let second = &docs[1];
        let doc_comments = &second.comments;
        let root_leading = second.root.leading_comments();
        let found_on_doc = doc_comments.iter().any(|c| c.contains("between docs"));
        let found_on_root = root_leading.iter().any(|c| c.contains("between docs"));
        assert!(
            found_on_doc || found_on_root,
            "between-document comment should be captured in doc.comments or root \
             leading_comments, but was silently lost. doc[1].comments={doc_comments:?}, \
             root.leading_comments()={root_leading:?}"
        );
    }
}