use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::iter::Peekable;
use std::sync::Arc;
use crate::error::Error;
use crate::event::{Event, EventMeta, ScalarStyle};
use crate::node::{Document, Node, NodeMeta};
use crate::pos::{LineIndex, Pos, Span};
use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
use comments::{attach_leading_comments, attach_trailing_comment};
use reloc::reloc;
use stream::{
consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
with_hash_prefix,
};
mod comments;
mod reloc;
mod stream;
/// Errors reported while turning a parser event stream into documents.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
/// An error item was yielded by the event stream; `pos` and `message` are
/// copied from that error.
#[error("parse error at {pos:?}: {message}")]
Parse {
pos: Pos,
message: String,
},
/// The event stream ended where another event was required.
#[error("unexpected end of event stream")]
UnexpectedEndOfStream,
/// More mappings/sequences were open at once than
/// `LoaderOptions::max_nesting_depth` allows.
#[error("nesting depth limit exceeded (max: {limit})")]
NestingDepthLimitExceeded {
limit: usize,
},
/// More distinct anchor names were registered in one document than
/// `LoaderOptions::max_anchors` allows.
#[error("anchor count limit exceeded (max: {limit})")]
AnchorCountLimitExceeded {
limit: usize,
},
/// The per-document expanded-node counter went past
/// `LoaderOptions::max_expanded_nodes`.
#[error("alias expansion node limit exceeded (max: {limit})")]
AliasExpansionLimitExceeded {
limit: usize,
},
/// An alias was encountered again while it was already being expanded.
#[error("circular alias reference: '{name}'")]
CircularAlias {
name: String,
},
/// An alias refers to an anchor name that is not registered in the
/// current document.
#[error("undefined alias: '{name}'")]
UndefinedAlias {
name: String,
},
/// Scalar resolution returned an error for this scalar. `value` is the
/// sanitized (escaped and truncated) scalar text and `pos` is where the
/// scalar starts; neither is included in the display message.
#[error("JSON schema: plain scalar does not match any type pattern")]
UnresolvedScalar {
value: String,
pos: Pos,
},
}
/// Module-local result alias with the error type fixed to [`LoadError`].
type Result<T> = std::result::Result<T, LoadError>;
/// Peekable, boxed iterator over parser output; every item is either an
/// `(Event, Span)` pair or a parser [`Error`].
type EventStream<'a> =
Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
/// Splits optional event metadata into `(anchor, anchor_loc, tag, tag_loc)`.
///
/// All four components are `None` when the event carried no metadata.
#[expect(
    clippy::type_complexity,
    reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
)]
#[inline]
fn unpack_meta(
    meta: Option<Box<EventMeta<'_>>>,
) -> (
    Option<&'_ str>,
    Option<Span>,
    Option<std::borrow::Cow<'_, str>>,
    Option<Span>,
) {
    // Guard clause: nothing to unpack when the event had no metadata box.
    let Some(boxed) = meta else {
        return (None, None, None, None);
    };
    (boxed.anchor, boxed.anchor_loc, boxed.tag, boxed.tag_loc)
}
/// Controls how alias events are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
/// Aliases are kept as `Node::Alias` nodes and are not looked up.
Lossless,
/// Aliases are replaced by an expanded copy of the anchored node; an
/// alias with no registered anchor is an error.
Resolved,
}
/// Limits and behavior switches used by [`Loader`].
#[derive(Debug, Clone)]
pub struct LoaderOptions {
/// Maximum number of mappings/sequences that may be open at once.
pub max_nesting_depth: usize,
/// Maximum number of distinct anchor names per document.
pub max_anchors: usize,
/// Upper bound for the per-document expanded-node counter, which is bumped
/// during alias expansion and, in resolved mode, on anchor registration.
pub max_expanded_nodes: usize,
/// Whether aliases are preserved or expanded.
pub mode: LoadMode,
/// Schema passed to scalar and collection tag resolution.
pub schema: Schema,
}
impl Default for LoaderOptions {
    /// Lossless loading with the core schema and the stock resource limits.
    fn default() -> Self {
        const DEFAULT_MAX_NESTING_DEPTH: usize = 512;
        const DEFAULT_MAX_ANCHORS: usize = 10_000;
        const DEFAULT_MAX_EXPANDED_NODES: usize = 1_000_000;

        Self {
            mode: LoadMode::Lossless,
            schema: Schema::Core,
            max_nesting_depth: DEFAULT_MAX_NESTING_DEPTH,
            max_anchors: DEFAULT_MAX_ANCHORS,
            max_expanded_nodes: DEFAULT_MAX_EXPANDED_NODES,
        }
    }
}
/// Fluent builder that assembles a [`LoaderOptions`] value and turns it into
/// a [`Loader`].
///
/// `Debug` and `Clone` are derived so the builder can be logged, embedded in
/// `Debug`-deriving structs, and reused as a template.
#[derive(Debug, Clone)]
pub struct LoaderBuilder {
    options: LoaderOptions,
}
impl LoaderBuilder {
#[must_use]
pub fn new() -> Self {
Self {
options: LoaderOptions::default(),
}
}
#[must_use]
pub const fn lossless(mut self) -> Self {
self.options.mode = LoadMode::Lossless;
self
}
#[must_use]
pub const fn resolved(mut self) -> Self {
self.options.mode = LoadMode::Resolved;
self
}
#[must_use]
pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
self.options.max_nesting_depth = limit;
self
}
#[must_use]
pub const fn max_anchors(mut self, limit: usize) -> Self {
self.options.max_anchors = limit;
self
}
#[must_use]
pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
self.options.max_expanded_nodes = limit;
self
}
#[must_use]
pub const fn schema(mut self, s: Schema) -> Self {
self.options.schema = s;
self
}
#[must_use]
pub const fn build(self) -> Loader {
Loader {
options: self.options,
}
}
}
impl Default for LoaderBuilder {
fn default() -> Self {
Self::new()
}
}
/// Loads input text into documents according to a fixed set of
/// [`LoaderOptions`].
///
/// `Debug` and `Clone` are derived so a configured loader can be logged,
/// embedded in `Debug`-deriving structs, and duplicated.
#[derive(Debug, Clone)]
pub struct Loader {
    options: LoaderOptions,
}
impl Loader {
pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
let mut state = LoadState::new(&self.options, input);
let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
Box::new(crate::parse_events(input));
state.run(iter.peekable())
}
}
/// Convenience entry point: loads `input` with a default loader in lossless
/// mode.
///
/// # Errors
///
/// Propagates any [`LoadError`] produced by [`Loader::load`].
pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
    let loader = LoaderBuilder::new().lossless().build();
    loader.load(input)
}
/// Mutable bookkeeping for one `load` call.
struct LoadState<'opt> {
// Options borrowed from the owning `Loader`.
options: &'opt LoaderOptions,
// Anchor name -> registered node. In resolved mode this is a clone of the
// anchored node; in lossless mode only an empty placeholder scalar is stored.
anchor_map: HashMap<String, Node<Span>>,
// Number of distinct anchor names registered in the current document.
anchor_count: usize,
// Number of mappings/sequences currently being parsed.
depth: usize,
// Counter compared against `max_expanded_nodes`.
expanded_nodes: usize,
// Comment lines collected but not yet attached to a node.
pending_leading: Vec<String>,
// Line index built from the input; shared with every produced document.
line_index: Arc<LineIndex>,
}
impl<'opt> LoadState<'opt> {
/// Creates empty per-load state and builds the line index for `input`.
fn new(options: &'opt LoaderOptions, input: &str) -> Self {
    let line_index = Arc::new(LineIndex::new(input));
    Self {
        options,
        line_index,
        anchor_map: HashMap::new(),
        pending_leading: Vec::new(),
        anchor_count: 0,
        depth: 0,
        expanded_nodes: 0,
    }
}
/// Clears everything that is scoped to a single document: anchors, the
/// anchor and expansion counters, and any comments still waiting for a node.
fn reset_for_document(&mut self) {
    (self.anchor_count, self.expanded_nodes) = (0, 0);
    self.pending_leading.clear();
    self.anchor_map.clear();
}
/// Drives the whole event stream and returns one [`Document`] per
/// `DocumentStart` event.
///
/// # Errors
///
/// Returns [`LoadError::Parse`] if the very first stream item is an error,
/// and otherwise propagates whatever the helpers and `parse_node` report.
fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
    // The first item is consumed without being inspected (presumably the
    // stream-start marker); only an error in that position is fatal.
    if let Some(Err(e)) = stream.next() {
        return Err(LoadError::Parse {
            pos: e.pos,
            message: e.message,
        });
    }

    let mut loaded: Vec<Document<Span>> = Vec::new();
    loop {
        let (explicit_start, version, tags) = match next_from(&mut stream)? {
            None | Some((Event::StreamEnd, _)) => break,
            Some((
                Event::DocumentStart {
                    explicit,
                    version,
                    tag_directives,
                },
                _,
            )) => (explicit, version, tag_directives),
            // Any other event between documents is ignored.
            Some(_) => continue,
        };

        self.reset_for_document();

        let mut comments: Vec<String> = Vec::new();
        consume_leading_doc_comments(&mut stream, &mut comments, &self.line_index)?;

        // A document with no content still gets a (schema-resolved) empty
        // scalar as its root.
        let root = if is_document_end(stream.peek()) {
            let mut placeholder = empty_scalar();
            apply_schema_to_node(&mut placeholder, self.options.schema, &self.line_index)?;
            placeholder
        } else {
            self.parse_node(&mut stream)?
        };

        // Consume the document-end event only when it is the very next event.
        let explicit_end =
            if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
                let flag = *explicit;
                let _ = stream.next();
                flag
            } else {
                false
            };

        loaded.push(Document {
            root,
            version,
            tags,
            comments,
            explicit_start,
            explicit_end,
            line_index: Some(self.line_index.clone()),
        });
    }
    Ok(loaded)
}
#[expect(
clippy::too_many_lines,
reason = "match-on-event-type; splitting would obscure flow"
)]
fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
if matches!(
stream.peek(),
Some(Ok((
Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
_
)))
) {
return Ok(empty_scalar());
}
let Some((event, span)) = next_from(stream)? else {
return Ok(empty_scalar());
};
match event {
Event::Scalar { value, style, meta } => {
let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
let anchor = anchor.map(str::to_owned);
let mut node = Node::Scalar {
value: value.into_owned(),
style,
tag: tag.map(|t| Cow::Owned(t.into_owned())),
loc: span,
meta: NodeMeta {
anchor,
anchor_loc,
tag_loc,
leading_comments: None,
trailing_comment: None,
}
.into_option(),
};
apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
if let Some(name) = node.anchor() {
self.register_anchor(name.to_owned(), &node)?;
}
Ok(node)
}
Event::MappingStart { style, meta } => {
let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
let anchor = event_anchor.map(str::to_owned);
let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
let anchor_for_registration = anchor.clone();
self.depth += 1;
if self.depth > self.options.max_nesting_depth {
return Err(LoadError::NestingDepthLimitExceeded {
limit: self.options.max_nesting_depth,
});
}
let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
let mut end_span = span;
loop {
let raw_leading = consume_leading_comments(stream)?;
let leading = if self.pending_leading.is_empty() {
raw_leading
} else {
let mut combined = std::mem::take(&mut self.pending_leading);
combined.extend(raw_leading);
combined
};
match stream.peek() {
None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
if !leading.is_empty() {
self.pending_leading = leading;
}
break;
}
Some(Err(_)) => {
return Err(match stream.next() {
Some(Err(e)) => LoadError::Parse {
pos: e.pos,
message: e.message,
},
_ => LoadError::UnexpectedEndOfStream,
});
}
Some(Ok(_)) => {}
}
let mut key = self.parse_node(stream)?;
attach_leading_comments(&mut key, leading);
let mut value = self.parse_node(stream)?;
if !is_block_scalar(&value)
&& matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
{
let value_end_line = node_end_line(&value, &self.line_index);
if let Some(trail) =
peek_trailing_comment(stream, value_end_line, &self.line_index)?
{
attach_trailing_comment(&mut value, trail);
}
}
entries.push((key, value));
}
if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
end_span = *end;
let _ = stream.next();
}
self.depth -= 1;
let mut node = Node::Mapping {
entries,
style,
tag,
loc: Span {
start: span.start,
end: end_span.end,
},
meta: NodeMeta {
anchor,
anchor_loc,
tag_loc,
leading_comments: None,
trailing_comment: None,
}
.into_option(),
};
apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
if let Some(name) = anchor_for_registration {
self.register_anchor(name, &node)?;
}
Ok(node)
}
Event::SequenceStart { style, meta } => {
let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
let anchor = event_anchor.map(str::to_owned);
let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
let anchor_for_registration = anchor.clone();
self.depth += 1;
if self.depth > self.options.max_nesting_depth {
return Err(LoadError::NestingDepthLimitExceeded {
limit: self.options.max_nesting_depth,
});
}
let mut items: Vec<Node<Span>> = Vec::new();
let mut end_span = span;
loop {
let raw_leading = consume_leading_comments(stream)?;
let leading = if self.pending_leading.is_empty() {
raw_leading
} else {
let mut combined = std::mem::take(&mut self.pending_leading);
combined.extend(raw_leading);
combined
};
match stream.peek() {
None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
if !leading.is_empty() {
self.pending_leading = leading;
}
break;
}
Some(Err(_)) => {
return Err(match stream.next() {
Some(Err(e)) => LoadError::Parse {
pos: e.pos,
message: e.message,
},
_ => LoadError::UnexpectedEndOfStream,
});
}
Some(Ok(_)) => {}
}
let mut item = self.parse_node(stream)?;
attach_leading_comments(&mut item, leading);
if !is_block_scalar(&item)
&& matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
{
let item_end_line = node_end_line(&item, &self.line_index);
if let Some(trail) =
peek_trailing_comment(stream, item_end_line, &self.line_index)?
{
attach_trailing_comment(&mut item, trail);
}
}
items.push(item);
}
if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
end_span = *end;
let _ = stream.next();
}
self.depth -= 1;
let mut node = Node::Sequence {
items,
style,
tag,
loc: Span {
start: span.start,
end: end_span.end,
},
meta: NodeMeta {
anchor,
anchor_loc,
tag_loc,
leading_comments: None,
trailing_comment: None,
}
.into_option(),
};
apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
if let Some(name) = anchor_for_registration {
self.register_anchor(name, &node)?;
}
Ok(node)
}
Event::Alias { name } => {
let name = name.to_owned();
self.resolve_alias(&name, span)
}
Event::Comment { text } => {
self.pending_leading.push(with_hash_prefix(text));
self.parse_node(stream)
}
Event::StreamStart
| Event::StreamEnd
| Event::DocumentStart { .. }
| Event::DocumentEnd { .. }
| Event::MappingEnd
| Event::SequenceEnd => {
Ok(empty_scalar())
}
}
}
/// Records `node` under the anchor `name`, replacing any earlier node with
/// the same name.
///
/// Only names not yet present count towards `max_anchors`. In resolved mode
/// a clone of `node` is stored and the expansion counter is charged by one;
/// in lossless mode an empty placeholder scalar is stored instead.
///
/// # Errors
///
/// [`LoadError::AnchorCountLimitExceeded`] or
/// [`LoadError::AliasExpansionLimitExceeded`] when the respective limit is
/// passed.
fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
    let is_new_name = !self.anchor_map.contains_key(&name);
    if is_new_name {
        self.anchor_count += 1;
        if self.anchor_count > self.options.max_anchors {
            return Err(LoadError::AnchorCountLimitExceeded {
                limit: self.options.max_anchors,
            });
        }
    }

    let stored = match self.options.mode {
        LoadMode::Resolved => {
            self.expanded_nodes += 1;
            if self.expanded_nodes > self.options.max_expanded_nodes {
                return Err(LoadError::AliasExpansionLimitExceeded {
                    limit: self.options.max_expanded_nodes,
                });
            }
            node.clone()
        }
        LoadMode::Lossless => empty_scalar(),
    };
    self.anchor_map.insert(name, stored);
    Ok(())
}
/// Turns an alias event into a node.
///
/// Lossless mode produces a `Node::Alias` located at `loc`. Resolved mode
/// looks the anchor up and returns an expanded copy of the registered node.
///
/// # Errors
///
/// In resolved mode, [`LoadError::UndefinedAlias`] when `name` is not
/// registered, plus anything `expand_node` reports.
fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
    if self.options.mode == LoadMode::Lossless {
        return Ok(Node::Alias {
            name: name.to_owned(),
            loc,
            leading_comments: None,
            trailing_comment: None,
        });
    }

    let Some(anchored) = self.anchor_map.get(name).cloned() else {
        return Err(LoadError::UndefinedAlias {
            name: name.to_owned(),
        });
    };
    // Every top-level alias starts with a fresh cycle-detection set.
    let mut in_progress = HashSet::new();
    self.expand_node(anchored, &mut in_progress)
}
/// Returns `node` with every alias inside it replaced by the expansion of
/// its anchor.
///
/// Every visited node (including scalars and aliases) is charged against
/// `max_expanded_nodes`. `in_progress` holds the alias names currently being
/// expanded further up the call chain and is used to detect cycles.
///
/// # Errors
///
/// [`LoadError::AliasExpansionLimitExceeded`],
/// [`LoadError::CircularAlias`] or [`LoadError::UndefinedAlias`].
fn expand_node(
    &mut self,
    node: Node<Span>,
    in_progress: &mut HashSet<String>,
) -> Result<Node<Span>> {
    let limit = self.options.max_expanded_nodes;
    self.expanded_nodes += 1;
    if self.expanded_nodes > limit {
        return Err(LoadError::AliasExpansionLimitExceeded { limit });
    }

    match node {
        scalar @ Node::Scalar { .. } => Ok(scalar),
        Node::Sequence {
            items,
            style,
            tag,
            loc,
            meta,
        } => {
            // Stops at the first failing child, like an explicit loop with `?`.
            let items = items
                .into_iter()
                .map(|child| self.expand_node(child, in_progress))
                .collect::<Result<Vec<_>>>()?;
            Ok(Node::Sequence {
                items,
                style,
                tag,
                loc,
                meta,
            })
        }
        Node::Mapping {
            entries,
            style,
            tag,
            loc,
            meta,
        } => {
            let mut pairs = Vec::with_capacity(entries.len());
            for (key, value) in entries {
                // Key first, then value.
                let key = self.expand_node(key, in_progress)?;
                let value = self.expand_node(value, in_progress)?;
                pairs.push((key, value));
            }
            Ok(Node::Mapping {
                entries: pairs,
                style,
                tag,
                loc,
                meta,
            })
        }
        Node::Alias { name, loc, .. } => {
            if in_progress.contains(&name) {
                return Err(LoadError::CircularAlias { name });
            }
            let Some(target) = self.anchor_map.get(&name).cloned() else {
                return Err(LoadError::UndefinedAlias { name });
            };
            // Mark the alias as active only for the duration of its own
            // expansion.
            in_progress.insert(name.clone());
            let expanded = self.expand_node(target, in_progress)?;
            in_progress.remove(&name);
            Ok(reloc(expanded, loc))
        }
    }
}
}
/// Reports whether the peeked stream position ends the current document:
/// the stream is exhausted, or the next event is `DocumentEnd`/`StreamEnd`.
/// A pending stream error does not count as a document end.
const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
    match peeked {
        None => true,
        Some(Err(_)) => false,
        Some(Ok((event, _))) => {
            matches!(event, Event::DocumentEnd { .. } | Event::StreamEnd)
        }
    }
}
/// Builds a [`Pos`] for byte `offset` by asking `line_index` for the
/// matching line and column.
#[inline]
fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
    let located = line_index.line_column(offset);
    Pos {
        byte_offset: offset as usize,
        line: located.0 as usize,
        column: located.1 as usize,
    }
}
/// Returns the line reported by `line_index` for the end offset of `node`'s
/// span.
#[inline]
fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
    // Every variant carries a `loc`, so this or-pattern is irrefutable.
    let (Node::Scalar { loc, .. }
    | Node::Mapping { loc, .. }
    | Node::Sequence { loc, .. }
    | Node::Alias { loc, .. }) = node;
    let (line, _column) = line_index.line_column(loc.end);
    line
}
/// True only for scalars written in literal or folded style.
#[inline]
const fn is_block_scalar(node: &Node<Span>) -> bool {
    match node {
        Node::Scalar { style, .. } => {
            matches!(style, ScalarStyle::Literal(_) | ScalarStyle::Folded(_))
        }
        Node::Mapping { .. } | Node::Sequence { .. } | Node::Alias { .. } => false,
    }
}
/// Maximum number of characters of a scalar that are copied into an error
/// payload before the value is cut off.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Produces a copy of `raw` that is safe to embed in an error value.
///
/// * At most [`UNRESOLVED_VALUE_MAX_CHARS`] characters (not bytes) are kept;
///   if anything was cut off, `"..."` is appended.
/// * Every control character is replaced by a `\uXXXX` escape. This covers
///   the C1 range (U+0080–U+009F) as well as ASCII controls, so NEL
///   (U+0085) or the single-character CSI (U+009B) cannot reach a log or
///   terminal unescaped.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut chars = raw.chars();
    // `by_ref` keeps the iterator usable afterwards, which lets us detect
    // truncation by checking whether any character is left over.
    for ch in chars.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_control() {
            let escaped = format!("\\u{:04X}", ch as u32);
            out.push_str(&escaped);
        } else {
            out.push(ch);
        }
    }
    if chars.next().is_some() {
        out.push_str("...");
    }
    out
}
/// Applies schema-driven tag resolution to `node` in place.
///
/// * A scalar tagged with the bare `!` gets the string tag and is left
///   otherwise untouched.
/// * Any other scalar is passed to `resolve_scalar`; a resolved tag replaces
///   the node's tag.
/// * Mappings and sequences are passed to `resolve_collection`, with a bare
///   `!` tag treated as "no tag".
/// * Aliases are not modified.
///
/// Whenever a resolved tag is written, the node's `tag_loc` is cleared, and
/// the metadata is dropped entirely if nothing else remains in it.
///
/// # Errors
///
/// [`LoadError::UnresolvedScalar`] when `resolve_scalar` fails; the error
/// carries the sanitized value and the scalar's start position.
#[inline]
fn apply_schema_to_node(
    node: &mut Node<Span>,
    schema: Schema,
    line_index: &LineIndex,
) -> Result<()> {
    match node {
        Node::Alias { .. } => {}
        Node::Scalar {
            value,
            style,
            tag,
            loc,
            meta,
        } => {
            if matches!(tag.as_deref(), Some("!")) {
                *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
                return Ok(());
            }
            let Ok(outcome) = resolve_scalar(schema, *style, value, tag.as_deref()) else {
                return Err(LoadError::UnresolvedScalar {
                    value: sanitize_scalar_for_error(value),
                    pos: span_start_to_pos(loc.start, line_index),
                });
            };
            if let Some(resolved) = outcome {
                *tag = Some(Cow::Borrowed(resolved.as_str()));
                let now_empty = meta.as_mut().is_some_and(|m| {
                    m.tag_loc = None;
                    m.is_all_none()
                });
                if now_empty {
                    *meta = None;
                }
            }
        }
        Node::Mapping { tag, meta, .. } => {
            let effective_tag = tag.as_deref().filter(|t| *t != "!");
            let resolution = resolve_collection(schema, CollectionKind::Mapping, effective_tag);
            if let Some(resolved) = resolution {
                *tag = Some(Cow::Borrowed(resolved.as_str()));
                let now_empty = meta.as_mut().is_some_and(|m| {
                    m.tag_loc = None;
                    m.is_all_none()
                });
                if now_empty {
                    *meta = None;
                }
            }
        }
        Node::Sequence { tag, meta, .. } => {
            let effective_tag = tag.as_deref().filter(|t| *t != "!");
            let resolution = resolve_collection(schema, CollectionKind::Sequence, effective_tag);
            if let Some(resolved) = resolution {
                *tag = Some(Cow::Borrowed(resolved.as_str()));
                let now_empty = meta.as_mut().is_some_and(|m| {
                    m.tag_loc = None;
                    m.is_all_none()
                });
                if now_empty {
                    *meta = None;
                }
            }
        }
    }
    Ok(())
}
/// Builds the placeholder node used wherever content is absent: a plain,
/// untagged, empty scalar with a zero span and no metadata.
const fn empty_scalar() -> Node<Span> {
    const ORIGIN: Span = Span { start: 0, end: 0 };
    Node::Scalar {
        value: String::new(),
        style: ScalarStyle::Plain,
        tag: None,
        loc: ORIGIN,
        meta: None,
    }
}
// Unit tests for the loader: per-document state, anchor/alias limits, comment
// attachment, document marker flags, error-value sanitizing, tag ownership
// and node metadata.
#[cfg(test)]
#[expect(
clippy::expect_used,
clippy::unwrap_used,
clippy::indexing_slicing,
clippy::panic,
reason = "test code"
)]
mod tests {
use super::*;
use rstest::rstest;
// ---- Loader state, anchors and aliases ----
// An anchor defined in the first document must not be visible in the second.
#[test]
fn loader_state_resets_anchor_map_between_documents() {
let result = LoaderBuilder::new()
.resolved()
.build()
.load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
assert!(
result.is_err(),
"expected Err: *foo in doc 2 should be undefined"
);
assert!(matches!(
result.unwrap_err(),
LoadError::UndefinedAlias { .. }
));
}
// With `max_anchors: 2` the third distinct anchor name is rejected.
#[test]
fn register_anchor_increments_count() {
let options = LoaderOptions {
max_anchors: 2,
..LoaderOptions::default()
};
let mut state = LoadState::new(&options, "");
let node = Node::Scalar {
value: "x".to_owned(),
style: ScalarStyle::Plain,
tag: None,
loc: Span { start: 0, end: 0 },
meta: None,
};
assert!(state.register_anchor("a".to_owned(), &node).is_ok());
assert!(state.register_anchor("b".to_owned(), &node).is_ok());
let err = state
.register_anchor("c".to_owned(), &node)
.expect_err("expected AnchorCountLimitExceeded");
assert!(matches!(
err,
LoadError::AnchorCountLimitExceeded { limit: 2 }
));
}
// The anchor map is seeded by hand so that anchor "a" points at an alias to
// itself; expansion must report the cycle.
#[test]
fn expand_node_detects_circular_alias() {
let options = LoaderOptions {
mode: LoadMode::Resolved,
..LoaderOptions::default()
};
let mut state = LoadState::new(&options, "");
let alias_node = Node::Alias {
name: "a".to_owned(),
loc: Span { start: 0, end: 0 },
leading_comments: None,
trailing_comment: None,
};
state.anchor_map.insert("a".to_owned(), alias_node.clone());
let mut in_progress = HashSet::new();
let result = state.expand_node(alias_node, &mut in_progress);
assert!(
matches!(result, Err(LoadError::CircularAlias { .. })),
"expected CircularAlias, got: {result:?}"
);
}
// ---- Comment attachment ----
#[test]
fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
let docs = load("outer:\n # Style 1\n inner: val\n").unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
assert_eq!(entries.len(), 1);
let (_outer_key, outer_value) = &entries[0];
let Node::Mapping {
entries: nested, ..
} = outer_value
else {
panic!("expected nested mapping");
};
assert_eq!(nested.len(), 1);
let (inner_key, _) = &nested[0];
assert_eq!(
inner_key.leading_comments(),
&["# Style 1"],
"comment should be attached to the first nested key"
);
}
#[test]
fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
let docs = load("key:\n # leading\n - item1\n - item2\n").unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_key, seq_value) = &entries[0];
let Node::Sequence { items, .. } = seq_value else {
panic!("expected sequence value");
};
assert_eq!(
items[0].leading_comments(),
&["# leading"],
"comment should be attached to first sequence item"
);
}
#[test]
fn multiple_comments_between_key_and_collection_all_preserved() {
let docs = load("key:\n # first\n # second\n - item\n").unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_key, seq_value) = &entries[0];
let Node::Sequence { items, .. } = seq_value else {
panic!("expected sequence value");
};
assert_eq!(
items[0].leading_comments(),
&["# first", "# second"],
"both comments should be on first item"
);
}
#[test]
fn comment_between_key_and_collection_does_not_corrupt_key_node() {
let docs = load("outer:\n # Style 1\n inner: val\n").unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (outer_key, _) = &entries[0];
assert!(
outer_key.leading_comments().is_empty(),
"outer key should have no leading comments"
);
assert!(
outer_key.trailing_comment().is_none(),
"outer key should have no trailing comment"
);
}
#[test]
fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
let docs = load("key:\n inner: val\n").unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_key, nested) = &entries[0];
let Node::Mapping {
entries: nested_entries,
..
} = nested
else {
panic!("expected nested mapping");
};
let (inner_key, _) = &nested_entries[0];
assert!(
inner_key.leading_comments().is_empty(),
"inner key should have no leading comments when there is no comment"
);
}
// A comment that follows the last item of a nested collection must end up as
// a leading comment on the next sibling entry.
#[test]
fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
let input =
"Lists:\n list-a:\n - item1\n - item2\n\n # Style 2\n list-b:\n - item1\n";
let docs = load(input).unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_lists_key, nested) = &entries[0];
let Node::Mapping {
entries: nested_entries,
..
} = nested
else {
panic!("expected nested mapping");
};
assert_eq!(nested_entries.len(), 2);
let (list_b_key, _) = &nested_entries[1];
assert_eq!(
list_b_key.leading_comments(),
&["# Style 2"],
"# Style 2 should be leading comment on list-b key"
);
}
#[test]
fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
let input = "outer:\n a:\n - x\n # between\n b: y\n";
let docs = load(input).unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_outer_key, outer_val) = &entries[0];
let Node::Mapping {
entries: nested, ..
} = outer_val
else {
panic!("expected nested mapping");
};
assert_eq!(nested.len(), 2);
let (b_key, _) = &nested[1];
assert_eq!(
b_key.leading_comments(),
&["# between"],
"# between should be leading comment on b key"
);
}
#[test]
fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
let input = "parent:\n child1:\n k: v\n # end-of-child1\n child2: val\n";
let docs = load(input).unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_parent_key, parent_val) = &entries[0];
let Node::Mapping {
entries: siblings, ..
} = parent_val
else {
panic!("expected parent mapping value");
};
assert_eq!(siblings.len(), 2);
let (child2_key, _) = &siblings[1];
assert_eq!(
child2_key.leading_comments(),
&["# end-of-child1"],
"# end-of-child1 should be leading comment on child2 key"
);
}
// Only checks that the document still loads and both items survive; the
// destination of the trailing comment is not asserted here.
#[test]
fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
let input = "items:\n - a\n - b\n # tail\n";
let docs = load(input).unwrap();
assert!(!docs.is_empty(), "document should parse without error");
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_items_key, seq_val) = &entries[0];
let Node::Sequence { items, .. } = seq_val else {
panic!("expected sequence value");
};
assert_eq!(items.len(), 2, "sequence items must not be lost");
}
#[test]
fn no_overflow_comments_when_collection_ends_cleanly() {
let docs = load("key:\n - item1\n - item2\n").unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_key, seq_val) = &entries[0];
let Node::Sequence { items, .. } = seq_val else {
panic!("expected sequence value");
};
for item in items {
assert!(
item.leading_comments().is_empty(),
"items should have no leading comments"
);
}
}
#[test]
fn original_bug_report_input_preserves_both_comments() {
let input = "Lists:\n # Style 1\n list-a:\n - item1\n - item2\n\n # Style 2\n list-b:\n - item1\n - item2\n";
let docs = load(input).unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_lists_key, nested) = &entries[0];
let Node::Mapping {
entries: nested_entries,
..
} = nested
else {
panic!("expected nested mapping");
};
assert_eq!(nested_entries.len(), 2);
let (first_key, _) = &nested_entries[0];
let (second_key, _) = &nested_entries[1];
assert_eq!(
first_key.leading_comments(),
&["# Style 1"],
"list-a should have # Style 1 as leading comment"
);
assert_eq!(
second_key.leading_comments(),
&["# Style 2"],
"list-b should have # Style 2 as leading comment"
);
}
#[test]
fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
let input = "map:\n # leading\n key: value # trailing\n # next-leading\n key2: v2\n";
let docs = load(input).unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_map_key, map_val) = &entries[0];
let Node::Mapping {
entries: siblings, ..
} = map_val
else {
panic!("expected mapping value");
};
assert_eq!(siblings.len(), 2);
let (key1, val1) = &siblings[0];
let (key2, _) = &siblings[1];
assert_eq!(key1.leading_comments(), &["# leading"]);
assert_eq!(val1.trailing_comment(), Some("# trailing"));
assert_eq!(key2.leading_comments(), &["# next-leading"]);
}
#[test]
fn deeply_nested_overflow_comments_reach_correct_sibling() {
let input = "top:\n mid:\n - x\n # deep-overflow\n next: y\n";
let docs = load(input).unwrap();
let root = &docs[0].root;
let Node::Mapping { entries, .. } = root else {
panic!("expected root mapping");
};
let (_top_key, top_val) = &entries[0];
let Node::Mapping {
entries: top_entries,
..
} = top_val
else {
panic!("expected top-level mapping");
};
assert_eq!(top_entries.len(), 2);
let (next_key, _) = &top_entries[1];
assert_eq!(
next_key.leading_comments(),
&["# deep-overflow"],
"# deep-overflow should propagate from nested sequence to next sibling"
);
}
// ---- Document start/end marker flags ----
#[rstest]
#[case::bare_document("key: value\n", false, false)]
#[case::start_marker_only("---\nkey: value\n", true, false)]
#[case::end_marker_only("key: value\n...\n", false, true)]
#[case::both_markers("---\nkey: value\n...\n", true, true)]
#[case::empty_with_both_markers("---\n...\n", true, true)]
fn document_marker_flags_match_input(
#[case] input: &str,
#[case] expected_start: bool,
#[case] expected_end: bool,
) {
let docs = load(input).expect("load failed");
assert_eq!(docs.len(), 1);
assert_eq!(docs[0].explicit_start, expected_start, "explicit_start");
assert_eq!(docs[0].explicit_end, expected_end, "explicit_end");
}
#[test]
fn multi_document_flags_are_independent() {
let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
assert_eq!(docs.len(), 3);
assert!(!docs[0].explicit_start, "doc1 explicit_start");
assert!(!docs[0].explicit_end, "doc1 explicit_end");
assert!(docs[1].explicit_start, "doc2 explicit_start");
assert!(docs[1].explicit_end, "doc2 explicit_end");
assert!(docs[2].explicit_start, "doc3 explicit_start");
assert!(!docs[2].explicit_end, "doc3 explicit_end");
}
// ---- sanitize_scalar_for_error ----
#[rstest]
#[case::newline("foo\nbar", '\n', "\\u000A", "foo\\u000Abar")]
#[case::carriage_return("foo\rbar", '\r', "\\u000D", "foo\\u000Dbar")]
#[case::null_byte("foo\0bar", '\0', "\\u0000", "foo\\u0000bar")]
fn sanitize_replaces_control_char_with_escape(
#[case] input: &str,
#[case] raw_char: char,
#[case] escape_seq: &str,
#[case] expected: &str,
) {
let result = sanitize_scalar_for_error(input);
assert!(
!result.contains(raw_char),
"output must not contain the raw control character"
);
assert!(
result.contains(escape_seq),
"output must contain {escape_seq} escape, got: {result:?}"
);
assert_eq!(result, expected);
}
#[test]
fn sanitize_short_value_stored_verbatim() {
let input = "hello";
let result = sanitize_scalar_for_error(input);
assert_eq!(result, "hello");
assert!(
!result.ends_with("..."),
"short value must not be truncated"
);
}
#[test]
fn sanitize_value_at_exact_limit_not_truncated() {
let input = "a".repeat(128);
let result = sanitize_scalar_for_error(&input);
assert_eq!(
result.len(),
128,
"128-char input must produce 128-char output"
);
assert!(
!result.ends_with("..."),
"value at exact limit must not be truncated"
);
}
#[test]
fn sanitize_value_over_limit_truncated() {
let input = "a".repeat(129);
let result = sanitize_scalar_for_error(&input);
assert!(
result.ends_with("..."),
"value over limit must end with '...'"
);
assert_eq!(
result.len(),
128 + 3,
"truncated output must be 128 chars + 3 ellipsis chars"
);
}
// 127 three-byte characters followed by "ab" make 129 characters, so the cut
// falls after the 128th character rather than at a byte offset.
#[test]
fn sanitize_multibyte_char_boundary_not_split() {
let input: String = "中".repeat(127) + "ab"; let result = sanitize_scalar_for_error(&input);
assert!(
result.ends_with("..."),
"129-char multibyte input should be truncated"
);
let char_count = result.trim_end_matches("...").chars().count();
assert_eq!(
char_count, 128,
"truncated portion must be exactly 128 chars"
);
}
// ---- Tag ownership (`Cow::Borrowed` vs `Cow::Owned`) ----
// Loads `input` in lossless mode and returns the root of the first document.
fn load_root(input: &str) -> Node<Span> {
load(input).expect("load failed").remove(0).root
}
// Extracts the tag of a node; aliases carry no tag.
fn node_tag(node: Node<Span>) -> Option<Cow<'static, str>> {
match node {
Node::Scalar { tag, .. } | Node::Mapping { tag, .. } | Node::Sequence { tag, .. } => {
tag
}
Node::Alias { .. } => None,
}
}
#[rstest]
#[case::str_tag("hello\n")]
#[case::int_tag("42\n")]
#[case::null_tag("null\n")]
#[case::map_tag("a: 1\n")]
#[case::seq_tag("- a\n")]
#[case::bare_excl_tag("! hello\n")]
fn resolver_emitted_tag_is_borrowed(#[case] input: &str) {
let tag = node_tag(load_root(input));
assert!(
matches!(tag, Some(Cow::Borrowed(_))),
"resolver-emitted tag must be Borrowed, got: {tag:?}"
);
}
#[rstest]
#[case::scalar("!!str hello\n")]
#[case::mapping("!!map\na: 1\n")]
#[case::sequence("!!seq\n- a\n")]
fn user_authored_tag_is_owned(#[case] input: &str) {
let tag = node_tag(load_root(input));
assert!(
matches!(tag, Some(Cow::Owned(_))),
"user-authored tag must be Owned, got: {tag:?}"
);
}
#[test]
fn alias_node_has_no_tag_field() {
let docs = LoaderBuilder::new()
.build()
.load("- &a x\n- *a\n")
.expect("load failed");
let Node::Sequence { items, .. } = &docs[0].root else {
panic!("expected root sequence");
};
assert!(
matches!(items[1], Node::Alias { .. }),
"second item must be Alias in lossless mode"
);
}
#[test]
fn tag_value_content_preserved_across_cow_variants() {
let Node::Scalar {
tag: tag_resolver, ..
} = load_root("hello\n")
else {
panic!("expected scalar");
};
assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));
let Node::Scalar { tag: tag_user, .. } = load_root("!custom hello\n") else {
panic!("expected scalar");
};
assert_eq!(tag_user.as_deref(), Some("!custom"));
}
// ---- Node metadata presence ----
// True when the node is a scalar/mapping/sequence whose `meta` is `None`.
fn node_meta_is_none(node: &Node<Span>) -> bool {
matches!(
node,
Node::Scalar { meta: None, .. }
| Node::Mapping { meta: None, .. }
| Node::Sequence { meta: None, .. }
)
}
#[rstest]
#[case::plain_scalar("hello\n")]
#[case::plain_mapping("a: 1\n")]
#[case::plain_sequence("- a\n")]
fn loaded_node_with_no_meta_fields_has_meta_none(#[case] input: &str) {
let docs = load(input).unwrap();
let root = &docs[0].root;
assert!(
node_meta_is_none(root),
"plain node must have meta: None, got: {root:?}"
);
}
#[test]
fn loaded_anchored_scalar_has_meta_some() {
let docs = load("- &foo bar\n").unwrap();
let Node::Sequence { items, .. } = &docs[0].root else {
panic!("expected root Sequence");
};
let item = &items[0];
assert!(
matches!(item, Node::Scalar { meta: Some(_), .. }),
"anchored scalar must have meta: Some, got: {item:?}"
);
assert_eq!(item.anchor(), Some("foo"));
}
#[test]
fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
let docs = load("&tag hello\n").unwrap();
let root = &docs[0].root;
assert!(
matches!(root, Node::Scalar { meta: Some(_), .. }),
"anchored scalar must have meta: Some"
);
assert!(
root.anchor_loc().is_some(),
"anchor_loc() must be Some for anchored scalar"
);
}
// ---- Anchor/tag properties on collections ----
// Properties written before a collection must attach to the collection
// itself, in either order and in both block and flow style.
#[rstest]
#[case::block_mapping_anchor_only("&a\nk: v\n", Some("a"), false)]
#[case::block_mapping_tag_only("!mytag\nk: v\n", None, true)]
#[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n", Some("a"), true)]
#[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n", Some("a"), true)]
#[case::block_sequence_anchor_only("&a\n- item\n", Some("a"), false)]
#[case::block_sequence_tag_only("!mytag\n- item\n", None, true)]
#[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n", Some("a"), true)]
#[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n", Some("a"), true)]
#[case::flow_mapping_anchor_only("&a {k: v}\n", Some("a"), false)]
#[case::flow_mapping_tag_only("!mytag {k: v}\n", None, true)]
#[case::flow_mapping_anchor_then_tag("&a !mytag {k: v}\n", Some("a"), true)]
#[case::flow_mapping_tag_then_anchor("!mytag &a {k: v}\n", Some("a"), true)]
#[case::flow_sequence_anchor_only("&a [item]\n", Some("a"), false)]
#[case::flow_sequence_tag_only("!mytag [item]\n", None, true)]
#[case::flow_sequence_anchor_then_tag("&a !mytag [item]\n", Some("a"), true)]
#[case::flow_sequence_tag_then_anchor("!mytag &a [item]\n", Some("a"), true)]
fn combined_properties_attach_to_root_collection(
#[case] input: &str,
#[case] expected_anchor: Option<&str>,
#[case] expected_has_tag: bool,
) {
let docs = load(input).unwrap();
let root = &docs[0].root;
assert_eq!(root.anchor(), expected_anchor, "anchor on root collection");
assert_eq!(
root.tag_loc().is_some(),
expected_has_tag,
"tag_loc on root collection"
);
}
// The same properties must not show up on the first key or item instead.
#[rstest]
#[case::block_mapping_anchor_only("&a\nk: v\n")]
#[case::block_mapping_tag_only("!mytag\nk: v\n")]
#[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n")]
#[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n")]
#[case::block_sequence_anchor_only("&a\n- item\n")]
#[case::block_sequence_tag_only("!mytag\n- item\n")]
#[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n")]
#[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n")]
fn first_child_of_block_collection_has_no_properties(#[case] input: &str) {
let docs = load(input).unwrap();
let root = &docs[0].root;
let first_child: &Node<Span> = match root {
Node::Mapping { entries, .. } => &entries[0].0,
Node::Sequence { items, .. } => &items[0],
Node::Scalar { .. } | Node::Alias { .. } => panic!("expected block collection"),
};
assert_eq!(
first_child.anchor(),
None,
"anchor must not appear on first child"
);
assert!(
first_child.tag_loc().is_none(),
"tag_loc must not appear on first child"
);
}
#[test]
fn anchor_on_block_mapping_with_tag_is_resolvable_via_alias() {
let input = "root:\n tagged: &a !mytag\n k: v\n ref: *a\n";
let result = LoaderBuilder::new().resolved().build().load(input);
assert!(
result.is_ok(),
"alias *a must resolve — anchor must be on the mapping, not lost to first key: {result:?}"
);
}
}