mod comments;
mod reloc;
mod stream;
use comments::{attach_leading_comments, attach_trailing_comment};
use reloc::reloc;
use stream::{
consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
};
use std::collections::{HashMap, HashSet};
use std::iter::Peekable;
use crate::error::Error;
use crate::event::{Event, ScalarStyle};
use crate::node::{Document, Node};
use crate::pos::{Pos, Span};
/// Errors that can be returned while loading an event stream into documents.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
/// The underlying event stream yielded an error; carries its position and message.
#[error("parse error at {pos:?}: {message}")]
Parse { pos: Pos, message: String },
/// The event stream ended where the loader expected another item.
#[error("unexpected end of event stream")]
UnexpectedEndOfStream,
/// Collections were nested deeper than `LoaderOptions::max_nesting_depth`.
#[error("nesting depth limit exceeded (max: {limit})")]
NestingDepthLimitExceeded { limit: usize },
/// More distinct anchor names were defined than `LoaderOptions::max_anchors` allows.
#[error("anchor count limit exceeded (max: {limit})")]
AnchorCountLimitExceeded { limit: usize },
/// The expanded-node counter went past `LoaderOptions::max_expanded_nodes`.
#[error("alias expansion node limit exceeded (max: {limit})")]
AliasExpansionLimitExceeded { limit: usize },
/// An alias was encountered again while its own expansion was still in progress.
#[error("circular alias reference: '{name}'")]
CircularAlias { name: String },
/// An alias referred to a name with no registered anchor.
#[error("undefined alias: '{name}'")]
UndefinedAlias { name: String },
}
/// Module-local result alias whose error type is [`LoadError`].
type Result<T> = std::result::Result<T, LoadError>;
/// Peekable, boxed iterator over `(event, span)` pairs (or parser errors)
/// that the loader consumes.
type EventStream<'a> =
Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
/// Selects how alias events are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
/// Aliases are kept as `Node::Alias` nodes; no anchor lookup is performed.
Lossless,
/// Aliases are replaced by an expanded copy of the node registered for the
/// anchor; an unknown name is reported as `LoadError::UndefinedAlias`.
Resolved,
}
/// Limits and mode that govern a [`Loader`].
#[derive(Debug, Clone)]
pub struct LoaderOptions {
/// Maximum depth of nested mappings/sequences before
/// `LoadError::NestingDepthLimitExceeded` is returned.
pub max_nesting_depth: usize,
/// Maximum number of distinct anchor names per document before
/// `LoadError::AnchorCountLimitExceeded` is returned.
pub max_anchors: usize,
/// Maximum value of the per-document expanded-node counter before
/// `LoadError::AliasExpansionLimitExceeded` is returned.
pub max_expanded_nodes: usize,
/// Whether aliases are preserved or expanded.
pub mode: LoadMode,
}
impl Default for LoaderOptions {
    /// Returns the stock configuration: lossless mode, a nesting depth of 512,
    /// at most 10,000 anchors and at most 1,000,000 expanded nodes.
    fn default() -> Self {
        let mode = LoadMode::Lossless;
        Self {
            mode,
            max_expanded_nodes: 1_000_000,
            max_anchors: 10_000,
            max_nesting_depth: 512,
        }
    }
}
/// Fluent builder for a [`Loader`].
///
/// Starts from [`LoaderOptions::default`]; each setter overrides a single
/// option and [`LoaderBuilder::build`] produces the configured loader.
///
/// Implements `Debug` and `Clone` so a partially configured builder can be
/// logged or reused as a template for several loaders.
#[derive(Debug, Clone)]
pub struct LoaderBuilder {
    /// Options accumulated so far.
    options: LoaderOptions,
}
impl LoaderBuilder {
    /// Creates a builder holding the default [`LoaderOptions`].
    #[must_use]
    pub fn new() -> Self {
        let options = LoaderOptions::default();
        Self { options }
    }

    /// Selects [`LoadMode::Lossless`].
    #[must_use]
    pub const fn lossless(self) -> Self {
        let mut options = self.options;
        options.mode = LoadMode::Lossless;
        Self { options }
    }

    /// Selects [`LoadMode::Resolved`].
    #[must_use]
    pub const fn resolved(self) -> Self {
        let mut options = self.options;
        options.mode = LoadMode::Resolved;
        Self { options }
    }

    /// Overrides the maximum collection nesting depth.
    #[must_use]
    pub const fn max_nesting_depth(self, limit: usize) -> Self {
        let mut options = self.options;
        options.max_nesting_depth = limit;
        Self { options }
    }

    /// Overrides the maximum number of distinct anchors per document.
    #[must_use]
    pub const fn max_anchors(self, limit: usize) -> Self {
        let mut options = self.options;
        options.max_anchors = limit;
        Self { options }
    }

    /// Overrides the maximum number of expanded nodes per document.
    #[must_use]
    pub const fn max_expanded_nodes(self, limit: usize) -> Self {
        let mut options = self.options;
        options.max_expanded_nodes = limit;
        Self { options }
    }

    /// Consumes the builder and returns a [`Loader`] with the chosen options.
    #[must_use]
    pub const fn build(self) -> Loader {
        let Self { options } = self;
        Loader { options }
    }
}
impl Default for LoaderBuilder {
fn default() -> Self {
Self::new()
}
}
/// A configured loader that turns input text into a list of documents.
///
/// Obtained from [`LoaderBuilder::build`]. Implements `Debug` and `Clone` so a
/// configured loader can be logged or duplicated.
#[derive(Debug, Clone)]
pub struct Loader {
    /// Limits and mode applied to every `load` call.
    options: LoaderOptions,
}
impl Loader {
pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
let mut state = LoadState::new(&self.options);
let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
Box::new(crate::parse_events(input));
state.run(iter.peekable())
}
}
/// Loads all documents from `input` using a default loader in lossless mode.
///
/// # Errors
///
/// Returns the [`LoadError`] produced by [`Loader::load`].
pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
    let loader = LoaderBuilder::new().lossless().build();
    loader.load(input)
}
/// Mutable state carried through a single `Loader::load` call.
struct LoadState<'opt> {
/// Limits and mode borrowed from the owning loader.
options: &'opt LoaderOptions,
/// Most recently registered node for each anchor name in the current document.
anchor_map: HashMap<String, Node<Span>>,
/// Number of distinct anchor names registered in the current document.
anchor_count: usize,
/// Current nesting depth of mappings/sequences being parsed.
depth: usize,
/// Running counter checked against `max_expanded_nodes`; cleared per document.
expanded_nodes: usize,
}
impl<'opt> LoadState<'opt> {
/// Creates empty load state bound to `options`: no anchors, zero depth and
/// zero expanded nodes.
fn new(options: &'opt LoaderOptions) -> Self {
    let anchor_map = HashMap::new();
    Self {
        options,
        anchor_map,
        expanded_nodes: 0,
        depth: 0,
        anchor_count: 0,
    }
}
/// Clears the per-document bookkeeping (anchor map, anchor count and
/// expanded-node counter). The nesting depth is left untouched.
fn reset_for_document(&mut self) {
    self.expanded_nodes = 0;
    self.anchor_count = 0;
    self.anchor_map.clear();
}
/// Drives the whole event stream and collects one `Document` per
/// `DocumentStart` event.
///
/// The very first stream item is discarded without inspection unless it is
/// an error. Events seen between documents that are neither `DocumentStart`
/// nor `StreamEnd` are ignored.
///
/// # Errors
///
/// Returns `LoadError::Parse` if the first stream item is an error, and
/// propagates any error from `next_from`, `consume_leading_doc_comments` or
/// `parse_node`.
fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
    // Drop the leading item; only an error there aborts the load.
    if let Some(Err(e)) = stream.next() {
        return Err(LoadError::Parse {
            pos: e.pos,
            message: e.message,
        });
    }

    let mut docs = Vec::new();
    while let Some((event, _)) = next_from(&mut stream)? {
        match event {
            Event::StreamEnd => break,
            Event::DocumentStart {
                version,
                tag_directives,
                ..
            } => {
                // Anchors and counters never leak from one document to the next.
                self.reset_for_document();

                let mut comments: Vec<String> = Vec::new();
                consume_leading_doc_comments(&mut stream, &mut comments)?;

                // A document with no content gets an empty scalar as its root.
                let root = if is_document_end(stream.peek()) {
                    empty_scalar()
                } else {
                    self.parse_node(&mut stream)?
                };

                // Swallow the document-end marker when it directly follows the root.
                let _ = stream.next_if(|item| matches!(item, Ok((Event::DocumentEnd { .. }, _))));

                docs.push(Document {
                    root,
                    version,
                    tags: tag_directives,
                    comments,
                });
            }
            _ => {}
        }
    }
    Ok(docs)
}
/// Reads the next event from `stream` and builds the node it starts.
///
/// - `Scalar` becomes a `Node::Scalar`.
/// - `MappingStart` / `SequenceStart` recurse until the matching end event
///   (or `StreamEnd` / exhaustion), attaching leading comments to keys/items
///   and any trailing comment reported by `peek_trailing_comment` to
///   values/items.
/// - `Alias` is delegated to `resolve_alias`.
/// - `Comment` is skipped and the following event is parsed instead.
/// - Any other structural event is consumed and yields an empty scalar.
///
/// Anchored nodes are passed to `register_anchor` once they are fully built.
///
/// # Errors
///
/// Returns `LoadError::NestingDepthLimitExceeded` when collection depth goes
/// past `max_nesting_depth`, `LoadError::Parse` for an error item found while
/// peeking inside a collection, and propagates errors from `next_from`, the
/// comment helpers, `register_anchor` and `resolve_alias`.
#[expect(
clippy::too_many_lines,
reason = "match-on-event-type; splitting would obscure flow"
)]
fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
// An exhausted stream yields an empty scalar rather than an error.
let Some((event, span)) = next_from(stream)? else {
return Ok(empty_scalar());
};
match event {
Event::Scalar {
value,
style,
anchor,
tag,
} => {
let node = Node::Scalar {
value: value.into_owned(),
style,
anchor: anchor.map(str::to_owned),
tag: tag.map(std::borrow::Cow::into_owned),
loc: span,
leading_comments: Vec::new(),
trailing_comment: None,
};
// Register a copy under the anchor name so later aliases can find it.
if let Some(name) = node.anchor() {
self.register_anchor(name.to_owned(), node.clone())?;
}
Ok(node)
}
Event::MappingStart { anchor, tag, .. } => {
let anchor = anchor.map(str::to_owned);
let tag = tag.map(std::borrow::Cow::into_owned);
// Entering a collection: bump the depth and enforce the nesting limit.
self.depth += 1;
if self.depth > self.options.max_nesting_depth {
return Err(LoadError::NestingDepthLimitExceeded {
limit: self.options.max_nesting_depth,
});
}
let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
// Falls back to the start event's span if no MappingEnd is seen.
let mut end_span = span;
loop {
let leading = consume_leading_comments(stream)?;
match stream.peek() {
None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => break,
Some(Err(_)) => {
// Consume the peeked error so it can be moved into a LoadError.
return Err(match stream.next() {
Some(Err(e)) => LoadError::Parse {
pos: e.pos,
message: e.message,
},
_ => LoadError::UnexpectedEndOfStream,
});
}
Some(Ok(_)) => {}
}
// Key first, then value; leading comments go on the key.
let mut key = self.parse_node(stream)?;
attach_leading_comments(&mut key, leading);
let mut value = self.parse_node(stream)?;
let value_end_line = node_end_line(&value);
if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
attach_trailing_comment(&mut value, trail);
}
entries.push((key, value));
}
// Consume the MappingEnd (if that is what stopped the loop) and use its span.
if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
end_span = *end;
let _ = stream.next();
}
self.depth -= 1;
let node = Node::Mapping {
entries,
anchor: anchor.clone(),
tag,
loc: Span {
start: span.start,
end: end_span.end,
},
leading_comments: Vec::new(),
trailing_comment: None,
};
// The anchor is registered only after the whole mapping has been built.
if let Some(name) = anchor {
self.register_anchor(name, node.clone())?;
}
Ok(node)
}
Event::SequenceStart { anchor, tag, .. } => {
let anchor = anchor.map(str::to_owned);
let tag = tag.map(std::borrow::Cow::into_owned);
// Same depth accounting as for mappings.
self.depth += 1;
if self.depth > self.options.max_nesting_depth {
return Err(LoadError::NestingDepthLimitExceeded {
limit: self.options.max_nesting_depth,
});
}
let mut items: Vec<Node<Span>> = Vec::new();
// Falls back to the start event's span if no SequenceEnd is seen.
let mut end_span = span;
loop {
let leading = consume_leading_comments(stream)?;
match stream.peek() {
None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => break,
Some(Err(_)) => {
// Consume the peeked error so it can be moved into a LoadError.
return Err(match stream.next() {
Some(Err(e)) => LoadError::Parse {
pos: e.pos,
message: e.message,
},
_ => LoadError::UnexpectedEndOfStream,
});
}
Some(Ok(_)) => {}
}
let mut item = self.parse_node(stream)?;
attach_leading_comments(&mut item, leading);
let item_end_line = node_end_line(&item);
if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
attach_trailing_comment(&mut item, trail);
}
items.push(item);
}
// Consume the SequenceEnd (if that is what stopped the loop) and use its span.
if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
end_span = *end;
let _ = stream.next();
}
self.depth -= 1;
let node = Node::Sequence {
items,
anchor: anchor.clone(),
tag,
loc: Span {
start: span.start,
end: end_span.end,
},
leading_comments: Vec::new(),
trailing_comment: None,
};
// The anchor is registered only after the whole sequence has been built.
if let Some(name) = anchor {
self.register_anchor(name, node.clone())?;
}
Ok(node)
}
Event::Alias { name } => {
let name = name.to_owned();
self.resolve_alias(&name, span)
}
Event::Comment { .. } => {
// Skip the comment and parse whatever follows it.
self.parse_node(stream)
}
Event::StreamStart
| Event::StreamEnd
| Event::DocumentStart { .. }
| Event::DocumentEnd { .. }
| Event::MappingEnd
| Event::SequenceEnd => {
// Structural events carry no node; the event is consumed and an empty scalar stands in.
Ok(empty_scalar())
}
}
}
/// Stores `node` under anchor `name`, replacing any earlier node with the
/// same name.
///
/// Only a name not yet present in the map counts toward `max_anchors`. In
/// resolved mode every registration also adds one to the expanded-node
/// counter.
///
/// # Errors
///
/// Returns `LoadError::AnchorCountLimitExceeded` or
/// `LoadError::AliasExpansionLimitExceeded` when the respective limit is
/// passed; in both cases the node is not stored.
fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
    let limits = self.options;

    let first_definition = !self.anchor_map.contains_key(&name);
    if first_definition {
        self.anchor_count += 1;
        if self.anchor_count > limits.max_anchors {
            return Err(LoadError::AnchorCountLimitExceeded {
                limit: limits.max_anchors,
            });
        }
    }

    if matches!(limits.mode, LoadMode::Resolved) {
        self.expanded_nodes += 1;
        if self.expanded_nodes > limits.max_expanded_nodes {
            return Err(LoadError::AliasExpansionLimitExceeded {
                limit: limits.max_expanded_nodes,
            });
        }
    }

    self.anchor_map.insert(name, node);
    Ok(())
}
/// Produces the node for an alias event named `name` located at `loc`.
///
/// In lossless mode the alias is kept as a `Node::Alias` at `loc`. In
/// resolved mode the node registered for `name` is cloned and run through
/// `expand_node` with a fresh in-progress set; `loc` is not applied to the
/// result here.
///
/// # Errors
///
/// In resolved mode, returns `LoadError::UndefinedAlias` when no anchor named
/// `name` is registered, and propagates errors from `expand_node`.
fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
    match self.options.mode {
        LoadMode::Resolved => {
            let Some(target) = self.anchor_map.get(name).cloned() else {
                return Err(LoadError::UndefinedAlias {
                    name: name.to_owned(),
                });
            };
            let mut visiting = HashSet::new();
            self.expand_node(target, &mut visiting)
        }
        LoadMode::Lossless => {
            let name = name.to_owned();
            Ok(Node::Alias {
                name,
                loc,
                leading_comments: Vec::new(),
                trailing_comment: None,
            })
        }
    }
}
/// Recursively replaces every alias inside `node` with an expanded copy of
/// its anchored target.
///
/// Each visited node (including `node` itself) adds one to the expanded-node
/// counter. `in_progress` holds the alias names currently being expanded on
/// the recursion path and is used to detect cycles. Expanded alias targets
/// are passed through `reloc` with the alias's own location.
///
/// # Errors
///
/// Returns `LoadError::AliasExpansionLimitExceeded` when the counter passes
/// `max_expanded_nodes`, `LoadError::CircularAlias` when an alias is reached
/// while already in progress, and `LoadError::UndefinedAlias` when an alias
/// has no registered anchor.
fn expand_node(
    &mut self,
    node: Node<Span>,
    in_progress: &mut HashSet<String>,
) -> Result<Node<Span>> {
    // Charge this node against the expansion budget before doing any work.
    self.expanded_nodes += 1;
    let budget = self.options.max_expanded_nodes;
    if self.expanded_nodes > budget {
        return Err(LoadError::AliasExpansionLimitExceeded { limit: budget });
    }

    match node {
        Node::Alias { name, loc, .. } => {
            if in_progress.contains(&name) {
                return Err(LoadError::CircularAlias { name });
            }
            let Some(target) = self.anchor_map.get(&name).cloned() else {
                return Err(LoadError::UndefinedAlias { name });
            };
            // Mark the name only for the duration of its own expansion.
            in_progress.insert(name.clone());
            let expanded = self.expand_node(target, in_progress)?;
            in_progress.remove(&name);
            Ok(reloc(expanded, loc))
        }
        Node::Mapping {
            entries,
            anchor,
            tag,
            loc,
            leading_comments,
            trailing_comment,
        } => {
            let mut resolved = Vec::with_capacity(entries.len());
            for (key, value) in entries {
                let key = self.expand_node(key, in_progress)?;
                let value = self.expand_node(value, in_progress)?;
                resolved.push((key, value));
            }
            Ok(Node::Mapping {
                entries: resolved,
                anchor,
                tag,
                loc,
                leading_comments,
                trailing_comment,
            })
        }
        Node::Sequence {
            items,
            anchor,
            tag,
            loc,
            leading_comments,
            trailing_comment,
        } => {
            let mut resolved = Vec::with_capacity(items.len());
            for item in items {
                let item = self.expand_node(item, in_progress)?;
                resolved.push(item);
            }
            Ok(Node::Sequence {
                items: resolved,
                anchor,
                tag,
                loc,
                leading_comments,
                trailing_comment,
            })
        }
        // Scalars contain nothing to expand.
        leaf @ Node::Scalar { .. } => Ok(leaf),
    }
}
}
/// Reports whether the peeked stream item means the current document has no
/// further content: the stream is exhausted, or the next event is
/// `DocumentEnd` or `StreamEnd`. Errors and all other events return `false`.
const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
    match peeked {
        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _))) => true,
        Some(_) => false,
    }
}
/// Returns the line of the end position of `node`'s span, whatever the node
/// kind.
const fn node_end_line(node: &Node<Span>) -> usize {
    // Every variant carries a `loc`, so a single irrefutable or-pattern suffices.
    let (Node::Scalar { loc, .. }
    | Node::Mapping { loc, .. }
    | Node::Sequence { loc, .. }
    | Node::Alias { loc, .. }) = node;
    loc.end.line
}
/// Builds the placeholder node used where no content is available: a plain
/// scalar with an empty value, no anchor, no tag, no comments, and a span
/// that starts and ends at `Pos::ORIGIN`.
const fn empty_scalar() -> Node<Span> {
    let loc = Span {
        start: Pos::ORIGIN,
        end: Pos::ORIGIN,
    };
    Node::Scalar {
        loc,
        value: String::new(),
        style: ScalarStyle::Plain,
        tag: None,
        anchor: None,
        trailing_comment: None,
        leading_comments: Vec::new(),
    }
}
// Unit tests for per-document anchor reset, the anchor-count limit and
// circular-alias detection.
#[cfg(test)]
#[expect(clippy::expect_used, clippy::unwrap_used, reason = "test code")]
mod tests {
use super::*;
// An anchor defined in the first document must not be visible in the second:
// in resolved mode the alias in document 2 has to fail as undefined.
#[test]
fn loader_state_resets_anchor_map_between_documents() {
let result = LoaderBuilder::new()
.resolved()
.build()
.load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
assert!(
result.is_err(),
"expected Err: *foo in doc 2 should be undefined"
);
assert!(matches!(
result.unwrap_err(),
LoadError::UndefinedAlias { .. }
));
}
// With `max_anchors` set to 2, the third distinct anchor name must be rejected.
#[test]
fn register_anchor_increments_count() {
let options = LoaderOptions {
max_anchors: 2,
..LoaderOptions::default()
};
let mut state = LoadState::new(&options);
let node = Node::Scalar {
value: "x".to_owned(),
style: ScalarStyle::Plain,
anchor: None,
tag: None,
loc: Span {
start: Pos::ORIGIN,
end: Pos::ORIGIN,
},
leading_comments: Vec::new(),
trailing_comment: None,
};
assert!(state.register_anchor("a".to_owned(), node.clone()).is_ok());
assert!(state.register_anchor("b".to_owned(), node.clone()).is_ok());
let err = state
.register_anchor("c".to_owned(), node)
.expect_err("expected AnchorCountLimitExceeded");
assert!(matches!(
err,
LoadError::AnchorCountLimitExceeded { limit: 2 }
));
}
// An anchor whose stored node is an alias to itself must be reported as circular.
#[test]
fn expand_node_detects_circular_alias() {
let options = LoaderOptions {
mode: LoadMode::Resolved,
..LoaderOptions::default()
};
let mut state = LoadState::new(&options);
let alias_node = Node::Alias {
name: "a".to_owned(),
loc: Span {
start: Pos::ORIGIN,
end: Pos::ORIGIN,
},
leading_comments: Vec::new(),
trailing_comment: None,
};
// Map anchor "a" to an alias of "a" to create the cycle directly in the state.
state.anchor_map.insert("a".to_owned(), alias_node.clone());
let mut in_progress = HashSet::new();
let result = state.expand_node(alias_node, &mut in_progress);
assert!(
matches!(result, Err(LoadError::CircularAlias { .. })),
"expected CircularAlias, got: {result:?}"
);
}
}