1mod comments;
33mod reloc;
34mod stream;
35
36use comments::{attach_leading_comments, attach_trailing_comment};
37use reloc::reloc;
38use stream::{
39 consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
40};
41
42use std::collections::{HashMap, HashSet};
43use std::iter::Peekable;
44
45use crate::error::Error;
46use crate::event::{Event, ScalarStyle};
47use crate::node::{Document, Node};
48use crate::pos::{Pos, Span};
49
50#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
56pub enum LoadError {
57 #[error("parse error at {pos:?}: {message}")]
59 Parse { pos: Pos, message: String },
60
61 #[error("unexpected end of event stream")]
63 UnexpectedEndOfStream,
64
65 #[error("nesting depth limit exceeded (max: {limit})")]
67 NestingDepthLimitExceeded { limit: usize },
68
69 #[error("anchor count limit exceeded (max: {limit})")]
71 AnchorCountLimitExceeded { limit: usize },
72
73 #[error("alias expansion node limit exceeded (max: {limit})")]
75 AliasExpansionLimitExceeded { limit: usize },
76
77 #[error("circular alias reference: '{name}'")]
79 CircularAlias { name: String },
80
81 #[error("undefined alias: '{name}'")]
83 UndefinedAlias { name: String },
84}
85
86type Result<T> = std::result::Result<T, LoadError>;
88
89type EventStream<'a> =
91 Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
92
/// How alias events are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Keep every alias as an alias node carrying the referenced name; the
    /// name is not looked up.
    Lossless,
    /// Replace every alias with an expanded copy of the anchored node;
    /// an alias whose anchor is unknown is an error.
    Resolved,
}
105
106#[derive(Debug, Clone)]
108pub struct LoaderOptions {
109 pub max_nesting_depth: usize,
111 pub max_anchors: usize,
113 pub max_expanded_nodes: usize,
116 pub mode: LoadMode,
118}
119
120impl Default for LoaderOptions {
121 fn default() -> Self {
122 Self {
123 max_nesting_depth: 512,
124 max_anchors: 10_000,
125 max_expanded_nodes: 1_000_000,
126 mode: LoadMode::Lossless,
127 }
128 }
129}
130
131pub struct LoaderBuilder {
144 options: LoaderOptions,
145}
146
147impl LoaderBuilder {
148 #[must_use]
150 pub fn new() -> Self {
151 Self {
152 options: LoaderOptions::default(),
153 }
154 }
155
156 #[must_use]
158 pub const fn lossless(mut self) -> Self {
159 self.options.mode = LoadMode::Lossless;
160 self
161 }
162
163 #[must_use]
165 pub const fn resolved(mut self) -> Self {
166 self.options.mode = LoadMode::Resolved;
167 self
168 }
169
170 #[must_use]
172 pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
173 self.options.max_nesting_depth = limit;
174 self
175 }
176
177 #[must_use]
179 pub const fn max_anchors(mut self, limit: usize) -> Self {
180 self.options.max_anchors = limit;
181 self
182 }
183
184 #[must_use]
186 pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
187 self.options.max_expanded_nodes = limit;
188 self
189 }
190
191 #[must_use]
193 pub const fn build(self) -> Loader {
194 Loader {
195 options: self.options,
196 }
197 }
198}
199
200impl Default for LoaderBuilder {
201 fn default() -> Self {
202 Self::new()
203 }
204}
205
206pub struct Loader {
212 options: LoaderOptions,
213}
214
215impl Loader {
216 pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
223 let mut state = LoadState::new(&self.options);
224 let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
225 Box::new(crate::parse_events(input));
226 state.run(iter.peekable())
227 }
228}
229
230pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
250 LoaderBuilder::new().lossless().build().load(input)
251}
252
253struct LoadState<'opt> {
258 options: &'opt LoaderOptions,
259 anchor_map: HashMap<String, Node<Span>>,
261 anchor_count: usize,
263 depth: usize,
265 expanded_nodes: usize,
267}
268
269impl<'opt> LoadState<'opt> {
270 fn new(options: &'opt LoaderOptions) -> Self {
271 Self {
272 options,
273 anchor_map: HashMap::new(),
274 anchor_count: 0,
275 depth: 0,
276 expanded_nodes: 0,
277 }
278 }
279
280 fn reset_for_document(&mut self) {
281 self.anchor_map.clear();
282 self.anchor_count = 0;
283 self.expanded_nodes = 0;
284 }
285
286 fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
287 let mut docs: Vec<Document<Span>> = Vec::new();
288
289 match stream.next() {
291 Some(Ok(_)) | None => {}
292 Some(Err(e)) => {
293 return Err(LoadError::Parse {
294 pos: e.pos,
295 message: e.message,
296 });
297 }
298 }
299
300 loop {
301 match next_from(&mut stream)? {
303 None | Some((Event::StreamEnd, _)) => break,
304 Some((
305 Event::DocumentStart {
306 version,
307 tag_directives,
308 ..
309 },
310 _,
311 )) => {
312 let doc_version = version;
313 let doc_tags = tag_directives;
314 self.reset_for_document();
315
316 let mut doc_comments: Vec<String> = Vec::new();
317
318 consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
320
321 let root = if is_document_end(stream.peek()) {
323 empty_scalar()
325 } else {
326 self.parse_node(&mut stream)?
327 };
328
329 if matches!(stream.peek(), Some(Ok((Event::DocumentEnd { .. }, _)))) {
331 let _ = stream.next();
332 }
333
334 docs.push(Document {
335 root,
336 version: doc_version,
337 tags: doc_tags,
338 comments: doc_comments,
339 });
340 }
341 Some(_) => {
342 }
344 }
345 }
346
347 Ok(docs)
348 }
349
350 #[expect(
354 clippy::too_many_lines,
355 reason = "match-on-event-type; splitting would obscure flow"
356 )]
357 fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
358 let Some((event, span)) = next_from(stream)? else {
359 return Ok(empty_scalar());
360 };
361
362 match event {
363 Event::Scalar {
364 value,
365 style,
366 anchor,
367 tag,
368 } => {
369 let node = Node::Scalar {
370 value: value.into_owned(),
371 style,
372 anchor: anchor.map(str::to_owned),
373 tag: tag.map(std::borrow::Cow::into_owned),
374 loc: span,
375 leading_comments: Vec::new(),
376 trailing_comment: None,
377 };
378 if let Some(name) = node.anchor() {
379 self.register_anchor(name.to_owned(), node.clone())?;
380 }
381 Ok(node)
382 }
383
384 Event::MappingStart { anchor, tag, .. } => {
385 let anchor = anchor.map(str::to_owned);
386 let tag = tag.map(std::borrow::Cow::into_owned);
387
388 self.depth += 1;
389 if self.depth > self.options.max_nesting_depth {
390 return Err(LoadError::NestingDepthLimitExceeded {
391 limit: self.options.max_nesting_depth,
392 });
393 }
394
395 let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
396 let mut end_span = span;
397
398 loop {
399 let leading = consume_leading_comments(stream)?;
402
403 match stream.peek() {
404 None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => break,
405 Some(Err(_)) => {
406 return Err(match stream.next() {
408 Some(Err(e)) => LoadError::Parse {
409 pos: e.pos,
410 message: e.message,
411 },
412 _ => LoadError::UnexpectedEndOfStream,
413 });
414 }
415 Some(Ok(_)) => {}
416 }
417
418 let mut key = self.parse_node(stream)?;
419 attach_leading_comments(&mut key, leading);
420
421 let mut value = self.parse_node(stream)?;
422
423 let value_end_line = node_end_line(&value);
425 if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
426 attach_trailing_comment(&mut value, trail);
427 }
428
429 entries.push((key, value));
430 }
431
432 if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
434 end_span = *end;
435 let _ = stream.next();
436 }
437 self.depth -= 1;
438
439 let node = Node::Mapping {
440 entries,
441 anchor: anchor.clone(),
442 tag,
443 loc: Span {
444 start: span.start,
445 end: end_span.end,
446 },
447 leading_comments: Vec::new(),
448 trailing_comment: None,
449 };
450 if let Some(name) = anchor {
451 self.register_anchor(name, node.clone())?;
452 }
453 Ok(node)
454 }
455
456 Event::SequenceStart { anchor, tag, .. } => {
457 let anchor = anchor.map(str::to_owned);
458 let tag = tag.map(std::borrow::Cow::into_owned);
459
460 self.depth += 1;
461 if self.depth > self.options.max_nesting_depth {
462 return Err(LoadError::NestingDepthLimitExceeded {
463 limit: self.options.max_nesting_depth,
464 });
465 }
466
467 let mut items: Vec<Node<Span>> = Vec::new();
468 let mut end_span = span;
469
470 loop {
471 let leading = consume_leading_comments(stream)?;
473
474 match stream.peek() {
475 None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => break,
476 Some(Err(_)) => {
477 return Err(match stream.next() {
479 Some(Err(e)) => LoadError::Parse {
480 pos: e.pos,
481 message: e.message,
482 },
483 _ => LoadError::UnexpectedEndOfStream,
484 });
485 }
486 Some(Ok(_)) => {}
487 }
488
489 let mut item = self.parse_node(stream)?;
490 attach_leading_comments(&mut item, leading);
491
492 let item_end_line = node_end_line(&item);
494 if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
495 attach_trailing_comment(&mut item, trail);
496 }
497
498 items.push(item);
499 }
500
501 if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
503 end_span = *end;
504 let _ = stream.next();
505 }
506 self.depth -= 1;
507
508 let node = Node::Sequence {
509 items,
510 anchor: anchor.clone(),
511 tag,
512 loc: Span {
513 start: span.start,
514 end: end_span.end,
515 },
516 leading_comments: Vec::new(),
517 trailing_comment: None,
518 };
519 if let Some(name) = anchor {
520 self.register_anchor(name, node.clone())?;
521 }
522 Ok(node)
523 }
524
525 Event::Alias { name } => {
526 let name = name.to_owned();
527 self.resolve_alias(&name, span)
528 }
529
530 Event::Comment { .. } => {
531 self.parse_node(stream)
533 }
534
535 Event::StreamStart
536 | Event::StreamEnd
537 | Event::DocumentStart { .. }
538 | Event::DocumentEnd { .. }
539 | Event::MappingEnd
540 | Event::SequenceEnd => {
541 Ok(empty_scalar())
543 }
544 }
545 }
546
547 fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
548 if !self.anchor_map.contains_key(&name) {
549 self.anchor_count += 1;
550 if self.anchor_count > self.options.max_anchors {
551 return Err(LoadError::AnchorCountLimitExceeded {
552 limit: self.options.max_anchors,
553 });
554 }
555 }
556 if self.options.mode == LoadMode::Resolved {
560 self.expanded_nodes += 1;
561 if self.expanded_nodes > self.options.max_expanded_nodes {
562 return Err(LoadError::AliasExpansionLimitExceeded {
563 limit: self.options.max_expanded_nodes,
564 });
565 }
566 }
567 self.anchor_map.insert(name, node);
568 Ok(())
569 }
570
571 fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
572 match self.options.mode {
573 LoadMode::Lossless => Ok(Node::Alias {
574 name: name.to_owned(),
575 loc,
576 leading_comments: Vec::new(),
577 trailing_comment: None,
578 }),
579 LoadMode::Resolved => {
580 let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
581 LoadError::UndefinedAlias {
582 name: name.to_owned(),
583 }
584 })?;
585 let mut in_progress: HashSet<String> = HashSet::new();
586 self.expand_node(anchored, &mut in_progress)
587 }
588 }
589 }
590
591 fn expand_node(
594 &mut self,
595 node: Node<Span>,
596 in_progress: &mut HashSet<String>,
597 ) -> Result<Node<Span>> {
598 self.expanded_nodes += 1;
602 if self.expanded_nodes > self.options.max_expanded_nodes {
603 return Err(LoadError::AliasExpansionLimitExceeded {
604 limit: self.options.max_expanded_nodes,
605 });
606 }
607
608 match node {
609 Node::Alias { ref name, loc, .. } => {
610 if in_progress.contains(name) {
611 return Err(LoadError::CircularAlias { name: name.clone() });
612 }
613 let target = self
614 .anchor_map
615 .get(name)
616 .cloned()
617 .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
618 in_progress.insert(name.clone());
619 let expanded = self.expand_node(target, in_progress)?;
620 in_progress.remove(name);
621 Ok(reloc(expanded, loc))
623 }
624 Node::Mapping {
625 entries,
626 anchor,
627 tag,
628 loc,
629 leading_comments,
630 trailing_comment,
631 } => {
632 let mut expanded_entries = Vec::with_capacity(entries.len());
633 for (k, v) in entries {
634 let ek = self.expand_node(k, in_progress)?;
635 let ev = self.expand_node(v, in_progress)?;
636 expanded_entries.push((ek, ev));
637 }
638 Ok(Node::Mapping {
639 entries: expanded_entries,
640 anchor,
641 tag,
642 loc,
643 leading_comments,
644 trailing_comment,
645 })
646 }
647 Node::Sequence {
648 items,
649 anchor,
650 tag,
651 loc,
652 leading_comments,
653 trailing_comment,
654 } => {
655 let mut expanded_items = Vec::with_capacity(items.len());
656 for item in items {
657 expanded_items.push(self.expand_node(item, in_progress)?);
658 }
659 Ok(Node::Sequence {
660 items: expanded_items,
661 anchor,
662 tag,
663 loc,
664 leading_comments,
665 trailing_comment,
666 })
667 }
668 scalar @ Node::Scalar { .. } => Ok(scalar),
670 }
671 }
672}
673
674const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
676 matches!(
677 peeked,
678 None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
679 )
680}
681
682const fn node_end_line(node: &Node<Span>) -> usize {
687 match node {
688 Node::Scalar { loc, .. }
689 | Node::Mapping { loc, .. }
690 | Node::Sequence { loc, .. }
691 | Node::Alias { loc, .. } => loc.end.line,
692 }
693}
694
695const fn empty_scalar() -> Node<Span> {
700 Node::Scalar {
701 value: String::new(),
702 style: ScalarStyle::Plain,
703 anchor: None,
704 tag: None,
705 loc: Span {
706 start: Pos::ORIGIN,
707 end: Pos::ORIGIN,
708 },
709 leading_comments: Vec::new(),
710 trailing_comment: None,
711 }
712}
713
#[cfg(test)]
#[expect(clippy::expect_used, clippy::unwrap_used, reason = "test code")]
mod tests {
    use super::*;

    /// Zero-width span at the origin, shared by the hand-built fixture nodes.
    const ORIGIN_SPAN: Span = Span {
        start: Pos::ORIGIN,
        end: Pos::ORIGIN,
    };

    /// An anchor defined in one document must not be visible from the next.
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        let loader = LoaderBuilder::new().resolved().build();
        let outcome = loader.load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
        assert!(
            outcome.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        let err = outcome.unwrap_err();
        assert!(matches!(err, LoadError::UndefinedAlias { .. }));
    }

    /// With `max_anchors` set to 2, a third distinct anchor name is rejected.
    #[test]
    fn register_anchor_increments_count() {
        let limits = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&limits);
        let fixture = Node::Scalar {
            value: "x".to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            tag: None,
            loc: ORIGIN_SPAN,
            leading_comments: Vec::new(),
            trailing_comment: None,
        };

        for accepted in ["a", "b"] {
            assert!(state
                .register_anchor(accepted.to_owned(), fixture.clone())
                .is_ok());
        }

        let err = state
            .register_anchor("c".to_owned(), fixture)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    /// An anchor whose stored node is an alias to itself is reported as a cycle.
    #[test]
    fn expand_node_detects_circular_alias() {
        let resolved_mode = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&resolved_mode);

        // Anchor "a" maps to an alias that points back at "a".
        let self_alias = Node::Alias {
            name: "a".to_owned(),
            loc: ORIGIN_SPAN,
            leading_comments: Vec::new(),
            trailing_comment: None,
        };
        state.anchor_map.insert("a".to_owned(), self_alias.clone());

        let mut in_progress = HashSet::new();
        let result = state.expand_node(self_alias, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }
}