1mod comments;
33mod reloc;
34mod stream;
35
36use comments::{attach_leading_comments, attach_trailing_comment};
37use reloc::reloc;
38use stream::{
39 consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
40};
41
42use std::collections::{HashMap, HashSet};
43use std::iter::Peekable;
44
45use crate::error::Error;
46use crate::event::{Event, ScalarStyle};
47use crate::node::{Document, Node};
48use crate::pos::{Pos, Span};
49
/// Errors that can occur while turning an event stream into documents.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The underlying event parser reported an error; `pos` and `message`
    /// are copied from that error.
    #[error("parse error at {pos:?}: {message}")]
    Parse { pos: Pos, message: String },

    /// The event stream ended where another event was required.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// Mappings/sequences were nested deeper than
    /// [`LoaderOptions::max_nesting_depth`] (reported as `limit`).
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded { limit: usize },

    /// More distinct anchor names were defined in a single document than
    /// [`LoaderOptions::max_anchors`] (reported as `limit`) allows.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded { limit: usize },

    /// The per-document node budget
    /// [`LoaderOptions::max_expanded_nodes`] (reported as `limit`) was
    /// exhausted while resolving aliases.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded { limit: usize },

    /// Expanding the alias `name` led back to `name` itself.
    #[error("circular alias reference: '{name}'")]
    CircularAlias { name: String },

    /// The alias `name` refers to an anchor that is not defined (anchors do
    /// not carry over from one document to the next).
    #[error("undefined alias: '{name}'")]
    UndefinedAlias { name: String },
}
85
/// Module-local shorthand for results whose error type is [`LoadError`].
type Result<T> = std::result::Result<T, LoadError>;
88
/// Peekable, type-erased iterator over parser output: each item is either an
/// event paired with its span, or a parser error.
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
92
/// Controls how alias events are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Keep every alias as an alias node that records the referenced name;
    /// nothing is expanded.
    Lossless,
    /// Replace every alias with an expanded copy of the node registered under
    /// the referenced anchor, subject to the expansion limits.
    Resolved,
}
105
/// Limits and mode that govern a [`Loader`].
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum number of mappings/sequences that may be open at once;
    /// exceeding it yields [`LoadError::NestingDepthLimitExceeded`].
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document; exceeding it
    /// yields [`LoadError::AnchorCountLimitExceeded`].
    pub max_anchors: usize,
    /// Per-document budget of nodes visited while resolving aliases;
    /// exceeding it yields [`LoadError::AliasExpansionLimitExceeded`].
    pub max_expanded_nodes: usize,
    /// Whether aliases are preserved or expanded (see [`LoadMode`]).
    pub mode: LoadMode,
}
119
120impl Default for LoaderOptions {
121 fn default() -> Self {
122 Self {
123 max_nesting_depth: 512,
124 max_anchors: 10_000,
125 max_expanded_nodes: 1_000_000,
126 mode: LoadMode::Lossless,
127 }
128 }
129}
130
131pub struct LoaderBuilder {
144 options: LoaderOptions,
145}
146
147impl LoaderBuilder {
148 #[must_use]
150 pub fn new() -> Self {
151 Self {
152 options: LoaderOptions::default(),
153 }
154 }
155
156 #[must_use]
158 pub const fn lossless(mut self) -> Self {
159 self.options.mode = LoadMode::Lossless;
160 self
161 }
162
163 #[must_use]
165 pub const fn resolved(mut self) -> Self {
166 self.options.mode = LoadMode::Resolved;
167 self
168 }
169
170 #[must_use]
172 pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
173 self.options.max_nesting_depth = limit;
174 self
175 }
176
177 #[must_use]
179 pub const fn max_anchors(mut self, limit: usize) -> Self {
180 self.options.max_anchors = limit;
181 self
182 }
183
184 #[must_use]
186 pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
187 self.options.max_expanded_nodes = limit;
188 self
189 }
190
191 #[must_use]
193 pub const fn build(self) -> Loader {
194 Loader {
195 options: self.options,
196 }
197 }
198}
199
impl Default for LoaderBuilder {
    /// Equivalent to [`LoaderBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
205
206pub struct Loader {
212 options: LoaderOptions,
213}
214
215impl Loader {
216 pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
223 let mut state = LoadState::new(&self.options);
224 let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
225 Box::new(crate::parse_events(input));
226 state.run(iter.peekable())
227 }
228}
229
230pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
250 LoaderBuilder::new().lossless().build().load(input)
251}
252
/// Mutable bookkeeping for a single `Loader::load` call.
struct LoadState<'opt> {
    /// Limits and mode supplied by the owning loader.
    options: &'opt LoaderOptions,
    /// Most recently registered node for each anchor name in the current
    /// document.
    anchor_map: HashMap<String, Node<Span>>,
    /// Number of distinct anchor names registered in the current document.
    anchor_count: usize,
    /// Number of mappings/sequences currently open.
    depth: usize,
    /// Nodes charged against `max_expanded_nodes` in the current document.
    expanded_nodes: usize,
}
268
269impl<'opt> LoadState<'opt> {
270 fn new(options: &'opt LoaderOptions) -> Self {
271 Self {
272 options,
273 anchor_map: HashMap::new(),
274 anchor_count: 0,
275 depth: 0,
276 expanded_nodes: 0,
277 }
278 }
279
280 fn reset_for_document(&mut self) {
281 self.anchor_map.clear();
282 self.anchor_count = 0;
283 self.expanded_nodes = 0;
284 }
285
286 fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
287 let mut docs: Vec<Document<Span>> = Vec::new();
288
289 match stream.next() {
291 Some(Ok(_)) | None => {}
292 Some(Err(e)) => {
293 return Err(LoadError::Parse {
294 pos: e.pos,
295 message: e.message,
296 });
297 }
298 }
299
300 loop {
301 match next_from(&mut stream)? {
303 None | Some((Event::StreamEnd, _)) => break,
304 Some((
305 Event::DocumentStart {
306 version,
307 tag_directives,
308 ..
309 },
310 _,
311 )) => {
312 let doc_version = version;
313 let doc_tags = tag_directives;
314 self.reset_for_document();
315
316 let mut doc_comments: Vec<String> = Vec::new();
317
318 consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
320
321 let root = if is_document_end(stream.peek()) {
323 empty_scalar()
325 } else {
326 self.parse_node(&mut stream)?
327 };
328
329 if matches!(stream.peek(), Some(Ok((Event::DocumentEnd { .. }, _)))) {
331 let _ = stream.next();
332 }
333
334 docs.push(Document {
335 root,
336 version: doc_version,
337 tags: doc_tags,
338 comments: doc_comments,
339 });
340 }
341 Some(_) => {
342 }
344 }
345 }
346
347 Ok(docs)
348 }
349
350 #[allow(clippy::too_many_lines)] fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
355 let Some((event, span)) = next_from(stream)? else {
356 return Ok(empty_scalar());
357 };
358
359 match event {
360 Event::Scalar {
361 value,
362 style,
363 anchor,
364 tag,
365 } => {
366 let node = Node::Scalar {
367 value: value.into_owned(),
368 style,
369 anchor: anchor.map(str::to_owned),
370 tag: tag.map(std::borrow::Cow::into_owned),
371 loc: span,
372 leading_comments: Vec::new(),
373 trailing_comment: None,
374 };
375 if let Some(name) = node.anchor() {
376 self.register_anchor(name.to_owned(), node.clone())?;
377 }
378 Ok(node)
379 }
380
381 Event::MappingStart { anchor, tag, .. } => {
382 let anchor = anchor.map(str::to_owned);
383 let tag = tag.map(std::borrow::Cow::into_owned);
384
385 self.depth += 1;
386 if self.depth > self.options.max_nesting_depth {
387 return Err(LoadError::NestingDepthLimitExceeded {
388 limit: self.options.max_nesting_depth,
389 });
390 }
391
392 let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
393 let mut end_span = span;
394
395 loop {
396 let leading = consume_leading_comments(stream)?;
399
400 match stream.peek() {
401 None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => break,
402 Some(Err(_)) => {
403 return Err(match stream.next() {
405 Some(Err(e)) => LoadError::Parse {
406 pos: e.pos,
407 message: e.message,
408 },
409 _ => LoadError::UnexpectedEndOfStream,
410 });
411 }
412 Some(Ok(_)) => {}
413 }
414
415 let mut key = self.parse_node(stream)?;
416 attach_leading_comments(&mut key, leading);
417
418 let mut value = self.parse_node(stream)?;
419
420 let value_end_line = node_end_line(&value);
422 if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
423 attach_trailing_comment(&mut value, trail);
424 }
425
426 entries.push((key, value));
427 }
428
429 if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
431 end_span = *end;
432 let _ = stream.next();
433 }
434 self.depth -= 1;
435
436 let node = Node::Mapping {
437 entries,
438 anchor: anchor.clone(),
439 tag,
440 loc: Span {
441 start: span.start,
442 end: end_span.end,
443 },
444 leading_comments: Vec::new(),
445 trailing_comment: None,
446 };
447 if let Some(name) = anchor {
448 self.register_anchor(name, node.clone())?;
449 }
450 Ok(node)
451 }
452
453 Event::SequenceStart { anchor, tag, .. } => {
454 let anchor = anchor.map(str::to_owned);
455 let tag = tag.map(std::borrow::Cow::into_owned);
456
457 self.depth += 1;
458 if self.depth > self.options.max_nesting_depth {
459 return Err(LoadError::NestingDepthLimitExceeded {
460 limit: self.options.max_nesting_depth,
461 });
462 }
463
464 let mut items: Vec<Node<Span>> = Vec::new();
465 let mut end_span = span;
466
467 loop {
468 let leading = consume_leading_comments(stream)?;
470
471 match stream.peek() {
472 None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => break,
473 Some(Err(_)) => {
474 return Err(match stream.next() {
476 Some(Err(e)) => LoadError::Parse {
477 pos: e.pos,
478 message: e.message,
479 },
480 _ => LoadError::UnexpectedEndOfStream,
481 });
482 }
483 Some(Ok(_)) => {}
484 }
485
486 let mut item = self.parse_node(stream)?;
487 attach_leading_comments(&mut item, leading);
488
489 let item_end_line = node_end_line(&item);
491 if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
492 attach_trailing_comment(&mut item, trail);
493 }
494
495 items.push(item);
496 }
497
498 if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
500 end_span = *end;
501 let _ = stream.next();
502 }
503 self.depth -= 1;
504
505 let node = Node::Sequence {
506 items,
507 anchor: anchor.clone(),
508 tag,
509 loc: Span {
510 start: span.start,
511 end: end_span.end,
512 },
513 leading_comments: Vec::new(),
514 trailing_comment: None,
515 };
516 if let Some(name) = anchor {
517 self.register_anchor(name, node.clone())?;
518 }
519 Ok(node)
520 }
521
522 Event::Alias { name } => {
523 let name = name.to_owned();
524 self.resolve_alias(&name, span)
525 }
526
527 Event::Comment { .. } => {
528 self.parse_node(stream)
530 }
531
532 Event::StreamStart
533 | Event::StreamEnd
534 | Event::DocumentStart { .. }
535 | Event::DocumentEnd { .. }
536 | Event::MappingEnd
537 | Event::SequenceEnd => {
538 Ok(empty_scalar())
540 }
541 }
542 }
543
544 fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
545 if !self.anchor_map.contains_key(&name) {
546 self.anchor_count += 1;
547 if self.anchor_count > self.options.max_anchors {
548 return Err(LoadError::AnchorCountLimitExceeded {
549 limit: self.options.max_anchors,
550 });
551 }
552 }
553 if self.options.mode == LoadMode::Resolved {
557 self.expanded_nodes += 1;
558 if self.expanded_nodes > self.options.max_expanded_nodes {
559 return Err(LoadError::AliasExpansionLimitExceeded {
560 limit: self.options.max_expanded_nodes,
561 });
562 }
563 }
564 self.anchor_map.insert(name, node);
565 Ok(())
566 }
567
568 fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
569 match self.options.mode {
570 LoadMode::Lossless => Ok(Node::Alias {
571 name: name.to_owned(),
572 loc,
573 leading_comments: Vec::new(),
574 trailing_comment: None,
575 }),
576 LoadMode::Resolved => {
577 let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
578 LoadError::UndefinedAlias {
579 name: name.to_owned(),
580 }
581 })?;
582 let mut in_progress: HashSet<String> = HashSet::new();
583 self.expand_node(anchored, &mut in_progress)
584 }
585 }
586 }
587
588 fn expand_node(
591 &mut self,
592 node: Node<Span>,
593 in_progress: &mut HashSet<String>,
594 ) -> Result<Node<Span>> {
595 self.expanded_nodes += 1;
599 if self.expanded_nodes > self.options.max_expanded_nodes {
600 return Err(LoadError::AliasExpansionLimitExceeded {
601 limit: self.options.max_expanded_nodes,
602 });
603 }
604
605 match node {
606 Node::Alias { ref name, loc, .. } => {
607 if in_progress.contains(name) {
608 return Err(LoadError::CircularAlias { name: name.clone() });
609 }
610 let target = self
611 .anchor_map
612 .get(name)
613 .cloned()
614 .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
615 in_progress.insert(name.clone());
616 let expanded = self.expand_node(target, in_progress)?;
617 in_progress.remove(name);
618 Ok(reloc(expanded, loc))
620 }
621 Node::Mapping {
622 entries,
623 anchor,
624 tag,
625 loc,
626 leading_comments,
627 trailing_comment,
628 } => {
629 let mut expanded_entries = Vec::with_capacity(entries.len());
630 for (k, v) in entries {
631 let ek = self.expand_node(k, in_progress)?;
632 let ev = self.expand_node(v, in_progress)?;
633 expanded_entries.push((ek, ev));
634 }
635 Ok(Node::Mapping {
636 entries: expanded_entries,
637 anchor,
638 tag,
639 loc,
640 leading_comments,
641 trailing_comment,
642 })
643 }
644 Node::Sequence {
645 items,
646 anchor,
647 tag,
648 loc,
649 leading_comments,
650 trailing_comment,
651 } => {
652 let mut expanded_items = Vec::with_capacity(items.len());
653 for item in items {
654 expanded_items.push(self.expand_node(item, in_progress)?);
655 }
656 Ok(Node::Sequence {
657 items: expanded_items,
658 anchor,
659 tag,
660 loc,
661 leading_comments,
662 trailing_comment,
663 })
664 }
665 scalar @ Node::Scalar { .. } => Ok(scalar),
667 }
668 }
669}
670
671const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
673 matches!(
674 peeked,
675 None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
676 )
677}
678
679const fn node_end_line(node: &Node<Span>) -> usize {
684 match node {
685 Node::Scalar { loc, .. }
686 | Node::Mapping { loc, .. }
687 | Node::Sequence { loc, .. }
688 | Node::Alias { loc, .. } => loc.end.line,
689 }
690}
691
692const fn empty_scalar() -> Node<Span> {
697 Node::Scalar {
698 value: String::new(),
699 style: ScalarStyle::Plain,
700 anchor: None,
701 tag: None,
702 loc: Span {
703 start: Pos::ORIGIN,
704 end: Pos::ORIGIN,
705 },
706 leading_comments: Vec::new(),
707 trailing_comment: None,
708 }
709}
710
#[cfg(test)]
#[allow(
    clippy::indexing_slicing,
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::too_many_lines,
    clippy::doc_markdown
)]
mod tests {
    use super::*;

    /// Zero-width span at the origin, shared by the hand-built nodes below.
    const ORIGIN_SPAN: Span = Span {
        start: Pos::ORIGIN,
        end: Pos::ORIGIN,
    };

    /// Loads `input` with the default loader and returns the root of its
    /// single document, panicking if loading fails or the count is not one.
    #[allow(dead_code)]
    fn load_one(input: &str) -> Node<Span> {
        let docs = load(input).expect("load failed");
        assert_eq!(docs.len(), 1, "expected 1 document, got {}", docs.len());
        let only = docs.into_iter().next().unwrap();
        only.root
    }

    /// An anchor defined in the first document must not be visible in the
    /// second one.
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        let loader = LoaderBuilder::new().resolved().build();
        let result = loader.load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
        assert!(
            result.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        assert!(matches!(
            result.unwrap_err(),
            LoadError::UndefinedAlias { .. }
        ));
    }

    /// Registering a third distinct anchor with `max_anchors: 2` must fail.
    #[test]
    fn register_anchor_increments_count() {
        let options = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let node = Node::Scalar {
            value: "x".to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            tag: None,
            loc: ORIGIN_SPAN,
            leading_comments: Vec::new(),
            trailing_comment: None,
        };

        // The first two distinct names fit within the limit.
        for name in ["a", "b"] {
            assert!(state.register_anchor(name.to_owned(), node.clone()).is_ok());
        }

        let err = state
            .register_anchor("c".to_owned(), node)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    /// An anchor whose stored node is an alias to itself must be reported as
    /// circular instead of recursing forever.
    #[test]
    fn expand_node_detects_circular_alias() {
        let options = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let alias_node = Node::Alias {
            name: "a".to_owned(),
            loc: ORIGIN_SPAN,
            leading_comments: Vec::new(),
            trailing_comment: None,
        };
        state.anchor_map.insert("a".to_owned(), alias_node.clone());

        let mut in_progress = HashSet::new();
        let result = state.expand_node(alias_node, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }
}