1use std::collections::{HashMap, HashSet};
33use std::iter::Peekable;
34
35use crate::error::Error;
36use crate::event::{Event, ScalarStyle};
37use crate::node::{Document, Node};
38use crate::pos::{Pos, Span};
39
40#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
46pub enum LoadError {
47 #[error("parse error at {pos:?}: {message}")]
49 Parse { pos: Pos, message: String },
50
51 #[error("unexpected end of event stream")]
53 UnexpectedEndOfStream,
54
55 #[error("nesting depth limit exceeded (max: {limit})")]
57 NestingDepthLimitExceeded { limit: usize },
58
59 #[error("anchor count limit exceeded (max: {limit})")]
61 AnchorCountLimitExceeded { limit: usize },
62
63 #[error("alias expansion node limit exceeded (max: {limit})")]
65 AliasExpansionLimitExceeded { limit: usize },
66
67 #[error("circular alias reference: '{name}'")]
69 CircularAlias { name: String },
70
71 #[error("undefined alias: '{name}'")]
73 UndefinedAlias { name: String },
74}
75
76type Result<T> = std::result::Result<T, LoadError>;
78
79type EventStream<'a> =
81 Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
82
/// Selects how alias events are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Aliases are kept as alias nodes that only record the referenced name.
    Lossless,
    /// Aliases are replaced by an expanded copy of the node registered under
    /// the referenced anchor.
    Resolved,
}
95
96#[derive(Debug, Clone)]
98pub struct LoaderOptions {
99 pub max_nesting_depth: usize,
101 pub max_anchors: usize,
103 pub max_expanded_nodes: usize,
106 pub mode: LoadMode,
108}
109
110impl Default for LoaderOptions {
111 fn default() -> Self {
112 Self {
113 max_nesting_depth: 512,
114 max_anchors: 10_000,
115 max_expanded_nodes: 1_000_000,
116 mode: LoadMode::Lossless,
117 }
118 }
119}
120
121pub struct LoaderBuilder {
134 options: LoaderOptions,
135}
136
137impl LoaderBuilder {
138 #[must_use]
140 pub fn new() -> Self {
141 Self {
142 options: LoaderOptions::default(),
143 }
144 }
145
146 #[must_use]
148 pub const fn lossless(mut self) -> Self {
149 self.options.mode = LoadMode::Lossless;
150 self
151 }
152
153 #[must_use]
155 pub const fn resolved(mut self) -> Self {
156 self.options.mode = LoadMode::Resolved;
157 self
158 }
159
160 #[must_use]
162 pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
163 self.options.max_nesting_depth = limit;
164 self
165 }
166
167 #[must_use]
169 pub const fn max_anchors(mut self, limit: usize) -> Self {
170 self.options.max_anchors = limit;
171 self
172 }
173
174 #[must_use]
176 pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
177 self.options.max_expanded_nodes = limit;
178 self
179 }
180
181 #[must_use]
183 pub const fn build(self) -> Loader {
184 Loader {
185 options: self.options,
186 }
187 }
188}
189
190impl Default for LoaderBuilder {
191 fn default() -> Self {
192 Self::new()
193 }
194}
195
196pub struct Loader {
202 options: LoaderOptions,
203}
204
205impl Loader {
206 pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
213 let mut state = LoadState::new(&self.options);
214 let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
215 Box::new(crate::parse_events(input));
216 state.run(iter.peekable())
217 }
218}
219
220pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
240 LoaderBuilder::new().lossless().build().load(input)
241}
242
243struct LoadState<'opt> {
248 options: &'opt LoaderOptions,
249 anchor_map: HashMap<String, Node<Span>>,
251 anchor_count: usize,
253 depth: usize,
255 expanded_nodes: usize,
257}
258
259impl<'opt> LoadState<'opt> {
260 fn new(options: &'opt LoaderOptions) -> Self {
261 Self {
262 options,
263 anchor_map: HashMap::new(),
264 anchor_count: 0,
265 depth: 0,
266 expanded_nodes: 0,
267 }
268 }
269
270 fn reset_for_document(&mut self) {
271 self.anchor_map.clear();
272 self.anchor_count = 0;
273 self.expanded_nodes = 0;
274 }
275
276 fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
277 let mut docs: Vec<Document<Span>> = Vec::new();
278
279 match stream.next() {
281 Some(Ok(_)) | None => {}
282 Some(Err(e)) => {
283 return Err(LoadError::Parse {
284 pos: e.pos,
285 message: e.message,
286 });
287 }
288 }
289
290 loop {
291 match next_from(&mut stream)? {
293 None | Some((Event::StreamEnd, _)) => break,
294 Some((
295 Event::DocumentStart {
296 version,
297 tag_directives,
298 ..
299 },
300 _,
301 )) => {
302 let doc_version = version;
303 let doc_tags = tag_directives;
304 self.reset_for_document();
305
306 let mut doc_comments: Vec<String> = Vec::new();
307
308 consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
310
311 let root = if is_document_end(stream.peek()) {
313 empty_scalar()
315 } else {
316 self.parse_node(&mut stream)?
317 };
318
319 if matches!(stream.peek(), Some(Ok((Event::DocumentEnd { .. }, _)))) {
321 let _ = stream.next();
322 }
323
324 docs.push(Document {
325 root,
326 version: doc_version,
327 tags: doc_tags,
328 comments: doc_comments,
329 });
330 }
331 Some(_) => {
332 }
334 }
335 }
336
337 Ok(docs)
338 }
339
340 #[allow(clippy::too_many_lines)] fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
345 let Some((event, span)) = next_from(stream)? else {
346 return Ok(empty_scalar());
347 };
348
349 match event {
350 Event::Scalar {
351 value,
352 style,
353 anchor,
354 tag,
355 } => {
356 let node = Node::Scalar {
357 value: value.into_owned(),
358 style,
359 anchor: anchor.map(str::to_owned),
360 tag: tag.map(std::borrow::Cow::into_owned),
361 loc: span,
362 leading_comments: Vec::new(),
363 trailing_comment: None,
364 };
365 if let Some(name) = node.anchor() {
366 self.register_anchor(name.to_owned(), node.clone())?;
367 }
368 Ok(node)
369 }
370
371 Event::MappingStart { anchor, tag, .. } => {
372 let anchor = anchor.map(str::to_owned);
373 let tag = tag.map(std::borrow::Cow::into_owned);
374
375 self.depth += 1;
376 if self.depth > self.options.max_nesting_depth {
377 return Err(LoadError::NestingDepthLimitExceeded {
378 limit: self.options.max_nesting_depth,
379 });
380 }
381
382 let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
383 let mut end_span = span;
384
385 loop {
386 let leading = consume_leading_comments(stream)?;
389
390 match stream.peek() {
391 None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => break,
392 Some(Err(_)) => {
393 return Err(match stream.next() {
395 Some(Err(e)) => LoadError::Parse {
396 pos: e.pos,
397 message: e.message,
398 },
399 _ => LoadError::UnexpectedEndOfStream,
400 });
401 }
402 Some(Ok(_)) => {}
403 }
404
405 let mut key = self.parse_node(stream)?;
406 attach_leading_comments(&mut key, leading);
407
408 let mut value = self.parse_node(stream)?;
409
410 let value_end_line = node_end_line(&value);
412 if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
413 attach_trailing_comment(&mut value, trail);
414 }
415
416 entries.push((key, value));
417 }
418
419 if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
421 end_span = *end;
422 let _ = stream.next();
423 }
424 self.depth -= 1;
425
426 let node = Node::Mapping {
427 entries,
428 anchor: anchor.clone(),
429 tag,
430 loc: Span {
431 start: span.start,
432 end: end_span.end,
433 },
434 leading_comments: Vec::new(),
435 trailing_comment: None,
436 };
437 if let Some(name) = anchor {
438 self.register_anchor(name, node.clone())?;
439 }
440 Ok(node)
441 }
442
443 Event::SequenceStart { anchor, tag, .. } => {
444 let anchor = anchor.map(str::to_owned);
445 let tag = tag.map(std::borrow::Cow::into_owned);
446
447 self.depth += 1;
448 if self.depth > self.options.max_nesting_depth {
449 return Err(LoadError::NestingDepthLimitExceeded {
450 limit: self.options.max_nesting_depth,
451 });
452 }
453
454 let mut items: Vec<Node<Span>> = Vec::new();
455 let mut end_span = span;
456
457 loop {
458 let leading = consume_leading_comments(stream)?;
460
461 match stream.peek() {
462 None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => break,
463 Some(Err(_)) => {
464 return Err(match stream.next() {
466 Some(Err(e)) => LoadError::Parse {
467 pos: e.pos,
468 message: e.message,
469 },
470 _ => LoadError::UnexpectedEndOfStream,
471 });
472 }
473 Some(Ok(_)) => {}
474 }
475
476 let mut item = self.parse_node(stream)?;
477 attach_leading_comments(&mut item, leading);
478
479 let item_end_line = node_end_line(&item);
481 if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
482 attach_trailing_comment(&mut item, trail);
483 }
484
485 items.push(item);
486 }
487
488 if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
490 end_span = *end;
491 let _ = stream.next();
492 }
493 self.depth -= 1;
494
495 let node = Node::Sequence {
496 items,
497 anchor: anchor.clone(),
498 tag,
499 loc: Span {
500 start: span.start,
501 end: end_span.end,
502 },
503 leading_comments: Vec::new(),
504 trailing_comment: None,
505 };
506 if let Some(name) = anchor {
507 self.register_anchor(name, node.clone())?;
508 }
509 Ok(node)
510 }
511
512 Event::Alias { name } => {
513 let name = name.to_owned();
514 self.resolve_alias(&name, span)
515 }
516
517 Event::Comment { .. } => {
518 self.parse_node(stream)
520 }
521
522 Event::StreamStart
523 | Event::StreamEnd
524 | Event::DocumentStart { .. }
525 | Event::DocumentEnd { .. }
526 | Event::MappingEnd
527 | Event::SequenceEnd => {
528 Ok(empty_scalar())
530 }
531 }
532 }
533
534 fn register_anchor(&mut self, name: String, node: Node<Span>) -> Result<()> {
535 if !self.anchor_map.contains_key(&name) {
536 self.anchor_count += 1;
537 if self.anchor_count > self.options.max_anchors {
538 return Err(LoadError::AnchorCountLimitExceeded {
539 limit: self.options.max_anchors,
540 });
541 }
542 }
543 if self.options.mode == LoadMode::Resolved {
547 self.expanded_nodes += 1;
548 if self.expanded_nodes > self.options.max_expanded_nodes {
549 return Err(LoadError::AliasExpansionLimitExceeded {
550 limit: self.options.max_expanded_nodes,
551 });
552 }
553 }
554 self.anchor_map.insert(name, node);
555 Ok(())
556 }
557
558 fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
559 match self.options.mode {
560 LoadMode::Lossless => Ok(Node::Alias {
561 name: name.to_owned(),
562 loc,
563 leading_comments: Vec::new(),
564 trailing_comment: None,
565 }),
566 LoadMode::Resolved => {
567 let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
568 LoadError::UndefinedAlias {
569 name: name.to_owned(),
570 }
571 })?;
572 let mut in_progress: HashSet<String> = HashSet::new();
573 self.expand_node(anchored, &mut in_progress)
574 }
575 }
576 }
577
578 fn expand_node(
581 &mut self,
582 node: Node<Span>,
583 in_progress: &mut HashSet<String>,
584 ) -> Result<Node<Span>> {
585 self.expanded_nodes += 1;
589 if self.expanded_nodes > self.options.max_expanded_nodes {
590 return Err(LoadError::AliasExpansionLimitExceeded {
591 limit: self.options.max_expanded_nodes,
592 });
593 }
594
595 match node {
596 Node::Alias { ref name, loc, .. } => {
597 if in_progress.contains(name) {
598 return Err(LoadError::CircularAlias { name: name.clone() });
599 }
600 let target = self
601 .anchor_map
602 .get(name)
603 .cloned()
604 .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
605 in_progress.insert(name.clone());
606 let expanded = self.expand_node(target, in_progress)?;
607 in_progress.remove(name);
608 Ok(reloc(expanded, loc))
610 }
611 Node::Mapping {
612 entries,
613 anchor,
614 tag,
615 loc,
616 leading_comments,
617 trailing_comment,
618 } => {
619 let mut expanded_entries = Vec::with_capacity(entries.len());
620 for (k, v) in entries {
621 let ek = self.expand_node(k, in_progress)?;
622 let ev = self.expand_node(v, in_progress)?;
623 expanded_entries.push((ek, ev));
624 }
625 Ok(Node::Mapping {
626 entries: expanded_entries,
627 anchor,
628 tag,
629 loc,
630 leading_comments,
631 trailing_comment,
632 })
633 }
634 Node::Sequence {
635 items,
636 anchor,
637 tag,
638 loc,
639 leading_comments,
640 trailing_comment,
641 } => {
642 let mut expanded_items = Vec::with_capacity(items.len());
643 for item in items {
644 expanded_items.push(self.expand_node(item, in_progress)?);
645 }
646 Ok(Node::Sequence {
647 items: expanded_items,
648 anchor,
649 tag,
650 loc,
651 leading_comments,
652 trailing_comment,
653 })
654 }
655 scalar @ Node::Scalar { .. } => Ok(scalar),
657 }
658 }
659}
660
661fn next_from<'a>(stream: &mut EventStream<'a>) -> Result<Option<(Event<'a>, Span)>> {
667 match stream.next() {
668 None => Ok(None),
669 Some(Ok(item)) => Ok(Some(item)),
670 Some(Err(e)) => Err(LoadError::Parse {
671 pos: e.pos,
672 message: e.message,
673 }),
674 }
675}
676
677const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
679 matches!(
680 peeked,
681 None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
682 )
683}
684
685fn consume_leading_doc_comments(
690 stream: &mut EventStream<'_>,
691 doc_comments: &mut Vec<String>,
692) -> Result<()> {
693 while matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _)))) {
694 if let Some((Event::Comment { text }, span)) = next_from(stream)? {
695 if span.end.line > span.start.line {
696 doc_comments.push(format!("#{text}"));
697 }
698 }
699 }
700 Ok(())
701}
702
703fn consume_leading_comments(stream: &mut EventStream<'_>) -> Result<Vec<String>> {
712 let mut leading = Vec::new();
713 while matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _)))) {
714 if let Some((Event::Comment { text }, _)) = next_from(stream)? {
715 leading.push(format!("#{text}"));
716 }
717 }
718 Ok(leading)
719}
720
721fn peek_trailing_comment(
731 stream: &mut EventStream<'_>,
732 preceding_end_line: usize,
733) -> Result<Option<String>> {
734 if matches!(
735 stream.peek(),
736 Some(Ok((Event::Comment { .. }, span))) if span.start.line == preceding_end_line
737 ) {
738 if let Some((Event::Comment { text }, _)) = next_from(stream)? {
739 return Ok(Some(format!("#{text}")));
740 }
741 }
742 Ok(None)
743}
744
745const fn node_end_line(node: &Node<Span>) -> usize {
750 match node {
751 Node::Scalar { loc, .. }
752 | Node::Mapping { loc, .. }
753 | Node::Sequence { loc, .. }
754 | Node::Alias { loc, .. } => loc.end.line,
755 }
756}
757
758const fn empty_scalar() -> Node<Span> {
763 Node::Scalar {
764 value: String::new(),
765 style: ScalarStyle::Plain,
766 anchor: None,
767 tag: None,
768 loc: Span {
769 start: Pos::ORIGIN,
770 end: Pos::ORIGIN,
771 },
772 leading_comments: Vec::new(),
773 trailing_comment: None,
774 }
775}
776
777fn reloc(node: Node<Span>, loc: Span) -> Node<Span> {
779 match node {
780 Node::Scalar {
781 value,
782 style,
783 anchor,
784 tag,
785 leading_comments,
786 trailing_comment,
787 ..
788 } => Node::Scalar {
789 value,
790 style,
791 anchor,
792 tag,
793 loc,
794 leading_comments,
795 trailing_comment,
796 },
797 Node::Mapping {
798 entries,
799 anchor,
800 tag,
801 leading_comments,
802 trailing_comment,
803 ..
804 } => Node::Mapping {
805 entries,
806 anchor,
807 tag,
808 loc,
809 leading_comments,
810 trailing_comment,
811 },
812 Node::Sequence {
813 items,
814 anchor,
815 tag,
816 leading_comments,
817 trailing_comment,
818 ..
819 } => Node::Sequence {
820 items,
821 anchor,
822 tag,
823 loc,
824 leading_comments,
825 trailing_comment,
826 },
827 Node::Alias {
828 name,
829 leading_comments,
830 trailing_comment,
831 ..
832 } => Node::Alias {
833 name,
834 loc,
835 leading_comments,
836 trailing_comment,
837 },
838 }
839}
840
841fn attach_leading_comments(node: &mut Node<Span>, comments: Vec<String>) {
847 if comments.is_empty() {
848 return;
849 }
850 match node {
851 Node::Scalar {
852 leading_comments, ..
853 }
854 | Node::Mapping {
855 leading_comments, ..
856 }
857 | Node::Sequence {
858 leading_comments, ..
859 }
860 | Node::Alias {
861 leading_comments, ..
862 } => {
863 *leading_comments = comments;
864 }
865 }
866}
867
868fn attach_trailing_comment(node: &mut Node<Span>, comment: String) {
870 match node {
871 Node::Scalar {
872 trailing_comment, ..
873 }
874 | Node::Mapping {
875 trailing_comment, ..
876 }
877 | Node::Sequence {
878 trailing_comment, ..
879 }
880 | Node::Alias {
881 trailing_comment, ..
882 } => {
883 *trailing_comment = Some(comment);
884 }
885 }
886}
887
#[cfg(test)]
#[allow(
    clippy::indexing_slicing,
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::too_many_lines,
    clippy::doc_markdown
)]
mod tests {
    use super::*;

    /// Zero-width span at the origin, shared by the hand-built nodes below.
    fn origin_span() -> Span {
        Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        }
    }

    /// Loads `input` and returns the root of its single document.
    #[allow(dead_code)]
    fn load_one(input: &str) -> Node<Span> {
        let mut docs = load(input).expect("load failed");
        assert_eq!(docs.len(), 1, "expected 1 document, got {}", docs.len());
        docs.pop().unwrap().root
    }

    /// Anchors defined in one document must not be visible in the next.
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        let input = "---\n- &foo hello\n...\n---\n- *foo\n...\n";
        let loader = LoaderBuilder::new().resolved().build();
        let result = loader.load(input);
        assert!(
            result.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        assert!(matches!(
            result.unwrap_err(),
            LoadError::UndefinedAlias { .. }
        ));
    }

    /// The third distinct anchor exceeds a limit of two.
    #[test]
    fn register_anchor_increments_count() {
        let options = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let node = Node::Scalar {
            value: "x".to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            tag: None,
            loc: origin_span(),
            leading_comments: Vec::new(),
            trailing_comment: None,
        };
        for name in ["a", "b"] {
            assert!(state.register_anchor(name.to_owned(), node.clone()).is_ok());
        }
        let err = state
            .register_anchor("c".to_owned(), node)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    /// An anchor whose stored node is an alias to itself is a cycle.
    #[test]
    fn expand_node_detects_circular_alias() {
        let options = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let alias_node = Node::Alias {
            name: "a".to_owned(),
            loc: origin_span(),
            leading_comments: Vec::new(),
            trailing_comment: None,
        };
        state.anchor_map.insert("a".to_owned(), alias_node.clone());
        let mut in_progress = HashSet::new();
        let result = state.expand_node(alias_node, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }
}