1use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::iter::Peekable;
35
36use std::sync::Arc;
37
38use crate::error::Error;
39use crate::event::{Event, EventMeta, ScalarStyle};
40use crate::node::{Document, Node, NodeMeta};
41use crate::pos::{LineIndex, Pos, Span};
42use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
43
44use comments::{attach_leading_comments, attach_trailing_comment};
45use reloc::reloc;
46use stream::{
47 consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
48 with_hash_prefix,
49};
50
51mod comments;
52mod reloc;
53mod stream;
54
/// Errors that can be returned while turning an event stream into documents.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The underlying event stream yielded an error; `pos` and `message`
    /// are copied from that error.
    #[error("parse error at {pos:?}: {message}")]
    Parse {
        /// Position reported by the event stream error.
        pos: Pos,
        /// Message reported by the event stream error.
        message: String,
    },

    /// The event stream ended where another event was required.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// Collections were nested deeper than `LoaderOptions::max_nesting_depth`.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// More distinct anchors were registered in one document than
    /// `LoaderOptions::max_anchors` allows.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// The expanded-node counter exceeded `LoaderOptions::max_expanded_nodes`.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// An alias was encountered again while it was already being expanded.
    #[error("circular alias reference: '{name}'")]
    CircularAlias {
        /// Name of the alias that refers back to itself.
        name: String,
    },

    /// An alias named an anchor that has not been registered in the
    /// current document.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias {
        /// Name of the missing anchor.
        name: String,
    },

    /// Schema resolution rejected a scalar.
    ///
    /// Note that the display message interpolates neither field; callers
    /// that want the details must read `value` and `pos` directly.
    #[error("JSON schema: plain scalar does not match any type pattern")]
    UnresolvedScalar {
        /// The offending scalar text, sanitized and truncated for safe display.
        value: String,
        /// Position of the start of the scalar.
        pos: Pos,
    },
}
127
/// Module-local result alias with [`LoadError`] as the error type.
type Result<T> = std::result::Result<T, LoadError>;
130
/// Peekable, boxed iterator over `(Event, Span)` pairs (or parse errors)
/// that the loader consumes.
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
134
135#[expect(
137 clippy::type_complexity,
138 reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
139)]
140#[inline]
141fn unpack_meta(
142 meta: Option<Box<EventMeta<'_>>>,
143) -> (
144 Option<&'_ str>,
145 Option<Span>,
146 Option<std::borrow::Cow<'_, str>>,
147 Option<Span>,
148) {
149 meta.map_or((None, None, None, None), |m| {
150 (m.anchor, m.anchor_loc, m.tag, m.tag_loc)
151 })
152}
153
/// Controls how aliases are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Aliases are kept as `Node::Alias` nodes and are not expanded.
    Lossless,
    /// Aliases are replaced by an expanded copy of the anchored node.
    Resolved,
}
166
/// Limits and behavior switches used by the loader.
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum depth of nested mappings/sequences before
    /// `LoadError::NestingDepthLimitExceeded` is returned.
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document before
    /// `LoadError::AnchorCountLimitExceeded` is returned.
    pub max_anchors: usize,
    /// Upper bound on the per-document expanded-node counter before
    /// `LoadError::AliasExpansionLimitExceeded` is returned.
    pub max_expanded_nodes: usize,
    /// Whether aliases are preserved or expanded.
    pub mode: LoadMode,
    /// Schema applied to every loaded node for tag resolution.
    pub schema: Schema,
}
187
188impl Default for LoaderOptions {
189 fn default() -> Self {
190 Self {
191 max_nesting_depth: 512,
192 max_anchors: 10_000,
193 max_expanded_nodes: 1_000_000,
194 mode: LoadMode::Lossless,
195 schema: Schema::Core,
196 }
197 }
198}
199
/// Builder that accumulates [`LoaderOptions`] and produces a [`Loader`].
pub struct LoaderBuilder {
    /// Options collected so far; starts from `LoaderOptions::default()`.
    options: LoaderOptions,
}
215
impl LoaderBuilder {
    /// Creates a builder initialised with `LoaderOptions::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            options: LoaderOptions::default(),
        }
    }

    /// Selects [`LoadMode::Lossless`].
    #[must_use]
    pub const fn lossless(mut self) -> Self {
        self.options.mode = LoadMode::Lossless;
        self
    }

    /// Selects [`LoadMode::Resolved`].
    #[must_use]
    pub const fn resolved(mut self) -> Self {
        self.options.mode = LoadMode::Resolved;
        self
    }

    /// Sets the maximum collection nesting depth.
    #[must_use]
    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
        self.options.max_nesting_depth = limit;
        self
    }

    /// Sets the maximum number of distinct anchors per document.
    #[must_use]
    pub const fn max_anchors(mut self, limit: usize) -> Self {
        self.options.max_anchors = limit;
        self
    }

    /// Sets the maximum value of the per-document expanded-node counter.
    #[must_use]
    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
        self.options.max_expanded_nodes = limit;
        self
    }

    /// Sets the schema used for tag resolution.
    #[must_use]
    pub const fn schema(mut self, s: Schema) -> Self {
        self.options.schema = s;
        self
    }

    /// Consumes the builder and returns a [`Loader`] using the collected options.
    #[must_use]
    pub const fn build(self) -> Loader {
        Loader {
            options: self.options,
        }
    }
}
278
279impl Default for LoaderBuilder {
280 fn default() -> Self {
281 Self::new()
282 }
283}
284
/// Loads text into documents according to a fixed set of [`LoaderOptions`].
pub struct Loader {
    /// Options supplied by the [`LoaderBuilder`] that created this loader.
    options: LoaderOptions,
}
293
294impl Loader {
295 pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
302 let mut state = LoadState::new(&self.options, input);
303 let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
304 Box::new(crate::parse_events(input));
305 state.run(iter.peekable())
306 }
307}
308
309pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
335 LoaderBuilder::new().lossless().build().load(input)
336}
337
/// Mutable bookkeeping for a single `Loader::load` call.
struct LoadState<'opt> {
    /// Options of the loader that owns this run.
    options: &'opt LoaderOptions,
    /// Anchors registered in the current document, keyed by name.
    anchor_map: HashMap<String, Node<Span>>,
    /// Number of distinct anchor names registered in the current document.
    anchor_count: usize,
    /// Current nesting depth of mappings/sequences being parsed.
    depth: usize,
    /// Counter compared against `max_expanded_nodes`.
    expanded_nodes: usize,
    /// Comments that have been read but not yet attached to a node.
    pending_leading: Vec<String>,
    /// Line index built from the input; shared with every produced document.
    line_index: Arc<LineIndex>,
}
362
363impl<'opt> LoadState<'opt> {
364 fn new(options: &'opt LoaderOptions, input: &str) -> Self {
365 Self {
366 options,
367 anchor_map: HashMap::new(),
368 anchor_count: 0,
369 depth: 0,
370 expanded_nodes: 0,
371 pending_leading: Vec::new(),
372 line_index: Arc::new(LineIndex::new(input)),
373 }
374 }
375
376 fn reset_for_document(&mut self) {
377 self.anchor_map.clear();
378 self.anchor_count = 0;
379 self.expanded_nodes = 0;
380 self.pending_leading.clear();
381 }
382
    /// Drives the event stream to completion and returns one `Document`
    /// per `DocumentStart` event encountered.
    ///
    /// # Errors
    ///
    /// Returns `LoadError::Parse` if the first event is an error, and
    /// propagates any error from `next_from`, comment consumption, schema
    /// application or node parsing.
    fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
        let mut docs: Vec<Document<Span>> = Vec::new();

        // The first event is consumed and discarded whatever it is
        // (presumably `StreamStart` — confirm against the event producer).
        match stream.next() {
            Some(Ok(_)) | None => {}
            Some(Err(e)) => {
                return Err(LoadError::Parse {
                    pos: e.pos,
                    message: e.message,
                });
            }
        }

        loop {
            match next_from(&mut stream)? {
                None | Some((Event::StreamEnd, _)) => break,
                Some((
                    Event::DocumentStart {
                        explicit,
                        version,
                        tag_directives,
                    },
                    _,
                )) => {
                    let doc_explicit_start = explicit;
                    let doc_version = version;
                    let doc_tags = tag_directives;
                    // Anchors, counters and pending comments never carry
                    // over from one document to the next.
                    self.reset_for_document();

                    let mut doc_comments: Vec<String> = Vec::new();

                    consume_leading_doc_comments(&mut stream, &mut doc_comments, &self.line_index)?;

                    // A document with no content gets an empty scalar root,
                    // which still goes through schema resolution.
                    let root = if is_document_end(stream.peek()) {
                        let mut node = empty_scalar();
                        apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
                        node
                    } else {
                        self.parse_node(&mut stream)?
                    };

                    // Only a `DocumentEnd` that immediately follows the root
                    // is consumed here.
                    // NOTE(review): if any other event (e.g. a comment) sits
                    // between the root and `DocumentEnd`, `explicit_end` stays
                    // false and the outer loop discards both — confirm the
                    // event producer never emits that sequence.
                    let doc_explicit_end =
                        if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
                            let end_explicit = *explicit;
                            let _ = stream.next();
                            end_explicit
                        } else {
                            false
                        };

                    docs.push(Document {
                        root,
                        version: doc_version,
                        tags: doc_tags,
                        comments: doc_comments,
                        explicit_start: doc_explicit_start,
                        explicit_end: doc_explicit_end,
                        line_index: Some(self.line_index.clone()),
                    });
                }
                Some(_) => {
                    // Any other event between documents is ignored.
                }
            }
        }

        Ok(docs)
    }
457
458 #[expect(
462 clippy::too_many_lines,
463 reason = "match-on-event-type; splitting would obscure flow"
464 )]
465 fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
466 if matches!(
470 stream.peek(),
471 Some(Ok((
472 Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
473 _
474 )))
475 ) {
476 return Ok(empty_scalar());
477 }
478
479 let Some((event, span)) = next_from(stream)? else {
480 return Ok(empty_scalar());
481 };
482
483 match event {
484 Event::Scalar { value, style, meta } => {
485 let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
486 let anchor = anchor.map(str::to_owned);
487 let mut node = Node::Scalar {
488 value: value.into_owned(),
489 style,
490 tag: tag.map(|t| Cow::Owned(t.into_owned())),
491 loc: span,
492 meta: NodeMeta {
493 anchor,
494 anchor_loc,
495 tag_loc,
496 leading_comments: None,
497 trailing_comment: None,
498 }
499 .into_option(),
500 };
501 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
502 if let Some(name) = node.anchor() {
503 self.register_anchor(name.to_owned(), &node)?;
504 }
505 Ok(node)
506 }
507
508 Event::MappingStart { style, meta } => {
509 let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
510 let anchor = event_anchor.map(str::to_owned);
511 let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
512 let anchor_for_registration = anchor.clone();
513
514 self.depth += 1;
515 if self.depth > self.options.max_nesting_depth {
516 return Err(LoadError::NestingDepthLimitExceeded {
517 limit: self.options.max_nesting_depth,
518 });
519 }
520
521 let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
522 let mut end_span = span;
523
524 loop {
525 let raw_leading = consume_leading_comments(stream)?;
529 let leading = if self.pending_leading.is_empty() {
530 raw_leading
531 } else {
532 let mut combined = std::mem::take(&mut self.pending_leading);
533 combined.extend(raw_leading);
534 combined
535 };
536
537 match stream.peek() {
538 None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
539 if !leading.is_empty() {
544 self.pending_leading = leading;
545 }
546 break;
547 }
548 Some(Err(_)) => {
549 return Err(match stream.next() {
551 Some(Err(e)) => LoadError::Parse {
552 pos: e.pos,
553 message: e.message,
554 },
555 _ => LoadError::UnexpectedEndOfStream,
556 });
557 }
558 Some(Ok(_)) => {}
559 }
560
561 let mut key = self.parse_node(stream)?;
562 attach_leading_comments(&mut key, leading);
563
564 let mut value = self.parse_node(stream)?;
565
566 if !is_block_scalar(&value)
575 && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
576 {
577 let value_end_line = node_end_line(&value, &self.line_index);
578 if let Some(trail) =
579 peek_trailing_comment(stream, value_end_line, &self.line_index)?
580 {
581 attach_trailing_comment(&mut value, trail);
582 }
583 }
584
585 entries.push((key, value));
586 }
587
588 if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
590 end_span = *end;
591 let _ = stream.next();
592 }
593 self.depth -= 1;
594
595 let mut node = Node::Mapping {
596 entries,
597 style,
598 tag,
599 loc: Span {
600 start: span.start,
601 end: end_span.end,
602 },
603 meta: NodeMeta {
604 anchor,
605 anchor_loc,
606 tag_loc,
607 leading_comments: None,
608 trailing_comment: None,
609 }
610 .into_option(),
611 };
612 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
613 if let Some(name) = anchor_for_registration {
614 self.register_anchor(name, &node)?;
615 }
616 Ok(node)
617 }
618
619 Event::SequenceStart { style, meta } => {
620 let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
621 let anchor = event_anchor.map(str::to_owned);
622 let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
623 let anchor_for_registration = anchor.clone();
624
625 self.depth += 1;
626 if self.depth > self.options.max_nesting_depth {
627 return Err(LoadError::NestingDepthLimitExceeded {
628 limit: self.options.max_nesting_depth,
629 });
630 }
631
632 let mut items: Vec<Node<Span>> = Vec::new();
633 let mut end_span = span;
634
635 loop {
636 let raw_leading = consume_leading_comments(stream)?;
640 let leading = if self.pending_leading.is_empty() {
641 raw_leading
642 } else {
643 let mut combined = std::mem::take(&mut self.pending_leading);
644 combined.extend(raw_leading);
645 combined
646 };
647
648 match stream.peek() {
649 None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
650 if !leading.is_empty() {
656 self.pending_leading = leading;
657 }
658 break;
659 }
660 Some(Err(_)) => {
661 return Err(match stream.next() {
663 Some(Err(e)) => LoadError::Parse {
664 pos: e.pos,
665 message: e.message,
666 },
667 _ => LoadError::UnexpectedEndOfStream,
668 });
669 }
670 Some(Ok(_)) => {}
671 }
672
673 let mut item = self.parse_node(stream)?;
674 attach_leading_comments(&mut item, leading);
675
676 if !is_block_scalar(&item)
682 && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
683 {
684 let item_end_line = node_end_line(&item, &self.line_index);
685 if let Some(trail) =
686 peek_trailing_comment(stream, item_end_line, &self.line_index)?
687 {
688 attach_trailing_comment(&mut item, trail);
689 }
690 }
691
692 items.push(item);
693 }
694
695 if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
697 end_span = *end;
698 let _ = stream.next();
699 }
700 self.depth -= 1;
701
702 let mut node = Node::Sequence {
703 items,
704 style,
705 tag,
706 loc: Span {
707 start: span.start,
708 end: end_span.end,
709 },
710 meta: NodeMeta {
711 anchor,
712 anchor_loc,
713 tag_loc,
714 leading_comments: None,
715 trailing_comment: None,
716 }
717 .into_option(),
718 };
719 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
720 if let Some(name) = anchor_for_registration {
721 self.register_anchor(name, &node)?;
722 }
723 Ok(node)
724 }
725
726 Event::Alias { name } => {
727 let name = name.to_owned();
728 self.resolve_alias(&name, span)
729 }
730
731 Event::Comment { text } => {
732 self.pending_leading.push(with_hash_prefix(text));
738 self.parse_node(stream)
739 }
740
741 Event::StreamStart
742 | Event::StreamEnd
743 | Event::DocumentStart { .. }
744 | Event::DocumentEnd { .. }
745 | Event::MappingEnd
746 | Event::SequenceEnd => {
747 Ok(empty_scalar())
749 }
750 }
751 }
752
753 fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
754 if !self.anchor_map.contains_key(&name) {
755 self.anchor_count += 1;
756 if self.anchor_count > self.options.max_anchors {
757 return Err(LoadError::AnchorCountLimitExceeded {
758 limit: self.options.max_anchors,
759 });
760 }
761 }
762 if self.options.mode == LoadMode::Resolved {
766 self.expanded_nodes += 1;
767 if self.expanded_nodes > self.options.max_expanded_nodes {
768 return Err(LoadError::AliasExpansionLimitExceeded {
769 limit: self.options.max_expanded_nodes,
770 });
771 }
772 self.anchor_map.insert(name, node.clone());
773 } else {
774 self.anchor_map.insert(name, empty_scalar());
777 }
778 Ok(())
779 }
780
781 fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
782 match self.options.mode {
783 LoadMode::Lossless => Ok(Node::Alias {
784 name: name.to_owned(),
785 loc,
786 leading_comments: None,
787 trailing_comment: None,
788 }),
789 LoadMode::Resolved => {
790 let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
791 LoadError::UndefinedAlias {
792 name: name.to_owned(),
793 }
794 })?;
795 let mut in_progress: HashSet<String> = HashSet::new();
796 self.expand_node(anchored, &mut in_progress)
797 }
798 }
799 }
800
801 fn expand_node(
804 &mut self,
805 node: Node<Span>,
806 in_progress: &mut HashSet<String>,
807 ) -> Result<Node<Span>> {
808 self.expanded_nodes += 1;
812 if self.expanded_nodes > self.options.max_expanded_nodes {
813 return Err(LoadError::AliasExpansionLimitExceeded {
814 limit: self.options.max_expanded_nodes,
815 });
816 }
817
818 match node {
819 Node::Alias { ref name, loc, .. } => {
820 if in_progress.contains(name) {
821 return Err(LoadError::CircularAlias { name: name.clone() });
822 }
823 let target = self
824 .anchor_map
825 .get(name)
826 .cloned()
827 .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
828 in_progress.insert(name.clone());
829 let expanded = self.expand_node(target, in_progress)?;
830 in_progress.remove(name);
831 Ok(reloc(expanded, loc))
833 }
834 Node::Mapping {
835 entries,
836 style,
837 tag,
838 loc,
839 meta,
840 } => {
841 let mut expanded_entries = Vec::with_capacity(entries.len());
842 for (k, v) in entries {
843 let ek = self.expand_node(k, in_progress)?;
844 let ev = self.expand_node(v, in_progress)?;
845 expanded_entries.push((ek, ev));
846 }
847 Ok(Node::Mapping {
848 entries: expanded_entries,
849 style,
850 tag,
851 loc,
852 meta,
853 })
854 }
855 Node::Sequence {
856 items,
857 style,
858 tag,
859 loc,
860 meta,
861 } => {
862 let mut expanded_items = Vec::with_capacity(items.len());
863 for item in items {
864 expanded_items.push(self.expand_node(item, in_progress)?);
865 }
866 Ok(Node::Sequence {
867 items: expanded_items,
868 style,
869 tag,
870 loc,
871 meta,
872 })
873 }
874 scalar @ Node::Scalar { .. } => Ok(scalar),
876 }
877 }
878}
879
880const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
882 matches!(
883 peeked,
884 None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
885 )
886}
887
888#[inline]
890fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
891 let (line, column) = line_index.line_column(offset);
892 Pos {
893 byte_offset: offset as usize,
894 line: line as usize,
895 column: column as usize,
896 }
897}
898
899#[inline]
904fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
905 let end_offset = match node {
906 Node::Scalar { loc, .. }
907 | Node::Mapping { loc, .. }
908 | Node::Sequence { loc, .. }
909 | Node::Alias { loc, .. } => loc.end,
910 };
911 line_index.line_column(end_offset).0
912}
913
914#[inline]
923const fn is_block_scalar(node: &Node<Span>) -> bool {
924 matches!(
925 node,
926 Node::Scalar {
927 style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
928 ..
929 }
930 )
931}
932
/// Maximum number of input characters copied into an error value before
/// it is truncated.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Makes scalar text safe to embed in an error value.
///
/// At most [`UNRESOLVED_VALUE_MAX_CHARS`] characters of `raw` are kept;
/// if more were available, `"..."` is appended. Every Unicode control
/// character is replaced by a `\uXXXX` escape. This covers the C1 range
/// U+0080–U+009F (for example NEL and CSI) as well as the ASCII controls,
/// so the result cannot inject line breaks or terminal control sequences
/// into logs.
fn sanitize_scalar_for_error(raw: &str) -> String {
    use std::fmt::Write as _;

    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut chars = raw.chars();

    // Counting characters rather than bytes guarantees that a multi-byte
    // character is never split.
    for ch in chars.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_control() {
            // Formatting into a `String` cannot fail, so the result is ignored.
            let _ = write!(out, "\\u{:04X}", u32::from(ch));
        } else {
            out.push(ch);
        }
    }

    // Anything left in the iterator means the input exceeded the limit.
    if chars.next().is_some() {
        out.push_str("...");
    }
    out
}
971
972#[inline]
987fn apply_schema_to_node(
988 node: &mut Node<Span>,
989 schema: Schema,
990 line_index: &LineIndex,
991) -> Result<()> {
992 match node {
993 Node::Scalar {
994 value,
995 style,
996 tag,
997 loc,
998 meta,
999 } => {
1000 if tag.as_deref() == Some("!") {
1011 *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
1012 return Ok(());
1013 }
1014 match resolve_scalar(schema, *style, value, tag.as_deref()) {
1016 Ok(Some(resolved)) => {
1017 *tag = Some(Cow::Borrowed(resolved.as_str()));
1018 if let Some(m) = meta.as_mut() {
1020 m.tag_loc = None;
1021 if m.is_all_none() {
1022 *meta = None;
1023 }
1024 }
1025 }
1026 Ok(None) => {}
1027 Err(_) => {
1028 return Err(LoadError::UnresolvedScalar {
1029 value: sanitize_scalar_for_error(value),
1030 pos: span_start_to_pos(loc.start, line_index),
1031 });
1032 }
1033 }
1034 }
1035 Node::Mapping { tag, meta, .. } => {
1036 let effective_tag = tag.as_deref().filter(|t| *t != "!");
1039 if let Some(resolved) =
1040 resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1041 {
1042 *tag = Some(Cow::Borrowed(resolved.as_str()));
1043 if let Some(m) = meta.as_mut() {
1044 m.tag_loc = None;
1045 if m.is_all_none() {
1046 *meta = None;
1047 }
1048 }
1049 }
1050 }
1051 Node::Sequence { tag, meta, .. } => {
1052 let effective_tag = tag.as_deref().filter(|t| *t != "!");
1053 if let Some(resolved) =
1054 resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1055 {
1056 *tag = Some(Cow::Borrowed(resolved.as_str()));
1057 if let Some(m) = meta.as_mut() {
1058 m.tag_loc = None;
1059 if m.is_all_none() {
1060 *meta = None;
1061 }
1062 }
1063 }
1064 }
1065 Node::Alias { .. } => {}
1066 }
1067 Ok(())
1068}
1069
1070const fn empty_scalar() -> Node<Span> {
1075 Node::Scalar {
1076 value: String::new(),
1077 style: ScalarStyle::Plain,
1078 tag: None,
1079 loc: Span { start: 0, end: 0 },
1080 meta: None,
1081 }
1082}
1083
1084#[cfg(test)]
1089#[expect(
1090 clippy::expect_used,
1091 clippy::unwrap_used,
1092 clippy::indexing_slicing,
1093 clippy::panic,
1094 reason = "test code"
1095)]
1096mod tests {
1097 use super::*;
1098 use rstest::rstest;
1099
1100 #[test]
1101 fn loader_state_resets_anchor_map_between_documents() {
1102 let result = LoaderBuilder::new()
1104 .resolved()
1105 .build()
1106 .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
1107 assert!(
1108 result.is_err(),
1109 "expected Err: *foo in doc 2 should be undefined"
1110 );
1111 assert!(matches!(
1112 result.unwrap_err(),
1113 LoadError::UndefinedAlias { .. }
1114 ));
1115 }
1116
1117 #[test]
1118 fn register_anchor_increments_count() {
1119 let options = LoaderOptions {
1120 max_anchors: 2,
1121 ..LoaderOptions::default()
1122 };
1123 let mut state = LoadState::new(&options, "");
1124 let node = Node::Scalar {
1125 value: "x".to_owned(),
1126 style: ScalarStyle::Plain,
1127 tag: None,
1128 loc: Span { start: 0, end: 0 },
1129 meta: None,
1130 };
1131 assert!(state.register_anchor("a".to_owned(), &node).is_ok());
1132 assert!(state.register_anchor("b".to_owned(), &node).is_ok());
1133 let err = state
1134 .register_anchor("c".to_owned(), &node)
1135 .expect_err("expected AnchorCountLimitExceeded");
1136 assert!(matches!(
1137 err,
1138 LoadError::AnchorCountLimitExceeded { limit: 2 }
1139 ));
1140 }
1141
1142 #[test]
1143 fn expand_node_detects_circular_alias() {
1144 let options = LoaderOptions {
1145 mode: LoadMode::Resolved,
1146 ..LoaderOptions::default()
1147 };
1148 let mut state = LoadState::new(&options, "");
1149 let alias_node = Node::Alias {
1151 name: "a".to_owned(),
1152 loc: Span { start: 0, end: 0 },
1153 leading_comments: None,
1154 trailing_comment: None,
1155 };
1156 state.anchor_map.insert("a".to_owned(), alias_node.clone());
1157 let mut in_progress = HashSet::new();
1158 let result = state.expand_node(alias_node, &mut in_progress);
1159 assert!(
1160 matches!(result, Err(LoadError::CircularAlias { .. })),
1161 "expected CircularAlias, got: {result:?}"
1162 );
1163 }
1164
1165 #[test]
1170 fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
1171 let docs = load("outer:\n # Style 1\n inner: val\n").unwrap();
1172 let root = &docs[0].root;
1173 let Node::Mapping { entries, .. } = root else {
1174 panic!("expected root mapping");
1175 };
1176 assert_eq!(entries.len(), 1);
1177 let (_outer_key, outer_value) = &entries[0];
1178 let Node::Mapping {
1179 entries: nested, ..
1180 } = outer_value
1181 else {
1182 panic!("expected nested mapping");
1183 };
1184 assert_eq!(nested.len(), 1);
1185 let (inner_key, _) = &nested[0];
1186 assert_eq!(
1187 inner_key.leading_comments(),
1188 &["# Style 1"],
1189 "comment should be attached to the first nested key"
1190 );
1191 }
1192
1193 #[test]
1194 fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
1195 let docs = load("key:\n # leading\n - item1\n - item2\n").unwrap();
1196 let root = &docs[0].root;
1197 let Node::Mapping { entries, .. } = root else {
1198 panic!("expected root mapping");
1199 };
1200 let (_key, seq_value) = &entries[0];
1201 let Node::Sequence { items, .. } = seq_value else {
1202 panic!("expected sequence value");
1203 };
1204 assert_eq!(
1205 items[0].leading_comments(),
1206 &["# leading"],
1207 "comment should be attached to first sequence item"
1208 );
1209 }
1210
1211 #[test]
1212 fn multiple_comments_between_key_and_collection_all_preserved() {
1213 let docs = load("key:\n # first\n # second\n - item\n").unwrap();
1214 let root = &docs[0].root;
1215 let Node::Mapping { entries, .. } = root else {
1216 panic!("expected root mapping");
1217 };
1218 let (_key, seq_value) = &entries[0];
1219 let Node::Sequence { items, .. } = seq_value else {
1220 panic!("expected sequence value");
1221 };
1222 assert_eq!(
1223 items[0].leading_comments(),
1224 &["# first", "# second"],
1225 "both comments should be on first item"
1226 );
1227 }
1228
1229 #[test]
1230 fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1231 let docs = load("outer:\n # Style 1\n inner: val\n").unwrap();
1232 let root = &docs[0].root;
1233 let Node::Mapping { entries, .. } = root else {
1234 panic!("expected root mapping");
1235 };
1236 let (outer_key, _) = &entries[0];
1237 assert!(
1238 outer_key.leading_comments().is_empty(),
1239 "outer key should have no leading comments"
1240 );
1241 assert!(
1242 outer_key.trailing_comment().is_none(),
1243 "outer key should have no trailing comment"
1244 );
1245 }
1246
1247 #[test]
1248 fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1249 let docs = load("key:\n inner: val\n").unwrap();
1250 let root = &docs[0].root;
1251 let Node::Mapping { entries, .. } = root else {
1252 panic!("expected root mapping");
1253 };
1254 let (_key, nested) = &entries[0];
1255 let Node::Mapping {
1256 entries: nested_entries,
1257 ..
1258 } = nested
1259 else {
1260 panic!("expected nested mapping");
1261 };
1262 let (inner_key, _) = &nested_entries[0];
1263 assert!(
1264 inner_key.leading_comments().is_empty(),
1265 "inner key should have no leading comments when there is no comment"
1266 );
1267 }
1268
1269 #[test]
1274 fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1275 let input =
1276 "Lists:\n list-a:\n - item1\n - item2\n\n # Style 2\n list-b:\n - item1\n";
1277 let docs = load(input).unwrap();
1278 let root = &docs[0].root;
1279 let Node::Mapping { entries, .. } = root else {
1280 panic!("expected root mapping");
1281 };
1282 let (_lists_key, nested) = &entries[0];
1283 let Node::Mapping {
1284 entries: nested_entries,
1285 ..
1286 } = nested
1287 else {
1288 panic!("expected nested mapping");
1289 };
1290 assert_eq!(nested_entries.len(), 2);
1291 let (list_b_key, _) = &nested_entries[1];
1292 assert_eq!(
1293 list_b_key.leading_comments(),
1294 &["# Style 2"],
1295 "# Style 2 should be leading comment on list-b key"
1296 );
1297 }
1298
1299 #[test]
1300 fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1301 let input = "outer:\n a:\n - x\n # between\n b: y\n";
1302 let docs = load(input).unwrap();
1303 let root = &docs[0].root;
1304 let Node::Mapping { entries, .. } = root else {
1305 panic!("expected root mapping");
1306 };
1307 let (_outer_key, outer_val) = &entries[0];
1308 let Node::Mapping {
1309 entries: nested, ..
1310 } = outer_val
1311 else {
1312 panic!("expected nested mapping");
1313 };
1314 assert_eq!(nested.len(), 2);
1315 let (b_key, _) = &nested[1];
1316 assert_eq!(
1317 b_key.leading_comments(),
1318 &["# between"],
1319 "# between should be leading comment on b key"
1320 );
1321 }
1322
1323 #[test]
1324 fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1325 let input = "parent:\n child1:\n k: v\n # end-of-child1\n child2: val\n";
1326 let docs = load(input).unwrap();
1327 let root = &docs[0].root;
1328 let Node::Mapping { entries, .. } = root else {
1329 panic!("expected root mapping");
1330 };
1331 let (_parent_key, parent_val) = &entries[0];
1332 let Node::Mapping {
1333 entries: siblings, ..
1334 } = parent_val
1335 else {
1336 panic!("expected parent mapping value");
1337 };
1338 assert_eq!(siblings.len(), 2);
1339 let (child2_key, _) = &siblings[1];
1340 assert_eq!(
1341 child2_key.leading_comments(),
1342 &["# end-of-child1"],
1343 "# end-of-child1 should be leading comment on child2 key"
1344 );
1345 }
1346
1347 #[test]
1348 fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
1349 let input = "items:\n - a\n - b\n # tail\n";
1350 let docs = load(input).unwrap();
1351 assert!(!docs.is_empty(), "document should parse without error");
1353 let root = &docs[0].root;
1355 let Node::Mapping { entries, .. } = root else {
1356 panic!("expected root mapping");
1357 };
1358 let (_items_key, seq_val) = &entries[0];
1359 let Node::Sequence { items, .. } = seq_val else {
1360 panic!("expected sequence value");
1361 };
1362 assert_eq!(items.len(), 2, "sequence items must not be lost");
1363 }
1364
1365 #[test]
1366 fn no_overflow_comments_when_collection_ends_cleanly() {
1367 let docs = load("key:\n - item1\n - item2\n").unwrap();
1368 let root = &docs[0].root;
1369 let Node::Mapping { entries, .. } = root else {
1370 panic!("expected root mapping");
1371 };
1372 let (_key, seq_val) = &entries[0];
1373 let Node::Sequence { items, .. } = seq_val else {
1374 panic!("expected sequence value");
1375 };
1376 for item in items {
1377 assert!(
1378 item.leading_comments().is_empty(),
1379 "items should have no leading comments"
1380 );
1381 }
1382 }
1383
1384 #[test]
1389 fn original_bug_report_input_preserves_both_comments() {
1390 let input = "Lists:\n # Style 1\n list-a:\n - item1\n - item2\n\n # Style 2\n list-b:\n - item1\n - item2\n";
1391 let docs = load(input).unwrap();
1392 let root = &docs[0].root;
1393 let Node::Mapping { entries, .. } = root else {
1394 panic!("expected root mapping");
1395 };
1396 let (_lists_key, nested) = &entries[0];
1397 let Node::Mapping {
1398 entries: nested_entries,
1399 ..
1400 } = nested
1401 else {
1402 panic!("expected nested mapping");
1403 };
1404 assert_eq!(nested_entries.len(), 2);
1405 let (first_key, _) = &nested_entries[0];
1406 let (second_key, _) = &nested_entries[1];
1407 assert_eq!(
1408 first_key.leading_comments(),
1409 &["# Style 1"],
1410 "list-a should have # Style 1 as leading comment"
1411 );
1412 assert_eq!(
1413 second_key.leading_comments(),
1414 &["# Style 2"],
1415 "list-b should have # Style 2 as leading comment"
1416 );
1417 }
1418
1419 #[test]
1420 fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1421 let input = "map:\n # leading\n key: value # trailing\n # next-leading\n key2: v2\n";
1422 let docs = load(input).unwrap();
1423 let root = &docs[0].root;
1424 let Node::Mapping { entries, .. } = root else {
1425 panic!("expected root mapping");
1426 };
1427 let (_map_key, map_val) = &entries[0];
1428 let Node::Mapping {
1429 entries: siblings, ..
1430 } = map_val
1431 else {
1432 panic!("expected mapping value");
1433 };
1434 assert_eq!(siblings.len(), 2);
1435 let (key1, val1) = &siblings[0];
1436 let (key2, _) = &siblings[1];
1437 assert_eq!(key1.leading_comments(), &["# leading"]);
1438 assert_eq!(val1.trailing_comment(), Some("# trailing"));
1439 assert_eq!(key2.leading_comments(), &["# next-leading"]);
1440 }
1441
1442 #[test]
1443 fn deeply_nested_overflow_comments_reach_correct_sibling() {
1444 let input = "top:\n mid:\n - x\n # deep-overflow\n next: y\n";
1445 let docs = load(input).unwrap();
1446 let root = &docs[0].root;
1447 let Node::Mapping { entries, .. } = root else {
1448 panic!("expected root mapping");
1449 };
1450 let (_top_key, top_val) = &entries[0];
1451 let Node::Mapping {
1452 entries: top_entries,
1453 ..
1454 } = top_val
1455 else {
1456 panic!("expected top-level mapping");
1457 };
1458 assert_eq!(top_entries.len(), 2);
1459 let (next_key, _) = &top_entries[1];
1460 assert_eq!(
1461 next_key.leading_comments(),
1462 &["# deep-overflow"],
1463 "# deep-overflow should propagate from nested sequence to next sibling"
1464 );
1465 }
1466
1467 #[rstest]
1472 #[case::bare_document("key: value\n", false, false)]
1473 #[case::start_marker_only("---\nkey: value\n", true, false)]
1474 #[case::end_marker_only("key: value\n...\n", false, true)]
1475 #[case::both_markers("---\nkey: value\n...\n", true, true)]
1476 #[case::empty_with_both_markers("---\n...\n", true, true)]
1477 fn document_marker_flags_match_input(
1478 #[case] input: &str,
1479 #[case] expected_start: bool,
1480 #[case] expected_end: bool,
1481 ) {
1482 let docs = load(input).expect("load failed");
1483 assert_eq!(docs.len(), 1);
1484 assert_eq!(docs[0].explicit_start, expected_start, "explicit_start");
1485 assert_eq!(docs[0].explicit_end, expected_end, "explicit_end");
1486 }
1487
1488 #[test]
1489 fn multi_document_flags_are_independent() {
1490 let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
1491 assert_eq!(docs.len(), 3);
1492 assert!(!docs[0].explicit_start, "doc1 explicit_start");
1493 assert!(!docs[0].explicit_end, "doc1 explicit_end");
1494 assert!(docs[1].explicit_start, "doc2 explicit_start");
1495 assert!(docs[1].explicit_end, "doc2 explicit_end");
1496 assert!(docs[2].explicit_start, "doc3 explicit_start");
1497 assert!(!docs[2].explicit_end, "doc3 explicit_end");
1498 }
1499
1500 #[rstest]
1505 #[case::newline("foo\nbar", '\n', "\\u000A", "foo\\u000Abar")]
1506 #[case::carriage_return("foo\rbar", '\r', "\\u000D", "foo\\u000Dbar")]
1507 #[case::null_byte("foo\0bar", '\0', "\\u0000", "foo\\u0000bar")]
1508 fn sanitize_replaces_control_char_with_escape(
1509 #[case] input: &str,
1510 #[case] raw_char: char,
1511 #[case] escape_seq: &str,
1512 #[case] expected: &str,
1513 ) {
1514 let result = sanitize_scalar_for_error(input);
1515 assert!(
1516 !result.contains(raw_char),
1517 "output must not contain the raw control character"
1518 );
1519 assert!(
1520 result.contains(escape_seq),
1521 "output must contain {escape_seq} escape, got: {result:?}"
1522 );
1523 assert_eq!(result, expected);
1524 }
1525
1526 #[test]
1527 fn sanitize_short_value_stored_verbatim() {
1528 let input = "hello";
1529 let result = sanitize_scalar_for_error(input);
1530 assert_eq!(result, "hello");
1531 assert!(
1532 !result.ends_with("..."),
1533 "short value must not be truncated"
1534 );
1535 }
1536
1537 #[test]
1538 fn sanitize_value_at_exact_limit_not_truncated() {
1539 let input = "a".repeat(128);
1540 let result = sanitize_scalar_for_error(&input);
1541 assert_eq!(
1542 result.len(),
1543 128,
1544 "128-char input must produce 128-char output"
1545 );
1546 assert!(
1547 !result.ends_with("..."),
1548 "value at exact limit must not be truncated"
1549 );
1550 }
1551
1552 #[test]
1553 fn sanitize_value_over_limit_truncated() {
1554 let input = "a".repeat(129);
1555 let result = sanitize_scalar_for_error(&input);
1556 assert!(
1557 result.ends_with("..."),
1558 "value over limit must end with '...'"
1559 );
1560 assert_eq!(
1561 result.len(),
1562 128 + 3,
1563 "truncated output must be 128 chars + 3 ellipsis chars"
1564 );
1565 }
1566
1567 #[test]
1568 fn sanitize_multibyte_char_boundary_not_split() {
1569 let input: String = "中".repeat(127) + "ab"; let result = sanitize_scalar_for_error(&input);
1571 assert!(
1572 result.ends_with("..."),
1573 "129-char multibyte input should be truncated"
1574 );
1575 let char_count = result.trim_end_matches("...").chars().count();
1576 assert_eq!(
1577 char_count, 128,
1578 "truncated portion must be exactly 128 chars"
1579 );
1580 }
1581
1582 fn load_root(input: &str) -> Node<Span> {
1587 load(input).expect("load failed").remove(0).root
1588 }
1589
1590 fn node_tag(node: Node<Span>) -> Option<Cow<'static, str>> {
1591 match node {
1592 Node::Scalar { tag, .. } | Node::Mapping { tag, .. } | Node::Sequence { tag, .. } => {
1593 tag
1594 }
1595 Node::Alias { .. } => None,
1596 }
1597 }
1598
1599 #[rstest]
1600 #[case::str_tag("hello\n")]
1601 #[case::int_tag("42\n")]
1602 #[case::null_tag("null\n")]
1603 #[case::map_tag("a: 1\n")]
1604 #[case::seq_tag("- a\n")]
1605 #[case::bare_excl_tag("! hello\n")]
1606 fn resolver_emitted_tag_is_borrowed(#[case] input: &str) {
1607 let tag = node_tag(load_root(input));
1608 assert!(
1609 matches!(tag, Some(Cow::Borrowed(_))),
1610 "resolver-emitted tag must be Borrowed, got: {tag:?}"
1611 );
1612 }
1613
1614 #[rstest]
1615 #[case::scalar("!!str hello\n")]
1616 #[case::mapping("!!map\na: 1\n")]
1617 #[case::sequence("!!seq\n- a\n")]
1618 fn user_authored_tag_is_owned(#[case] input: &str) {
1619 let tag = node_tag(load_root(input));
1620 assert!(
1621 matches!(tag, Some(Cow::Owned(_))),
1622 "user-authored tag must be Owned, got: {tag:?}"
1623 );
1624 }
1625
1626 #[test]
1627 fn alias_node_has_no_tag_field() {
1628 let docs = LoaderBuilder::new()
1629 .build()
1630 .load("- &a x\n- *a\n")
1631 .expect("load failed");
1632 let Node::Sequence { items, .. } = &docs[0].root else {
1633 panic!("expected root sequence");
1634 };
1635 assert!(
1636 matches!(items[1], Node::Alias { .. }),
1637 "second item must be Alias in lossless mode"
1638 );
1639 }
1640
1641 #[test]
1642 fn tag_value_content_preserved_across_cow_variants() {
1643 let Node::Scalar {
1644 tag: tag_resolver, ..
1645 } = load_root("hello\n")
1646 else {
1647 panic!("expected scalar");
1648 };
1649 assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));
1650
1651 let Node::Scalar { tag: tag_user, .. } = load_root("!custom hello\n") else {
1652 panic!("expected scalar");
1653 };
1654 assert_eq!(tag_user.as_deref(), Some("!custom"));
1655 }
1656
1657 fn node_meta_is_none(node: &Node<Span>) -> bool {
1662 matches!(
1663 node,
1664 Node::Scalar { meta: None, .. }
1665 | Node::Mapping { meta: None, .. }
1666 | Node::Sequence { meta: None, .. }
1667 )
1668 }
1669
1670 #[rstest]
1671 #[case::plain_scalar("hello\n")]
1672 #[case::plain_mapping("a: 1\n")]
1673 #[case::plain_sequence("- a\n")]
1674 fn loaded_node_with_no_meta_fields_has_meta_none(#[case] input: &str) {
1675 let docs = load(input).unwrap();
1676 let root = &docs[0].root;
1677 assert!(
1678 node_meta_is_none(root),
1679 "plain node must have meta: None, got: {root:?}"
1680 );
1681 }
1682
1683 #[test]
1684 fn loaded_anchored_scalar_has_meta_some() {
1685 let docs = load("- &foo bar\n").unwrap();
1686 let Node::Sequence { items, .. } = &docs[0].root else {
1687 panic!("expected root Sequence");
1688 };
1689 let item = &items[0];
1690 assert!(
1691 matches!(item, Node::Scalar { meta: Some(_), .. }),
1692 "anchored scalar must have meta: Some, got: {item:?}"
1693 );
1694 assert_eq!(item.anchor(), Some("foo"));
1695 }
1696
1697 #[test]
1698 fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
1699 let docs = load("&tag hello\n").unwrap();
1700 let root = &docs[0].root;
1701 assert!(
1702 matches!(root, Node::Scalar { meta: Some(_), .. }),
1703 "anchored scalar must have meta: Some"
1704 );
1705 assert!(
1706 root.anchor_loc().is_some(),
1707 "anchor_loc() must be Some for anchored scalar"
1708 );
1709 }
1710
1711 #[rstest]
1716 #[case::block_mapping_anchor_only("&a\nk: v\n", Some("a"), false)]
1718 #[case::block_mapping_tag_only("!mytag\nk: v\n", None, true)]
1719 #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n", Some("a"), true)]
1720 #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n", Some("a"), true)]
1721 #[case::block_sequence_anchor_only("&a\n- item\n", Some("a"), false)]
1723 #[case::block_sequence_tag_only("!mytag\n- item\n", None, true)]
1724 #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n", Some("a"), true)]
1725 #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n", Some("a"), true)]
1726 #[case::flow_mapping_anchor_only("&a {k: v}\n", Some("a"), false)]
1728 #[case::flow_mapping_tag_only("!mytag {k: v}\n", None, true)]
1729 #[case::flow_mapping_anchor_then_tag("&a !mytag {k: v}\n", Some("a"), true)]
1730 #[case::flow_mapping_tag_then_anchor("!mytag &a {k: v}\n", Some("a"), true)]
1731 #[case::flow_sequence_anchor_only("&a [item]\n", Some("a"), false)]
1733 #[case::flow_sequence_tag_only("!mytag [item]\n", None, true)]
1734 #[case::flow_sequence_anchor_then_tag("&a !mytag [item]\n", Some("a"), true)]
1735 #[case::flow_sequence_tag_then_anchor("!mytag &a [item]\n", Some("a"), true)]
1736 fn combined_properties_attach_to_root_collection(
1737 #[case] input: &str,
1738 #[case] expected_anchor: Option<&str>,
1739 #[case] expected_has_tag: bool,
1740 ) {
1741 let docs = load(input).unwrap();
1742 let root = &docs[0].root;
1743 assert_eq!(root.anchor(), expected_anchor, "anchor on root collection");
1744 assert_eq!(
1745 root.tag_loc().is_some(),
1746 expected_has_tag,
1747 "tag_loc on root collection"
1748 );
1749 }
1750
1751 #[rstest]
1753 #[case::block_mapping_anchor_only("&a\nk: v\n")]
1755 #[case::block_mapping_tag_only("!mytag\nk: v\n")]
1756 #[case::block_mapping_anchor_then_tag("&a !mytag\nk: v\n")]
1757 #[case::block_mapping_tag_then_anchor("!mytag &a\nk: v\n")]
1758 #[case::block_sequence_anchor_only("&a\n- item\n")]
1760 #[case::block_sequence_tag_only("!mytag\n- item\n")]
1761 #[case::block_sequence_anchor_then_tag("&a !mytag\n- item\n")]
1762 #[case::block_sequence_tag_then_anchor("!mytag &a\n- item\n")]
1763 fn first_child_of_block_collection_has_no_properties(#[case] input: &str) {
1764 let docs = load(input).unwrap();
1765 let root = &docs[0].root;
1766 let first_child: &Node<Span> = match root {
1767 Node::Mapping { entries, .. } => &entries[0].0,
1768 Node::Sequence { items, .. } => &items[0],
1769 Node::Scalar { .. } | Node::Alias { .. } => panic!("expected block collection"),
1770 };
1771 assert_eq!(
1772 first_child.anchor(),
1773 None,
1774 "anchor must not appear on first child"
1775 );
1776 assert!(
1777 first_child.tag_loc().is_none(),
1778 "tag_loc must not appear on first child"
1779 );
1780 }
1781
1782 #[test]
1785 fn anchor_on_block_mapping_with_tag_is_resolvable_via_alias() {
1786 let input = "root:\n tagged: &a !mytag\n k: v\n ref: *a\n";
1787 let result = LoaderBuilder::new().resolved().build().load(input);
1788 assert!(
1789 result.is_ok(),
1790 "alias *a must resolve — anchor must be on the mapping, not lost to first key: {result:?}"
1791 );
1792 }
1793}