1use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::iter::Peekable;
35
36use std::sync::Arc;
37
38use crate::error::Error;
39use crate::event::{Event, EventMeta, ScalarStyle};
40use crate::node::{Document, Node, NodeMeta};
41use crate::pos::{LineIndex, Pos, Span};
42use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
43
44use comments::{attach_leading_comments, attach_trailing_comment};
45use reloc::reloc;
46use stream::{
47 consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
48 with_hash_prefix,
49};
50
51mod comments;
52mod reloc;
53mod stream;
54
/// Errors produced while turning a parsed event stream into documents.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// The underlying event parser reported an error; `pos` and `message`
    /// are copied from that error.
    #[error("parse error at {pos:?}: {message}")]
    Parse {
        /// Position carried by the parser error.
        pos: Pos,
        /// Message carried by the parser error.
        message: String,
    },

    /// The event stream ended where another event was required.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// Collections were nested deeper than `LoaderOptions::max_nesting_depth`.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// A document defined more distinct anchors than `LoaderOptions::max_anchors`.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// Alias expansion visited more nodes than `LoaderOptions::max_expanded_nodes`.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// An alias was reached again while it was still being expanded.
    #[error("circular alias reference: '{name}'")]
    CircularAlias {
        /// Name of the alias that closes the cycle.
        name: String,
    },

    /// An alias referred to an anchor that is not registered in the current document.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias {
        /// Name of the missing anchor.
        name: String,
    },

    /// Scalar resolution failed for the configured schema.
    ///
    /// NOTE(review): the message mentions the JSON schema, but the loader
    /// raises this for any `Err` returned by `resolve_scalar` — presumably
    /// only the JSON schema ever fails; confirm against `crate::schema`.
    #[error("JSON schema: plain scalar does not match any type pattern")]
    UnresolvedScalar {
        /// The offending scalar text, sanitized and truncated for display.
        value: String,
        /// Position of the start of the scalar.
        pos: Pos,
    },
}
127
/// Module-local shorthand for results whose error type is [`LoadError`].
type Result<T> = std::result::Result<T, LoadError>;
130
/// Peekable, boxed iterator of parser results: each item is either an
/// `(Event, Span)` pair or a parser [`Error`].
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
134
135#[expect(
137 clippy::type_complexity,
138 reason = "four-tuple mirrors EventMeta fields; extracting a type alias here would obscure the one-to-one correspondence"
139)]
140#[inline]
141fn unpack_meta(
142 meta: Option<Box<EventMeta<'_>>>,
143) -> (
144 Option<&'_ str>,
145 Option<Span>,
146 Option<std::borrow::Cow<'_, str>>,
147 Option<Span>,
148) {
149 meta.map_or((None, None, None, None), |m| {
150 (m.anchor, m.anchor_loc, m.tag, m.tag_loc)
151 })
152}
153
/// Controls how the loader treats aliases.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Aliases are kept in the tree as `Node::Alias` nodes.
    Lossless,
    /// Aliases are replaced by an expanded copy of the anchored node.
    Resolved,
}
166
/// Limits and behavior switches consulted by the loader.
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum depth of nested mappings/sequences before loading fails.
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchors per document before loading fails.
    pub max_anchors: usize,
    /// Maximum number of nodes counted during anchor registration and alias
    /// expansion (per document) before loading fails.
    pub max_expanded_nodes: usize,
    /// Whether aliases are preserved or expanded.
    pub mode: LoadMode,
    /// Schema used to resolve tags on loaded nodes.
    pub schema: Schema,
}
187
188impl Default for LoaderOptions {
189 fn default() -> Self {
190 Self {
191 max_nesting_depth: 512,
192 max_anchors: 10_000,
193 max_expanded_nodes: 1_000_000,
194 mode: LoadMode::Lossless,
195 schema: Schema::Core,
196 }
197 }
198}
199
/// Fluent builder that accumulates [`LoaderOptions`] and produces a [`Loader`].
pub struct LoaderBuilder {
    /// Options collected so far; handed to the `Loader` by `build`.
    options: LoaderOptions,
}
215
impl LoaderBuilder {
    /// Starts a builder from `LoaderOptions::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            options: LoaderOptions::default(),
        }
    }

    /// Selects [`LoadMode::Lossless`].
    #[must_use]
    pub const fn lossless(mut self) -> Self {
        self.options.mode = LoadMode::Lossless;
        self
    }

    /// Selects [`LoadMode::Resolved`].
    #[must_use]
    pub const fn resolved(mut self) -> Self {
        self.options.mode = LoadMode::Resolved;
        self
    }

    /// Sets the maximum collection nesting depth.
    #[must_use]
    pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
        self.options.max_nesting_depth = limit;
        self
    }

    /// Sets the maximum number of distinct anchors per document.
    #[must_use]
    pub const fn max_anchors(mut self, limit: usize) -> Self {
        self.options.max_anchors = limit;
        self
    }

    /// Sets the per-document budget of nodes counted during alias expansion.
    #[must_use]
    pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
        self.options.max_expanded_nodes = limit;
        self
    }

    /// Sets the schema used for tag resolution.
    #[must_use]
    pub const fn schema(mut self, s: Schema) -> Self {
        self.options.schema = s;
        self
    }

    /// Finishes the builder, moving the collected options into a [`Loader`].
    #[must_use]
    pub const fn build(self) -> Loader {
        Loader {
            options: self.options,
        }
    }
}
278
impl Default for LoaderBuilder {
    /// Same as [`LoaderBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
284
/// Configured loader; create one through [`LoaderBuilder`].
pub struct Loader {
    /// Options applied to every `load` call.
    options: LoaderOptions,
}
293
294impl Loader {
295 pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
302 let mut state = LoadState::new(&self.options, input);
303 let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
304 Box::new(crate::parse_events(input));
305 state.run(iter.peekable())
306 }
307}
308
309pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
335 LoaderBuilder::new().lossless().build().load(input)
336}
337
/// Mutable state threaded through one `Loader::load` call.
struct LoadState<'opt> {
    /// Options of the owning loader.
    options: &'opt LoaderOptions,
    /// Anchors registered in the current document (cleared per document).
    anchor_map: HashMap<String, Node<Span>>,
    /// Number of distinct anchors registered in the current document.
    anchor_count: usize,
    /// Current collection nesting depth.
    depth: usize,
    /// Nodes counted against `max_expanded_nodes` in the current document.
    expanded_nodes: usize,
    /// Comments collected but not yet attached to a node; the next mapping
    /// key or sequence item picks them up as leading comments.
    pending_leading: Vec<String>,
    /// Line index of the input, shared with every produced `Document`.
    line_index: Arc<LineIndex>,
}
362
363impl<'opt> LoadState<'opt> {
364 fn new(options: &'opt LoaderOptions, input: &str) -> Self {
365 Self {
366 options,
367 anchor_map: HashMap::new(),
368 anchor_count: 0,
369 depth: 0,
370 expanded_nodes: 0,
371 pending_leading: Vec::new(),
372 line_index: Arc::new(LineIndex::new(input)),
373 }
374 }
375
376 fn reset_for_document(&mut self) {
377 self.anchor_map.clear();
378 self.anchor_count = 0;
379 self.expanded_nodes = 0;
380 self.pending_leading.clear();
381 }
382
383 fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
384 let mut docs: Vec<Document<Span>> = Vec::new();
385
386 match stream.next() {
388 Some(Ok(_)) | None => {}
389 Some(Err(e)) => {
390 return Err(LoadError::Parse {
391 pos: e.pos,
392 message: e.message,
393 });
394 }
395 }
396
397 loop {
398 match next_from(&mut stream)? {
400 None | Some((Event::StreamEnd, _)) => break,
401 Some((
402 Event::DocumentStart {
403 explicit,
404 version,
405 tag_directives,
406 },
407 _,
408 )) => {
409 let doc_explicit_start = explicit;
410 let doc_version = version;
411 let doc_tags = tag_directives;
412 self.reset_for_document();
413
414 let mut doc_comments: Vec<String> = Vec::new();
415
416 consume_leading_doc_comments(&mut stream, &mut doc_comments, &self.line_index)?;
418
419 let root = if is_document_end(stream.peek()) {
421 let mut node = empty_scalar();
423 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
424 node
425 } else {
426 self.parse_node(&mut stream)?
427 };
428
429 let doc_explicit_end =
431 if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
432 let end_explicit = *explicit;
433 let _ = stream.next();
434 end_explicit
435 } else {
436 false
437 };
438
439 docs.push(Document {
440 root,
441 version: doc_version,
442 tags: doc_tags,
443 comments: doc_comments,
444 explicit_start: doc_explicit_start,
445 explicit_end: doc_explicit_end,
446 line_index: Some(self.line_index.clone()),
447 });
448 }
449 Some(_) => {
450 }
452 }
453 }
454
455 Ok(docs)
456 }
457
458 #[expect(
462 clippy::too_many_lines,
463 reason = "match-on-event-type; splitting would obscure flow"
464 )]
465 fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
466 if matches!(
470 stream.peek(),
471 Some(Ok((
472 Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
473 _
474 )))
475 ) {
476 return Ok(empty_scalar());
477 }
478
479 let Some((event, span)) = next_from(stream)? else {
480 return Ok(empty_scalar());
481 };
482
483 match event {
484 Event::Scalar { value, style, meta } => {
485 let (anchor, anchor_loc, tag, tag_loc) = unpack_meta(meta);
486 let anchor = anchor.map(str::to_owned);
487 let mut node = Node::Scalar {
488 value: value.into_owned(),
489 style,
490 tag: tag.map(|t| Cow::Owned(t.into_owned())),
491 loc: span,
492 meta: NodeMeta {
493 anchor,
494 anchor_loc,
495 tag_loc,
496 leading_comments: None,
497 trailing_comment: None,
498 }
499 .into_option(),
500 };
501 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
502 if let Some(name) = node.anchor() {
503 self.register_anchor(name.to_owned(), &node)?;
504 }
505 Ok(node)
506 }
507
508 Event::MappingStart { style, meta } => {
509 let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
510 let anchor = event_anchor.map(str::to_owned);
511 let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
512 let anchor_for_registration = anchor.clone();
513
514 self.depth += 1;
515 if self.depth > self.options.max_nesting_depth {
516 return Err(LoadError::NestingDepthLimitExceeded {
517 limit: self.options.max_nesting_depth,
518 });
519 }
520
521 let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
522 let mut end_span = span;
523
524 loop {
525 let raw_leading = consume_leading_comments(stream)?;
529 let leading = if self.pending_leading.is_empty() {
530 raw_leading
531 } else {
532 let mut combined = std::mem::take(&mut self.pending_leading);
533 combined.extend(raw_leading);
534 combined
535 };
536
537 match stream.peek() {
538 None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
539 if !leading.is_empty() {
544 self.pending_leading = leading;
545 }
546 break;
547 }
548 Some(Err(_)) => {
549 return Err(match stream.next() {
551 Some(Err(e)) => LoadError::Parse {
552 pos: e.pos,
553 message: e.message,
554 },
555 _ => LoadError::UnexpectedEndOfStream,
556 });
557 }
558 Some(Ok(_)) => {}
559 }
560
561 let mut key = self.parse_node(stream)?;
562 attach_leading_comments(&mut key, leading);
563
564 let mut value = self.parse_node(stream)?;
565
566 if !is_block_scalar(&value)
575 && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
576 {
577 let value_end_line = node_end_line(&value, &self.line_index);
578 if let Some(trail) =
579 peek_trailing_comment(stream, value_end_line, &self.line_index)?
580 {
581 attach_trailing_comment(&mut value, trail);
582 }
583 }
584
585 entries.push((key, value));
586 }
587
588 if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
590 end_span = *end;
591 let _ = stream.next();
592 }
593 self.depth -= 1;
594
595 let mut node = Node::Mapping {
596 entries,
597 style,
598 tag,
599 loc: Span {
600 start: span.start,
601 end: end_span.end,
602 },
603 meta: NodeMeta {
604 anchor,
605 anchor_loc,
606 tag_loc,
607 leading_comments: None,
608 trailing_comment: None,
609 }
610 .into_option(),
611 };
612 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
613 if let Some(name) = anchor_for_registration {
614 self.register_anchor(name, &node)?;
615 }
616 Ok(node)
617 }
618
619 Event::SequenceStart { style, meta } => {
620 let (event_anchor, anchor_loc, event_tag, tag_loc) = unpack_meta(meta);
621 let anchor = event_anchor.map(str::to_owned);
622 let tag = event_tag.map(|t| Cow::Owned(t.into_owned()));
623 let anchor_for_registration = anchor.clone();
624
625 self.depth += 1;
626 if self.depth > self.options.max_nesting_depth {
627 return Err(LoadError::NestingDepthLimitExceeded {
628 limit: self.options.max_nesting_depth,
629 });
630 }
631
632 let mut items: Vec<Node<Span>> = Vec::new();
633 let mut end_span = span;
634
635 loop {
636 let raw_leading = consume_leading_comments(stream)?;
640 let leading = if self.pending_leading.is_empty() {
641 raw_leading
642 } else {
643 let mut combined = std::mem::take(&mut self.pending_leading);
644 combined.extend(raw_leading);
645 combined
646 };
647
648 match stream.peek() {
649 None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
650 if !leading.is_empty() {
656 self.pending_leading = leading;
657 }
658 break;
659 }
660 Some(Err(_)) => {
661 return Err(match stream.next() {
663 Some(Err(e)) => LoadError::Parse {
664 pos: e.pos,
665 message: e.message,
666 },
667 _ => LoadError::UnexpectedEndOfStream,
668 });
669 }
670 Some(Ok(_)) => {}
671 }
672
673 let mut item = self.parse_node(stream)?;
674 attach_leading_comments(&mut item, leading);
675
676 if !is_block_scalar(&item)
682 && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
683 {
684 let item_end_line = node_end_line(&item, &self.line_index);
685 if let Some(trail) =
686 peek_trailing_comment(stream, item_end_line, &self.line_index)?
687 {
688 attach_trailing_comment(&mut item, trail);
689 }
690 }
691
692 items.push(item);
693 }
694
695 if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
697 end_span = *end;
698 let _ = stream.next();
699 }
700 self.depth -= 1;
701
702 let mut node = Node::Sequence {
703 items,
704 style,
705 tag,
706 loc: Span {
707 start: span.start,
708 end: end_span.end,
709 },
710 meta: NodeMeta {
711 anchor,
712 anchor_loc,
713 tag_loc,
714 leading_comments: None,
715 trailing_comment: None,
716 }
717 .into_option(),
718 };
719 apply_schema_to_node(&mut node, self.options.schema, &self.line_index)?;
720 if let Some(name) = anchor_for_registration {
721 self.register_anchor(name, &node)?;
722 }
723 Ok(node)
724 }
725
726 Event::Alias { name } => {
727 let name = name.to_owned();
728 self.resolve_alias(&name, span)
729 }
730
731 Event::Comment { text } => {
732 self.pending_leading.push(with_hash_prefix(text));
738 self.parse_node(stream)
739 }
740
741 Event::StreamStart
742 | Event::StreamEnd
743 | Event::DocumentStart { .. }
744 | Event::DocumentEnd { .. }
745 | Event::MappingEnd
746 | Event::SequenceEnd => {
747 Ok(empty_scalar())
749 }
750 }
751 }
752
753 fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
754 if !self.anchor_map.contains_key(&name) {
755 self.anchor_count += 1;
756 if self.anchor_count > self.options.max_anchors {
757 return Err(LoadError::AnchorCountLimitExceeded {
758 limit: self.options.max_anchors,
759 });
760 }
761 }
762 if self.options.mode == LoadMode::Resolved {
766 self.expanded_nodes += 1;
767 if self.expanded_nodes > self.options.max_expanded_nodes {
768 return Err(LoadError::AliasExpansionLimitExceeded {
769 limit: self.options.max_expanded_nodes,
770 });
771 }
772 self.anchor_map.insert(name, node.clone());
773 } else {
774 self.anchor_map.insert(name, empty_scalar());
777 }
778 Ok(())
779 }
780
781 fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
782 match self.options.mode {
783 LoadMode::Lossless => Ok(Node::Alias {
784 name: name.to_owned(),
785 loc,
786 leading_comments: None,
787 trailing_comment: None,
788 }),
789 LoadMode::Resolved => {
790 let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
791 LoadError::UndefinedAlias {
792 name: name.to_owned(),
793 }
794 })?;
795 let mut in_progress: HashSet<String> = HashSet::new();
796 self.expand_node(anchored, &mut in_progress)
797 }
798 }
799 }
800
801 fn expand_node(
804 &mut self,
805 node: Node<Span>,
806 in_progress: &mut HashSet<String>,
807 ) -> Result<Node<Span>> {
808 self.expanded_nodes += 1;
812 if self.expanded_nodes > self.options.max_expanded_nodes {
813 return Err(LoadError::AliasExpansionLimitExceeded {
814 limit: self.options.max_expanded_nodes,
815 });
816 }
817
818 match node {
819 Node::Alias { ref name, loc, .. } => {
820 if in_progress.contains(name) {
821 return Err(LoadError::CircularAlias { name: name.clone() });
822 }
823 let target = self
824 .anchor_map
825 .get(name)
826 .cloned()
827 .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
828 in_progress.insert(name.clone());
829 let expanded = self.expand_node(target, in_progress)?;
830 in_progress.remove(name);
831 Ok(reloc(expanded, loc))
833 }
834 Node::Mapping {
835 entries,
836 style,
837 tag,
838 loc,
839 meta,
840 } => {
841 let mut expanded_entries = Vec::with_capacity(entries.len());
842 for (k, v) in entries {
843 let ek = self.expand_node(k, in_progress)?;
844 let ev = self.expand_node(v, in_progress)?;
845 expanded_entries.push((ek, ev));
846 }
847 Ok(Node::Mapping {
848 entries: expanded_entries,
849 style,
850 tag,
851 loc,
852 meta,
853 })
854 }
855 Node::Sequence {
856 items,
857 style,
858 tag,
859 loc,
860 meta,
861 } => {
862 let mut expanded_items = Vec::with_capacity(items.len());
863 for item in items {
864 expanded_items.push(self.expand_node(item, in_progress)?);
865 }
866 Ok(Node::Sequence {
867 items: expanded_items,
868 style,
869 tag,
870 loc,
871 meta,
872 })
873 }
874 scalar @ Node::Scalar { .. } => Ok(scalar),
876 }
877 }
878}
879
/// Returns `true` when `peeked` shows that the current document has no
/// (further) content: the stream is exhausted, or the next event is
/// `DocumentEnd` or `StreamEnd`. A peeked parser error yields `false`.
const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
    matches!(
        peeked,
        None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
    )
}
887
888#[inline]
890fn span_start_to_pos(offset: u32, line_index: &LineIndex) -> Pos {
891 let (line, column) = line_index.line_column(offset);
892 Pos {
893 byte_offset: offset as usize,
894 line: line as usize,
895 column: column as usize,
896 }
897}
898
899#[inline]
904fn node_end_line(node: &Node<Span>, line_index: &LineIndex) -> u32 {
905 let end_offset = match node {
906 Node::Scalar { loc, .. }
907 | Node::Mapping { loc, .. }
908 | Node::Sequence { loc, .. }
909 | Node::Alias { loc, .. } => loc.end,
910 };
911 line_index.line_column(end_offset).0
912}
913
/// Returns `true` when `node` is a scalar written in literal or folded
/// block style.
#[inline]
const fn is_block_scalar(node: &Node<Span>) -> bool {
    matches!(
        node,
        Node::Scalar {
            style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
            ..
        }
    )
}
932
/// Maximum number of characters of a scalar that is echoed into an error.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Makes a raw scalar safe to embed in an error value.
///
/// * At most [`UNRESOLVED_VALUE_MAX_CHARS`] characters are kept; when input
///   was cut off, `"..."` is appended.
/// * Every Unicode control character (C0, DEL and C1, i.e. U+0000–U+001F
///   and U+007F–U+009F) is replaced by a `\uXXXX` escape, so line breaks
///   such as NEL (U+0085) and terminal control introducers such as CSI
///   (U+009B) cannot reach logs or terminals unescaped.
/// * All other characters are copied unchanged.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut truncated = false;

    for (i, ch) in raw.chars().enumerate() {
        if i >= UNRESOLVED_VALUE_MAX_CHARS {
            truncated = true;
            break;
        }
        // `is_control` (general category Cc) also covers the C1 range that
        // `is_ascii_control` lets through; all Cc code points fit in four
        // hex digits.
        if ch.is_control() {
            let escaped = format!("\\u{:04X}", ch as u32);
            out.push_str(&escaped);
        } else {
            out.push(ch);
        }
    }

    if truncated {
        out.push_str("...");
    }
    out
}
971
972#[inline]
987fn apply_schema_to_node(
988 node: &mut Node<Span>,
989 schema: Schema,
990 line_index: &LineIndex,
991) -> Result<()> {
992 match node {
993 Node::Scalar {
994 value,
995 style,
996 tag,
997 loc,
998 meta,
999 } => {
1000 if tag.as_deref() == Some("!") {
1011 *tag = Some(Cow::Borrowed(crate::schema::ResolvedTag::Str.as_str()));
1012 return Ok(());
1013 }
1014 match resolve_scalar(schema, *style, value, tag.as_deref()) {
1016 Ok(Some(resolved)) => {
1017 *tag = Some(Cow::Borrowed(resolved.as_str()));
1018 if let Some(m) = meta.as_mut() {
1020 m.tag_loc = None;
1021 if m.is_all_none() {
1022 *meta = None;
1023 }
1024 }
1025 }
1026 Ok(None) => {}
1027 Err(_) => {
1028 return Err(LoadError::UnresolvedScalar {
1029 value: sanitize_scalar_for_error(value),
1030 pos: span_start_to_pos(loc.start, line_index),
1031 });
1032 }
1033 }
1034 }
1035 Node::Mapping { tag, meta, .. } => {
1036 let effective_tag = tag.as_deref().filter(|t| *t != "!");
1039 if let Some(resolved) =
1040 resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1041 {
1042 *tag = Some(Cow::Borrowed(resolved.as_str()));
1043 if let Some(m) = meta.as_mut() {
1044 m.tag_loc = None;
1045 if m.is_all_none() {
1046 *meta = None;
1047 }
1048 }
1049 }
1050 }
1051 Node::Sequence { tag, meta, .. } => {
1052 let effective_tag = tag.as_deref().filter(|t| *t != "!");
1053 if let Some(resolved) =
1054 resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1055 {
1056 *tag = Some(Cow::Borrowed(resolved.as_str()));
1057 if let Some(m) = meta.as_mut() {
1058 m.tag_loc = None;
1059 if m.is_all_none() {
1060 *meta = None;
1061 }
1062 }
1063 }
1064 }
1065 Node::Alias { .. } => {}
1066 }
1067 Ok(())
1068}
1069
/// Builds the placeholder node used wherever a node is absent: an empty
/// plain scalar with no tag, no metadata and a zero-length span at offset 0.
const fn empty_scalar() -> Node<Span> {
    Node::Scalar {
        value: String::new(),
        style: ScalarStyle::Plain,
        tag: None,
        loc: Span { start: 0, end: 0 },
        meta: None,
    }
}
1083
1084#[cfg(test)]
1089#[expect(
1090 clippy::expect_used,
1091 clippy::unwrap_used,
1092 clippy::indexing_slicing,
1093 clippy::panic,
1094 reason = "test code"
1095)]
1096mod tests {
1097 use super::*;
1098
1099 #[test]
1101 fn loader_state_resets_anchor_map_between_documents() {
1102 let result = LoaderBuilder::new()
1104 .resolved()
1105 .build()
1106 .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
1107 assert!(
1108 result.is_err(),
1109 "expected Err: *foo in doc 2 should be undefined"
1110 );
1111 assert!(matches!(
1112 result.unwrap_err(),
1113 LoadError::UndefinedAlias { .. }
1114 ));
1115 }
1116
1117 #[test]
1119 fn register_anchor_increments_count() {
1120 let options = LoaderOptions {
1121 max_anchors: 2,
1122 ..LoaderOptions::default()
1123 };
1124 let mut state = LoadState::new(&options, "");
1125 let node = Node::Scalar {
1126 value: "x".to_owned(),
1127 style: ScalarStyle::Plain,
1128 tag: None,
1129 loc: Span { start: 0, end: 0 },
1130 meta: None,
1131 };
1132 assert!(state.register_anchor("a".to_owned(), &node).is_ok());
1133 assert!(state.register_anchor("b".to_owned(), &node).is_ok());
1134 let err = state
1135 .register_anchor("c".to_owned(), &node)
1136 .expect_err("expected AnchorCountLimitExceeded");
1137 assert!(matches!(
1138 err,
1139 LoadError::AnchorCountLimitExceeded { limit: 2 }
1140 ));
1141 }
1142
1143 #[test]
1145 fn expand_node_detects_circular_alias() {
1146 let options = LoaderOptions {
1147 mode: LoadMode::Resolved,
1148 ..LoaderOptions::default()
1149 };
1150 let mut state = LoadState::new(&options, "");
1151 let alias_node = Node::Alias {
1153 name: "a".to_owned(),
1154 loc: Span { start: 0, end: 0 },
1155 leading_comments: None,
1156 trailing_comment: None,
1157 };
1158 state.anchor_map.insert("a".to_owned(), alias_node.clone());
1159 let mut in_progress = HashSet::new();
1160 let result = state.expand_node(alias_node, &mut in_progress);
1161 assert!(
1162 matches!(result, Err(LoadError::CircularAlias { .. })),
1163 "expected CircularAlias, got: {result:?}"
1164 );
1165 }
1166
1167 #[test]
1173 fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
1174 let docs = load("outer:\n # Style 1\n inner: val\n").unwrap();
1175 let root = &docs[0].root;
1176 let Node::Mapping { entries, .. } = root else {
1180 panic!("expected root mapping");
1181 };
1182 assert_eq!(entries.len(), 1);
1183 let (_outer_key, outer_value) = &entries[0];
1184 let Node::Mapping {
1185 entries: nested, ..
1186 } = outer_value
1187 else {
1188 panic!("expected nested mapping");
1189 };
1190 assert_eq!(nested.len(), 1);
1191 let (inner_key, _) = &nested[0];
1192 assert_eq!(
1193 inner_key.leading_comments(),
1194 &["# Style 1"],
1195 "comment should be attached to the first nested key"
1196 );
1197 }
1198
1199 #[test]
1201 fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
1202 let docs = load("key:\n # leading\n - item1\n - item2\n").unwrap();
1203 let root = &docs[0].root;
1204 let Node::Mapping { entries, .. } = root else {
1205 panic!("expected root mapping");
1206 };
1207 let (_key, seq_value) = &entries[0];
1208 let Node::Sequence { items, .. } = seq_value else {
1209 panic!("expected sequence value");
1210 };
1211 assert_eq!(
1214 items[0].leading_comments(),
1215 &["# leading"],
1216 "comment should be attached to first sequence item"
1217 );
1218 }
1219
1220 #[test]
1222 fn multiple_comments_between_key_and_collection_all_preserved() {
1223 let docs = load("key:\n # first\n # second\n - item\n").unwrap();
1224 let root = &docs[0].root;
1225 let Node::Mapping { entries, .. } = root else {
1226 panic!("expected root mapping");
1227 };
1228 let (_key, seq_value) = &entries[0];
1229 let Node::Sequence { items, .. } = seq_value else {
1230 panic!("expected sequence value");
1231 };
1232 assert_eq!(
1233 items[0].leading_comments(),
1234 &["# first", "# second"],
1235 "both comments should be on first item"
1236 );
1237 }
1238
1239 #[test]
1241 fn comment_between_key_and_collection_does_not_corrupt_key_node() {
1242 let docs = load("outer:\n # Style 1\n inner: val\n").unwrap();
1243 let root = &docs[0].root;
1244 let Node::Mapping { entries, .. } = root else {
1245 panic!("expected root mapping");
1246 };
1247 let (outer_key, _) = &entries[0];
1248 assert!(
1249 outer_key.leading_comments().is_empty(),
1250 "outer key should have no leading comments"
1251 );
1252 assert!(
1253 outer_key.trailing_comment().is_none(),
1254 "outer key should have no trailing comment"
1255 );
1256 }
1257
1258 #[test]
1260 fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
1261 let docs = load("key:\n inner: val\n").unwrap();
1262 let root = &docs[0].root;
1263 let Node::Mapping { entries, .. } = root else {
1264 panic!("expected root mapping");
1265 };
1266 let (_key, nested) = &entries[0];
1267 let Node::Mapping {
1268 entries: nested_entries,
1269 ..
1270 } = nested
1271 else {
1272 panic!("expected nested mapping");
1273 };
1274 let (inner_key, _) = &nested_entries[0];
1275 assert!(
1276 inner_key.leading_comments().is_empty(),
1277 "inner key should have no leading comments when there is no comment"
1278 );
1279 }
1280
1281 #[test]
1287 fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
1288 let input =
1289 "Lists:\n list-a:\n - item1\n - item2\n\n # Style 2\n list-b:\n - item1\n";
1290 let docs = load(input).unwrap();
1291 let root = &docs[0].root;
1292 let Node::Mapping { entries, .. } = root else {
1293 panic!("expected root mapping");
1294 };
1295 let (_lists_key, nested) = &entries[0];
1296 let Node::Mapping {
1297 entries: nested_entries,
1298 ..
1299 } = nested
1300 else {
1301 panic!("expected nested mapping");
1302 };
1303 assert_eq!(nested_entries.len(), 2);
1304 let (list_b_key, _) = &nested_entries[1];
1305 assert_eq!(
1306 list_b_key.leading_comments(),
1307 &["# Style 2"],
1308 "# Style 2 should be leading comment on list-b key"
1309 );
1310 }
1311
1312 #[test]
1314 fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
1315 let input = "outer:\n a:\n - x\n # between\n b: y\n";
1316 let docs = load(input).unwrap();
1317 let root = &docs[0].root;
1318 let Node::Mapping { entries, .. } = root else {
1319 panic!("expected root mapping");
1320 };
1321 let (_outer_key, outer_val) = &entries[0];
1322 let Node::Mapping {
1323 entries: nested, ..
1324 } = outer_val
1325 else {
1326 panic!("expected nested mapping");
1327 };
1328 assert_eq!(nested.len(), 2);
1329 let (b_key, _) = &nested[1];
1330 assert_eq!(
1331 b_key.leading_comments(),
1332 &["# between"],
1333 "# between should be leading comment on b key"
1334 );
1335 }
1336
1337 #[test]
1339 fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
1340 let input = "parent:\n child1:\n k: v\n # end-of-child1\n child2: val\n";
1341 let docs = load(input).unwrap();
1342 let root = &docs[0].root;
1343 let Node::Mapping { entries, .. } = root else {
1344 panic!("expected root mapping");
1345 };
1346 let (_parent_key, parent_val) = &entries[0];
1347 let Node::Mapping {
1348 entries: siblings, ..
1349 } = parent_val
1350 else {
1351 panic!("expected parent mapping value");
1352 };
1353 assert_eq!(siblings.len(), 2);
1354 let (child2_key, _) = &siblings[1];
1355 assert_eq!(
1356 child2_key.leading_comments(),
1357 &["# end-of-child1"],
1358 "# end-of-child1 should be leading comment on child2 key"
1359 );
1360 }
1361
1362 #[test]
1364 fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
1365 let input = "items:\n - a\n - b\n # tail\n";
1371 let docs = load(input).unwrap();
1372 assert!(!docs.is_empty(), "document should parse without error");
1374 let root = &docs[0].root;
1376 let Node::Mapping { entries, .. } = root else {
1377 panic!("expected root mapping");
1378 };
1379 let (_items_key, seq_val) = &entries[0];
1380 let Node::Sequence { items, .. } = seq_val else {
1381 panic!("expected sequence value");
1382 };
1383 assert_eq!(items.len(), 2, "sequence items must not be lost");
1384 }
1385
1386 #[test]
1388 fn no_overflow_comments_when_collection_ends_cleanly() {
1389 let docs = load("key:\n - item1\n - item2\n").unwrap();
1390 let root = &docs[0].root;
1391 let Node::Mapping { entries, .. } = root else {
1392 panic!("expected root mapping");
1393 };
1394 let (_key, seq_val) = &entries[0];
1395 let Node::Sequence { items, .. } = seq_val else {
1396 panic!("expected sequence value");
1397 };
1398 for item in items {
1399 assert!(
1400 item.leading_comments().is_empty(),
1401 "items should have no leading comments"
1402 );
1403 }
1404 }
1405
1406 #[test]
1412 fn original_bug_report_input_preserves_both_comments() {
1413 let input = "Lists:\n # Style 1\n list-a:\n - item1\n - item2\n\n # Style 2\n list-b:\n - item1\n - item2\n";
1414 let docs = load(input).unwrap();
1415 let root = &docs[0].root;
1416 let Node::Mapping { entries, .. } = root else {
1417 panic!("expected root mapping");
1418 };
1419 let (_lists_key, nested) = &entries[0];
1420 let Node::Mapping {
1421 entries: nested_entries,
1422 ..
1423 } = nested
1424 else {
1425 panic!("expected nested mapping");
1426 };
1427 assert_eq!(nested_entries.len(), 2);
1428 let (first_key, _) = &nested_entries[0];
1429 let (second_key, _) = &nested_entries[1];
1430 assert_eq!(
1431 first_key.leading_comments(),
1432 &["# Style 1"],
1433 "list-a should have # Style 1 as leading comment"
1434 );
1435 assert_eq!(
1436 second_key.leading_comments(),
1437 &["# Style 2"],
1438 "list-b should have # Style 2 as leading comment"
1439 );
1440 }
1441
1442 #[test]
1444 fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
1445 let input = "map:\n # leading\n key: value # trailing\n # next-leading\n key2: v2\n";
1446 let docs = load(input).unwrap();
1447 let root = &docs[0].root;
1448 let Node::Mapping { entries, .. } = root else {
1449 panic!("expected root mapping");
1450 };
1451 let (_map_key, map_val) = &entries[0];
1452 let Node::Mapping {
1453 entries: siblings, ..
1454 } = map_val
1455 else {
1456 panic!("expected mapping value");
1457 };
1458 assert_eq!(siblings.len(), 2);
1459 let (key1, val1) = &siblings[0];
1460 let (key2, _) = &siblings[1];
1461 assert_eq!(key1.leading_comments(), &["# leading"]);
1462 assert_eq!(val1.trailing_comment(), Some("# trailing"));
1463 assert_eq!(key2.leading_comments(), &["# next-leading"]);
1464 }
1465
1466 #[test]
1468 fn deeply_nested_overflow_comments_reach_correct_sibling() {
1469 let input = "top:\n mid:\n - x\n # deep-overflow\n next: y\n";
1470 let docs = load(input).unwrap();
1471 let root = &docs[0].root;
1472 let Node::Mapping { entries, .. } = root else {
1473 panic!("expected root mapping");
1474 };
1475 let (_top_key, top_val) = &entries[0];
1476 let Node::Mapping {
1477 entries: top_entries,
1478 ..
1479 } = top_val
1480 else {
1481 panic!("expected top-level mapping");
1482 };
1483 assert_eq!(top_entries.len(), 2);
1484 let (next_key, _) = &top_entries[1];
1485 assert_eq!(
1486 next_key.leading_comments(),
1487 &["# deep-overflow"],
1488 "# deep-overflow should propagate from nested sequence to next sibling"
1489 );
1490 }
1491
1492 #[test]
1498 fn bare_document_has_both_flags_false() {
1499 let docs = load("key: value\n").expect("load failed");
1500 assert_eq!(docs.len(), 1);
1501 assert!(!docs[0].explicit_start, "expected explicit_start=false");
1502 assert!(!docs[0].explicit_end, "expected explicit_end=false");
1503 }
1504
1505 #[test]
1507 fn document_with_start_marker_has_explicit_start_true() {
1508 let docs = load("---\nkey: value\n").expect("load failed");
1509 assert_eq!(docs.len(), 1);
1510 assert!(docs[0].explicit_start, "expected explicit_start=true");
1511 assert!(!docs[0].explicit_end, "expected explicit_end=false");
1512 }
1513
1514 #[test]
1516 fn document_with_end_marker_has_explicit_end_true() {
1517 let docs = load("key: value\n...\n").expect("load failed");
1518 assert_eq!(docs.len(), 1);
1519 assert!(!docs[0].explicit_start, "expected explicit_start=false");
1520 assert!(docs[0].explicit_end, "expected explicit_end=true");
1521 }
1522
1523 #[test]
1525 fn document_with_both_markers_has_both_flags_true() {
1526 let docs = load("---\nkey: value\n...\n").expect("load failed");
1527 assert_eq!(docs.len(), 1);
1528 assert!(docs[0].explicit_start, "expected explicit_start=true");
1529 assert!(docs[0].explicit_end, "expected explicit_end=true");
1530 }
1531
#[test]
/// In a three-document stream each document carries its own marker flags:
/// doc1 has neither marker, doc2 has both, doc3 has only the start marker.
fn multi_document_flags_are_independent() {
    let source = "doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n";
    let docs = load(source).expect("load failed");
    assert_eq!(docs.len(), 3);

    let (doc1, doc2, doc3) = (&docs[0], &docs[1], &docs[2]);
    assert!(!doc1.explicit_start, "doc1 explicit_start");
    assert!(!doc1.explicit_end, "doc1 explicit_end");
    assert!(doc2.explicit_start, "doc2 explicit_start");
    assert!(doc2.explicit_end, "doc2 explicit_end");
    assert!(doc3.explicit_start, "doc3 explicit_start");
    assert!(!doc3.explicit_end, "doc3 explicit_end");
}
1547
#[test]
/// A content-free document delimited by `---` and `...` still yields one
/// document with both marker flags set.
fn empty_document_with_explicit_markers_has_both_flags_true() {
    let docs = load("---\n...\n").expect("load failed");
    assert_eq!(docs.len(), 1);

    let doc = &docs[0];
    assert!(doc.explicit_start, "expected explicit_start=true");
    assert!(doc.explicit_end, "expected explicit_end=true");
}
1556
#[test]
/// A raw line feed in the scalar is rendered as the literal text `\u000A`.
fn sanitize_newline_replaced_with_escape() {
    let result = sanitize_scalar_for_error("foo\nbar");

    let has_raw_newline = result.contains('\n');
    assert!(!has_raw_newline, "output must not contain a raw newline");

    let has_escape = result.contains("\\u000A");
    assert!(
        has_escape,
        "output must contain \\u000A escape, got: {result:?}"
    );

    assert_eq!(result, "foo\\u000Abar");
}
1575
#[test]
/// A raw carriage return in the scalar is rendered as the literal text `\u000D`.
fn sanitize_carriage_return_replaced_with_escape() {
    let result = sanitize_scalar_for_error("foo\rbar");

    let has_raw_cr = result.contains('\r');
    assert!(!has_raw_cr, "output must not contain a raw carriage return");

    let has_escape = result.contains("\\u000D");
    assert!(
        has_escape,
        "output must contain \\u000D escape, got: {result:?}"
    );

    assert_eq!(result, "foo\\u000Dbar");
}
1590
#[test]
/// A NUL character in the scalar is rendered as the literal text `\u0000`.
fn sanitize_null_byte_replaced_with_escape() {
    let result = sanitize_scalar_for_error("foo\0bar");

    let has_raw_nul = result.contains('\0');
    assert!(!has_raw_nul, "output must not contain a raw null byte");

    let has_escape = result.contains("\\u0000");
    assert!(
        has_escape,
        "output must contain \\u0000 escape, got: {result:?}"
    );

    assert_eq!(result, "foo\\u0000bar");
}
1605
#[test]
/// A short value without special characters passes through unchanged and
/// gains no `...` suffix.
fn sanitize_short_value_stored_verbatim() {
    let result = sanitize_scalar_for_error("hello");
    assert_eq!(result, "hello");

    let has_ellipsis = result.ends_with("...");
    assert!(!has_ellipsis, "short value must not be truncated");
}
1617
#[test]
/// A 128-character ASCII value is kept at full length with no `...` suffix.
fn sanitize_value_at_exact_limit_not_truncated() {
    let at_limit = "a".repeat(128);
    let result = sanitize_scalar_for_error(&at_limit);

    let output_len = result.len();
    assert_eq!(
        output_len, 128,
        "128-char input must produce 128-char output"
    );

    let has_ellipsis = result.ends_with("...");
    assert!(!has_ellipsis, "value at exact limit must not be truncated");
}
1633
#[test]
/// A 129-character ASCII value is cut to 128 characters followed by `...`.
fn sanitize_value_over_limit_truncated() {
    let over_limit = "a".repeat(129);
    let result = sanitize_scalar_for_error(&over_limit);

    let has_ellipsis = result.ends_with("...");
    assert!(has_ellipsis, "value over limit must end with '...'");

    let output_len = result.len();
    assert_eq!(
        output_len,
        128 + 3,
        "truncated output must be 128 chars + 3 ellipsis chars"
    );
}
1649
#[test]
/// Truncation counts characters rather than bytes: 127 three-byte characters
/// plus `ab` (129 characters) are cut to exactly 128 characters before `...`.
fn sanitize_multibyte_char_boundary_not_split() {
    let mut input = "中".repeat(127);
    input.push_str("ab");

    let result = sanitize_scalar_for_error(&input);
    let has_ellipsis = result.ends_with("...");
    assert!(has_ellipsis, "129-char multibyte input should be truncated");

    // Count what remains once the ellipsis suffix is removed.
    let kept = result.trim_end_matches("...");
    let char_count = kept.chars().count();
    assert_eq!(
        char_count, 128,
        "truncated portion must be exactly 128 chars"
    );
}
1672
1673 fn load_root(input: &str) -> Node<Span> {
1678 load(input).expect("load failed").remove(0).root
1679 }
1680
#[test]
/// An untagged scalar `hello` gets its tag as a `Cow::Borrowed` value.
fn resolver_injected_str_tag_is_borrowed() {
    let root = load_root("hello\n");
    let Node::Scalar { tag, .. } = root else {
        panic!("expected scalar");
    };

    let is_borrowed = matches!(tag, Some(Cow::Borrowed(_)));
    assert!(
        is_borrowed,
        "resolver-injected !!str must be Borrowed, got: {tag:?}"
    );
}
1692
#[test]
/// An untagged scalar `42` gets its tag as a `Cow::Borrowed` value.
fn resolver_injected_int_tag_is_borrowed() {
    let root = load_root("42\n");
    let Node::Scalar { tag, .. } = root else {
        panic!("expected scalar");
    };

    let is_borrowed = matches!(tag, Some(Cow::Borrowed(_)));
    assert!(
        is_borrowed,
        "resolver-injected !!int must be Borrowed, got: {tag:?}"
    );
}
1704
#[test]
/// An untagged scalar `null` gets its tag as a `Cow::Borrowed` value.
fn resolver_injected_null_tag_is_borrowed() {
    let root = load_root("null\n");
    let Node::Scalar { tag, .. } = root else {
        panic!("expected scalar");
    };

    let is_borrowed = matches!(tag, Some(Cow::Borrowed(_)));
    assert!(
        is_borrowed,
        "resolver-injected !!null must be Borrowed, got: {tag:?}"
    );
}
1716
#[test]
/// An untagged block mapping gets its tag as a `Cow::Borrowed` value.
fn resolver_injected_map_tag_is_borrowed() {
    let root = load_root("a: 1\n");
    let Node::Mapping { tag, .. } = root else {
        panic!("expected mapping");
    };

    let is_borrowed = matches!(tag, Some(Cow::Borrowed(_)));
    assert!(
        is_borrowed,
        "resolver-injected !!map must be Borrowed, got: {tag:?}"
    );
}
1728
#[test]
/// An untagged block sequence gets its tag as a `Cow::Borrowed` value.
fn resolver_injected_seq_tag_is_borrowed() {
    let root = load_root("- a\n");
    let Node::Sequence { tag, .. } = root else {
        panic!("expected sequence");
    };

    let is_borrowed = matches!(tag, Some(Cow::Borrowed(_)));
    assert!(
        is_borrowed,
        "resolver-injected !!seq must be Borrowed, got: {tag:?}"
    );
}
1740
#[test]
/// A scalar tagged `!!str` in the source carries its tag as `Cow::Owned`.
fn user_authored_tag_on_scalar_is_owned() {
    let root = load_root("!!str hello\n");
    let Node::Scalar { tag, .. } = root else {
        panic!("expected scalar");
    };

    let is_owned = matches!(tag, Some(Cow::Owned(_)));
    assert!(
        is_owned,
        "user-authored !!str must be Owned, got: {tag:?}"
    );
}
1752
#[test]
/// A mapping tagged `!!map` in the source carries its tag as `Cow::Owned`.
fn user_authored_tag_on_mapping_is_owned() {
    let root = load_root("!!map\na: 1\n");
    let Node::Mapping { tag, .. } = root else {
        panic!("expected mapping");
    };

    let is_owned = matches!(tag, Some(Cow::Owned(_)));
    assert!(
        is_owned,
        "user-authored !!map must be Owned, got: {tag:?}"
    );
}
1764
#[test]
/// A sequence tagged `!!seq` in the source carries its tag as `Cow::Owned`.
fn user_authored_tag_on_sequence_is_owned() {
    let root = load_root("!!seq\n- a\n");
    let Node::Sequence { tag, .. } = root else {
        panic!("expected sequence");
    };

    let is_owned = matches!(tag, Some(Cow::Owned(_)));
    assert!(
        is_owned,
        "user-authored !!seq must be Owned, got: {tag:?}"
    );
}
1776
#[test]
/// A scalar written with a bare `!` tag ends up with a `Cow::Borrowed` tag.
fn bare_excl_tag_resolver_path_is_borrowed() {
    let root = load_root("! hello\n");
    let Node::Scalar { tag, .. } = root else {
        panic!("expected scalar");
    };

    let is_borrowed = matches!(tag, Some(Cow::Borrowed(_)));
    assert!(
        is_borrowed,
        "bare-! path in apply_schema_to_node must be Borrowed, got: {tag:?}"
    );
}
1789
#[test]
/// With a default-built loader, `*a` in a sequence is kept as a `Node::Alias`
/// item; the pattern below matches it without naming any tag field.
fn alias_node_has_no_tag_field() {
    let docs = LoaderBuilder::new()
        .build()
        .load("- &a x\n- *a\n")
        .expect("load failed");

    let Node::Sequence { items, .. } = &docs[0].root else {
        panic!("expected root sequence");
    };

    let second = &items[1];
    let is_alias = matches!(second, Node::Alias { .. });
    assert!(is_alias, "second item must be Alias in lossless mode");
}
1806
#[test]
/// The tag text is readable through `as_deref()` both for an untagged scalar
/// (`tag:yaml.org,2002:str`) and for one tagged `!custom` in the source.
fn tag_value_content_preserved_across_cow_variants() {
    // Loads `source` and hands back the tag of its root scalar.
    let scalar_tag = |source: &str| {
        let Node::Scalar { tag, .. } = load_root(source) else {
            panic!("expected scalar");
        };
        tag
    };

    let tag_resolver = scalar_tag("hello\n");
    assert_eq!(tag_resolver.as_deref(), Some("tag:yaml.org,2002:str"));

    let tag_user = scalar_tag("!custom hello\n");
    assert_eq!(tag_user.as_deref(), Some("!custom"));
}
1826
#[test]
/// A plain root scalar is loaded with `meta: None`.
fn loaded_plain_scalar_has_no_meta() {
    let docs = load("hello\n").unwrap();
    let root = &docs[0].root;

    let meta_is_none = matches!(root, Node::Scalar { meta: None, .. });
    assert!(
        meta_is_none,
        "plain scalar must have meta: None, got: {root:?}"
    );
}
1842
#[test]
/// An anchored sequence item is loaded with `meta: Some(_)` and reports its
/// anchor name through `anchor()`.
fn loaded_anchored_scalar_has_meta_some() {
    let docs = load("- &foo bar\n").unwrap();
    let Node::Sequence { items, .. } = &docs[0].root else {
        panic!("expected root Sequence");
    };

    let item = &items[0];
    let meta_is_some = matches!(item, Node::Scalar { meta: Some(_), .. });
    assert!(
        meta_is_some,
        "anchored scalar must have meta: Some, got: {item:?}"
    );

    let anchor_name = item.anchor();
    assert_eq!(anchor_name, Some("foo"));
}
1857
#[test]
/// A plain root mapping is loaded with `meta: None`.
fn loaded_mapping_with_no_meta_fields_has_meta_none() {
    let docs = load("a: 1\n").unwrap();
    let root = &docs[0].root;

    let meta_is_none = matches!(root, Node::Mapping { meta: None, .. });
    assert!(
        meta_is_none,
        "plain mapping must have meta: None, got: {root:?}"
    );
}
1868
#[test]
/// A plain root sequence is loaded with `meta: None`.
fn loaded_sequence_with_no_meta_fields_has_meta_none() {
    let docs = load("- a\n").unwrap();
    let root = &docs[0].root;

    let meta_is_none = matches!(root, Node::Sequence { meta: None, .. });
    assert!(
        meta_is_none,
        "plain sequence must have meta: None, got: {root:?}"
    );
}
1879
#[test]
/// An anchored root scalar is loaded with `meta: Some(_)`, and `anchor_loc()`
/// returns `Some` for it.
fn loaded_scalar_with_anchor_has_meta_some_with_anchor_loc() {
    let docs = load("&tag hello\n").unwrap();
    let root = &docs[0].root;

    let meta_is_some = matches!(root, Node::Scalar { meta: Some(_), .. });
    assert!(meta_is_some, "anchored scalar must have meta: Some");

    let anchor_loc = root.anchor_loc();
    assert!(
        anchor_loc.is_some(),
        "anchor_loc() must be Some for anchored scalar"
    );
}
1894}