1use std::collections::{HashMap, HashSet};
33use std::iter::Peekable;
34
35use crate::error::Error;
36use crate::event::{Event, ScalarStyle};
37use crate::node::{Document, Node};
38use crate::pos::{Pos, Span};
39use crate::schema::{CollectionKind, Schema, resolve_collection, resolve_scalar};
40
41use comments::{attach_leading_comments, attach_trailing_comment};
42use reloc::reloc;
43use stream::{
44 consume_leading_comments, consume_leading_doc_comments, next_from, peek_trailing_comment,
45 with_hash_prefix,
46};
47
48mod comments;
49mod reloc;
50mod stream;
51
/// Errors produced while turning an event stream into loaded documents.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum LoadError {
    /// An error item was read from the event stream; its position and message
    /// are copied into this variant.
    #[error("parse error at {pos:?}: {message}")]
    Parse {
        /// Position carried by the underlying error.
        pos: Pos,
        /// Message carried by the underlying error.
        message: String,
    },

    /// Fallback used when an error was peeked on the stream but could not be
    /// retrieved on the following read.
    #[error("unexpected end of event stream")]
    UnexpectedEndOfStream,

    /// More mappings/sequences were open at once than `max_nesting_depth` allows.
    #[error("nesting depth limit exceeded (max: {limit})")]
    NestingDepthLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// More distinct anchor names were registered in one document than
    /// `max_anchors` allows.
    #[error("anchor count limit exceeded (max: {limit})")]
    AnchorCountLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// The per-document expansion counter went past `max_expanded_nodes`.
    #[error("alias expansion node limit exceeded (max: {limit})")]
    AliasExpansionLimitExceeded {
        /// The configured limit that was exceeded.
        limit: usize,
    },

    /// While expanding an alias, the same alias name was reached again before
    /// its own expansion had finished.
    #[error("circular alias reference: '{name}'")]
    CircularAlias {
        /// Name of the alias that refers back to itself.
        name: String,
    },

    /// An alias names an anchor that is not registered in the current document.
    #[error("undefined alias: '{name}'")]
    UndefinedAlias {
        /// Name of the alias that could not be found.
        name: String,
    },

    /// Scalar resolution against the configured schema returned an error.
    /// Note that `value` and `pos` are carried as data only; the display
    /// message does not interpolate them.
    #[error("JSON schema: plain scalar does not match any type pattern")]
    UnresolvedScalar {
        /// The offending scalar text after `sanitize_scalar_for_error`
        /// (control characters escaped, long values truncated).
        value: String,
        /// Start position of the scalar.
        pos: Pos,
    },
}
124
/// Module-local shorthand for results whose error type is [`LoadError`].
type Result<T> = std::result::Result<T, LoadError>;

/// Peekable, boxed iterator over parser output; each item is either an event
/// paired with its span or a parser [`Error`].
type EventStream<'a> =
    Peekable<Box<dyn Iterator<Item = std::result::Result<(Event<'a>, Span), Error>> + 'a>>;
131
/// Selects how aliases are represented in the loaded tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadMode {
    /// Aliases are kept as `Node::Alias` nodes; anchored content is not copied.
    Lossless,
    /// Each alias is replaced by an expanded copy of the node registered
    /// under its anchor name.
    Resolved,
}
144
/// Tunable limits and behavior switches for a [`Loader`].
#[derive(Debug, Clone)]
pub struct LoaderOptions {
    /// Maximum number of mappings/sequences that may be open at the same time.
    pub max_nesting_depth: usize,
    /// Maximum number of distinct anchor names per document.
    pub max_anchors: usize,
    /// Upper bound for the per-document expansion counter, which is bumped for
    /// every anchor registered in resolved mode and for every node visited
    /// while expanding an alias.
    pub max_expanded_nodes: usize,
    /// Whether aliases are preserved or expanded.
    pub mode: LoadMode,
    /// Schema handed to scalar and collection tag resolution.
    pub schema: Schema,
}
165
166impl Default for LoaderOptions {
167 fn default() -> Self {
168 Self {
169 max_nesting_depth: 512,
170 max_anchors: 10_000,
171 max_expanded_nodes: 1_000_000,
172 mode: LoadMode::Lossless,
173 schema: Schema::Core,
174 }
175 }
176}
177
/// Fluent builder that accumulates [`LoaderOptions`] and produces a [`Loader`].
pub struct LoaderBuilder {
    /// Options collected so far; moved into the loader by `build`.
    options: LoaderOptions,
}
193
194impl LoaderBuilder {
195 #[must_use]
197 pub fn new() -> Self {
198 Self {
199 options: LoaderOptions::default(),
200 }
201 }
202
203 #[must_use]
205 pub const fn lossless(mut self) -> Self {
206 self.options.mode = LoadMode::Lossless;
207 self
208 }
209
210 #[must_use]
212 pub const fn resolved(mut self) -> Self {
213 self.options.mode = LoadMode::Resolved;
214 self
215 }
216
217 #[must_use]
219 pub const fn max_nesting_depth(mut self, limit: usize) -> Self {
220 self.options.max_nesting_depth = limit;
221 self
222 }
223
224 #[must_use]
226 pub const fn max_anchors(mut self, limit: usize) -> Self {
227 self.options.max_anchors = limit;
228 self
229 }
230
231 #[must_use]
233 pub const fn max_expanded_nodes(mut self, limit: usize) -> Self {
234 self.options.max_expanded_nodes = limit;
235 self
236 }
237
238 #[must_use]
243 pub const fn schema(mut self, s: Schema) -> Self {
244 self.options.schema = s;
245 self
246 }
247
248 #[must_use]
250 pub const fn build(self) -> Loader {
251 Loader {
252 options: self.options,
253 }
254 }
255}
256
257impl Default for LoaderBuilder {
258 fn default() -> Self {
259 Self::new()
260 }
261}
262
/// A configured loader; create one through [`LoaderBuilder`].
pub struct Loader {
    /// Options that every `load` call on this loader runs with.
    options: LoaderOptions,
}
271
272impl Loader {
273 pub fn load(&self, input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
280 let mut state = LoadState::new(&self.options);
281 let iter: Box<dyn Iterator<Item = std::result::Result<(Event<'_>, Span), Error>> + '_> =
282 Box::new(crate::parse_events(input));
283 state.run(iter.peekable())
284 }
285}
286
287pub fn load(input: &str) -> std::result::Result<Vec<Document<Span>>, LoadError> {
313 LoaderBuilder::new().lossless().build().load(input)
314}
315
/// Mutable bookkeeping for one `Loader::load` call.
struct LoadState<'opt> {
    /// Options borrowed from the owning loader.
    options: &'opt LoaderOptions,
    /// Anchor name -> registered node for the current document. In lossless
    /// mode an empty placeholder scalar is stored instead of the real node.
    anchor_map: HashMap<String, Node<Span>>,
    /// Number of distinct anchor names registered in the current document.
    anchor_count: usize,
    /// Number of mappings/sequences currently open.
    depth: usize,
    /// Per-document expansion counter checked against `max_expanded_nodes`.
    expanded_nodes: usize,
    /// Comments already read from the stream that have not been attached to a
    /// node yet; the next mapping key or sequence item picks them up.
    pending_leading: Vec<String>,
}
337
338impl<'opt> LoadState<'opt> {
339 fn new(options: &'opt LoaderOptions) -> Self {
340 Self {
341 options,
342 anchor_map: HashMap::new(),
343 anchor_count: 0,
344 depth: 0,
345 expanded_nodes: 0,
346 pending_leading: Vec::new(),
347 }
348 }
349
350 fn reset_for_document(&mut self) {
351 self.anchor_map.clear();
352 self.anchor_count = 0;
353 self.expanded_nodes = 0;
354 self.pending_leading.clear();
355 }
356
357 fn run(&mut self, mut stream: EventStream<'_>) -> Result<Vec<Document<Span>>> {
358 let mut docs: Vec<Document<Span>> = Vec::new();
359
360 match stream.next() {
362 Some(Ok(_)) | None => {}
363 Some(Err(e)) => {
364 return Err(LoadError::Parse {
365 pos: e.pos,
366 message: e.message,
367 });
368 }
369 }
370
371 loop {
372 match next_from(&mut stream)? {
374 None | Some((Event::StreamEnd, _)) => break,
375 Some((
376 Event::DocumentStart {
377 explicit,
378 version,
379 tag_directives,
380 },
381 _,
382 )) => {
383 let doc_explicit_start = explicit;
384 let doc_version = version;
385 let doc_tags = tag_directives;
386 self.reset_for_document();
387
388 let mut doc_comments: Vec<String> = Vec::new();
389
390 consume_leading_doc_comments(&mut stream, &mut doc_comments)?;
392
393 let root = if is_document_end(stream.peek()) {
395 let mut node = empty_scalar();
397 apply_schema_to_node(&mut node, self.options.schema)?;
398 node
399 } else {
400 self.parse_node(&mut stream)?
401 };
402
403 let doc_explicit_end =
405 if let Some(Ok((Event::DocumentEnd { explicit }, _))) = stream.peek() {
406 let end_explicit = *explicit;
407 let _ = stream.next();
408 end_explicit
409 } else {
410 false
411 };
412
413 docs.push(Document {
414 root,
415 version: doc_version,
416 tags: doc_tags,
417 comments: doc_comments,
418 explicit_start: doc_explicit_start,
419 explicit_end: doc_explicit_end,
420 });
421 }
422 Some(_) => {
423 }
425 }
426 }
427
428 Ok(docs)
429 }
430
431 #[expect(
435 clippy::too_many_lines,
436 reason = "match-on-event-type; splitting would obscure flow"
437 )]
438 fn parse_node(&mut self, stream: &mut EventStream<'_>) -> Result<Node<Span>> {
439 if matches!(
443 stream.peek(),
444 Some(Ok((
445 Event::MappingEnd | Event::SequenceEnd | Event::DocumentEnd { .. },
446 _
447 )))
448 ) {
449 return Ok(empty_scalar());
450 }
451
452 let Some((event, span)) = next_from(stream)? else {
453 return Ok(empty_scalar());
454 };
455
456 match event {
457 Event::Scalar {
458 value,
459 style,
460 anchor,
461 anchor_loc,
462 tag,
463 tag_loc,
464 ..
465 } => {
466 let mut node = Node::Scalar {
467 value: value.into_owned(),
468 style,
469 anchor: anchor.map(str::to_owned),
470 anchor_loc,
471 tag: tag.map(std::borrow::Cow::into_owned),
472 tag_loc,
473 loc: span,
474 leading_comments: None,
475 trailing_comment: None,
476 };
477 apply_schema_to_node(&mut node, self.options.schema)?;
478 if let Some(name) = node.anchor() {
479 self.register_anchor(name.to_owned(), &node)?;
480 }
481 Ok(node)
482 }
483
484 Event::MappingStart {
485 anchor,
486 anchor_loc: mapping_anchor_loc,
487 tag,
488 tag_loc: mapping_tag_loc,
489 style,
490 ..
491 } => {
492 let anchor = anchor.map(str::to_owned);
493 let anchor_loc = mapping_anchor_loc;
494 let tag_loc = mapping_tag_loc;
495 let tag = tag.map(std::borrow::Cow::into_owned);
496
497 self.depth += 1;
498 if self.depth > self.options.max_nesting_depth {
499 return Err(LoadError::NestingDepthLimitExceeded {
500 limit: self.options.max_nesting_depth,
501 });
502 }
503
504 let mut entries: Vec<(Node<Span>, Node<Span>)> = Vec::new();
505 let mut end_span = span;
506
507 loop {
508 let raw_leading = consume_leading_comments(stream)?;
512 let leading = if self.pending_leading.is_empty() {
513 raw_leading
514 } else {
515 let mut combined = std::mem::take(&mut self.pending_leading);
516 combined.extend(raw_leading);
517 combined
518 };
519
520 match stream.peek() {
521 None | Some(Ok((Event::MappingEnd | Event::StreamEnd, _))) => {
522 if !leading.is_empty() {
527 self.pending_leading = leading;
528 }
529 break;
530 }
531 Some(Err(_)) => {
532 return Err(match stream.next() {
534 Some(Err(e)) => LoadError::Parse {
535 pos: e.pos,
536 message: e.message,
537 },
538 _ => LoadError::UnexpectedEndOfStream,
539 });
540 }
541 Some(Ok(_)) => {}
542 }
543
544 let mut key = self.parse_node(stream)?;
545 attach_leading_comments(&mut key, leading);
546
547 let mut value = self.parse_node(stream)?;
548
549 if !is_block_scalar(&value)
558 && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
559 {
560 let value_end_line = node_end_line(&value);
561 if let Some(trail) = peek_trailing_comment(stream, value_end_line)? {
562 attach_trailing_comment(&mut value, trail);
563 }
564 }
565
566 entries.push((key, value));
567 }
568
569 if let Some(Ok((Event::MappingEnd, end))) = stream.peek() {
571 end_span = *end;
572 let _ = stream.next();
573 }
574 self.depth -= 1;
575
576 let mut node = Node::Mapping {
577 entries,
578 style,
579 anchor: anchor.clone(),
580 anchor_loc,
581 tag,
582 tag_loc,
583 loc: Span {
584 start: span.start,
585 end: end_span.end,
586 },
587 leading_comments: None,
588 trailing_comment: None,
589 };
590 apply_schema_to_node(&mut node, self.options.schema)?;
591 if let Some(name) = anchor {
592 self.register_anchor(name, &node)?;
593 }
594 Ok(node)
595 }
596
597 Event::SequenceStart {
598 anchor,
599 anchor_loc: sequence_anchor_loc,
600 tag,
601 tag_loc: sequence_tag_loc,
602 style,
603 ..
604 } => {
605 let anchor = anchor.map(str::to_owned);
606 let anchor_loc = sequence_anchor_loc;
607 let tag_loc = sequence_tag_loc;
608 let tag = tag.map(std::borrow::Cow::into_owned);
609
610 self.depth += 1;
611 if self.depth > self.options.max_nesting_depth {
612 return Err(LoadError::NestingDepthLimitExceeded {
613 limit: self.options.max_nesting_depth,
614 });
615 }
616
617 let mut items: Vec<Node<Span>> = Vec::new();
618 let mut end_span = span;
619
620 loop {
621 let raw_leading = consume_leading_comments(stream)?;
625 let leading = if self.pending_leading.is_empty() {
626 raw_leading
627 } else {
628 let mut combined = std::mem::take(&mut self.pending_leading);
629 combined.extend(raw_leading);
630 combined
631 };
632
633 match stream.peek() {
634 None | Some(Ok((Event::SequenceEnd | Event::StreamEnd, _))) => {
635 if !leading.is_empty() {
641 self.pending_leading = leading;
642 }
643 break;
644 }
645 Some(Err(_)) => {
646 return Err(match stream.next() {
648 Some(Err(e)) => LoadError::Parse {
649 pos: e.pos,
650 message: e.message,
651 },
652 _ => LoadError::UnexpectedEndOfStream,
653 });
654 }
655 Some(Ok(_)) => {}
656 }
657
658 let mut item = self.parse_node(stream)?;
659 attach_leading_comments(&mut item, leading);
660
661 if !is_block_scalar(&item)
667 && matches!(stream.peek(), Some(Ok((Event::Comment { .. }, _))))
668 {
669 let item_end_line = node_end_line(&item);
670 if let Some(trail) = peek_trailing_comment(stream, item_end_line)? {
671 attach_trailing_comment(&mut item, trail);
672 }
673 }
674
675 items.push(item);
676 }
677
678 if let Some(Ok((Event::SequenceEnd, end))) = stream.peek() {
680 end_span = *end;
681 let _ = stream.next();
682 }
683 self.depth -= 1;
684
685 let mut node = Node::Sequence {
686 items,
687 style,
688 anchor: anchor.clone(),
689 anchor_loc,
690 tag,
691 tag_loc,
692 loc: Span {
693 start: span.start,
694 end: end_span.end,
695 },
696 leading_comments: None,
697 trailing_comment: None,
698 };
699 apply_schema_to_node(&mut node, self.options.schema)?;
700 if let Some(name) = anchor {
701 self.register_anchor(name, &node)?;
702 }
703 Ok(node)
704 }
705
706 Event::Alias { name } => {
707 let name = name.to_owned();
708 self.resolve_alias(&name, span)
709 }
710
711 Event::Comment { text } => {
712 self.pending_leading.push(with_hash_prefix(text));
718 self.parse_node(stream)
719 }
720
721 Event::StreamStart
722 | Event::StreamEnd
723 | Event::DocumentStart { .. }
724 | Event::DocumentEnd { .. }
725 | Event::MappingEnd
726 | Event::SequenceEnd => {
727 Ok(empty_scalar())
729 }
730 }
731 }
732
733 fn register_anchor(&mut self, name: String, node: &Node<Span>) -> Result<()> {
734 if !self.anchor_map.contains_key(&name) {
735 self.anchor_count += 1;
736 if self.anchor_count > self.options.max_anchors {
737 return Err(LoadError::AnchorCountLimitExceeded {
738 limit: self.options.max_anchors,
739 });
740 }
741 }
742 if self.options.mode == LoadMode::Resolved {
746 self.expanded_nodes += 1;
747 if self.expanded_nodes > self.options.max_expanded_nodes {
748 return Err(LoadError::AliasExpansionLimitExceeded {
749 limit: self.options.max_expanded_nodes,
750 });
751 }
752 self.anchor_map.insert(name, node.clone());
753 } else {
754 self.anchor_map.insert(name, empty_scalar());
757 }
758 Ok(())
759 }
760
761 fn resolve_alias(&mut self, name: &str, loc: Span) -> Result<Node<Span>> {
762 match self.options.mode {
763 LoadMode::Lossless => Ok(Node::Alias {
764 name: name.to_owned(),
765 loc,
766 leading_comments: None,
767 trailing_comment: None,
768 }),
769 LoadMode::Resolved => {
770 let anchored = self.anchor_map.get(name).cloned().ok_or_else(|| {
771 LoadError::UndefinedAlias {
772 name: name.to_owned(),
773 }
774 })?;
775 let mut in_progress: HashSet<String> = HashSet::new();
776 self.expand_node(anchored, &mut in_progress)
777 }
778 }
779 }
780
781 fn expand_node(
784 &mut self,
785 node: Node<Span>,
786 in_progress: &mut HashSet<String>,
787 ) -> Result<Node<Span>> {
788 self.expanded_nodes += 1;
792 if self.expanded_nodes > self.options.max_expanded_nodes {
793 return Err(LoadError::AliasExpansionLimitExceeded {
794 limit: self.options.max_expanded_nodes,
795 });
796 }
797
798 match node {
799 Node::Alias { ref name, loc, .. } => {
800 if in_progress.contains(name) {
801 return Err(LoadError::CircularAlias { name: name.clone() });
802 }
803 let target = self
804 .anchor_map
805 .get(name)
806 .cloned()
807 .ok_or_else(|| LoadError::UndefinedAlias { name: name.clone() })?;
808 in_progress.insert(name.clone());
809 let expanded = self.expand_node(target, in_progress)?;
810 in_progress.remove(name);
811 Ok(reloc(expanded, loc))
813 }
814 Node::Mapping {
815 entries,
816 style,
817 anchor,
818 anchor_loc,
819 tag,
820 tag_loc,
821 loc,
822 leading_comments,
823 trailing_comment,
824 } => {
825 let mut expanded_entries = Vec::with_capacity(entries.len());
826 for (k, v) in entries {
827 let ek = self.expand_node(k, in_progress)?;
828 let ev = self.expand_node(v, in_progress)?;
829 expanded_entries.push((ek, ev));
830 }
831 Ok(Node::Mapping {
832 entries: expanded_entries,
833 style,
834 anchor,
835 anchor_loc,
836 tag,
837 tag_loc,
838 loc,
839 leading_comments,
840 trailing_comment,
841 })
842 }
843 Node::Sequence {
844 items,
845 style,
846 anchor,
847 anchor_loc,
848 tag,
849 tag_loc,
850 loc,
851 leading_comments,
852 trailing_comment,
853 } => {
854 let mut expanded_items = Vec::with_capacity(items.len());
855 for item in items {
856 expanded_items.push(self.expand_node(item, in_progress)?);
857 }
858 Ok(Node::Sequence {
859 items: expanded_items,
860 style,
861 anchor,
862 anchor_loc,
863 tag,
864 tag_loc,
865 loc,
866 leading_comments,
867 trailing_comment,
868 })
869 }
870 scalar @ Node::Scalar { .. } => Ok(scalar),
872 }
873 }
874}
875
876const fn is_document_end(peeked: Option<&std::result::Result<(Event<'_>, Span), Error>>) -> bool {
878 matches!(
879 peeked,
880 None | Some(Ok((Event::DocumentEnd { .. } | Event::StreamEnd, _)))
881 )
882}
883
884#[inline]
889const fn node_end_line(node: &Node<Span>) -> usize {
890 match node {
891 Node::Scalar { loc, .. }
892 | Node::Mapping { loc, .. }
893 | Node::Sequence { loc, .. }
894 | Node::Alias { loc, .. } => loc.end.line,
895 }
896}
897
898#[inline]
907const fn is_block_scalar(node: &Node<Span>) -> bool {
908 matches!(
909 node,
910 Node::Scalar {
911 style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
912 ..
913 }
914 )
915}
916
/// Maximum number of characters of a scalar that are copied into an error
/// value before it is cut off with `...`.
const UNRESOLVED_VALUE_MAX_CHARS: usize = 128;

/// Makes a raw scalar safe to embed in a single-line error value.
///
/// * At most [`UNRESOLVED_VALUE_MAX_CHARS`] characters (not bytes) of `raw`
///   are kept; if anything was cut off, `...` is appended.
/// * Every control character is replaced by a `\uXXXX` escape. This covers
///   the ASCII controls and also the C1 range (for example U+0085 NEL and
///   U+009B CSI), which would otherwise reach logs and terminals unescaped.
///   The Unicode line and paragraph separators U+2028/U+2029 are escaped as
///   well because they also break a line.
/// * All other characters are copied unchanged.
fn sanitize_scalar_for_error(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len().min(UNRESOLVED_VALUE_MAX_CHARS * 2));
    let mut chars = raw.chars();

    // `take` stops after the limit without pulling an extra character, so the
    // look-ahead below can tell whether input was left over.
    for ch in chars.by_ref().take(UNRESOLVED_VALUE_MAX_CHARS) {
        if ch.is_control() || matches!(ch, '\u{2028}' | '\u{2029}') {
            out.push_str(&format!("\\u{:04X}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }

    if chars.next().is_some() {
        out.push_str("...");
    }
    out
}
955
956fn apply_schema_to_node(node: &mut Node<Span>, schema: Schema) -> Result<()> {
971 match node {
972 Node::Scalar {
973 value,
974 style,
975 tag,
976 tag_loc,
977 loc,
978 ..
979 } => {
980 if tag.as_deref() == Some("!") {
991 *tag = Some(crate::schema::ResolvedTag::Str.as_str().to_owned());
992 return Ok(());
993 }
994 match resolve_scalar(schema, *style, value, tag.as_deref()) {
996 Ok(Some(resolved)) => {
997 *tag = Some(resolved.as_str().to_owned());
998 *tag_loc = None;
999 }
1000 Ok(None) => {}
1001 Err(_) => {
1002 return Err(LoadError::UnresolvedScalar {
1003 value: sanitize_scalar_for_error(value),
1004 pos: loc.start,
1005 });
1006 }
1007 }
1008 }
1009 Node::Mapping { tag, tag_loc, .. } => {
1010 let effective_tag = tag.as_deref().filter(|t| *t != "!");
1013 if let Some(resolved) =
1014 resolve_collection(schema, CollectionKind::Mapping, effective_tag)
1015 {
1016 *tag = Some(resolved.as_str().to_owned());
1017 *tag_loc = None;
1018 }
1019 }
1020 Node::Sequence { tag, tag_loc, .. } => {
1021 let effective_tag = tag.as_deref().filter(|t| *t != "!");
1022 if let Some(resolved) =
1023 resolve_collection(schema, CollectionKind::Sequence, effective_tag)
1024 {
1025 *tag = Some(resolved.as_str().to_owned());
1026 *tag_loc = None;
1027 }
1028 }
1029 Node::Alias { .. } => {}
1030 }
1031 Ok(())
1032}
1033
1034const fn empty_scalar() -> Node<Span> {
1039 Node::Scalar {
1040 value: String::new(),
1041 style: ScalarStyle::Plain,
1042 anchor: None,
1043 anchor_loc: None,
1044 tag: None,
1045 tag_loc: None,
1046 loc: Span {
1047 start: Pos::ORIGIN,
1048 end: Pos::ORIGIN,
1049 },
1050 leading_comments: None,
1051 trailing_comment: None,
1052 }
1053}
1054
// Unit tests for the loader: per-document state, anchor and alias limits,
// comment attachment, explicit document markers and error-value sanitizing.
#[cfg(test)]
#[expect(
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::indexing_slicing,
    clippy::panic,
    reason = "test code"
)]
mod tests {
    use super::*;

    // An anchor defined in the first document must not be visible to an alias
    // in the second document.
    #[test]
    fn loader_state_resets_anchor_map_between_documents() {
        let result = LoaderBuilder::new()
            .resolved()
            .build()
            .load("---\n- &foo hello\n...\n---\n- *foo\n...\n");
        assert!(
            result.is_err(),
            "expected Err: *foo in doc 2 should be undefined"
        );
        assert!(matches!(
            result.unwrap_err(),
            LoadError::UndefinedAlias { .. }
        ));
    }

    // With max_anchors = 2, the third distinct anchor name is rejected.
    #[test]
    fn register_anchor_increments_count() {
        let options = LoaderOptions {
            max_anchors: 2,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        let node = Node::Scalar {
            value: "x".to_owned(),
            style: ScalarStyle::Plain,
            anchor: None,
            anchor_loc: None,
            tag: None,
            tag_loc: None,
            loc: Span {
                start: Pos::ORIGIN,
                end: Pos::ORIGIN,
            },
            leading_comments: None,
            trailing_comment: None,
        };
        assert!(state.register_anchor("a".to_owned(), &node).is_ok());
        assert!(state.register_anchor("b".to_owned(), &node).is_ok());
        let err = state
            .register_anchor("c".to_owned(), &node)
            .expect_err("expected AnchorCountLimitExceeded");
        assert!(matches!(
            err,
            LoadError::AnchorCountLimitExceeded { limit: 2 }
        ));
    }

    // An anchor whose stored node is an alias to itself must be reported as
    // circular instead of recursing forever.
    #[test]
    fn expand_node_detects_circular_alias() {
        let options = LoaderOptions {
            mode: LoadMode::Resolved,
            ..LoaderOptions::default()
        };
        let mut state = LoadState::new(&options);
        // Inserted directly into the map to build the self-reference.
        let alias_node = Node::Alias {
            name: "a".to_owned(),
            loc: Span {
                start: Pos::ORIGIN,
                end: Pos::ORIGIN,
            },
            leading_comments: None,
            trailing_comment: None,
        };
        state.anchor_map.insert("a".to_owned(), alias_node.clone());
        let mut in_progress = HashSet::new();
        let result = state.expand_node(alias_node, &mut in_progress);
        assert!(
            matches!(result, Err(LoadError::CircularAlias { .. })),
            "expected CircularAlias, got: {result:?}"
        );
    }

    // A comment between a key and its nested mapping value becomes a leading
    // comment of the first nested key.
    #[test]
    fn comment_between_key_and_nested_mapping_is_attached_to_first_key() {
        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        assert_eq!(entries.len(), 1);
        let (_outer_key, outer_value) = &entries[0];
        let Node::Mapping {
            entries: nested, ..
        } = outer_value
        else {
            panic!("expected nested mapping");
        };
        assert_eq!(nested.len(), 1);
        let (inner_key, _) = &nested[0];
        assert_eq!(
            inner_key.leading_comments(),
            &["# Style 1"],
            "comment should be attached to the first nested key"
        );
    }

    // Same as above, but the nested value is a sequence: the comment lands on
    // the first item.
    #[test]
    fn comment_between_key_and_nested_sequence_is_attached_to_first_item() {
        let docs = load("key:\n  # leading\n  - item1\n  - item2\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_key, seq_value) = &entries[0];
        let Node::Sequence { items, .. } = seq_value else {
            panic!("expected sequence value");
        };
        assert_eq!(
            items[0].leading_comments(),
            &["# leading"],
            "comment should be attached to first sequence item"
        );
    }

    // Several consecutive comments before a nested collection are all kept,
    // in source order.
    #[test]
    fn multiple_comments_between_key_and_collection_all_preserved() {
        let docs = load("key:\n  # first\n  # second\n  - item\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_key, seq_value) = &entries[0];
        let Node::Sequence { items, .. } = seq_value else {
            panic!("expected sequence value");
        };
        assert_eq!(
            items[0].leading_comments(),
            &["# first", "# second"],
            "both comments should be on first item"
        );
    }

    // The comment belongs to the nested key only; the outer key stays clean.
    #[test]
    fn comment_between_key_and_collection_does_not_corrupt_key_node() {
        let docs = load("outer:\n  # Style 1\n  inner: val\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (outer_key, _) = &entries[0];
        assert!(
            outer_key.leading_comments().is_empty(),
            "outer key should have no leading comments"
        );
        assert!(
            outer_key.trailing_comment().is_none(),
            "outer key should have no trailing comment"
        );
    }

    // Without any comment in the input, no leading comments are invented.
    #[test]
    fn no_comment_between_key_and_value_leaves_leading_comments_empty() {
        let docs = load("key:\n  inner: val\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_key, nested) = &entries[0];
        let Node::Mapping {
            entries: nested_entries,
            ..
        } = nested
        else {
            panic!("expected nested mapping");
        };
        let (inner_key, _) = &nested_entries[0];
        assert!(
            inner_key.leading_comments().is_empty(),
            "inner key should have no leading comments when there is no comment"
        );
    }

    // A comment that follows the last item of a sequence is carried over to
    // the next sibling key of the enclosing mapping.
    #[test]
    fn trailing_comment_of_sequence_preserved_as_leading_on_next_sibling() {
        let input =
            "Lists:\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n";
        let docs = load(input).unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_lists_key, nested) = &entries[0];
        let Node::Mapping {
            entries: nested_entries,
            ..
        } = nested
        else {
            panic!("expected nested mapping");
        };
        assert_eq!(nested_entries.len(), 2);
        let (list_b_key, _) = &nested_entries[1];
        assert_eq!(
            list_b_key.leading_comments(),
            &["# Style 2"],
            "# Style 2 should be leading comment on list-b key"
        );
    }

    // Comments left over when a nested sequence closes reach the next entry
    // of the surrounding mapping.
    #[test]
    fn overflow_comments_from_nested_sequence_end_reach_next_mapping_entry() {
        let input = "outer:\n  a:\n    - x\n  # between\n  b: y\n";
        let docs = load(input).unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_outer_key, outer_val) = &entries[0];
        let Node::Mapping {
            entries: nested, ..
        } = outer_val
        else {
            panic!("expected nested mapping");
        };
        assert_eq!(nested.len(), 2);
        let (b_key, _) = &nested[1];
        assert_eq!(
            b_key.leading_comments(),
            &["# between"],
            "# between should be leading comment on b key"
        );
    }

    // Same carry-over, with a nested mapping instead of a nested sequence.
    #[test]
    fn overflow_comments_from_nested_mapping_end_reach_next_sibling() {
        let input = "parent:\n  child1:\n    k: v\n  # end-of-child1\n  child2: val\n";
        let docs = load(input).unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_parent_key, parent_val) = &entries[0];
        let Node::Mapping {
            entries: siblings, ..
        } = parent_val
        else {
            panic!("expected parent mapping value");
        };
        assert_eq!(siblings.len(), 2);
        let (child2_key, _) = &siblings[1];
        assert_eq!(
            child2_key.leading_comments(),
            &["# end-of-child1"],
            "# end-of-child1 should be leading comment on child2 key"
        );
    }

    // A comment after the last item with no following sibling must not break
    // loading or drop items. Only the items are asserted here, not where the
    // comment ends up.
    #[test]
    fn overflow_comments_at_top_level_sequence_end_are_not_lost() {
        let input = "items:\n  - a\n  - b\n  # tail\n";
        let docs = load(input).unwrap();
        assert!(!docs.is_empty(), "document should parse without error");
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_items_key, seq_val) = &entries[0];
        let Node::Sequence { items, .. } = seq_val else {
            panic!("expected sequence value");
        };
        assert_eq!(items.len(), 2, "sequence items must not be lost");
    }

    // A comment-free sequence yields items without leading comments.
    #[test]
    fn no_overflow_comments_when_collection_ends_cleanly() {
        let docs = load("key:\n  - item1\n  - item2\n").unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_key, seq_val) = &entries[0];
        let Node::Sequence { items, .. } = seq_val else {
            panic!("expected sequence value");
        };
        for item in items {
            assert!(
                item.leading_comments().is_empty(),
                "items should have no leading comments"
            );
        }
    }

    // Regression input: both the comment before the first nested key and the
    // comment between the two nested lists must survive.
    #[test]
    fn original_bug_report_input_preserves_both_comments() {
        let input = "Lists:\n  # Style 1\n  list-a:\n    - item1\n    - item2\n\n  # Style 2\n  list-b:\n    - item1\n    - item2\n";
        let docs = load(input).unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_lists_key, nested) = &entries[0];
        let Node::Mapping {
            entries: nested_entries,
            ..
        } = nested
        else {
            panic!("expected nested mapping");
        };
        assert_eq!(nested_entries.len(), 2);
        let (first_key, _) = &nested_entries[0];
        let (second_key, _) = &nested_entries[1];
        assert_eq!(
            first_key.leading_comments(),
            &["# Style 1"],
            "list-a should have # Style 1 as leading comment"
        );
        assert_eq!(
            second_key.leading_comments(),
            &["# Style 2"],
            "list-b should have # Style 2 as leading comment"
        );
    }

    // Leading comments go on keys, a same-line comment goes on the value, and
    // neither kind leaks onto the neighbouring entry.
    #[test]
    fn leading_and_trailing_comments_both_preserved_on_sibling_entries() {
        let input = "map:\n  # leading\n  key: value  # trailing\n  # next-leading\n  key2: v2\n";
        let docs = load(input).unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_map_key, map_val) = &entries[0];
        let Node::Mapping {
            entries: siblings, ..
        } = map_val
        else {
            panic!("expected mapping value");
        };
        assert_eq!(siblings.len(), 2);
        let (key1, val1) = &siblings[0];
        let (key2, _) = &siblings[1];
        assert_eq!(key1.leading_comments(), &["# leading"]);
        assert_eq!(val1.trailing_comment(), Some("# trailing"));
        assert_eq!(key2.leading_comments(), &["# next-leading"]);
    }

    // A comment after a nested sequence propagates to the next sibling of the
    // mapping that contains it.
    #[test]
    fn deeply_nested_overflow_comments_reach_correct_sibling() {
        let input = "top:\n  mid:\n    - x\n  # deep-overflow\n  next: y\n";
        let docs = load(input).unwrap();
        let root = &docs[0].root;
        let Node::Mapping { entries, .. } = root else {
            panic!("expected root mapping");
        };
        let (_top_key, top_val) = &entries[0];
        let Node::Mapping {
            entries: top_entries,
            ..
        } = top_val
        else {
            panic!("expected top-level mapping");
        };
        assert_eq!(top_entries.len(), 2);
        let (next_key, _) = &top_entries[1];
        assert_eq!(
            next_key.leading_comments(),
            &["# deep-overflow"],
            "# deep-overflow should propagate from nested sequence to next sibling"
        );
    }

    // No `---` and no `...` in the input: both marker flags are false.
    #[test]
    fn bare_document_has_both_flags_false() {
        let docs = load("key: value\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(!docs[0].explicit_start, "expected explicit_start=false");
        assert!(!docs[0].explicit_end, "expected explicit_end=false");
    }

    // Only `---` present.
    #[test]
    fn document_with_start_marker_has_explicit_start_true() {
        let docs = load("---\nkey: value\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(docs[0].explicit_start, "expected explicit_start=true");
        assert!(!docs[0].explicit_end, "expected explicit_end=false");
    }

    // Only `...` present.
    #[test]
    fn document_with_end_marker_has_explicit_end_true() {
        let docs = load("key: value\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(!docs[0].explicit_start, "expected explicit_start=false");
        assert!(docs[0].explicit_end, "expected explicit_end=true");
    }

    // Both `---` and `...` present.
    #[test]
    fn document_with_both_markers_has_both_flags_true() {
        let docs = load("---\nkey: value\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(docs[0].explicit_start, "expected explicit_start=true");
        assert!(docs[0].explicit_end, "expected explicit_end=true");
    }

    // Marker flags are tracked per document in a multi-document stream.
    #[test]
    fn multi_document_flags_are_independent() {
        let docs = load("doc1: a\n---\ndoc2: b\n...\n---\ndoc3: c\n").expect("load failed");
        assert_eq!(docs.len(), 3);
        assert!(!docs[0].explicit_start, "doc1 explicit_start");
        assert!(!docs[0].explicit_end, "doc1 explicit_end");
        assert!(docs[1].explicit_start, "doc2 explicit_start");
        assert!(docs[1].explicit_end, "doc2 explicit_end");
        assert!(docs[2].explicit_start, "doc3 explicit_start");
        assert!(!docs[2].explicit_end, "doc3 explicit_end");
    }

    // A document consisting of nothing but the two markers still loads.
    #[test]
    fn empty_document_with_explicit_markers_has_both_flags_true() {
        let docs = load("---\n...\n").expect("load failed");
        assert_eq!(docs.len(), 1);
        assert!(docs[0].explicit_start, "expected explicit_start=true");
        assert!(docs[0].explicit_end, "expected explicit_end=true");
    }

    // Line feed is rendered as a \u000A escape.
    #[test]
    fn sanitize_newline_replaced_with_escape() {
        let result = sanitize_scalar_for_error("foo\nbar");
        assert!(
            !result.contains('\n'),
            "output must not contain a raw newline"
        );
        assert!(
            result.contains("\\u000A"),
            "output must contain \\u000A escape, got: {result:?}"
        );
        assert_eq!(result, "foo\\u000Abar");
    }

    // Carriage return is rendered as a \u000D escape.
    #[test]
    fn sanitize_carriage_return_replaced_with_escape() {
        let result = sanitize_scalar_for_error("foo\rbar");
        assert!(
            !result.contains('\r'),
            "output must not contain a raw carriage return"
        );
        assert!(
            result.contains("\\u000D"),
            "output must contain \\u000D escape, got: {result:?}"
        );
        assert_eq!(result, "foo\\u000Dbar");
    }

    // NUL is rendered as a \u0000 escape.
    #[test]
    fn sanitize_null_byte_replaced_with_escape() {
        let result = sanitize_scalar_for_error("foo\0bar");
        assert!(
            !result.contains('\0'),
            "output must not contain a raw null byte"
        );
        assert!(
            result.contains("\\u0000"),
            "output must contain \\u0000 escape, got: {result:?}"
        );
        assert_eq!(result, "foo\\u0000bar");
    }

    // Short, harmless input passes through unchanged.
    #[test]
    fn sanitize_short_value_stored_verbatim() {
        let input = "hello";
        let result = sanitize_scalar_for_error(input);
        assert_eq!(result, "hello");
        assert!(
            !result.ends_with("..."),
            "short value must not be truncated"
        );
    }

    // Exactly 128 characters is still within the limit.
    #[test]
    fn sanitize_value_at_exact_limit_not_truncated() {
        let input = "a".repeat(128);
        let result = sanitize_scalar_for_error(&input);
        assert_eq!(
            result.len(),
            128,
            "128-char input must produce 128-char output"
        );
        assert!(
            !result.ends_with("..."),
            "value at exact limit must not be truncated"
        );
    }

    // One character over the limit is cut to 128 characters plus "...".
    #[test]
    fn sanitize_value_over_limit_truncated() {
        let input = "a".repeat(129);
        let result = sanitize_scalar_for_error(&input);
        assert!(
            result.ends_with("..."),
            "value over limit must end with '...'"
        );
        assert_eq!(
            result.len(),
            128 + 3,
            "truncated output must be 128 chars + 3 ellipsis chars"
        );
    }

    // The limit counts characters, not bytes, so multi-byte characters are
    // never split.
    #[test]
    fn sanitize_multibyte_char_boundary_not_split() {
        // 127 three-byte characters followed by two ASCII ones: 129 characters.
        let input: String = "中".repeat(127) + "ab";
        let result = sanitize_scalar_for_error(&input);
        assert!(
            result.ends_with("..."),
            "129-char multibyte input should be truncated"
        );
        let char_count = result.trim_end_matches("...").chars().count();
        assert_eq!(
            char_count, 128,
            "truncated portion must be exactly 128 chars"
        );
    }
}