1use std::borrow::Cow;
4use std::collections::HashMap;
5use std::iter::Peekable;
6
7use crate::Span;
8use crate::callback::ParseCallback;
9use crate::event::{Event, ParseErrorKind, ScalarKind, Separator};
10use crate::lexer::Lexer;
11use crate::token::{Token, TokenKind};
12#[allow(unused_imports)]
13use crate::trace;
14
/// Event-emitting parser over a source string borrowed for `'src`.
///
/// Built with [`Parser::new`]; [`Parser::parse`] consumes the parser and
/// reports everything it finds through a [`ParseCallback`].
pub struct Parser<'src> {
    /// Token stream with a single token of lookahead.
    lexer: Peekable<LexerIter<'src>>,
}
19
/// Adapter that exposes a [`Lexer`] through the [`Iterator`] interface.
struct LexerIter<'src> {
    /// Lexer the tokens are pulled from.
    lexer: Lexer<'src>,
    /// Set once the `Eof` token has been handed out; iteration ends afterwards.
    done: bool,
}
25
26impl<'src> Iterator for LexerIter<'src> {
27 type Item = Token<'src>;
28
29 fn next(&mut self) -> Option<Self::Item> {
30 if self.done {
31 return None;
32 }
33 let token = self.lexer.next_token();
34 if token.kind == TokenKind::Eof {
35 self.done = true;
36 }
37 Some(token)
38 }
39}
40
41impl<'src> Parser<'src> {
42 pub fn new(source: &'src str) -> Self {
44 let lexer = Lexer::new(source);
45 Self {
46 lexer: LexerIter { lexer, done: false }.peekable(),
47 }
48 }
49
50 pub fn parse<C: ParseCallback<'src>>(mut self, callback: &mut C) {
53 if !callback.event(Event::DocumentStart) {
54 return;
55 }
56
57 self.skip_whitespace_and_newlines();
59
60 while let Some(token) = self.peek() {
62 match token.kind {
63 TokenKind::LineComment => {
64 let token = self.advance().unwrap();
65 if !callback.event(Event::Comment {
66 span: token.span,
67 text: token.text,
68 }) {
69 return;
70 }
71 self.skip_whitespace_and_newlines();
72 }
73 TokenKind::DocComment => {
74 let token = self.advance().unwrap();
75 if !callback.event(Event::DocComment {
76 span: token.span,
77 text: token.text,
78 }) {
79 return;
80 }
81 self.skip_whitespace_and_newlines();
82 }
83 _ => break,
84 }
85 }
86
87 if matches!(self.peek(), Some(t) if t.kind == TokenKind::LBrace) {
90 let obj = self.parse_object_atom();
91 self.emit_atom_as_value(&obj, callback);
92 } else {
93 self.parse_entries(callback, None);
95 }
96
97 callback.event(Event::DocumentEnd);
98 }
99
100 pub fn parse_to_vec(self) -> Vec<Event<'src>> {
102 let mut events = Vec::new();
103 self.parse(&mut events);
104 events
105 }
106
107 fn peek(&mut self) -> Option<&Token<'src>> {
109 while let Some(token) = self.lexer.peek() {
111 if token.kind == TokenKind::Whitespace {
112 self.lexer.next();
113 } else {
114 break;
115 }
116 }
117 self.lexer.peek()
118 }
119
/// Returns the very next token without consuming it and without skipping
/// whitespace, so callers can test whether two tokens are directly adjacent.
fn peek_raw(&mut self) -> Option<&Token<'src>> {
    self.lexer.peek()
}
124
/// Consumes and returns the next token, or `None` once the stream has ended.
fn advance(&mut self) -> Option<Token<'src>> {
    self.lexer.next()
}
129
130 fn skip_whitespace(&mut self) {
132 while let Some(token) = self.lexer.peek() {
133 if token.kind == TokenKind::Whitespace {
134 self.lexer.next();
135 } else {
136 break;
137 }
138 }
139 }
140
141 fn skip_whitespace_and_newlines(&mut self) {
143 while let Some(token) = self.lexer.peek() {
144 if token.kind == TokenKind::Whitespace || token.kind == TokenKind::Newline {
145 self.lexer.next();
146 } else {
147 break;
148 }
149 }
150 }
151
152 fn parse_entries<C: ParseCallback<'src>>(
155 &mut self,
156 callback: &mut C,
157 closing: Option<TokenKind>,
158 ) {
159 trace!("Parsing entries, closing token: {:?}", closing);
160 let mut path_state = PathState::default();
161 let mut pending_doc_comment: Option<Span> = None;
164
165 self.skip_whitespace_and_newlines();
166
167 while let Some(token) = self.peek() {
168 if token.kind == TokenKind::Eof {
170 break;
171 }
172 if let Some(close) = closing
173 && token.kind == close
174 {
175 break;
176 }
177
178 if token.kind == TokenKind::DocComment {
180 let token = self.advance().unwrap();
181 pending_doc_comment = Some(token.span);
182 if !callback.event(Event::DocComment {
183 span: token.span,
184 text: token.text,
185 }) {
186 return;
187 }
188 self.skip_whitespace_and_newlines();
189 continue;
190 }
191
192 if token.kind == TokenKind::LineComment {
194 let token = self.advance().unwrap();
195 if !callback.event(Event::Comment {
196 span: token.span,
197 text: token.text,
198 }) {
199 return;
200 }
201 self.skip_whitespace_and_newlines();
202 continue;
203 }
204
205 pending_doc_comment = None;
207
208 if !self.parse_entry_with_path_check(callback, &mut path_state) {
210 return;
211 }
212
213 self.skip_whitespace_and_newlines();
215 }
216
217 if let Some(span) = pending_doc_comment {
220 callback.event(Event::Error {
221 span,
222 kind: ParseErrorKind::DanglingDocComment,
223 });
224 }
225 }
226
227 fn parse_entry_with_path_check<C: ParseCallback<'src>>(
231 &mut self,
232 callback: &mut C,
233 path_state: &mut PathState,
234 ) -> bool {
235 if !callback.event(Event::EntryStart) {
236 return false;
237 }
238
239 let atoms = self.collect_entry_atoms();
241
242 if atoms.is_empty() {
243 return callback.event(Event::EntryEnd);
245 }
246
247 let key_atom = &atoms[0];
249
250 match &key_atom.content {
253 AtomContent::Heredoc(_) => {
254 if !callback.event(Event::Error {
255 span: key_atom.span,
256 kind: ParseErrorKind::InvalidKey,
257 }) {
258 return false;
259 }
260 }
261 AtomContent::Object { .. } | AtomContent::Sequence { .. } => {
262 if !callback.event(Event::Error {
263 span: key_atom.span,
264 kind: ParseErrorKind::InvalidKey,
265 }) {
266 return false;
267 }
268 }
269 _ => {}
270 }
271
272 if let AtomContent::Scalar(text) = &key_atom.content
275 && key_atom.kind == ScalarKind::Bare
276 && text.contains('.')
277 {
278 return self.emit_dotted_path_entry(text, key_atom.span, &atoms, callback, path_state);
279 }
280
281 let key_text = match &key_atom.content {
283 AtomContent::Scalar(text) => {
284 let processed = self.process_scalar(text, key_atom.kind);
285 processed.into_owned()
286 }
287 AtomContent::Unit => "@".to_string(),
288 AtomContent::Tag { name, .. } => format!("@{}", name),
289 _ => key_atom.span.start.to_string(), };
291
292 let value_kind = if atoms.len() >= 2 {
294 match &atoms[1].content {
295 AtomContent::Object { .. } | AtomContent::Attributes { .. } => {
296 PathValueKind::Object
297 }
298 _ => PathValueKind::Terminal,
299 }
300 } else {
301 PathValueKind::Terminal
303 };
304
305 let path = vec![key_text];
307 if let Err(err) = path_state.check_and_update(&path, key_atom.span, value_kind)
308 && !self.emit_path_error(err, key_atom.span, callback)
309 {
310 return false;
311 }
312
313 if !self.emit_atom_as_key(key_atom, callback) {
314 return false;
315 }
316
317 if atoms.len() == 1 {
318 if !callback.event(Event::Unit {
320 span: key_atom.span,
321 }) {
322 return false;
323 }
324 } else if atoms.len() == 2 {
325 if !self.emit_atom_as_value(&atoms[1], callback) {
327 return false;
328 }
329 } else {
330 if !self.emit_atom_as_value(&atoms[1], callback) {
333 return false;
334 }
335
336 let third_atom = &atoms[2];
339 if !callback.event(Event::Error {
340 span: third_atom.span,
341 kind: ParseErrorKind::TooManyAtoms,
342 }) {
343 return false;
344 }
345 }
346
347 callback.event(Event::EntryEnd)
348 }
349
350 fn emit_path_error<C: ParseCallback<'src>>(
352 &self,
353 err: PathError,
354 span: Span,
355 callback: &mut C,
356 ) -> bool {
357 let kind = match err {
358 PathError::Duplicate { original } => ParseErrorKind::DuplicateKey { original },
359 PathError::Reopened { closed_path } => ParseErrorKind::ReopenedPath { closed_path },
360 PathError::NestIntoTerminal { terminal_path } => {
361 ParseErrorKind::NestIntoTerminal { terminal_path }
362 }
363 };
364 callback.event(Event::Error { span, kind })
365 }
366
367 fn emit_dotted_path_entry<C: ParseCallback<'src>>(
371 &self,
372 path_text: &'src str,
373 path_span: Span,
374 atoms: &[Atom<'src>],
375 callback: &mut C,
376 path_state: &mut PathState,
377 ) -> bool {
378 let segments: Vec<&str> = path_text.split('.').collect();
380
381 if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
382 if !callback.event(Event::Error {
384 span: path_span,
385 kind: ParseErrorKind::InvalidKey,
386 }) {
387 return false;
388 }
389 return callback.event(Event::EntryEnd);
390 }
391
392 let path: Vec<String> = segments.iter().map(|s| s.to_string()).collect();
394
395 let value_kind = if atoms.len() >= 2 {
397 match &atoms[1].content {
398 AtomContent::Object { .. } | AtomContent::Attributes { .. } => {
399 PathValueKind::Object
400 }
401 _ => PathValueKind::Terminal,
402 }
403 } else {
404 PathValueKind::Terminal
406 };
407
408 if let Err(err) = path_state.check_and_update(&path, path_span, value_kind)
410 && !self.emit_path_error(err, path_span, callback)
411 {
412 return false;
413 }
414
415 let mut current_offset = path_span.start;
418
419 let depth = segments.len();
421 for (i, segment) in segments.iter().enumerate() {
422 let segment_len = segment.len() as u32;
423 let segment_span = Span::new(current_offset, current_offset + segment_len);
424
425 if i > 0 {
426 if !callback.event(Event::EntryStart) {
428 return false;
429 }
430 }
431
432 if !callback.event(Event::Key {
434 span: segment_span,
435 tag: None,
436 payload: Some(Cow::Borrowed(segment)),
437 kind: ScalarKind::Bare,
438 }) {
439 return false;
440 }
441
442 if i < depth - 1 {
443 if !callback.event(Event::ObjectStart {
445 span: segment_span,
446 separator: Separator::Newline,
447 }) {
448 return false;
449 }
450 }
451
452 current_offset += segment_len + 1; }
455
456 if atoms.len() == 1 {
458 if !callback.event(Event::Unit { span: path_span }) {
460 return false;
461 }
462 } else if atoms.len() == 2 {
463 if !self.emit_atom_as_value(&atoms[1], callback) {
465 return false;
466 }
467 } else {
468 if !self.emit_atom_as_value(&atoms[1], callback) {
471 return false;
472 }
473 let third_atom = &atoms[2];
474 if !callback.event(Event::Error {
475 span: third_atom.span,
476 kind: ParseErrorKind::TooManyAtoms,
477 }) {
478 return false;
479 }
480 }
481
482 for i in (0..depth).rev() {
484 if i < depth - 1 {
485 if !callback.event(Event::ObjectEnd {
487 span: path_span, }) {
489 return false;
490 }
491 }
492 if !callback.event(Event::EntryEnd) {
494 return false;
495 }
496 }
497
498 true
499 }
500
501 fn collect_entry_atoms(&mut self) -> Vec<Atom<'src>> {
503 let mut atoms = Vec::new();
504
505 loop {
506 self.skip_whitespace();
507
508 let Some(token) = self.peek() else {
509 break;
510 };
511
512 match token.kind {
513 TokenKind::Newline | TokenKind::Comma | TokenKind::Eof => break,
515 TokenKind::RBrace | TokenKind::RParen => break,
516
517 TokenKind::LineComment | TokenKind::DocComment => break,
519
520 TokenKind::LBrace => {
522 atoms.push(self.parse_object_atom());
523 }
524 TokenKind::LParen => {
525 atoms.push(self.parse_sequence_atom());
526 }
527
528 TokenKind::At => {
530 atoms.push(self.parse_tag_or_unit_atom());
531 }
532
533 TokenKind::BareScalar => {
536 if self.is_attribute_start() {
537 atoms.push(self.parse_attributes());
538 } else {
539 atoms.push(self.parse_scalar_atom());
540 }
541 }
542
543 TokenKind::QuotedScalar | TokenKind::RawScalar | TokenKind::HeredocStart => {
545 atoms.push(self.parse_scalar_atom());
546 }
547
548 TokenKind::Whitespace => {
550 self.advance();
551 }
552
553 TokenKind::Error => {
555 let token = self.advance().unwrap();
556 atoms.push(Atom {
559 span: token.span,
560 kind: ScalarKind::Bare,
561 content: AtomContent::Error,
562 adjacent_block_span: None,
563 });
564 }
565
566 _ => {
568 self.advance();
570 }
571 }
572 }
573
574 atoms
575 }
576
/// Reports whether the upcoming bare scalar should be handed to
/// `parse_attributes`.
///
/// Always answers `true` and performs no lookahead.
/// NOTE(review): presumably sufficient because `parse_attributes` itself
/// falls back to a plain scalar atom when no adjacent `>` follows — confirm.
fn is_attribute_start(&mut self) -> bool {
    true
}
584
585 fn parse_attributes(&mut self) -> Atom<'src> {
589 let first_token = self.advance().unwrap();
591 let start_span = first_token.span;
592 let first_key = first_token.text;
593
594 let eq_info = self.peek_raw().map(|t| (t.kind, t.span.start, t.span.end));
597
598 let Some((eq_kind, eq_start, eq_end)) = eq_info else {
599 return Atom {
601 span: start_span,
602 kind: ScalarKind::Bare,
603 content: AtomContent::Scalar(first_key),
604 adjacent_block_span: None,
605 };
606 };
607
608 if eq_kind != TokenKind::Gt || eq_start != start_span.end {
609 let adjacent_block_span = if matches!(eq_kind, TokenKind::LBrace | TokenKind::LParen)
613 && eq_start == start_span.end
614 {
615 Some(Span::new(eq_start, eq_end))
616 } else {
617 None
618 };
619 return Atom {
620 span: start_span,
621 kind: ScalarKind::Bare,
622 content: AtomContent::Scalar(first_key),
623 adjacent_block_span,
624 };
625 }
626
627 let gt_token = self.advance().unwrap();
629 let gt_span = gt_token.span;
630
631 let mut trailing_gt_spans = Vec::new();
633
634 let val_info = self.peek_raw().map(|t| (t.span.start, t.kind));
636
637 let Some((val_start, val_kind)) = val_info else {
638 trailing_gt_spans.push(gt_span);
640 return Atom {
641 span: Span::new(start_span.start, gt_span.end),
642 kind: ScalarKind::Bare,
643 content: AtomContent::Attributes {
644 entries: vec![],
645 trailing_gt_spans,
646 },
647 adjacent_block_span: None,
648 };
649 };
650
651 if val_start != eq_end {
652 trailing_gt_spans.push(gt_span);
654 return Atom {
655 span: Span::new(start_span.start, gt_span.end),
656 kind: ScalarKind::Bare,
657 content: AtomContent::Attributes {
658 entries: vec![],
659 trailing_gt_spans,
660 },
661 adjacent_block_span: None,
662 };
663 }
664
665 if !matches!(
667 val_kind,
668 TokenKind::BareScalar
669 | TokenKind::QuotedScalar
670 | TokenKind::RawScalar
671 | TokenKind::LParen
672 | TokenKind::LBrace
673 | TokenKind::At
674 | TokenKind::HeredocStart
675 ) {
676 trailing_gt_spans.push(gt_span);
678 return Atom {
679 span: Span::new(start_span.start, gt_span.end),
680 kind: ScalarKind::Bare,
681 content: AtomContent::Attributes {
682 entries: vec![],
683 trailing_gt_spans,
684 },
685 adjacent_block_span: None,
686 };
687 }
688
689 let first_value = self.parse_attribute_value();
691 let Some(first_value) = first_value else {
692 trailing_gt_spans.push(gt_span);
694 return Atom {
695 span: Span::new(start_span.start, gt_span.end),
696 kind: ScalarKind::Bare,
697 content: AtomContent::Attributes {
698 entries: vec![],
699 trailing_gt_spans,
700 },
701 adjacent_block_span: None,
702 };
703 };
704
705 let mut attrs = vec![AttributeEntry {
706 key: first_key,
707 key_span: start_span,
708 value: first_value,
709 }];
710
711 loop {
713 self.skip_whitespace();
714
715 let token_info = self.peek().map(|t| (t.kind, t.span, t.text));
717 let Some((token_kind, key_span, key_text)) = token_info else {
718 break;
719 };
720
721 if token_kind != TokenKind::BareScalar {
723 break;
724 }
725
726 self.advance();
728
729 let eq_info = self.peek_raw().map(|t| (t.kind, t.span, t.span.end));
731 let Some((eq_kind, loop_gt_span, loop_eq_end)) = eq_info else {
732 break;
735 };
736
737 if eq_kind != TokenKind::Gt || loop_gt_span.start != key_span.end {
738 break;
740 }
741
742 self.advance();
744
745 let val_info = self.peek_raw().map(|t| (t.span.start, t.kind));
747 let Some((val_start, val_kind)) = val_info else {
748 trailing_gt_spans.push(loop_gt_span);
750 break;
751 };
752
753 if val_start != loop_eq_end {
754 trailing_gt_spans.push(loop_gt_span);
756 break;
757 }
758
759 if !matches!(
761 val_kind,
762 TokenKind::BareScalar
763 | TokenKind::QuotedScalar
764 | TokenKind::RawScalar
765 | TokenKind::LParen
766 | TokenKind::LBrace
767 | TokenKind::At
768 | TokenKind::HeredocStart
769 ) {
770 trailing_gt_spans.push(loop_gt_span);
772 break;
773 }
774
775 let Some(value) = self.parse_attribute_value() else {
776 trailing_gt_spans.push(loop_gt_span);
778 break;
779 };
780
781 attrs.push(AttributeEntry {
782 key: key_text,
783 key_span,
784 value,
785 });
786 }
787
788 let end_span = attrs
789 .last()
790 .map(|a| a.value.span.end)
791 .or_else(|| trailing_gt_spans.last().map(|s| s.end))
792 .unwrap_or(start_span.end);
793
794 Atom {
795 span: Span {
796 start: start_span.start,
797 end: end_span,
798 },
799 kind: ScalarKind::Bare,
800 content: AtomContent::Attributes {
801 entries: attrs,
802 trailing_gt_spans,
803 },
804 adjacent_block_span: None,
805 }
806 }
807
808 fn parse_attribute_value(&mut self) -> Option<Atom<'src>> {
811 let token = self.peek()?;
812
813 match token.kind {
814 TokenKind::BareScalar | TokenKind::QuotedScalar | TokenKind::RawScalar => {
815 Some(self.parse_scalar_atom())
816 }
817 TokenKind::LParen => Some(self.parse_sequence_atom()),
818 TokenKind::LBrace => Some(self.parse_object_atom()),
819 TokenKind::At => Some(self.parse_tag_or_unit_atom()),
820 TokenKind::HeredocStart => Some(self.parse_scalar_atom()),
822 _ => None,
823 }
824 }
825
826 fn parse_scalar_atom(&mut self) -> Atom<'src> {
828 let token = self.advance().unwrap();
829 trace!("Parsing scalar: {:?}", token.kind);
830 match token.kind {
831 TokenKind::BareScalar => Atom {
832 span: token.span,
833 kind: ScalarKind::Bare,
834 content: AtomContent::Scalar(token.text),
835 adjacent_block_span: None,
836 },
837 TokenKind::QuotedScalar => Atom {
838 span: token.span,
839 kind: ScalarKind::Quoted,
840 content: AtomContent::Scalar(token.text),
841 adjacent_block_span: None,
842 },
843 TokenKind::RawScalar => Atom {
844 span: token.span,
845 kind: ScalarKind::Raw,
846 content: AtomContent::Scalar(token.text),
847 adjacent_block_span: None,
848 },
849 TokenKind::HeredocStart => {
850 let start_span = token.span;
853 let mut content = String::new();
854 let mut end_span = start_span;
855 let mut is_error = false;
856 let mut end_token_text = "";
857
858 loop {
859 let Some(token) = self.advance() else {
860 break;
861 };
862 match token.kind {
863 TokenKind::HeredocContent => {
864 content.push_str(token.text);
865 }
866 TokenKind::HeredocEnd => {
867 end_span = token.span;
868 end_token_text = token.text;
869 break;
870 }
871 TokenKind::Error => {
872 end_span = token.span;
874 is_error = true;
875 break;
876 }
877 _ => break,
878 }
879 }
880
881 let indent_len = end_token_text
885 .chars()
886 .take_while(|c| *c == ' ' || *c == '\t')
887 .count();
888 if indent_len > 0 && !content.is_empty() {
889 content = Self::dedent_heredoc_content(&content, indent_len);
890 }
891
892 if is_error {
893 Atom {
894 span: Span {
895 start: start_span.start,
896 end: end_span.end,
897 },
898 kind: ScalarKind::Heredoc,
899 content: AtomContent::Error,
900 adjacent_block_span: None,
901 }
902 } else {
903 Atom {
904 span: Span {
905 start: start_span.start,
906 end: end_span.end,
907 },
908 kind: ScalarKind::Heredoc,
909 content: AtomContent::Heredoc(content),
910 adjacent_block_span: None,
911 }
912 }
913 }
914 _ => unreachable!(),
915 }
916 }
917
918 fn parse_object_atom(&mut self) -> Atom<'src> {
921 trace!("Parsing object");
922 let open = self.advance().unwrap(); let start_span = open.span;
924
925 let mut entries: Vec<ObjectEntry<'src>> = Vec::new();
926 let mut separator_mode: Option<Separator> = None;
927 let mut end_span = start_span;
928 let mut seen_keys: HashMap<KeyValue, Span> = HashMap::new();
931 let mut duplicate_key_spans: Vec<(Span, Span)> = Vec::new();
933 let mut mixed_separator_spans: Vec<Span> = Vec::new();
935 let mut pending_doc_comments: Vec<(Span, &'src str)> = Vec::new();
937 let mut dangling_doc_comment_spans: Vec<Span> = Vec::new();
938 let mut unclosed = false;
940
941 loop {
942 self.skip_whitespace();
944
945 let Some(token) = self.peek() else {
946 unclosed = true;
948 for (span, _) in &pending_doc_comments {
950 dangling_doc_comment_spans.push(*span);
951 }
952 break;
953 };
954
955 let token_span = token.span;
957
958 match token.kind {
959 TokenKind::RBrace => {
960 for (span, _) in &pending_doc_comments {
962 dangling_doc_comment_spans.push(*span);
963 }
964 let close = self.advance().unwrap();
965 end_span = close.span;
966 break;
967 }
968
969 TokenKind::Newline => {
970 if separator_mode == Some(Separator::Comma) {
972 mixed_separator_spans.push(token_span);
974 }
975 separator_mode = Some(Separator::Newline);
976 self.advance();
977 while matches!(self.peek(), Some(t) if t.kind == TokenKind::Newline) {
979 self.advance();
980 }
981 }
982
983 TokenKind::Comma => {
984 if separator_mode == Some(Separator::Newline) {
986 mixed_separator_spans.push(token_span);
988 }
989 separator_mode = Some(Separator::Comma);
990 self.advance();
991 }
992
993 TokenKind::LineComment => {
994 self.advance();
996 }
997
998 TokenKind::DocComment => {
999 let doc_token = self.advance().unwrap();
1001 pending_doc_comments.push((doc_token.span, doc_token.text));
1002 }
1003
1004 TokenKind::Eof => {
1005 unclosed = true;
1007 for (span, _) in &pending_doc_comments {
1008 dangling_doc_comment_spans.push(*span);
1009 }
1010 break;
1011 }
1012
1013 _ => {
1014 let doc_comments = std::mem::take(&mut pending_doc_comments);
1016
1017 let entry_atoms = self.collect_entry_atoms();
1019 if !entry_atoms.is_empty() {
1020 let key = entry_atoms[0].clone();
1021
1022 let key_value = KeyValue::from_atom(&key, self);
1025 if let Some(&original_span) = seen_keys.get(&key_value) {
1026 duplicate_key_spans.push((original_span, key.span));
1027 } else {
1028 seen_keys.insert(key_value, key.span);
1029 }
1030
1031 let (value, too_many_atoms_span) = if entry_atoms.len() == 1 {
1032 (
1034 Atom {
1035 span: key.span,
1036 kind: ScalarKind::Bare,
1037 content: AtomContent::Unit,
1038 adjacent_block_span: None,
1039 },
1040 None,
1041 )
1042 } else if entry_atoms.len() == 2 {
1043 (entry_atoms[1].clone(), None)
1045 } else {
1046 (entry_atoms[1].clone(), Some(entry_atoms[2].span))
1049 };
1050 entries.push(ObjectEntry {
1051 key,
1052 value,
1053 doc_comments,
1054 too_many_atoms_span,
1055 });
1056 }
1057 }
1058 }
1059 }
1060
1061 Atom {
1062 span: Span {
1063 start: start_span.start,
1064 end: end_span.end,
1065 },
1066 kind: ScalarKind::Bare,
1067 content: AtomContent::Object {
1068 entries,
1069 separator: separator_mode.unwrap_or(Separator::Comma),
1071 duplicate_key_spans,
1072 mixed_separator_spans,
1073 dangling_doc_comment_spans,
1074 unclosed,
1075 },
1076 adjacent_block_span: None,
1077 }
1078 }
1079
1080 fn parse_sequence_atom(&mut self) -> Atom<'src> {
1083 trace!("Parsing sequence");
1084 let open = self.advance().unwrap(); let start_span = open.span;
1086
1087 let mut elements: Vec<Atom<'src>> = Vec::new();
1088 let mut end_span = start_span;
1089 let mut unclosed = false;
1090 let mut comma_spans: Vec<Span> = Vec::new();
1091
1092 loop {
1093 self.skip_whitespace_and_newlines();
1095
1096 let Some(token) = self.peek() else {
1097 unclosed = true;
1099 break;
1100 };
1101
1102 match token.kind {
1103 TokenKind::RParen => {
1104 let close = self.advance().unwrap();
1105 end_span = close.span;
1106 break;
1107 }
1108
1109 TokenKind::Comma => {
1110 let comma = self.advance().unwrap();
1112 comma_spans.push(comma.span);
1113 }
1114
1115 TokenKind::LineComment | TokenKind::DocComment => {
1116 self.advance();
1118 }
1119
1120 TokenKind::Eof => {
1121 unclosed = true;
1123 break;
1124 }
1125
1126 _ => {
1127 if let Some(elem) = self.parse_single_atom() {
1129 elements.push(elem);
1130 }
1131 }
1132 }
1133 }
1134
1135 Atom {
1136 span: Span {
1137 start: start_span.start,
1138 end: end_span.end,
1139 },
1140 kind: ScalarKind::Bare,
1141 content: AtomContent::Sequence {
1142 elements,
1143 unclosed,
1144 comma_spans,
1145 },
1146 adjacent_block_span: None,
1147 }
1148 }
1149
1150 fn parse_single_atom(&mut self) -> Option<Atom<'src>> {
1152 let token = self.peek()?;
1153
1154 match token.kind {
1155 TokenKind::BareScalar
1156 | TokenKind::QuotedScalar
1157 | TokenKind::RawScalar
1158 | TokenKind::HeredocStart => Some(self.parse_scalar_atom()),
1159 TokenKind::LBrace => Some(self.parse_object_atom()),
1160 TokenKind::LParen => Some(self.parse_sequence_atom()),
1161 TokenKind::At => Some(self.parse_tag_or_unit_atom()),
1162 _ => None,
1163 }
1164 }
1165
1166 fn parse_tag_or_unit_atom(&mut self) -> Atom<'src> {
1169 trace!("Parsing tag or unit");
1170 let at = self.advance().unwrap(); let start_span = at.span;
1172
1173 if let Some(token) = self.peek_raw()
1175 && token.kind == TokenKind::BareScalar
1176 && token.span.start == start_span.end
1177 {
1178 let name_token = self.advance().unwrap();
1182 let full_text = name_token.text;
1183
1184 let tag_name_len = full_text.find('@').unwrap_or(full_text.len());
1186 let name = &full_text[..tag_name_len];
1187 let name_span = Span {
1188 start: name_token.span.start,
1189 end: name_token.span.start + tag_name_len as u32,
1190 };
1191 let name_end = name_span.end;
1192
1193 let has_trailing_at = tag_name_len < full_text.len();
1198
1199 let invalid_tag_name = name.is_empty() || !Self::is_valid_tag_name(name);
1202
1203 let payload = if has_trailing_at {
1205 let at_pos = name_token.span.start + tag_name_len as u32;
1209 Some(Atom {
1210 span: Span {
1211 start: at_pos,
1212 end: at_pos + 1,
1213 },
1214 kind: ScalarKind::Bare,
1215 content: AtomContent::Unit,
1216 adjacent_block_span: None,
1217 })
1218 } else {
1219 self.parse_tag_payload(name_end)
1221 };
1222 let end_span = payload.as_ref().map(|p| p.span.end).unwrap_or(name_end);
1223
1224 return Atom {
1225 span: Span {
1226 start: start_span.start,
1227 end: end_span,
1228 },
1229 kind: ScalarKind::Bare,
1230 content: AtomContent::Tag {
1231 name,
1232 payload: payload.map(Box::new),
1233 invalid_name_span: if invalid_tag_name {
1234 Some(name_span)
1235 } else {
1236 None
1237 },
1238 },
1239 adjacent_block_span: None,
1240 };
1241 }
1242
1243 Atom {
1245 span: start_span,
1246 kind: ScalarKind::Bare,
1247 content: AtomContent::Unit,
1248 adjacent_block_span: None,
1249 }
1250 }
1251
/// Checks whether `name` is an acceptable tag name: an ASCII letter or `_`
/// first, followed only by ASCII letters, digits, `_` or `-`.
/// The empty string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let Some(first) = name.chars().next() else {
        return false;
    };
    if !(first.is_ascii_alphabetic() || first == '_') {
        return false;
    }

    name.chars()
        .skip(1)
        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-'))
}
1268
1269 fn parse_tag_payload(&mut self, after_name: u32) -> Option<Atom<'src>> {
1272 let Some(token) = self.peek_raw() else {
1273 return None; };
1275
1276 if token.span.start != after_name {
1278 return None; }
1280
1281 match token.kind {
1282 TokenKind::LBrace => Some(self.parse_object_atom()),
1284 TokenKind::LParen => Some(self.parse_sequence_atom()),
1286 TokenKind::QuotedScalar | TokenKind::RawScalar | TokenKind::HeredocStart => {
1288 Some(self.parse_scalar_atom())
1289 }
1290 TokenKind::At => {
1292 let at = self.advance().unwrap();
1293 Some(Atom {
1294 span: at.span,
1295 kind: ScalarKind::Bare,
1296 content: AtomContent::Unit,
1297 adjacent_block_span: None,
1298 })
1299 }
1300 _ => None,
1302 }
1303 }
1304
1305 fn emit_atom_as_value<C: ParseCallback<'src>>(
1307 &self,
1308 atom: &Atom<'src>,
1309 callback: &mut C,
1310 ) -> bool {
1311 match &atom.content {
1312 AtomContent::Scalar(text) => {
1313 if atom.kind == ScalarKind::Quoted {
1316 for (offset, seq) in Self::validate_quoted_escapes(text) {
1317 let error_start = atom.span.start + offset as u32;
1318 let error_span = Span::new(error_start, error_start + seq.len() as u32);
1319 if !callback.event(Event::Error {
1320 span: error_span,
1321 kind: ParseErrorKind::InvalidEscape(seq),
1322 }) {
1323 return false;
1324 }
1325 }
1326 }
1327 callback.event(Event::Scalar {
1328 span: atom.span,
1329 value: self.process_scalar(text, atom.kind),
1330 kind: atom.kind,
1331 })
1332 }
1333 AtomContent::Heredoc(content) => callback.event(Event::Scalar {
1334 span: atom.span,
1335 value: Cow::Owned(content.clone()),
1336 kind: ScalarKind::Heredoc,
1337 }),
1338 AtomContent::Unit => callback.event(Event::Unit { span: atom.span }),
1339 AtomContent::Tag {
1341 name,
1342 payload,
1343 invalid_name_span,
1344 } => {
1345 if let Some(span) = invalid_name_span
1348 && !callback.event(Event::Error {
1349 span: *span,
1350 kind: ParseErrorKind::InvalidTagName,
1351 })
1352 {
1353 return false;
1354 }
1355
1356 if !callback.event(Event::TagStart {
1357 span: atom.span,
1358 name,
1359 }) {
1360 return false;
1361 }
1362 if let Some(payload) = payload
1364 && !self.emit_atom_as_value(payload, callback)
1365 {
1366 return false;
1367 }
1368 callback.event(Event::TagEnd)
1370 }
1371 AtomContent::Object {
1373 entries,
1374 separator,
1375 duplicate_key_spans,
1376 mixed_separator_spans,
1377 dangling_doc_comment_spans,
1378 unclosed,
1379 } => {
1380 if !callback.event(Event::ObjectStart {
1381 span: atom.span,
1382 separator: *separator,
1383 }) {
1384 return false;
1385 }
1386
1387 if *unclosed
1389 && !callback.event(Event::Error {
1390 span: atom.span,
1391 kind: ParseErrorKind::UnclosedObject,
1392 })
1393 {
1394 return false;
1395 }
1396
1397 for (original_span, dup_span) in duplicate_key_spans {
1400 if !callback.event(Event::Error {
1401 span: *dup_span,
1402 kind: ParseErrorKind::DuplicateKey {
1403 original: *original_span,
1404 },
1405 }) {
1406 return false;
1407 }
1408 }
1409
1410 for mix_span in mixed_separator_spans {
1413 if !callback.event(Event::Error {
1414 span: *mix_span,
1415 kind: ParseErrorKind::MixedSeparators,
1416 }) {
1417 return false;
1418 }
1419 }
1420
1421 for doc_span in dangling_doc_comment_spans {
1424 if !callback.event(Event::Error {
1425 span: *doc_span,
1426 kind: ParseErrorKind::DanglingDocComment,
1427 }) {
1428 return false;
1429 }
1430 }
1431
1432 for entry in entries {
1433 for (span, text) in &entry.doc_comments {
1435 if !callback.event(Event::DocComment { span: *span, text }) {
1436 return false;
1437 }
1438 }
1439 if !callback.event(Event::EntryStart) {
1440 return false;
1441 }
1442 if !self.emit_atom_as_key(&entry.key, callback) {
1443 return false;
1444 }
1445 if !self.emit_atom_as_value(&entry.value, callback) {
1446 return false;
1447 }
1448 if let Some(span) = entry.too_many_atoms_span
1451 && !callback.event(Event::Error {
1452 span,
1453 kind: ParseErrorKind::TooManyAtoms,
1454 })
1455 {
1456 return false;
1457 }
1458 if !callback.event(Event::EntryEnd) {
1459 return false;
1460 }
1461 }
1462
1463 callback.event(Event::ObjectEnd { span: atom.span })
1464 }
1465 AtomContent::Sequence {
1467 elements,
1468 unclosed,
1469 comma_spans,
1470 } => {
1471 if !callback.event(Event::SequenceStart { span: atom.span }) {
1472 return false;
1473 }
1474
1475 if *unclosed
1477 && !callback.event(Event::Error {
1478 span: atom.span,
1479 kind: ParseErrorKind::UnclosedSequence,
1480 })
1481 {
1482 return false;
1483 }
1484
1485 for comma_span in comma_spans {
1487 if !callback.event(Event::Error {
1488 span: *comma_span,
1489 kind: ParseErrorKind::CommaInSequence,
1490 }) {
1491 return false;
1492 }
1493 }
1494
1495 for elem in elements {
1496 if !self.emit_atom_as_value(elem, callback) {
1497 return false;
1498 }
1499 }
1500
1501 callback.event(Event::SequenceEnd { span: atom.span })
1502 }
1503 AtomContent::Attributes {
1505 entries,
1506 trailing_gt_spans,
1507 } => {
1508 for gt_span in trailing_gt_spans {
1510 if !callback.event(Event::Error {
1511 span: *gt_span,
1512 kind: ParseErrorKind::ExpectedValue,
1513 }) {
1514 return false;
1515 }
1516 }
1517
1518 if !callback.event(Event::ObjectStart {
1520 span: atom.span,
1521 separator: Separator::Comma,
1522 }) {
1523 return false;
1524 }
1525
1526 for attr in entries {
1527 if !callback.event(Event::EntryStart) {
1528 return false;
1529 }
1530 if !callback.event(Event::Key {
1532 span: attr.key_span,
1533 tag: None,
1534 payload: Some(Cow::Borrowed(attr.key)),
1535 kind: ScalarKind::Bare,
1536 }) {
1537 return false;
1538 }
1539 if !self.emit_atom_as_value(&attr.value, callback) {
1540 return false;
1541 }
1542 if !callback.event(Event::EntryEnd) {
1543 return false;
1544 }
1545 }
1546
1547 callback.event(Event::ObjectEnd { span: atom.span })
1548 }
1549 AtomContent::Error => {
1550 callback.event(Event::Error {
1552 span: atom.span,
1553 kind: ParseErrorKind::UnexpectedToken,
1554 })
1555 }
1556 }
1557 }
1558
1559 fn emit_atom_as_key<C: ParseCallback<'src>>(
1565 &self,
1566 atom: &Atom<'src>,
1567 callback: &mut C,
1568 ) -> bool {
1569 if let Some(span) = atom.adjacent_block_span
1572 && !callback.event(Event::Error {
1573 span,
1574 kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
1575 })
1576 {
1577 return false;
1578 }
1579
1580 match &atom.content {
1581 AtomContent::Scalar(text) => {
1582 if atom.kind == ScalarKind::Quoted {
1585 for (offset, seq) in Self::validate_quoted_escapes(text) {
1586 let error_start = atom.span.start + offset as u32;
1587 let error_span = Span::new(error_start, error_start + seq.len() as u32);
1588 if !callback.event(Event::Error {
1589 span: error_span,
1590 kind: ParseErrorKind::InvalidEscape(seq),
1591 }) {
1592 return false;
1593 }
1594 }
1595 }
1596 callback.event(Event::Key {
1597 span: atom.span,
1598 tag: None,
1599 payload: Some(self.process_scalar(text, atom.kind)),
1600 kind: atom.kind,
1601 })
1602 }
1603 AtomContent::Heredoc(_) => {
1604 callback.event(Event::Error {
1606 span: atom.span,
1607 kind: ParseErrorKind::InvalidKey,
1608 })
1609 }
1610 AtomContent::Unit => callback.event(Event::Key {
1611 span: atom.span,
1612 tag: None,
1613 payload: None,
1614 kind: ScalarKind::Bare,
1615 }),
1616 AtomContent::Tag {
1617 name,
1618 payload,
1619 invalid_name_span,
1620 } => {
1621 if let Some(span) = invalid_name_span
1623 && !callback.event(Event::Error {
1624 span: *span,
1625 kind: ParseErrorKind::InvalidTagName,
1626 })
1627 {
1628 return false;
1629 }
1630
1631 match payload {
1632 None => {
1633 callback.event(Event::Key {
1635 span: atom.span,
1636 tag: Some(name),
1637 payload: None,
1638 kind: ScalarKind::Bare,
1639 })
1640 }
1641 Some(inner) => match &inner.content {
1642 AtomContent::Scalar(text) => {
1643 if inner.kind == ScalarKind::Quoted {
1646 for (offset, seq) in Self::validate_quoted_escapes(text) {
1647 let error_start = inner.span.start + offset as u32;
1648 let error_span =
1649 Span::new(error_start, error_start + seq.len() as u32);
1650 if !callback.event(Event::Error {
1651 span: error_span,
1652 kind: ParseErrorKind::InvalidEscape(seq),
1653 }) {
1654 return false;
1655 }
1656 }
1657 }
1658 callback.event(Event::Key {
1660 span: atom.span,
1661 tag: Some(name),
1662 payload: Some(self.process_scalar(text, inner.kind)),
1663 kind: inner.kind,
1664 })
1665 }
1666 AtomContent::Unit => {
1667 callback.event(Event::Key {
1669 span: atom.span,
1670 tag: Some(name),
1671 payload: None,
1672 kind: ScalarKind::Bare,
1673 })
1674 }
1675 AtomContent::Heredoc(_)
1676 | AtomContent::Object { .. }
1677 | AtomContent::Sequence { .. }
1678 | AtomContent::Tag { .. }
1679 | AtomContent::Attributes { .. }
1680 | AtomContent::Error => {
1681 callback.event(Event::Error {
1683 span: inner.span,
1684 kind: ParseErrorKind::InvalidKey,
1685 })
1686 }
1687 },
1688 }
1689 }
1690 AtomContent::Object { .. }
1691 | AtomContent::Sequence { .. }
1692 | AtomContent::Attributes { .. }
1693 | AtomContent::Error => {
1694 callback.event(Event::Error {
1696 span: atom.span,
1697 kind: ParseErrorKind::InvalidKey,
1698 })
1699 }
1700 }
1701 }
1702
1703 fn process_scalar(&self, text: &'src str, kind: ScalarKind) -> Cow<'src, str> {
1705 match kind {
1706 ScalarKind::Bare | ScalarKind::Heredoc => Cow::Borrowed(text),
1707 ScalarKind::Raw => Cow::Borrowed(Self::strip_raw_delimiters(text)),
1708 ScalarKind::Quoted => self.unescape_quoted(text),
1709 }
1710 }
1711
1712 fn validate_quoted_escapes(text: &str) -> Vec<(usize, String)> {
1716 let mut errors = Vec::new();
1717
1718 let inner = if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1720 &text[1..text.len() - 1]
1721 } else {
1722 text
1723 };
1724
1725 let mut chars = inner.char_indices().peekable();
1726
1727 while let Some((i, c)) = chars.next() {
1728 if c == '\\' {
1729 let escape_start = i;
1730 match chars.next() {
1731 Some((_, 'n' | 'r' | 't' | '\\' | '"')) => {
1732 }
1734 Some((_, 'u')) => {
1735 match chars.peek() {
1737 Some((_, '{')) => {
1738 chars.next(); let mut valid = true;
1741 let mut found_close = false;
1742 for (_, c) in chars.by_ref() {
1743 if c == '}' {
1744 found_close = true;
1745 break;
1746 }
1747 if !c.is_ascii_hexdigit() {
1748 valid = false;
1749 }
1750 }
1751 if !found_close || !valid {
1752 let end = chars.peek().map(|(i, _)| *i).unwrap_or(inner.len());
1754 let seq = &inner[escape_start..end.min(escape_start + 12)];
1755 errors.push((escape_start + 1, format!("\\{}", &seq[1..])));
1756 }
1757 }
1758 Some((_, c)) if c.is_ascii_hexdigit() => {
1759 let mut count = 1;
1761 while count < 4 {
1762 match chars.peek() {
1763 Some((_, c)) if c.is_ascii_hexdigit() => {
1764 chars.next();
1765 count += 1;
1766 }
1767 _ => break,
1768 }
1769 }
1770 if count != 4 {
1771 let end = chars.peek().map(|(i, _)| *i).unwrap_or(inner.len());
1772 let seq = &inner[escape_start..end];
1773 errors.push((escape_start + 1, seq.to_string()));
1774 }
1775 }
1776 _ => {
1777 errors.push((escape_start + 1, "\\u".to_string()));
1779 }
1780 }
1781 }
1782 Some((_, c)) => {
1783 errors.push((escape_start + 1, format!("\\{}", c)));
1785 }
1786 None => {
1787 errors.push((escape_start + 1, "\\".to_string()));
1789 }
1790 }
1791 }
1792 }
1793
1794 errors
1795 }
1796
1797 fn dedent_heredoc_content(content: &str, indent_len: usize) -> String {
1801 content
1802 .lines()
1803 .map(|line| {
1804 let mut chars = line.chars();
1806 let mut stripped = 0;
1807 while stripped < indent_len {
1808 match chars.clone().next() {
1809 Some(' ') | Some('\t') => {
1810 chars.next();
1811 stripped += 1;
1812 }
1813 _ => break,
1814 }
1815 }
1816 chars.as_str()
1817 })
1818 .collect::<Vec<_>>()
1819 .join("\n")
1820 + if content.ends_with('\n') { "\n" } else { "" }
1821 }
1822
1823 fn strip_raw_delimiters(text: &str) -> &str {
1825 let after_r = text.strip_prefix('r').unwrap_or(text);
1828
1829 let hash_count = after_r.chars().take_while(|&c| c == '#').count();
1831 let after_hashes = &after_r[hash_count..];
1832
1833 let after_quote = after_hashes.strip_prefix('"').unwrap_or(after_hashes);
1835
1836 let closing_len = 1 + hash_count; if after_quote.len() >= closing_len {
1839 &after_quote[..after_quote.len() - closing_len]
1840 } else {
1841 after_quote
1842 }
1843 }
1844
1845 fn unescape_quoted(&self, text: &'src str) -> Cow<'src, str> {
1847 let inner = if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1849 &text[1..text.len() - 1]
1850 } else {
1851 text
1852 };
1853
1854 if !inner.contains('\\') {
1856 return Cow::Borrowed(inner);
1857 }
1858
1859 let mut result = String::with_capacity(inner.len());
1861 let mut chars = inner.chars().peekable();
1862
1863 while let Some(c) = chars.next() {
1864 if c == '\\' {
1865 match chars.next() {
1866 Some('n') => result.push('\n'),
1867 Some('r') => result.push('\r'),
1868 Some('t') => result.push('\t'),
1869 Some('\\') => result.push('\\'),
1870 Some('"') => result.push('"'),
1871 Some('u') => {
1873 match chars.peek() {
1875 Some('{') => {
1876 chars.next(); let mut hex = String::new();
1879 while let Some(&c) = chars.peek() {
1880 if c == '}' {
1881 chars.next();
1882 break;
1883 }
1884 hex.push(chars.next().unwrap());
1885 }
1886 if let Ok(code) = u32::from_str_radix(&hex, 16)
1887 && let Some(ch) = char::from_u32(code)
1888 {
1889 result.push(ch);
1890 }
1891 }
1892 Some(c) if c.is_ascii_hexdigit() => {
1893 let mut hex = String::with_capacity(4);
1895 for _ in 0..4 {
1896 if let Some(&c) = chars.peek() {
1897 if c.is_ascii_hexdigit() {
1898 hex.push(chars.next().unwrap());
1899 } else {
1900 break;
1901 }
1902 } else {
1903 break;
1904 }
1905 }
1906 if hex.len() == 4 {
1907 if let Ok(code) = u32::from_str_radix(&hex, 16)
1908 && let Some(ch) = char::from_u32(code)
1909 {
1910 result.push(ch);
1911 }
1912 } else {
1913 result.push_str("\\u");
1915 result.push_str(&hex);
1916 }
1917 }
1918 _ => {
1919 result.push_str("\\u");
1921 }
1922 }
1923 }
1924 Some(c) => {
1925 result.push('\\');
1927 result.push(c);
1928 }
1929 None => {
1930 result.push('\\');
1931 }
1932 }
1933 } else {
1934 result.push(c);
1935 }
1936 }
1937
1938 Cow::Owned(result)
1939 }
1940}
1941
1942#[derive(Debug, Clone)]
1944struct Atom<'src> {
1945 span: Span,
1946 kind: ScalarKind,
1947 content: AtomContent<'src>,
1948 adjacent_block_span: Option<Span>,
1951}
1952
1953#[derive(Debug, Clone)]
1956enum AtomContent<'src> {
1957 Scalar(&'src str),
1959 Heredoc(String),
1961 Unit,
1963 Tag {
1966 name: &'src str,
1967 payload: Option<Box<Atom<'src>>>,
1968 invalid_name_span: Option<Span>,
1971 },
1972 Object {
1975 entries: Vec<ObjectEntry<'src>>,
1976 separator: Separator,
1977 duplicate_key_spans: Vec<(Span, Span)>,
1979 mixed_separator_spans: Vec<Span>,
1982 dangling_doc_comment_spans: Vec<Span>,
1985 unclosed: bool,
1987 },
1988 Sequence {
1991 elements: Vec<Atom<'src>>,
1992 unclosed: bool,
1994 comma_spans: Vec<Span>,
1996 },
1997 Attributes {
2000 entries: Vec<AttributeEntry<'src>>,
2001 trailing_gt_spans: Vec<Span>,
2003 },
2004 Error,
2006}
2007
2008#[derive(Debug, Clone)]
2010struct AttributeEntry<'src> {
2011 key: &'src str,
2012 key_span: Span,
2013 value: Atom<'src>,
2014}
2015
2016#[derive(Debug, Clone)]
2018struct ObjectEntry<'src> {
2019 key: Atom<'src>,
2020 value: Atom<'src>,
2021 doc_comments: Vec<(Span, &'src str)>,
2023 too_many_atoms_span: Option<Span>,
2026}
2027
/// Owned, comparable and hashable representation of a key.
///
/// Scalars are stored by their processed value, so two keys written
/// differently in the source compare equal when they denote the same text.
// NOTE(review): presumably used for duplicate-key detection; confirm at the call sites.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum KeyValue {
    /// A scalar key, identified by its processed text.
    Scalar(String),
    /// The unit key.
    Unit,
    /// A tagged key.
    Tagged {
        /// Tag name.
        name: String,
        /// Representation of the tag's payload, if it has one.
        payload: Option<Box<KeyValue>>,
    },
}
2042
2043impl KeyValue {
2044 fn from_atom<'a>(atom: &Atom<'a>, parser: &Parser<'a>) -> Self {
2047 match &atom.content {
2048 AtomContent::Scalar(text) => {
2049 let processed = parser.process_scalar(text, atom.kind);
2051 KeyValue::Scalar(processed.into_owned())
2052 }
2053 AtomContent::Heredoc(content) => KeyValue::Scalar(content.clone()),
2054 AtomContent::Unit => KeyValue::Unit,
2055 AtomContent::Tag { name, payload, .. } => KeyValue::Tagged {
2056 name: (*name).to_string(),
2057 payload: payload
2058 .as_ref()
2059 .map(|p| Box::new(KeyValue::from_atom(p, parser))),
2060 },
2061 AtomContent::Object { .. } => KeyValue::Scalar("{}".into()),
2063 AtomContent::Sequence { .. } => KeyValue::Scalar("()".into()),
2064 AtomContent::Attributes { .. } => KeyValue::Scalar("{}".into()),
2065 AtomContent::Error => KeyValue::Scalar("<error>".into()),
2066 }
2067 }
2068}
2069
/// Kind of value recorded for a path in [`PathState`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathValueKind {
    /// The path holds an object; longer paths may still pass through it.
    /// Also recorded for every proper prefix of an accepted path.
    Object,
    /// The path holds a terminal value; extending it is rejected with
    /// `PathError::NestIntoTerminal`.
    Terminal,
}
2078
2079#[derive(Default)]
2082struct PathState {
2083 current_path: Vec<String>,
2085 closed_paths: std::collections::HashSet<Vec<String>>,
2087 assigned_paths: HashMap<Vec<String>, (Span, PathValueKind)>,
2089}
2090
2091#[derive(Debug)]
2093enum PathError {
2094 Duplicate { original: Span },
2096 Reopened { closed_path: Vec<String> },
2098 NestIntoTerminal { terminal_path: Vec<String> },
2100}
2101
2102impl PathState {
2103 fn check_and_update(
2105 &mut self,
2106 path: &[String],
2107 span: Span,
2108 value_kind: PathValueKind,
2109 ) -> Result<(), PathError> {
2110 if let Some(&(original, _)) = self.assigned_paths.get(path) {
2112 return Err(PathError::Duplicate { original });
2113 }
2114
2115 for i in 1..path.len() {
2117 let prefix = &path[..i];
2118 if self.closed_paths.contains(prefix) {
2119 return Err(PathError::Reopened {
2120 closed_path: prefix.to_vec(),
2121 });
2122 }
2123 if let Some(&(_, PathValueKind::Terminal)) = self.assigned_paths.get(prefix) {
2124 return Err(PathError::NestIntoTerminal {
2125 terminal_path: prefix.to_vec(),
2126 });
2127 }
2128 }
2129
2130 let common_len = self
2132 .current_path
2133 .iter()
2134 .zip(path.iter())
2135 .take_while(|(a, b)| a == b)
2136 .count();
2137
2138 for i in common_len..self.current_path.len() {
2141 let closed: Vec<String> = self.current_path[..=i].to_vec();
2142 self.closed_paths.insert(closed);
2143 }
2144
2145 for i in 1..path.len() {
2147 let prefix = path[..i].to_vec();
2148 self.assigned_paths
2149 .entry(prefix)
2150 .or_insert((span, PathValueKind::Object));
2151 }
2152
2153 self.assigned_paths
2155 .insert(path.to_vec(), (span, value_kind));
2156 self.current_path = path.to_vec();
2157
2158 Ok(())
2159 }
2160}
2161
2162#[cfg(test)]
2163mod tests {
2164 use super::*;
2165 use facet_testhelpers::test;
2166
2167 fn parse(source: &str) -> Vec<Event<'_>> {
2168 tracing::debug!(source, "parsing");
2169 let events = Parser::new(source).parse_to_vec();
2170 tracing::debug!(?events, "parsed");
2171 events
2172 }
2173
2174 #[allow(dead_code)]
2176 fn parse_debug(source: &str) -> Vec<Event<'_>> {
2177 tracing::info!(source, "parsing (debug mode)");
2178 let events = Parser::new(source).parse_to_vec();
2179 tracing::info!(?events, "parsed events");
2180 events
2181 }
2182
2183 #[test]
2184 fn test_empty_document() {
2185 let events = parse("");
2186 assert_eq!(events, vec![Event::DocumentStart, Event::DocumentEnd]);
2187 }
2188
2189 #[test]
2190 fn test_simple_entry() {
2191 let events = parse("foo bar");
2192 assert!(events.contains(&Event::DocumentStart));
2193 assert!(events.contains(&Event::DocumentEnd));
2194 assert!(
2195 events
2196 .iter()
2197 .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "foo"))
2198 );
2199 assert!(
2200 events
2201 .iter()
2202 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "bar"))
2203 );
2204 }
2205
2206 #[test]
2207 fn test_key_only() {
2208 let events = parse("foo");
2209 assert!(
2210 events
2211 .iter()
2212 .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "foo"))
2213 );
2214 assert!(events.iter().any(|e| matches!(e, Event::Unit { .. })));
2215 }
2216
2217 #[test]
2218 fn test_multiple_entries() {
2219 let events = parse("foo bar\nbaz qux");
2220 let keys: Vec<_> = events
2221 .iter()
2222 .filter_map(|e| match e {
2223 Event::Key {
2224 payload: Some(value),
2225 ..
2226 } => Some(value.as_ref()),
2227 _ => None,
2228 })
2229 .collect();
2230 assert_eq!(keys, vec!["foo", "baz"]);
2231 }
2232
2233 #[test]
2234 fn test_quoted_string() {
2235 let events = parse(r#"name "hello world""#);
2236 assert!(events
2237 .iter()
2238 .any(|e| matches!(e, Event::Scalar { value, kind: ScalarKind::Quoted, .. } if value == "hello world")));
2239 }
2240
2241 #[test]
2242 fn test_quoted_escape() {
2243 let events = parse(r#"msg "hello\nworld""#);
2244 assert!(
2245 events
2246 .iter()
2247 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "hello\nworld"))
2248 );
2249 }
2250
2251 #[test]
2252 fn test_too_many_atoms() {
2253 let events = parse("a b c");
2256 assert!(
2258 events
2259 .iter()
2260 .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "a"))
2261 );
2262 assert!(
2263 events
2264 .iter()
2265 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "b"))
2266 );
2267 assert!(events.iter().any(|e| matches!(
2268 e,
2269 Event::Error {
2270 kind: ParseErrorKind::TooManyAtoms,
2271 ..
2272 }
2273 )));
2274 }
2275
2276 #[test]
2277 fn test_unit_value() {
2278 let events = parse("flag @");
2279 assert!(events.iter().any(|e| matches!(e, Event::Unit { .. })));
2280 }
2281
2282 #[test]
2283 fn test_unit_key() {
2284 let events = parse("@ server.schema.styx");
2286 trace!(?events, "parsed events for unit key test");
2287 assert!(
2289 events.iter().any(|e| matches!(
2290 e,
2291 Event::Key {
2292 payload: None,
2293 tag: None,
2294 ..
2295 }
2296 )),
2297 "should have Key event with payload: None (unit key), got: {:?}",
2298 events
2299 );
2300 }
2301
2302 #[test]
2303 fn test_tag() {
2304 let events = parse("type @user");
2305 assert!(
2306 events
2307 .iter()
2308 .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "user"))
2309 );
2310 }
2311
2312 #[test]
2313 fn test_comments() {
2314 let events = parse("// comment\nfoo bar");
2315 assert!(events.iter().any(|e| matches!(e, Event::Comment { .. })));
2316 assert!(
2317 events
2318 .iter()
2319 .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "foo"))
2320 );
2321 }
2322
2323 #[test]
2324 fn test_doc_comments() {
2325 let events = parse("/// doc\nfoo bar");
2326 assert!(events.iter().any(|e| matches!(e, Event::DocComment { .. })));
2327 }
2328
2329 #[test]
2331 fn test_doc_comment_followed_by_entry_ok() {
2332 let events = parse("/// documentation\nkey value");
2333 assert!(events.iter().any(|e| matches!(e, Event::DocComment { .. })));
2335 assert!(!events.iter().any(|e| matches!(
2336 e,
2337 Event::Error {
2338 kind: ParseErrorKind::DanglingDocComment,
2339 ..
2340 }
2341 )));
2342 }
2343
2344 #[test]
2346 fn test_doc_comment_at_eof_error() {
2347 let events = parse("foo bar\n/// dangling");
2348 assert!(events.iter().any(|e| matches!(
2349 e,
2350 Event::Error {
2351 kind: ParseErrorKind::DanglingDocComment,
2352 ..
2353 }
2354 )));
2355 }
2356
2357 #[test]
2359 fn test_doc_comment_before_closing_brace_error() {
2360 let events = parse("{foo bar\n/// dangling\n}");
2361 assert!(events.iter().any(|e| matches!(
2362 e,
2363 Event::Error {
2364 kind: ParseErrorKind::DanglingDocComment,
2365 ..
2366 }
2367 )));
2368 }
2369
2370 #[test]
2372 fn test_multiple_doc_comments_before_entry_ok() {
2373 let events = parse("/// line 1\n/// line 2\nkey value");
2374 let doc_count = events
2376 .iter()
2377 .filter(|e| matches!(e, Event::DocComment { .. }))
2378 .count();
2379 assert_eq!(doc_count, 2);
2380 assert!(!events.iter().any(|e| matches!(
2381 e,
2382 Event::Error {
2383 kind: ParseErrorKind::DanglingDocComment,
2384 ..
2385 }
2386 )));
2387 }
2388
2389 #[test]
2391 fn test_nested_object() {
2392 let events = parse("outer {inner {x 1}}");
2393 let obj_starts = events
2395 .iter()
2396 .filter(|e| matches!(e, Event::ObjectStart { .. }))
2397 .count();
2398 assert_eq!(
2399 obj_starts, 2,
2400 "Expected 2 ObjectStart events for nested objects"
2401 );
2402 }
2403
2404 #[test]
2406 fn test_object_with_entries() {
2407 let events = parse("config {host localhost, port 8080}");
2408 let keys: Vec<_> = events
2410 .iter()
2411 .filter_map(|e| match e {
2412 Event::Key {
2413 payload: Some(value),
2414 ..
2415 } => Some(value.as_ref()),
2416 _ => None,
2417 })
2418 .collect();
2419 assert!(keys.contains(&"config"), "Missing key 'config'");
2420 assert!(keys.contains(&"host"), "Missing key 'host'");
2421 assert!(keys.contains(&"port"), "Missing key 'port'");
2422 }
2423
2424 #[test]
2426 fn test_sequence_elements() {
2427 let events = parse("items (a b c)");
2428 let scalars: Vec<_> = events
2429 .iter()
2430 .filter_map(|e| match e {
2431 Event::Scalar { value, .. } => Some(value.as_ref()),
2432 _ => None,
2433 })
2434 .collect();
2435 assert!(scalars.contains(&"a"), "Missing element 'a'");
2436 assert!(scalars.contains(&"b"), "Missing element 'b'");
2437 assert!(scalars.contains(&"c"), "Missing element 'c'");
2438 }
2439
2440 #[test]
2442 fn test_nested_sequences() {
2443 let events = parse("matrix ((1 2) (3 4))");
2444 let seq_starts = events
2445 .iter()
2446 .filter(|e| matches!(e, Event::SequenceStart { .. }))
2447 .count();
2448 assert_eq!(
2449 seq_starts, 3,
2450 "Expected 3 SequenceStart events (outer + 2 inner)"
2451 );
2452 }
2453
2454 #[test]
2456 fn test_tagged_object() {
2457 let events = parse("result @err{message oops}");
2458 assert!(
2459 events
2460 .iter()
2461 .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "err")),
2462 "Missing TagStart for @err"
2463 );
2464 assert!(
2465 events
2466 .iter()
2467 .any(|e| matches!(e, Event::ObjectStart { .. })),
2468 "Missing ObjectStart for tagged object"
2469 );
2470 }
2471
2472 #[test]
2474 fn test_tagged_sequence() {
2475 let events = parse("color @rgb(255 128 0)");
2476 assert!(
2477 events
2478 .iter()
2479 .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "rgb")),
2480 "Missing TagStart for @rgb"
2481 );
2482 assert!(
2483 events
2484 .iter()
2485 .any(|e| matches!(e, Event::SequenceStart { .. })),
2486 "Missing SequenceStart for tagged sequence"
2487 );
2488 }
2489
2490 #[test]
2492 fn test_tagged_scalar() {
2493 let events = parse(r#"name @nickname"Bob""#);
2494 assert!(
2495 events
2496 .iter()
2497 .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "nickname")),
2498 "Missing TagStart for @nickname"
2499 );
2500 assert!(
2501 events
2502 .iter()
2503 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "Bob")),
2504 "Missing Scalar for tagged string"
2505 );
2506 }
2507
2508 #[test]
2510 fn test_tagged_explicit_unit() {
2511 let events = parse("nothing @empty@");
2512 assert!(
2513 events
2514 .iter()
2515 .any(|e| matches!(e, Event::TagStart { name, .. } if *name == "empty")),
2516 "Missing TagStart for @empty"
2517 );
2518 let unit_count = events
2520 .iter()
2521 .filter(|e| matches!(e, Event::Unit { .. }))
2522 .count();
2523 assert!(
2524 unit_count >= 1,
2525 "Expected at least one Unit event for @empty@"
2526 );
2527 }
2528
2529 #[test]
2531 fn test_tag_whitespace_gap() {
2532 let events = parse("x @tag\ny {a b}");
2535 let tag_events: Vec<_> = events
2537 .iter()
2538 .filter(|e| matches!(e, Event::TagStart { .. } | Event::TagEnd))
2539 .collect();
2540 assert_eq!(tag_events.len(), 2, "Expected TagStart and TagEnd");
2542 let keys: Vec<_> = events
2544 .iter()
2545 .filter_map(|e| match e {
2546 Event::Key {
2547 payload: Some(value),
2548 ..
2549 } => Some(value.as_ref()),
2550 _ => None,
2551 })
2552 .collect();
2553 assert!(keys.contains(&"x"), "Missing key 'x'");
2554 assert!(keys.contains(&"y"), "Missing key 'y'");
2555 }
2556
2557 #[test]
2559 fn test_object_in_sequence() {
2560 let events = parse("servers ({host a} {host b})");
2561 let obj_starts = events
2563 .iter()
2564 .filter(|e| matches!(e, Event::ObjectStart { .. }))
2565 .count();
2566 assert_eq!(
2567 obj_starts, 2,
2568 "Expected 2 ObjectStart events for objects in sequence"
2569 );
2570 }
2571
2572 #[test]
2574 fn test_simple_attribute() {
2575 let events = parse("server host>localhost");
2576 let keys: Vec<_> = events
2578 .iter()
2579 .filter_map(|e| match e {
2580 Event::Key {
2581 payload: Some(value),
2582 ..
2583 } => Some(value.as_ref()),
2584 _ => None,
2585 })
2586 .collect();
2587 assert!(keys.contains(&"server"), "Missing key 'server'");
2588 assert!(keys.contains(&"host"), "Missing key 'host' from attribute");
2589 }
2590
2591 #[test]
2593 fn test_attribute_values() {
2594 let events = parse("config name>app tags>(a b) opts>{x 1}");
2595 let keys: Vec<_> = events
2596 .iter()
2597 .filter_map(|e| match e {
2598 Event::Key {
2599 payload: Some(value),
2600 ..
2601 } => Some(value.as_ref()),
2602 _ => None,
2603 })
2604 .collect();
2605 assert!(keys.contains(&"config"), "Missing key 'config'");
2606 assert!(keys.contains(&"name"), "Missing key 'name'");
2607 assert!(keys.contains(&"tags"), "Missing key 'tags'");
2608 assert!(keys.contains(&"opts"), "Missing key 'opts'");
2609 assert!(
2611 events
2612 .iter()
2613 .any(|e| matches!(e, Event::SequenceStart { .. })),
2614 "Missing SequenceStart for tags>(a b)"
2615 );
2616 }
2617
2618 #[test]
2620 fn test_multiple_attributes() {
2621 let events = parse("server host>localhost port>8080");
2624 let keys: Vec<_> = events
2626 .iter()
2627 .filter_map(|e| match e {
2628 Event::Key {
2629 payload: Some(value),
2630 ..
2631 } => Some(value.as_ref()),
2632 _ => None,
2633 })
2634 .collect();
2635 assert!(keys.contains(&"server"), "Missing key 'server'");
2636 assert!(keys.contains(&"host"), "Missing key 'host'");
2637 assert!(keys.contains(&"port"), "Missing key 'port'");
2638 }
2639
2640 #[test]
2642 fn test_too_many_atoms_with_attributes() {
2643 let events = parse("spec selector matchLabels app>web tier>frontend");
2646 assert!(
2648 events.iter().any(|e| matches!(
2649 e,
2650 Event::Error {
2651 kind: ParseErrorKind::TooManyAtoms,
2652 ..
2653 }
2654 )),
2655 "Should have TooManyAtoms error"
2656 );
2657 }
2658
2659 #[test]
2661 fn test_attribute_no_spaces() {
2662 let events = parse("x > y");
2664 let keys: Vec<_> = events
2667 .iter()
2668 .filter_map(|e| match e {
2669 Event::Key {
2670 payload: Some(value),
2671 ..
2672 } => Some(value.as_ref()),
2673 _ => None,
2674 })
2675 .collect();
2676 assert!(keys.contains(&"x"), "Missing key 'x'");
2678 }
2680
2681 #[test]
2683 fn test_explicit_root_after_comment() {
2684 let events = parse("// comment\n{a 1}");
2686 assert!(
2688 events
2689 .iter()
2690 .any(|e| matches!(e, Event::ObjectStart { .. })),
2691 "Should have ObjectStart for explicit root after comment"
2692 );
2693 assert!(
2694 events
2695 .iter()
2696 .any(|e| matches!(e, Event::Key { payload: Some(value), .. } if value == "a")),
2697 "Should have key 'a'"
2698 );
2699 }
2700
2701 #[test]
2703 fn test_explicit_root_after_doc_comment() {
2704 let events = parse("/// doc comment\n{a 1}");
2706 assert!(
2708 events.iter().any(|e| matches!(e, Event::DocComment { .. })),
2709 "Should preserve doc comment"
2710 );
2711 assert!(
2712 events
2713 .iter()
2714 .any(|e| matches!(e, Event::ObjectStart { .. })),
2715 "Should have ObjectStart for explicit root after doc comment"
2716 );
2717 }
2718
2719 #[test]
2721 fn test_duplicate_bare_key() {
2722 let events = parse("{a 1, a 2}");
2723 assert!(
2724 events.iter().any(|e| matches!(
2725 e,
2726 Event::Error {
2727 kind: ParseErrorKind::DuplicateKey { .. },
2728 ..
2729 }
2730 )),
2731 "Expected DuplicateKey error"
2732 );
2733 }
2734
2735 #[test]
2737 fn test_duplicate_quoted_key() {
2738 let events = parse(r#"{"key" 1, "key" 2}"#);
2739 assert!(
2740 events.iter().any(|e| matches!(
2741 e,
2742 Event::Error {
2743 kind: ParseErrorKind::DuplicateKey { .. },
2744 ..
2745 }
2746 )),
2747 "Expected DuplicateKey error for quoted keys"
2748 );
2749 }
2750
2751 #[test]
2753 fn test_duplicate_key_escape_normalized() {
2754 let events = parse(r#"{"ab" 1, "a\u{62}" 2}"#);
2756 assert!(
2757 events.iter().any(|e| matches!(
2758 e,
2759 Event::Error {
2760 kind: ParseErrorKind::DuplicateKey { .. },
2761 ..
2762 }
2763 )),
2764 "Expected DuplicateKey error for escape-normalized keys"
2765 );
2766 }
2767
2768 #[test]
2770 fn test_duplicate_unit_key() {
2771 let events = parse("{@ 1, @ 2}");
2772 assert!(
2773 events.iter().any(|e| matches!(
2774 e,
2775 Event::Error {
2776 kind: ParseErrorKind::DuplicateKey { .. },
2777 ..
2778 }
2779 )),
2780 "Expected DuplicateKey error for unit keys"
2781 );
2782 }
2783
2784 #[test]
2786 fn test_duplicate_tagged_key() {
2787 let events = parse("{@foo 1, @foo 2}");
2788 assert!(
2789 events.iter().any(|e| matches!(
2790 e,
2791 Event::Error {
2792 kind: ParseErrorKind::DuplicateKey { .. },
2793 ..
2794 }
2795 )),
2796 "Expected DuplicateKey error for tagged keys"
2797 );
2798 }
2799
2800 #[test]
2802 fn test_different_keys_ok() {
2803 let events = parse("{a 1, b 2, c 3}");
2804 assert!(
2805 !events.iter().any(|e| matches!(e, Event::Error { .. })),
2806 "Should not have any errors for different keys"
2807 );
2808 }
2809
2810 #[test]
2812 fn test_duplicate_key_at_root() {
2813 let events = parse("a 1\na 2");
2815 assert!(
2816 events.iter().any(|e| matches!(
2817 e,
2818 Event::Error {
2819 kind: ParseErrorKind::DuplicateKey { .. },
2820 ..
2821 }
2822 )),
2823 "Expected DuplicateKey error at document root level"
2824 );
2825 }
2826
2827 #[test]
2829 fn test_mixed_separators_comma_then_newline() {
2830 let events = parse("{a 1, b 2\nc 3}");
2832 assert!(
2833 events.iter().any(|e| matches!(
2834 e,
2835 Event::Error {
2836 kind: ParseErrorKind::MixedSeparators,
2837 ..
2838 }
2839 )),
2840 "Expected MixedSeparators error when comma mode followed by newline"
2841 );
2842 }
2843
2844 #[test]
2846 fn test_mixed_separators_newline_then_comma() {
2847 let events = parse("{a 1\nb 2, c 3}");
2849 assert!(
2850 events.iter().any(|e| matches!(
2851 e,
2852 Event::Error {
2853 kind: ParseErrorKind::MixedSeparators,
2854 ..
2855 }
2856 )),
2857 "Expected MixedSeparators error when newline mode followed by comma"
2858 );
2859 }
2860
2861 #[test]
2863 fn test_consistent_comma_separators() {
2864 let events = parse("{a 1, b 2, c 3}");
2866 assert!(
2867 !events.iter().any(|e| matches!(
2868 e,
2869 Event::Error {
2870 kind: ParseErrorKind::MixedSeparators,
2871 ..
2872 }
2873 )),
2874 "Should not have MixedSeparators error for consistent comma separators"
2875 );
2876 }
2877
2878 #[test]
2880 fn test_consistent_newline_separators() {
2881 let events = parse("{a 1\nb 2\nc 3}");
2883 assert!(
2884 !events.iter().any(|e| matches!(
2885 e,
2886 Event::Error {
2887 kind: ParseErrorKind::MixedSeparators,
2888 ..
2889 }
2890 )),
2891 "Should not have MixedSeparators error for consistent newline separators"
2892 );
2893 }
2894
2895 #[test]
2897 fn test_valid_tag_names() {
2898 assert!(
2900 !parse("@foo")
2901 .iter()
2902 .any(|e| matches!(e, Event::Error { .. })),
2903 "@foo should be valid"
2904 );
2905 assert!(
2906 !parse("@_private")
2907 .iter()
2908 .any(|e| matches!(e, Event::Error { .. })),
2909 "@_private should be valid"
2910 );
2911 assert!(
2913 parse("@Some.Type")
2914 .iter()
2915 .any(|e| matches!(e, Event::Error { .. })),
2916 "@Some.Type should be invalid (dots not allowed)"
2917 );
2918 assert!(
2919 !parse("@my-tag")
2920 .iter()
2921 .any(|e| matches!(e, Event::Error { .. })),
2922 "@my-tag should be valid"
2923 );
2924 assert!(
2925 !parse("@Type123")
2926 .iter()
2927 .any(|e| matches!(e, Event::Error { .. })),
2928 "@Type123 should be valid"
2929 );
2930 }
2931
2932 #[test]
2934 fn test_invalid_tag_name_starts_with_digit() {
2935 let events = parse("x @123");
2936 assert!(
2937 events.iter().any(|e| matches!(
2938 e,
2939 Event::Error {
2940 kind: ParseErrorKind::InvalidTagName,
2941 ..
2942 }
2943 )),
2944 "Tag starting with digit should be invalid"
2945 );
2946 }
2947
2948 #[test]
2950 fn test_invalid_tag_name_starts_with_hyphen() {
2951 let events = parse("x @-foo");
2952 assert!(
2953 events.iter().any(|e| matches!(
2954 e,
2955 Event::Error {
2956 kind: ParseErrorKind::InvalidTagName,
2957 ..
2958 }
2959 )),
2960 "Tag starting with hyphen should be invalid"
2961 );
2962 }
2963
2964 #[test]
2966 fn test_invalid_tag_name_starts_with_dot() {
2967 let events = parse("x @.foo");
2968 assert!(
2969 events.iter().any(|e| matches!(
2970 e,
2971 Event::Error {
2972 kind: ParseErrorKind::InvalidTagName,
2973 ..
2974 }
2975 )),
2976 "Tag starting with dot should be invalid"
2977 );
2978 }
2979
2980 #[test]
2982 fn test_unicode_escape_braces() {
2983 let events = parse(r#"x "\u{1F600}""#);
2984 assert!(
2985 events
2986 .iter()
2987 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "😀")),
2988 "\\u{{1F600}} should produce 😀"
2989 );
2990 }
2991
2992 #[test]
2994 fn test_unicode_escape_4digit() {
2995 let events = parse(r#"x "\u0041""#);
2996 assert!(
2997 events
2998 .iter()
2999 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "A")),
3000 "\\u0041 should produce A"
3001 );
3002 }
3003
3004 #[test]
3006 fn test_unicode_escape_4digit_accented() {
3007 let events = parse(r#"x "\u00E9""#);
3008 assert!(
3009 events
3010 .iter()
3011 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "é")),
3012 "\\u00E9 should produce é"
3013 );
3014 }
3015
3016 #[test]
3018 fn test_unicode_escape_mixed() {
3019 let events = parse(r#"x "\u0048\u{65}\u006C\u{6C}\u006F""#);
3021 assert!(
3022 events
3023 .iter()
3024 .any(|e| matches!(e, Event::Scalar { value, .. } if value == "Hello")),
3025 "Mixed unicode escapes should produce Hello"
3026 );
3027 }
3028
3029 #[test]
3031 fn test_heredoc_key_rejected() {
3032 let events = parse("<<EOF\nkey\nEOF value");
3033 assert!(
3034 events.iter().any(|e| matches!(
3035 e,
3036 Event::Error {
3037 kind: ParseErrorKind::InvalidKey,
3038 ..
3039 }
3040 )),
3041 "Heredoc as key should be rejected"
3042 );
3043 }
3044
3045 #[test]
3047 fn test_invalid_escape_null() {
3048 let events = parse(r#"x "\0""#);
3050 assert!(
3051 events.iter().any(|e| matches!(
3052 e,
3053 Event::Error {
3054 kind: ParseErrorKind::InvalidEscape(seq),
3055 ..
3056 } if seq == "\\0"
3057 )),
3058 "\\0 should be rejected as invalid escape"
3059 );
3060 }
3061
3062 #[test]
3064 fn test_invalid_escape_unknown() {
3065 let events = parse(r#"x "\q""#);
3067 assert!(
3068 events.iter().any(|e| matches!(
3069 e,
3070 Event::Error {
3071 kind: ParseErrorKind::InvalidEscape(seq),
3072 ..
3073 } if seq == "\\q"
3074 )),
3075 "\\q should be rejected as invalid escape"
3076 );
3077 }
3078
/// Each invalid escape in a single string must be reported on its own:
/// the input `\0\q\?` is expected to yield exactly three `InvalidEscape` errors.
#[test]
fn test_invalid_escape_multiple() {
    let events = parse(r#"x "\0\q\?""#);

    // Gather the offending sequences so a failure can show what was reported.
    let mut invalid_escapes = Vec::new();
    for ev in events.iter() {
        if let Event::Error {
            kind: ParseErrorKind::InvalidEscape(seq),
            ..
        } = ev
        {
            invalid_escapes.push(seq.as_str());
        }
    }

    assert_eq!(
        invalid_escapes.len(),
        3,
        "Should report 3 invalid escapes, got: {:?}",
        invalid_escapes
    );
}
3101
/// The escapes `\n`, `\t`, `\\` and `\"` must decode to their characters and
/// must not trigger any `InvalidEscape` error.
#[test]
fn test_valid_escapes_still_work() {
    let events = parse(r#"x "a\nb\tc\\d\"e""#);

    let decoded = events
        .iter()
        .any(|ev| matches!(ev, Event::Scalar { value, .. } if value == "a\nb\tc\\d\"e"));
    assert!(decoded, "Valid escapes should still work");

    let no_escape_errors = events.iter().all(|ev| {
        !matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::InvalidEscape(_),
                ..
            }
        )
    });
    assert!(no_escape_errors, "Valid escapes should not produce errors");
}
3125
/// Escape validation also applies to quoted keys: `"\0"` in key position
/// must produce an `InvalidEscape` error carrying `\0`.
#[test]
fn test_invalid_escape_in_key() {
    let events = parse(r#""\0" value"#);
    let reported = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::InvalidEscape(seq),
                ..
            } if seq == "\\0"
        )
    });
    assert!(reported, "\\0 in key should be rejected as invalid escape");
}
3142
/// `server host>localhost port>8080` must emit key events for `server`,
/// `host` and `port`, and must not report `TooManyAtoms`.
#[test]
fn test_simple_key_value_with_attributes() {
    let events = parse("server host>localhost port>8080");

    // Names of every key event that carries a payload.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert!(keys.contains(&"server"), "Missing key 'server'");
    assert!(keys.contains(&"host"), "Missing key 'host'");
    assert!(keys.contains(&"port"), "Missing key 'port'");

    let too_many_atoms = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::TooManyAtoms,
                ..
            }
        )
    });
    assert!(
        !too_many_atoms,
        "Simple key-value with attributes should not produce TooManyAtoms"
    );
}
3174
/// `a.b value` must emit the keys `a` then `b`, at least one `ObjectStart`,
/// the scalar `value`, and no error events.
#[test]
fn test_dotted_path_simple() {
    let events = parse("a.b value");

    // Names of every key event that carries a payload, in emission order.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert_eq!(keys, vec!["a", "b"], "Should have keys 'a' and 'b'");

    let has_object_start = events
        .iter()
        .any(|ev| matches!(ev, Event::ObjectStart { .. }));
    assert!(
        has_object_start,
        "Should have ObjectStart for nested structure"
    );

    let has_value = events
        .iter()
        .any(|ev| matches!(ev, Event::Scalar { value, .. } if value == "value"));
    assert!(has_value, "Should have scalar value 'value'");

    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(error_free, "Simple dotted path should not have errors");
}
3211
/// `a.b.c deep` must emit the keys `a`, `b`, `c` in order, exactly two
/// `ObjectStart` events, and no error events.
#[test]
fn test_dotted_path_three_segments() {
    let events = parse("a.b.c deep");

    // Names of every key event that carries a payload, in emission order.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert_eq!(keys, vec!["a", "b", "c"], "Should have keys 'a', 'b', 'c'");

    let object_start_count = events
        .iter()
        .filter(|ev| matches!(ev, Event::ObjectStart { .. }))
        .count();
    assert_eq!(
        object_start_count, 2,
        "Should have 2 ObjectStart for nested structure"
    );

    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(
        error_free,
        "Three-segment dotted path should not have errors"
    );
}
3244
/// A dotted path with no value (`a.b`) must still emit the keys `a` and `b`
/// and must produce a `Unit` event.
#[test]
fn test_dotted_path_with_implicit_unit() {
    let events = parse("a.b");

    // Names of every key event that carries a payload, in emission order.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert_eq!(keys, vec!["a", "b"], "Should have keys 'a' and 'b'");

    let has_unit = events.iter().any(|ev| matches!(ev, Event::Unit { .. }));
    assert!(has_unit, "Should have implicit unit value");
}
3267
/// Two consecutive dots in a key (`a..b`) must produce at least one error event.
#[test]
fn test_dotted_path_empty_segment() {
    let events = parse("a..b value");
    let has_error = events.iter().any(|ev| matches!(ev, Event::Error { .. }));
    assert!(
        has_error,
        "Empty segment in dotted path should produce error"
    );
}
3278
/// A key ending in a dot (`a.b.`) must produce at least one error event.
#[test]
fn test_dotted_path_trailing_dot() {
    let events = parse("a.b. value");
    let has_error = events.iter().any(|ev| matches!(ev, Event::Error { .. }));
    assert!(
        has_error,
        "Trailing dot in dotted path should produce error"
    );
}
3289
/// A key starting with a dot (`.a.b`) must produce at least one error event.
#[test]
fn test_dotted_path_leading_dot() {
    let events = parse(".a.b value");
    let has_error = events.iter().any(|ev| matches!(ev, Event::Error { .. }));
    assert!(
        has_error,
        "Leading dot in dotted path should produce error"
    );
}
3300
/// A dotted path followed by an object (`a.b { c d }`) must emit the keys
/// `a`, `b` and `c` and no error events.
#[test]
fn test_dotted_path_with_object_value() {
    let events = parse("a.b { c d }");

    // Names of every key event that carries a payload.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert!(keys.contains(&"a"), "Should have 'a'");
    assert!(keys.contains(&"b"), "Should have 'b'");
    assert!(keys.contains(&"c"), "Should have 'c'");

    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(
        error_free,
        "Dotted path with object value should not have errors"
    );
}
3325
/// A dotted path followed by an attribute (`selector.matchLabels app>web`)
/// must emit the keys `selector`, `matchLabels` and `app` and no error events.
#[test]
fn test_dotted_path_with_attributes_value() {
    let events = parse("selector.matchLabels app>web");

    // Names of every key event that carries a payload.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert!(keys.contains(&"selector"), "Should have 'selector'");
    assert!(keys.contains(&"matchLabels"), "Should have 'matchLabels'");
    assert!(keys.contains(&"app"), "Should have 'app' from attribute");

    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(
        error_free,
        "Dotted path with attributes value should not have errors"
    );
}
3350
/// Dots in value position are not path separators: `key example.com` must
/// emit the single key `key`, the scalar `example.com`, and no error events.
#[test]
fn test_dot_in_value_is_literal() {
    let events = parse("key example.com");

    // Names of every key event that carries a payload, in emission order.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert_eq!(keys, vec!["key"], "Should have only one key 'key'");

    let has_literal_value = events
        .iter()
        .any(|ev| matches!(ev, Event::Scalar { value, .. } if value == "example.com"));
    assert!(
        has_literal_value,
        "Value should be 'example.com' as a single scalar"
    );

    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(error_free, "Dot in value should not cause errors");
}
3380
/// Consecutive dotted paths sharing a prefix (`foo.bar.x`, `foo.bar.y`,
/// `foo.baz`) must parse without errors and emit every segment as a key.
#[test]
fn test_sibling_dotted_paths() {
    let events = parse("foo.bar.x value1\nfoo.bar.y value2\nfoo.baz value3");

    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(error_free, "Sibling dotted paths should not cause errors");

    // Names of every key event that carries a payload.
    let keys: Vec<_> = events
        .iter()
        .filter_map(|ev| {
            if let Event::Key {
                payload: Some(name),
                ..
            } = ev
            {
                Some(name.as_ref())
            } else {
                None
            }
        })
        .collect();
    assert!(keys.contains(&"foo"), "Should have 'foo'");
    assert!(keys.contains(&"bar"), "Should have 'bar'");
    assert!(keys.contains(&"baz"), "Should have 'baz'");
    assert!(keys.contains(&"x"), "Should have 'x'");
    assert!(keys.contains(&"y"), "Should have 'y'");
}
3408
/// Returning to `foo.bar` after a sibling `foo.baz` entry has appeared must
/// yield exactly one error event, and a `ReopenedPath` error must be present.
#[test]
fn test_reopen_closed_path_error() {
    let events = parse("foo.bar {}\nfoo.baz {}\nfoo.bar.x value");

    let error_count = events
        .iter()
        .filter(|ev| matches!(ev, Event::Error { .. }))
        .count();
    assert_eq!(
        error_count, 1,
        "Should have exactly one error for reopening closed path"
    );

    let has_reopened_path = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::ReopenedPath { .. },
                ..
            }
        )
    });
    assert!(has_reopened_path, "Error should be ReopenedPath");
}
3435
/// Returning to the nested path `a.b` after the sibling `a.x` entry has
/// appeared must yield exactly one `ReopenedPath` error.
#[test]
fn test_reopen_nested_closed_path_error() {
    let events = parse("a.b.c {}\na.b.d {}\na.x {}\na.b.e {}");

    let reopen_error_count = events
        .iter()
        .filter(|ev| {
            matches!(
                ev,
                Event::Error {
                    kind: ParseErrorKind::ReopenedPath { .. },
                    ..
                }
            )
        })
        .count();
    assert_eq!(reopen_error_count, 1, "Should have exactly one reopen error");
}
3456
/// Extending a path that already holds a scalar (`a.b value` followed by
/// `a.b.c deep`) must yield a `NestIntoTerminal` error.
#[test]
fn test_nest_into_scalar_error() {
    let events = parse("a.b value\na.b.c deep");
    let has_nest_error = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::NestIntoTerminal { .. },
                ..
            }
        )
    });
    assert!(has_nest_error, "Should have NestIntoTerminal error");
}
3474
/// Dotted paths rooted at different top-level keys must parse without any
/// error events.
#[test]
fn test_different_top_level_paths_ok() {
    let events = parse("server.host localhost\ndatabase.port 5432");
    let error_free = events
        .iter()
        .all(|ev| !matches!(ev, Event::Error { .. }));
    assert!(error_free, "Different top-level paths should not conflict");
}
3485
/// A bare key directly followed by `{` (`config{}`) must yield a
/// `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_bare_key_requires_whitespace_before_brace() {
    let events = parse("config{}");
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(flagged, "config{{}} without whitespace should error");
}
3502
/// A bare key directly followed by `(` (`items(1 2 3)`) must yield a
/// `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_bare_key_requires_whitespace_before_paren() {
    let events = parse("items(1 2 3)");
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(flagged, "items() without whitespace should error");
}
3519
/// A bare key separated from `{` by whitespace (`config {}`) must not yield
/// a `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_bare_key_with_whitespace_before_brace_ok() {
    let events = parse("config {}");
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(!flagged, "config {{}} with whitespace should not error");
}
3536
/// A bare key separated from `(` by whitespace (`items (1 2 3)`) must not
/// yield a `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_bare_key_with_whitespace_before_paren_ok() {
    let events = parse("items (1 2 3)");
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(!flagged, "items () with whitespace should not error");
}
3553
/// A tag directly followed by `{` (`@object{}`) must not yield a
/// `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_tag_with_brace_no_whitespace_ok() {
    let events = parse("config @object{}");
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(!flagged, "@tag{{}} should not require whitespace");
}
3570
/// A quoted key directly followed by `{` (`"config"{}`) must not yield a
/// `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_quoted_key_no_whitespace_ok() {
    let events = parse(r#""config"{}"#);
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(
        !flagged,
        "quoted key before {{}} should not require whitespace"
    );
}
3587
/// A compact single-line document that keeps a space between each key and
/// its block must not yield a `MissingWhitespaceBeforeBlock` error.
#[test]
fn test_minified_styx_with_whitespace() {
    let events = parse("{server {host localhost,port 8080}}");
    let flagged = events.iter().any(|ev| {
        matches!(
            ev,
            Event::Error {
                kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
                ..
            }
        )
    });
    assert!(!flagged, "minified styx with whitespace should work");
}
3604}