1use std::borrow::Cow;
4use std::collections::{HashMap, VecDeque};
5
6use styx_tokenizer::Span;
7use tracing::trace;
8
9use crate::events::{EventKind, ParseErrorKind, ScalarKind};
10use crate::{Event, Lexeme, Lexer};
11
/// Lexeme stream with a single slot of push-back.
///
/// Wraps a [`Lexer`] together with at most one lexeme that was read and then
/// handed back, so that it is returned again by the following read.
#[derive(Clone)]
struct LexemeSource<'src> {
    /// Underlying lexer that produces fresh lexemes.
    lexer: Lexer<'src>,
    /// Lexeme that was handed back and must be returned before lexing further.
    pending: Option<Lexeme<'src>>,
}
22
23impl<'src> LexemeSource<'src> {
24 fn new(source: &'src str) -> Self {
25 Self {
26 lexer: Lexer::new(source),
27 pending: None,
28 }
29 }
30
31 fn next(&mut self) -> Lexeme<'src> {
32 self.pending
33 .take()
34 .unwrap_or_else(|| self.lexer.next_lexeme())
35 }
36
37 fn stash(&mut self, lexeme: Lexeme<'src>) {
38 assert!(self.pending.is_none(), "double stash - this is a bug");
39 self.pending = Some(lexeme);
40 }
41}
42
/// Parser that turns source text into a stream of [`Event`]s.
///
/// Events are pulled one at a time with `next_event`, or collected in one go
/// with `parse_to_vec`.
#[derive(Clone)]
pub struct Parser<'src> {
    /// Full source text; used for end-of-input offsets and for slicing raw text by span.
    input: &'src str,
    /// Lexeme stream over `input`, with one lexeme of push-back.
    source: LexemeSource<'src>,
    /// Current position in the parser's state machine.
    state: ParserState,
    /// Events that have been produced but not yet handed to the caller.
    event_queue: VecDeque<Event<'src>>,
}
51
/// State machine driving [`Parser`].
#[derive(Clone)]
enum ParserState {
    /// Document mode, nothing emitted yet; the first step emits `DocumentStart`.
    BeforeDocument,

    /// Expression mode, nothing emitted yet; the first step parses a single value.
    BeforeExpression,

    /// At the top level of a document, outside any explicit `{ ... }` object.
    DocumentRoot {
        /// First segment of each dotted root key seen so far, with the span that introduced it.
        seen_keys: HashMap<KeyValue, Span>,
        /// Span of the most recent doc comment that has not yet been followed by an entry.
        pending_doc_comment: Option<Span>,
        /// Path bookkeeping used to report duplicate, reopened and nested-into-terminal keys.
        path_state: PathState,
        /// Whether the `ObjectStart` for the implicit root object has been queued.
        emitted_object_start: bool,
    },

    /// Inside an object whose entries are streamed as events.
    InObject {
        /// Span of the lexeme that opened the object.
        start_span: Span,
        /// Keys seen so far in this object, with the span of their first occurrence.
        seen_keys: HashMap<KeyValue, Span>,
        /// Span of the most recent doc comment that has not yet been followed by an entry.
        pending_doc_comment: Option<Span>,
        /// State to return to once this object is closed.
        parent: Box<ParserState>,
    },

    /// The document body is finished; only trailing content is still checked.
    AfterDocument,

    /// The expression has been parsed; no further events are produced.
    AfterExpression,
}
85
86impl<'src> Parser<'src> {
87 pub fn new(source: &'src str) -> Self {
89 Self {
90 input: source,
91 source: LexemeSource::new(source),
92 state: ParserState::BeforeDocument,
93 event_queue: VecDeque::new(),
94 }
95 }
96
97 pub fn new_expr(source: &'src str) -> Self {
102 Self {
103 input: source,
104 source: LexemeSource::new(source),
105 state: ParserState::BeforeExpression,
106 event_queue: VecDeque::new(),
107 }
108 }
109
110 pub fn next_event(&mut self) -> Option<Event<'src>> {
112 trace!(
113 queue_len = self.event_queue.len(),
114 "styx-parse next_event called"
115 );
116 if let Some(event) = self.event_queue.pop_front() {
118 trace!(?event, "styx-parse returning queued event");
119 return Some(event);
120 }
121
122 let event = self.advance();
124 trace!(?event, "styx-parse returning from advance");
125 event
126 }
127
128 pub fn parse_to_vec(mut self) -> Vec<Event<'src>> {
130 let mut events = Vec::new();
131 while let Some(event) = self.next_event() {
132 events.push(event);
133 }
134 events
135 }
136
137 fn advance(&mut self) -> Option<Event<'src>> {
139 match &self.state {
140 ParserState::BeforeDocument => {
141 self.state = ParserState::DocumentRoot {
142 seen_keys: HashMap::new(),
143 pending_doc_comment: None,
144 path_state: PathState::default(),
145 emitted_object_start: false,
146 };
147 Some(Event {
148 span: Span::empty(0),
149 kind: EventKind::DocumentStart,
150 })
151 }
152 ParserState::BeforeExpression => self.advance_expression(),
153 ParserState::AfterExpression => None,
154 ParserState::AfterDocument => self.check_trailing_content(),
155 ParserState::DocumentRoot { .. } => self.advance_document_root(),
156 ParserState::InObject { .. } => self.advance_in_object(),
157 }
158 }
159
160 fn advance_expression(&mut self) -> Option<Event<'src>> {
162 loop {
163 let lexeme = self.source.next();
164 match lexeme {
165 Lexeme::Newline { .. } | Lexeme::Comment { .. } => continue,
167 Lexeme::Eof => {
168 self.state = ParserState::AfterExpression;
169 return None;
170 }
171 _ => {
172 let atom = self.parse_atom(lexeme);
174 self.emit_atom_as_value(&atom);
175 self.state = ParserState::AfterExpression;
176 return self.event_queue.pop_front();
177 }
178 }
179 }
180 }
181
182 fn check_trailing_content(&mut self) -> Option<Event<'src>> {
185 loop {
186 let lexeme = self.source.next();
187 match lexeme {
188 Lexeme::Newline { .. } | Lexeme::Comment { .. } => continue,
190 Lexeme::Eof => return None,
191 _ => {
193 let span = lexeme.span();
194 let mut end = span.end;
196 loop {
197 match self.source.next() {
198 Lexeme::Eof => break,
199 lex => end = lex.span().end,
200 }
201 }
202 return Some(Event {
203 span: Span::new(span.start, end),
204 kind: EventKind::Error {
205 kind: ParseErrorKind::TrailingContent,
206 },
207 });
208 }
209 }
210 }
211 }
212
/// Produces the next event while parsing at the top level of a document.
///
/// Lexemes are consumed until one of them yields at least one event; newlines
/// and commas are skipped. The first doc comment or entry opens an implicit
/// root object (an `ObjectStart` at offset 0), which is closed again at end of
/// input. An explicit `{` switches the parser to the `InObject` state.
///
/// Returns the first queued event for the lexeme that was handled.
fn advance_document_root(&mut self) -> Option<Event<'src>> {
    loop {
        let lexeme = self.source.next();
        match lexeme {
            Lexeme::Eof => {
                if let ParserState::DocumentRoot {
                    pending_doc_comment,
                    emitted_object_start,
                    ..
                } = &mut self.state
                {
                    // A doc comment that was never followed by an entry is an error.
                    if let Some(span) = pending_doc_comment.take() {
                        self.event_queue.push_back(Event {
                            span,
                            kind: EventKind::Error {
                                kind: ParseErrorKind::DanglingDocComment,
                            },
                        });
                    }
                    // Close the implicit root object, but only if it was ever opened.
                    // NOTE(review): the closing event is placed at offset 0, like the
                    // opening one, while DocumentEnd below uses the end of input — confirm.
                    if *emitted_object_start {
                        self.event_queue.push_back(Event {
                            span: Span::empty(0),
                            kind: EventKind::ObjectEnd,
                        });
                    }
                }
                self.event_queue.push_back(Event {
                    span: Span::empty(self.input.len() as u32),
                    kind: EventKind::DocumentEnd,
                });
                self.state = ParserState::AfterDocument;
                return self.event_queue.pop_front();
            }
            // Entry separators carry no information at the root.
            Lexeme::Newline { .. } | Lexeme::Comma { .. } => continue,
            Lexeme::Comment { span, text } => {
                return Some(Event {
                    span,
                    kind: EventKind::Comment { text },
                });
            }
            Lexeme::DocComment { span, text } => {
                if let ParserState::DocumentRoot {
                    pending_doc_comment,
                    emitted_object_start,
                    ..
                } = &mut self.state
                {
                    // Remember the comment so it can be reported if no entry follows.
                    *pending_doc_comment = Some(span);
                    // The doc comment belongs inside the implicit root object.
                    if !*emitted_object_start {
                        *emitted_object_start = true;
                        self.event_queue.push_back(Event {
                            span: Span::empty(0),
                            kind: EventKind::ObjectStart,
                        });
                    }
                }
                // Drop the `///` marker, and one following space if present.
                let line = text
                    .strip_prefix("/// ")
                    .or_else(|| text.strip_prefix("///"))
                    .unwrap_or(text);
                self.event_queue.push_back(Event {
                    span,
                    kind: EventKind::DocComment { lines: vec![line] },
                });
                return self.event_queue.pop_front();
            }
            Lexeme::ObjectStart { span } => {
                // Explicit `{` at the root: stream its entries, then finish the document.
                // NOTE(review): this arm does not consult `emitted_object_start`; if the
                // implicit root object was already opened, nothing here queues its
                // matching ObjectEnd — confirm whether that input can occur.
                self.state = ParserState::InObject {
                    start_span: span,
                    seen_keys: HashMap::new(),
                    pending_doc_comment: None,
                    parent: Box::new(ParserState::AfterDocument),
                };
                return Some(Event {
                    span,
                    kind: EventKind::ObjectStart,
                });
            }
            _ => {
                if let ParserState::DocumentRoot {
                    pending_doc_comment,
                    emitted_object_start,
                    ..
                } = &mut self.state
                {
                    // An entry follows, so any pending doc comment is no longer dangling.
                    *pending_doc_comment = None;
                    if !*emitted_object_start {
                        *emitted_object_start = true;
                        self.event_queue.push_back(Event {
                            span: Span::empty(0),
                            kind: EventKind::ObjectStart,
                        });
                    }
                }
                // Everything up to the end of the entry forms its key/value atoms.
                let atoms = self.collect_entry_atoms(lexeme);
                if !atoms.is_empty() {
                    self.emit_entry_at_root(&atoms);
                }
                return self.event_queue.pop_front();
            }
        }
    }
}
322
/// Produces the next event while streaming the entries of an explicit object.
///
/// Lexemes are consumed until one of them yields an event; newlines and
/// commas are skipped. Closing the object (or hitting end of input) restores
/// the parent state, and additionally queues `DocumentEnd` when the parent
/// state is `AfterDocument`.
///
/// Returns `None` immediately if the parser is not in the `InObject` state.
fn advance_in_object(&mut self) -> Option<Event<'src>> {
    // Span of the opening lexeme, used to report an unclosed object.
    let start = if let ParserState::InObject { start_span, .. } = &self.state {
        *start_span
    } else {
        return None;
    };

    loop {
        let lexeme = self.source.next();
        match lexeme {
            Lexeme::Eof => {
                // Input ended before the object was closed.
                if let ParserState::InObject {
                    pending_doc_comment,
                    parent,
                    ..
                } = &mut self.state
                {
                    if let Some(span) = pending_doc_comment.take() {
                        self.event_queue.push_back(Event {
                            span,
                            kind: EventKind::Error {
                                kind: ParseErrorKind::DanglingDocComment,
                            },
                        });
                    }
                    // Report the error, then close the object anyway so that
                    // start/end events stay balanced.
                    self.event_queue.push_back(Event {
                        span: start,
                        kind: EventKind::Error {
                            kind: ParseErrorKind::UnclosedObject,
                        },
                    });
                    self.event_queue.push_back(Event {
                        span: start,
                        kind: EventKind::ObjectEnd,
                    });
                    // This object was the whole document, so the document ends with it.
                    if matches!(parent.as_ref(), ParserState::AfterDocument) {
                        self.event_queue.push_back(Event {
                            span: Span::empty(self.input.len() as u32),
                            kind: EventKind::DocumentEnd,
                        });
                    }
                }
                self.pop_state();
                return self.event_queue.pop_front();
            }
            Lexeme::ObjectEnd { span } => {
                if let ParserState::InObject {
                    pending_doc_comment,
                    parent,
                    ..
                } = &mut self.state
                {
                    if let Some(doc_span) = pending_doc_comment.take() {
                        self.event_queue.push_back(Event {
                            span: doc_span,
                            kind: EventKind::Error {
                                kind: ParseErrorKind::DanglingDocComment,
                            },
                        });
                    }
                    if matches!(parent.as_ref(), ParserState::AfterDocument) {
                        self.event_queue.push_back(Event {
                            span: Span::empty(self.input.len() as u32),
                            kind: EventKind::DocumentEnd,
                        });
                    }
                }
                self.pop_state();
                // ObjectEnd is returned directly; the events queued above are
                // delivered on subsequent calls.
                return Some(Event {
                    span,
                    kind: EventKind::ObjectEnd,
                });
            }
            // Entry separators carry no information.
            Lexeme::Newline { .. } | Lexeme::Comma { .. } => continue,
            Lexeme::Comment { span, text } => {
                return Some(Event {
                    span,
                    kind: EventKind::Comment { text },
                });
            }
            Lexeme::DocComment { span, text } => {
                if let ParserState::InObject {
                    pending_doc_comment,
                    ..
                } = &mut self.state
                {
                    // Remember the comment so it can be reported if no entry follows.
                    *pending_doc_comment = Some(span);
                }
                // Drop the `///` marker, and one following space if present.
                let line = text
                    .strip_prefix("/// ")
                    .or_else(|| text.strip_prefix("///"))
                    .unwrap_or(text);
                return Some(Event {
                    span,
                    kind: EventKind::DocComment { lines: vec![line] },
                });
            }
            _ => {
                if let ParserState::InObject {
                    pending_doc_comment,
                    ..
                } = &mut self.state
                {
                    // An entry follows, so any pending doc comment is no longer dangling.
                    *pending_doc_comment = None;
                }
                let atoms = self.collect_entry_atoms(lexeme);
                if !atoms.is_empty() {
                    self.emit_entry_in_object(&atoms);
                }
                return self.event_queue.pop_front();
            }
        }
    }
}
443
444 fn pop_state(&mut self) {
446 let parent = match &mut self.state {
447 ParserState::InObject { parent, .. } => {
448 std::mem::replace(parent.as_mut(), ParserState::AfterDocument)
449 }
450 _ => ParserState::AfterDocument,
451 };
452 self.state = parent;
453 }
454
455 fn emit_entry_at_root(&mut self, atoms: &[Atom<'src>]) {
457 if atoms.is_empty() {
458 return;
459 }
460
461 let key_atom = &atoms[0];
462
463 if let AtomContent::Scalar {
465 kind: ScalarKind::Heredoc,
466 ..
467 } = &key_atom.content
468 {
469 let error_span = self.heredoc_start_span(key_atom.span);
471 self.event_queue.push_back(Event {
472 span: error_span,
473 kind: EventKind::Error {
474 kind: ParseErrorKind::InvalidKey,
475 },
476 });
477 }
478
479 if let AtomContent::Scalar {
481 value,
482 kind: ScalarKind::Bare,
483 } = &key_atom.content
484 && value.contains('.')
485 {
486 self.emit_dotted_path_entry(value.clone(), key_atom.span, atoms, true);
487 return;
488 }
489
490 let key_value = KeyValue::from_atom(key_atom);
493
494 if let ParserState::DocumentRoot { path_state, .. } = &mut self.state {
495 let key_text = key_value.to_key_string();
497 let path = vec![key_text];
498 let value_kind = if atoms.len() >= 2 {
499 match &atoms[1].content {
500 AtomContent::Object { .. } | AtomContent::Attributes(_) => {
501 PathValueKind::Object
502 }
503 _ => PathValueKind::Terminal,
504 }
505 } else {
506 PathValueKind::Terminal
507 };
508
509 if let Err(err) = path_state.check_and_update(&path, key_atom.span, value_kind) {
510 self.emit_path_error(err, key_atom.span);
511 }
512 }
513
514 self.emit_simple_entry(atoms);
515 }
516
517 fn emit_entry_in_object(&mut self, atoms: &[Atom<'src>]) {
519 if atoms.is_empty() {
520 return;
521 }
522
523 let key_atom = &atoms[0];
524
525 if let AtomContent::Scalar {
527 kind: ScalarKind::Heredoc,
528 ..
529 } = &key_atom.content
530 {
531 self.event_queue.push_back(Event {
532 span: key_atom.span,
533 kind: EventKind::Error {
534 kind: ParseErrorKind::InvalidKey,
535 },
536 });
537 }
538
539 if let AtomContent::Scalar {
541 value,
542 kind: ScalarKind::Bare,
543 } = &key_atom.content
544 && value.contains('.')
545 {
546 self.emit_dotted_path_entry(value.clone(), key_atom.span, atoms, false);
547 return;
548 }
549
550 let key_value = KeyValue::from_atom(key_atom);
552
553 if let ParserState::InObject { seen_keys, .. } = &mut self.state {
554 if let Some(&original_span) = seen_keys.get(&key_value) {
555 self.event_queue.push_back(Event {
556 span: key_atom.span,
557 kind: EventKind::Error {
558 kind: ParseErrorKind::DuplicateKey {
559 original: original_span,
560 },
561 },
562 });
563 } else {
564 seen_keys.insert(key_value, key_atom.span);
565 }
566 }
567
568 self.emit_simple_entry(atoms);
569 }
570
571 fn emit_simple_entry(&mut self, atoms: &[Atom<'src>]) {
573 let key_atom = &atoms[0];
574
575 self.event_queue.push_back(Event {
576 span: key_atom.span,
577 kind: EventKind::EntryStart,
578 });
579 self.emit_atom_as_key(key_atom);
580
581 if atoms.len() == 1 {
582 self.event_queue.push_back(Event {
583 span: key_atom.span,
584 kind: EventKind::Unit,
585 });
586 } else if atoms.len() >= 2 {
587 self.emit_atom_as_value(&atoms[1]);
588 }
589
590 if atoms.len() > 2 {
591 self.event_queue.push_back(Event {
592 span: atoms[2].span,
593 kind: EventKind::Error {
594 kind: ParseErrorKind::TooManyAtoms,
595 },
596 });
597 }
598
599 self.event_queue.push_back(Event {
600 span: atoms.last().map(|a| a.span).unwrap_or(key_atom.span),
601 kind: EventKind::EntryEnd,
602 });
603 }
604
605 fn collect_entry_atoms(&mut self, first: Lexeme<'src>) -> Vec<Atom<'src>> {
607 let mut atoms = Vec::new();
608 let first_atom = self.parse_atom(first);
609 let first_atom_end = first_atom.span.end;
610 let first_is_bare = matches!(
611 &first_atom.content,
612 AtomContent::Scalar {
613 kind: ScalarKind::Bare,
614 ..
615 }
616 );
617 atoms.push(first_atom);
618
619 loop {
620 let lexeme = self.source.next();
621 match lexeme {
622 Lexeme::Eof
623 | Lexeme::Newline { .. }
624 | Lexeme::Comma { .. }
625 | Lexeme::ObjectEnd { .. }
626 | Lexeme::SeqEnd { .. } => {
627 self.source.stash(lexeme);
628 break;
629 }
630 Lexeme::Comment { span, text } => {
631 self.event_queue.push_back(Event {
632 span,
633 kind: EventKind::Comment { text },
634 });
635 break;
636 }
637 Lexeme::DocComment { span, text } => {
638 let line = text
640 .strip_prefix("/// ")
641 .or_else(|| text.strip_prefix("///"))
642 .unwrap_or(text);
643 self.event_queue.push_back(Event {
644 span,
645 kind: EventKind::DocComment { lines: vec![line] },
646 });
647 break;
648 }
649 Lexeme::ObjectStart { span } | Lexeme::SeqStart { span } => {
650 if atoms.len() == 1 && first_is_bare && first_atom_end == span.start {
653 self.event_queue.push_back(Event {
654 span,
655 kind: EventKind::Error {
656 kind: ParseErrorKind::MissingWhitespaceBeforeBlock,
657 },
658 });
659 }
660 let atom = self.parse_atom(lexeme);
661 atoms.push(atom);
662 }
663 _ => {
664 let atom = self.parse_atom(lexeme);
665 atoms.push(atom);
666 }
667 }
668 }
669
670 atoms
671 }
672
673 fn parse_atom(&mut self, lexeme: Lexeme<'src>) -> Atom<'src> {
675 match lexeme {
676 Lexeme::Scalar { span, value, kind } => Atom {
677 span,
678 content: AtomContent::Scalar { value, kind },
679 },
680 Lexeme::Unit { span } => {
681 let next = self.source.next();
684 if let Lexeme::Scalar {
685 span: scalar_span,
686 value,
687 kind: ScalarKind::Bare,
688 } = &next
689 {
690 if scalar_span.start == span.end {
692 return Atom {
693 span: Span::new(span.start, scalar_span.end),
694 content: AtomContent::Tag {
695 name: "", payload: Some(Box::new(Atom {
697 span: *scalar_span,
698 content: AtomContent::Scalar {
699 value: value.clone(),
700 kind: ScalarKind::Bare,
701 },
702 })),
703 invalid_name: true,
704 error_span: Some(*scalar_span), },
706 };
707 }
708 }
709 self.source.stash(next);
711 Atom {
712 span,
713 content: AtomContent::Unit,
714 }
715 }
716 Lexeme::Tag {
717 span,
718 name,
719 has_payload,
720 } => {
721 if !has_payload {
724 let next = self.source.next();
725 if let Lexeme::Scalar {
726 span: scalar_span,
727 value,
728 kind: ScalarKind::Bare,
729 } = &next
730 && scalar_span.start == span.end
731 && value.starts_with('.')
732 {
733 let combined_name_span = Span::new(span.start + 1, scalar_span.end);
735 return Atom {
736 span: Span::new(span.start, scalar_span.end),
737 content: AtomContent::Tag {
738 name,
739 payload: None,
740 invalid_name: true,
741 error_span: Some(combined_name_span),
742 },
743 };
744 }
745 self.source.stash(next);
746 }
747
748 let invalid_name = !is_valid_tag_name(name);
749 let payload = if has_payload {
750 let next = self.source.next();
751 Some(Box::new(self.parse_atom(next)))
752 } else {
753 None
754 };
755 let end = payload.as_ref().map(|p| p.span.end).unwrap_or(span.end);
756 let error_span = if invalid_name { Some(span) } else { None };
758 Atom {
759 span: Span::new(span.start, end),
760 content: AtomContent::Tag {
761 name,
762 payload,
763 invalid_name,
764 error_span,
765 },
766 }
767 }
768 Lexeme::ObjectStart { span } => self.parse_object_atom(span),
769 Lexeme::SeqStart { span } => self.parse_sequence_atom(span),
770 Lexeme::AttrKey { key_span, key, .. } => self.parse_attributes(key_span, key),
771 Lexeme::Error { span, message } => {
772 if message.contains("escape") {
774 let raw_text = &self.input[span.start as usize..span.end as usize];
776 let inner = if raw_text.starts_with('"') && raw_text.ends_with('"') {
778 &raw_text[1..raw_text.len() - 1]
779 } else {
780 raw_text
781 };
782 Atom {
783 span,
784 content: AtomContent::InvalidEscapeScalar {
785 raw_inner: Cow::Borrowed(inner),
786 },
787 }
788 } else {
789 Atom {
790 span,
791 content: AtomContent::Error { message },
792 }
793 }
794 }
795 Lexeme::ObjectEnd { span }
796 | Lexeme::SeqEnd { span }
797 | Lexeme::Comma { span }
798 | Lexeme::Newline { span } => Atom {
799 span,
800 content: AtomContent::Error {
801 message: "unexpected token",
802 },
803 },
804 Lexeme::Comment { span, .. } | Lexeme::DocComment { span, .. } => Atom {
805 span,
806 content: AtomContent::Error {
807 message: "unexpected token",
808 },
809 },
810 Lexeme::Eof => Atom {
811 span: Span::new(self.input.len() as u32, self.input.len() as u32),
812 content: AtomContent::Error {
813 message: "unexpected end of input",
814 },
815 },
816 }
817 }
818
/// Parses a complete object, whose opening lexeme has already been consumed,
/// into a single `AtomContent::Object` atom.
///
/// `start_span` is the span of the opening lexeme. Entries are collected
/// together with the doc comments that precede them. Duplicate keys, dangling
/// doc comments and a missing closer are recorded in the atom rather than
/// emitted as events here. Plain comments, newlines and commas are skipped.
///
/// The returned atom spans from the opener to the closer, or just the opener
/// when the object is unclosed.
fn parse_object_atom(&mut self, start_span: Span) -> Atom<'src> {
    let mut entries: Vec<ObjectEntry<'src>> = Vec::new();
    // First occurrence of each key, for duplicate detection.
    let mut seen_keys: HashMap<KeyValue, Span> = HashMap::new();
    // (original span, duplicate span) pairs.
    let mut duplicate_key_spans: Vec<(Span, Span)> = Vec::new();
    let mut dangling_doc_comment_spans: Vec<Span> = Vec::new();
    // Doc comment lines seen since the last entry, with their raw text.
    let mut pending_doc_comments: Vec<(Span, &'src str)> = Vec::new();
    let mut unclosed = false;
    // Stays at the opener unless a closer is found.
    let mut end_span = start_span;

    loop {
        let lexeme = self.source.next();
        match lexeme {
            Lexeme::Eof => {
                unclosed = true;
                // Doc comments with no entry after them are dangling.
                for (span, _) in &pending_doc_comments {
                    dangling_doc_comment_spans.push(*span);
                }
                break;
            }
            Lexeme::ObjectEnd { span } => {
                for (s, _) in &pending_doc_comments {
                    dangling_doc_comment_spans.push(*s);
                }
                end_span = span;
                break;
            }
            Lexeme::Newline { .. } | Lexeme::Comma { .. } => continue,
            Lexeme::Comment { .. } => continue,
            Lexeme::DocComment { span, text } => {
                pending_doc_comments.push((span, text));
            }
            _ => {
                // Attach all pending doc comment lines to this entry as one
                // block spanning from the first line to the last.
                let doc_comment = if pending_doc_comments.is_empty() {
                    None
                } else {
                    let first_span = pending_doc_comments.first().unwrap().0;
                    let last_span = pending_doc_comments.last().unwrap().0;
                    let combined_span = Span::new(first_span.start, last_span.end);
                    let lines: Vec<&'src str> = pending_doc_comments
                        .iter()
                        .map(|(_, text)| {
                            // Drop the `///` marker, and one following space if present.
                            text.strip_prefix("/// ")
                                .or_else(|| text.strip_prefix("///"))
                                .unwrap_or(*text)
                        })
                        .collect();
                    pending_doc_comments.clear();
                    Some((combined_span, lines))
                };
                let entry_atoms = self.collect_entry_atoms(lexeme);

                if !entry_atoms.is_empty() {
                    let key = entry_atoms[0].clone();
                    let key_value = KeyValue::from_atom(&key);

                    // Only the first occurrence of a key is remembered.
                    if let Some(&original_span) = seen_keys.get(&key_value) {
                        duplicate_key_spans.push((original_span, key.span));
                    } else {
                        seen_keys.insert(key_value, key.span);
                    }

                    // One atom: unit value at the key's span. Two: key and value.
                    // More: the third atom's span is recorded as excess.
                    let (value, too_many_atoms_span) = if entry_atoms.len() == 1 {
                        (
                            Atom {
                                span: key.span,
                                content: AtomContent::Unit,
                            },
                            None,
                        )
                    } else if entry_atoms.len() == 2 {
                        (entry_atoms[1].clone(), None)
                    } else {
                        (entry_atoms[1].clone(), Some(entry_atoms[2].span))
                    };

                    entries.push(ObjectEntry {
                        key,
                        value,
                        doc_comment,
                        too_many_atoms_span,
                    });
                }
            }
        }
    }

    Atom {
        span: Span::new(start_span.start, end_span.end),
        content: AtomContent::Object {
            entries,
            duplicate_key_spans,
            dangling_doc_comment_spans,
            unclosed,
        },
    }
}
918
919 fn parse_sequence_atom(&mut self, start_span: Span) -> Atom<'src> {
921 let mut elements: Vec<Atom<'src>> = Vec::new();
922 let mut unclosed = false;
923 let mut comma_spans: Vec<Span> = Vec::new();
924 let mut end_span = start_span;
925
926 loop {
927 let lexeme = self.source.next();
928 match lexeme {
929 Lexeme::Eof => {
930 unclosed = true;
931 break;
932 }
933 Lexeme::SeqEnd { span } => {
934 end_span = span;
935 break;
936 }
937 Lexeme::Newline { .. } => continue,
938 Lexeme::Comma { span } => {
939 comma_spans.push(span);
940 continue;
941 }
942 Lexeme::Comment { .. } | Lexeme::DocComment { .. } => continue,
943 _ => {
944 let elem = self.parse_atom(lexeme);
945 elements.push(elem);
946 }
947 }
948 }
949
950 Atom {
951 span: Span::new(start_span.start, end_span.end),
952 content: AtomContent::Sequence {
953 elements,
954 unclosed,
955 comma_spans,
956 },
957 }
958 }
959
960 fn parse_attributes(&mut self, first_span: Span, first_key: &'src str) -> Atom<'src> {
962 let mut attrs = Vec::new();
963 let first_value = self.parse_attribute_value();
964 attrs.push(AttributeEntry {
965 key: first_key,
966 key_span: first_span,
967 value: first_value,
968 });
969
970 loop {
971 let lexeme = self.source.next();
972 match lexeme {
973 Lexeme::AttrKey { key_span, key, .. } => {
974 let value = self.parse_attribute_value();
975 attrs.push(AttributeEntry {
976 key,
977 key_span,
978 value,
979 });
980 }
981 other => {
982 self.source.stash(other);
983 break;
984 }
985 }
986 }
987
988 let end = attrs
989 .last()
990 .map(|a| a.value.span.end)
991 .unwrap_or(first_span.end);
992 Atom {
993 span: Span::new(first_span.start, end),
994 content: AtomContent::Attributes(attrs),
995 }
996 }
997
998 fn parse_attribute_value(&mut self) -> Atom<'src> {
1000 let lexeme = self.source.next();
1001 self.parse_atom(lexeme)
1002 }
1003
/// Queues the events for an entry whose bare key is a dotted path such as
/// `a.b.c`, expanding it into nested single-entry objects.
///
/// * `path_text` — the key text, split on `.` into segments.
/// * `path_span` — span of the whole key in the source.
/// * `atoms` — the entry's atoms; `atoms[1]`, if present, is the value.
/// * `check_path_state` — when true and the parser is at the document root,
///   the path is recorded and validated against the root path state.
///
/// A path with an empty segment is reported as `InvalidKey`, followed by an
/// empty `EntryStart`/`EntryEnd` pair.
fn emit_dotted_path_entry(
    &mut self,
    path_text: Cow<'src, str>,
    path_span: Span,
    atoms: &[Atom<'src>],
    check_path_state: bool,
) {
    let segments: Vec<&str> = path_text.split('.').collect();

    // Leading, trailing or doubled dots produce an empty segment.
    if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
        self.event_queue.push_back(Event {
            span: path_span,
            kind: EventKind::Error {
                kind: ParseErrorKind::InvalidKey,
            },
        });
        self.event_queue.push_back(Event {
            span: path_span,
            kind: EventKind::EntryStart,
        });
        self.event_queue.push_back(Event {
            span: path_span,
            kind: EventKind::EntryEnd,
        });
        return;
    }

    if check_path_state
        && let ParserState::DocumentRoot {
            seen_keys,
            path_state,
            ..
        } = &mut self.state
    {
        // Record the first segment as a root key; an existing record is kept.
        let first_key_value = KeyValue::Scalar(segments[0].to_string());
        seen_keys.entry(first_key_value).or_insert(path_span);

        let path: Vec<String> = segments.iter().map(|s| s.to_string()).collect();
        // Object-like values leave the path open; everything else terminates it.
        let value_kind = if atoms.len() >= 2 {
            match &atoms[1].content {
                AtomContent::Object { .. } | AtomContent::Attributes(_) => {
                    PathValueKind::Object
                }
                _ => PathValueKind::Terminal,
            }
        } else {
            PathValueKind::Terminal
        };

        if let Err(err) = path_state.check_and_update(&path, path_span, value_kind) {
            self.emit_path_error(err, path_span);
        }
    }

    let depth = segments.len();
    // Byte offset of the current segment within the source.
    let mut current_offset = path_span.start;

    for (i, segment) in segments.iter().enumerate() {
        let segment_len = segment.len() as u32;
        let segment_span = Span::new(current_offset, current_offset + segment_len);

        self.event_queue.push_back(Event {
            span: segment_span,
            kind: EventKind::EntryStart,
        });
        self.event_queue.push_back(Event {
            span: segment_span,
            kind: EventKind::Key {
                tag: None,
                payload: Some(Cow::Owned(segment.to_string())),
                kind: ScalarKind::Bare,
            },
        });

        // Every segment but the last opens a nested object.
        if i < depth - 1 {
            self.event_queue.push_back(Event {
                span: segment_span,
                kind: EventKind::ObjectStart,
            });
        }

        // Skip the segment and the dot that follows it.
        current_offset += segment_len + 1;
    }

    // The value belongs to the innermost entry.
    if atoms.len() == 1 {
        self.event_queue.push_back(Event {
            span: path_span,
            kind: EventKind::Unit,
        });
    } else if atoms.len() >= 2 {
        self.emit_atom_as_value(&atoms[1]);
    }

    if atoms.len() > 2 {
        self.event_queue.push_back(Event {
            span: atoms[2].span,
            kind: EventKind::Error {
                kind: ParseErrorKind::TooManyAtoms,
            },
        });
    }

    // Unwind from the innermost entry outwards: each outer level closes the
    // object it opened and then its own entry.
    for i in (0..depth).rev() {
        if i < depth - 1 {
            self.event_queue.push_back(Event {
                span: path_span,
                kind: EventKind::ObjectEnd,
            });
        }
        self.event_queue.push_back(Event {
            span: path_span,
            kind: EventKind::EntryEnd,
        });
    }
}
1124
1125 fn emit_path_error(&mut self, err: PathError, span: Span) {
1127 let kind = match err {
1128 PathError::Duplicate { original } => ParseErrorKind::DuplicateKey { original },
1129 PathError::Reopened { closed_path } => ParseErrorKind::ReopenedPath { closed_path },
1130 PathError::NestIntoTerminal { terminal_path } => {
1131 ParseErrorKind::NestIntoTerminal { terminal_path }
1132 }
1133 };
1134 self.event_queue.push_back(Event {
1135 span,
1136 kind: EventKind::Error { kind },
1137 });
1138 }
1139
1140 fn heredoc_start_span(&self, heredoc_span: Span) -> Span {
1142 let text = &self.input[heredoc_span.start as usize..heredoc_span.end as usize];
1143 let end_offset = text.find('\n').map(|i| i + 1).unwrap_or(text.len());
1145 Span::new(heredoc_span.start, heredoc_span.start + end_offset as u32)
1146 }
1147
/// Queues the event(s) that represent `atom` used as an entry key.
///
/// Scalars, units and tags (with no payload, a scalar payload or a unit
/// payload) produce a `Key` event, preceded by error events where applicable.
/// Any other atom produces only an error event and no `Key`.
fn emit_atom_as_key(&mut self, atom: &Atom<'src>) {
    match &atom.content {
        AtomContent::Scalar { value, kind } => {
            self.event_queue.push_back(Event {
                span: atom.span,
                kind: EventKind::Key {
                    tag: None,
                    payload: Some(value.clone()),
                    kind: *kind,
                },
            });
        }
        AtomContent::Unit => {
            // A unit key has neither tag nor payload.
            self.event_queue.push_back(Event {
                span: atom.span,
                kind: EventKind::Key {
                    tag: None,
                    payload: None,
                    kind: ScalarKind::Bare,
                },
            });
        }
        AtomContent::Tag {
            name,
            payload,
            invalid_name,
            error_span,
        } => {
            // Report a bad tag name first; the key is still emitted afterwards.
            if *invalid_name {
                self.event_queue.push_back(Event {
                    span: error_span.unwrap_or(atom.span),
                    kind: EventKind::Error {
                        kind: ParseErrorKind::InvalidTagName,
                    },
                });
            }
            match payload {
                None => {
                    self.event_queue.push_back(Event {
                        span: atom.span,
                        kind: EventKind::Key {
                            tag: Some(name),
                            payload: None,
                            kind: ScalarKind::Bare,
                        },
                    });
                }
                Some(inner) => match &inner.content {
                    AtomContent::Scalar { value, kind } => {
                        // Only quoted payloads are passed to the escape check.
                        if *kind == ScalarKind::Quoted {
                            self.emit_escape_errors(value, inner.span);
                        }
                        self.event_queue.push_back(Event {
                            span: atom.span,
                            kind: EventKind::Key {
                                tag: Some(name),
                                payload: Some(value.clone()),
                                kind: *kind,
                            },
                        });
                    }
                    AtomContent::Unit => {
                        self.event_queue.push_back(Event {
                            span: atom.span,
                            kind: EventKind::Key {
                                tag: Some(name),
                                payload: None,
                                kind: ScalarKind::Bare,
                            },
                        });
                    }
                    // Any other payload cannot be part of a key; only the
                    // error is emitted.
                    _ => {
                        self.event_queue.push_back(Event {
                            span: inner.span,
                            kind: EventKind::Error {
                                kind: ParseErrorKind::InvalidKey,
                            },
                        });
                    }
                },
            }
        }
        AtomContent::InvalidEscapeScalar { raw_inner } => {
            // `raw_inner` starts one byte into the atom — presumably just
            // past the opening quote; TODO confirm against the lexer.
            let inner_start = atom.span.start + 1;
            // Each item is used as (offset within `raw_inner`, offending sequence).
            for (offset, seq) in validate_escapes(raw_inner) {
                let error_start = inner_start + offset as u32;
                let error_span = Span::new(error_start, error_start + seq.len() as u32);
                self.event_queue.push_back(Event {
                    span: error_span,
                    kind: EventKind::Error {
                        kind: ParseErrorKind::InvalidEscape(seq),
                    },
                });
            }
            // The key is still emitted, using whatever `unescape_quoted` returns.
            self.event_queue.push_back(Event {
                span: atom.span,
                kind: EventKind::Key {
                    tag: None,
                    payload: Some(Cow::Owned(unescape_quoted(raw_inner).into_owned())),
                    kind: ScalarKind::Quoted,
                },
            });
        }
        AtomContent::Error { message } => {
            // The error kind is chosen from the message text.
            let kind = if message.contains("invalid tag name") {
                ParseErrorKind::InvalidTagName
            } else {
                ParseErrorKind::InvalidKey
            };
            self.event_queue.push_back(Event {
                span: atom.span,
                kind: EventKind::Error { kind },
            });
        }
        // Every remaining kind of atom is not a valid key.
        _ => {
            self.event_queue.push_back(Event {
                span: atom.span,
                kind: EventKind::Error {
                    kind: ParseErrorKind::InvalidKey,
                },
            });
        }
    }
}
1276
1277 fn emit_atom_as_value(&mut self, atom: &Atom<'src>) {
1279 match &atom.content {
1280 AtomContent::Scalar { value, kind } => {
1281 self.event_queue.push_back(Event {
1283 span: atom.span,
1284 kind: EventKind::Scalar {
1285 value: value.clone(),
1286 kind: *kind,
1287 },
1288 });
1289 }
1290 AtomContent::Unit => {
1291 self.event_queue.push_back(Event {
1292 span: atom.span,
1293 kind: EventKind::Unit,
1294 });
1295 }
1296 AtomContent::Tag {
1297 name,
1298 payload,
1299 invalid_name,
1300 error_span,
1301 } => {
1302 if *invalid_name {
1303 self.event_queue.push_back(Event {
1304 span: error_span.unwrap_or(atom.span),
1305 kind: EventKind::Error {
1306 kind: ParseErrorKind::InvalidTagName,
1307 },
1308 });
1309 }
1310 self.event_queue.push_back(Event {
1311 span: atom.span,
1312 kind: EventKind::TagStart { name },
1313 });
1314 if let Some(inner) = payload {
1315 self.emit_atom_as_value(inner);
1316 }
1317 self.event_queue.push_back(Event {
1318 span: atom.span,
1319 kind: EventKind::TagEnd,
1320 });
1321 }
1322 AtomContent::Object {
1323 entries,
1324 duplicate_key_spans,
1325 dangling_doc_comment_spans,
1326 unclosed,
1327 } => {
1328 self.event_queue.push_back(Event {
1329 span: atom.span,
1330 kind: EventKind::ObjectStart,
1331 });
1332
1333 if *unclosed {
1334 self.event_queue.push_back(Event {
1335 span: atom.span,
1336 kind: EventKind::Error {
1337 kind: ParseErrorKind::UnclosedObject,
1338 },
1339 });
1340 }
1341
1342 for (original, dup) in duplicate_key_spans {
1343 self.event_queue.push_back(Event {
1344 span: *dup,
1345 kind: EventKind::Error {
1346 kind: ParseErrorKind::DuplicateKey {
1347 original: *original,
1348 },
1349 },
1350 });
1351 }
1352
1353 for span in dangling_doc_comment_spans {
1354 self.event_queue.push_back(Event {
1355 span: *span,
1356 kind: EventKind::Error {
1357 kind: ParseErrorKind::DanglingDocComment,
1358 },
1359 });
1360 }
1361
1362 for entry in entries {
1363 if let Some((span, lines)) = &entry.doc_comment {
1364 self.event_queue.push_back(Event {
1365 span: *span,
1366 kind: EventKind::DocComment {
1367 lines: lines.clone(),
1368 },
1369 });
1370 }
1371 self.event_queue.push_back(Event {
1372 span: entry.key.span,
1373 kind: EventKind::EntryStart,
1374 });
1375 self.emit_atom_as_key(&entry.key);
1376 self.emit_atom_as_value(&entry.value);
1377
1378 let mut end_span = entry.value.span;
1379 if let Some(span) = entry.too_many_atoms_span {
1380 self.event_queue.push_back(Event {
1381 span,
1382 kind: EventKind::Error {
1383 kind: ParseErrorKind::TooManyAtoms,
1384 },
1385 });
1386 end_span = span;
1387 }
1388 self.event_queue.push_back(Event {
1389 span: end_span,
1390 kind: EventKind::EntryEnd,
1391 });
1392 }
1393
1394 self.event_queue.push_back(Event {
1395 span: atom.span,
1396 kind: EventKind::ObjectEnd,
1397 });
1398 }
1399 AtomContent::Sequence {
1400 elements,
1401 unclosed,
1402 comma_spans,
1403 } => {
1404 self.event_queue.push_back(Event {
1405 span: atom.span,
1406 kind: EventKind::SequenceStart,
1407 });
1408
1409 if *unclosed {
1410 self.event_queue.push_back(Event {
1411 span: atom.span,
1412 kind: EventKind::Error {
1413 kind: ParseErrorKind::UnclosedSequence,
1414 },
1415 });
1416 }
1417
1418 for span in comma_spans {
1419 self.event_queue.push_back(Event {
1420 span: *span,
1421 kind: EventKind::Error {
1422 kind: ParseErrorKind::CommaInSequence,
1423 },
1424 });
1425 }
1426
1427 for elem in elements {
1428 self.emit_atom_as_value(elem);
1429 }
1430
1431 self.event_queue.push_back(Event {
1432 span: atom.span,
1433 kind: EventKind::SequenceEnd,
1434 });
1435 }
1436 AtomContent::Attributes(attrs) => {
1437 self.event_queue.push_back(Event {
1438 span: atom.span,
1439 kind: EventKind::ObjectStart,
1440 });
1441
1442 for attr in attrs {
1443 self.event_queue.push_back(Event {
1444 span: attr.key_span,
1445 kind: EventKind::EntryStart,
1446 });
1447 self.event_queue.push_back(Event {
1448 span: attr.key_span,
1449 kind: EventKind::Key {
1450 tag: None,
1451 payload: Some(Cow::Borrowed(attr.key)),
1452 kind: ScalarKind::Bare,
1453 },
1454 });
1455 self.emit_atom_as_value(&attr.value);
1456 self.event_queue.push_back(Event {
1457 span: attr.value.span,
1458 kind: EventKind::EntryEnd,
1459 });
1460 }
1461
1462 self.event_queue.push_back(Event {
1463 span: atom.span,
1464 kind: EventKind::ObjectEnd,
1465 });
1466 }
1467 AtomContent::InvalidEscapeScalar { raw_inner } => {
1468 let inner_start = atom.span.start + 1;
1471 for (offset, seq) in validate_escapes(raw_inner) {
1472 let error_start = inner_start + offset as u32;
1473 let error_span = Span::new(error_start, error_start + seq.len() as u32);
1474 self.event_queue.push_back(Event {
1475 span: error_span,
1476 kind: EventKind::Error {
1477 kind: ParseErrorKind::InvalidEscape(seq),
1478 },
1479 });
1480 }
1481 self.event_queue.push_back(Event {
1483 span: atom.span,
1484 kind: EventKind::Scalar {
1485 value: Cow::Owned(unescape_quoted(raw_inner).into_owned()),
1486 kind: ScalarKind::Quoted,
1487 },
1488 });
1489 }
1490 AtomContent::Error { message } => {
1491 let kind = if message.contains("invalid tag name") {
1492 ParseErrorKind::InvalidTagName
1493 } else if message.contains("expected a value") {
1494 ParseErrorKind::ExpectedValue
1495 } else {
1496 ParseErrorKind::UnexpectedToken
1497 };
1498 self.event_queue.push_back(Event {
1499 span: atom.span,
1500 kind: EventKind::Error { kind },
1501 });
1502 }
1503 }
1504 }
1505
1506 fn emit_escape_errors(&mut self, text: &str, span: Span) {
1508 for (offset, seq) in validate_escapes(text) {
1509 let error_start = span.start + offset as u32;
1510 let error_span = Span::new(error_start, error_start + seq.len() as u32);
1511 self.event_queue.push_back(Event {
1512 span: error_span,
1513 kind: EventKind::Error {
1514 kind: ParseErrorKind::InvalidEscape(seq),
1515 },
1516 });
1517 }
1518 }
1519}
1520
/// A parsed atom: its content together with the source span it was read from.
#[derive(Debug, Clone)]
struct Atom<'src> {
    /// Source span of the atom; the events emitted for the atom carry this span.
    span: Span,
    /// What the atom holds (scalar, object, sequence, error, ...).
    content: AtomContent<'src>,
}
1530
/// The payload of an [`Atom`], one variant per syntactic form the parser produces.
#[derive(Debug, Clone)]
enum AtomContent<'src> {
    /// A scalar value together with the kind of scalar it was written as.
    Scalar {
        value: Cow<'src, str>,
        kind: ScalarKind,
    },
    /// The unit value.
    Unit,
    /// A tag with an optional payload atom.
    Tag {
        name: &'src str,
        payload: Option<Box<Atom<'src>>>,
        // NOTE(review): presumably set when `name` fails tag-name validation — confirm
        // against the code that builds this variant.
        invalid_name: bool,
        // NOTE(review): presumably the span to report the tag error at — confirm.
        error_span: Option<Span>,
    },
    /// An object; emitted as `ObjectStart`, its errors, its entries, then `ObjectEnd`.
    Object {
        /// Entries in source order.
        entries: Vec<ObjectEntry<'src>>,
        /// `(original, duplicate)` span pairs; each yields a `DuplicateKey` error at the
        /// duplicate's span that points back to the original.
        duplicate_key_spans: Vec<(Span, Span)>,
        /// Spans reported as `DanglingDocComment` errors.
        dangling_doc_comment_spans: Vec<Span>,
        /// When true, an `UnclosedObject` error is emitted for the object.
        unclosed: bool,
    },
    /// A sequence; emitted as `SequenceStart`, its errors, its elements, then `SequenceEnd`.
    Sequence {
        /// Elements in source order.
        elements: Vec<Atom<'src>>,
        /// When true, an `UnclosedSequence` error is emitted for the sequence.
        unclosed: bool,
        /// Spans reported as `CommaInSequence` errors.
        comma_spans: Vec<Span>,
    },
    /// Attribute entries; emitted as an object whose keys are bare scalars.
    Attributes(Vec<AttributeEntry<'src>>),
    /// A quoted scalar containing at least one malformed escape sequence.
    InvalidEscapeScalar {
        /// Text of the scalar with escapes still unprocessed. Escape errors are located
        /// relative to `span.start + 1`, and the value is still emitted as a quoted
        /// scalar after unescaping.
        raw_inner: Cow<'src, str>,
    },
    /// A parse error standing in for an atom.
    Error {
        /// Description of the problem. The emitter classifies the error by searching this
        /// text for "invalid tag name" and "expected a value"; anything else is reported
        /// as an unexpected token.
        message: &'src str,
    },
}
1568
/// One key/value entry of an object atom.
#[derive(Debug, Clone)]
struct ObjectEntry<'src> {
    /// The entry's key; its span is used for the `EntryStart` event.
    key: Atom<'src>,
    /// The entry's value; its span is used for `EntryEnd` unless `too_many_atoms_span` is set.
    value: Atom<'src>,
    /// Doc comment attached to the entry (span and lines), emitted as a `DocComment`
    /// event before `EntryStart`.
    doc_comment: Option<(Span, Vec<&'src str>)>,
    /// When set, a `TooManyAtoms` error is emitted at this span and `EntryEnd` uses it too.
    too_many_atoms_span: Option<Span>,
}
1576
/// One `key`/`value` pair of an attributes atom.
#[derive(Debug, Clone)]
struct AttributeEntry<'src> {
    /// Key text, emitted as a bare-scalar `Key` event.
    key: &'src str,
    /// Span of the key; used for both the `EntryStart` and the `Key` events.
    key_span: Span,
    /// Value of the attribute; its span is used for the `EntryEnd` event.
    value: Atom<'src>,
}
1583
/// Owned, hashable form of a key, used as the key type of the parser's `seen_keys` maps.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum KeyValue {
    /// A scalar key, compared by its text.
    Scalar(String),
    /// The unit key.
    Unit,
    /// A tagged key; both the tag name and the (recursively converted) payload take part
    /// in equality and hashing.
    Tagged {
        name: String,
        payload: Option<Box<KeyValue>>,
    },
}
1597
1598impl KeyValue {
1599 fn from_atom(atom: &Atom<'_>) -> Self {
1600 match &atom.content {
1601 AtomContent::Scalar { value, .. } => KeyValue::Scalar(value.to_string()),
1602 AtomContent::Unit => KeyValue::Unit,
1603 AtomContent::Tag { name, payload, .. } => KeyValue::Tagged {
1604 name: (*name).to_string(),
1605 payload: payload.as_ref().map(|p| Box::new(KeyValue::from_atom(p))),
1606 },
1607 AtomContent::Object { .. } => KeyValue::Scalar("{}".into()),
1608 AtomContent::Sequence { .. } => KeyValue::Scalar("()".into()),
1609 AtomContent::Attributes(_) => KeyValue::Scalar("{}".into()),
1610 AtomContent::InvalidEscapeScalar { raw_inner } => {
1611 KeyValue::Scalar(raw_inner.to_string())
1613 }
1614 AtomContent::Error { .. } => KeyValue::Scalar("<error>".into()),
1615 }
1616 }
1617
1618 fn to_key_string(&self) -> String {
1619 match self {
1620 KeyValue::Scalar(s) => s.clone(),
1621 KeyValue::Unit => "@".to_string(),
1622 KeyValue::Tagged { name, .. } => format!("@{}", name),
1623 }
1624 }
1625}
1626
/// What kind of value a path segment leads to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathValueKind {
    /// The segment may have children; every non-final segment of a path is recorded as this.
    Object,
    /// Extending a path through a segment of this kind is rejected with
    /// `PathError::NestIntoTerminal`.
    Terminal,
}
1636
/// Reasons `PathState::check_and_update` rejects a path.
#[derive(Debug, Clone)]
enum PathError {
    /// The path equals the current path; `original` is the span recorded for it.
    Duplicate { original: Span },
    /// The path re-enters a child that was already closed; `closed_path` is the full
    /// key path of that child.
    Reopened { closed_path: Vec<String> },
    /// The path tries to extend through a segment whose value is terminal;
    /// `terminal_path` is the key path of that segment.
    NestIntoTerminal { terminal_path: Vec<String> },
}
1643
/// One segment of the path currently tracked by `PathState`.
#[derive(Debug, Clone)]
struct PathSegment {
    /// Key of this segment.
    key: String,
    /// Span passed to `check_and_update` when the segment was recorded.
    span: Span,
    /// Whether the segment can be extended with further keys.
    value_kind: PathValueKind,
    /// Children that were popped from the current path beneath this segment, keyed by
    /// child key with the child's span as value; used to detect reopened paths.
    closed_children: HashMap<String, Span>,
}
1660
/// Tracks the most recently accepted key path so later paths can be validated against it.
#[derive(Default, Clone)]
struct PathState {
    /// Segments of the current path, outermost first.
    segments: Vec<PathSegment>,
}
1673
1674impl PathState {
1675 fn check_and_update(
1676 &mut self,
1677 path: &[String],
1678 span: Span,
1679 value_kind: PathValueKind,
1680 ) -> Result<(), PathError> {
1681 if path.is_empty() {
1682 return Ok(());
1683 }
1684
1685 let common_len = self
1687 .segments
1688 .iter()
1689 .zip(path.iter())
1690 .take_while(|(seg, key)| seg.key == **key)
1691 .count();
1692
1693 if common_len == path.len()
1696 && common_len == self.segments.len()
1697 && !self.segments.is_empty()
1698 {
1699 return Err(PathError::Duplicate {
1701 original: self.segments.last().unwrap().span,
1702 });
1703 }
1704
1705 while self.segments.len() > common_len {
1708 let closed_segment = self.segments.pop().unwrap();
1709
1710 if let Some(parent) = self.segments.last_mut() {
1712 parent
1713 .closed_children
1714 .insert(closed_segment.key, closed_segment.span);
1715 }
1716 }
1717
1718 for (i, key) in path.iter().enumerate().skip(common_len) {
1720 let is_last = i == path.len() - 1;
1721 let segment_value_kind = if is_last {
1722 value_kind
1723 } else {
1724 PathValueKind::Object
1725 };
1726
1727 if i == common_len && common_len < self.segments.len() {
1728 unreachable!("segments should have been truncated");
1730 }
1731
1732 if i < self.segments.len() {
1733 let existing = &self.segments[i];
1735 if existing.key == *key && is_last {
1736 return Err(PathError::Duplicate {
1737 original: existing.span,
1738 });
1739 }
1740 } else if i == 0 {
1741 if !self.segments.is_empty() && self.segments[0].key == *key {
1744 if is_last {
1745 return Err(PathError::Duplicate {
1746 original: self.segments[0].span,
1747 });
1748 }
1749 continue;
1751 }
1752 self.segments.push(PathSegment {
1754 key: key.clone(),
1755 span,
1756 value_kind: segment_value_kind,
1757 closed_children: HashMap::new(),
1758 });
1759 } else {
1760 let parent = &self.segments[i - 1];
1762
1763 if parent.value_kind == PathValueKind::Terminal {
1765 return Err(PathError::NestIntoTerminal {
1766 terminal_path: self.segments.iter().map(|s| s.key.clone()).collect(),
1767 });
1768 }
1769
1770 if parent.closed_children.contains_key(key) {
1772 return Err(PathError::Reopened {
1773 closed_path: self.segments[..i]
1774 .iter()
1775 .map(|s| s.key.clone())
1776 .chain(std::iter::once(key.clone()))
1777 .collect(),
1778 });
1779 }
1780
1781 self.segments.push(PathSegment {
1783 key: key.clone(),
1784 span,
1785 value_kind: segment_value_kind,
1786 closed_children: HashMap::new(),
1787 });
1788 }
1789 }
1790
1791 if let Some(last) = self.segments.last_mut() {
1793 last.value_kind = value_kind;
1794 }
1795
1796 Ok(())
1797 }
1798}
1799
/// Returns whether `name` is a valid tag name.
///
/// A valid name is non-empty, starts with an ASCII letter or `_`, and continues with
/// ASCII letters, ASCII digits, `_` or `-`.
fn is_valid_tag_name(name: &str) -> bool {
    // Every accepted character is ASCII, so the check can run on raw bytes: each byte
    // of a multi-byte UTF-8 sequence is >= 0x80 and fails both predicates.
    match name.as_bytes() {
        [] => false,
        [first, rest @ ..] => {
            let valid_start = *first == b'_' || first.is_ascii_alphabetic();
            valid_start
                && rest
                    .iter()
                    .all(|&b| matches!(b, b'_' | b'-') || b.is_ascii_alphanumeric())
        }
    }
}
1812
/// Decodes the escape sequences in the inner text of a quoted scalar.
///
/// Text without a backslash is returned borrowed. Otherwise `\n`, `\r`, `\t`, `\\` and
/// `\"` are replaced by the character they denote, and `\uXXXX` / `\u{...}` by the code
/// point they name. A unicode escape that does not decode to a character contributes
/// nothing to the output; any other backslash sequence, and a trailing backslash, is
/// kept verbatim.
fn unescape_quoted(text: &str) -> Cow<'_, str> {
    type Rest<'a> = std::iter::Peekable<std::str::Chars<'a>>;

    /// Parses `digits` as a hexadecimal code point.
    fn code_point(digits: &str) -> Option<char> {
        u32::from_str_radix(digits, 16).ok().and_then(char::from_u32)
    }

    /// Reads the body of `\u{...}` (opening brace already consumed) up to and
    /// including the closing brace, or to the end of input if there is none.
    fn braced(rest: &mut Rest<'_>) -> Option<char> {
        let mut digits = String::new();
        for ch in rest.by_ref() {
            if ch == '}' {
                break;
            }
            digits.push(ch);
        }
        code_point(&digits)
    }

    /// Reads up to four hex digits of `\uXXXX`; anything shorter decodes to nothing.
    fn fixed(rest: &mut Rest<'_>) -> Option<char> {
        let mut digits = String::with_capacity(4);
        while digits.len() < 4 {
            match rest.next_if(|ch| ch.is_ascii_hexdigit()) {
                Some(digit) => digits.push(digit),
                None => break,
            }
        }
        if digits.len() == 4 {
            code_point(&digits)
        } else {
            None
        }
    }

    if !text.contains('\\') {
        return Cow::Borrowed(text);
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text.chars().peekable();

    while let Some(ch) = rest.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }

        match rest.next() {
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('u') => {
                let decoded = if rest.next_if_eq(&'{').is_some() {
                    braced(&mut rest)
                } else {
                    fixed(&mut rest)
                };
                if let Some(decoded) = decoded {
                    out.push(decoded);
                }
            }
            // Unknown escape: keep it exactly as written.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Lone backslash at the end of the text.
            None => out.push('\\'),
        }
    }

    Cow::Owned(out)
}
1881
/// Finds every malformed escape sequence in the inner text of a quoted scalar.
///
/// Returns `(byte_offset, sequence)` pairs in source order, where `byte_offset` is the
/// position of the backslash within `text` and `sequence` is the offending text.
///
/// Accepted escapes are `\n`, `\r`, `\t`, `\\`, `\"`, `\uXXXX` with exactly four hex
/// digits, and `\u{...}` containing only hex digits and closed by `}`. Only the
/// syntax is checked: whether the digits name a valid code point is not verified here.
fn validate_escapes(text: &str) -> Vec<(usize, String)> {
    /// Upper bound, in bytes, on the reported text of a malformed `\u{...}` escape.
    const MAX_BRACED_REPORT_LEN: usize = 12;

    let mut errors = Vec::new();
    let mut chars = text.char_indices().peekable();

    while let Some((escape_start, c)) = chars.next() {
        if c != '\\' {
            continue;
        }

        match chars.next() {
            Some((_, 'n' | 'r' | 't' | '\\' | '"')) => {}
            Some((_, 'u')) => match chars.peek().map(|&(_, next)| next) {
                Some('{') => {
                    chars.next();
                    let mut valid = true;
                    let mut found_close = false;
                    for (_, c) in chars.by_ref() {
                        if c == '}' {
                            found_close = true;
                            break;
                        }
                        if !c.is_ascii_hexdigit() {
                            valid = false;
                        }
                    }
                    if !found_close || !valid {
                        let scanned_end = chars.peek().map_or(text.len(), |&(i, _)| i);
                        let mut end = scanned_end.min(escape_start + MAX_BRACED_REPORT_LEN);
                        // The byte cap can land inside a multi-byte character; slicing
                        // there would panic, so back off to the previous boundary.
                        while !text.is_char_boundary(end) {
                            end -= 1;
                        }
                        errors.push((escape_start, text[escape_start..end].to_string()));
                    }
                }
                Some(c) if c.is_ascii_hexdigit() => {
                    // The first digit was only peeked, not consumed, so counting
                    // starts at zero and the loop consumes up to four digits.
                    let mut count = 0;
                    while count < 4
                        && chars
                            .next_if(|&(_, c)| c.is_ascii_hexdigit())
                            .is_some()
                    {
                        count += 1;
                    }
                    if count != 4 {
                        let end = chars.peek().map_or(text.len(), |&(i, _)| i);
                        errors.push((escape_start, text[escape_start..end].to_string()));
                    }
                }
                _ => errors.push((escape_start, "\\u".to_string())),
            },
            Some((_, c)) => errors.push((escape_start, format!("\\{}", c))),
            None => errors.push((escape_start, "\\".to_string())),
        }
    }

    errors
}
1944
1945#[cfg(test)]
1946mod tests;