1pub use indexmap::IndexMap;
6
7pub mod tier1;
12pub mod tier1_stream;
13
// Hasher state shared by the two map aliases below. The `fast_hash` cargo
// feature selects foldhash's `FixedState`; without it the standard library's
// `RandomState` is used.
#[cfg(feature = "fast_hash")]
type DmsHasher = foldhash::fast::FixedState;
#[cfg(not(feature = "fast_hash"))]
type DmsHasher = std::collections::hash_map::RandomState;

/// `String`-keyed [`IndexMap`] built on the crate's configured hasher.
pub type DmsMap<V> = IndexMap<String, V, DmsHasher>;

/// `String`-keyed [`std::collections::HashMap`] built on the crate's
/// configured hasher.
pub type DmsHashMap<V> = std::collections::HashMap<String, V, DmsHasher>;
34
/// A decoded DMS value.
///
/// The four date/time variants carry their payload as a `String`; this type
/// does not parse them into a structured date/time representation.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// Boolean value.
    Bool(bool),
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Text value.
    String(String),
    /// Offset date-time, held as text.
    OffsetDateTime(String),
    /// Local date-time, held as text.
    LocalDateTime(String),
    /// Local date, held as text.
    LocalDate(String),
    /// Local time, held as text.
    LocalTime(String),
    /// Table backed by [`DmsMap`] (an `IndexMap`).
    Table(DmsMap<Value>),
    /// Table backed by [`DmsHashMap`] (a `HashMap`); iteration order is
    /// arbitrary, which is why [`EncodeError::UnorderedInFullMode`] exists.
    UnorderedTable(DmsHashMap<Value>),
    /// Sequence of values.
    List(Vec<Value>),
}
56
/// Failure reported while decoding DMS source.
///
/// Displays as `line:column: message`.
#[derive(Debug, Clone)]
pub struct DecodeError {
    /// Line number attached to the error.
    pub line: usize,
    /// Column number attached to the error.
    pub column: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl std::error::Error for DecodeError {}

impl std::fmt::Display for DecodeError {
    /// Writes the error as `<line>:<column>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            line,
            column,
            message,
        } = self;
        write!(f, "{line}:{column}: {message}")
    }
}
71
/// Former name of [`DecodeError`], kept as an alias for existing callers.
#[deprecated(since = "0.3.0", note = "renamed to `DecodeError`")]
pub type ParseError = DecodeError;
79
/// Failure reported while encoding a document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EncodeError {
    /// Full-mode encoding was asked to emit a document containing a
    /// `Value::UnorderedTable`.
    UnorderedInFullMode,
}

impl std::error::Error for EncodeError {}

impl std::fmt::Display for EncodeError {
    /// Writes the fixed explanatory message for the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Self::UnorderedInFullMode => {
                "encode (full-mode round-trip) refuses Document with \
                 Value::UnorderedTable; unordered tables have arbitrary \
                 iteration order — use encode_lite instead. \
                 See SPEC §\"Unordered tables\"."
            }
        };
        f.write_str(text)
    }
}
110
/// Result of decoding a whole DMS source: front matter, body, and the
/// side-tables the parser collected along the way.
#[derive(Debug, Clone, PartialEq)]
pub struct Document {
    /// Front-matter table, or `None` when the parser returned no front matter.
    pub meta: Option<DmsMap<Value>>,
    /// The decoded body value.
    pub body: Value,
    /// Comments collected by the parser, each with a position and path.
    pub comments: Vec<AttachedComment>,
    /// Original-literal records collected by the parser, keyed by path.
    pub original_forms: Vec<(Vec<BreadcrumbSegment>, OriginalLiteral)>,
}
141
/// Syntactic kind of a [`Comment`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentKind {
    /// Line comment.
    Line,
    /// Block comment.
    Block,
}
149
/// Placement recorded for an [`AttachedComment`].
///
/// NOTE(review): the exact attachment rules for each variant are defined by
/// the parser, which is not visible here — confirm before relying on the
/// one-line summaries below.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentPosition {
    /// Presumably: the comment precedes the node it is attached to.
    Leading,
    /// Presumably: the comment sits inside the node it is attached to.
    Inner,
    /// Presumably: the comment follows the node it is attached to.
    Trailing,
    /// Presumably: the comment is not adjacent to any particular node.
    Floating,
}
170
/// A single comment captured from the source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {
    /// Comment text. NOTE(review): whether delimiters are included is decided
    /// by the parser — confirm.
    pub content: String,
    /// Whether this is a line or block comment.
    pub kind: CommentKind,
}
179
/// One step of a path into a decoded value tree; paths are stored as
/// `Vec<BreadcrumbSegment>`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BreadcrumbSegment {
    /// Step identified by a string key (presumably a table key).
    Key(String),
    /// Step identified by a position (presumably a list index).
    Index(usize),
}
193
/// A [`Comment`] together with where it was found.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttachedComment {
    /// The comment itself.
    pub comment: Comment,
    /// Placement of the comment.
    pub position: CommentPosition,
    /// Breadcrumb path associated with the comment (presumably the node it
    /// is attached to — confirm against the parser).
    pub path: Vec<BreadcrumbSegment>,
}
204
/// Source-form record stored alongside a path in
/// [`Document::original_forms`].
#[derive(Debug, Clone, PartialEq)]
pub enum OriginalLiteral {
    /// Record for an integer; `lit` is presumably the literal text as written
    /// in the source — confirm against the parser.
    Integer { lit: String },
    /// Record for a string; `form` identifies which string syntax was used.
    String { form: StringForm },
}
220
/// Which string syntax a string value was written in.
#[derive(Debug, Clone, PartialEq)]
pub enum StringForm {
    /// Basic string.
    Basic,
    /// Literal string.
    Literal,
    /// Heredoc string.
    Heredoc {
        /// Which heredoc flavor was used.
        flavor: HeredocFlavor,
        /// Optional heredoc label.
        label: Option<String>,
        /// Modifier calls recorded for the heredoc.
        modifiers: Vec<HeredocModifierCall>,
    },
}
238
/// Flavor of a heredoc string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeredocFlavor {
    /// Triple-delimited heredoc with basic-string semantics (presumably
    /// escape processing — confirm against the spec).
    BasicTriple,
    /// Triple-delimited heredoc with literal-string semantics.
    LiteralTriple,
}
244
/// One modifier call attached to a heredoc: a name plus its arguments.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocModifierCall {
    /// Modifier name.
    pub name: String,
    /// Arguments passed to the modifier.
    pub args: Vec<Value>,
}
253
/// Decoding mode handed to the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseMode {
    /// Mode used by `decode` / `decode_document`.
    Full,
    /// Mode used by `decode_lite` / `decode_lite_document`.
    Lite,
}
263
/// Capability flag: this implementation offers lite-mode decoding.
pub const SUPPORTS_LITE_MODE: bool = true;

/// Capability flag: this implementation offers the `ignore_order` option.
pub const SUPPORTS_IGNORE_ORDER: bool = true;
275
276pub fn decode(src: &str) -> Result<Value, DecodeError> {
280 decode_document(src).map(|d| d.body)
281}
282
283pub fn decode_lite(src: &str) -> Result<Value, DecodeError> {
287 decode_document_with_mode(src, ParseMode::Lite).map(|d| d.body)
288}
289
290pub fn decode_lite_document(src: &str) -> Result<Document, DecodeError> {
294 decode_document_with_mode(src, ParseMode::Lite)
295}
296
297pub fn decode_document(src: &str) -> Result<Document, DecodeError> {
300 decode_document_with_options(src, ParseMode::Full, false)
301}
302
303pub fn decode_document_unordered(src: &str) -> Result<Document, DecodeError> {
314 decode_document_with_options(src, ParseMode::Full, true)
315}
316
317pub fn decode_lite_document_unordered(src: &str) -> Result<Document, DecodeError> {
322 decode_document_with_options(src, ParseMode::Lite, true)
323}
324
325pub fn decode_document_with_mode(src: &str, mode: ParseMode) -> Result<Document, DecodeError> {
327 decode_document_with_options(src, mode, false)
328}
329
330pub fn decode_document_with_options(
335 src: &str,
336 mode: ParseMode,
337 ignore_order: bool,
338) -> Result<Document, DecodeError> {
339 if src.starts_with('\u{FEFF}') {
344 return Err(DecodeError {
345 line: 1,
346 column: 1,
347 message: "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8"
348 .to_string(),
349 });
350 }
351 if let Some(off) = src.find('\0') {
354 let prefix = &src[..off];
355 let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();
356 let last_nl = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
357 let col = off - last_nl + 1;
358 return Err(DecodeError {
359 line,
360 column: col,
361 message: "U+0000 (NUL) is not allowed in DMS source".to_string(),
362 });
363 }
364 let normalized = nfc_normalize(src);
369 let mut p = Parser::new_with_mode(&normalized, mode);
370 p.ignore_order = ignore_order;
371 let meta = p.parse_front_matter()?;
372 let body = p.parse_body()?;
373 Ok(Document {
374 meta,
375 body,
376 comments: p.comments,
377 original_forms: p.original_forms,
378 })
379}
380
381pub(crate) fn decode_document_accepting_tier1(
390 src: &str,
391) -> Result<
392 (
393 Document,
394 u32,
395 Vec<(Vec<BreadcrumbSegment>, tier1::DecorationPosition, tier1::DecoratorCall)>,
396 Vec<Vec<BreadcrumbSegment>>,
397 ),
398 DecodeError,
399> {
400 if src.starts_with('\u{FEFF}') {
401 return Err(DecodeError {
402 line: 1,
403 column: 1,
404 message: "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8"
405 .to_string(),
406 });
407 }
408 if let Some(off) = src.find('\0') {
409 let prefix = &src[..off];
410 let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();
411 let last_nl = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
412 let col = off - last_nl + 1;
413 return Err(DecodeError {
414 line,
415 column: col,
416 message: "U+0000 (NUL) is not allowed in DMS source".to_string(),
417 });
418 }
419 let normalized = nfc_normalize(src);
420 let mut p = Parser::new_with_mode(&normalized, ParseMode::Full);
421 p.accept_tier1 = true;
422 let meta = p.parse_front_matter()?;
423 let tier = p.observed_tier;
424 let body = p.parse_body()?;
425 let raw_decorations = p.decorations_raw;
426 let decoration_only_paths = p.decoration_only_paths;
427 Ok((
428 Document {
429 meta,
430 body,
431 comments: p.comments,
432 original_forms: p.original_forms,
433 },
434 tier,
435 raw_decorations,
436 decoration_only_paths,
437 ))
438}
439
440pub(crate) fn parse_value_t1(
454 src: &str,
455) -> Result<
456 (
457 Value,
458 Vec<(Vec<BreadcrumbSegment>, tier1::DecorationPosition, tier1::DecoratorCall)>,
459 ),
460 DecodeError,
461> {
462 let mut p = Parser::new_with_mode(src, ParseMode::Full);
463 p.accept_tier1 = true;
464 p.observed_tier = 1;
465 let v = p.parse_body()?;
466 Ok((v, p.decorations_raw))
468}
469
470pub fn decode_front_matter(src: &str) -> Result<Option<Value>, DecodeError> {
495 if src.starts_with('\u{FEFF}') {
499 return Err(DecodeError {
500 line: 1,
501 column: 1,
502 message: "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8"
503 .to_string(),
504 });
505 }
506 if let Some(off) = src.find('\0') {
507 let prefix = &src[..off];
508 let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();
509 let last_nl = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
510 let col = off - last_nl + 1;
511 return Err(DecodeError {
512 line,
513 column: col,
514 message: "U+0000 (NUL) is not allowed in DMS source".to_string(),
515 });
516 }
517 let normalized = nfc_normalize(src);
518 let mut p = Parser::new_with_mode(&normalized, ParseMode::Lite);
519 let meta = p.parse_front_matter()?;
520 Ok(meta.map(Value::Table))
523}
524
/// Deprecated alias that forwards to [`decode`].
#[deprecated(since = "0.3.0", note = "use `decode` instead")]
pub fn parse(src: &str) -> Result<Value, DecodeError> {
    decode(src)
}
538
/// Deprecated alias that forwards to [`decode_lite`].
#[deprecated(since = "0.3.0", note = "use `decode_lite` instead")]
pub fn parse_lite(src: &str) -> Result<Value, DecodeError> {
    decode_lite(src)
}
544
/// Deprecated alias that forwards to [`decode_lite_document`].
#[deprecated(since = "0.3.0", note = "use `decode_lite_document` instead")]
pub fn parse_lite_document(src: &str) -> Result<Document, DecodeError> {
    decode_lite_document(src)
}
550
/// Deprecated alias that forwards to [`decode_document`].
#[deprecated(since = "0.3.0", note = "use `decode_document` instead")]
pub fn parse_document(src: &str) -> Result<Document, DecodeError> {
    decode_document(src)
}
556
/// Deprecated alias that forwards to [`decode_document_unordered`].
#[deprecated(since = "0.3.0", note = "use `decode_document_unordered` instead")]
pub fn parse_document_unordered(src: &str) -> Result<Document, DecodeError> {
    decode_document_unordered(src)
}
562
/// Deprecated alias that forwards to [`decode_lite_document_unordered`].
#[deprecated(since = "0.3.0", note = "use `decode_lite_document_unordered` instead")]
pub fn parse_lite_document_unordered(src: &str) -> Result<Document, DecodeError> {
    decode_lite_document_unordered(src)
}
568
/// Deprecated alias that forwards to [`decode_document_with_mode`].
#[deprecated(since = "0.3.0", note = "use `decode_document_with_mode` instead")]
pub fn parse_document_with_mode(src: &str, mode: ParseMode) -> Result<Document, DecodeError> {
    decode_document_with_mode(src, mode)
}
574
/// Deprecated alias that forwards to [`decode_document_with_options`].
#[deprecated(since = "0.3.0", note = "use `decode_document_with_options` instead")]
pub fn parse_document_with_options(
    src: &str,
    mode: ParseMode,
    ignore_order: bool,
) -> Result<Document, DecodeError> {
    decode_document_with_options(src, mode, ignore_order)
}
584
585fn nfc_normalize(s: &str) -> String {
589 use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
590 match is_nfc_quick(s.chars()) {
591 IsNormalized::Yes => s.to_string(),
592 _ => s.nfc().collect(),
593 }
594}
595
596static XID_CONTINUE_RANGES: &[(u32, u32)] = &[
604 (0x00AA, 0x00AA),
605 (0x00B5, 0x00B5),
606 (0x00B7, 0x00B7),
607 (0x00BA, 0x00BA),
608 (0x00C0, 0x00D6),
609 (0x00D8, 0x00F6),
610 (0x00F8, 0x02C1),
611 (0x02C6, 0x02D1),
612 (0x02E0, 0x02E4),
613 (0x02EC, 0x02EC),
614 (0x02EE, 0x02EE),
615 (0x0300, 0x034E),
616 (0x0350, 0x0374),
617 (0x0376, 0x0377),
618 (0x037B, 0x037D),
619 (0x037F, 0x037F),
620 (0x0386, 0x038A),
621 (0x038C, 0x038C),
622 (0x038E, 0x03A1),
623 (0x03A3, 0x03F5),
624 (0x03F7, 0x0481),
625 (0x0483, 0x0487),
626 (0x048A, 0x052F),
627 (0x0531, 0x0556),
628 (0x0559, 0x0559),
629 (0x0560, 0x0588),
630 (0x0591, 0x05BD),
631 (0x05BF, 0x05BF),
632 (0x05C1, 0x05C2),
633 (0x05C4, 0x05C5),
634 (0x05C7, 0x05C7),
635 (0x05D0, 0x05EA),
636 (0x05EF, 0x05F2),
637 (0x0610, 0x061A),
638 (0x0620, 0x0669),
639 (0x066E, 0x06D3),
640 (0x06D5, 0x06DC),
641 (0x06DF, 0x06E8),
642 (0x06EA, 0x06FC),
643 (0x06FF, 0x06FF),
644 (0x0710, 0x074A),
645 (0x074D, 0x07B1),
646 (0x07C0, 0x07F5),
647 (0x07FA, 0x07FA),
648 (0x07FD, 0x07FD),
649 (0x0800, 0x082D),
650 (0x0840, 0x085B),
651 (0x0860, 0x086A),
652 (0x0870, 0x0887),
653 (0x0889, 0x088E),
654 (0x0898, 0x08E1),
655 (0x08E3, 0x0963),
656 (0x0966, 0x096F),
657 (0x0971, 0x0983),
658 (0x0985, 0x098C),
659 (0x098F, 0x0990),
660 (0x0993, 0x09A8),
661 (0x09AA, 0x09B0),
662 (0x09B2, 0x09B2),
663 (0x09B6, 0x09B9),
664 (0x09BC, 0x09C4),
665 (0x09C7, 0x09C8),
666 (0x09CB, 0x09CE),
667 (0x09D7, 0x09D7),
668 (0x09DC, 0x09DD),
669 (0x09DF, 0x09E3),
670 (0x09E6, 0x09F1),
671 (0x09FC, 0x09FC),
672 (0x09FE, 0x09FE),
673 (0x0A01, 0x0A03),
674 (0x0A05, 0x0A0A),
675 (0x0A0F, 0x0A10),
676 (0x0A13, 0x0A28),
677 (0x0A2A, 0x0A30),
678 (0x0A32, 0x0A33),
679 (0x0A35, 0x0A36),
680 (0x0A38, 0x0A39),
681 (0x0A3C, 0x0A3C),
682 (0x0A3E, 0x0A42),
683 (0x0A47, 0x0A48),
684 (0x0A4B, 0x0A4D),
685 (0x0A51, 0x0A51),
686 (0x0A59, 0x0A5C),
687 (0x0A5E, 0x0A5E),
688 (0x0A66, 0x0A75),
689 (0x0A81, 0x0A83),
690 (0x0A85, 0x0A8D),
691 (0x0A8F, 0x0A91),
692 (0x0A93, 0x0AA8),
693 (0x0AAA, 0x0AB0),
694 (0x0AB2, 0x0AB3),
695 (0x0AB5, 0x0AB9),
696 (0x0ABC, 0x0AC5),
697 (0x0AC7, 0x0AC9),
698 (0x0ACB, 0x0ACD),
699 (0x0AD0, 0x0AD0),
700 (0x0AE0, 0x0AE3),
701 (0x0AE6, 0x0AEF),
702 (0x0AF9, 0x0AFF),
703 (0x0B01, 0x0B03),
704 (0x0B05, 0x0B0C),
705 (0x0B0F, 0x0B10),
706 (0x0B13, 0x0B28),
707 (0x0B2A, 0x0B30),
708 (0x0B32, 0x0B33),
709 (0x0B35, 0x0B39),
710 (0x0B3C, 0x0B44),
711 (0x0B47, 0x0B48),
712 (0x0B4B, 0x0B4D),
713 (0x0B55, 0x0B57),
714 (0x0B5C, 0x0B5D),
715 (0x0B5F, 0x0B63),
716 (0x0B66, 0x0B6F),
717 (0x0B71, 0x0B71),
718 (0x0B82, 0x0B83),
719 (0x0B85, 0x0B8A),
720 (0x0B8E, 0x0B90),
721 (0x0B92, 0x0B95),
722 (0x0B99, 0x0B9A),
723 (0x0B9C, 0x0B9C),
724 (0x0B9E, 0x0B9F),
725 (0x0BA3, 0x0BA4),
726 (0x0BA8, 0x0BAA),
727 (0x0BAE, 0x0BB9),
728 (0x0BBE, 0x0BC2),
729 (0x0BC6, 0x0BC8),
730 (0x0BCA, 0x0BCD),
731 (0x0BD0, 0x0BD0),
732 (0x0BD7, 0x0BD7),
733 (0x0BE6, 0x0BEF),
734 (0x0C00, 0x0C0C),
735 (0x0C0E, 0x0C10),
736 (0x0C12, 0x0C28),
737 (0x0C2A, 0x0C39),
738 (0x0C3C, 0x0C44),
739 (0x0C46, 0x0C48),
740 (0x0C4A, 0x0C4D),
741 (0x0C55, 0x0C56),
742 (0x0C58, 0x0C5A),
743 (0x0C5D, 0x0C5D),
744 (0x0C60, 0x0C63),
745 (0x0C66, 0x0C6F),
746 (0x0C80, 0x0C83),
747 (0x0C85, 0x0C8C),
748 (0x0C8E, 0x0C90),
749 (0x0C92, 0x0CA8),
750 (0x0CAA, 0x0CB3),
751 (0x0CB5, 0x0CB9),
752 (0x0CBC, 0x0CC4),
753 (0x0CC6, 0x0CC8),
754 (0x0CCA, 0x0CCD),
755 (0x0CD5, 0x0CD6),
756 (0x0CDD, 0x0CDE),
757 (0x0CE0, 0x0CE3),
758 (0x0CE6, 0x0CEF),
759 (0x0CF1, 0x0CF3),
760 (0x0D00, 0x0D0C),
761 (0x0D0E, 0x0D10),
762 (0x0D12, 0x0D44),
763 (0x0D46, 0x0D48),
764 (0x0D4A, 0x0D4E),
765 (0x0D54, 0x0D57),
766 (0x0D5F, 0x0D63),
767 (0x0D66, 0x0D6F),
768 (0x0D7A, 0x0D7F),
769 (0x0D81, 0x0D83),
770 (0x0D85, 0x0D96),
771 (0x0D9A, 0x0DB1),
772 (0x0DB3, 0x0DBB),
773 (0x0DBD, 0x0DBD),
774 (0x0DC0, 0x0DC6),
775 (0x0DCA, 0x0DCA),
776 (0x0DCF, 0x0DD4),
777 (0x0DD6, 0x0DD6),
778 (0x0DD8, 0x0DDF),
779 (0x0DE6, 0x0DEF),
780 (0x0DF2, 0x0DF3),
781 (0x0E01, 0x0E3A),
782 (0x0E40, 0x0E4E),
783 (0x0E50, 0x0E59),
784 (0x0E81, 0x0E82),
785 (0x0E84, 0x0E84),
786 (0x0E86, 0x0E8A),
787 (0x0E8C, 0x0EA3),
788 (0x0EA5, 0x0EA5),
789 (0x0EA7, 0x0EBD),
790 (0x0EC0, 0x0EC4),
791 (0x0EC6, 0x0EC6),
792 (0x0EC8, 0x0ECE),
793 (0x0ED0, 0x0ED9),
794 (0x0EDC, 0x0EDF),
795 (0x0F00, 0x0F00),
796 (0x0F18, 0x0F19),
797 (0x0F20, 0x0F29),
798 (0x0F35, 0x0F35),
799 (0x0F37, 0x0F37),
800 (0x0F39, 0x0F39),
801 (0x0F3E, 0x0F47),
802 (0x0F49, 0x0F6C),
803 (0x0F71, 0x0F84),
804 (0x0F86, 0x0F97),
805 (0x0F99, 0x0FBC),
806 (0x0FC6, 0x0FC6),
807 (0x1000, 0x1049),
808 (0x1050, 0x109D),
809 (0x10A0, 0x10C5),
810 (0x10C7, 0x10C7),
811 (0x10CD, 0x10CD),
812 (0x10D0, 0x10FA),
813 (0x10FC, 0x115E),
814 (0x1161, 0x1248),
815 (0x124A, 0x124D),
816 (0x1250, 0x1256),
817 (0x1258, 0x1258),
818 (0x125A, 0x125D),
819 (0x1260, 0x1288),
820 (0x128A, 0x128D),
821 (0x1290, 0x12B0),
822 (0x12B2, 0x12B5),
823 (0x12B8, 0x12BE),
824 (0x12C0, 0x12C0),
825 (0x12C2, 0x12C5),
826 (0x12C8, 0x12D6),
827 (0x12D8, 0x1310),
828 (0x1312, 0x1315),
829 (0x1318, 0x135A),
830 (0x135D, 0x135F),
831 (0x1369, 0x1371),
832 (0x1380, 0x138F),
833 (0x13A0, 0x13F5),
834 (0x13F8, 0x13FD),
835 (0x1401, 0x166C),
836 (0x166F, 0x167F),
837 (0x1681, 0x169A),
838 (0x16A0, 0x16EA),
839 (0x16EE, 0x16F8),
840 (0x1700, 0x1715),
841 (0x171F, 0x1734),
842 (0x1740, 0x1753),
843 (0x1760, 0x176C),
844 (0x176E, 0x1770),
845 (0x1772, 0x1773),
846 (0x1780, 0x17B3),
847 (0x17B6, 0x17D3),
848 (0x17D7, 0x17D7),
849 (0x17DC, 0x17DD),
850 (0x17E0, 0x17E9),
851 (0x1810, 0x1819),
852 (0x1820, 0x1878),
853 (0x1880, 0x18AA),
854 (0x18B0, 0x18F5),
855 (0x1900, 0x191E),
856 (0x1920, 0x192B),
857 (0x1930, 0x193B),
858 (0x1946, 0x196D),
859 (0x1970, 0x1974),
860 (0x1980, 0x19AB),
861 (0x19B0, 0x19C9),
862 (0x19D0, 0x19DA),
863 (0x1A00, 0x1A1B),
864 (0x1A20, 0x1A5E),
865 (0x1A60, 0x1A7C),
866 (0x1A7F, 0x1A89),
867 (0x1A90, 0x1A99),
868 (0x1AA7, 0x1AA7),
869 (0x1AB0, 0x1ABD),
870 (0x1ABF, 0x1ACE),
871 (0x1B00, 0x1B4C),
872 (0x1B50, 0x1B59),
873 (0x1B6B, 0x1B73),
874 (0x1B80, 0x1BF3),
875 (0x1C00, 0x1C37),
876 (0x1C40, 0x1C49),
877 (0x1C4D, 0x1C7D),
878 (0x1C80, 0x1C88),
879 (0x1C90, 0x1CBA),
880 (0x1CBD, 0x1CBF),
881 (0x1CD0, 0x1CD2),
882 (0x1CD4, 0x1CFA),
883 (0x1D00, 0x1F15),
884 (0x1F18, 0x1F1D),
885 (0x1F20, 0x1F45),
886 (0x1F48, 0x1F4D),
887 (0x1F50, 0x1F57),
888 (0x1F59, 0x1F59),
889 (0x1F5B, 0x1F5B),
890 (0x1F5D, 0x1F5D),
891 (0x1F5F, 0x1F7D),
892 (0x1F80, 0x1FB4),
893 (0x1FB6, 0x1FBC),
894 (0x1FBE, 0x1FBE),
895 (0x1FC2, 0x1FC4),
896 (0x1FC6, 0x1FCC),
897 (0x1FD0, 0x1FD3),
898 (0x1FD6, 0x1FDB),
899 (0x1FE0, 0x1FEC),
900 (0x1FF2, 0x1FF4),
901 (0x1FF6, 0x1FFC),
902 (0x203F, 0x2040),
903 (0x2054, 0x2054),
904 (0x2071, 0x2071),
905 (0x207F, 0x207F),
906 (0x2090, 0x209C),
907 (0x20D0, 0x20DC),
908 (0x20E1, 0x20E1),
909 (0x20E5, 0x20F0),
910 (0x2102, 0x2102),
911 (0x2107, 0x2107),
912 (0x210A, 0x2113),
913 (0x2115, 0x2115),
914 (0x2118, 0x211D),
915 (0x2124, 0x2124),
916 (0x2126, 0x2126),
917 (0x2128, 0x2128),
918 (0x212A, 0x2139),
919 (0x213C, 0x213F),
920 (0x2145, 0x2149),
921 (0x214E, 0x214E),
922 (0x2160, 0x2188),
923 (0x2C00, 0x2CE4),
924 (0x2CEB, 0x2CF3),
925 (0x2D00, 0x2D25),
926 (0x2D27, 0x2D27),
927 (0x2D2D, 0x2D2D),
928 (0x2D30, 0x2D67),
929 (0x2D6F, 0x2D6F),
930 (0x2D7F, 0x2D96),
931 (0x2DA0, 0x2DA6),
932 (0x2DA8, 0x2DAE),
933 (0x2DB0, 0x2DB6),
934 (0x2DB8, 0x2DBE),
935 (0x2DC0, 0x2DC6),
936 (0x2DC8, 0x2DCE),
937 (0x2DD0, 0x2DD6),
938 (0x2DD8, 0x2DDE),
939 (0x2DE0, 0x2DFF),
940 (0x3005, 0x3007),
941 (0x3021, 0x302F),
942 (0x3031, 0x3035),
943 (0x3038, 0x303C),
944 (0x3041, 0x3096),
945 (0x3099, 0x309A),
946 (0x309D, 0x309F),
947 (0x30A1, 0x30FF),
948 (0x3105, 0x312F),
949 (0x3131, 0x3163),
950 (0x3165, 0x318E),
951 (0x31A0, 0x31BF),
952 (0x31F0, 0x31FF),
953 (0x3400, 0x4DBF),
954 (0x4E00, 0xA48C),
955 (0xA4D0, 0xA4FD),
956 (0xA500, 0xA60C),
957 (0xA610, 0xA62B),
958 (0xA640, 0xA66F),
959 (0xA674, 0xA67D),
960 (0xA67F, 0xA6F1),
961 (0xA717, 0xA71F),
962 (0xA722, 0xA788),
963 (0xA78B, 0xA7CA),
964 (0xA7D0, 0xA7D1),
965 (0xA7D3, 0xA7D3),
966 (0xA7D5, 0xA7D9),
967 (0xA7F2, 0xA827),
968 (0xA82C, 0xA82C),
969 (0xA840, 0xA873),
970 (0xA880, 0xA8C5),
971 (0xA8D0, 0xA8D9),
972 (0xA8E0, 0xA8F7),
973 (0xA8FB, 0xA8FB),
974 (0xA8FD, 0xA92D),
975 (0xA930, 0xA953),
976 (0xA960, 0xA97C),
977 (0xA980, 0xA9C0),
978 (0xA9CF, 0xA9D9),
979 (0xA9E0, 0xA9FE),
980 (0xAA00, 0xAA36),
981 (0xAA40, 0xAA4D),
982 (0xAA50, 0xAA59),
983 (0xAA60, 0xAA76),
984 (0xAA7A, 0xAAC2),
985 (0xAADB, 0xAADD),
986 (0xAAE0, 0xAAEF),
987 (0xAAF2, 0xAAF6),
988 (0xAB01, 0xAB06),
989 (0xAB09, 0xAB0E),
990 (0xAB11, 0xAB16),
991 (0xAB20, 0xAB26),
992 (0xAB28, 0xAB2E),
993 (0xAB30, 0xAB5A),
994 (0xAB5C, 0xAB69),
995 (0xAB70, 0xABEA),
996 (0xABEC, 0xABED),
997 (0xABF0, 0xABF9),
998 (0xAC00, 0xD7A3),
999 (0xD7B0, 0xD7C6),
1000 (0xD7CB, 0xD7FB),
1001 (0xF900, 0xFA6D),
1002 (0xFA70, 0xFAD9),
1003 (0xFB00, 0xFB06),
1004 (0xFB13, 0xFB17),
1005 (0xFB1D, 0xFB28),
1006 (0xFB2A, 0xFB36),
1007 (0xFB38, 0xFB3C),
1008 (0xFB3E, 0xFB3E),
1009 (0xFB40, 0xFB41),
1010 (0xFB43, 0xFB44),
1011 (0xFB46, 0xFBB1),
1012 (0xFBD3, 0xFC5D),
1013 (0xFC64, 0xFD3D),
1014 (0xFD50, 0xFD8F),
1015 (0xFD92, 0xFDC7),
1016 (0xFDF0, 0xFDF9),
1017 (0xFE20, 0xFE2F),
1018 (0xFE33, 0xFE34),
1019 (0xFE4D, 0xFE4F),
1020 (0xFE71, 0xFE71),
1021 (0xFE73, 0xFE73),
1022 (0xFE77, 0xFE77),
1023 (0xFE79, 0xFE79),
1024 (0xFE7B, 0xFE7B),
1025 (0xFE7D, 0xFE7D),
1026 (0xFE7F, 0xFEFC),
1027 (0xFF10, 0xFF19),
1028 (0xFF21, 0xFF3A),
1029 (0xFF3F, 0xFF3F),
1030 (0xFF41, 0xFF5A),
1031 (0xFF65, 0xFF9F),
1032 (0xFFA1, 0xFFBE),
1033 (0xFFC2, 0xFFC7),
1034 (0xFFCA, 0xFFCF),
1035 (0xFFD2, 0xFFD7),
1036 (0xFFDA, 0xFFDC),
1037 (0x10000, 0x1000B),
1038 (0x1000D, 0x10026),
1039 (0x10028, 0x1003A),
1040 (0x1003C, 0x1003D),
1041 (0x1003F, 0x1004D),
1042 (0x10050, 0x1005D),
1043 (0x10080, 0x100FA),
1044 (0x10140, 0x10174),
1045 (0x101FD, 0x101FD),
1046 (0x10280, 0x1029C),
1047 (0x102A0, 0x102D0),
1048 (0x102E0, 0x102E0),
1049 (0x10300, 0x1031F),
1050 (0x1032D, 0x1034A),
1051 (0x10350, 0x1037A),
1052 (0x10380, 0x1039D),
1053 (0x103A0, 0x103C3),
1054 (0x103C8, 0x103CF),
1055 (0x103D1, 0x103D5),
1056 (0x10400, 0x1049D),
1057 (0x104A0, 0x104A9),
1058 (0x104B0, 0x104D3),
1059 (0x104D8, 0x104FB),
1060 (0x10500, 0x10527),
1061 (0x10530, 0x10563),
1062 (0x10570, 0x1057A),
1063 (0x1057C, 0x1058A),
1064 (0x1058C, 0x10592),
1065 (0x10594, 0x10595),
1066 (0x10597, 0x105A1),
1067 (0x105A3, 0x105B1),
1068 (0x105B3, 0x105B9),
1069 (0x105BB, 0x105BC),
1070 (0x10600, 0x10736),
1071 (0x10740, 0x10755),
1072 (0x10760, 0x10767),
1073 (0x10780, 0x10785),
1074 (0x10787, 0x107B0),
1075 (0x107B2, 0x107BA),
1076 (0x10800, 0x10805),
1077 (0x10808, 0x10808),
1078 (0x1080A, 0x10835),
1079 (0x10837, 0x10838),
1080 (0x1083C, 0x1083C),
1081 (0x1083F, 0x10855),
1082 (0x10860, 0x10876),
1083 (0x10880, 0x1089E),
1084 (0x108E0, 0x108F2),
1085 (0x108F4, 0x108F5),
1086 (0x10900, 0x10915),
1087 (0x10920, 0x10939),
1088 (0x10980, 0x109B7),
1089 (0x109BE, 0x109BF),
1090 (0x10A00, 0x10A03),
1091 (0x10A05, 0x10A06),
1092 (0x10A0C, 0x10A13),
1093 (0x10A15, 0x10A17),
1094 (0x10A19, 0x10A35),
1095 (0x10A38, 0x10A3A),
1096 (0x10A3F, 0x10A3F),
1097 (0x10A60, 0x10A7C),
1098 (0x10A80, 0x10A9C),
1099 (0x10AC0, 0x10AC7),
1100 (0x10AC9, 0x10AE6),
1101 (0x10B00, 0x10B35),
1102 (0x10B40, 0x10B55),
1103 (0x10B60, 0x10B72),
1104 (0x10B80, 0x10B91),
1105 (0x10C00, 0x10C48),
1106 (0x10C80, 0x10CB2),
1107 (0x10CC0, 0x10CF2),
1108 (0x10D00, 0x10D27),
1109 (0x10D30, 0x10D39),
1110 (0x10E80, 0x10EA9),
1111 (0x10EAB, 0x10EAC),
1112 (0x10EB0, 0x10EB1),
1113 (0x10EFD, 0x10F1C),
1114 (0x10F27, 0x10F27),
1115 (0x10F30, 0x10F50),
1116 (0x10F70, 0x10F85),
1117 (0x10FB0, 0x10FC4),
1118 (0x10FE0, 0x10FF6),
1119 (0x11000, 0x11046),
1120 (0x11066, 0x11075),
1121 (0x1107F, 0x110BA),
1122 (0x110C2, 0x110C2),
1123 (0x110D0, 0x110E8),
1124 (0x110F0, 0x110F9),
1125 (0x11100, 0x11134),
1126 (0x11136, 0x1113F),
1127 (0x11144, 0x11147),
1128 (0x11150, 0x11173),
1129 (0x11176, 0x11176),
1130 (0x11180, 0x111C4),
1131 (0x111C9, 0x111CC),
1132 (0x111CE, 0x111DA),
1133 (0x111DC, 0x111DC),
1134 (0x11200, 0x11211),
1135 (0x11213, 0x11237),
1136 (0x1123E, 0x11241),
1137 (0x11280, 0x11286),
1138 (0x11288, 0x11288),
1139 (0x1128A, 0x1128D),
1140 (0x1128F, 0x1129D),
1141 (0x1129F, 0x112A8),
1142 (0x112B0, 0x112EA),
1143 (0x112F0, 0x112F9),
1144 (0x11300, 0x11303),
1145 (0x11305, 0x1130C),
1146 (0x1130F, 0x11310),
1147 (0x11313, 0x11328),
1148 (0x1132A, 0x11330),
1149 (0x11332, 0x11333),
1150 (0x11335, 0x11339),
1151 (0x1133B, 0x11344),
1152 (0x11347, 0x11348),
1153 (0x1134B, 0x1134D),
1154 (0x11350, 0x11350),
1155 (0x11357, 0x11357),
1156 (0x1135D, 0x11363),
1157 (0x11366, 0x1136C),
1158 (0x11370, 0x11374),
1159 (0x11400, 0x1144A),
1160 (0x11450, 0x11459),
1161 (0x1145E, 0x11461),
1162 (0x11480, 0x114C5),
1163 (0x114C7, 0x114C7),
1164 (0x114D0, 0x114D9),
1165 (0x11580, 0x115B5),
1166 (0x115B8, 0x115C0),
1167 (0x115D8, 0x115DD),
1168 (0x11600, 0x11640),
1169 (0x11644, 0x11644),
1170 (0x11650, 0x11659),
1171 (0x11680, 0x116B8),
1172 (0x116C0, 0x116C9),
1173 (0x11700, 0x1171A),
1174 (0x1171D, 0x1172B),
1175 (0x11730, 0x11739),
1176 (0x11740, 0x11746),
1177 (0x11800, 0x1183A),
1178 (0x118A0, 0x118E9),
1179 (0x118FF, 0x11906),
1180 (0x11909, 0x11909),
1181 (0x1190C, 0x11913),
1182 (0x11915, 0x11916),
1183 (0x11918, 0x11935),
1184 (0x11937, 0x11938),
1185 (0x1193B, 0x11943),
1186 (0x11950, 0x11959),
1187 (0x119A0, 0x119A7),
1188 (0x119AA, 0x119D7),
1189 (0x119DA, 0x119E1),
1190 (0x119E3, 0x119E4),
1191 (0x11A00, 0x11A3E),
1192 (0x11A47, 0x11A47),
1193 (0x11A50, 0x11A99),
1194 (0x11A9D, 0x11A9D),
1195 (0x11AB0, 0x11AF8),
1196 (0x11C00, 0x11C08),
1197 (0x11C0A, 0x11C36),
1198 (0x11C38, 0x11C40),
1199 (0x11C50, 0x11C59),
1200 (0x11C72, 0x11C8F),
1201 (0x11C92, 0x11CA7),
1202 (0x11CA9, 0x11CB6),
1203 (0x11D00, 0x11D06),
1204 (0x11D08, 0x11D09),
1205 (0x11D0B, 0x11D36),
1206 (0x11D3A, 0x11D3A),
1207 (0x11D3C, 0x11D3D),
1208 (0x11D3F, 0x11D47),
1209 (0x11D50, 0x11D59),
1210 (0x11D60, 0x11D65),
1211 (0x11D67, 0x11D68),
1212 (0x11D6A, 0x11D8E),
1213 (0x11D90, 0x11D91),
1214 (0x11D93, 0x11D98),
1215 (0x11DA0, 0x11DA9),
1216 (0x11EE0, 0x11EF6),
1217 (0x11F00, 0x11F10),
1218 (0x11F12, 0x11F3A),
1219 (0x11F3E, 0x11F42),
1220 (0x11F50, 0x11F59),
1221 (0x11FB0, 0x11FB0),
1222 (0x12000, 0x12399),
1223 (0x12400, 0x1246E),
1224 (0x12480, 0x12543),
1225 (0x12F90, 0x12FF0),
1226 (0x13000, 0x1342F),
1227 (0x13440, 0x13455),
1228 (0x14400, 0x14646),
1229 (0x16800, 0x16A38),
1230 (0x16A40, 0x16A5E),
1231 (0x16A60, 0x16A69),
1232 (0x16A70, 0x16ABE),
1233 (0x16AC0, 0x16AC9),
1234 (0x16AD0, 0x16AED),
1235 (0x16AF0, 0x16AF4),
1236 (0x16B00, 0x16B36),
1237 (0x16B40, 0x16B43),
1238 (0x16B50, 0x16B59),
1239 (0x16B63, 0x16B77),
1240 (0x16B7D, 0x16B8F),
1241 (0x16E40, 0x16E7F),
1242 (0x16F00, 0x16F4A),
1243 (0x16F4F, 0x16F87),
1244 (0x16F8F, 0x16F9F),
1245 (0x16FE0, 0x16FE1),
1246 (0x16FE3, 0x16FE4),
1247 (0x16FF0, 0x16FF1),
1248 (0x17000, 0x187F7),
1249 (0x18800, 0x18CD5),
1250 (0x18D00, 0x18D08),
1251 (0x1AFF0, 0x1AFF3),
1252 (0x1AFF5, 0x1AFFB),
1253 (0x1AFFD, 0x1AFFE),
1254 (0x1B000, 0x1B122),
1255 (0x1B132, 0x1B132),
1256 (0x1B150, 0x1B152),
1257 (0x1B155, 0x1B155),
1258 (0x1B164, 0x1B167),
1259 (0x1B170, 0x1B2FB),
1260 (0x1BC00, 0x1BC6A),
1261 (0x1BC70, 0x1BC7C),
1262 (0x1BC80, 0x1BC88),
1263 (0x1BC90, 0x1BC99),
1264 (0x1BC9D, 0x1BC9E),
1265 (0x1CF00, 0x1CF2D),
1266 (0x1CF30, 0x1CF46),
1267 (0x1D165, 0x1D169),
1268 (0x1D16D, 0x1D172),
1269 (0x1D17B, 0x1D182),
1270 (0x1D185, 0x1D18B),
1271 (0x1D1AA, 0x1D1AD),
1272 (0x1D242, 0x1D244),
1273 (0x1D400, 0x1D454),
1274 (0x1D456, 0x1D49C),
1275 (0x1D49E, 0x1D49F),
1276 (0x1D4A2, 0x1D4A2),
1277 (0x1D4A5, 0x1D4A6),
1278 (0x1D4A9, 0x1D4AC),
1279 (0x1D4AE, 0x1D4B9),
1280 (0x1D4BB, 0x1D4BB),
1281 (0x1D4BD, 0x1D4C3),
1282 (0x1D4C5, 0x1D505),
1283 (0x1D507, 0x1D50A),
1284 (0x1D50D, 0x1D514),
1285 (0x1D516, 0x1D51C),
1286 (0x1D51E, 0x1D539),
1287 (0x1D53B, 0x1D53E),
1288 (0x1D540, 0x1D544),
1289 (0x1D546, 0x1D546),
1290 (0x1D54A, 0x1D550),
1291 (0x1D552, 0x1D6A5),
1292 (0x1D6A8, 0x1D6C0),
1293 (0x1D6C2, 0x1D6DA),
1294 (0x1D6DC, 0x1D6FA),
1295 (0x1D6FC, 0x1D714),
1296 (0x1D716, 0x1D734),
1297 (0x1D736, 0x1D74E),
1298 (0x1D750, 0x1D76E),
1299 (0x1D770, 0x1D788),
1300 (0x1D78A, 0x1D7A8),
1301 (0x1D7AA, 0x1D7C2),
1302 (0x1D7C4, 0x1D7CB),
1303 (0x1D7CE, 0x1D7FF),
1304 (0x1DA00, 0x1DA36),
1305 (0x1DA3B, 0x1DA6C),
1306 (0x1DA75, 0x1DA75),
1307 (0x1DA84, 0x1DA84),
1308 (0x1DA9B, 0x1DA9F),
1309 (0x1DAA1, 0x1DAAF),
1310 (0x1DF00, 0x1DF1E),
1311 (0x1DF25, 0x1DF2A),
1312 (0x1E000, 0x1E006),
1313 (0x1E008, 0x1E018),
1314 (0x1E01B, 0x1E021),
1315 (0x1E023, 0x1E024),
1316 (0x1E026, 0x1E02A),
1317 (0x1E030, 0x1E06D),
1318 (0x1E08F, 0x1E08F),
1319 (0x1E100, 0x1E12C),
1320 (0x1E130, 0x1E13D),
1321 (0x1E140, 0x1E149),
1322 (0x1E14E, 0x1E14E),
1323 (0x1E290, 0x1E2AE),
1324 (0x1E2C0, 0x1E2F9),
1325 (0x1E4D0, 0x1E4F9),
1326 (0x1E7E0, 0x1E7E6),
1327 (0x1E7E8, 0x1E7EB),
1328 (0x1E7ED, 0x1E7EE),
1329 (0x1E7F0, 0x1E7FE),
1330 (0x1E800, 0x1E8C4),
1331 (0x1E8D0, 0x1E8D6),
1332 (0x1E900, 0x1E94B),
1333 (0x1E950, 0x1E959),
1334 (0x1EE00, 0x1EE03),
1335 (0x1EE05, 0x1EE1F),
1336 (0x1EE21, 0x1EE22),
1337 (0x1EE24, 0x1EE24),
1338 (0x1EE27, 0x1EE27),
1339 (0x1EE29, 0x1EE32),
1340 (0x1EE34, 0x1EE37),
1341 (0x1EE39, 0x1EE39),
1342 (0x1EE3B, 0x1EE3B),
1343 (0x1EE42, 0x1EE42),
1344 (0x1EE47, 0x1EE47),
1345 (0x1EE49, 0x1EE49),
1346 (0x1EE4B, 0x1EE4B),
1347 (0x1EE4D, 0x1EE4F),
1348 (0x1EE51, 0x1EE52),
1349 (0x1EE54, 0x1EE54),
1350 (0x1EE57, 0x1EE57),
1351 (0x1EE59, 0x1EE59),
1352 (0x1EE5B, 0x1EE5B),
1353 (0x1EE5D, 0x1EE5D),
1354 (0x1EE5F, 0x1EE5F),
1355 (0x1EE61, 0x1EE62),
1356 (0x1EE64, 0x1EE64),
1357 (0x1EE67, 0x1EE6A),
1358 (0x1EE6C, 0x1EE72),
1359 (0x1EE74, 0x1EE77),
1360 (0x1EE79, 0x1EE7C),
1361 (0x1EE7E, 0x1EE7E),
1362 (0x1EE80, 0x1EE89),
1363 (0x1EE8B, 0x1EE9B),
1364 (0x1EEA1, 0x1EEA3),
1365 (0x1EEA5, 0x1EEA9),
1366 (0x1EEAB, 0x1EEBB),
1367 (0x1FBF0, 0x1FBF9),
1368 (0x20000, 0x2A6DF),
1369 (0x2A700, 0x2B739),
1370 (0x2B740, 0x2B81D),
1371 (0x2B820, 0x2CEA1),
1372 (0x2CEB0, 0x2EBE0),
1373 (0x2EBF0, 0x2EE5D),
1374 (0x2F800, 0x2FA1D),
1375 (0x30000, 0x3134A),
1376 (0x31350, 0x323AF),
1377];
1378
1379fn is_xid_continue(cp: u32) -> bool {
1384 if cp < 0x80 {
1385 return false;
1386 }
1387 XID_CONTINUE_RANGES
1388 .binary_search_by(|&(lo, hi)| {
1389 if cp < lo {
1390 std::cmp::Ordering::Greater
1391 } else if cp > hi {
1392 std::cmp::Ordering::Less
1393 } else {
1394 std::cmp::Ordering::Equal
1395 }
1396 })
1397 .is_ok()
1398}
1399
1400pub(crate) fn is_bare_key_char(c: char) -> bool {
1401 if is_reserved_emoji_codepoint(c as u32) {
1402 return false;
1403 }
1404 c == '_'
1405 || c == '-'
1406 || c.is_ascii_alphanumeric()
1407 || (!c.is_ascii() && is_xid_continue(c as u32))
1408}
1409
// Inclusive `(low, high)` code-point ranges consulted by
// `is_extended_pictographic`. The table is binary-searched, so entries must
// remain sorted and non-overlapping. Note the gaps at 0x1F1E6..=0x1F1FF and
// 0x1F3FB..=0x1F3FF: those ranges are tested separately by
// `is_reserved_emoji_codepoint`.
static EXTENDED_PICTOGRAPHIC_RANGES: &[(u32, u32)] = &[
    (0x00A9, 0x00A9), (0x00AE, 0x00AE), (0x203C, 0x203C), (0x2049, 0x2049),
    (0x2122, 0x2122), (0x2139, 0x2139), (0x2194, 0x2199), (0x21A9, 0x21AA),
    (0x231A, 0x231B), (0x2328, 0x2328), (0x2388, 0x2388), (0x23CF, 0x23CF),
    (0x23E9, 0x23F3), (0x23F8, 0x23FA), (0x24C2, 0x24C2), (0x25AA, 0x25AB),
    (0x25B6, 0x25B6), (0x25C0, 0x25C0), (0x25FB, 0x25FE), (0x2600, 0x2605),
    (0x2607, 0x2612), (0x2614, 0x2685), (0x2690, 0x2705), (0x2708, 0x2712),
    (0x2714, 0x2714), (0x2716, 0x2716), (0x271D, 0x271D), (0x2721, 0x2721),
    (0x2728, 0x2728), (0x2733, 0x2734), (0x2744, 0x2744), (0x2747, 0x2747),
    (0x274C, 0x274C), (0x274E, 0x274E), (0x2753, 0x2755), (0x2757, 0x2757),
    (0x2763, 0x2767), (0x2795, 0x2797), (0x27A1, 0x27A1), (0x27B0, 0x27B0),
    (0x27BF, 0x27BF), (0x2934, 0x2935), (0x2B05, 0x2B07), (0x2B1B, 0x2B1C),
    (0x2B50, 0x2B50), (0x2B55, 0x2B55), (0x3030, 0x3030), (0x303D, 0x303D),
    (0x3297, 0x3297), (0x3299, 0x3299), (0x1F000, 0x1F0FF), (0x1F10D, 0x1F10F),
    (0x1F12F, 0x1F12F), (0x1F16C, 0x1F171), (0x1F17E, 0x1F17F), (0x1F18E, 0x1F18E),
    (0x1F191, 0x1F19A), (0x1F1AD, 0x1F1E5), (0x1F201, 0x1F20F), (0x1F21A, 0x1F21A),
    (0x1F22F, 0x1F22F), (0x1F232, 0x1F23A), (0x1F23C, 0x1F23F), (0x1F249, 0x1F3FA),
    (0x1F400, 0x1F53D), (0x1F546, 0x1F64F), (0x1F680, 0x1F6FF), (0x1F774, 0x1F77F),
    (0x1F7D5, 0x1F7FF), (0x1F80C, 0x1F80F), (0x1F848, 0x1F84F), (0x1F85A, 0x1F85F),
    (0x1F888, 0x1F88F), (0x1F8AE, 0x1F8FF), (0x1F90C, 0x1F93A), (0x1F93C, 0x1F945),
    (0x1F947, 0x1FAFF), (0x1FC00, 0x1FFFD),
];
1436
1437fn is_extended_pictographic(cp: u32) -> bool {
1438 if cp < 0xA9 {
1439 return false;
1440 }
1441 EXTENDED_PICTOGRAPHIC_RANGES
1442 .binary_search_by(|&(lo, hi)| {
1443 if cp < lo { std::cmp::Ordering::Greater }
1444 else if cp > hi { std::cmp::Ordering::Less }
1445 else { std::cmp::Ordering::Equal }
1446 })
1447 .is_ok()
1448}
1449
1450pub(crate) fn is_reserved_emoji_codepoint(cp: u32) -> bool {
1460 if (0x1F1E6..=0x1F1FF).contains(&cp) { return true; } if (0x1F3FB..=0x1F3FF).contains(&cp) { return true; } if cp == 0x20E3 { return true; } is_extended_pictographic(cp)
1464}
1465
/// Reports whether `cp` lies in `0x1F1E6..=0x1F1FF`, the regional-indicator
/// range.
#[inline]
pub(crate) fn is_regional_indicator(cp: u32) -> bool {
    matches!(cp, 0x1F1E6..=0x1F1FF)
}
1470
/// Reports whether `cp` lies in `0x1F3FB..=0x1F3FF`, the emoji-modifier
/// range.
#[inline]
pub(crate) fn is_emoji_modifier(cp: u32) -> bool {
    matches!(cp, 0x1F3FB..=0x1F3FF)
}
1475
1476pub(crate) fn read_reserved_emoji_atom(s: &str, start: usize) -> Option<usize> {
1489 let bytes = s.as_bytes();
1490 if start >= bytes.len() {
1491 return None;
1492 }
1493 let mut chars = s[start..].char_indices();
1494 let (_, c0) = chars.next()?;
1495 let cp0 = c0 as u32;
1496 if !is_reserved_emoji_codepoint(cp0) {
1497 return None;
1498 }
1499 let mut end = start + c0.len_utf8();
1500
1501 if is_regional_indicator(cp0) {
1503 if let Some((_, c1)) = chars.next() {
1504 if is_regional_indicator(c1 as u32) {
1505 end += c1.len_utf8();
1506 }
1507 }
1508 return Some(end);
1509 }
1510
1511 loop {
1514 let rest = &s[end..];
1515 let mut it = rest.chars();
1516 let Some(c) = it.next() else { break };
1517 let cp = c as u32;
1518 if is_emoji_modifier(cp) || cp == 0xFE0F || cp == 0x20E3 {
1519 end += c.len_utf8();
1521 continue;
1522 }
1523 if cp == 0x200D {
1524 let after_zwj = end + c.len_utf8();
1526 let after = &s[after_zwj..];
1527 if let Some(nc) = after.chars().next() {
1528 if is_extended_pictographic(nc as u32) {
1529 end = after_zwj + nc.len_utf8();
1530 continue;
1531 }
1532 }
1533 break;
1535 }
1536 break;
1537 }
1538 Some(end)
1539}
1540
/// A label may begin with an ASCII letter or an underscore.
fn is_label_start(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
1544
/// After its first character a label may contain ASCII letters, ASCII digits
/// and underscores.
fn is_label_cont(c: char) -> bool {
    matches!(c, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
1548
/// Checks whether the first ten bytes of `s` have the shape `DDDD-DD-DD`
/// (ASCII digits separated by hyphens). Only the shape is tested; the digits
/// are not validated as a real calendar date.
fn looks_like_date_prefix(s: &str) -> bool {
    // `d` stands for "any ASCII digit"; every other byte must match literally.
    const SHAPE: &[u8; 10] = b"dddd-dd-dd";

    let Some(head) = s.as_bytes().get(..SHAPE.len()) else {
        return false;
    };
    head.iter().zip(SHAPE).all(|(&got, &want)| {
        if want == b'd' {
            got.is_ascii_digit()
        } else {
            got == want
        }
    })
}
1563
/// Checks whether the first eight bytes of `s` have the shape `DD:DD:DD`
/// (ASCII digits separated by colons). Only the shape is tested; the digits
/// are not validated as a real time of day.
fn looks_like_time_prefix(s: &str) -> bool {
    // `d` stands for "any ASCII digit"; every other byte must match literally.
    const SHAPE: &[u8; 8] = b"dd:dd:dd";

    let Some(head) = s.as_bytes().get(..SHAPE.len()) else {
        return false;
    };
    head.iter().zip(SHAPE).all(|(&got, &want)| {
        if want == b'd' {
            got.is_ascii_digit()
        } else {
            got == want
        }
    })
}
1576
/// Cursor-based parser state over a single source string.
///
/// Positions (`pos`, `line_start`) are byte offsets into `src`; columns
/// reported in errors are derived from them.
struct Parser<'a> {
    /// The text being parsed. `new_with_mode` strips one leading U+FEFF (BOM)
    /// before storing it.
    src: &'a str,
    /// Byte offset of the cursor within `src`.
    pos: usize,
    /// Current line number; starts at 1 and is bumped whenever a line
    /// terminator is consumed.
    line: usize,
    /// Byte offset in `src` at which the current line begins.
    line_start: usize,

    /// Comments that have already been attached to a path and position.
    comments: Vec<AttachedComment>,
    /// Comments collected by `skip_trivia` that are not attached yet; they
    /// are later flushed either as `Leading` on the next node or as
    /// `Floating`.
    pending_leading: Vec<Comment>,
    /// Breadcrumb path of the node currently being parsed (keys and list
    /// indices are pushed/popped around each entry).
    path: Vec<BreadcrumbSegment>,
    /// `(path, literal form)` records pushed by `record_form`.
    original_forms: Vec<(Vec<BreadcrumbSegment>, OriginalLiteral)>,
    /// When `false`, `record_form` does nothing. Temporarily cleared while
    /// quoted keys are parsed.
    record_forms: bool,
    /// `true` when constructed with `ParseMode::Lite`; suppresses the
    /// recording of comments and original forms.
    lite: bool,
    /// When `true`, tables are produced as `Value::UnorderedTable` instead of
    /// `Value::Table`.
    ignore_order: bool,
    /// Whether front matter declaring `_dms_tier >= 1` (or `_dms_imports`) is
    /// accepted rather than rejected.
    accept_tier1: bool,
    /// Tier taken from the front matter's `_dms_tier`; 0 until one is seen.
    /// Tier-1 handling is active when this is at least 1.
    observed_tier: u32,
    /// Decorator calls read at line start by `skip_trivia` that are not
    /// attached yet (flushed alongside `pending_leading`).
    pending_leading_decorators: Vec<tier1::DecoratorCall>,
    /// Attached decorator calls as `(path, position, call)` triples.
    decorations_raw: Vec<(Vec<BreadcrumbSegment>, tier1::DecorationPosition, tier1::DecoratorCall)>,
    /// Paths of keys / list items for which only inner decorators were found
    /// before end of line, so an empty placeholder table was produced.
    decoration_only_paths: Vec<Vec<BreadcrumbSegment>>,
}
1642
1643impl<'a> Parser<'a> {
1644 #[cfg(test)]
1645 fn new(src: &'a str) -> Self {
1646 Self::new_with_mode(src, ParseMode::Full)
1647 }
1648
1649 fn new_with_mode(src: &'a str, mode: ParseMode) -> Self {
1650 let src = src.strip_prefix('\u{feff}').unwrap_or(src);
1651 Self {
1652 src,
1653 pos: 0,
1654 line: 1,
1655 line_start: 0,
1656 comments: Vec::new(),
1657 pending_leading: Vec::new(),
1658 path: Vec::new(),
1659 original_forms: Vec::new(),
1660 record_forms: true,
1661 lite: matches!(mode, ParseMode::Lite),
1662 ignore_order: false,
1663 accept_tier1: false,
1664 observed_tier: 0,
1665 pending_leading_decorators: Vec::new(),
1666 decorations_raw: Vec::new(),
1667 decoration_only_paths: Vec::new(),
1668 }
1669 }
1670
1671 fn is_t1_active(&self) -> bool {
1674 self.observed_tier >= 1
1675 }
1676
1677 fn record_form(&mut self, lit: OriginalLiteral) {
1682 if self.lite || !self.record_forms {
1683 return;
1684 }
1685 self.original_forms.push((self.path.clone(), lit));
1686 }
1687
1688 fn col(&self) -> usize {
1691 self.pos - self.line_start + 1
1693 }
1694
1695 fn err(&self, msg: impl Into<String>) -> DecodeError {
1696 DecodeError { line: self.line, column: self.col(), message: msg.into() }
1697 }
1698
1699 fn err_at(&self, line: usize, line_start: usize, pos: usize, msg: impl Into<String>) -> DecodeError {
1700 DecodeError { line, column: pos.saturating_sub(line_start) + 1, message: msg.into() }
1701 }
1702
1703 fn peek(&self) -> Option<char> {
1704 self.src[self.pos..].chars().next()
1705 }
1706
1707 fn rest(&self) -> &str {
1708 &self.src[self.pos..]
1709 }
1710
1711 fn bump(&mut self) -> Option<char> {
1712 let c = self.peek()?;
1713 self.pos += c.len_utf8();
1714 Some(c)
1715 }
1716
1717 fn eof(&self) -> bool {
1718 self.pos >= self.src.len()
1719 }
1720
1721 fn advance_line(&mut self) {
1722 self.line += 1;
1723 self.line_start = self.pos;
1724 }
1725
1726 fn skip_inline_ws(&mut self) {
1729 while matches!(self.peek(), Some(' ') | Some('\t')) {
1730 self.bump();
1731 }
1732 }
1733
1734 fn consume_eol(&mut self) -> bool {
1737 if self.peek() == Some('\n') {
1738 self.bump();
1739 self.advance_line();
1740 true
1741 } else if self.rest().starts_with("\r\n") {
1742 self.pos += 2;
1743 self.advance_line();
1744 true
1745 } else {
1746 false
1747 }
1748 }
1749
    /// Skips everything that is not content: blank lines, `#` / `//` line
    /// comments, `###` and `/* */` block comments and, when tier 1 is active,
    /// decorator calls that begin a line.
    ///
    /// Comments are queued in `pending_leading` (unless in lite mode) and
    /// decorator calls in `pending_leading_decorators`; a blank line flushes
    /// whatever is queued as floating.
    ///
    /// On return the cursor is either at end of input, or back at the
    /// position it had at the top of the final loop iteration, i.e. before
    /// any inline whitespace that precedes the content, so callers can still
    /// measure indentation.
    ///
    /// # Errors
    /// Fails on a lone `\r`, on malformed/unterminated block comments, on a
    /// decorator call that does not parse, and on anything other than end of
    /// line following a leading decorator.
    fn skip_trivia(&mut self) -> Result<(), DecodeError> {
        loop {
            // Remember where this iteration began so a content line can be
            // handed back with its leading whitespace intact.
            let line_start_pos = self.pos;
            self.skip_inline_ws();
            match self.peek() {
                Some('\n') | Some('\r') => {
                    if self.peek() == Some('\r') && !self.rest().starts_with("\r\n") {
                        return Err(self.err("bare CR is not a valid line terminator"));
                    }
                    // A blank line separates queued comments/decorators from
                    // whatever follows, so they become floating.
                    self.flush_pending_as_floating();
                    self.consume_eol();
                }
                Some('#') => {
                    if self.rest().starts_with("###") {
                        // The block reader is always run so the cursor
                        // advances; only the storing is skipped in lite mode.
                        let raw = self.read_hash_block_comment()?;
                        if !self.lite {
                            self.pending_leading.push(Comment {
                                content: raw,
                                kind: CommentKind::Block,
                            });
                        }
                    } else {
                        let raw = self.read_line_comment_to_eol();
                        // Result ignored: a lone `\r` here is reported by the
                        // next loop iteration.
                        self.consume_eol();
                        if !self.lite {
                            self.pending_leading.push(Comment {
                                content: raw,
                                kind: CommentKind::Line,
                            });
                        }
                    }
                }
                Some('/') if self.rest().starts_with("//") => {
                    let raw = self.read_line_comment_to_eol();
                    self.consume_eol();
                    if !self.lite {
                        self.pending_leading.push(Comment {
                            content: raw,
                            kind: CommentKind::Line,
                        });
                    }
                }
                Some('/') if self.rest().starts_with("/*") => {
                    let raw = self.read_c_block_comment()?;
                    if !self.lite {
                        self.pending_leading.push(Comment {
                            content: raw,
                            kind: CommentKind::Block,
                        });
                    }
                }
                // Tier 1 only: a decorator call standing at the start of a line.
                Some(c) if self.is_t1_active() && tier1::is_sigil_atom_start(c) => {
                    let (call, end) = tier1::parse_decorator_call(self.src, self.pos)?;
                    // The call was parsed outside this cursor, so re-walk the
                    // consumed bytes to keep line bookkeeping in sync with
                    // any newlines inside it.
                    let mut walk = self.pos;
                    while walk < end {
                        if self.src.as_bytes()[walk] == b'\n' {
                            self.line += 1;
                            self.line_start = walk + 1;
                        }
                        walk += 1;
                    }
                    self.pos = end;
                    self.skip_inline_ws();
                    if !(self.consume_eol() || self.eof()) {
                        return Err(self.err("trailing content after leading decorator (only EOL allowed in this build)"));
                    }
                    self.pending_leading_decorators.push(call);
                }
                // Real content: rewind over the whitespace skipped above.
                Some(_) => {
                    self.pos = line_start_pos;
                    return Ok(());
                }
                None => return Ok(()),
            }
        }
    }
1848
1849 fn flush_pending_as_floating(&mut self) {
1854 let had_decorators = !self.pending_leading_decorators.is_empty();
1855 if !self.pending_leading.is_empty() {
1856 let drained: Vec<Comment> = self.pending_leading.drain(..).collect();
1857 for c in drained {
1858 self.comments.push(AttachedComment {
1859 comment: c,
1860 position: CommentPosition::Floating,
1861 path: self.path.clone(),
1862 });
1863 }
1864 }
1865 if had_decorators {
1866 let drained: Vec<tier1::DecoratorCall> =
1867 self.pending_leading_decorators.drain(..).collect();
1868 for call in drained {
1869 self.decorations_raw.push((
1870 self.path.clone(),
1871 tier1::DecorationPosition::Floating,
1872 call,
1873 ));
1874 }
1875 }
1876 }
1877
1878 fn flush_pending_as_leading_on_current(&mut self) {
1882 let had_decorators = !self.pending_leading_decorators.is_empty();
1883 if !self.pending_leading.is_empty() {
1884 let drained: Vec<Comment> = self.pending_leading.drain(..).collect();
1885 for c in drained {
1886 self.comments.push(AttachedComment {
1887 comment: c,
1888 position: CommentPosition::Leading,
1889 path: self.path.clone(),
1890 });
1891 }
1892 }
1893 if had_decorators {
1894 let drained: Vec<tier1::DecoratorCall> =
1895 self.pending_leading_decorators.drain(..).collect();
1896 for call in drained {
1897 self.decorations_raw.push((
1898 self.path.clone(),
1899 tier1::DecorationPosition::Leading,
1900 call,
1901 ));
1902 }
1903 }
1904 }
1905
1906 fn read_line_comment_to_eol(&mut self) -> String {
1907 let start = self.pos;
1908 while let Some(c) = self.peek() {
1909 if c == '\n' || c == '\r' {
1910 break;
1911 }
1912 self.bump();
1913 }
1914 self.src[start..self.pos].to_string()
1915 }
1916
1917 fn read_c_block_comment(&mut self) -> Result<String, DecodeError> {
1920 let start_line = self.line;
1921 let start_lstart = self.line_start;
1922 let start_pos = self.pos;
1923 self.pos += 2;
1925 let mut depth = 1usize;
1926 while depth > 0 {
1927 match self.peek() {
1928 None => {
1929 return Err(self.err_at(
1930 start_line,
1931 start_lstart,
1932 start_pos,
1933 "unterminated /* block comment",
1934 ));
1935 }
1936 Some('/') if self.rest().starts_with("/*") => {
1937 self.pos += 2;
1938 depth += 1;
1939 }
1940 Some('*') if self.rest().starts_with("*/") => {
1941 self.pos += 2;
1942 depth -= 1;
1943 }
1944 Some('\n') => {
1945 self.bump();
1946 self.advance_line();
1947 }
1948 Some('\r') if self.rest().starts_with("\r\n") => {
1949 self.pos += 2;
1950 self.advance_line();
1951 }
1952 Some(_) => {
1953 self.bump();
1954 }
1955 }
1956 }
1957 Ok(self.src[start_pos..self.pos].to_string())
1958 }
1959
1960 fn read_hash_block_comment(&mut self) -> Result<String, DecodeError> {
1965 let start_line = self.line;
1966 let start_lstart = self.line_start;
1967 let start_pos = self.pos;
1968 self.pos += 3;
1970 let label_start = self.pos;
1972 while let Some(c) = self.peek() {
1973 if !(c == '_' || c.is_ascii_alphanumeric()) {
1974 break;
1975 }
1976 self.bump();
1977 }
1978 let label_str: String = self.src[label_start..self.pos].to_string();
1979 if !label_str.is_empty() {
1980 if !label_str.chars().next().unwrap().is_ascii_alphabetic()
1982 && !label_str.starts_with('_')
1983 {
1984 return Err(self.err_at(
1985 start_line,
1986 start_lstart,
1987 start_pos,
1988 "block comment label must start with a letter or underscore",
1989 ));
1990 }
1991 }
1992 let terminator: String = if label_str.is_empty() {
1993 "###".to_string()
1994 } else {
1995 label_str.clone()
1996 };
1997 self.skip_inline_ws();
1999 if !(self.consume_eol() || self.eof()) {
2000 return Err(self.err(
2001 "block comment opener must be on its own line",
2002 ));
2003 }
2004 loop {
2007 if self.eof() {
2008 return Err(self.err_at(
2009 start_line,
2010 start_lstart,
2011 start_pos,
2012 "unterminated ### block comment",
2013 ));
2014 }
2015 let line_begin = self.pos;
2017 while let Some(c) = self.peek() {
2018 if c == '\n' || c == '\r' {
2019 break;
2020 }
2021 self.bump();
2022 }
2023 let line_text = &self.src[line_begin..self.pos];
2024 let line_end = self.pos;
2028 let _ = self.consume_eol();
2029 if line_text.trim() == terminator {
2030 return Ok(self.src[start_pos..line_end].to_string());
2031 }
2032 }
2033 }
2034
2035 fn parse_front_matter(&mut self) -> Result<Option<DmsMap<Value>>, DecodeError> {
2042 let save_pos = self.pos;
2047 let save_line = self.line;
2048 let save_lstart = self.line_start;
2049 let save_pending = self.pending_leading.len();
2050 let save_comments = self.comments.len();
2051 self.skip_trivia()?;
2052 if !self.rest().starts_with("+++") {
2054 self.pos = save_pos;
2055 self.line = save_line;
2056 self.line_start = save_lstart;
2057 self.pending_leading.truncate(save_pending);
2061 self.comments.truncate(save_comments);
2062 return Ok(None);
2063 }
2064 let opener_line = self.line;
2071 let opener_lstart = self.line_start;
2072 let opener_pos = self.pos;
2073 self.pos += 3;
2075 self.skip_inline_ws();
2076 if !(self.consume_eol() || self.eof()) {
2077 return Err(self.err(
2078 "front matter opener must be on its own line",
2079 ));
2080 }
2081 let mut inner = String::new();
2083 loop {
2084 if self.eof() {
2085 return Err(self.err_at(
2086 opener_line, opener_lstart, opener_pos,
2087 "unterminated front matter: missing closing '+++'",
2088 ));
2089 }
2090 let line_begin = self.pos;
2092 while let Some(c) = self.peek() {
2093 if c == '\n' || c == '\r' { break; }
2094 self.bump();
2095 }
2096 let line_text = &self.src[line_begin..self.pos];
2097 if line_text.trim() == "+++" {
2099 let _ = self.consume_eol();
2100 break;
2101 }
2102 inner.push_str(line_text);
2104 if self.consume_eol() {
2105 inner.push('\n');
2106 }
2107 }
2108 let inner_mode = if self.lite { ParseMode::Lite } else { ParseMode::Full };
2112 let mut sub = Parser::new_with_mode(&inner, inner_mode);
2113 let table = sub.parse_body_as_table()?;
2114 let mut meta = DmsMap::default();
2116 for (k, v) in table {
2117 if k.strip_prefix('_').is_some() {
2118 match k.as_str() {
2119 "_dms_tier" => {
2120 let Value::Integer(n) = v else {
2121 return Err(self.err_at(
2122 opener_line, opener_lstart, opener_pos,
2123 "_dms_tier must be a non-negative integer",
2124 ));
2125 };
2126 if n < 0 {
2127 return Err(self.err_at(
2128 opener_line, opener_lstart, opener_pos,
2129 "_dms_tier must be non-negative",
2130 ));
2131 }
2132 if n >= 1 {
2133 if self.accept_tier1 {
2134 self.observed_tier = n as u32;
2138 } else {
2139 return Err(self.err_at(
2144 opener_line, opener_lstart, opener_pos,
2145 format!(
2146 "_dms_tier: {n} found, but this decoder only supports tier 0. Use decode_t1."
2147 ),
2148 ));
2149 }
2150 }
2151 }
2153 "_dms_imports" => {
2154 if self.accept_tier1 {
2155 meta.insert(k, v);
2159 } else {
2160 return Err(self.err_at(
2163 opener_line, opener_lstart, opener_pos,
2164 "_dms_imports requires _dms_tier: 1; \
2165 tier-0 documents must not contain _dms_imports",
2166 ));
2167 }
2168 }
2169 other => {
2170 return Err(self.err_at(
2171 opener_line, opener_lstart, opener_pos,
2172 format!("unknown reserved key: {other}"),
2173 ));
2174 }
2175 }
2176 } else {
2177 meta.insert(k, v);
2178 }
2179 }
2180 for ac in sub.comments {
2188 let attached_to_reserved = matches!(
2189 ac.path.first(),
2190 Some(BreadcrumbSegment::Key(k0)) if k0.starts_with('_')
2191 );
2192 if attached_to_reserved {
2193 self.comments.push(AttachedComment {
2194 comment: ac.comment,
2195 position: CommentPosition::Floating,
2196 path: vec![BreadcrumbSegment::Key("__fm__".to_string())],
2197 });
2198 continue;
2199 }
2200 let mut new_path = Vec::with_capacity(ac.path.len() + 1);
2201 new_path.push(BreadcrumbSegment::Key("__fm__".to_string()));
2202 new_path.extend(ac.path);
2203 self.comments.push(AttachedComment {
2204 comment: ac.comment,
2205 position: ac.position,
2206 path: new_path,
2207 });
2208 }
2209 for (path, lit) in sub.original_forms {
2212 if let Some(BreadcrumbSegment::Key(k0)) = path.first() {
2213 if k0.starts_with('_') {
2214 continue;
2215 }
2216 }
2217 let mut new_path = Vec::with_capacity(path.len() + 1);
2218 new_path.push(BreadcrumbSegment::Key("__fm__".to_string()));
2219 new_path.extend(path);
2220 self.original_forms.push((new_path, lit));
2221 }
2222 Ok(Some(meta))
2223 }
2224
2225 fn parse_body_as_table(&mut self) -> Result<DmsMap<Value>, DecodeError> {
2227 self.skip_trivia()?;
2228 if self.eof() {
2229 self.flush_pending_as_floating();
2234 return Ok(DmsMap::default());
2235 }
2236 if matches!(self.peek(), Some(' ') | Some('\t')) {
2237 return Err(self.err("unexpected indentation inside front matter"));
2238 }
2239 self.reject_reserved_decorator_sigil()?;
2241 if self.peek() == Some('+') && self.peek_after_plus_is_space_or_eol() {
2242 return Err(self.err("front matter block cannot have a list root"));
2243 }
2244 if !self.line_starts_kvpair() {
2245 return Err(self.err("front matter block must be a table"));
2246 }
2247 let t = self.parse_table_block(0)?;
2248 self.skip_trivia()?;
2249 if !self.eof() {
2250 return Err(self.err("trailing content inside front matter"));
2251 }
2252 self.flush_pending_as_floating();
2254 Ok(t)
2255 }
2256
2257 fn parse_body(&mut self) -> Result<Value, DecodeError> {
2259 self.skip_trivia()?;
2260 if self.eof() {
2261 self.flush_pending_as_floating();
2263 if self.ignore_order {
2264 return Ok(Value::UnorderedTable(DmsHashMap::default()));
2265 }
2266 return Ok(Value::Table(DmsMap::default()));
2267 }
2268 if matches!(self.peek(), Some(' ') | Some('\t')) {
2270 return Err(self.err("unexpected indentation at document root"));
2271 }
2272 self.reject_reserved_decorator_sigil()?;
2274 let result = if self.peek() == Some('+') && self.peek_after_plus_is_space_or_eol() {
2276 let v = self.parse_list_block(0)?;
2278 self.skip_trivia()?;
2279 if !self.eof() {
2280 return Err(self.err("trailing content after list root"));
2281 }
2282 Value::List(v)
2283 } else if self.line_starts_kvpair() {
2284 let v = if self.ignore_order {
2285 Value::UnorderedTable(self.parse_table_block_unordered(0)?)
2286 } else {
2287 Value::Table(self.parse_table_block(0)?)
2288 };
2289 self.skip_trivia()?;
2290 if !self.eof() {
2291 return Err(self.err("trailing content after table root"));
2292 }
2293 v
2294 } else {
2295 let v = self.parse_inline_value_or_heredoc()?;
2297 self.consume_after_value(true)?;
2298 self.skip_trivia()?;
2299 if !self.eof() {
2300 return Err(self.err("scalar root cannot be followed by more content"));
2301 }
2302 v
2303 };
2304 self.flush_pending_as_floating();
2306 Ok(result)
2307 }
2308
2309 fn peek_after_plus_is_space_or_eol(&self) -> bool {
2310 let bytes = self.src.as_bytes();
2311 let next = bytes.get(self.pos + 1).copied();
2312 matches!(next, Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') | None)
2313 }
2314
2315 fn reject_reserved_decorator_sigil(&self) -> Result<(), DecodeError> {
2326 match self.peek() {
2327 Some(c @ ('!' | '@' | '$' | '%' | '^' | '&' | '*' | '|' | '~' | '`'
2328 | '.' | ',' | '>' | '<' | '?' | ';' | '=')) => {
2329 Err(self.err(format!(
2330 "'{c}' is a reserved decorator sigil at line-start; \
2331 these characters (! @ $ % ^ & * | ~ ` . , > < ? ; =) cannot begin a \
2332 body line in tier 0"
2333 )))
2334 }
2335 Some(c) if is_reserved_emoji_codepoint(c as u32) => {
2336 Err(self.err(format!(
2337 "'{c}' (U+{cp:04X}) is in the Reserved Emoji Set and cannot \
2338 begin a body line in tier 0; quote it (\"{c}\": ...) or place \
2339 it inside a string value",
2340 cp = c as u32,
2341 )))
2342 }
2343 _ => Ok(()),
2344 }
2345 }
2346
    /// Look-ahead (no cursor movement): does the text at the cursor look like
    /// `key:` followed by whitespace, a line ending, or end of input?
    ///
    /// The key may be a double-quoted string, a single-quoted string, or a
    /// run of bare-key characters. This is only a shape check; the key is not
    /// decoded or validated here.
    fn line_starts_kvpair(&self) -> bool {
        let mut p = self.pos;
        let bytes = self.src.as_bytes();
        if bytes.get(p).copied() == Some(b'"') {
            // Double-quoted key: scan to the closing quote on the same line.
            p += 1;
            while p < bytes.len() {
                match bytes[p] {
                    // Skip the escaped byte too. This may step past the end
                    // of input, which the `get` calls below tolerate.
                    b'\\' => p += 2,
                    b'"' => { p += 1; break; }
                    b'\n' | b'\r' => return false,
                    _ => p += 1,
                }
            }
        } else if bytes.get(p).copied() == Some(b'\'') {
            // Single-quoted key: no escapes, just find the closing quote.
            p += 1;
            while p < bytes.len() {
                match bytes[p] {
                    b'\'' => { p += 1; break; }
                    b'\n' | b'\r' => return false,
                    _ => p += 1,
                }
            }
        } else {
            // Bare key: walk by `char` since bare-key characters are tested
            // per character, and track the byte length consumed.
            let s = &self.src[p..];
            let mut chars = s.char_indices();
            let mut last_end = 0;
            let mut any = false;
            while let Some((i, c)) = chars.next() {
                if is_bare_key_char(c) {
                    last_end = i + c.len_utf8();
                    any = true;
                } else {
                    break;
                }
            }
            if !any {
                return false;
            }
            p += last_end;
        }
        // An unterminated quoted key leaves `p` at/after the end, so this
        // lookup yields `None` and the answer is `false`.
        if bytes.get(p).copied() != Some(b':') {
            return false;
        }
        // The colon must be followed by whitespace, a line ending, or EOF.
        match bytes.get(p + 1).copied() {
            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') | None => true,
            _ => false,
        }
    }
2403
2404 fn parse_table_block(&mut self, indent: usize) -> Result<DmsMap<Value>, DecodeError> {
2409 let mut table: DmsMap<Value> = DmsMap::default();
2410 loop {
2411 self.skip_trivia()?;
2412 if self.eof() {
2413 break;
2414 }
2415 let line_indent = self.measure_line_indent();
2417 if line_indent < indent {
2418 break;
2419 }
2420 if line_indent != indent {
2421 return Err(self.err_at(
2422 self.line,
2423 self.line_start,
2424 self.line_start + indent,
2425 format!(
2426 "inconsistent indent: expected {indent} spaces, got {line_indent}"
2427 ),
2428 ));
2429 }
2430 self.pos = self.line_start + indent;
2432 self.reject_reserved_decorator_sigil()?;
2434 let (key, value) = self.parse_kvpair(indent)?;
2435 if table.contains_key(&key) {
2436 return Err(self.err(format!("duplicate key: {key}")));
2437 }
2438 table.insert(key, value);
2439 }
2440 self.flush_pending_as_floating();
2443 Ok(table)
2444 }
2445
2446 fn parse_table_block_unordered(
2451 &mut self,
2452 indent: usize,
2453 ) -> Result<DmsHashMap<Value>, DecodeError> {
2454 let mut table: DmsHashMap<Value> = DmsHashMap::default();
2455 loop {
2456 self.skip_trivia()?;
2457 if self.eof() {
2458 break;
2459 }
2460 let line_indent = self.measure_line_indent();
2461 if line_indent < indent {
2462 break;
2463 }
2464 if line_indent != indent {
2465 return Err(self.err_at(
2466 self.line,
2467 self.line_start,
2468 self.line_start + indent,
2469 format!(
2470 "inconsistent indent: expected {indent} spaces, got {line_indent}"
2471 ),
2472 ));
2473 }
2474 self.pos = self.line_start + indent;
2475 self.reject_reserved_decorator_sigil()?;
2477 let (key, value) = self.parse_kvpair(indent)?;
2478 if table.contains_key(&key) {
2479 return Err(self.err(format!("duplicate key: {key}")));
2480 }
2481 table.insert(key, value);
2482 }
2483 self.flush_pending_as_floating();
2484 Ok(table)
2485 }
2486
    /// Parses consecutive `+` list items whose marker sits at exactly
    /// `indent` leading spaces and returns their values in order.
    ///
    /// The block ends at end of input, at the first content line indented
    /// less than `indent`, or at a line at `indent` that does not start with
    /// `+`. While an item is parsed its index is on `self.path`, and queued
    /// comments/decorators are attached to it as leading. Queued comments
    /// left at the end of the block are flushed as floating.
    ///
    /// An item is either inline (`+ value`, `+ key: value`) or an empty
    /// marker followed by a deeper-indented block.
    ///
    /// # Errors
    /// Fails on inconsistent indentation, a reserved sigil at line start, a
    /// `+` not followed by whitespace or end of line, an empty marker without
    /// a deeper-indented block, and on any error from the item itself.
    fn parse_list_block(&mut self, indent: usize) -> Result<Vec<Value>, DecodeError> {
        let mut items: Vec<Value> = Vec::new();
        loop {
            self.skip_trivia()?;
            if self.eof() {
                break;
            }
            let line_indent = self.measure_line_indent();
            if line_indent < indent {
                // Dedent: the list belongs to an outer level.
                break;
            }
            if line_indent != indent {
                return Err(self.err_at(
                    self.line,
                    self.line_start,
                    self.line_start + indent,
                    format!(
                        "inconsistent indent: expected {indent} spaces, got {line_indent}"
                    ),
                ));
            }
            self.pos = self.line_start + indent;
            self.reject_reserved_decorator_sigil()?;
            if self.peek() != Some('+') {
                // Same indent but not an item: the list is over.
                break;
            }
            let idx = items.len();
            // Push the item's index first so the leading comments flushed on
            // the next line attach to this item's path.
            self.path.push(BreadcrumbSegment::Index(idx));
            self.flush_pending_as_leading_on_current();
            // Consume the `+` marker.
            self.bump();
            // NOTE(review): the `?` operators inside this match return before
            // the `self.path.pop()` below; that only matters if a caller ever
            // keeps using the parser after an error.
            let item_result: Result<Value, DecodeError> = match self.peek() {
                Some(' ') | Some('\t') => {
                    self.bump();
                    self.skip_inline_ws();
                    // Detect whether the inner-comment pass recorded any
                    // decorators by comparing lengths before and after.
                    let dec_count_before = self.decorations_raw.len();
                    self.capture_inner_block_comments()?;
                    let inner_dec_captured = self.decorations_raw.len() > dec_count_before;
                    match self.peek() {
                        Some('\n') | Some('\r') | None => {
                            if self.is_t1_active() && inner_dec_captured {
                                // Tier 1: the item consisted only of
                                // decorators. Record its path and produce an
                                // empty table as a placeholder value.
                                self.decoration_only_paths.push(self.path.clone());
                                self.consume_eol();
                                Ok(Value::Table(DmsMap::default()))
                            } else {
                                // Empty marker: the value is the block on the
                                // following, deeper-indented lines.
                                self.consume_eol();
                                self.skip_trivia()?;
                                if self.eof() {
                                    Err(self.err("expected indented block after empty '+' marker"))
                                } else {
                                    let inner_indent = self.measure_line_indent();
                                    if inner_indent <= indent {
                                        Err(self.err("expected indented block after empty '+' marker"))
                                    } else {
                                        self.parse_block_value(inner_indent)
                                    }
                                }
                            }
                        }
                        _ => self.parse_list_item_value(indent),
                    }
                }
                Some('\n') | Some('\r') | None => {
                    // `+` directly followed by a line ending: same as the
                    // empty-marker case above, without the decorator pass.
                    self.consume_eol();
                    self.skip_trivia()?;
                    if self.eof() {
                        Err(self.err("expected indented block after empty '+' marker"))
                    } else {
                        let inner_indent = self.measure_line_indent();
                        if inner_indent <= indent {
                            Err(self.err("expected indented block after empty '+' marker"))
                        } else {
                            self.parse_block_value(inner_indent)
                        }
                    }
                }
                _ => Err(self.err("expected space after '+'")),
            };
            self.path.pop();
            let v = item_result?;
            items.push(v);
        }
        self.flush_pending_as_floating();
        Ok(items)
    }
2588
2589 fn measure_line_indent(&self) -> usize {
2590 let b = self.src.as_bytes();
2591 let mut i = self.line_start;
2592 let mut n = 0usize;
2593 while i < b.len() && b[i] == b' ' {
2594 n += 1;
2595 i += 1;
2596 }
2597 n
2598 }
2599
2600 fn parse_block_value(&mut self, indent: usize) -> Result<Value, DecodeError> {
2601 self.pos = self.line_start + indent;
2604 self.reject_reserved_decorator_sigil()?;
2606 if self.peek() == Some('+') && self.peek_after_plus_is_space_or_eol() {
2607 Ok(Value::List(self.parse_list_block(indent)?))
2608 } else if self.ignore_order {
2609 Ok(Value::UnorderedTable(self.parse_table_block_unordered(indent)?))
2610 } else {
2611 Ok(Value::Table(self.parse_table_block(indent)?))
2612 }
2613 }
2614
    /// Parses the value that follows `+ ` on a list-item line.
    ///
    /// If the text at the cursor looks like `key:`, the item is a table whose
    /// first entry is on the marker line; further entries are taken from
    /// following lines whose indentation equals the column of that first key.
    /// The table is unordered when `ignore_order` is set. Otherwise the item
    /// is a single inline value or heredoc.
    ///
    /// `list_indent` is currently unused.
    ///
    /// # Errors
    /// For the table form: a sibling line indented deeper than the first key,
    /// a reserved sigil or a `+` at the sibling-key column, a malformed
    /// entry, or a repeated key. For the scalar form: whatever the value
    /// parser or `consume_after_value` reports.
    fn parse_list_item_value(&mut self, list_indent: usize) -> Result<Value, DecodeError> {
        if self.line_starts_kvpair() {
            // Zero-based byte column of the first key; sibling keys must be
            // indented by exactly this many spaces.
            let key_col = self.col() - 1;
            if self.ignore_order {
                let (k, v) = self.parse_kvpair(key_col)?;
                let mut t: DmsHashMap<Value> = DmsHashMap::default();
                t.insert(k, v);
                loop {
                    self.skip_trivia()?;
                    if self.eof() {
                        break;
                    }
                    let li = self.measure_line_indent();
                    if li < key_col {
                        // Dedent: next list item or an outer construct.
                        break;
                    }
                    if li != key_col {
                        return Err(self.err_at(
                            self.line,
                            self.line_start,
                            self.line_start + key_col,
                            "list-item table sibling key must align with first key",
                        ));
                    }
                    self.pos = self.line_start + key_col;
                    self.reject_reserved_decorator_sigil()?;
                    // Any `+` at the sibling-key column is refused outright.
                    if self.peek() == Some('+') {
                        return Err(self.err(
                            "'+' marker at sibling-key column is ambiguous",
                        ));
                    }
                    if !self.line_starts_kvpair() {
                        break;
                    }
                    let (k, v) = self.parse_kvpair(key_col)?;
                    if t.contains_key(&k) {
                        return Err(self.err(format!("duplicate key: {k}")));
                    }
                    t.insert(k, v);
                }
                self.flush_pending_as_floating();
                return Ok(Value::UnorderedTable(t));
            }
            // Ordered variant: same loop as above, collecting into a `DmsMap`.
            let (k, v) = self.parse_kvpair(key_col)?;
            let mut t = DmsMap::default();
            t.insert(k, v);
            loop {
                self.skip_trivia()?;
                if self.eof() {
                    break;
                }
                let li = self.measure_line_indent();
                if li < key_col {
                    // Dedent: next list item or an outer construct.
                    break;
                }
                if li != key_col {
                    return Err(self.err_at(
                        self.line,
                        self.line_start,
                        self.line_start + key_col,
                        "list-item table sibling key must align with first key",
                    ));
                }
                self.pos = self.line_start + key_col;
                self.reject_reserved_decorator_sigil()?;
                // Any `+` at the sibling-key column is refused outright.
                if self.peek() == Some('+') {
                    return Err(self.err(
                        "'+' marker at sibling-key column is ambiguous",
                    ));
                }
                if !self.line_starts_kvpair() {
                    break;
                }
                let (k, v) = self.parse_kvpair(key_col)?;
                if t.contains_key(&k) {
                    return Err(self.err(format!("duplicate key: {k}")));
                }
                t.insert(k, v);
            }
            self.flush_pending_as_floating();
            Ok(Value::Table(t))
        } else {
            let v = self.parse_inline_value_or_heredoc()?;
            self.consume_after_value(false)?;
            // Explicitly mark the parameter as intentionally unused.
            let _ = list_indent;
            Ok(v)
        }
    }
2724
2725 fn parse_kvpair(&mut self, parent_indent: usize) -> Result<(String, Value), DecodeError> {
2728 let key = self.parse_key()?;
2729 if self.peek() != Some(':') {
2730 return Err(self.err("expected ':' after key"));
2731 }
2732 self.path.push(BreadcrumbSegment::Key(key.clone()));
2738 self.flush_pending_as_leading_on_current();
2739 let result = self.parse_kvpair_after_key(parent_indent);
2740 self.path.pop();
2741 result.map(|v| (key, v))
2742 }
2743
    /// Parses what follows a key, with the cursor on the `:`.
    ///
    /// * `: value`: an inline value or heredoc on the same line.
    /// * `:` followed by end of line (optionally after whitespace and inner
    ///   block comments): a child block on the following lines, which must be
    ///   indented deeper than `parent_indent`.
    /// * Tier 1 only: if inner decorators were captured and nothing else
    ///   follows on the line, the path is recorded in
    ///   `decoration_only_paths` and an empty table is returned as a
    ///   placeholder.
    ///
    /// # Errors
    /// Fails when `:` is followed by a non-whitespace character, when a child
    /// block is expected but missing or not indented deeper, and on any error
    /// from the value itself.
    fn parse_kvpair_after_key(&mut self, parent_indent: usize) -> Result<Value, DecodeError> {
        // Consume the `:` (the caller has verified it is there).
        self.bump();
        match self.peek() {
            Some(' ') | Some('\t') => {
                self.bump();
                self.skip_inline_ws();
                // Detect whether the inner-comment pass recorded any
                // decorators by comparing lengths before and after.
                let dec_count_before = self.decorations_raw.len();
                self.capture_inner_block_comments()?;
                let inner_dec_captured = self.decorations_raw.len() > dec_count_before;
                match self.peek() {
                    Some('\n') | Some('\r') | None => {
                        if self.is_t1_active() && inner_dec_captured {
                            // Decoration-only entry: placeholder empty table.
                            self.decoration_only_paths.push(self.path.clone());
                            self.consume_eol();
                            return Ok(Value::Table(DmsMap::default()));
                        }
                        // Nothing left on the line: the value is a child block.
                        self.consume_eol();
                        self.skip_trivia()?;
                        if self.eof() {
                            return Err(self.err("expected indented child block"));
                        }
                        let child_indent = self.measure_line_indent();
                        if child_indent <= parent_indent {
                            return Err(self.err("expected indented child block"));
                        }
                        let v = self.parse_block_value(child_indent)?;
                        Ok(v)
                    }
                    _ => {
                        let v = self.parse_inline_value_or_heredoc()?;
                        self.consume_after_value(false)?;
                        Ok(v)
                    }
                }
            }
            Some('\n') | Some('\r') | None => {
                // `key:` directly followed by a line ending: child block.
                self.consume_eol();
                self.skip_trivia()?;
                if self.eof() {
                    return Err(self.err("expected indented child block"));
                }
                let child_indent = self.measure_line_indent();
                if child_indent <= parent_indent {
                    return Err(self.err("expected indented child block"));
                }
                let v = self.parse_block_value(child_indent)?;
                Ok(v)
            }
            _ => Err(self.err("expected whitespace after ':'")),
        }
    }
2810
2811 fn parse_key(&mut self) -> Result<String, DecodeError> {
2814 match self.peek() {
2815 Some('"') => self.parse_basic_string_key(),
2816 Some('\'') => self.parse_literal_string_key(),
2817 Some(_) => self.parse_bare_key(),
2818 None => Err(self.err("expected key")),
2819 }
2820 }
2821
2822 fn parse_bare_key(&mut self) -> Result<String, DecodeError> {
2823 let start = self.pos;
2824 while let Some(c) = self.peek() {
2825 if is_bare_key_char(c) {
2826 self.bump();
2827 } else {
2828 break;
2829 }
2830 }
2831 if self.pos == start {
2832 return Err(self.err("expected key"));
2833 }
2834 Ok(self.src[start..self.pos].to_string())
2835 }
2836
2837 fn parse_basic_string_key(&mut self) -> Result<String, DecodeError> {
2838 if self.rest().starts_with("\"\"\"") {
2840 return Err(self.err("triple-quoted strings are not allowed as keys"));
2841 }
2842 let saved = self.record_forms;
2846 self.record_forms = false;
2847 let r = self.parse_basic_string_value();
2848 self.record_forms = saved;
2849 r
2850 }
2851
2852 fn parse_literal_string_key(&mut self) -> Result<String, DecodeError> {
2853 if self.rest().starts_with("'''") {
2854 return Err(self.err("triple-quoted strings are not allowed as keys"));
2855 }
2856 let saved = self.record_forms;
2857 self.record_forms = false;
2858 let r = self.parse_literal_string_value();
2859 self.record_forms = saved;
2860 r
2861 }
2862
2863 fn capture_inner_block_comments(&mut self) -> Result<(), DecodeError> {
2875 if self.is_t1_active() {
2876 return self.capture_t1_decoration_run(tier1::DecorationPosition::Inner);
2877 }
2878 loop {
2879 match self.peek() {
2880 Some('/') if self.rest().starts_with("/*") => {
2881 let raw = self.read_c_block_comment()?;
2882 if !self.lite {
2883 self.comments.push(AttachedComment {
2884 comment: Comment {
2885 content: raw,
2886 kind: CommentKind::Block,
2887 },
2888 position: CommentPosition::Inner,
2889 path: self.path.clone(),
2890 });
2891 }
2892 self.skip_inline_ws();
2893 }
2894 _ => break,
2895 }
2896 }
2897 Ok(())
2898 }
2899
2900 fn capture_t1_decoration_run(
2910 &mut self,
2911 position: tier1::DecorationPosition,
2912 ) -> Result<(), DecodeError> {
2913 if !self.is_t1_active() {
2914 return Ok(());
2915 }
2916 let comment_position = match position {
2917 tier1::DecorationPosition::Inner => CommentPosition::Inner,
2918 tier1::DecorationPosition::Trailing => CommentPosition::Trailing,
2919 tier1::DecorationPosition::Leading => CommentPosition::Leading,
2920 tier1::DecorationPosition::Floating => CommentPosition::Floating,
2921 };
2922 loop {
2923 self.skip_inline_ws();
2925 match self.peek() {
2926 Some(c) if tier1::is_sigil_atom_start(c) => {
2927 let (mut call, end) = tier1::parse_decorator_call(self.src, self.pos)?;
2928 let mut walk = self.pos;
2930 while walk < end {
2931 if self.src.as_bytes()[walk] == b'\n' {
2932 self.line += 1;
2933 self.line_start = walk + 1;
2934 }
2935 walk += 1;
2936 }
2937 self.pos = end;
2938 call.position = position;
2939 self.decorations_raw.push((self.path.clone(), position, call));
2940 }
2941 Some('/') if self.rest().starts_with("/*") => {
2942 let raw = self.read_c_block_comment()?;
2943 if !self.lite {
2944 self.comments.push(AttachedComment {
2945 comment: Comment {
2946 content: raw,
2947 kind: CommentKind::Block,
2948 },
2949 position: comment_position.clone(),
2950 path: self.path.clone(),
2951 });
2952 }
2953 }
2954 _ => return Ok(()),
2955 }
2956 }
2957 }
2958
2959 fn capture_t1_decoration_run_flow(
2968 &mut self,
2969 position: tier1::DecorationPosition,
2970 ) -> Result<(), DecodeError> {
2971 if !self.is_t1_active() {
2972 return Ok(());
2973 }
2974 let comment_position = match position {
2975 tier1::DecorationPosition::Inner => CommentPosition::Inner,
2976 tier1::DecorationPosition::Trailing => CommentPosition::Trailing,
2977 tier1::DecorationPosition::Leading => CommentPosition::Leading,
2978 tier1::DecorationPosition::Floating => CommentPosition::Floating,
2979 };
2980 loop {
2981 self.skip_inline_ws();
2983 match self.peek() {
2984 Some(',') | Some(']') | Some('}') => return Ok(()),
2987 Some(c) if tier1::is_sigil_atom_start(c) => {
2988 let (mut call, end) = tier1::parse_decorator_call(self.src, self.pos)?;
2989 let mut walk = self.pos;
2991 while walk < end {
2992 if self.src.as_bytes()[walk] == b'\n' {
2993 self.line += 1;
2994 self.line_start = walk + 1;
2995 }
2996 walk += 1;
2997 }
2998 self.pos = end;
2999 call.position = position;
3000 self.decorations_raw.push((self.path.clone(), position, call));
3001 }
3002 Some('/') if self.rest().starts_with("/*") => {
3003 let raw = self.read_c_block_comment()?;
3004 if !self.lite {
3005 self.comments.push(AttachedComment {
3006 comment: Comment {
3007 content: raw,
3008 kind: CommentKind::Block,
3009 },
3010 position: comment_position.clone(),
3011 path: self.path.clone(),
3012 });
3013 }
3014 }
3015 _ => return Ok(()),
3016 }
3017 }
3018 }
3019
3020 fn parse_inline_value_or_heredoc(&mut self) -> Result<Value, DecodeError> {
3021 match self.peek() {
3026 Some('"') => {
3027 if self.rest().starts_with("\"\"\"") {
3028 self.parse_heredoc_basic().map(Value::String)
3029 } else {
3030 let r = self.parse_basic_string_value().map(Value::String);
3031 r
3037 }
3038 }
3039 Some('\'') => {
3040 if self.rest().starts_with("'''") {
3041 self.parse_heredoc_literal().map(Value::String)
3042 } else {
3043 let r = self.parse_literal_string_value().map(Value::String)?;
3044 self.record_form(OriginalLiteral::String { form: StringForm::Literal });
3045 Ok(r)
3046 }
3047 }
3048 Some('[') => Ok(Value::List(self.parse_flow_array()?)),
3049 Some('{') => {
3050 if self.ignore_order {
3051 Ok(Value::UnorderedTable(self.parse_flow_table_unordered()?))
3052 } else {
3053 Ok(Value::Table(self.parse_flow_table()?))
3054 }
3055 }
3056 Some('t') | Some('f') => self.parse_bool_or_keyword_value(),
3057 Some('i') => self.parse_inf_value(false),
3058 Some('n') => self.parse_nan_value(false),
3059 Some(c) if c == '+' || c == '-' || c.is_ascii_digit() => {
3060 self.parse_number_or_datetime()
3061 }
3062 Some(c) => Err(self.err(format!("unexpected character '{c}' in value"))),
3063 None => Err(self.err("expected value")),
3064 }
3065 }
3066
3067 fn parse_bool_or_keyword_value(&mut self) -> Result<Value, DecodeError> {
3068 let rest = self.rest();
3069 if let Some(after) = rest.strip_prefix("true") {
3070 if Self::is_value_terminator(after.chars().next()) {
3071 self.pos += 4;
3072 return Ok(Value::Bool(true));
3073 }
3074 }
3075 if let Some(after) = rest.strip_prefix("false") {
3076 if Self::is_value_terminator(after.chars().next()) {
3077 self.pos += 5;
3078 return Ok(Value::Bool(false));
3079 }
3080 }
3081 Err(self.err("expected value"))
3082 }
3083
3084 fn parse_inf_value(&mut self, signed: bool) -> Result<Value, DecodeError> {
3085 let after = if signed { 4 } else { 3 };
3086 let want: &'static str = if signed {
3087 match self.peek() {
3089 Some('+') => "+inf",
3090 Some('-') => "-inf",
3091 _ => "inf",
3092 }
3093 } else {
3094 "inf"
3095 };
3096 let neg = want.starts_with('-');
3097 let rest_owned = self.rest().to_string();
3098 if let Some(rem) = rest_owned.strip_prefix(want) {
3099 if Self::is_value_terminator(rem.chars().next()) {
3100 self.pos += after;
3101 let v = if neg { f64::NEG_INFINITY } else { f64::INFINITY };
3102 return Ok(Value::Float(v));
3103 }
3104 }
3105 Err(self.err("expected 'inf'"))
3106 }
3107
3108 fn parse_nan_value(&mut self, signed: bool) -> Result<Value, DecodeError> {
3109 let after = if signed { 4 } else { 3 };
3110 let want: &'static str = if signed {
3111 match self.peek() { Some('+') => "+nan", Some('-') => "-nan", _ => "nan" }
3112 } else {
3113 "nan"
3114 };
3115 let rest_owned = self.rest().to_string();
3116 if let Some(rem) = rest_owned.strip_prefix(want) {
3117 if Self::is_value_terminator(rem.chars().next()) {
3118 self.pos += after;
3119 return Ok(Value::Float(f64::NAN));
3120 }
3121 }
3122 Err(self.err("expected 'nan'"))
3123 }
3124
/// Reports whether `c` may legally follow a scalar value.
///
/// End of input (`None`) terminates a value, as do space, tab, `\n`, `\r`,
/// `#`, `/`, `,`, `]` and `}`.
fn is_value_terminator(c: Option<char>) -> bool {
    const TERMINATORS: [char; 9] = [' ', '\t', '\n', '\r', '#', '/', ',', ']', '}'];
    c.map_or(true, |ch| TERMINATORS.contains(&ch))
}
3131
3132 fn parse_number_or_datetime(&mut self) -> Result<Value, DecodeError> {
3135 let rest = self.rest();
3136 let starts_with_sign = matches!(self.peek(), Some('+') | Some('-'));
3137 if !starts_with_sign && looks_like_date_prefix(rest) {
3138 return self.parse_datetime_value();
3139 }
3140 if !starts_with_sign && looks_like_time_prefix(rest) {
3141 return self.parse_local_time_value();
3142 }
3143 if starts_with_sign {
3145 let after_sign = &rest[1..];
3146 if after_sign.starts_with("inf") {
3147 let signed_rest = &rest[..4];
3148 if Self::is_value_terminator(rest[4..].chars().next()) {
3149 let neg = signed_rest.starts_with('-');
3150 self.pos += 4;
3151 return Ok(Value::Float(if neg { f64::NEG_INFINITY } else { f64::INFINITY }));
3152 }
3153 }
3154 }
3156 let tok = self.scan_number_token();
3158 let s = &self.src[self.pos..self.pos + tok.len];
3159 if tok.is_float {
3160 let f = parse_float(s).ok_or_else(|| self.err(format!("invalid float: {s}")))?;
3161 self.pos += tok.len;
3162 Ok(Value::Float(f))
3163 } else {
3164 let n = parse_integer(s).map_err(|e| self.err(e))?;
3165 self.pos += tok.len;
3166 if !self.lite && self.record_forms {
3176 let canonical = n.to_string();
3177 if s != canonical {
3178 self.record_form(OriginalLiteral::Integer { lit: s.to_string() });
3179 }
3180 }
3181 Ok(Value::Integer(n))
3182 }
3183 }
3184
3185 fn scan_number_token(&self) -> NumTok {
3186 let s = self.rest();
3187 let bytes = s.as_bytes();
3188 let mut i = 0usize;
3189 if matches!(bytes.get(0), Some(b'+') | Some(b'-')) {
3190 i += 1;
3191 }
3192 let is_prefixed = bytes.get(i) == Some(&b'0')
3194 && matches!(bytes.get(i + 1).copied(), Some(b'x') | Some(b'o') | Some(b'b'));
3195 if is_prefixed {
3196 i += 2;
3197 let mut saw_dot = false;
3199 let mut saw_p = false;
3200 while let Some(&b) = bytes.get(i) {
3201 if b == b'_' || is_base_digit(b, bytes[i - if saw_dot { 1 } else { 0 } - 1]) {
3202 i += 1;
3203 } else if b == b'.' && !saw_dot && !saw_p {
3204 saw_dot = true;
3205 i += 1;
3206 } else if b == b'p' && !saw_p {
3207 saw_p = true;
3208 i += 1;
3209 if matches!(bytes.get(i), Some(b'+') | Some(b'-')) {
3210 i += 1;
3211 }
3212 } else if saw_p && b.is_ascii_digit() {
3213 i += 1;
3214 } else {
3215 break;
3216 }
3217 }
3218 return NumTok { len: i, is_float: saw_dot || saw_p };
3219 }
3220 let mut saw_dot = false;
3222 let mut saw_e = false;
3223 while let Some(&b) = bytes.get(i) {
3224 if b.is_ascii_digit() || b == b'_' {
3225 i += 1;
3226 } else if b == b'.' && !saw_dot && !saw_e {
3227 saw_dot = true;
3228 i += 1;
3229 } else if (b == b'e' || b == b'E') && !saw_e {
3230 saw_e = true;
3231 i += 1;
3232 if matches!(bytes.get(i), Some(b'+') | Some(b'-')) {
3233 i += 1;
3234 }
3235 } else {
3236 break;
3237 }
3238 }
3239 NumTok { len: i, is_float: saw_dot || saw_e }
3240 }
3241
3242 fn parse_datetime_value(&mut self) -> Result<Value, DecodeError> {
3243 let rest: String = self.rest().to_string();
3244 let date = rest[..10].to_string();
3245 validate_date(&date).map_err(|e| self.err(e))?;
3246 let rest2 = &rest[10..];
3247 if !rest2.starts_with('T') && !rest2.starts_with(' ') {
3248 if rest2.starts_with('t') {
3249 return Err(self.err("date and time separator must be uppercase 'T' (lowercase 't' not permitted)"));
3250 }
3251 if !Self::is_value_terminator(rest2.chars().next()) {
3252 return Err(self.err("invalid character after date"));
3253 }
3254 self.pos += 10;
3255 return Ok(Value::LocalDate(date));
3256 }
3257 if rest2.starts_with(' ') {
3258 let after_ws = rest2.trim_start_matches(|c: char| c == ' ' || c == '\t');
3259 if matches!(after_ws.chars().next(), Some(c) if c.is_ascii_digit()) {
3260 return Err(self.err("date and time must be separated by 'T' (space not permitted)"));
3261 }
3262 self.pos += 10;
3263 return Ok(Value::LocalDate(date));
3264 }
3265 let after_t = &rest2[1..];
3266 if !looks_like_time_prefix(after_t) {
3267 return Err(self.err("expected HH:MM:SS after 'T'"));
3268 }
3269 let time_str = &after_t[..8];
3270 validate_time(time_str).map_err(|e| self.err(e))?;
3271 let mut consumed = 10 + 1 + 8;
3272 let after_time = &rest[consumed..];
3273 let mut frac_len = 0usize;
3274 if after_time.starts_with('.') {
3275 let bytes = after_time.as_bytes();
3276 let mut k = 1usize;
3277 while k < bytes.len() && bytes[k].is_ascii_digit() {
3278 k += 1;
3279 }
3280 let digits = k - 1;
3281 if digits == 0 {
3282 return Err(self.err("expected fractional digits after '.'"));
3283 }
3284 if digits > 9 {
3285 return Err(self.err("fractional seconds limited to 9 digits (nanosecond precision)"));
3286 }
3287 frac_len = k;
3288 }
3289 consumed += frac_len;
3290 let after_frac = &rest[consumed..];
3291 if after_frac.starts_with('Z') || after_frac.starts_with('z') {
3292 consumed += 1;
3293 let s = rest[..consumed].to_string();
3294 self.pos += consumed;
3295 return Ok(Value::OffsetDateTime(s));
3296 }
3297 if after_frac.starts_with('+') || after_frac.starts_with('-') {
3298 let bytes = after_frac.as_bytes();
3299 if bytes.len() < 6
3300 || !bytes[1].is_ascii_digit()
3301 || !bytes[2].is_ascii_digit()
3302 || bytes[3] != b':'
3303 || !bytes[4].is_ascii_digit()
3304 || !bytes[5].is_ascii_digit()
3305 {
3306 return Err(self.err("invalid offset; expected ±HH:MM"));
3307 }
3308 let oh = (bytes[1] - b'0') * 10 + (bytes[2] - b'0');
3309 let om = (bytes[4] - b'0') * 10 + (bytes[5] - b'0');
3310 if oh > 23 || om > 59 {
3311 return Err(self.err("offset out of range"));
3312 }
3313 consumed += 6;
3314 let s = rest[..consumed].to_string();
3315 self.pos += consumed;
3316 return Ok(Value::OffsetDateTime(s));
3317 }
3318 if !Self::is_value_terminator(after_frac.chars().next()) {
3320 return Err(self.err("invalid character after datetime"));
3321 }
3322 let s = rest[..consumed].to_string();
3323 self.pos += consumed;
3324 Ok(Value::LocalDateTime(s))
3325 }
3326
3327 fn parse_local_time_value(&mut self) -> Result<Value, DecodeError> {
3328 let rest = self.rest();
3329 let time_str = &rest[..8];
3330 validate_time(time_str).map_err(|e| self.err(e))?;
3331 let mut consumed = 8usize;
3332 let after = &rest[consumed..];
3333 if after.starts_with('.') {
3334 let bytes = after.as_bytes();
3335 let mut k = 1usize;
3336 while k < bytes.len() && bytes[k].is_ascii_digit() {
3337 k += 1;
3338 }
3339 let digits = k - 1;
3340 if digits == 0 {
3341 return Err(self.err("expected fractional digits after '.'"));
3342 }
3343 if digits > 9 {
3344 return Err(self.err("fractional seconds limited to 9 digits"));
3345 }
3346 consumed += k;
3347 }
3348 let after2 = &rest[consumed..];
3349 if !Self::is_value_terminator(after2.chars().next()) {
3350 return Err(self.err("invalid character after time"));
3351 }
3352 let s = rest[..consumed].to_string();
3353 self.pos += consumed;
3354 Ok(Value::LocalTime(s))
3355 }
3356
3357 fn parse_basic_string_value(&mut self) -> Result<String, DecodeError> {
3360 let start_line = self.line;
3361 let start_lstart = self.line_start;
3362 let start_pos = self.pos;
3363 self.bump(); let mut out = String::new();
3365 loop {
3366 match self.peek() {
3367 None => return Err(self.err_at(start_line, start_lstart, start_pos, "unterminated string")),
3368 Some('\n') | Some('\r') => return Err(self.err("strings cannot span lines")),
3369 Some('"') => {
3370 self.bump();
3371 return Ok(nfc_normalize(&out));
3375 }
3376 Some('\\') => {
3377 self.bump();
3378 let esc_pos = self.pos;
3379 match self.bump() {
3380 Some('"') => out.push('"'),
3381 Some('\\') => out.push('\\'),
3382 Some('n') => out.push('\n'),
3383 Some('t') => out.push('\t'),
3384 Some('r') => out.push('\r'),
3385 Some('b') => out.push('\u{0008}'),
3386 Some('f') => out.push('\u{000c}'),
3387 Some('u') => {
3388 let cp = self.read_hex_codepoint(4, esc_pos)?;
3389 out.push(cp);
3390 }
3391 Some('U') => {
3392 let cp = self.read_hex_codepoint(8, esc_pos)?;
3393 out.push(cp);
3394 }
3395 Some(c) => return Err(self.err(format!("invalid escape '\\{c}'"))),
3396 None => return Err(self.err("unterminated escape")),
3397 }
3398 }
3399 Some(c) => {
3400 self.bump();
3401 out.push(c);
3402 }
3403 }
3404 }
3405 }
3406
3407 fn parse_literal_string_value(&mut self) -> Result<String, DecodeError> {
3408 let start_line = self.line;
3409 let start_lstart = self.line_start;
3410 let start_pos = self.pos;
3411 self.bump(); let mut out = String::new();
3413 loop {
3414 match self.peek() {
3415 None => return Err(self.err_at(start_line, start_lstart, start_pos, "unterminated string")),
3416 Some('\n') | Some('\r') => return Err(self.err("strings cannot span lines")),
3417 Some('\'') => {
3418 self.bump();
3419 return Ok(out);
3420 }
3421 Some(c) => {
3422 self.bump();
3423 out.push(c);
3424 }
3425 }
3426 }
3427 }
3428
3429 fn read_hex_codepoint(&mut self, n: usize, _esc_pos: usize) -> Result<char, DecodeError> {
3430 let s = self.rest();
3431 if s.len() < n {
3432 return Err(self.err(format!("expected {n} hex digits in unicode escape")));
3433 }
3434 let hex = &s[..n];
3435 if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
3436 return Err(self.err(format!("invalid hex in unicode escape: {hex}")));
3437 }
3438 let v = u32::from_str_radix(hex, 16)
3439 .map_err(|_| self.err("invalid unicode escape"))?;
3440 if v == 0 {
3443 return Err(self.err("\\u0000 escape forbidden"));
3444 }
3445 self.pos += n;
3446 char::from_u32(v).ok_or_else(|| self.err("unicode escape is not a scalar value"))
3447 }
3448
3449 fn parse_heredoc_basic(&mut self) -> Result<String, DecodeError> {
3452 self.pos += 3;
3454 let label = self.parse_heredoc_label();
3455 let modifiers = self.parse_heredoc_modifiers()?;
3456 self.skip_inline_ws();
3458 if !(self.consume_eol() || self.eof()) {
3459 return Err(self.err("heredoc opener must be followed by end of line"));
3460 }
3461 let label_opt = if label.is_empty() { None } else { Some(label.clone()) };
3462 let terminator = if label.is_empty() { "\"\"\"".to_string() } else { label };
3463 let body = self.collect_heredoc_body(&terminator, true)?;
3464 validate_heredoc_basic_surrogates(&body)?;
3469 let stripped = strip_indent_and_continuations(&body, true)?;
3470 let processed = apply_modifiers(stripped, &modifiers).map_err(|e| self.err(e))?;
3471 let calls: Vec<HeredocModifierCall> = modifiers
3472 .into_iter()
3473 .map(|m| HeredocModifierCall { name: m.name, args: m.args })
3474 .collect();
3475 self.record_form(OriginalLiteral::String {
3476 form: StringForm::Heredoc {
3477 flavor: HeredocFlavor::BasicTriple,
3478 label: label_opt,
3479 modifiers: calls,
3480 },
3481 });
3482 Ok(nfc_normalize(&processed))
3486 }
3487
3488 fn parse_heredoc_literal(&mut self) -> Result<String, DecodeError> {
3489 self.pos += 3;
3490 let label = self.parse_heredoc_label();
3491 let modifiers = self.parse_heredoc_modifiers()?;
3492 self.skip_inline_ws();
3493 if !(self.consume_eol() || self.eof()) {
3494 return Err(self.err("heredoc opener must be followed by end of line"));
3495 }
3496 let label_opt = if label.is_empty() { None } else { Some(label.clone()) };
3497 let terminator = if label.is_empty() { "'''".to_string() } else { label };
3498 let body = self.collect_heredoc_body(&terminator, false)?;
3499 let stripped = strip_indent_and_continuations(&body, false)?;
3500 let processed = apply_modifiers(stripped, &modifiers).map_err(|e| self.err(e))?;
3501 let calls: Vec<HeredocModifierCall> = modifiers
3502 .into_iter()
3503 .map(|m| HeredocModifierCall { name: m.name, args: m.args })
3504 .collect();
3505 self.record_form(OriginalLiteral::String {
3506 form: StringForm::Heredoc {
3507 flavor: HeredocFlavor::LiteralTriple,
3508 label: label_opt,
3509 modifiers: calls,
3510 },
3511 });
3512 Ok(processed)
3513 }
3514
3515 fn parse_heredoc_label(&mut self) -> String {
3516 let start = self.pos;
3517 if let Some(c) = self.peek() {
3518 if !is_label_start(c) {
3519 return String::new();
3520 }
3521 } else {
3522 return String::new();
3523 }
3524 while let Some(c) = self.peek() {
3525 if is_label_cont(c) {
3526 self.bump();
3527 } else {
3528 break;
3529 }
3530 }
3531 self.src[start..self.pos].to_string()
3532 }
3533
3534 fn parse_heredoc_modifiers(&mut self) -> Result<Vec<HMod>, DecodeError> {
3535 let mut mods = Vec::new();
3536 loop {
3537 let ws_start = self.pos;
3539 self.skip_inline_ws();
3540 let had_ws = self.pos > ws_start;
3541 match self.peek() {
3542 Some(c) if is_label_start(c) => {
3543 if !had_ws {
3544 return Err(self.err("modifier must be preceded by whitespace"));
3545 }
3546 let m = self.parse_one_modifier()?;
3547 mods.push(m);
3548 }
3549 _ => {
3550 self.pos = ws_start;
3551 return Ok(mods);
3552 }
3553 }
3554 }
3555 }
3556
3557 fn parse_one_modifier(&mut self) -> Result<HMod, DecodeError> {
3558 let name_start = self.pos;
3559 while let Some(c) = self.peek() {
3560 if is_label_cont(c) {
3561 self.bump();
3562 } else {
3563 break;
3564 }
3565 }
3566 let name = self.src[name_start..self.pos].to_string();
3567 if self.peek() != Some('(') {
3568 return Err(self.err("modifiers require parentheses"));
3569 }
3570 self.bump();
3571 let prev_record = self.record_forms;
3578 self.record_forms = false;
3579 let mut args = Vec::new();
3581 let result: Result<(), DecodeError> = (|| {
3582 loop {
3583 self.skip_inline_ws();
3584 if self.peek() == Some(')') {
3585 self.bump();
3586 return Ok(());
3587 }
3588 let v = self.parse_inline_value_or_heredoc()?;
3589 args.push(v);
3590 self.skip_inline_ws();
3591 match self.peek() {
3592 Some(',') => { self.bump(); }
3593 Some(')') => { self.bump(); return Ok(()); }
3594 _ => return Err(self.err("expected ',' or ')' in modifier args")),
3595 }
3596 }
3597 })();
3598 self.record_forms = prev_record;
3599 result?;
3600 Ok(HMod { name, args })
3601 }
3602
3603 fn collect_heredoc_body(&mut self, terminator: &str, _allow_continuation: bool) -> Result<HBody, DecodeError> {
3604 let mut lines: Vec<HLine> = Vec::new();
3605 let opener_line = self.line;
3606 let opener_lstart = self.line_start;
3607 let opener_pos = self.pos;
3608 loop {
3609 if self.eof() {
3610 return Err(self.err_at(opener_line, opener_lstart, opener_pos, "unterminated heredoc"));
3611 }
3612 let line_begin = self.pos;
3613 while let Some(c) = self.peek() {
3615 if c == '\n' || c == '\r' {
3616 break;
3617 }
3618 self.bump();
3619 }
3620 let line_text = &self.src[line_begin..self.pos];
3621 let raw = line_text.to_string();
3622 let this_line = self.line;
3623 let this_line_start = self.line_start;
3624 if raw.trim() == terminator {
3625 let strip_depth = raw.bytes().take_while(|&b| b == b' ').count();
3629 return Ok(HBody { lines, strip_depth });
3630 }
3631 let _consumed_eol = self.consume_eol();
3633 lines.push(HLine { text: raw, line: this_line, line_start: this_line_start });
3634 }
3635 }
3636
3637 fn parse_flow_array(&mut self) -> Result<Vec<Value>, DecodeError> {
3640 self.bump(); let mut items = Vec::new();
3642 loop {
3643 self.skip_flow_ws()?;
3644 if self.peek() == Some(']') {
3645 self.bump();
3646 return Ok(items);
3647 }
3648 let idx = items.len();
3652 self.path.push(BreadcrumbSegment::Index(idx));
3653 if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Inner) {
3655 self.path.pop();
3656 return Err(e);
3657 }
3658 let parsed = self.parse_inline_value_or_heredoc_in_flow();
3659 if let Err(e) = parsed {
3660 self.path.pop();
3661 return Err(e);
3662 }
3663 let v = parsed.unwrap();
3664 if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Trailing) {
3666 self.path.pop();
3667 return Err(e);
3668 }
3669 self.path.pop();
3670 items.push(v);
3671 self.skip_flow_ws()?;
3672 match self.peek() {
3673 Some(',') => { self.bump(); }
3674 Some(']') => { self.bump(); return Ok(items); }
3675 Some(c) => return Err(self.err(format!("unexpected '{c}' in flow array; expected ',' or ']'"))),
3676 None => return Err(self.err("unterminated flow array")),
3677 }
3678 }
3679 }
3680
3681 fn parse_flow_table(&mut self) -> Result<DmsMap<Value>, DecodeError> {
3682 self.bump(); let mut t: DmsMap<Value> = DmsMap::default();
3684 loop {
3685 self.skip_flow_ws()?;
3686 if self.peek() == Some('}') {
3687 self.bump();
3688 return Ok(t);
3689 }
3690 let key = self.parse_key()?;
3691 if self.peek() != Some(':') {
3692 return Err(self.err("expected ':' after flow-table key"));
3693 }
3694 self.bump();
3695 if !matches!(self.peek(), Some(' ') | Some('\t') | Some('\n') | Some('\r')) {
3697 return Err(self.err("expected whitespace after ':'"));
3698 }
3699 self.skip_flow_ws()?;
3700 self.path.push(BreadcrumbSegment::Key(key.clone()));
3703 if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Inner) {
3705 self.path.pop();
3706 return Err(e);
3707 }
3708 let parsed = self.parse_inline_value_or_heredoc_in_flow();
3709 if let Err(e) = parsed {
3710 self.path.pop();
3711 return Err(e);
3712 }
3713 let v = parsed.unwrap();
3714 if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Trailing) {
3716 self.path.pop();
3717 return Err(e);
3718 }
3719 self.path.pop();
3720 if t.contains_key(&key) {
3721 return Err(self.err(format!("duplicate key: {key}")));
3722 }
3723 t.insert(key, v);
3724 self.skip_flow_ws()?;
3725 match self.peek() {
3726 Some(',') => { self.bump(); }
3727 Some('}') => { self.bump(); return Ok(t); }
3728 Some(c) => return Err(self.err(format!("unexpected '{c}' in flow table; expected ',' or '}}'"))),
3729 None => return Err(self.err("unterminated flow table")),
3730 }
3731 }
3732 }
3733
3734 fn parse_flow_table_unordered(&mut self) -> Result<DmsHashMap<Value>, DecodeError> {
3738 self.bump(); let mut t: DmsHashMap<Value> = DmsHashMap::default();
3740 loop {
3741 self.skip_flow_ws()?;
3742 if self.peek() == Some('}') {
3743 self.bump();
3744 return Ok(t);
3745 }
3746 let key = self.parse_key()?;
3747 if self.peek() != Some(':') {
3748 return Err(self.err("expected ':' after flow-table key"));
3749 }
3750 self.bump();
3751 if !matches!(self.peek(), Some(' ') | Some('\t') | Some('\n') | Some('\r')) {
3752 return Err(self.err("expected whitespace after ':'"));
3753 }
3754 self.skip_flow_ws()?;
3755 self.path.push(BreadcrumbSegment::Key(key.clone()));
3758 if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Inner) {
3760 self.path.pop();
3761 return Err(e);
3762 }
3763 let parsed = self.parse_inline_value_or_heredoc_in_flow();
3764 if let Err(e) = parsed {
3765 self.path.pop();
3766 return Err(e);
3767 }
3768 let v = parsed.unwrap();
3769 if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Trailing) {
3771 self.path.pop();
3772 return Err(e);
3773 }
3774 self.path.pop();
3775 if t.contains_key(&key) {
3776 return Err(self.err(format!("duplicate key: {key}")));
3777 }
3778 t.insert(key, v);
3779 self.skip_flow_ws()?;
3780 match self.peek() {
3781 Some(',') => { self.bump(); }
3782 Some('}') => { self.bump(); return Ok(t); }
3783 Some(c) => return Err(self.err(format!("unexpected '{c}' in flow table; expected ',' or '}}'"))),
3784 None => return Err(self.err("unterminated flow table")),
3785 }
3786 }
3787 }
3788
3789 fn skip_flow_ws(&mut self) -> Result<(), DecodeError> {
3791 loop {
3792 match self.peek() {
3793 Some(' ') | Some('\t') => { self.bump(); }
3794 Some('\n') => { self.bump(); self.advance_line(); }
3795 Some('\r') if self.rest().starts_with("\r\n") => {
3796 self.pos += 2; self.advance_line();
3797 }
3798 Some('#') => return Err(self.err("comments not allowed inside flow forms")),
3799 Some('/') if self.rest().starts_with("//") => return Err(self.err("comments not allowed inside flow forms")),
3800 Some('/') if self.rest().starts_with("/*") => return Err(self.err("comments not allowed inside flow forms")),
3801 _ => return Ok(()),
3802 }
3803 }
3804 }
3805
3806 fn parse_inline_value_or_heredoc_in_flow(&mut self) -> Result<Value, DecodeError> {
3808 match self.peek() {
3809 Some('"') if self.rest().starts_with("\"\"\"") => {
3810 Err(self.err("heredocs are not allowed inside flow forms"))
3811 }
3812 Some('\'') if self.rest().starts_with("'''") => {
3813 Err(self.err("heredocs are not allowed inside flow forms"))
3814 }
3815 _ => self.parse_inline_value_or_heredoc(),
3816 }
3817 }
3818
3819 fn consume_after_value(&mut self, allow_eof: bool) -> Result<(), DecodeError> {
3822 self.capture_t1_decoration_run(tier1::DecorationPosition::Trailing)?;
3835 let mut first = true;
3836 loop {
3837 let ws_start = self.pos;
3838 self.skip_inline_ws();
3839 let had_ws = self.pos > ws_start;
3840 let captured = match self.peek() {
3841 Some('#') if !self.rest().starts_with("###") => {
3842 if !had_ws {
3843 return Err(self.err("expected whitespace before '#' comment"));
3844 }
3845 let raw = self.read_line_comment_to_eol();
3846 if !self.lite {
3847 self.comments.push(AttachedComment {
3848 comment: Comment { content: raw, kind: CommentKind::Line },
3849 position: CommentPosition::Trailing,
3850 path: self.path.clone(),
3851 });
3852 }
3853 break;
3855 }
3856 Some('/') if self.rest().starts_with("//") => {
3857 if !had_ws {
3858 return Err(self.err("expected whitespace before '//' comment"));
3859 }
3860 let raw = self.read_line_comment_to_eol();
3861 if !self.lite {
3862 self.comments.push(AttachedComment {
3863 comment: Comment { content: raw, kind: CommentKind::Line },
3864 position: CommentPosition::Trailing,
3865 path: self.path.clone(),
3866 });
3867 }
3868 break;
3869 }
3870 Some('/') if self.rest().starts_with("/*") => {
3871 let raw = self.read_c_block_comment()?;
3872 if !self.lite {
3873 self.comments.push(AttachedComment {
3874 comment: Comment { content: raw, kind: CommentKind::Block },
3875 position: CommentPosition::Trailing,
3876 path: self.path.clone(),
3877 });
3878 }
3879 true
3880 }
3881 _ => false,
3882 };
3883 if !captured {
3884 let _ = first;
3888 break;
3889 }
3890 first = false;
3891 }
3892 match self.peek() {
3894 None => {
3895 if allow_eof { Ok(()) } else { Ok(()) }
3896 }
3897 Some('\n') => { self.bump(); self.advance_line(); Ok(()) }
3898 Some('\r') if self.rest().starts_with("\r\n") => {
3899 self.pos += 2; self.advance_line(); Ok(())
3900 }
3901 Some(c) => Err(self.err(format!("unexpected character '{c}' after value"))),
3902 }
3903 }
3904}
3905
/// Result of scanning a numeric token without consuming it.
struct NumTok {
    /// Number of bytes, counted from the cursor, that belong to the token.
    len: usize,
    /// True when the scan saw a decimal point or an exponent marker.
    is_float: bool,
}
3912
3913#[derive(Debug, Clone)]
3914struct HMod {
3915 name: String,
3916 args: Vec<Value>,
3917}
3918
3919struct HBody {
3920 lines: Vec<HLine>,
3921 strip_depth: usize,
3922}
3923
/// One raw line of a heredoc body plus the parser bookkeeping captured when
/// the line was read.
struct HLine {
    /// Line text without its line ending.
    text: String,
    /// Parser `line` counter at the time the line was read.
    line: usize,
    /// Parser `line_start` offset at the time the line was read.
    line_start: usize,
}
3929
/// Reports whether `b` may appear as a digit in a radix-prefixed literal
/// during token scanning.
///
/// Every ASCII hexadecimal digit is accepted regardless of the actual radix.
/// The second argument is accepted but not consulted.
fn is_base_digit(b: u8, _prev_for_disambig: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
}
3938
/// Parses an integer literal into an `i64`.
///
/// Accepts an optional `+` / `-` sign, then either a decimal number or a
/// lowercase radix prefix (`0x`, `0o`, `0b`) followed by digits of that
/// radix. Underscores may separate digits but must sit between two digits.
/// Decimal literals may not have leading zeros.
///
/// An uppercase `0X` prefix is now rejected with its dedicated message. That
/// check used to sit inside the `0x` branch, where it could never fire, so
/// such input was reported as a leading-zero error instead.
///
/// # Errors
/// Returns a human-readable message for an empty number, a misplaced
/// underscore, a leading zero, an uppercase hex prefix, a digit that is
/// invalid for the radix, or a value outside the `i64` range.
fn parse_integer(s: &str) -> Result<i64, String> {
    let (sign, rest) = if let Some(r) = s.strip_prefix('-') {
        (-1i128, r)
    } else if let Some(r) = s.strip_prefix('+') {
        (1i128, r)
    } else {
        (1i128, s)
    };
    // Must run before prefix detection: "0X…" never matches the "0x" branch.
    if rest.starts_with("0X") {
        return Err("hex prefix must be lowercase '0x'".into());
    }
    let (radix, body) = if let Some(r) = rest.strip_prefix("0x") {
        (16u32, r)
    } else if let Some(r) = rest.strip_prefix("0o") {
        (8u32, r)
    } else if let Some(r) = rest.strip_prefix("0b") {
        (2u32, r)
    } else {
        (10u32, rest)
    };
    if body.is_empty() {
        return Err("empty number".into());
    }
    if body.starts_with('_') || body.ends_with('_') {
        return Err("underscore must be between digits".into());
    }
    if radix == 10 && rest.len() > 1 && rest.starts_with('0') {
        return Err("leading zeros are not allowed on decimal integers".into());
    }
    let mut clean = String::with_capacity(body.len());
    // Tracks whether the previous character was a digit, so that doubled
    // underscores are rejected.
    let mut prev_is_digit = false;
    for c in body.chars() {
        if c == '_' {
            if !prev_is_digit {
                return Err("underscore must be between digits".into());
            }
            prev_is_digit = false;
        } else {
            if !c.is_digit(radix) {
                return Err(format!("invalid digit '{c}' for base {radix}"));
            }
            clean.push(c);
            prev_is_digit = true;
        }
    }
    if !prev_is_digit {
        return Err("underscore must be between digits".into());
    }
    // Parse wide so that the magnitude of i64::MIN can be represented before
    // the sign is applied.
    let n = i128::from_str_radix(&clean, radix)
        .map_err(|_| "integer out of range".to_string())?;
    i64::try_from(sign * n).map_err(|_| "integer out of i64 range".to_string())
}
3992
3993fn parse_float(s: &str) -> Option<f64> {
3994 let (sign, rest) = if let Some(r) = s.strip_prefix('-') { (-1.0f64, r) }
3995 else if let Some(r) = s.strip_prefix('+') { (1.0f64, r) }
3996 else { (1.0f64, s) };
3997 if rest.starts_with("0x") || rest.starts_with("0o") || rest.starts_with("0b") {
3998 return parse_nondec_float(rest).map(|v| sign * v);
3999 }
4000 parse_dec_float(rest).map(|v| sign * v)
4001}
4002
4003fn parse_dec_float(s: &str) -> Option<f64> {
4004 let (m, e) = if let Some(idx) = s.find(['e', 'E']) {
4006 (&s[..idx], Some(&s[idx + 1..]))
4007 } else {
4008 (s, None)
4009 };
4010 if !m.contains('.') {
4011 return None; }
4013 let parts: Vec<&str> = m.splitn(2, '.').collect();
4014 if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
4015 return None;
4016 }
4017 if !parts[0].chars().all(|c| c.is_ascii_digit() || c == '_')
4018 || !parts[1].chars().all(|c| c.is_ascii_digit() || c == '_')
4019 {
4020 return None;
4021 }
4022 if !valid_underscores(parts[0]) || !valid_underscores(parts[1]) {
4023 return None;
4024 }
4025 let int_part: String = parts[0].chars().filter(|&c| c != '_').collect();
4026 let frac_part: String = parts[1].chars().filter(|&c| c != '_').collect();
4027 let mut full = format!("{int_part}.{frac_part}");
4028 if let Some(es) = e {
4029 let es_clean = es.trim_start_matches(['+', '-']);
4030 if es_clean.contains('_') {
4031 return None;
4032 }
4033 if !es.chars().all(|c| c.is_ascii_digit() || c == '+' || c == '-') {
4034 return None;
4035 }
4036 if es_clean.is_empty() {
4037 return None;
4038 }
4039 full.push('e');
4040 full.push_str(es);
4041 }
4042 full.parse::<f64>().ok()
4043}
4044
4045fn parse_nondec_float(s: &str) -> Option<f64> {
4046 let (radix, rest) = if let Some(r) = s.strip_prefix("0x") { (16u32, r) }
4048 else if let Some(r) = s.strip_prefix("0o") { (8u32, r) }
4049 else if let Some(r) = s.strip_prefix("0b") { (2u32, r) }
4050 else { return None; };
4051 let p_idx = rest.find('p')?;
4052 let mant = &rest[..p_idx];
4053 let exp_str = &rest[p_idx + 1..];
4054 if exp_str.is_empty() {
4055 return None;
4056 }
4057 if exp_str.contains('_') {
4058 return None;
4059 }
4060 let exp_clean = exp_str.trim_start_matches(['+', '-']);
4061 if !exp_clean.chars().all(|c| c.is_ascii_digit()) {
4062 return None;
4063 }
4064 let exp: i32 = exp_str.parse().ok()?;
4065 let (int_part, frac_part) = if let Some(idx) = mant.find('.') {
4067 (&mant[..idx], &mant[idx + 1..])
4068 } else {
4069 (mant, "")
4070 };
4071 if mant.contains('.') {
4072 if int_part.is_empty() || frac_part.is_empty() {
4073 return None;
4074 }
4075 }
4076 if !valid_underscores(int_part) || !valid_underscores(frac_part) {
4077 return None;
4078 }
4079 let int_clean: String = int_part.chars().filter(|&c| c != '_').collect();
4080 let frac_clean: String = frac_part.chars().filter(|&c| c != '_').collect();
4081 if int_clean.is_empty() && frac_clean.is_empty() {
4082 return None;
4083 }
4084 if !int_clean.chars().all(|c| c.is_digit(radix)) {
4085 return None;
4086 }
4087 if !frac_clean.chars().all(|c| c.is_digit(radix)) {
4088 return None;
4089 }
4090 let int_val = if int_clean.is_empty() {
4091 0u128
4092 } else {
4093 u128::from_str_radix(&int_clean, radix).ok()?
4094 };
4095 let mut frac_val = 0f64;
4096 let mut div = radix as f64;
4097 for c in frac_clean.chars() {
4098 let d = c.to_digit(radix)? as f64;
4099 frac_val += d / div;
4100 div *= radix as f64;
4101 }
4102 let mantissa = int_val as f64 + frac_val;
4103 Some(mantissa * (2f64).powi(exp))
4104}
4105
/// Checks the placement of `_` digit separators in `s`.
///
/// An empty string is accepted. Otherwise `s` must not start or end with an
/// underscore and must not contain two underscores in a row. Characters other
/// than `_` are not inspected.
fn valid_underscores(s: &str) -> bool {
    let underscore_at_edge = s.starts_with('_') || s.ends_with('_');
    !underscore_at_edge && !s.contains("__")
}
4126
4127fn validate_date(s: &str) -> Result<(), String> {
4130 if s.len() != 10 {
4131 return Err("invalid date length".into());
4132 }
4133 let b = s.as_bytes();
4134 if b[4] != b'-' || b[7] != b'-' {
4135 return Err("invalid date format".into());
4136 }
4137 for &i in &[0, 1, 2, 3, 5, 6, 8, 9] {
4138 if !b[i].is_ascii_digit() {
4139 return Err("date must be all digits".into());
4140 }
4141 }
4142 let year: u16 = (b[0] - b'0') as u16 * 1000
4143 + (b[1] - b'0') as u16 * 100
4144 + (b[2] - b'0') as u16 * 10
4145 + (b[3] - b'0') as u16;
4146 let month: u8 = (b[5] - b'0') * 10 + (b[6] - b'0');
4147 let day: u8 = (b[8] - b'0') * 10 + (b[9] - b'0');
4148 if month < 1 || month > 12 {
4149 return Err("month out of range".into());
4150 }
4151 if day < 1 || day > days_in_month(year, month) {
4152 return Err("day out of range".into());
4153 }
4154 Ok(())
4155}
4156
/// Validates a wall-clock time written as `HH:MM:SS`.
///
/// The input must be exactly 8 bytes long with `:` at byte offsets 2 and 5 and
/// ASCII digits everywhere else. Hours must be at most 23, minutes and seconds
/// at most 59 (a seconds value of 60 is rejected).
///
/// On failure the `Err` carries a short description of the first check that
/// failed.
fn validate_time(s: &str) -> Result<(), String> {
    let b = s.as_bytes();
    if b.len() != 8 {
        return Err("invalid time length".into());
    }
    if b[2] != b':' || b[5] != b':' {
        return Err("invalid time format".into());
    }
    let all_digits = [0usize, 1, 3, 4, 6, 7]
        .iter()
        .all(|&at| b[at].is_ascii_digit());
    if !all_digits {
        return Err("time must be all digits".into());
    }

    // Reads the two-digit field starting at `at`; digits were verified above.
    let field = |at: usize| (b[at] - b'0') * 10 + (b[at + 1] - b'0');
    let (hh, mm, ss) = (field(0), field(3), field(6));

    if hh > 23 {
        Err("hour out of range".into())
    } else if mm > 59 {
        Err("minute out of range".into())
    } else if ss > 59 {
        Err("second out of range (leap seconds not supported)".into())
    } else {
        Ok(())
    }
}
4184
4185fn days_in_month(year: u16, month: u8) -> u8 {
4186 match month {
4187 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
4188 4 | 6 | 9 | 11 => 30,
4189 2 => if is_leap(year) { 29 } else { 28 },
4190 _ => 0,
4191 }
4192}
4193
/// Returns whether `y` is a leap year: divisible by 400, or divisible by 4
/// but not by 100.
fn is_leap(y: u16) -> bool {
    if y % 400 == 0 {
        true
    } else if y % 100 == 0 {
        false
    } else {
        y % 4 == 0
    }
}
4197
4198fn validate_heredoc_basic_surrogates(body: &HBody) -> Result<(), DecodeError> {
4208 for line in &body.lines {
4209 let bytes = line.text.as_bytes();
4210 let mut i = 0;
4211 while i < bytes.len() {
4212 if bytes[i] == b'\\' {
4217 let mut j = i;
4219 while j < bytes.len() && bytes[j] == b'\\' {
4220 j += 1;
4221 }
4222 let run = j - i;
4223 if run % 2 == 1 && j < bytes.len() {
4226 let intro = bytes[j];
4227 let n = match intro {
4228 b'u' => 4,
4229 b'U' => 8,
4230 _ => 0,
4231 };
4232 if n > 0 && j + 1 + n <= bytes.len() {
4233 let hex = &line.text[j + 1..j + 1 + n];
4234 if hex.bytes().all(|b| b.is_ascii_hexdigit()) {
4235 if let Ok(cp) = u32::from_str_radix(hex, 16) {
4236 if (0xD800..=0xDFFF).contains(&cp) {
4237 let esc_off = j - 1;
4240 return Err(DecodeError {
4241 line: line.line,
4242 column: esc_off + 1,
4243 message: format!(
4244 "surrogate codepoint U+{cp:04X} in escape"
4245 ),
4246 });
4247 }
4248 }
4249 }
4250 }
4251 }
4252 i = j;
4253 } else {
4254 i += 1;
4255 }
4256 }
4257 }
4258 Ok(())
4259}
4260
4261fn strip_indent_and_continuations(body: &HBody, allow_continuation: bool) -> Result<String, DecodeError> {
4262 let mut out = String::new();
4263 let mut first = true;
4264 let mut pending_continuation = false;
4265 let mut last_line_pos = (1usize, 0usize); for line in &body.lines {
4267 let raw = &line.text;
4268 last_line_pos = (line.line, line.line_start);
4269 let is_blank = raw.bytes().all(|b| b == b' ' || b == b'\t');
4271 let stripped: &str = if is_blank {
4272 ""
4273 } else {
4274 let leading_spaces = raw.bytes().take_while(|&b| b == b' ').count();
4276 if leading_spaces < body.strip_depth {
4277 return Err(DecodeError {
4278 line: line.line,
4279 column: leading_spaces + 1,
4280 message: format!(
4281 "heredoc body line indented {} spaces, less than strip depth {}",
4282 leading_spaces, body.strip_depth
4283 ),
4284 });
4285 }
4286 &raw[body.strip_depth..]
4287 };
4288 let mut piece = stripped.to_string();
4289 let mut splice = false;
4291 if allow_continuation {
4292 let trimmed_end = piece.trim_end_matches(|c: char| c == ' ' || c == '\t').to_string();
4294 if let Some(last_idx) = trimmed_end.rfind('\\') {
4295 if last_idx == trimmed_end.len() - 1 {
4296 let preceding = trimmed_end[..last_idx]
4298 .bytes()
4299 .rev()
4300 .take_while(|&b| b == b'\\')
4301 .count();
4302 if preceding % 2 == 0 {
4303 piece = trimmed_end[..last_idx].to_string();
4306 splice = true;
4307 }
4308 }
4309 }
4310 }
4311 if first {
4312 out.push_str(&piece);
4313 first = false;
4314 } else if pending_continuation {
4315 let trimmed_start = piece.trim_start_matches(|c: char| c == ' ' || c == '\t');
4317 if !is_blank {
4319 out.push_str(trimmed_start);
4320 }
4321 } else {
4322 out.push('\n');
4323 out.push_str(&piece);
4324 }
4325 pending_continuation = splice;
4326 }
4327 if pending_continuation {
4328 return Err(DecodeError {
4329 line: last_line_pos.0,
4330 column: 1,
4331 message: "trailing line continuation has nothing to splice to".into(),
4332 });
4333 }
4334 Ok(out)
4335}
4336
4337fn apply_modifiers(s: String, mods: &[HMod]) -> Result<String, String> {
4338 let mut cur = s;
4339 for m in mods {
4340 match m.name.as_str() {
4341 "_fold_paragraphs" => {
4342 if !m.args.is_empty() {
4343 return Err("fold_paragraphs() takes no arguments".into());
4344 }
4345 cur = fold_paragraphs(&cur);
4346 }
4347 "_trim" => {
4348 if m.args.len() < 2 || m.args.len() > 3 {
4349 return Err("trim(chars, where, replacement = \"\") expects 2 or 3 arguments".into());
4350 }
4351 let chars = match &m.args[0] {
4352 Value::String(s) => s.clone(),
4353 _ => return Err("trim: first argument (chars) must be a string".into()),
4354 };
4355 let where_s = match &m.args[1] {
4356 Value::String(s) => s.clone(),
4357 _ => return Err("trim: second argument (where) must be a string".into()),
4358 };
4359 let replacement = if m.args.len() == 3 {
4360 match &m.args[2] {
4361 Value::String(s) => s.clone(),
4362 _ => return Err("trim: third argument (replacement) must be a string".into()),
4363 }
4364 } else {
4365 String::new()
4366 };
4367 cur = apply_trim(&cur, &chars, &where_s, &replacement);
4368 }
4369 other => return Err(format!("unknown modifier: {other}")),
4370 }
4371 }
4372 Ok(cur)
4373}
4374
/// Folds each paragraph of `s` onto a single line.
///
/// Paragraphs are the pieces obtained by splitting on `"\n\n"`. Within a
/// paragraph the non-empty lines are joined with one space; the folded
/// paragraphs are then joined with a single `'\n'`.
fn fold_paragraphs(s: &str) -> String {
    let mut folded = String::with_capacity(s.len());
    for (n, paragraph) in s.split("\n\n").enumerate() {
        if n > 0 {
            folded.push('\n');
        }
        let mut wrote_line = false;
        for line in paragraph.split('\n').filter(|l| !l.is_empty()) {
            if wrote_line {
                folded.push(' ');
            }
            folded.push_str(line);
            wrote_line = true;
        }
    }
    folded
}
4393
4394fn apply_trim(s: &str, chars: &str, where_s: &str, replacement: &str) -> String {
4410 if chars.is_empty() {
4411 return s.to_string();
4412 }
4413 let char_set: std::collections::HashSet<char> = chars.chars().collect();
4414 let has_star = where_s.contains('*');
4415 let has_pipe = where_s.contains('|');
4416 let has_lt = where_s.contains('<');
4417 let has_gt = where_s.contains('>');
4418 if !(has_star || has_pipe || has_lt || has_gt) {
4419 return s.to_string();
4420 }
4421 if has_star {
4422 return replace_all_runs(s, &char_set, replacement);
4423 }
4424 let mut cur = s.to_string();
4425 if has_pipe {
4426 cur = per_line_edges(&cur, &char_set, replacement);
4427 }
4428 if has_lt {
4429 cur = replace_leading_run(&cur, &char_set, replacement);
4430 }
4431 if has_gt {
4432 cur = replace_trailing_run(&cur, &char_set, replacement);
4433 }
4434 cur
4435}
4436
/// Replaces every maximal run of characters contained in `char_set` with one
/// copy of `replacement`; all other characters are copied through unchanged.
fn replace_all_runs(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut inside_run = false;
    for c in s.chars() {
        if char_set.contains(&c) {
            // Emit the replacement once, when the run starts.
            if !inside_run {
                out.push_str(replacement);
                inside_run = true;
            }
        } else {
            inside_run = false;
            out.push(c);
        }
    }
    out
}
4456
/// Replaces the run of `char_set` characters at the very start of `s` with
/// `replacement`. If `s` does not start with such a character it is returned
/// unchanged (no replacement is inserted).
fn replace_leading_run(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
    let rest = s.trim_start_matches(|c: char| char_set.contains(&c));
    if rest.len() == s.len() {
        // Nothing was trimmed, so there is no leading run.
        return s.to_string();
    }
    [replacement, rest].concat()
}
4474
/// Replaces the run of `char_set` characters at the very end of `s` with
/// `replacement`. If `s` does not end with such a character it is returned
/// unchanged (no replacement is appended).
fn replace_trailing_run(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
    let kept = s.trim_end_matches(|c: char| char_set.contains(&c));
    if kept.len() == s.len() {
        // Nothing was trimmed, so there is no trailing run.
        return s.to_string();
    }
    [kept, replacement].concat()
}
4492
4493fn per_line_edges(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
4494 let mut out = String::with_capacity(s.len());
4495 let mut lines = s.split('\n').peekable();
4496 let mut first = true;
4497 while let Some(line) = lines.next() {
4498 if !first {
4499 out.push('\n');
4500 }
4501 first = false;
4502 let trimmed_start = replace_leading_run(line, char_set, replacement);
4503 let trimmed = replace_trailing_run(&trimmed_start, char_set, replacement);
4504 out.push_str(&trimmed);
4505 }
4506 out
4507}
4508
4509pub fn encode(doc: &Document) -> Result<String, EncodeError> {
4529 if contains_unordered_table(&doc.body) {
4530 return Err(EncodeError::UnorderedInFullMode);
4531 }
4532 let mut emitter = DmsEmitter::new(doc);
4533 emitter.emit_document();
4534 Ok(emitter.out)
4535}
4536
4537fn contains_unordered_table(v: &Value) -> bool {
4541 match v {
4542 Value::UnorderedTable(_) => true,
4543 Value::Table(t) => t.values().any(contains_unordered_table),
4544 Value::List(items) => items.iter().any(contains_unordered_table),
4545 _ => false,
4546 }
4547}
4548
4549pub fn encode_lite(doc: &Document) -> String {
4560 let mut emitter = DmsEmitter::new_lite(doc);
4561 emitter.emit_document();
4562 emitter.out
4563}
4564
/// Selects which encoder [`encode_with_mode`] dispatches to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmitMode {
    /// Dispatch to [`encode`], which can fail with
    /// [`EncodeError::UnorderedInFullMode`].
    Full,
    /// Dispatch to [`encode_lite`], which cannot fail.
    Lite,
}
4577
4578pub fn encode_with_mode(doc: &Document, mode: EmitMode) -> Result<String, EncodeError> {
4583 match mode {
4584 EmitMode::Full => encode(doc),
4585 EmitMode::Lite => Ok(encode_lite(doc)),
4586 }
4587}
4588
4589#[allow(dead_code)]
4592pub(crate) fn encode_with_decorators(
4593 doc: &Document,
4594 decorators: &[tier1::DecoratorEntry],
4595) -> Result<String, EncodeError> {
4596 encode_with_decorators_and_suppressed(doc, decorators, std::collections::HashSet::new())
4597}
4598
4599pub(crate) fn encode_with_decorators_and_suppressed(
4605 doc: &Document,
4606 decorators: &[tier1::DecoratorEntry],
4607 suppressed_paths: std::collections::HashSet<Vec<BreadcrumbSegment>>,
4608) -> Result<String, EncodeError> {
4609 if contains_unordered_table(&doc.body) {
4610 return Err(EncodeError::UnorderedInFullMode);
4611 }
4612 let comments_by_path = SortedIndex::from_comments(&doc.comments);
4613 let forms_by_path = SortedIndex::from_forms(&doc.original_forms);
4614 let decorators_by_path = SortedIndex::from_decorators(decorators);
4615 let mut emitter = DmsEmitter {
4616 out: String::new(),
4617 comments_by_path,
4618 forms_by_path,
4619 decorators_by_path,
4620 suppressed_paths,
4621 lite: false,
4622 doc,
4623 };
4624 emitter.emit_document();
4625 Ok(emitter.out)
4626}
4627
/// Deprecated alias: forwards to [`encode`] and returns its result unchanged.
#[deprecated(since = "0.3.0", note = "use `encode` instead")]
pub fn to_dms(doc: &Document) -> Result<String, EncodeError> {
    encode(doc)
}
4639
/// Deprecated alias: forwards to [`encode_lite`] and returns its result
/// unchanged.
#[deprecated(since = "0.3.0", note = "use `encode_lite` instead")]
pub fn to_dms_lite(doc: &Document) -> String {
    encode_lite(doc)
}
4645
/// Deprecated alias: forwards to [`encode_with_mode`] and returns its result
/// unchanged.
#[deprecated(since = "0.3.0", note = "use `encode_with_mode` instead")]
pub fn to_dms_with_mode(doc: &Document, mode: EmitMode) -> Result<String, EncodeError> {
    encode_with_mode(doc, mode)
}
4651
/// Lookup table from a breadcrumb path to a value `V`, stored as a vector
/// that `get` searches with a binary search.
struct SortedIndex<'a, V> {
    /// `(path, value)` pairs. The `from_*` constructors sort these by path and
    /// keep at most one entry per distinct path, which `get` relies on.
    entries: Vec<(&'a [BreadcrumbSegment], V)>,
}
4667
4668impl<'a, V> SortedIndex<'a, V> {
4669 fn empty() -> Self {
4670 Self { entries: Vec::new() }
4671 }
4672
4673 fn get(&self, query: &[BreadcrumbSegment]) -> Option<&V> {
4675 match self.entries.binary_search_by(|(p, _)| (*p).cmp(query)) {
4676 Ok(i) => Some(&self.entries[i].1),
4677 Err(_) => None,
4678 }
4679 }
4680}
4681
4682impl<'a> SortedIndex<'a, NodeComments<'a>> {
4683 fn from_comments(comments: &'a [AttachedComment]) -> Self {
4687 if comments.is_empty() {
4688 return Self::empty();
4689 }
4690 let mut idx: Vec<usize> = (0..comments.len()).collect();
4691 idx.sort_by(|&a, &b| comments[a].path.cmp(&comments[b].path));
4692 let mut entries: Vec<(&'a [BreadcrumbSegment], NodeComments<'a>)> =
4693 Vec::with_capacity(comments.len());
4694 for i in idx {
4695 let ac = &comments[i];
4696 let path: &'a [BreadcrumbSegment] = ac.path.as_slice();
4697 let same_as_prev = entries.last().is_some_and(|(p, _)| *p == path);
4698 if !same_as_prev {
4699 entries.push((path, NodeComments::default()));
4700 }
4701 let nc = &mut entries.last_mut().unwrap().1;
4702 match ac.position {
4703 CommentPosition::Leading => nc.leading.push(&ac.comment),
4704 CommentPosition::Inner => nc.inner.push(&ac.comment),
4705 CommentPosition::Trailing => nc.trailing.push(&ac.comment),
4706 CommentPosition::Floating => nc.floating.push(&ac.comment),
4707 }
4708 }
4709 entries.shrink_to_fit();
4712 Self { entries }
4713 }
4714}
4715
4716impl<'a> SortedIndex<'a, &'a OriginalLiteral> {
4717 fn from_forms(forms: &'a [(Vec<BreadcrumbSegment>, OriginalLiteral)]) -> Self {
4720 if forms.is_empty() {
4721 return Self::empty();
4722 }
4723 let mut idx: Vec<usize> = (0..forms.len()).collect();
4724 idx.sort_by(|&a, &b| forms[a].0.cmp(&forms[b].0));
4725 let mut entries: Vec<(&'a [BreadcrumbSegment], &'a OriginalLiteral)> =
4726 Vec::with_capacity(forms.len());
4727 for i in idx {
4728 let (p, lit) = &forms[i];
4729 let path: &'a [BreadcrumbSegment] = p.as_slice();
4730 if entries.last().is_some_and(|(prev, _)| *prev == path) {
4732 continue;
4733 }
4734 entries.push((path, lit));
4735 }
4736 entries.shrink_to_fit();
4737 Self { entries }
4738 }
4739}
4740
4741impl<'a> SortedIndex<'a, NodeDecorations<'a>> {
4742 fn from_decorators(decorators: &'a [tier1::DecoratorEntry]) -> Self {
4746 if decorators.is_empty() {
4747 return Self::empty();
4748 }
4749 let mut path_indices: Vec<usize> = (0..decorators.len()).collect();
4754 path_indices.sort_by(|&a, &b| decorators[a].path.cmp(&decorators[b].path));
4755
4756 let mut entries: Vec<(&'a [BreadcrumbSegment], NodeDecorations<'a>)> =
4757 Vec::with_capacity(decorators.len());
4758
4759 for i in path_indices {
4760 let entry = &decorators[i];
4761 let path: &'a [BreadcrumbSegment] = entry.path.as_slice();
4762 let same_as_prev = entries.last().is_some_and(|(p, _)| *p == path);
4763 if !same_as_prev {
4764 entries.push((path, NodeDecorations::default()));
4765 }
4766 let nd = &mut entries.last_mut().unwrap().1;
4767 for (_sigil, calls) in &entry.decorators {
4769 for call in calls {
4770 match call.position {
4771 tier1::DecorationPosition::Leading => nd.leading.push(call),
4772 tier1::DecorationPosition::Inner => nd.inner.push(call),
4773 tier1::DecorationPosition::Trailing => nd.trailing.push(call),
4774 tier1::DecorationPosition::Floating => nd.floating.push(call),
4775 }
4776 }
4777 }
4778 }
4779 entries.shrink_to_fit();
4780 Self { entries }
4781 }
4782}
4783
/// State of one encoding pass over a [`Document`].
struct DmsEmitter<'a> {
    /// Output text accumulated so far; returned to the caller by the
    /// `encode*` functions once `emit_document` finishes.
    out: String,
    /// Comments grouped by path and position. Empty when built by `new_lite`.
    comments_by_path: SortedIndex<'a, NodeComments<'a>>,
    /// Original literal spellings by path, consulted by `emit_integer` and
    /// `emit_string`. Empty when built by `new_lite`.
    forms_by_path: SortedIndex<'a, &'a OriginalLiteral>,
    /// Decorator calls grouped by path and position. Only populated by
    /// `encode_with_decorators_and_suppressed`; empty otherwise.
    decorators_by_path: SortedIndex<'a, NodeDecorations<'a>>,
    /// Paths for which the table/list block emitters write only the key or
    /// list marker plus inner decorators, and no value.
    suppressed_paths: std::collections::HashSet<Vec<BreadcrumbSegment>>,
    /// `true` for emitters built by `new_lite`; `emit_document` then ignores
    /// front-matter comments when deciding whether to write a front-matter
    /// block.
    lite: bool,
    /// The document being encoded.
    doc: &'a Document,
}
4807
/// Comments attached to a single path, bucketed by `CommentPosition`.
/// Filled by `SortedIndex::from_comments`.
#[derive(Default)]
struct NodeComments<'a> {
    /// Comments with `CommentPosition::Leading`; the block emitters write
    /// these on their own lines before the node.
    leading: Vec<&'a Comment>,
    /// Comments with `CommentPosition::Inner`.
    inner: Vec<&'a Comment>,
    /// Comments with `CommentPosition::Trailing`; `emit_trailing_for` appends
    /// these to the current output line.
    trailing: Vec<&'a Comment>,
    /// Comments with `CommentPosition::Floating`.
    floating: Vec<&'a Comment>,
}
4815
/// Decorator calls attached to a single path, bucketed by
/// `tier1::DecorationPosition`. Filled by `SortedIndex::from_decorators`.
#[derive(Default)]
struct NodeDecorations<'a> {
    /// Calls with `DecorationPosition::Leading`.
    leading: Vec<&'a tier1::DecoratorCall>,
    /// Calls with `DecorationPosition::Inner`.
    inner: Vec<&'a tier1::DecoratorCall>,
    /// Calls with `DecorationPosition::Trailing`.
    trailing: Vec<&'a tier1::DecoratorCall>,
    /// Calls with `DecorationPosition::Floating`.
    floating: Vec<&'a tier1::DecoratorCall>,
}
4823
/// Text emitted once per nesting level: `emit_comment_line` repeats it
/// `indent` times, and `emit_heredoc` adds its length to the current line's
/// indentation to place the heredoc body and terminator.
const INDENT_STR: &str = " ";
4825
4826impl<'a> DmsEmitter<'a> {
4827 fn new(doc: &'a Document) -> Self {
4828 Self {
4829 out: String::new(),
4830 comments_by_path: SortedIndex::from_comments(&doc.comments),
4831 forms_by_path: SortedIndex::from_forms(&doc.original_forms),
4832 decorators_by_path: SortedIndex::empty(),
4833 suppressed_paths: std::collections::HashSet::new(),
4834 lite: false,
4835 doc,
4836 }
4837 }
4838
4839 fn new_lite(doc: &'a Document) -> Self {
4844 Self {
4845 out: String::new(),
4846 comments_by_path: SortedIndex::empty(),
4847 forms_by_path: SortedIndex::empty(),
4848 decorators_by_path: SortedIndex::empty(),
4849 suppressed_paths: std::collections::HashSet::new(),
4850 lite: true,
4851 doc,
4852 }
4853 }
4854
4855 fn emit_document(&mut self) {
4856 let has_fm_comments = !self.lite && self.doc.comments.iter().any(|ac| {
4867 matches!(ac.path.first(), Some(BreadcrumbSegment::Key(k)) if k == "__fm__")
4868 });
4869 let fm_present = self.doc.meta.is_some();
4870 if fm_present || has_fm_comments {
4871 self.out.push_str("+++\n");
4872 let fm_path: Vec<BreadcrumbSegment> = vec![BreadcrumbSegment::Key("__fm__".to_string())];
4874 if let Some(meta) = &self.doc.meta {
4875 self.emit_table_block(meta, &fm_path, 0);
4876 } else {
4877 self.emit_floating(&fm_path, 0);
4880 }
4881 self.out.push_str("+++\n\n");
4882 }
4883 let body_path: Vec<BreadcrumbSegment> = Vec::new();
4885 match &self.doc.body {
4886 Value::Table(t) => self.emit_table_block(t, &body_path, 0),
4887 Value::UnorderedTable(t) => {
4888 self.emit_unordered_table_block(t, &body_path, 0);
4894 }
4895 Value::List(items) => self.emit_list_block(items, &body_path, 0),
4896 other => {
4897 let leading: Vec<&Comment> = self
4900 .comments_by_path
4901 .get(&body_path)
4902 .map(|nc| nc.leading.iter().copied().collect())
4903 .unwrap_or_default();
4904 for c in leading {
4905 self.emit_comment_line(c, 0);
4906 }
4907 self.emit_value_inline(other, &body_path);
4908 self.emit_trailing_for(&body_path);
4909 self.out.push('\n');
4910 let floating: Vec<&Comment> = self
4911 .comments_by_path
4912 .get(&body_path)
4913 .map(|nc| nc.floating.iter().copied().collect())
4914 .unwrap_or_default();
4915 for c in floating {
4916 self.emit_comment_line(c, 0);
4917 }
4918 }
4919 }
4920 }
4921
4922 fn emit_table_block(
4923 &mut self,
4924 t: &DmsMap<Value>,
4925 path: &[BreadcrumbSegment],
4926 indent: usize,
4927 ) {
4928 for (k, v) in t {
4929 let mut child_path: Vec<BreadcrumbSegment> = path.to_vec();
4930 child_path.push(BreadcrumbSegment::Key(k.clone()));
4931 if let Some(nc) = self.comments_by_path.get(&child_path) {
4933 let leading: Vec<&Comment> = nc.leading.iter().copied().collect();
4934 for c in leading {
4935 self.emit_comment_line(c, indent);
4936 }
4937 }
4938 self.emit_leading_decorators_for(&child_path, indent);
4940 let has_trailing = self
4952 .comments_by_path
4953 .get(&child_path)
4954 .map(|nc| !nc.trailing.is_empty())
4955 .unwrap_or(false)
4956 || self
4957 .decorators_by_path
4958 .get(&child_path)
4959 .map(|nd| !nd.trailing.is_empty())
4960 .unwrap_or(false);
4961 let has_inner = self.has_inner(&child_path)
4962 || self
4963 .decorators_by_path
4964 .get(&child_path)
4965 .map(|nd| !nd.inner.is_empty())
4966 .unwrap_or(false);
4967 let can_block = matches!(v,
4968 Value::Table(tt) if !tt.is_empty()
4969 ) || matches!(v,
4970 Value::UnorderedTable(tt) if !tt.is_empty()
4971 ) || matches!(v,
4972 Value::List(items) if !items.is_empty()
4973 );
4974 let needs_block = can_block
4975 && !(has_trailing && self.is_flow_safe(v, &child_path));
4976 self.push_indent(indent);
4977 self.out.push_str(&format_key(k));
4978 self.out.push(':');
4979 if self.suppressed_paths.contains(&child_path) {
4980 self.out.push(' ');
4985 self.emit_inner_decorators_for(&child_path);
4986 if self.out.ends_with(' ') {
4988 self.out.pop();
4989 }
4990 self.out.push('\n');
4991 } else if needs_block {
4992 if has_inner {
4995 self.out.push(' ');
4996 self.emit_inner_for(&child_path);
4997 self.emit_inner_decorators_for(&child_path);
4998 if self.out.ends_with(' ') {
5000 self.out.pop();
5001 }
5002 }
5003 self.out.push('\n');
5004 match v {
5005 Value::Table(tt) => self.emit_table_block(tt, &child_path, indent + 1),
5006 Value::UnorderedTable(tt) => {
5007 self.emit_unordered_table_block(tt, &child_path, indent + 1)
5008 }
5009 Value::List(items) => self.emit_list_block(items, &child_path, indent + 1),
5010 _ => unreachable!(),
5011 }
5012 } else {
5013 self.out.push(' ');
5014 self.emit_inner_for(&child_path);
5015 self.emit_inner_decorators_for(&child_path);
5016 self.emit_value_inline(v, &child_path);
5017 self.emit_trailing_for(&child_path);
5018 self.emit_trailing_decorators_for(&child_path);
5019 self.out.push('\n');
5020 }
5021 }
5022 self.emit_floating(path, indent);
5024 }
5025
5026 fn emit_unordered_table_block(
5031 &mut self,
5032 t: &DmsHashMap<Value>,
5033 path: &[BreadcrumbSegment],
5034 indent: usize,
5035 ) {
5036 for (k, v) in t {
5037 let mut child_path: Vec<BreadcrumbSegment> = path.to_vec();
5038 child_path.push(BreadcrumbSegment::Key(k.clone()));
5039 if let Some(nc) = self.comments_by_path.get(&child_path) {
5044 let leading: Vec<&Comment> = nc.leading.iter().copied().collect();
5045 for c in leading {
5046 self.emit_comment_line(c, indent);
5047 }
5048 }
5049 let has_trailing = self
5050 .comments_by_path
5051 .get(&child_path)
5052 .map(|nc| !nc.trailing.is_empty())
5053 .unwrap_or(false);
5054 let has_inner = self.has_inner(&child_path);
5055 let can_block = matches!(v,
5056 Value::Table(tt) if !tt.is_empty()
5057 ) || matches!(v,
5058 Value::UnorderedTable(tt) if !tt.is_empty()
5059 ) || matches!(v,
5060 Value::List(items) if !items.is_empty()
5061 );
5062 let needs_block = can_block
5063 && !(has_trailing && self.is_flow_safe(v, &child_path));
5064 self.push_indent(indent);
5065 self.out.push_str(&format_key(k));
5066 self.out.push(':');
5067 if needs_block {
5068 if has_inner {
5069 self.out.push(' ');
5070 self.emit_inner_for(&child_path);
5071 if self.out.ends_with(' ') {
5072 self.out.pop();
5073 }
5074 }
5075 self.out.push('\n');
5076 match v {
5077 Value::Table(tt) => self.emit_table_block(tt, &child_path, indent + 1),
5078 Value::UnorderedTable(tt) => {
5079 self.emit_unordered_table_block(tt, &child_path, indent + 1)
5080 }
5081 Value::List(items) => self.emit_list_block(items, &child_path, indent + 1),
5082 _ => unreachable!(),
5083 }
5084 } else {
5085 self.out.push(' ');
5086 self.emit_inner_for(&child_path);
5087 self.emit_value_inline(v, &child_path);
5088 self.emit_trailing_for(&child_path);
5089 self.out.push('\n');
5090 }
5091 }
5092 self.emit_floating(path, indent);
5093 }
5094
5095 fn emit_list_block(
5096 &mut self,
5097 items: &[Value],
5098 path: &[BreadcrumbSegment],
5099 indent: usize,
5100 ) {
5101 for (i, v) in items.iter().enumerate() {
5102 let mut child_path: Vec<BreadcrumbSegment> = path.to_vec();
5103 child_path.push(BreadcrumbSegment::Index(i));
5104 if let Some(nc) = self.comments_by_path.get(&child_path) {
5105 let leading: Vec<&Comment> = nc.leading.iter().copied().collect();
5106 for c in leading {
5107 self.emit_comment_line(c, indent);
5108 }
5109 }
5110 self.emit_leading_decorators_for(&child_path, indent);
5112 self.push_indent(indent);
5113 self.out.push('+');
5114 let has_inner = self.has_inner(&child_path)
5115 || self
5116 .decorators_by_path
5117 .get(&child_path)
5118 .map(|nd| !nd.inner.is_empty())
5119 .unwrap_or(false);
5120 if self.suppressed_paths.contains(&child_path) {
5121 self.out.push(' ');
5125 self.emit_inner_decorators_for(&child_path);
5126 if self.out.ends_with(' ') {
5127 self.out.pop();
5128 }
5129 self.out.push('\n');
5130 } else {
5131 match v {
5132 Value::Table(tt) if !tt.is_empty() => {
5133 if has_inner {
5135 self.out.push(' ');
5136 self.emit_inner_for(&child_path);
5137 self.emit_inner_decorators_for(&child_path);
5138 if self.out.ends_with(' ') {
5139 self.out.pop();
5140 }
5141 }
5142 self.emit_trailing_for(&child_path);
5143 self.emit_trailing_decorators_for(&child_path);
5144 self.out.push('\n');
5145 self.emit_table_block(tt, &child_path, indent + 1);
5146 }
5147 Value::UnorderedTable(tt) if !tt.is_empty() => {
5148 if has_inner {
5149 self.out.push(' ');
5150 self.emit_inner_for(&child_path);
5151 self.emit_inner_decorators_for(&child_path);
5152 if self.out.ends_with(' ') {
5153 self.out.pop();
5154 }
5155 }
5156 self.emit_trailing_for(&child_path);
5157 self.emit_trailing_decorators_for(&child_path);
5158 self.out.push('\n');
5159 self.emit_unordered_table_block(tt, &child_path, indent + 1);
5160 }
5161 Value::List(inner) if !inner.is_empty() => {
5162 if has_inner {
5163 self.out.push(' ');
5164 self.emit_inner_for(&child_path);
5165 self.emit_inner_decorators_for(&child_path);
5166 if self.out.ends_with(' ') {
5167 self.out.pop();
5168 }
5169 }
5170 self.emit_trailing_for(&child_path);
5171 self.emit_trailing_decorators_for(&child_path);
5172 self.out.push('\n');
5173 self.emit_list_block(inner, &child_path, indent + 1);
5174 }
5175 _ => {
5176 self.out.push(' ');
5177 self.emit_inner_for(&child_path);
5178 self.emit_inner_decorators_for(&child_path);
5179 self.emit_value_inline(v, &child_path);
5180 self.emit_trailing_for(&child_path);
5181 self.emit_trailing_decorators_for(&child_path);
5182 self.out.push('\n');
5183 }
5184 }
5185 } }
5187 self.emit_floating(path, indent);
5188 }
5189
5190 fn emit_value_inline(&mut self, v: &Value, path: &[BreadcrumbSegment]) {
5191 match v {
5192 Value::Bool(b) => self.out.push_str(if *b { "true" } else { "false" }),
5193 Value::Integer(n) => self.emit_integer(*n, path),
5194 Value::Float(f) => self.emit_float(*f),
5195 Value::String(s) => self.emit_string(s, path),
5196 Value::OffsetDateTime(s)
5197 | Value::LocalDateTime(s)
5198 | Value::LocalDate(s)
5199 | Value::LocalTime(s) => self.out.push_str(s),
5200 Value::List(items) => {
5201 if items.is_empty() {
5202 self.out.push_str("[]");
5203 } else {
5204 self.out.push('[');
5209 for (i, item) in items.iter().enumerate() {
5210 if i > 0 {
5211 self.out.push_str(", ");
5212 }
5213 let mut sub_path: Vec<BreadcrumbSegment> = path.to_vec();
5214 sub_path.push(BreadcrumbSegment::Index(i));
5215 self.emit_value_inline(item, &sub_path);
5216 }
5217 self.out.push(']');
5218 }
5219 }
5220 Value::Table(t) => {
5221 if t.is_empty() {
5222 self.out.push_str("{}");
5223 } else {
5224 self.out.push('{');
5225 let mut first = true;
5226 for (k, vv) in t {
5227 if !first {
5228 self.out.push_str(", ");
5229 }
5230 first = false;
5231 self.out.push_str(&format_key(k));
5232 self.out.push_str(": ");
5233 let mut sub_path: Vec<BreadcrumbSegment> = path.to_vec();
5234 sub_path.push(BreadcrumbSegment::Key(k.clone()));
5235 self.emit_value_inline(vv, &sub_path);
5236 }
5237 self.out.push('}');
5238 }
5239 }
5240 Value::UnorderedTable(t) => {
5241 if t.is_empty() {
5242 self.out.push_str("{}");
5243 } else {
5244 self.out.push('{');
5245 let mut first = true;
5246 for (k, vv) in t {
5247 if !first {
5248 self.out.push_str(", ");
5249 }
5250 first = false;
5251 self.out.push_str(&format_key(k));
5252 self.out.push_str(": ");
5253 let mut sub_path: Vec<BreadcrumbSegment> = path.to_vec();
5254 sub_path.push(BreadcrumbSegment::Key(k.clone()));
5255 self.emit_value_inline(vv, &sub_path);
5256 }
5257 self.out.push('}');
5258 }
5259 }
5260 }
5261 }
5262
5263 fn emit_integer(&mut self, n: i64, path: &[BreadcrumbSegment]) {
5264 if let Some(lit_ref) = self.forms_by_path.get(path).copied() {
5265 if let OriginalLiteral::Integer { lit } = lit_ref {
5266 self.out.push_str(lit.as_str());
5267 return;
5268 }
5269 }
5270 self.out.push_str(&n.to_string());
5271 }
5272
5273 fn emit_float(&mut self, f: f64) {
5274 if f.is_nan() {
5275 self.out.push_str("nan");
5276 } else if f.is_infinite() {
5277 self.out.push_str(if f > 0.0 { "inf" } else { "-inf" });
5278 } else {
5279 let mut buf = ryu::Buffer::new();
5280 self.out.push_str(buf.format(f));
5281 }
5282 }
5283
5284 fn emit_string(&mut self, s: &str, path: &[BreadcrumbSegment]) {
5285 let form_opt = self
5286 .forms_by_path
5287 .get(path)
5288 .and_then(|lit| match lit {
5289 OriginalLiteral::String { form } => Some(form.clone()),
5290 _ => None,
5291 });
5292 let form = form_opt.unwrap_or(StringForm::Basic);
5293 match form {
5294 StringForm::Basic => {
5295 self.out.push('"');
5296 self.out.push_str(&escape_basic(s));
5297 self.out.push('"');
5298 }
5299 StringForm::Literal => {
5300 self.out.push('\'');
5301 self.out.push_str(s);
5302 self.out.push('\'');
5303 }
5304 StringForm::Heredoc { flavor, label, modifiers } => {
5305 let body_to_emit = if modifiers.iter().any(|m| m.name == "_fold_paragraphs") {
5315 s.replace('\n', "\n\n")
5316 } else {
5317 s.to_string()
5318 };
5319 self.emit_heredoc(&body_to_emit, flavor, label.as_deref(), &modifiers);
5320 }
5321 }
5322 }
5323
5324 fn emit_heredoc(
5325 &mut self,
5326 body: &str,
5327 flavor: HeredocFlavor,
5328 label: Option<&str>,
5329 modifiers: &[HeredocModifierCall],
5330 ) {
5331 let kv_indent_spaces = {
5336 let bytes = self.out.as_bytes();
5337 let mut last_nl = None;
5338 for i in (0..bytes.len()).rev() {
5339 if bytes[i] == b'\n' {
5340 last_nl = Some(i);
5341 break;
5342 }
5343 }
5344 let line_start = last_nl.map(|i| i + 1).unwrap_or(0);
5345 let mut k = line_start;
5347 let mut n = 0usize;
5348 while k < bytes.len() && bytes[k] == b' ' {
5349 n += 1;
5350 k += 1;
5351 }
5352 n
5353 };
5354 let body_indent_str = " ".repeat(kv_indent_spaces + INDENT_STR.len());
5355 let term_indent_str = " ".repeat(kv_indent_spaces + INDENT_STR.len());
5356 let opener = match flavor {
5357 HeredocFlavor::BasicTriple => "\"\"\"",
5358 HeredocFlavor::LiteralTriple => "'''",
5359 };
5360 self.out.push_str(opener);
5361 if let Some(lbl) = label {
5362 self.out.push_str(lbl);
5363 }
5364 for m in modifiers {
5365 self.out.push(' ');
5366 self.out.push_str(&m.name);
5367 self.out.push('(');
5368 for (i, a) in m.args.iter().enumerate() {
5369 if i > 0 {
5370 self.out.push_str(", ");
5371 }
5372 self.emit_modifier_arg(a);
5373 }
5374 self.out.push(')');
5375 }
5376 self.out.push('\n');
5377 if body.is_empty() {
5384 } else {
5386 for line in body.split('\n') {
5387 if line.is_empty() {
5388 self.out.push('\n');
5391 } else {
5392 self.out.push_str(&body_indent_str);
5393 self.out.push_str(line);
5394 self.out.push('\n');
5395 }
5396 }
5397 }
5398 self.out.push_str(&term_indent_str);
5400 let terminator = match (flavor, label) {
5401 (HeredocFlavor::BasicTriple, None) => "\"\"\"".to_string(),
5402 (HeredocFlavor::LiteralTriple, None) => "'''".to_string(),
5403 (_, Some(lbl)) => lbl.to_string(),
5404 };
5405 self.out.push_str(&terminator);
5406 }
5407
5408 fn emit_modifier_arg(&mut self, v: &Value) {
5409 match v {
5410 Value::Bool(b) => self.out.push_str(if *b { "true" } else { "false" }),
5411 Value::Integer(n) => self.out.push_str(&n.to_string()),
5412 Value::Float(f) => {
5413 if f.is_nan() {
5414 self.out.push_str("nan");
5415 } else if f.is_infinite() {
5416 self.out.push_str(if *f > 0.0 { "inf" } else { "-inf" });
5417 } else {
5418 let mut buf = ryu::Buffer::new();
5419 self.out.push_str(buf.format(*f));
5420 }
5421 }
5422 Value::String(s) => {
5423 self.out.push('"');
5424 self.out.push_str(&escape_basic(s));
5425 self.out.push('"');
5426 }
5427 Value::OffsetDateTime(s)
5428 | Value::LocalDateTime(s)
5429 | Value::LocalDate(s)
5430 | Value::LocalTime(s) => self.out.push_str(s),
5431 Value::List(_) | Value::Table(_) | Value::UnorderedTable(_) => {
5432 if matches!(v, Value::List(_)) {
5435 self.out.push_str("[]");
5436 } else {
5437 self.out.push_str("{}");
5438 }
5439 }
5440 }
5441 }
5442
5443 fn emit_comment_line(&mut self, c: &Comment, indent: usize) {
5444 let text = &c.content;
5452 let prefix = INDENT_STR.repeat(indent);
5453 if !text.contains('\n') {
5454 self.out.push_str(&prefix);
5455 self.out.push_str(text);
5456 self.out.push('\n');
5457 return;
5458 }
5459 let mut first = true;
5460 for line in text.split('\n') {
5461 if !first {
5462 self.out.push('\n');
5463 }
5464 if first {
5465 self.out.push_str(&prefix);
5466 first = false;
5467 }
5468 self.out.push_str(line);
5469 }
5470 self.out.push('\n');
5471 }
5472
5473 fn emit_trailing_for(&mut self, path: &[BreadcrumbSegment]) {
5474 if let Some(nc) = self.comments_by_path.get(path) {
5475 let mut first = true;
5476 for t in &nc.trailing {
5477 if first {
5478 self.out.push_str(" ");
5479 first = false;
5480 } else {
5481 self.out.push(' ');
5482 }
5483 self.out.push_str(&c_oneliner(t));
5484 }
5485 }
5486 }
5487
5488 fn emit_inner_for(&mut self, path: &[BreadcrumbSegment]) {
5489 if let Some(nc) = self.comments_by_path.get(path) {
5490 for c in &nc.inner {
5491 self.out.push_str(&c_oneliner(c));
5492 self.out.push(' ');
5493 }
5494 }
5495 }
5496
5497 fn has_inner(&self, path: &[BreadcrumbSegment]) -> bool {
5498 self.comments_by_path
5499 .get(path)
5500 .map(|nc| !nc.inner.is_empty())
5501 .unwrap_or(false)
5502 }
5503
5504 fn emit_floating(&mut self, path: &[BreadcrumbSegment], indent: usize) {
5505 if let Some(nc) = self.comments_by_path.get(path) {
5506 let floating: Vec<&Comment> = nc.floating.iter().copied().collect();
5507 for c in floating {
5508 self.emit_comment_line(c, indent);
5509 }
5510 }
5511 if let Some(nd) = self.decorators_by_path.get(path) {
5513 let floating: Vec<&tier1::DecoratorCall> = nd.floating.iter().copied().collect();
5514 for call in floating {
5515 self.push_indent(indent);
5516 self.emit_decorator_call(call);
5517 self.out.push('\n');
5518 }
5519 }
5520 }
5521
5522 fn emit_leading_decorators_for(&mut self, path: &[BreadcrumbSegment], indent: usize) {
5525 if let Some(nd) = self.decorators_by_path.get(path) {
5526 let leading: Vec<&tier1::DecoratorCall> = nd.leading.iter().copied().collect();
5527 for call in leading {
5528 self.push_indent(indent);
5529 self.emit_decorator_call(call);
5530 self.out.push('\n');
5531 }
5532 }
5533 }
5534
5535 fn emit_inner_decorators_for(&mut self, path: &[BreadcrumbSegment]) {
5538 if let Some(nd) = self.decorators_by_path.get(path) {
5539 let inner: Vec<&tier1::DecoratorCall> = nd.inner.iter().copied().collect();
5540 for call in inner {
5541 self.emit_decorator_call(call);
5542 self.out.push(' ');
5543 }
5544 }
5545 }
5546
5547 fn emit_trailing_decorators_for(&mut self, path: &[BreadcrumbSegment]) {
5550 if let Some(nd) = self.decorators_by_path.get(path) {
5551 let trailing: Vec<&tier1::DecoratorCall> = nd.trailing.iter().copied().collect();
5552 for call in trailing {
5553 self.out.push(' ');
5554 self.emit_decorator_call(call);
5555 }
5556 }
5557 }
5558
5559 fn emit_decorator_call(&mut self, call: &tier1::DecoratorCall) {
5563 self.out.push_str(&call.sigil);
5564 if call.call_style == tier1::CallStyle::Named {
5565 let name = match &call.ns {
5567 Some(ns) => format!("{}.{}", ns, call.fn_name),
5568 None => call.fn_name.clone(),
5569 };
5570 self.out.push_str(&name);
5571 }
5572 if call.params.len() == 1 {
5576 if let tier1::ParamGroup::Named(m) = &call.params[0] {
5577 if m.is_empty() {
5578 return;
5580 }
5581 }
5582 }
5583
5584 for group in &call.params {
5586 self.out.push('(');
5587 match group {
5588 tier1::ParamGroup::Named(m) => {
5589 let mut first = true;
5590 for (k, v) in m {
5591 if !first {
5592 self.out.push_str(", ");
5593 }
5594 first = false;
5595 self.out.push_str(&format_key(k));
5596 self.out.push_str(": ");
5597 let s = fmt_value_inline(v);
5598 self.out.push_str(&s);
5599 }
5600 }
5601 tier1::ParamGroup::Positional(items) => {
5602 let mut first = true;
5603 for v in items {
5604 if !first {
5605 self.out.push_str(", ");
5606 }
5607 first = false;
5608 let s = fmt_value_inline(v);
5609 self.out.push_str(&s);
5610 }
5611 }
5612 }
5613 self.out.push(')');
5614 }
5615 }
5616
5617 fn push_indent(&mut self, indent: usize) {
5618 for _ in 0..indent {
5619 self.out.push_str(INDENT_STR);
5620 }
5621 }
5622
5623 fn is_flow_safe(&self, v: &Value, path: &[BreadcrumbSegment]) -> bool {
5628 if !self.lite {
5632 for ac in &self.doc.comments {
5633 if ac.path.len() > path.len() && ac.path.starts_with(path) {
5634 return false;
5636 }
5637 }
5638 }
5639 match v {
5640 Value::String(_) => {
5641 if let Some(OriginalLiteral::String { form: StringForm::Heredoc { .. } }) =
5644 self.forms_by_path.get(path).copied()
5645 {
5646 return false;
5647 }
5648 true
5649 }
5650 Value::List(items) => {
5651 for (i, item) in items.iter().enumerate() {
5652 let mut sub: Vec<BreadcrumbSegment> = path.to_vec();
5653 sub.push(BreadcrumbSegment::Index(i));
5654 if !self.is_flow_safe(item, &sub) {
5655 return false;
5656 }
5657 }
5658 true
5659 }
5660 Value::Table(t) => {
5661 for (k, vv) in t {
5662 let mut sub: Vec<BreadcrumbSegment> = path.to_vec();
5663 sub.push(BreadcrumbSegment::Key(k.clone()));
5664 if !self.is_flow_safe(vv, &sub) {
5665 return false;
5666 }
5667 }
5668 true
5669 }
5670 Value::UnorderedTable(t) => {
5671 for (k, vv) in t {
5672 let mut sub: Vec<BreadcrumbSegment> = path.to_vec();
5673 sub.push(BreadcrumbSegment::Key(k.clone()));
5674 if !self.is_flow_safe(vv, &sub) {
5675 return false;
5676 }
5677 }
5678 true
5679 }
5680 _ => true,
5681 }
5682 }
5683}
5684
5685fn c_oneliner(c: &Comment) -> String {
5691 c.content.clone()
5692}
5693
5694fn fmt_value_inline(v: &Value) -> String {
5698 match v {
5699 Value::Bool(b) => (if *b { "true" } else { "false" }).to_string(),
5700 Value::Integer(n) => n.to_string(),
5701 Value::Float(f) => {
5702 if f.is_nan() {
5703 "nan".to_string()
5704 } else if f.is_infinite() {
5705 if *f > 0.0 { "inf".to_string() } else { "-inf".to_string() }
5706 } else {
5707 let mut buf = ryu::Buffer::new();
5708 buf.format(*f).to_string()
5709 }
5710 }
5711 Value::String(s) => {
5712 format!("\"{}\"", escape_basic(s))
5713 }
5714 Value::OffsetDateTime(s)
5715 | Value::LocalDateTime(s)
5716 | Value::LocalDate(s)
5717 | Value::LocalTime(s) => s.clone(),
5718 Value::List(items) => {
5719 let mut out = String::from("[");
5720 for (i, item) in items.iter().enumerate() {
5721 if i > 0 { out.push_str(", "); }
5722 out.push_str(&fmt_value_inline(item));
5723 }
5724 out.push(']');
5725 out
5726 }
5727 Value::Table(t) => {
5728 let mut out = String::from("{");
5729 let mut first = true;
5730 for (k, vv) in t {
5731 if !first { out.push_str(", "); }
5732 first = false;
5733 out.push_str(&format_key(k));
5734 out.push_str(": ");
5735 out.push_str(&fmt_value_inline(vv));
5736 }
5737 out.push('}');
5738 out
5739 }
5740 Value::UnorderedTable(t) => {
5741 let mut out = String::from("{");
5742 let mut first = true;
5743 for (k, vv) in t {
5744 if !first { out.push_str(", "); }
5745 first = false;
5746 out.push_str(&format_key(k));
5747 out.push_str(": ");
5748 out.push_str(&fmt_value_inline(vv));
5749 }
5750 out.push('}');
5751 out
5752 }
5753 }
5754}
5755
/// Escapes `s` for use inside a double-quoted (basic) string.
///
/// Backslash and double quote are backslash-escaped; newline, carriage
/// return, tab, backspace and form feed use their short escapes (`\n`, `\r`,
/// `\t`, `\b`, `\f`); any other character below U+0020 is written as
/// `\uXXXX` with four upper-case hex digits. Every other character is
/// copied through unchanged.
fn escape_basic(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_form = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            '\u{08}' => Some("\\b"),
            '\u{0c}' => Some("\\f"),
            _ => None,
        };
        match short_form {
            Some(sequence) => escaped.push_str(sequence),
            // Remaining control characters have no short escape.
            None if u32::from(ch) < 0x20 => {
                escaped.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            None => escaped.push(ch),
        }
    }
    escaped
}
5775
5776fn format_key(k: &str) -> String {
5777 if !k.is_empty() && k.chars().all(is_bare_key_char) {
5781 k.to_string()
5782 } else {
5783 if !k.contains('\'') && !k.contains('\n') && !k.contains('\r') {
5786 format!("'{}'", k)
5787 } else {
5788 format!("\"{}\"", escape_basic(k))
5789 }
5790 }
5791}
5792
// Unit tests for decoding scalar and container values, comment attachment,
// full-mode and lite-mode encoding, and the unordered-table decode variants.
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `src` and returns its top-level table, panicking if decoding
    /// fails or the result is not a `Value::Table`.
    fn parse_table(src: &str) -> DmsMap<Value> {
        match decode(src).expect("decode failed") {
            Value::Table(t) => t,
            other => panic!("expected table, got {other:?}"),
        }
    }

    /// An empty input decodes to an empty table.
    #[test]
    fn empty_doc() {
        assert!(parse_table("").is_empty());
    }

    /// A decimal integer literal decodes to `Value::Integer`.
    #[test]
    fn single_int() {
        let t = parse_table("n: 42\n");
        assert_eq!(t["n"], Value::Integer(42));
    }

    /// A `0x`-prefixed literal decodes to its integer value.
    #[test]
    fn hex_int() {
        let t = parse_table("n: 0xFF\n");
        assert_eq!(t["n"], Value::Integer(255));
    }

    /// A decimal float literal decodes to `Value::Float` (compared with a
    /// small tolerance).
    #[test]
    fn float_dec() {
        let t = parse_table("n: 3.14\n");
        if let Value::Float(f) = t["n"] {
            assert!((f - 3.14).abs() < 1e-12);
        } else {
            panic!();
        }
    }

    /// The literal `inf` decodes to positive infinity.
    #[test]
    fn float_inf() {
        let t = parse_table("n: inf\n");
        assert_eq!(t["n"], Value::Float(f64::INFINITY));
    }

    /// A bracketed flow array decodes to a list with one entry per element.
    #[test]
    fn flow_array() {
        let t = parse_table("xs: [1, 2, 3]\n");
        if let Value::List(v) = &t["xs"] {
            assert_eq!(v.len(), 3);
        } else {
            panic!();
        }
    }

    /// A braced flow table decodes to a table with one entry per pair.
    #[test]
    fn flow_table() {
        let t = parse_table("p: {x: 1, y: 2}\n");
        if let Value::Table(v) = &t["p"] {
            assert_eq!(v.len(), 2);
        } else {
            panic!();
        }
    }

    /// Key/value lines nested under a key decode to a nested table.
    #[test]
    fn nested_table() {
        let t = parse_table("a:\n b: 1\n c: 2\n");
        if let Value::Table(v) = &t["a"] {
            assert_eq!(v.len(), 2);
        } else {
            panic!();
        }
    }

    /// `+`-prefixed lines nested under a key decode to a list.
    #[test]
    fn nested_list() {
        let t = parse_table("xs:\n + 1\n + 2\n + 3\n");
        if let Value::List(v) = &t["xs"] {
            assert_eq!(v.len(), 3);
        } else {
            panic!();
        }
    }

    /// A list whose items carry key/value pairs decodes to one entry per
    /// `+` item.
    #[test]
    fn list_of_tables() {
        let src = "servers:\n + name: \"web1\"\n port: 80\n + name: \"web2\"\n port: 81\n";
        let t = parse_table(src);
        if let Value::List(v) = &t["servers"] {
            assert_eq!(v.len(), 2);
        } else {
            panic!();
        }
    }

    /// A labelled basic heredoc decodes to its lines joined by `\n`.
    #[test]
    fn heredoc_basic() {
        let src = "doc: \"\"\"EOF\n hello\n world\n EOF\n";
        let t = parse_table(src);
        assert_eq!(t["doc"], Value::String("hello\nworld".into()));
    }

    /// A bare date decodes to `Value::LocalDate` holding the source text.
    #[test]
    fn date_only() {
        let t = parse_table("d: 1979-05-27\n");
        assert_eq!(t["d"], Value::LocalDate("1979-05-27".into()));
    }

    /// A date-time with a `Z` offset decodes to `Value::OffsetDateTime`.
    #[test]
    fn offset_dt() {
        let t = parse_table("d: 1979-05-27T07:32:00Z\n");
        assert_eq!(t["d"], Value::OffsetDateTime("1979-05-27T07:32:00Z".into()));
    }

    /// `decode` rejects front matter declaring `_dms_tier: 1`, and the error
    /// message mentions `decode_t1`.
    #[test]
    fn rejects_tier1_in_front_matter() {
        let err = decode("+++\n_dms_tier: 1\n+++\nx: 1\n").unwrap_err();
        assert!(
            err.message.contains("decode_t1"),
            "expected tier-1 rejection to mention decode_t1, got: {}",
            err.message
        );
    }

    /// Front matter declaring `_dms_tier: 0` is accepted and the body still
    /// decodes.
    #[test]
    fn accepts_explicit_tier_zero() {
        let t = match decode_document("+++\n_dms_tier: 0\n+++\nx: 1\n").unwrap().body {
            Value::Table(t) => t,
            other => panic!("expected table, got {other:?}"),
        };
        assert_eq!(t["x"], Value::Integer(1));
    }

    /// Shorthand for building a `BreadcrumbSegment::Key` from a string slice.
    fn key(s: &str) -> BreadcrumbSegment {
        BreadcrumbSegment::Key(s.to_string())
    }

    /// A line comment directly above a key/value pair is recorded as a
    /// leading line comment at that key's path.
    #[test]
    fn comment_leading_attaches_to_next_kvpair() {
        let doc = decode_document("# leading\nport: 8080\n").unwrap();
        assert_eq!(doc.comments.len(), 1);
        let ac = &doc.comments[0];
        assert_eq!(ac.position, CommentPosition::Leading);
        assert_eq!(ac.path, vec![key("port")]);
        assert_eq!(ac.comment.kind, CommentKind::Line);
        assert_eq!(ac.comment.content, "# leading");
    }

    /// A comment separated from the next pair by a blank line is recorded as
    /// floating with an empty path.
    #[test]
    fn comment_blank_line_gap_is_floating_on_container() {
        let doc = decode_document("# floating\n\nport: 8080\n").unwrap();
        assert_eq!(doc.comments.len(), 1);
        let ac = &doc.comments[0];
        assert_eq!(ac.position, CommentPosition::Floating);
        assert!(ac.path.is_empty(), "expected empty path, got {:?}", ac.path);
        assert_eq!(ac.comment.content, "# floating");
    }

    /// A comment on the same line as a value is recorded as trailing at that
    /// key's path.
    #[test]
    fn comment_trailing_on_same_line() {
        let doc = decode_document("port: 8080 # default\n").unwrap();
        assert_eq!(doc.comments.len(), 1);
        let ac = &doc.comments[0];
        assert_eq!(ac.position, CommentPosition::Trailing);
        assert_eq!(ac.path, vec![key("port")]);
        assert_eq!(ac.comment.content, "# default");
    }

    /// A comment after the last entry of a nested block is recorded as
    /// floating at the enclosing key's path.
    #[test]
    fn comment_floating_at_end_of_block() {
        let src = "a:\n x: 1\n # leftover\n";
        let doc = decode_document(src).unwrap();
        assert_eq!(doc.comments.len(), 1);
        let ac = &doc.comments[0];
        assert_eq!(ac.position, CommentPosition::Floating);
        assert_eq!(ac.path, vec![key("a")]);
        assert_eq!(ac.comment.content, "# leftover");
    }

    /// A `###`-delimited block comment above a pair is recorded as a leading
    /// block comment whose content keeps both delimiters.
    #[test]
    fn block_comment_attaches_as_leading() {
        let src = "###\nNOTE\n###\nport: 8080\n";
        let doc = decode_document(src).unwrap();
        assert_eq!(doc.comments.len(), 1);
        let ac = &doc.comments[0];
        assert_eq!(ac.position, CommentPosition::Leading);
        assert_eq!(ac.path, vec![key("port")]);
        assert_eq!(ac.comment.kind, CommentKind::Block);
        assert!(
            ac.comment.content.starts_with("###") && ac.comment.content.ends_with("###"),
            "expected delimiters preserved, got: {:?}",
            ac.comment.content
        );
    }

    /// A comment inside front matter is recorded with a path that starts
    /// with the `__fm__` key segment.
    #[test]
    fn front_matter_comment_recorded_with_fm_prefix() {
        let src = "+++\n# meta-leading\nauthor: \"x\"\n+++\nbody: 1\n";
        let doc = decode_document(src).unwrap();
        assert_eq!(doc.comments.len(), 1);
        let ac = &doc.comments[0];
        assert_eq!(ac.position, CommentPosition::Leading);
        assert_eq!(ac.path, vec![key("__fm__"), key("author")]);
        assert_eq!(ac.comment.content, "# meta-leading");
    }

    /// A document without comments decodes with an empty `comments` vector.
    #[test]
    fn document_with_no_comments_has_empty_comments_vec() {
        let doc = decode_document("a: 1\nb: 2\n").unwrap();
        assert!(doc.comments.is_empty(), "expected no comments, got {:?}", doc.comments);
    }

    /// Decodes `src` as a document and re-encodes it in full mode, panicking
    /// on either failure.
    fn roundtrip(src: &str) -> String {
        let doc = decode_document(src).expect("parse failed");
        encode(&doc).expect("encode failed")
    }

    /// Full-mode encoding keeps the original integer spelling (base prefix,
    /// underscores, explicit sign) and re-decodes to the same body.
    #[test]
    fn encode_int_base_preserved() {
        let src = "a: 0x1F40\nb: 0o755\nc: 0b1010_0110\nd: 1_000_000\ne: +42\nf: -7\n";
        let doc = decode_document(src).unwrap();
        let out = encode(&doc).unwrap();
        assert!(out.contains("a: 0x1F40"), "out:\n{out}");
        assert!(out.contains("b: 0o755"), "out:\n{out}");
        assert!(out.contains("c: 0b1010_0110"), "out:\n{out}");
        assert!(out.contains("d: 1_000_000"), "out:\n{out}");
        assert!(out.contains("e: +42"), "out:\n{out}");
        assert!(out.contains("f: -7"), "out:\n{out}");
        let doc2 = decode_document(&out).unwrap();
        assert_eq!(doc.body, doc2.body);
    }

    /// Full-mode encoding keeps basic, literal and labelled-heredoc string
    /// forms, and the output re-decodes to the same body.
    #[test]
    fn encode_string_forms_preserved() {
        let src = concat!(
            "basic: \"hello\"\n",
            "lit: 'C:\\path'\n",
            "hd_b_lab: \"\"\"END\n",
            " hello\n",
            " END\n",
            "hd_b_unl: \"\"\"\n",
            " one\n",
            " \"\"\"\n",
            "hd_l_lab: '''END\n",
            " raw\n",
            " END\n",
            "hd_l_unl: '''\n",
            " raw2\n",
            " '''\n",
        );
        let doc = decode_document(src).unwrap();
        let out = encode(&doc).unwrap();
        assert!(out.contains("basic: \"hello\""), "out:\n{out}");
        assert!(out.contains("lit: 'C:\\path'"), "out:\n{out}");
        assert!(out.contains("\"\"\"END"), "out:\n{out}");
        assert!(out.contains("'''END"), "out:\n{out}");
        let doc2 = decode_document(&out).unwrap();
        assert_eq!(doc.body, doc2.body);
    }

    /// A heredoc modifier call (`_trim(...)`) is present in full-mode output
    /// and the output re-decodes to the same body.
    #[test]
    fn encode_heredoc_modifiers_preserved() {
        let src = concat!(
            "msg: \"\"\"END _trim(\"\\n\", \">\")\n",
            " > hi\n",
            " END\n",
        );
        let doc = decode_document(src).unwrap();
        let out = encode(&doc).unwrap();
        assert!(out.contains("_trim("), "expected _trim() in output:\n{out}");
        let doc2 = decode_document(&out).unwrap();
        assert_eq!(doc.body, doc2.body);
    }

    /// After an encode/decode round trip, leading, trailing and floating
    /// comments are still attached to the same paths.
    #[test]
    fn encode_comments_at_attached_paths() {
        let src = concat!(
            "# leading on a\n",
            "a: 1 # trailing on a\n",
            "b:\n",
            " x: 2\n",
            " # floating in b\n",
        );
        let doc = decode_document(src).unwrap();
        let out = encode(&doc).unwrap();
        let doc2 = decode_document(&out).unwrap();
        assert_eq!(doc2.comments.len(), 3);
        let mut have_leading = false;
        let mut have_trailing = false;
        let mut have_floating = false;
        for ac in &doc2.comments {
            match ac.position {
                CommentPosition::Leading => {
                    if ac.path == vec![key("a")] {
                        have_leading = true;
                    }
                }
                CommentPosition::Trailing => {
                    if ac.path == vec![key("a")] {
                        have_trailing = true;
                    }
                }
                CommentPosition::Floating => {
                    if ac.path == vec![key("b")] {
                        have_floating = true;
                    }
                }
                CommentPosition::Inner => { }
            }
        }
        assert!(have_leading && have_trailing && have_floating, "out:\n{out}\ncomments: {:?}", doc2.comments);
    }

    /// A document without front matter encodes without a `+++` fence.
    #[test]
    fn encode_front_matter_omitted_when_empty() {
        let src = "x: 1\n";
        let out = roundtrip(src);
        assert!(!out.contains("+++"), "expected no front matter in output:\n{out}");
    }

    /// Encoding the decode of an encoder's own output yields the same bytes
    /// for each sample document.
    #[test]
    fn encode_second_round_byte_stable() {
        let cases = [
            "a: 1\nb: 2\n",
            "# leading\nport: 0x1F40 # trailing\n",
            "+++\nauthor: \"x\"\n+++\nbody: 1\n",
            concat!(
                "msg: \"\"\"END\n",
                " hello world\n",
                " END\n",
                "ints:\n",
                " + 0xFF\n",
                " + 1_000\n",
            ),
            "items: [1, 2, 3]\np: {x: 1, y: 2}\n",
        ];
        for src in cases {
            let out1 = encode(&decode_document(src).unwrap()).unwrap();
            let out2 = encode(&decode_document(&out1).unwrap()).unwrap();
            assert_eq!(out1, out2, "round-trip not stable for src:\n{src}\nout1:\n{out1}\nout2:\n{out2}");
        }
    }

    /// Counts the comments in `doc` whose path equals `path` exactly.
    fn count_comments_at_path(doc: &Document, path: &[BreadcrumbSegment]) -> usize {
        doc.comments.iter().filter(|c| c.path == path).count()
    }

    /// Replacing a key's value keeps that key's leading and trailing comments
    /// through an encode/decode round trip.
    #[test]
    fn encode_value_update_preserves_attached_comments() {
        let src = "# the listening port\nport: 8080 # default for staging\nhost: \"localhost\"\n";
        let mut doc = decode_document(src).unwrap();
        if let Value::Table(ref mut t) = doc.body {
            *t.get_mut("port").unwrap() = Value::Integer(5432);
        } else {
            panic!("expected table");
        }
        let emitted = encode(&doc).unwrap();
        let doc2 = decode_document(&emitted).unwrap();
        if let Value::Table(t) = &doc2.body {
            assert_eq!(t["port"], Value::Integer(5432));
        } else {
            panic!("expected table after re-parse");
        }
        let port = vec![BreadcrumbSegment::Key("port".to_string())];
        let leading = doc2.comments.iter().filter(|c| c.path == port && c.position == CommentPosition::Leading).count();
        let trailing = doc2.comments.iter().filter(|c| c.path == port && c.position == CommentPosition::Trailing).count();
        assert_eq!(leading, 1, "leading comment on `port` should survive update; got doc2.comments = {:?}", doc2.comments);
        assert_eq!(trailing, 1, "trailing comment on `port` should survive update; got doc2.comments = {:?}", doc2.comments);
    }

    /// Removing a key drops its comments from the encoded output while a
    /// sibling key keeps its own leading and trailing comments.
    #[test]
    fn encode_deleted_key_drops_attached_comments() {
        let src = "# keep this\nkeep: 1 # me too\n# drop this\ndrop: 2 # bye\n";
        let mut doc = decode_document(src).unwrap();
        if let Value::Table(ref mut t) = doc.body {
            t.shift_remove("drop");
        } else {
            panic!("expected table");
        }
        let emitted = encode(&doc).unwrap();
        let doc2 = decode_document(&emitted).unwrap();
        if let Value::Table(t) = &doc2.body {
            assert!(!t.contains_key("drop"), "deleted key should not reappear");
            assert!(t.contains_key("keep"), "non-deleted key should remain");
        } else {
            panic!("expected table after re-parse");
        }
        let drop_path = vec![BreadcrumbSegment::Key("drop".to_string())];
        assert_eq!(
            count_comments_at_path(&doc2, &drop_path),
            0,
            "deleted key's comments must not survive; got: {:?}",
            doc2.comments
        );
        let keep_path = vec![BreadcrumbSegment::Key("keep".to_string())];
        let keep_leading = doc2.comments.iter().filter(|c| c.path == keep_path && c.position == CommentPosition::Leading).count();
        let keep_trailing = doc2.comments.iter().filter(|c| c.path == keep_path && c.position == CommentPosition::Trailing).count();
        assert_eq!(keep_leading, 1, "leading comment on `keep` must survive sibling deletion");
        assert_eq!(keep_trailing, 1, "trailing comment on `keep` must survive sibling deletion");
    }

    /// A newly inserted key has no comments after a round trip, and the
    /// existing key keeps both of its comments.
    #[test]
    fn encode_inserted_key_carries_no_comments() {
        let src = "# leading on existing\nexisting: 1 # trailing on existing\n";
        let mut doc = decode_document(src).unwrap();
        if let Value::Table(ref mut t) = doc.body {
            t.insert("inserted".to_string(), Value::String("new".to_string()));
        } else {
            panic!("expected table");
        }
        let emitted = encode(&doc).unwrap();
        let doc2 = decode_document(&emitted).unwrap();
        if let Value::Table(t) = &doc2.body {
            assert_eq!(t["existing"], Value::Integer(1));
            assert_eq!(t["inserted"], Value::String("new".to_string()));
        } else {
            panic!("expected table after re-parse");
        }
        let inserted_path = vec![BreadcrumbSegment::Key("inserted".to_string())];
        assert_eq!(
            count_comments_at_path(&doc2, &inserted_path),
            0,
            "newly inserted key must not pick up comments; got: {:?}",
            doc2.comments
        );
        let existing_path = vec![BreadcrumbSegment::Key("existing".to_string())];
        assert_eq!(
            count_comments_at_path(&doc2, &existing_path),
            2,
            "existing key's leading + trailing comments must survive insertion of a sibling"
        );
    }

    /// Combines an update (`a`), a removal (`b`) and an insertion (`d`) and
    /// checks values and per-key comment counts after a round trip.
    #[test]
    fn encode_combined_mutations() {
        let src = "# A\na: 1 # a-trail\n# B\nb: 2 # b-trail\n# C\nc: 3 # c-trail\n";
        let mut doc = decode_document(src).unwrap();
        if let Value::Table(ref mut t) = doc.body {
            *t.get_mut("a").unwrap() = Value::Integer(100);
            t.shift_remove("b");
            t.insert("d".to_string(), Value::Integer(4));
        } else {
            panic!("expected table");
        }
        let emitted = encode(&doc).unwrap();
        let doc2 = decode_document(&emitted).unwrap();
        if let Value::Table(t) = &doc2.body {
            assert_eq!(t["a"], Value::Integer(100));
            assert!(!t.contains_key("b"));
            assert_eq!(t["c"], Value::Integer(3));
            assert_eq!(t["d"], Value::Integer(4));
        } else {
            panic!("expected table after re-parse");
        }
        let p = |k: &str| vec![BreadcrumbSegment::Key(k.to_string())];
        assert_eq!(count_comments_at_path(&doc2, &p("a")), 2, "a should keep both comments");
        assert_eq!(count_comments_at_path(&doc2, &p("b")), 0, "b should be gone with its comments");
        assert_eq!(count_comments_at_path(&doc2, &p("c")), 2, "c should keep both comments");
        assert_eq!(count_comments_at_path(&doc2, &p("d")), 0, "d should have no comments");
    }

    /// Lite encoding emits no `#` at all, and its output re-decodes to the
    /// same body with no comments.
    #[test]
    fn encode_lite_drops_comments() {
        let src = "# leading\na: 1 # trailing\n# floating\n";
        let doc = decode_document(src).unwrap();
        assert!(!doc.comments.is_empty(), "fixture should capture comments");
        let lite = encode_lite(&doc);
        assert!(!lite.contains('#'), "lite emit should have no comments, got:\n{lite}");
        let doc2 = decode_document(&lite).unwrap();
        assert_eq!(doc.body, doc2.body);
        assert!(doc2.comments.is_empty(), "re-parsed lite emit should have no comments");
    }

    /// Lite encoding writes integers in plain decimal regardless of the
    /// base or underscores used in the source.
    #[test]
    fn encode_lite_canonicalises_integer_base() {
        let src = "a: 0x1F40\nb: 0o755\nc: 0b1010_0110\nd: 1_000_000\n";
        let doc = decode_document(src).unwrap();
        let lite = encode_lite(&doc);
        assert!(!lite.contains("0x"), "lite emit should not preserve hex base, got:\n{lite}");
        assert!(!lite.contains("0o"), "lite emit should not preserve oct base, got:\n{lite}");
        assert!(!lite.contains("0b"), "lite emit should not preserve bin base, got:\n{lite}");
        assert!(lite.contains("a: 8000"), "lite emit should canonicalise 0x1F40 to 8000, got:\n{lite}");
        assert!(lite.contains("b: 493"), "lite emit should canonicalise 0o755 to 493, got:\n{lite}");
        assert!(lite.contains("c: 166"), "lite emit should canonicalise 0b10100110 to 166, got:\n{lite}");
        assert!(lite.contains("d: 1000000"), "lite emit should canonicalise underscored 1_000_000 to 1000000, got:\n{lite}");
        let doc2 = decode_document(&lite).unwrap();
        assert_eq!(doc.body, doc2.body);
    }

    /// Lite encoding writes single-quoted source strings in double-quoted
    /// form.
    #[test]
    fn encode_lite_canonicalises_string_form() {
        let src = "a: 'literal'\nb: \"basic\"\n";
        let doc = decode_document(src).unwrap();
        let lite = encode_lite(&doc);
        assert!(!lite.contains("'literal'"),
            "lite emit should canonicalise '...' to \"...\", got:\n{lite}");
        assert!(lite.contains("a: \"literal\""), "got:\n{lite}");
        assert!(lite.contains("b: \"basic\""), "got:\n{lite}");
    }

    /// A document from `decode_lite_document` carries no comments or
    /// original forms, and its lite encoding re-decodes to the same body.
    #[test]
    fn encode_lite_works_on_lite_parse() {
        let src = "# leading\na: 0x1F40\nb: 'hello'\n";
        let doc = decode_lite_document(src).unwrap();
        assert!(doc.comments.is_empty());
        assert!(doc.original_forms.is_empty());
        let lite = encode_lite(&doc);
        let doc2 = decode_document(&lite).unwrap();
        assert_eq!(doc.body, doc2.body);
    }

    /// `encode_with_mode` matches `encode` for `EmitMode::Full` and
    /// `encode_lite` for `EmitMode::Lite`.
    #[test]
    fn encode_with_mode_dispatches_correctly() {
        let src = "# c\na: 1\n";
        let doc = decode_document(src).unwrap();
        assert_eq!(
            encode_with_mode(&doc, EmitMode::Full).unwrap(),
            encode(&doc).unwrap()
        );
        assert_eq!(
            encode_with_mode(&doc, EmitMode::Lite).unwrap(),
            encode_lite(&doc)
        );
    }

    /// `decode_document_unordered` yields a `Value::UnorderedTable` body with
    /// the expected entries.
    #[test]
    fn decode_document_unordered_produces_unordered_table() {
        let doc = decode_document_unordered("a: 1\nb: 2\n").unwrap();
        match &doc.body {
            Value::UnorderedTable(t) => {
                assert_eq!(t.len(), 2);
                assert_eq!(t["a"], Value::Integer(1));
                assert_eq!(t["b"], Value::Integer(2));
            }
            other => panic!("expected Value::UnorderedTable, got {other:?}"),
        }
    }

    /// `decode_lite_document_unordered` drops comments and original forms and
    /// yields a `Value::UnorderedTable` body.
    #[test]
    fn decode_lite_document_unordered_produces_unordered_table() {
        let src = "# leading\na: 0xFF\nb: 'lit'\n";
        let doc = decode_lite_document_unordered(src).unwrap();
        assert!(doc.comments.is_empty(), "lite mode must drop comments");
        assert!(doc.original_forms.is_empty(), "lite mode must drop original_forms");
        match &doc.body {
            Value::UnorderedTable(t) => {
                assert_eq!(t.len(), 2);
                assert_eq!(t["a"], Value::Integer(255));
                assert_eq!(t["b"], Value::String("lit".to_string()));
            }
            other => panic!("expected Value::UnorderedTable, got {other:?}"),
        }
    }

    /// Lite-encoding an unordered document and re-decoding it in ordered mode
    /// preserves the key set and every value (order is not compared).
    #[test]
    fn unordered_data_round_trip() {
        let src = "a: 1\nb: 2\nc: 3\nd: 'x'\n";
        let unordered = decode_document_unordered(src).unwrap();
        let emitted = encode_lite(&unordered);
        let reparsed = decode_document(&emitted).unwrap();
        let unordered_keys: std::collections::HashSet<&String> = match &unordered.body {
            Value::UnorderedTable(t) => t.keys().collect(),
            other => panic!("expected UnorderedTable, got {other:?}"),
        };
        let reparsed_map: &DmsMap<Value> = match &reparsed.body {
            Value::Table(t) => t,
            other => panic!("expected Table after re-parse, got {other:?}"),
        };
        let reparsed_keys: std::collections::HashSet<&String> =
            reparsed_map.keys().collect();
        assert_eq!(unordered_keys, reparsed_keys, "key sets must match");
        if let Value::UnorderedTable(t) = &unordered.body {
            for (k, v) in t {
                assert_eq!(reparsed_map.get(k), Some(v), "value for {k} must match");
            }
        }
    }

    /// Full-mode `encode` returns `EncodeError::UnorderedInFullMode` for a
    /// document whose body is an unordered table.
    #[test]
    fn encode_full_errors_on_unordered() {
        let doc = decode_document_unordered("a: 1\nb: 2\n").unwrap();
        assert_eq!(encode(&doc), Err(EncodeError::UnorderedInFullMode));
    }

    /// Full-mode `encode` also returns the error when the unordered table is
    /// nested inside a list.
    #[test]
    fn encode_full_errors_on_unordered_nested() {
        let mut nested: DmsHashMap<Value> = DmsHashMap::default();
        nested.insert("x".to_string(), Value::Integer(1));
        let doc = Document {
            meta: None,
            body: Value::List(vec![Value::UnorderedTable(nested)]),
            comments: Vec::new(),
            original_forms: Vec::new(),
        };
        assert_eq!(encode(&doc), Err(EncodeError::UnorderedInFullMode));
    }

    /// `encode_lite` accepts an unordered document; its output re-decodes to
    /// an ordered table with the same entries.
    #[test]
    fn encode_lite_accepts_unordered() {
        let src = "a: 1\nb: 2\n";
        let doc = decode_document_unordered(src).unwrap();
        let lite = encode_lite(&doc);
        let doc2 = decode_document(&lite).unwrap();
        match &doc2.body {
            Value::Table(t) => {
                assert_eq!(t.len(), 2);
                assert_eq!(t["a"], Value::Integer(1));
                assert_eq!(t["b"], Value::Integer(2));
            }
            other => panic!("expected Table after re-parse, got {other:?}"),
        }
    }

    /// Plain `decode_document` still yields an ordered `Value::Table` with
    /// keys in source order, and full-mode encoding round-trips its body.
    #[test]
    fn decode_document_ordered_unaffected() {
        let doc = decode_document("a: 1\nb: 2\n").unwrap();
        match &doc.body {
            Value::Table(t) => {
                assert_eq!(t.len(), 2);
                let keys: Vec<&String> = t.keys().collect();
                assert_eq!(keys, vec![&"a".to_string(), &"b".to_string()]);
            }
            other => panic!("expected Value::Table, got {other:?}"),
        }
        let emitted = encode(&doc).expect("encode failed");
        let doc2 = decode_document(&emitted).unwrap();
        assert_eq!(doc.body, doc2.body);
    }

    /// In unordered mode, a nested block table is also decoded as
    /// `Value::UnorderedTable`.
    #[test]
    fn unordered_nested_table_via_block() {
        let src = "outer:\n inner: 1\n other: 2\n";
        let doc = decode_document_unordered(src).unwrap();
        match &doc.body {
            Value::UnorderedTable(t) => {
                match t.get("outer").unwrap() {
                    Value::UnorderedTable(inner) => {
                        assert_eq!(inner.len(), 2);
                        assert_eq!(inner["inner"], Value::Integer(1));
                        assert_eq!(inner["other"], Value::Integer(2));
                    }
                    other => panic!("expected UnorderedTable inner, got {other:?}"),
                }
            }
            other => panic!("expected UnorderedTable outer, got {other:?}"),
        }
    }

    /// In unordered mode, a flow table is also decoded as
    /// `Value::UnorderedTable`.
    #[test]
    fn unordered_flow_table() {
        let src = "x: { a: 1, b: 2 }\n";
        let doc = decode_document_unordered(src).unwrap();
        match &doc.body {
            Value::UnorderedTable(outer) => match outer.get("x").unwrap() {
                Value::UnorderedTable(t) => {
                    assert_eq!(t.len(), 2);
                    assert_eq!(t["a"], Value::Integer(1));
                    assert_eq!(t["b"], Value::Integer(2));
                }
                other => panic!("expected UnorderedTable for flow, got {other:?}"),
            },
            other => panic!("expected UnorderedTable, got {other:?}"),
        }
    }
}